1、数据准备的代码
/** Sample document ids, one per test document. */
private String[] ids = {"1", "2", "3", "4", "5", "6"};

/** Sample e-mail addresses, parallel to {@code ids}. */
private String[] emails = {
    "aa@itat.org",
    "bb@itat.org",
    "cc@cc.org",
    "dd@sina.org",
    "ee@zttc.edu",
    "ff@itat.org"
};

/** Sample body texts, parallel to {@code ids}. */
private String[] contents = {
    "welcome to visited the space,I like book",
    "hello boy, I like pingpeng ball",
    "my name is cc I like game",
    "I like football",
    "I like football and I like basketball too",
    "I like movie and swim"
};

/** Sample dates, parallel to {@code ids}; filled in by {@code setDates()}. */
private Date[] dates;

/** Sample attachment counts, parallel to {@code ids}. */
private int[] attachs = {2, 3, 1, 4, 5, 5};

/** Sample author names, parallel to {@code ids}. */
private String[] names = {"zhangsan", "lisi", "john", "jetty", "mike", "jake"};

/** Directory holding the index; assigned in the constructor. */
private Directory directory;
2、该类的构造函数
/**
 * Prepares the sample dates and the index directory, and opens a reader
 * when the directory already contains an index.
 *
 * <p>Any {@link IOException} raised while setting up is printed and
 * otherwise ignored, so construction itself never fails with it.
 */
public IndexUtil() {
    try {
        setDates();
        // To build the index on disk instead, use:
        // directory = FSDirectory.open(new File("d:/lucene/index02"));
        // Build the index in memory.
        directory = new RAMDirectory();
        // A freshly created directory has no segments file yet, so opening
        // a reader on it would always fail; only open one when an index is
        // actually present. Otherwise reader is left unassigned, exactly
        // as it was when the failed open was caught.
        if (IndexReader.indexExists(directory)) {
            reader = IndexReader.open(directory, false);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
//这个方法也是在为测试数据做准备 private void setDates() { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); try { dates = new Date[ids.length]; dates[0] = sdf.parse("2010-02-19"); dates[1] = sdf.parse("2012-01-11"); dates[2] = sdf.parse("2011-09-19"); dates[3] = sdf.parse("2010-12-22"); dates[4] = sdf.parse("2012-01-01"); dates[5] = sdf.parse("2011-05-19"); } catch (ParseException e) { e.printStackTrace(); } }
3、索引的构建
Field.Store.YES或者NO(存储域选项)
设置为YES表示会把这个域中的内容完全存储到文件中,方便进行文本的还原
设置为NO表示不把这个域的内容存储到文件中,但是可以被索引,此时内容无法还原(doc.get取不到值,返回null)
Field.Index(索引选项)
Index.ANALYZED:进行分词和索引,适用于标题、内容等
Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号、姓名、ID等,适用于精确搜索
Index.ANALYZED_NO_NORMS:进行分词但是不存储norms信息,norms中保存的是索引阶段(index-time)的加权(boost)和域长度归一化因子等信息,用于搜索时的评分
Index.NOT_ANALYZED_NO_NORMS:既不进行分词也不存储norms信息
Index.NO:不进行索引
public void index() { IndexWriter writer = null; try { writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); //一下这个方法是先把directory的索引先删除,否则索引是会不断新增的 //writer.deleteAll(); Document doc = null; for(int i=0;i<ids.length;i++) { doc = new Document(); doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("email","test"+i+"@test.com",Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED)); doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); //存储数字 doc.add(new NumericField("attach",Field.Store.YES,true).setIntValue(attachs[i])); //存储日期 doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime())); writer.addDocument(doc); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if(writer!=null)writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
4、测试方法
/** Creates an {@code IndexUtil} and runs its {@code index()} method. */
@Test
public void testIndex() {
    new IndexUtil().index();
}
5、在磁盘创建索引截图
6、创建索引之前没有执行writer.deleteAll()时的效果截图