环境搭建:
1、lucene-core-3.5.0.jar
2、mmseg4j-all-1.8.5-with-dic.jar(中文分词器)
3、lucene-highlighter-3.5.0.jar、lucene-memory-3.5.0.jar(高亮显示)
全文检索工具一般都由以下三部分组成:
1、索引部分(I am a boy)
2、分词部分
3、搜索部分
/** * 建立索引 */ public void index() { //1、创建Directory //Directory directory = new RAMDirectory();//建立在内存中 Directory directory = null; try { directory = FSDirectory.open(new File("D:/test/index01")); } catch (IOException e1) { e1.printStackTrace(); } //2、创建IndexWriter IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer( Version.LUCENE_36)); IndexWriter writer = null; try { writer = new IndexWriter(directory, config); //3、创建Document对象 Document document = null; //4、为Document添加Field Collection<File> files = FileUtils.listFiles(new File("D:/test/lucene"), FileFileFilter.FILE, null); for (File file : files) { document = new Document(); document.add(new Field("content", new FileReader(file))); document.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); document.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));//是否存储路径,是否进行分词 writer.addDocument(document); } //5、通过IndexWriter添加对象到索引中 } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (writer != null) { try { writer.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } } /** * 搜索 */ public void searcher() { try { //1、创建Directory Directory directory = FSDirectory.open(new File("D:/test/index01")); //2、创建IndexReader IndexReader reader = IndexReader.open(directory); //3、根据IndexReader创建IndexSearcher IndexSearcher searcher = new IndexSearcher(reader); //4、创建搜索的Query QueryParser parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));//content表示搜索的Field Query query = parser.parse("File");//表示内容中含有“File”的 //5、根据searcher搜索并返回TopDocs TopDocs topDocs = searcher.search(query, 10);//只搜索10条 //6、根据TopDocs对象获取ScoreDoc对象 ScoreDoc[] sds = topDocs.scoreDocs; for (ScoreDoc scoreDoc : sds) { 
//7、根据searcher和ScoreDoc对象获取具体的Document对象 Document document = searcher.doc(scoreDoc.doc); //8、根据Document对象获取需要的值 System.out.println(document.get("filename")); System.out.println(document.get("path")); } //9、关闭reader reader.clone(); } catch (Exception e) { e.printStackTrace(); } }