Lucene官网: http://lucene.apache.org/
2. Lucene实现
New maven project -> Create a simple project -> Group Id: com.andrew.lucene Artifact Id: Lucene01 Version: 0.0.1-SNAPSHOT Packaging: jar
// Indexer.java
package com.andrew.lucene;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Writes one Lucene document per regular file found directly inside a data directory.
 *
 * <p>Each document carries three fields: {@code contents} (the file's text, supplied through a
 * reader), {@code fileName} and {@code fullPath} (both stored). Implements
 * {@link AutoCloseable} so callers can manage the underlying {@link IndexWriter} with
 * try-with-resources.
 */
public class Indexer implements AutoCloseable {

    /** Writer through which all documents are added to the index. */
    private final IndexWriter writer;

    /**
     * Opens (or creates) the index stored under {@code indexDir} using a {@link StandardAnalyzer}.
     *
     * @param indexDir file-system path of the index directory
     * @throws Exception if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        writer = new IndexWriter(dir, iwc);
    }

    /**
     * Indexes every regular file directly inside {@code dataDir}; subdirectories are skipped.
     *
     * @param dataDir path of the directory whose files are to be indexed
     * @return the document count reported by {@link IndexWriter#numDocs()}; this is the writer's
     *         total, so it can exceed the number of files added by this call if the index
     *         already held documents
     * @throws IOException if {@code dataDir} cannot be listed (for example it does not exist or
     *         is not a directory)
     * @throws Exception if reading a file or adding its document fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null rather than throwing when the path is not a listable directory.
            throw new IOException("Cannot list data directory: " + dataDir);
        }
        for (File f : files) {
            // Only regular files can be opened for reading; a directory would make the open fail.
            if (f.isFile()) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Adds a single file to the index and logs its canonical path.
     *
     * @param f the file to index
     * @throws Exception if the file cannot be read or the document cannot be added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件:" + f.getCanonicalPath());
        // Decode explicitly as UTF-8 so the indexed text does not depend on the platform default
        // charset (assumes the data files are UTF-8 text). try-with-resources guarantees the
        // stream is released even if adding the document fails part-way.
        try (Reader contents = new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8)) {
            writer.addDocument(getDocument(f, contents));
        }
    }

    /**
     * Builds the document for one file.
     *
     * @param f the file being indexed
     * @param contents reader over the file's text, used for the {@code contents} field
     * @return a document with {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws Exception if the canonical path cannot be resolved
     */
    private Document getDocument(File f, Reader contents) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("contents", contents));
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws Exception if the writer fails to close
     */
    @Override
    public void close() throws Exception {
        writer.close();
    }

    /**
     * Demo entry point: indexes {@code E:\lucene\data} into {@code E:\lucene} and prints the
     * document count and elapsed time.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "E:\\lucene";
        String dataDir = "E:\\lucene\\data";
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        // try-with-resources closes the indexer only if it was actually constructed, so a
        // constructor failure is reported as-is instead of being followed by a NullPointerException.
        try (Indexer indexer = new Indexer(indexDir)) {
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("索引:" + numIndexed + " 个文件,花费了" + (end - start) + "毫秒");
    }
}

/*
 * Sample run output:
 *
 * 索引文件:E:\lucene\data\CHANGES.txt
 * 索引文件:E:\lucene\data\JRE_VERSION_MIGRATION.txt
 * 索引文件:E:\lucene\data\LICENSE.txt
 * 索引文件:E:\lucene\data\MIGRATE.txt
 * 索引文件:E:\lucene\data\NOTICE.txt
 * 索引文件:E:\lucene\data\README.txt
 * 索引文件:E:\lucene\data\SYSTEM_REQUIREMENTS.txt
 * 索引:7 个文件,花费了901毫秒
 */
// Searcher.java
package com.andrew.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a query against the {@code contents} field of a Lucene index and prints the stored
 * {@code fullPath} of each matching document.
 */
public class Searcher {

    /**
     * Parses {@code q} with the classic query parser, fetches up to 10 top hits, and prints the
     * search time, the total hit count, and each hit's {@code fullPath} field.
     *
     * @param indexDir file-system path of the index directory
     * @param q query string in classic query-parser syntax
     * @throws Exception if the index cannot be opened, the query cannot be parsed, or the search
     *         fails
     */
    public static void search(String indexDir, String q) throws Exception {
        // try-with-resources releases the directory, reader and analyzer even when parsing or
        // searching throws; previously the reader was only closed on the success path.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
                IndexReader reader = DirectoryReader.open(dir);
                Analyzer analyzer = new StandardAnalyzer()) {
            IndexSearcher is = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            // Only the search call itself is timed; opening the index and parsing are excluded.
            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配 " + q + ",总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                System.out.println(doc.get("fullPath"));
            }
        }
    }

    /**
     * Demo entry point: searches the index at {@code E:\lucene} for "Zygmunt Saloni".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "E:\\lucene";
        String q = "Zygmunt Saloni";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/*
 * Sample run output:
 *
 * 匹配 Zygmunt Saloni,总共花费24毫秒查询到1个记录
 * E:\lucene\data\LICENSE.txt
 */