import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Date; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * 创建处理文档的索引类,这里使用的是Lucene3.0 * @author wawa * */ public class IndexTest { public static void main(String [] args) { //存放索引的目录 String INDEX_STORE_PATH="D:\\java\\lucene\\indexCh2"; //需要建立索引的文件 String input="D:\\java\\lucene\\zhuxian"; try { long start = new Date().getTime(); int docNum=createIndex(new File(INDEX_STORE_PATH), new File(input)); long end = new Date().getTime(); System.out.println("Indexing " + docNum + " files took " + (end - start) + " milliseconds"); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static int createIndex(File indexDir, File dataDir) throws Exception { //与2的不同之处 IndexWriter writer=new IndexWriter(FSDirectory.open(indexDir), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED); indexDirectory(writer, dataDir); int numIndexed = writer.numDocs(); writer.optimize(); writer.close(); return numIndexed; } /**循环遍历目录下的所有.txt文件并进行索引 * @param writer * @param dir * @throws IOException */ private static void indexDirectory(IndexWriter writer, File dir) throws IOException { File[] files = dir.listFiles(); for (int i = 0; i < files.length; i++) { File f = files[i]; if (f.isDirectory()) { indexDirectory(writer, f); // recurse } else if (f.getName().endsWith(".txt")) { indexFile(writer, f); } } } /**对单个txt文件进行索引 * @param writer * @param f * @throws IOException */ private static void indexFile(IndexWriter writer, File f) throws IOException { if (f.isHidden() || !f.exists() || !f.canRead()) { return; } System.out.println("Indexing " + f.getCanonicalPath()); Document doc = new Document(); doc.add(new Field("contents",new FileReader(f)));//有变化的地方 doc.add(new Field("filename",f.getCanonicalPath(),Field.Store.YES, Field.Index.ANALYZED));//有变化的地方 writer.addDocument(doc); } }
import java.io.File; import java.io.FileReader; import java.util.Date; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class SearchTest { /**lucene3.0 搜索类 * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub String srotPath="D:\\java\\lucene\\indexCh2\\"; String keys="林惊羽"; try { createSearch(new File(srotPath), keys); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static void createSearch(File storPath,String keys) throws Exception { IndexSearcher searcher=new IndexSearcher(FSDirectory.open(storPath),true);//只读 String field="contents"; QueryParser parser = new QueryParser(Version.LUCENE_30, field, new StandardAnalyzer(Version.LUCENE_30));//有变化的地方 Query query = parser.parse(keys); TopScoreDocCollector collector = TopScoreDocCollector.create(100 , false);//有变化的地方 long start = new Date().getTime();// start time searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println(hits.length); for (int i = 0; i < hits.length; i++) { Document doc = searcher.doc(hits[i].doc);//new method searcher.doc() System.out.println(doc.getField("filename")+" "+hits[i].toString()+" "); } long end = new Date().getTime();//end time System.out.println("Found :" + collector.getTotalHits() + " document(s) (in " + (end - start) + " milliseconds) that matched query '" + keys + "':"); } }