lucene-core-4.7.2.jar
lucene-analyzers-common-4.7.2.jar
lucene-queryparser-4.7.2.jar
lucene-highlighter-4.7.2.jar //高亮
IKAnalyzer2012FF_u1.jar //中文分析器
JDK1.6
在 D:\lucene\test 目录下创建 4 个 txt 文件(GBK 编码,代码按 GBK 读取),作为待索引的测试数据
package com.search.lucene; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.LongField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParser.Operator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.junit.Before; import org.junit.Test; import org.wltea.analyzer.lucene.IKAnalyzer; /** * txt文件索引 */ public class IndexFile { private Directory directory; private String indexPath = "D://lucene/index"; // 建立索引文件的目录 private String dirPath = "D://lucene/test"; // txt资源目录 private Analyzer analyzer = new IKAnalyzer(); private IndexWriter indexWriter; @Before public void init() { try { directory=FSDirectory.open(new File(indexPath)); indexWriter=getIndexWriter(directory); } catch(Exception e) { System.out.println("索引打开异常!"); } } 
/** * 获得所有txt文件 * @param dirPath * @return */ public List<File> getFileList(String dirPath) { File[] files=new File(dirPath).listFiles(); List<File> fileList=new ArrayList<File>(); for(File file: files) { if(isTxtFile(file.getName())) { fileList.add(file); } } return fileList; } /** * 创建索引 * @throws Exception */ @Test public void createIndex() throws Exception{ List<File> fileList = getFileList(dirPath); Document document = null; for(File file:fileList){ document = fileToDocument(file); indexWriter.addDocument(document); System.out.println("filename=="+document.get("filename")); indexWriter.commit(); } closeWriter(); } /** * 判断是否是txt文件 * @param fileName * @return */ public boolean isTxtFile(String fileName) { if(fileName.lastIndexOf(".txt") > 0) { return true; } return false; } /** * 将文件转换成Document对象 * @param file * @return * @throws Exception */ public Document fileToDocument(File file) throws Exception { Document document=new Document(); document.add(new TextField("filename", file.getName(), Store.YES)); document.add(new TextField("content", getFileContent(file), Store.YES)); document.add(new LongField("size", file.getTotalSpace(), Store.YES)); return document; } /** * 获得indexwriter对象 * @param dir * @return * @throws Exception */ public IndexWriter getIndexWriter(Directory dir) throws Exception { IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_47, analyzer); return new IndexWriter(dir, iwc); } /** * 关闭indexwriter对象 * @throws Exception */ public void closeWriter() throws Exception { if(indexWriter != null) { indexWriter.close(); } } /** * 读取文件内容 * @param file * @return * @throws Exception */ public String getFileContent(File file) throws Exception{ Reader reader = new InputStreamReader(new FileInputStream(file),"GBK"); BufferedReader br = new BufferedReader(reader); String result =""; while(br.readLine() != null){ result = result+"\n"+br.readLine(); } br.close(); reader.close(); return result; } @Test public void search() throws Exception { String 
filePath=indexPath; Directory dir=FSDirectory.open(new File(filePath)); IndexReader reader=DirectoryReader.open(dir); IndexSearcher searcher=new IndexSearcher(reader); //方法1:TermQuery Term term=new Term("content", "中国"); TermQuery query=new TermQuery(term); //方法2:TermQuery // QueryParser parser = new QueryParser(Version.LUCENE_47, "content", analyzer); // parser.setDefaultOperator(Operator.AND); // Query query = parser.parse("中国"); TopDocs topdocs=searcher.search(query, 10); ScoreDoc[] scoreDocs=topdocs.scoreDocs; System.out.println("命中:" + topdocs.totalHits); for(int i=0; i < scoreDocs.length; i++) { Document targetDoc = searcher.doc(scoreDocs[i].doc); System.out.println("内容=:" + targetDoc.toString()); } // 分页,高亮显示 higherIndex(analyzer, searcher, query, topdocs); reader.close(); } /** * 分页,高亮显示 * * @param analyzer * @param isearcher * @param query * @param topDocs * @throws IOException * @throws Exception */ public void higherIndex(Analyzer analyzer, IndexSearcher isearcher, Query query, TopDocs topDocs) throws IOException, Exception { TopScoreDocCollector results = TopScoreDocCollector.create(topDocs.totalHits, false); isearcher.search(query, results); // 分页取出指定的doc(开始条数, 取几条) ScoreDoc[] docs = results.topDocs(1, 2).scoreDocs; for (int i = 0; i < docs.length; i++) { Document targetDoc = isearcher.doc(docs[i].doc); System.out.println("内容:" + targetDoc.toString()); } // 关键字高亮显示的html标签,需要导入lucene-highlighter-x.jar SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); for (int i = 0; i < docs.length; i++) { Document doc = isearcher.doc(docs[i].doc); // 标题增加高亮显示 TokenStream tokenStream1 = analyzer.tokenStream("filename", new StringReader(doc.get("filename"))); String title = highlighter.getBestFragment(tokenStream1, doc.get("filename")); // 内容增加高亮显示 TokenStream tokenStream2 = analyzer.tokenStream("content", new 
StringReader(doc.get("content"))); String content = highlighter.getBestFragment(tokenStream2, doc.get("content")); System.out.println(doc.get("filename") + " : " + title + " : " + content); } } }