来自 caicongyang http://blog.csdn.net/caicongyang
直接上代码,注释清晰!
package com.ccy.lucene.app; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.RAMDirectory; import org.junit.Test; /** * * <p> * Title: IndexDao.java * Package com.ccy.lucene.app * </p> * <p> * Description: Lucene应用 * <p> * @author Tom.Cai * @created 2015-11-9 下午9:57:00 * @version V1.0 * */ public class IndexDao { // 源文件 String filePath1 = "D:\\newWork\\lucene5.3\\luceneSourceFile\\小笑话_总统的房间 Room .txt"; String filePath2 = "D:\\newWork\\lucene5.3\\luceneSourceFile\\A股.txt"; String filePath3 = "D:\\newWork\\lucene5.3\\luceneSourceFile\\IndexWriter addDocument's a javadoc .txt"; // 索引文件 String indexPath = "D:\\newWork\\lucene5.3\\luceneIndex"; //分词器 Analyzer analyzer = new SmartChineseAnalyzer();// 词库分词 /** * 创建索引 * @throws Exception */ @Test public void createIndex() throws Exception { FSDirectory fsDir = FSDirectory.open(Paths.get(indexPath)); // 1.启动时读取原有磁盘索引文件 Directory ramDir = new RAMDirectory(fsDir, new IOContext()); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter ramIndexWiter = new IndexWriter(ramDir, config); // 2. 添加 Document Document doc1 = file2Document(filePath1); Document doc2 = file2Document(filePath2); Document doc3 = file2Document(filePath3); //内存索引添加Document ramIndexWiter.addDocument(doc1); ramIndexWiter.addDocument(doc2); ramIndexWiter.addDocument(doc3); ramIndexWiter.close(); // 2.退出时将内存索引保存到磁盘索引中 IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer); IndexWriter fsIndexWriter = new IndexWriter(fsDir, indexConfig); fsIndexWriter.addIndexes(ramDir); fsIndexWriter.close(); } /** * 根据关键字删除索引 * @param term */ public void delete(Term term) { IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer); FSDirectory fsDir = null; IndexWriter indexWriter = null; try { fsDir = FSDirectory.open(Paths.get(indexPath)); indexWriter = new IndexWriter(fsDir, indexConfig); indexWriter.deleteDocuments(term); } catch (Exception e) { throw new RuntimeException(e); } finally { try { indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } } } /** * 根据关键字更新索引 * * <pre> * indexWriter.deleteDocuments(term); * indexWriter.addDocument(doc); * </pre> * * @param term * @param doc */ public void update(Term term, Document doc) { IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer); FSDirectory fsDir = null; IndexWriter indexWriter = null; try { fsDir = FSDirectory.open(Paths.get(indexPath)); indexWriter = new IndexWriter(fsDir, indexConfig); indexWriter.updateDocument(term, doc); } catch (Exception e) { throw new RuntimeException(e); } finally { try { indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } } } /** * 搜索 */ @Test public void search() { IndexReader reader = null; try { reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher searcher = new IndexSearcher(reader); // 排序 Sort sort = new Sort(); sort.setSort(new SortField("name", Type.SCORE)); // 默认为升序 //lucene 5.x不建议使用filter,而使用BooleanQuery来代替 //Filter filter = new TermFilter(new Term("title", "A")); // 2、搜索解析器 QueryParser parser = new QueryParser("content", analyzer); Query query = parser.parse("房间"); //lucene 4.x 此方法已经过时,不建议使用filter,而使用BooleanQuery来代替 //TopDocs topDocs = searcher.search(query, filter, Integer.MAX_VALUE, sort); TopDocs topDocs = searcher.search(query,Integer.MAX_VALUE,sort); int recordCount = topDocs.totalHits; System.out.println("总共有【" + recordCount + "】条匹配结果"); List<Document> recordList = new ArrayList<Document>(); //准备高亮器 Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); // 摘要 Fragmenter fragmenter = new SimpleFragmenter(50); highlighter.setTextFragmenter(fragmenter); // 3取出当前页的数据 int end = Math.min(10, topDocs.totalHits); //循环读出前10条 for (int i = 0; i < end; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int docSn = scoreDoc.doc; // 文档内部编号 Document doc = searcher.doc(docSn); // 根据编号取出相应的文档 // 返回高亮后的结果,如果当前属性值中没有出现关键字,会返回 null String hc = highlighter.getBestFragment(analyzer, "content", doc.get("content")); if (hc == null) { String content = doc.get("content"); int endIndex = Math.min(50, content.length()); hc = content.substring(0, endIndex);// 最多前50个字符 } doc.removeField("content"); doc.add(new Field("content", hc, TextField.TYPE_STORED)); recordList.add(doc); } for (Document document : recordList) { System.out.println("------------------------------"); System.out.println("name = " + document.get("name")); System.out.println("content = " + document.get("content")); System.out.println("size = " + document.get("size")); System.out.println("path = " + document.get("path")); } } catch (Exception e) { e.printStackTrace(); } } /** * //1.将文本转化为doc对象 * * @param path * @return */ public Document file2Document(String path) { File file = new File(path); Document doc = new Document(); doc.add(new Field("name", file.getName(), StringField.TYPE_STORED)); doc.add(new Field("content", readFileContent(file), TextField.TYPE_STORED)); doc.add(new LongField("size", file.length(), LongField.TYPE_STORED)); doc.add(new Field("path", file.getAbsolutePath(), StringField.TYPE_STORED)); return doc; } /** * 读取文件内容 */ public static String readFileContent(File file) { try { BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file))); StringBuffer content = new StringBuffer(); for (String line = null; (line = reader.readLine()) != null;) { content.append(line).append("\n"); } return content.toString(); } catch (Exception e) { throw new RuntimeException(e); } } }