/** * lucene-core-3.3.0 * lucene-highlighter-3.3.0 */ public class LuceneIndex { //分词器 private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33); //索引文件存放位置 private final String indexPath = "/home/zhanghc/luence/index/"; /** * 创建索引 */ public boolean createIndex() throws IOException { Directory directory = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); String [] array_content = {"在泽州县彤康食品有限公司", "屠宰加工项目建成投产", "比如肉制品均来自双汇", "雨润等大型肉类生产商"}; String [] array_name = {"泽州", "屠宰", "肉制品", "肉类生产商"}; for (int i = 0; i < 4; i++) { Document document = new Document(); document.add(new Field("id", String.valueOf(i), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES)); document.add(new Field("name", array_name[i], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); document.add(new Field("content", array_content[i], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); writer.addDocument(document); } writer.optimize(); writer.close(); return true; } /** * 删除索引 * @throws ParseException */ public void deletes() throws IOException, ParseException{ Directory directory = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); Term term = new Term("id", "0"); writer.deleteDocuments(term); // Query query = new QueryParser(Version.LUCENE_33, "id", new StandardAnalyzer(Version.LUCENE_33)).parse("2"); // writer.deleteDocuments(query); writer.optimize(); System.out.println(writer.numDocs()); writer.close(); } /** * 更新索引 * @throws IOException */ public void updates() throws IOException{ Directory directory = FSDirectory.open(new File(indexPath)); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); String [] array_content = {"索引的删除,纠结了很久,看到一篇总结不错的文章,转载过来好好学习", "即使在不关闭IndexReader的情况下"}; String [] array_name = {"索引", "关闭"}; for (int i = 0; i < 2; i++) { Document document = new Document(); Term term = new Term("id", String.valueOf(i)); document.add(new Field("id", String.valueOf(i), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES)); document.add(new Field("name", array_name[i], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); document.add(new Field("content", array_content[i], Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); writer.updateDocument(term, document); //Updates a document by first deleting the document(s) containing term and then adding the new document. The delete and then add are atomic as seen by a reader on the same index (flush may happen only after the add). } System.out.println(writer.numDocs()); writer.optimize(); writer.close(); } /** * 单条件查询 */ public void search(String val) throws CorruptIndexException, IOException, ParseException{ IndexSearcher search = new IndexSearcher(FSDirectory.open(new File(indexPath)), true); final String field = "name"; QueryParser queryParser = new QueryParser(Version.LUCENE_33, field, analyzer); Query query = queryParser.parse(val); //设置高亮显示 //设置高亮显示格式 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font><strong>", "</strong></font>"); //语法高亮显示设置 Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(100));//100似乎是表示最终输出字的个数 TopDocs result = search.search(query, 10);// 10是显示队列的Size System.out.println("搜索结果,搜索条数为:" + result.totalHits); for (ScoreDoc item : result.scoreDocs) { Document document = search.doc(item.doc); String out = document.get(field); //搜索结果 TokenStream tream = analyzer.tokenStream(field, new StringReader(out)); try { System.out.println(highlighter.getBestFragment(tream, out)); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } } search.close(); } /** * 多条件查询 * @throws IOException * @throws CorruptIndexException * @throws ParseException */ public void mutilSearch(String val) throws CorruptIndexException, IOException, ParseException{ IndexSearcher search = new IndexSearcher(FSDirectory.open(new File(indexPath)), true); BooleanClause.Occur [] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD}; //BooleanClause.Occur [] 表示多个条件之间的关系, //BooleanClause.Occur.MUST表示必须含有 //BooleanClause.Occur.MUST_NOT表示不含有 //BooleanClause.Occur.SHOULDb表示含不含有均可 final String [] fields = {"name", "content"}; //根据 name content两个属性查询 Query query = MultiFieldQueryParser.parse(Version.LUCENE_33, val, fields, clauses, analyzer); //设置高亮显示 //设置高亮显示格式 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font><strong>", "</strong></font>"); //语法高亮显示设置 Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(100));//100是高亮范围 TopDocs result = search.search(query, 10);// 10是显示队列的Size System.out.println("搜索结果,搜索条数为:" + result.totalHits); for (ScoreDoc item : result.scoreDocs) { Document document = search.doc(item.doc); String out_name = document.get("name"); String out_content = document.get("content"); //搜索结果 TokenStream tream_name = analyzer.tokenStream("name", new StringReader(out_name)); TokenStream tream_content = analyzer.tokenStream("content", new StringReader(out_content)); try { String name = highlighter.getBestFragment(tream_name, out_name); String content = highlighter.getBestFragment(tream_content, out_content); if(name == null) name = out_name; if(content == null) content = out_content; System.out.println(name); System.out.println(content); System.out.println("---------------"); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } } search.close(); } /** * 多条件查询 * @throws IOException * @throws CorruptIndexException * @throws ParseException */ public void mutilSearch_2(String val) throws CorruptIndexException, IOException, ParseException{ IndexSearcher search = new IndexSearcher(FSDirectory.open(new File(indexPath)), true); BooleanQuery booleanQuery = new BooleanQuery(); final String field = "name"; QueryParser queryParser = new QueryParser(Version.LUCENE_33, field, analyzer); Query query = queryParser.parse(val); booleanQuery.add(query, Occur.MUST_NOT); final String field_2 = "content"; QueryParser queryParser_2 = new QueryParser(Version.LUCENE_33, field_2, analyzer); Query query_2 = queryParser_2.parse(val); booleanQuery.add(query_2, Occur.MUST); //设置高亮显示 //设置高亮显示格式 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font><strong>", "</strong></font>"); //语法高亮显示设置 Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(100));//100似乎是表示最终输出字的个数 TopDocs result = search.search(booleanQuery, 10);// 10是显示队列的Size System.out.println("搜索结果,搜索条数为:" + result.totalHits); for (ScoreDoc item : result.scoreDocs) { Document document = search.doc(item.doc); String out_name = document.get("name"); String out_content = document.get("content"); //搜索结果 TokenStream tream_name = analyzer.tokenStream("name", new StringReader(out_name)); TokenStream tream_content = analyzer.tokenStream("content", new StringReader(out_content)); try { String name = highlighter.getBestFragment(tream_name, out_name); String content = highlighter.getBestFragment(tream_content, out_content); if(name == null) name = out_name; if(content == null) content = out_content; System.out.println(name); System.out.println(content); System.out.println("---------------"); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } } search.close(); } }