本示例演示如何使用 Lucene(3.5)进行查询,并对查询结果做了以下处理:
4、结果高亮显示,详细见代码高亮部分。
package cn.test.gxg.engine.query; import java.io.File; import java.io.IOException; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.NumericField; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Version; /** * 创建索引并查询示例 * * @createTime: Feb 22, 2010 3:02:28 PM * @author: <a href="mailto:[email protected]">天涯 </a> * @version: 0.1 * @lastVersion: 0.1 * @updateTime: * @updateAuthor: <a href="mailto:[email protected]">天涯 </a> * @changesSum: * */ public class QueryTest { public static void main(String[] args) { //索引目录 D:\workspace\code\java\TestLucene3\index\txt\test String INDNEX_PATH = "D:\\workspace\\code\\java\\TestLucene3\\index\\txt\\test"; createIndex(INDNEX_PATH); search(INDNEX_PATH); } public static void createIndex 
(String indexPath) { // 获取中文分词器,查询的时候也要用一样的分词器。不然会导致查询结果不准确 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); // 建立索引 IndexWriter writer; NumericField nField = null; try { writer = new IndexWriter(FSDirectory.open(new File(indexPath)), analyzer, true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); Field field = null; for(int i =0; i <10; i++) { doc = new Document(); field = new Field("Code", "feinnocdb_App_info"+i, Field.Store.YES, Field.Index.ANALYZED); doc.add(field); nField = new NumericField("Id", Store.YES, true).setIntValue(i%3); doc.add(nField); field = new Field("Name", "国家名字-" + i, Field.Store.YES, Field.Index.ANALYZED); doc.add(nField); field = new Field("Content", "中国中华人民共和国—" + i, Field.Store.YES, Field.Index.ANALYZED); doc.add(field); nField = new NumericField("Type", Store.YES, true).setIntValue((i%10)); doc.add(nField); nField = new NumericField("Price", Store.YES, true).setFloatValue((i%3)); doc.add(nField); nField = new NumericField("Sex", Store.YES, true).setIntValue((i%2)); doc.add(nField); writer.addDocument(doc); } writer.close(); System.out.println("Indexed success!"); } catch (Exception e) { e.printStackTrace(); } } public static void search(String indexPath) { //获取Lucene标准分词器,可以使用其他分词器,前提是创建索引的时候也使用相同的分词器 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); //建立索引 try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath))); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "Content", analyzer); Query query = null; String q = "Content:国"; try { query = parser.parse(q); // 支持后缀匹配,如*国 则可以搜索中国、美国等以国字结尾的词,*:*可以查询所有索引。 parser.setAllowLeadingWildcard(true); // 有通配符时不转换大小写 parser.setLowercaseExpandedTerms(false); } catch (ParseException e) { e.printStackTrace(); } // 多字段排序,设置在前面的会优先排序 SortField[] sortFields = new SortField[2]; SortField sortField = new SortField("Id", SortField.INT, true); SortField FIELD_SEX = new SortField("Sex", SortField.INT, false); 
sortFields[0] = sortField; sortFields[1] = FIELD_SEX; Sort sort = new Sort(sortFields); // 单字段排序 /* SortField sortField = new SortField("Id", SortField.INT, true); Sort sort = new Sort(sortField); */ Searcher searcher = new IndexSearcher(reader); // 如果不需要排序则使用注释掉的代码查询 // TopDocs topDocs = searcher.search(query, 100); TopDocs topDocs = searcher.search(query, null, 1000, sort); System.out.println("查询语句为:" + query.toString()); System.out.println("查询到数据条数为:" + topDocs.totalHits); if (topDocs.totalHits != 0) { // 用作高亮显示的Query语句。绝大多数情况都是使用查询的Query语句。 // 这里为了演示,所以不那样做 Query hilightQuery = null; try { hilightQuery = parser.parse("Content:中"); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } // 设置需要高亮的字段值 String[] highlightCol = {"Content", "Name"}; Highlighter highlighter = null; // 关键字高亮显示设置 // 设定高亮显示的格式,也就是对高亮显示的词组加上前缀后缀 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(" <FONT COLOR='RED'>", " </FONT>"); highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(hilightQuery)); //设置每次返回的字符数 highlighter.setTextFragmenter(new SimpleFragmenter(1000)); // 遍历查询的索引,得到具体索引值。 for(ScoreDoc sd : topDocs.scoreDocs) { Document document = searcher.doc(sd.doc); for (Fieldable fa : document.getFields()) { String value = document.get(fa.name()); for (String col : highlightCol) { if(fa.name().equals(col)) { //设置高显内容 TokenStream tokenStream = analyzer.tokenStream("Content",new StringReader(value)); value = highlighter.getBestFragment(tokenStream, value); } } System.out.print(fa.name() + ":" + value + " "); } System.out.println(); } } reader.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }