Lucene 2.4 + Highlighter 2.4 的分页+高亮显示代码例子

Lucene 2.4 + Highlighter 2.4 的分页+高亮显示

import net.paoding.analysis.analyzer.PaodingAnalyzer;  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.queryParser.MultiFieldQueryParser;  
import org.apache.lucene.search.BooleanClause;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.TopDocCollector;  
import org.apache.lucene.search.highlight.Highlighter;  
import org.apache.lucene.search.highlight.QueryScorer;  
import org.apache.lucene.search.highlight.SimpleFragmenter;  
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;  
/** 
* Lucene 2.4 + Highlighter 2.4 的分页+高亮显示代码例子.<br> 
* 高亮的css自己写吧。 
*  
* @author 老紫竹研究室(laozizhu.com) 
*/ 
public class TestLuceneHighlighter {  
  private static final String FIELD_TITLE = "title";  
  private static final String FIELD_BODY = "body";  
  public synchronized Analyzer getAnalyzer() {  
    return new PaodingAnalyzer();  
  }  
  public void test(String queryString, int begin, int number) {  
    IndexSearcher isearcher = null;  
    try {  
      isearcher = new IndexSearcher("d:/index");  
      /* 下面这个表示要同时搜索这两个域,而且只要一个域里面有满足我们搜索的内容就行 */ 
      BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };  
      TopDocCollector collector = new TopDocCollector(10);  
      Query query = MultiFieldQueryParser.parse(queryString, new String[] { FIELD_TITLE, FIELD_BODY }, clauses, getAnalyzer());  
      isearcher.search(query, collector);  
      ScoreDoc[] hits = collector.topDocs().scoreDocs;  
      // 用这个进行高亮显示,默认是<b>..</b>  
      // 用这个指定<read>..</read>  
      SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<read>", "</read>");  
      // 构造高亮  
      // 指定高亮的格式  
      // 指定查询评分  
      Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));  
      // 这个一般等于你要返回的,高亮的数据长度  
      // 如果太小,则只有数据的开始部分被解析并高亮,且返回的数据也少  
      // 太大,有时太浪费了。  
      highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));  
      for (int i = begin; i < hits.length && i < begin + number; i++) {  
        Document doc = isearcher.doc(hits[i].doc);  
        // 有三个参数  
        // 分析器  
        // 要解析的字段名  
        // 要解析的数据  
        System.out.println(highlighter.getBestFragment(getAnalyzer(), FIELD_TITLE, doc.get(FIELD_TITLE)));  
      }  
    } catch (Exception e) {  
      e.printStackTrace();  
    } finally {  
      if (isearcher != null) {  
        try {  
          isearcher.close();  
        } catch (Exception e) {  
          e.printStackTrace();  
        }  
      }  
    }  
  } 

你可能感兴趣的:(apache,.net,css,Blog,Lucene)