搜索技术--搜索、排序并高亮显示(lucene3.0)

搜索思路:

1. 用一个 IndexSearcher 对象,去一个预先建立的索引目录(indexDir)中查找关键字(keyword)。

2. 查找之前,要先用特定的分词器(analyzer)和查询解析器(QueryParser)对关键字进行解析,解析结果作为 Query 对象,再用它去查询。

3.遍历结果集,根据docId去doc集合中找相应的doc.

4. 设置高亮显示的格式,以及每条匹配记录显示的字符数。用 Highlighter 对象取得文章或数据的片段,并将片段中与关键字匹配的文字标红。

关于如何建立索引,请查看:搜索技术--建立索引(lucene3.0)

package com.jrj.datamart.action;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.jrj.datamart.model.ApiIndexEntity;
import com.jrj.datamart.model.Pagination;

//
//搜索并返回结果
public class SearcherAction extends BaseAction{

// 保存索引的地方
private String INDEX_DIR = "f:\\lucene\\luceneIndexDir";
private String keyword = "行情";
private int TOP_NUM = 100;
private int numIndexed;
// 保持搜索结果的List
List<ApiIndexEntity> apiIndexEntityLists = new ArrayList<ApiIndexEntity>();
// 搜索结果的个数
private int totalHitsNumber;
// 用时(毫秒)
private long spentTime;
  //struts2中的主方法
public String execute() throws Exception {
  System.out.println("execue of SearcherAction started...keyword: "
    + keyword);
  long start = new Date().getTime();

  File indexDir = new File(INDEX_DIR);
  if (!indexDir.exists() || !indexDir.isDirectory()) {
   throw new Exception(indexDir
     + " does not exist or is not a directory.");
  }
  search(indexDir, keyword);
  long end = new Date().getTime();
  spentTime = (end - start);
  System.out.println("searing spent: " + spentTime + " milliseconds");

  return "success";
}
//查詢
// @param indexDir
//           索引目录地址
// @param q
//            要查询的字符串
// @throws Exception
//
public void search(File indexDir, String q) throws Exception {
  SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(
    Version.LUCENE_30);

  String field = "contents";
  try {
   // 排序
   IndexSearcher indexSearch = new IndexSearcher(
     FSDirectory.open(indexDir));
   QueryParser parser = new QueryParser(Version.LUCENE_30, field,
     analyzer);
   // AND--->OR
   parser.setDefaultOperator(QueryParser.OR_OPERATOR);
   // 生成Query对象
   // 多域查询
   // String[] fields = { "icnname", "contents" };
   // BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD,
   // BooleanClause.Occur.SHOULD };
   // MultiFieldQueryParser.parse(Version.LUCENE_30, q, fields, flags,
   // analyzer);
   // TopScoreDocCollector topCollector = TopScoreDocCollector.create(
   // indexSearch.maxDoc(), false);
   Query query = parser.parse(q);
   // 排序: 先API中文名,后描述
   // SortField sortArray[];
   // SortField sortField1 = new SortField("icnname", SortField.STRING,
   // false);// false代表升序,TRUE代表降序
   // SortField sortField2 = new SortField("contents",
   // SortField.STRING,
   // false);// false代表升序,TRUE代表降序
   // sortArray = new SortField[] { sortField1, sortField2 };
   // Sort sort = new Sort(sortArray);
   // TopDocs topDocs = indexSearch.search(query, null, 1000, sort);
   // indexSearch.search(query, topCollector);
   TopDocs topDocs = indexSearch.search(query, TOP_NUM);
   totalHitsNumber = topDocs.totalHits;
   System.out.println("命中:" + totalHitsNumber);
   // 输出结果
   ScoreDoc[] scoreDocs = topDocs.scoreDocs;
   ApiIndexEntity apiIndexEntity;
   // 将符合结果的都保持到list中。/
   // 设置需要高亮的字段值
   String[] highlightCol = { "icnname", "contents" };
   Highlighter highlighter = null;
   // 关键字高亮显示设置
   // 设定高亮显示的格式,也就是对高亮显示的词组加上前缀后缀
   SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
     "<FONT COLOR='#FF0000'>", "</FONT>");
   highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(
     query));
   // 设置每次返回的字符数
   highlighter.setTextFragmenter(new SimpleFragmenter(150));
   for (int i = 0; i < topDocs.totalHits; i++) {
    apiIndexEntity = new ApiIndexEntity();
    Document targetDoc = indexSearch.doc(scoreDocs[i].doc);
    for (Fieldable fa : targetDoc.getFields()) {
     String value = targetDoc.get(fa.name());
     for (String col : highlightCol) {
      if (fa.name().equals(col)) {
       // 设置高显内容
       TokenStream tokenStream = analyzer.tokenStream(
         "content", new StringReader(value));
       value = highlighter.getBestFragment(tokenStream,
         value);
       if ("icnname".equals(fa.name())) {
          apiIndexEntity.setIcnname(value == null ? targetDoc.get("icnname") : value);
       } else if ("contents".equals(fa.name())) {
        apiIndexEntity.setContents(value == null ? targetDoc.get("contents") : value);
       }
      }
     }  

}
    apiIndexEntity.setApiid(Integer.parseInt(targetDoc.get("apiid")));
    System.out.println("ApiId: "
      + targetDoc.get("apiid").toString());
    System.out.println("API中文名: "
      + targetDoc.get("icnname").toString());
    System.out
      .println("内容:" + targetDoc.get("contents").toString());
    apiIndexEntityLists.add(apiIndexEntity);

   }
   //将数据放入session中,
   getSession().setAttribute("apiIndexEntityLists",apiIndexEntityLists);

   //设置session时间1小时。
   getSession().setMaxInactiveInterval(60*60);
  

  } catch (CorruptIndexException e) {

   e.printStackTrace();
  } catch (IOException e) {

   e.printStackTrace();
  }
}

public int getNumIndexed() {
  return numIndexed;
}

public void setNumIndexed(int numIndexed) {
  this.numIndexed = numIndexed;
}

public String getINDEX_DIR() {
  return INDEX_DIR;
}

public String getKeyword() {
  return keyword;
}

public int getTOP_NUM() {
  return TOP_NUM;
}

public void setINDEX_DIR(String iNDEX_DIR) {
  INDEX_DIR = iNDEX_DIR;
}

public void setKeyword(String keyword) {
  this.keyword = keyword;
}

public void setTOP_NUM(int tOP_NUM) {
  TOP_NUM = tOP_NUM;
}

public List<ApiIndexEntity> getApiIndexEntityLists() {
  return apiIndexEntityLists;
}

public void setApiIndexEntityLists(List<ApiIndexEntity> apiIndexEntityLists) {
  this.apiIndexEntityLists = apiIndexEntityLists;
}

public int getTotalHitsNumber() {
  return totalHitsNumber;
}

public void setTotalHitsNumber(int totalHitsNumber) {
  this.totalHitsNumber = totalHitsNumber;
}

public long getSpentTime() {
  return spentTime;
}

public void setSpentTime(int spentTime) {
  this.spentTime = spentTime;
}

}

你可能感兴趣的:(lucene3.0)