IKAnalyzer 中文分词 高亮

创建索引

 

/**
 * Entry point for (incrementally) indexing work orders.
 *
 * <p>Opens an {@code IndexWriter} on {@code path} using the IK Chinese analyzer,
 * delegates document creation to {@link #creatCspIndex(IndexWriter)}, and closes
 * the writer afterwards. Progress is reported on standard output.
 */
public void execute() {
	System.out.println("开始创建索引工单");
	Analyzer analyzer = new IKAnalyzer();
	// create == false: append to the existing index (incremental indexing)
	// instead of creating/overwriting it.
	IndexWriter iwriter = new IndexWriter(path, analyzer, false);
	try {
		creatCspIndex(iwriter);
	} finally {
		// Always close the writer, even when indexing fails, so it is not
		// left open on the index directory.
		iwriter.close();
	}
	System.out.println("完成创建索引工单");
}

public  void creatCspIndex(IndexWriter iwriter) {
	 proSet = prossDAO.queryProblemProcessList(start,end);
        List  contListMap = proSet.getDataResult();

	for (String key : contListMap.keySet()) {
	      if (null == contMap.get(key)) {
			continue;
		}
		Document doc = new Document();
		doc.add(new Field("title",key,Field.Store.YES,Field.Index.ANALYZED));
		doc.add(new Field("content",contMap.get(key),Field.Store.YES,Field.Index.ANALYZED));
			iwriter.addDocument(doc);		
	}

 

搜索 高亮

 

// Search the "content" field for the keyword and build a highlighted excerpt
// for each of the best hits.
String keyword = "第二章提示用户要求另外收费";
Query query2 = IKQueryParser.parse("content", keyword);
File indexDir = new File(path);

Directory directory = null;
IndexReader reader = null;
IndexSearcher is = null;
try {
	// Use one directory instance for the reader so that it can be closed again.
	directory = new SimpleFSDirectory(indexDir);
	reader = IndexReader.open(directory);
	is = new IndexSearcher(reader);
	is.setSimilarity(new IKSimilarity());

	// Fetch the 5 most similar documents.
	TopDocs topDocs2 = is.search(query2, 5);
	ScoreDoc[] scoreDocs = topDocs2.scoreDocs;

	// Highlighting setup.
	Analyzer analyzer = new IKAnalyzer(); // analyzer used to re-tokenize the stored text
	// Markup wrapped around every highlighted term (prefix and suffix).
	SimpleHTMLFormatter simpleHtmlFormatter =
			new SimpleHTMLFormatter("<font color='red'><B>", "</B></font>");
	Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query2));
	// Limit each fragment to 150 characters: like a search engine result page,
	// only an excerpt is shown rather than the whole text.
	highlighter.setTextFragmenter(new SimpleFragmenter(150));

	for (int i = 0; i < scoreDocs.length; i++) {
		Document doc = is.doc(scoreDocs[i].doc);
		String content = doc.get("content");
		TokenStream tokenStream = analyzer.tokenStream("", new StringReader(content));
		// Highlighted excerpt for this hit; the snippet does not use it further.
		String str = highlighter.getBestFragment(tokenStream, content);
	}
} finally {
	// Release searcher, reader and directory in reverse order of creation.
	if (is != null) {
		is.close();
	}
	if (reader != null) {
		reader.close();
	}
	if (directory != null) {
		directory.close();
	}
}
 

 

你可能感兴趣的:(IKAnalyzer)