Lucene处理高亮显示的大概流程如下:
1.首先获取原始文档的TokenStream
//首先获取docId的TokenStream
TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, field, analyzer);
//构建Fragmenter对象,用于文档切片
Fragmenter fragmenter = new SimpleFragmenter(100);//每个切片的字符数,默认值为100
//构建Scorer,用于选取最佳切片
Scorer fragmentScore = new QueryScorer(query);
//构建Formatter格式化最终显示(将字体颜色设置为红色)
Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");
//实例化Highlighter组件
Highlighter highlighter = new Highlighter(formatter,fragmentScore);
highlighter.setTextFragmenter(fragmenter);
//最后一步,获取highlightText
highlighter.getBestFragment(tokenStream, text);//第二个参数为原始文档信息
//构造Highlighter组件
FragListBuilder fragListBuilder = new SimpleFragListBuilder();
FragmentsBuilder fragmentBuilder = new ScoreOrderFragmentsBuilder(
        BaseFragmentsBuilder.COLORED_PRE_TAGS,
        BaseFragmentsBuilder.COLORED_POST_TAGS);
FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, fragListBuilder, fragmentBuilder);
//构造FieldQuery
Query query = new QueryParser(...).parse(...);
FieldQuery fieldQuery = highlighter.getFieldQuery(query);
//高亮最佳文档切片(参数类型依次为:FieldQuery, IndexReader, int, String, int)
highlighter.getBestFragment(fieldQuery, reader, docId, field, fragCharSize);//fragCharSize不能小于18
注意:使用FastVectorHighlighter,在创建索引时,必须要为field指定TermVector.WITH_POSITIONS_OFFSETS和Store.YES属性