public void createIndex(Article article) throws Exception { // 实例化分词器,使用的是中文分词器 Analyzer analyzer = new IKAnalyzer(); // 指定要保存的文件路径并保存到FSDirectory中 FSDirectory directory = FSDirectory.open(new File(URLDecoder.decode( AnalyzerAction.class.getResource("/date/index/article/") .toString(), "UTF-8").substring(6))); // true表示覆盖原来已经创建的索引,如果是false表示不覆盖,而是继续添加索引 IndexWriter writer = new IndexWriter(directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); Document doc = new Document(); doc.add(new Field("id", String.valueOf(article.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("article_title", article.getArticleTitle(), Field.Store.YES, Field.Index.ANALYZED)); String content = FunctionUtil.Html2Text(article.getArticleContent()); doc.add(new Field("article_content", content, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); writer.optimize(); writer.close(); }
/** * 通过关键词 得到结果 */ public void searchIndex(String path, String keywords) throws Exception { FSDirectory directory = FSDirectory.open(new File(path)); IndexReader reader = IndexReader.open(directory); Searcher searcher = new IndexSearcher(directory); // MultiFieldQueryParser.parse中的参数分别为: // 1.关键词 // 2.要查询的字段,字符串类型的数组 String[] field = { "article_title", "article_content" }; // 3.两个字段的关系(与或非) BooleanClause.Occur[] flags = new BooleanClause.Occur[] { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD }; // 4.指明分词的时候所用的分词器 Analyzer analyzer = new IKAnalyzer(); // Query query = MultiFieldQueryParser.parse(keywords, field, flags, // analyzer); Query query = MultiFieldQueryParser.parse(Version.LUCENE_30, keywords, field, flags, analyzer); // 由于我们目前使用的查询是多字段查询,需要匹配度的排序 // QueryScorer内置计分器 query.rewrite(reader);// 用于重写query对象,目的能够让计分器识别当前的query. // 获得结果集 // Hits hits = searcher.search(query); TopDocs hits = searcher.search(query,20000); this.maxResultSize = String.valueOf(hits.scoreDocs.length); for (int i = 0; i < hits.scoreDocs.length; i++) { ScoreDoc sdoc = hits.scoreDocs[i]; Document doc = searcher.doc(sdoc.doc); Article article = new Article(); article.setId(Integer.valueOf(doc.get("id"))); // title String title = doc.get("article_title"); // content String content = doc.get("article_content"); // 以上两项需要加亮 // Highlighter的构造函数中需要添加两个参数 // 1.高亮文字的格式(这个格式是基于html) SimpleHTMLFormatter simpleHTMLFOrmatter = new SimpleHTMLFormatter( "<font color=red>", "</font>"); // 2.计分器 Highlighter highlighter = new Highlighter(simpleHTMLFOrmatter, new QueryScorer(query)); // 关键字附近字符串的截取,截取120个字 Fragmenter fragmenter = new SimpleFragmenter(120); highlighter.setTextFragmenter(fragmenter); // 针对某个字段的加亮以及截取 TokenStream tokenStream = analyzer.tokenStream("article_content", new StringReader(content)); // 将加亮并截取的字符串取出来 String highLightText = highlighter.getBestFragment(tokenStream, content); article.setArticleContent(highLightText); // 针对某个字段的加亮以及截取 TokenStream title_tokenStream = 
analyzer.tokenStream( "article_title", new StringReader(title)); // 将加亮并截取的字符串取出来 String title_highLightText = highlighter.getBestFragment( title_tokenStream, title); article.setArticleTitle(title_highLightText); searcheResult.add(article); } reader.close(); }
改动的地方不是很多,注意看一下。