Lucene5.3.1最新版Query测试

1.作者

caicongyang:http://blog.csdn.net/caicongyang

2.代码(注释清晰)

package com.ccy.lucene.app;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * 
 * 

* Title: QueryTest.java * Package com.ccy.lucene.app *

*

* Description: 各种Query测试 *

* @author Tom.Cai * @created 2015-11-10 下午8:17:50 * @version V1.0 * */ public class QueryTest { //索引文件 String indexPath = "D:\\newWork\\lucene5.3\\luceneIndex"; //分词器 Analyzer analyzer = new SmartChineseAnalyzer();// 词库分词 /** * 关键字查询(英文经过分词器全部为小写) * @throws Exception */ @Test public void testTermQuery() throws Exception{ Term term = new Term("content", "room"); TermQuery query = new TermQuery(term); search(query); } /** * 短语查询 * @throws Exception */ @Test public void testPhraseQuery() throws Exception { PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.add(new Term("content", "房间")); builder.add(new Term("content", "安排")); builder.setSlop(5);//中级差几个字 PhraseQuery phraseQuery = builder.build(); search(phraseQuery); } /** * 通配符查询 * '?' 代表一个字符, '*' 代表0个或多个字符 * @throws Exception */ @Test public void testWildcardQuery() throws Exception { Term term = new Term("content", "roo?"); Query query = new WildcardQuery(term); search(query); } /** * 布尔关联查询 * @throws Exception */ @Test public void testBooleanQuery() throws Exception { // 条件1 PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.add(new Term("content", "房间")); builder.add(new Term("content", "安排")); builder.setSlop(5);//中间差几个字 PhraseQuery query1 = builder.build(); // 条件2 Term term = new Term("content", "总统"); TermQuery query2 = new TermQuery(term); // 组合 BooleanQuery.Builder bbuilder = new BooleanQuery.Builder(); bbuilder.add(query1, Occur.MUST); bbuilder.add(query2, Occur.MUST); BooleanQuery boolQuery = bbuilder.build(); search(boolQuery); } public void search(Query query) throws Exception{ //1、打开索引库 IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); IndexSearcher searcher = new IndexSearcher(reader); //3、根据关键词进行搜索 TopDocs topDocs = searcher.search(query, 100);; int recordCount = topDocs.totalHits; System.out.println("总共有【" + recordCount + "】条匹配结果"); List recordList = new ArrayList(); //准备高亮器 Formatter formatter = new SimpleHTMLFormatter("", ""); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); // 摘要 Fragmenter fragmenter = new SimpleFragmenter(50); highlighter.setTextFragmenter(fragmenter); // 3,取出当前页的数据 int end = Math.min(10, topDocs.totalHits); for (int i = 0; i < end; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int docSn = scoreDoc.doc; // 文档内部编号 Document doc = searcher.doc(docSn); // 根据编号取出相应的文档 // 返回高亮后的结果,如果当前属性值中没有出现关键字,会返回 null String hc = highlighter.getBestFragment(analyzer, "content", doc.get("content")); if (hc == null) { String content = doc.get("content"); int endIndex = Math.min(50, content.length()); hc = content.substring(0, endIndex);// 最多前50个字符 } doc.removeField("content"); doc.add(new Field("content", hc, TextField.TYPE_STORED)); recordList.add(doc); } for (Document document : recordList) { System.out.println("------------------------------"); System.out.println("name = " + document.get("name")); System.out.println("content = " + document.get("content")); System.out.println("size = " + document.get("size")); System.out.println("path = " + document.get("path")); } reader.close(); } }


3.我的博客

我的博客: http://blog.csdn.net/caicongyang


你可能感兴趣的:(搜索/爬虫)