Reposted from: http://blog.csdn.net/xiaojimanman/article/details/42969443
When searching a Lucene index, building the Query object is a crucial step. Commonly used Query subclasses and query parsers include: QueryParser, MultiFieldQueryParser, TermQuery, PrefixQuery, PhraseQuery, WildcardQuery, TermRangeQuery, NumericRangeQuery, BooleanQuery, and so on.
TermQuery
TermQuery is built around a single Term (the smallest unit of indexing, consisting of a field name and a value). It is suited to building queries on keyword (untokenized) fields, such as a category or a document's unique ID.
PrefixQuery
PrefixQuery builds a prefix query over a string; its effect is similar to building a WildcardQuery with a pattern like "abc*". PrefixQuery only requires the leading characters to match: for example, PrefixQuery(new Term("", "lu")) will match lucene, luke, and so on.
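As a rough illustration, the method below could be dropped into the LuceneSearch demo class shown further down (it reuses that class's getSearcher() helper and assumes an extra import of org.apache.lucene.search.PrefixQuery); the field and prefix values are only examples:

public void searchByPrefix(String field, String prefix, int num) throws IOException {
    IndexSearcher searcher = getSearcher();
    // Matches every document whose term in "field" starts with "prefix";
    // e.g. prefix "j" on the untokenized "name" field matches john, jetty and jake.
    Query query = new PrefixQuery(new Term(field, prefix));
    TopDocs tds = searcher.search(query, num);
    for (ScoreDoc sd : tds.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        System.out.println(doc.get("id") + "---->" + doc.get("name"));
    }
}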
PhraseQuery
PhraseQuery is a phrase search; it lets you specify the maximum distance (slop) between keywords. For example, with the slop between the terms "基于" ("based on") and "案例" ("case") set to 2, a document no longer matches once the distance between those two terms in the document exceeds 2. A minimal sketch follows.
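A minimal sketch, assuming the mutable PhraseQuery of Lucene 4.x/5.x (newer versions use PhraseQuery.Builder). Written as another method for the demo class below, it needs org.apache.lucene.search.PhraseQuery on the import list; the terms come from the tokenized "content" field of the demo data:

public void searchByPhrase(int num) throws IOException {
    IndexSearcher searcher = getSearcher();
    PhraseQuery query = new PhraseQuery();
    query.setSlop(1);                            // allow at most one position between the terms
    query.add(new Term("content", "i"));         // StandardAnalyzer lower-cases tokens
    query.add(new Term("content", "football"));
    // With slop 1 this matches documents containing "I like football".
    TopDocs tds = searcher.search(query, num);
    for (ScoreDoc sd : tds.scoreDocs) {
        System.out.println(searcher.doc(sd.doc).get("id"));
    }
}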
WildcardQuery
WildcardQuery is a wildcard search, essentially an upgraded PrefixQuery with finer control and more flexibility. Lucene supports two wildcards, * and ?: * matches any number of characters and ? matches exactly one character. For example, lu*e matches both lucene and luke, while lu?e matches luke but not lucene.
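The same idea with WildcardQuery, again as a method that could be added to the demo class below (import org.apache.lucene.search.WildcardQuery); the pattern values mirror the lu*e / lu?e examples above:

public void searchByWildcard(String field, String pattern, int num) throws IOException {
    IndexSearcher searcher = getSearcher();
    // "*" matches any number of characters, "?" matches exactly one,
    // so "lu*e" would match both lucene and luke, "lu?e" only luke.
    Query query = new WildcardQuery(new Term(field, pattern));
    TopDocs tds = searcher.search(query, num);
    System.out.println("total hits: " + tds.totalHits);
    for (ScoreDoc sd : tds.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        System.out.println(doc.get("id") + "---->" + doc.get("name"));
    }
}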
TermRangeQuery
TermRangeQuery is a range search over string terms. It is generally created with five parameters: the field name, the lower bound, the upper bound, whether to include the lower bound, and whether to include the upper bound; these have the same meaning as the parameters of NumericRangeQuery below.
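A minimal sketch using the TermRangeQuery.newStringRange factory (Lucene 4.x/5.x), again as a method for the demo class below (import org.apache.lucene.search.TermRangeQuery); the parameters map onto the five just described:

public void searchByTermRange(String field, String lower, String upper, int num) throws IOException {
    IndexSearcher searcher = getSearcher();
    // field, lower bound, upper bound, include lower?, include upper?
    Query query = TermRangeQuery.newStringRange(field, lower, upper, true, true);
    TopDocs tds = searcher.search(query, num);
    for (ScoreDoc sd : tds.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        System.out.println(doc.get("id") + "---->" + doc.get("name"));
    }
}

Note that the comparison is lexicographic (so "10" sorts before "2"); for real numeric ranges, NumericRangeQuery below is the better choice.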
NumericRangeQuery
NumericRangeQuery is a range search over numeric values. It provides different factory methods for different data types (int, float, double); the parameters have the same meaning as in TermRangeQuery.
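A sketch assuming Lucene 4.x/5.x, where NumericRangeQuery still exists (it was removed in Lucene 6 in favour of point-based range queries). It also assumes a hypothetical numeric field named "attach" indexed as an IntField, which the demo index below does not actually contain; it is shown only to illustrate the factory method (import org.apache.lucene.search.NumericRangeQuery):

public void searchByNumericRange(int num) throws IOException {
    IndexSearcher searcher = getSearcher();
    // Hypothetical numeric field "attach"; parameters: field, min, max, include min?, include max?
    Query query = NumericRangeQuery.newIntRange("attach", 2, 10, true, true);
    TopDocs tds = searcher.search(query, num);
    System.out.println("total hits: " + tds.totalHits);
}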
QueryParser
QueryParser is mainly used to build the query when searching a single field. Its constructor takes the field name and the analyzer. The Query objects it creates differ slightly between Lucene versions; for the details, see the post 关于QueryParser类前后修改 (on the changes made to the QueryParser class).
BooleanQuery
The Query subclasses introduced above almost all target a single field, or a single keyword across fields. So how do you handle different keywords on different fields, and how do you combine several Query objects into one? BooleanQuery does both. It can nest arbitrarily complex queries and, much like boolean algebra, offers three logical relations: AND (Occur.MUST), OR (Occur.SHOULD), and NOT (Occur.MUST_NOT).
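A rough sketch combining two of the demo fields with the mutable BooleanQuery of Lucene 4.x/5.x (newer versions use BooleanQuery.Builder); as a method for the demo class below it needs org.apache.lucene.search.BooleanQuery and org.apache.lucene.search.BooleanClause on the import list. It mirrors the "-name:mike +football" example in the test class at the end:

public void searchByBoolean(int num) throws IOException {
    IndexSearcher searcher = getSearcher();
    BooleanQuery query = new BooleanQuery();
    // content MUST contain "football" AND name MUST NOT be "mike"
    query.add(new TermQuery(new Term("content", "football")), BooleanClause.Occur.MUST);
    query.add(new TermQuery(new Term("name", "mike")), BooleanClause.Occur.MUST_NOT);
    TopDocs tds = searcher.search(query, num);
    for (ScoreDoc sd : tds.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        System.out.println(doc.get("id") + "---->" + doc.get("name"));
    }
}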
MultiFieldQueryParser
MultiFieldQueryParser can be thought of as an upgraded QueryParser: where QueryParser searches a single field, MultiFieldQueryParser searches several fields at once. Its constructor and usage are otherwise similar to QueryParser.
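A short sketch, assuming the Lucene 5.x constructor MultiFieldQueryParser(String[] fields, Analyzer analyzer); it parses the same keyword against both the content and name fields of the demo index and needs org.apache.lucene.queryparser.classic.MultiFieldQueryParser on the import list:

public void searchByMultiField(String keyword, int num) throws Exception {
    IndexSearcher searcher = getSearcher();
    // The keyword is parsed against every listed field and the per-field
    // queries are combined with OR by default.
    MultiFieldQueryParser parser = new MultiFieldQueryParser(
            new String[] { "content", "name" }, new StandardAnalyzer());
    Query query = parser.parse(keyword);
    TopDocs tds = searcher.search(query, num);
    for (ScoreDoc sd : tds.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        System.out.println(doc.get("id") + "---->" + doc.get("name"));
    }
}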
There are two ways to paginate results: re-querying (fetch the top N hits and slice out the page) and searchAfter.
A simple demo:
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class LuceneSearch {
    private Directory directory;
    private IndexReader reader;
    private String[] ids = { "1", "2", "3", "4", "5", "6" };
    private String[] emails = { "[email protected]", "[email protected]", "[email protected]",
            "[email protected]", "[email protected]", "[email protected]" };
    private String[] contents = { "welcome to visited the space,I like book",
            "hello boy, I like pingpeng ball", "my name is cc I like game",
            "I like football", "I like football and I like basketball too",
            "I like movie and swim" };
    private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike",
            "jake" };

    public LuceneSearch() {
        directory = new RAMDirectory();
        createIndex();
    }

    public void createIndex() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    new StandardAnalyzer()));
            writer.deleteAll();
            Document doc = null;
            for (int i = 0; i < ids.length; i++) {
                doc = new Document();
                doc.add(new StringField("id", ids[i], Field.Store.YES));
                doc.add(new StringField("email", emails[i], Field.Store.YES));
                doc.add(new StringField("email", "test" + i + "@test.com",
                        Field.Store.YES));
                doc.add(new TextField("content", contents[i], Field.Store.NO));
                doc.add(new StringField("name", names[i], Field.Store.YES));
                writer.addDocument(doc);
                // When the index directory is brand new and contains no index files,
                // commit once so that a version/segments file is created; otherwise
                // opening an IndexReader before the first commit throws an exception.
                writer.commit();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null)
                    writer.close();
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public IndexSearcher getSearcher() {
        try {
            reader = DirectoryReader.open(directory);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new IndexSearcher(reader);
    }

    public void searchByTerm(String field, String name, int num) {
        try {
            IndexSearcher searcher = getSearcher();
            Query query = new TermQuery(new Term(field, name));
            TopDocs tds = searcher.search(query, num);
            System.out.println("total hits: " + tds.totalHits);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id") + "---->" + doc.get("name")
                        + "[" + doc.get("email") + "]");
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public void searchByQueryParse(Query query, int num) {
        try {
            IndexSearcher searcher = getSearcher();
            TopDocs tds = searcher.search(query, num);
            System.out.println("total hits: " + tds.totalHits);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id") + "---->" + doc.get("name")
                        + "[" + doc.get("email") + "]");
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Pagination, re-query style: fetch the top N hits and slice out the page.
    public void searchPage(String query, int pageIndex, int pageSize) {
        try {
            IndexSearcher searcher = getSearcher();
            QueryParser parser = new QueryParser("content",
                    new StandardAnalyzer());
            Query q = parser.parse(query);
            TopDocs tds = searcher.search(q, 500);
            ScoreDoc[] sds = tds.scoreDocs;
            int start = (pageIndex - 1) * pageSize;
            int end = Math.min(pageIndex * pageSize, sds.length);
            for (int i = start; i < end; i++) {
                Document doc = searcher.doc(sds[i].doc);
                System.out.println(sds[i].doc + ":" + doc.get("id") + "-->"
                        + doc.get("name"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Get the last ScoreDoc of the previous page for the given page index and size.
     */
    private ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query,
            IndexSearcher searcher) throws IOException {
        if (pageIndex == 1)
            return null; // first page: nothing comes before it
        int num = pageSize * (pageIndex - 1); // number of hits on the previous pages
        TopDocs tds = searcher.search(query, num);
        return tds.scoreDocs[num - 1];
    }

    // Pagination, searchAfter style: only the current page is fetched.
    public void searchPageByAfter(String query, int pageIndex, int pageSize) {
        try {
            IndexSearcher searcher = getSearcher();
            QueryParser parser = new QueryParser("content",
                    new StandardAnalyzer());
            Query q = parser.parse(query);
            // First get the last element of the previous page,
            ScoreDoc lastSd = getLastScoreDoc(pageIndex, pageSize, q, searcher);
            // then search for the next pageSize hits after it.
            TopDocs tds = searcher.searchAfter(lastSd, q, pageSize);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(sd.doc + ":" + doc.get("id") + "-->"
                        + doc.get("name"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.junit.Before;
import org.junit.Test;

import com.zero.lucene.search.LuceneSearch;

public class LuceneSearchTest {
    private LuceneSearch test = null;

    @Before
    public void init() {
        test = new LuceneSearch();
    }

    @Test
    public void searchByTerm() {
        test.searchByTerm("id", "4", 3);
        // 4---->jetty[[email protected]]
    }

    @Test
    public void testSearchByQueryParse() throws ParseException {
        // 1. Create a QueryParser whose default search field is "content".
        QueryParser parser = new QueryParser("content", new StandardAnalyzer());
        // The default operator for whitespace can be changed to AND:
        // parser.setDefaultOperator(Operator.AND);

        // Search the content field for "like".
        Query query = parser.parse("like");
        /*
         * 5---->mike[[email protected]] 4---->jetty[[email protected]]
         * 6---->jake[[email protected]] 1---->zhangsan[[email protected]]
         * 2---->lisi[[email protected]] 3---->john[[email protected]]
         */

        // Documents containing basketball or football; whitespace defaults to OR.
        query = parser.parse("basketball football");
        /*
         * 5---->mike[[email protected]] 4---->jetty[[email protected]]
         */

        // Switch the search field to "name" and look for zhangsan.
        query = parser.parse("name:zhangsan");
        /*
         * 1---->zhangsan[[email protected]]
         */

        // * and ? wildcards can also be used.
        query = parser.parse("name:jo*");
        /*
         * 3---->john[[email protected]]
         */

        // Allow a wildcard as the first character; this is off by default
        // because leading wildcards are expensive.
        parser.setAllowLeadingWildcard(true);
        query = parser.parse("email:*@itat.org");
        /*
         * 1---->zhangsan[[email protected]] 2---->lisi[[email protected]]
         * 6---->jake[[email protected]]
         */

        // name must not contain mike but content must contain football;
        // + and - go in front of the field specifier.
        query = parser.parse("-name:mike +football");
        /*
         * 4---->jetty[[email protected]]
         */

        // Match a range; note that TO must be upper case.
        query = parser.parse("id:[3 TO 5]");
        /*
         * 3---->john[[email protected]] 4---->jetty[[email protected]]
         * 5---->mike[[email protected]]
         */

        // An exclusive range: {1 TO 3} only matches 2.
        query = parser.parse("id:{1 TO 3}");
        /*
         * 2---->lisi[[email protected]]
         */

        // Exact phrase match for "I like football".
        query = parser.parse("\"I like football\"");
        /*
         * 4---->jetty[[email protected]] 5---->mike[[email protected]]
         */

        // Match "I" and "football" at most one word apart.
        query = parser.parse("\"I football\"~1");
        /*
         * 4---->jetty[[email protected]] 5---->mike[[email protected]]
         */

        // Fuzzy query.
        query = parser.parse("name:make~");
        /*
         * 5---->mike[[email protected]] 6---->jake[[email protected]]
         */

        // QueryParser cannot handle numeric ranges out of the box
        // (you would have to extend the parser yourself).
        query = parser.parse("attach:[2 TO 10]");
        /* total hits: 0 */
        test.searchByQueryParse(query, 10);
    }
}
109: }