检索
IndexSearcher :检索操作的核心组件,用于对 IndexWriter 创建的索引执行只读的检索操作,工作模式为接收 Query 对象,返回包含 ScoreDoc 数组的 TopDocs 对象。
Term :检索的基本单元,标识检索的字段名称和检索对象的值,如 Term("title", "lucene"),即表示在 title 字段中搜寻关键词 lucene。
Query :表示查询的抽象类,由相应的 Term 来标识。
TermQuery :最基本的查询类型,用于匹配含有指定值字段的文档。
TopDocs:保存查询结果的类。
ScoreDoc(Hits):用来装载搜索结果文档队列指针的数组容器。
样例代码:
package com.lucene.test.T02;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Basic search example: runs a {@code TermQuery} against a single field of an
 * existing index and prints the stored fields of every hit.
 */
public class TestSearcher {
    /**
     * Searches the index at {@code d:/temp/luceneindex} for documents whose
     * "address" field contains the term "beijing" and prints score, id, name,
     * address and birthday of each hit, one hit per line.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                // Look for documents whose "address" field holds the term "beijing".
                Term term = new Term("address", "beijing");
                TermQuery query = new TermQuery(term);
                // 10 = return the top 10 hits; if fewer match, all of them are returned.
                TopDocs topDocs = searcher.search(query, 10);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when the search or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
输出结果:
1.287682 2 lisi beijing 19860105
1.287682 4 zhaoliu beijing 19550719
注意:本例使用前一章建立索引的例子所创建的索引。由于本例通过 Term term = new Term("address", "beijing"); 检索 address 字段,而前一章中 document.add(new Field("address", address[i], Field.Store.YES, Field.Index.NO)); 使用了 Field.Index.NO,即只存储该字段而不为其建立索引,因此查不到任何结果;
需要将其改为 Field.Index.ANALYZED,
并重新建立索引。
RangeQuery 查找一定范围的值
package com.lucene.test.T02;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Range search example: uses a {@code RangeQuery} to find documents whose
 * field value lies between two terms.
 */
public class TestRangSearch {
    /**
     * Searches the index at {@code d:/temp/luceneindex} for documents whose
     * "birthday" field lies between "19860105" and "19880101" (both bounds
     * included) and prints score, id, name, address and birthday of at most
     * two hits, one hit per line.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                Term beginTerm = new Term("birthday", "19860105");
                Term endTerm = new Term("birthday", "19880101");
                // RangeQuery matches every value in the range; "true" makes both bounds inclusive.
                RangeQuery query = new RangeQuery(beginTerm, endTerm, true);
                // Only the top 2 hits are requested here.
                TopDocs topDocs = searcher.search(query, 2);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when the search or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
输出:
1.1972358 1 zhangsan shanghai 19880101
1.1972358 2 lisi beijing 19860105
PrefixQuery 以XX开头的查询
package com.lucene.test.T02;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Prefix search example: uses a {@code PrefixQuery} to find documents whose
 * field value starts with a given prefix.
 */
public class TestPrefixQuery {
    /**
     * Searches the index at {@code d:/temp/luceneindex} for documents whose
     * "name" field starts with "zh" and prints score, id, name, address and
     * birthday of at most ten hits, one hit per line.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                // Find names that start with "zh".
                Term term = new Term("name", "zh");
                PrefixQuery query = new PrefixQuery(term);
                TopDocs topDocs = searcher.search(query, 10);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when the search or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
输出:
1.1972358 1 zhangsan shanghai 19880101
1.1972358 4 zhaoliu beijing 19550719
FuzzyQuery 模糊查询
package com.lucene.test.T02;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Fuzzy search example: uses a {@code FuzzyQuery} to find documents whose
 * field value is similar to a given term.
 */
public class TestFuzzyQuery {
    /**
     * Searches the index at {@code d:/temp/luceneindex} for documents whose
     * "name" field is similar to "zhangsan" and prints score, id, name,
     * address and birthday of at most ten hits, one hit per line.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                // The fuzzy query also matches names that are merely close to "zhangsan".
                Term term = new Term("name", "zhangsan");
                FuzzyQuery query = new FuzzyQuery(term);
                TopDocs topDocs = searcher.search(query, 10);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when the search or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
测试时,将前一章建立索引例子中的姓名数组改为 String[] names = { "zhangsan", "zhangsun", "zhangson", "zhaoliu" }; 并重新建立索引,便于观察效果。
模糊查询会将与 zhangsan 相近的 zhangsan、zhangsun、zhangson 都查出来。
输出结果:
1.1614895 1 zhangsan shanghai 19880101
0.8711171 2 zhangsun beijing 19860105
0.8711171 3 zhangson guangzhou 19760205
BooleanQuery 可以将多个查询条件组合
package com.lucene.test.T02;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Combined search example: uses a {@code BooleanQuery} to join a wildcard
 * query and a term query into a single query.
 */
public class TestBooleanQuery {
    /**
     * Searches the index at {@code d:/temp/luceneindex} with a boolean query
     * built from two clauses: a wildcard query on "name" with the pattern
     * "*g??" (added as SHOULD) and a term query on "address" for "tianjing"
     * (added as MUST_NOT). Prints id, name, address and birthday of at most
     * ten hits, one hit per line.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, searched or closed
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "d:/temp/luceneindex";
        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                Term nterm = new Term("name", "*g??");
                WildcardQuery wildcardQuery = new WildcardQuery(nterm);
                Term aterm = new Term("address", "tianjing");
                TermQuery termQuery = new TermQuery(aterm);

                // A BooleanQuery combines several query conditions into one.
                BooleanQuery query = new BooleanQuery();
                // SHOULD: the clause may match (optional).
                query.add(wildcardQuery, BooleanClause.Occur.SHOULD);
                // MUST_NOT: documents matching this clause are excluded.
                query.add(termQuery, BooleanClause.Occur.MUST_NOT);

                TopDocs topDocs = searcher.search(query, 10);
                ScoreDoc[] hits = topDocs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                // Release the searcher even when the search or document loading fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}