最近在做Lucene的应用,趁着一个项目节点的间歇,总结了一下Lucene中有关自定义搜索结果排序的相关代码,一来和大家共同探讨,二来也便于备忘。
众所周知,Lucene默认的搜索结果按Score从高到低排序,当Score相等时,则按建立索引时分配的docID由小到大排序。通过自定义搜索结果的排序,则可以完全按照真实业务的需要来决定结果的顺序。
下面以一个按距离查询餐馆的例子配合代码进行讲解(该例很多地方都有,但是我参考的时候发现很多地方提供的例子都不能直接运行)。这里提供可以直接运行的例子,代码如下:
DistanceComparatorSource.java
package com.xxx.demo;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache.IntParser;
/**
 * Comparator source that orders search hits by their Euclidean distance from
 * a fixed origin point {@code (x, y)}, nearest first.
 *
 * <p>The field being sorted on must be indexed as a single un-tokenized term
 * of the form {@code "x,y"} (two decimal integers separated by a comma); the
 * two coordinates are extracted from that term through the field cache.
 */
public class DistanceComparatorSource extends FieldComparatorSource {

    /**
     * Shared parser instances. The field cache keys its entries on the parser
     * object, so handing it the same instance on every search lets later
     * searches reuse the cached arrays instead of re-reading the index and
     * adding yet another cache entry per call.
     */
    private static final IntParser X_PARSER = new CoordinateParser(0);
    private static final IntParser Y_PARSER = new CoordinateParser(1);

    /** X coordinate of the origin that distances are measured from. */
    private final int x;
    /** Y coordinate of the origin that distances are measured from. */
    private final int y;

    /**
     * @param x x coordinate of the origin
     * @param y y coordinate of the origin
     */
    public DistanceComparatorSource(int x, int y) {
        this.x = x;
        this.y = y;
    }

    /**
     * Creates the comparator for one search.
     *
     * @param fieldname name of the field holding the {@code "x,y"} terms
     * @param numHits   number of slots the comparator must be able to hold
     * @param sortPos   not used by this implementation
     * @param reversed  not used by this implementation
     * @return a new distance comparator bound to this source's origin
     */
    @Override
    public FieldComparator newComparator(String fieldname, int numHits,
            int sortPos, boolean reversed) throws IOException {
        return new DistanceScoreDocLookupComparator(fieldname, numHits, x, y);
    }

    /** Extracts one comma-separated component of an {@code "x,y"} term as an int. */
    private static final class CoordinateParser implements IntParser {
        /** Zero-based position of the component to extract. */
        private final int index;

        CoordinateParser(int index) {
            this.index = index;
        }

        @Override
        public int parseInt(String string) {
            return Integer.parseInt(string.split(",")[index]);
        }
    }

    /**
     * Comparator that keeps the distance of each competitive hit in a slot
     * array and compares hits by those distances (smaller distance sorts first).
     */
    private static final class DistanceScoreDocLookupComparator extends FieldComparator {
        /** Per-document coordinates of the current reader, indexed by doc id. */
        private int[] xDoc;
        private int[] yDoc;
        /** Distance stored for each slot. */
        private final float[] values;
        /** Distance of the slot most recently passed to {@link #setBottom(int)}. */
        private float bottom;
        private final String fieldName;
        private final int originX;
        private final int originY;

        DistanceScoreDocLookupComparator(String fieldName, int numHits,
                int originX, int originY) {
            this.values = new float[numHits];
            this.fieldName = fieldName;
            this.originX = originX;
            this.originY = originY;
        }

        /** Compares the distances stored in two slots. */
        @Override
        public int compare(int slot1, int slot2) {
            return Float.compare(values[slot1], values[slot2]);
        }

        /** Compares the current bottom distance with the distance of {@code doc}. */
        @Override
        public int compareBottom(int doc) throws IOException {
            return Float.compare(bottom, getDistance(doc));
        }

        /** Stores the distance of {@code doc} into {@code slot}. */
        @Override
        public void copy(int slot, int doc) throws IOException {
            values[slot] = getDistance(doc);
        }

        /** Remembers the distance held in {@code slot} as the bottom value. */
        @Override
        public void setBottom(int slot) {
            bottom = values[slot];
        }

        /** Loads the x and y coordinate arrays of {@code reader} from the field cache. */
        @Override
        public void setNextReader(IndexReader reader, int docBase)
                throws IOException {
            xDoc = FieldCache.DEFAULT.getInts(reader, fieldName, X_PARSER);
            yDoc = FieldCache.DEFAULT.getInts(reader, fieldName, Y_PARSER);
        }

        /** Returns the distance stored in {@code slot}, boxed. */
        @Override
        public Float value(int slot) {
            return Float.valueOf(values[slot]);
        }

        /**
         * Euclidean distance between the origin and the location of {@code doc}
         * (a doc id relative to the current reader).
         */
        private float getDistance(int doc) {
            // Work in double so that neither the subtraction nor the squares
            // can overflow int for large coordinates.
            double deltaX = (double) xDoc[doc] - originX;
            double deltaY = (double) yDoc[doc] - originY;
            return (float) Math.sqrt(deltaX * deltaX + deltaY * deltaY);
        }

        // NOTE(review): left without @Override because it could not be
        // confirmed here that the base class declares this method.
        public int sortType() {
            return SortField.CUSTOM;
        }
    }

    @Override
    public String toString() {
        return "Distance from (" + x + "," + y + ")";
    }
}
DistanceSortingTest.java
package com.xxx.demo;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class DistanceSortingTest extends TestCase{
private RAMDirectory directory;
private IndexSearcher searcher ;
private Query query;
protected void setUp() throws Exception{
directory = new RAMDirectory();
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33,new StandardAnalyzer(Version.LUCENE_33));
config.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(directory,config);
addPoint(writer,"El Charro","restaurant restaurant restaurant",1,2);//5
addPoint(writer,"Cafe Poca Cosa","restaurant",5,9);//25+81=106
addPoint(writer,"Los Betos","restaurant",9,6);//81+36=117
addPoint(writer,"Nico's Taco Shop","restaurant restaurant",3,8);//9+64=73
writer.close();
searcher = new IndexSearcher(directory);
QueryParser parser = new QueryParser(Version.LUCENE_33, "type", new StandardAnalyzer(Version.LUCENE_33));
query = parser.parse("type:restaurant");
}
private void addPoint(IndexWriter writer,String name,String type,int x,int y)
throws CorruptIndexException, IOException{
Document doc = new Document();
doc.add(new Field("name",name,Field.Store.YES,Field.Index.ANALYZED));
doc.add(new Field("type",type,Field.Store.YES,Field.Index.ANALYZED));
doc.add(new Field("location",x+","+y,Field.Store.YES,Field.Index.NOT_ANALYZED));
writer.addDocument(doc);
}
public void testNormRestaurant() throws IOException{
TopDocs hits = searcher.search(query,10);
System.out.println("--------testNormRestaurant---------- ");
for(ScoreDoc doc : hits.scoreDocs){
System.out.println("docId:"+doc.doc+"score:"+doc.score+", name:"+searcher.doc(doc.doc).get("name"));
}
assertEquals("first","Cafe Poca Cosa",searcher.doc(hits.scoreDocs[0].doc).get("name"));
assertEquals("second","Los Betos",searcher.doc(hits.scoreDocs[1].doc).get("name"));
assertEquals("third","Nico's Taco Shop",searcher.doc(hits.scoreDocs[2].doc).get("name"));
assertEquals("forth","El Charro",searcher.doc(hits.scoreDocs[3].doc).get("name"));
}
public void testNearestRestaurantToHome() throws IOException{
Sort sort = new Sort(new SortField("location",new DistanceComparatorSource(0,0)));
TopDocs hits = searcher.search(query,null,10,sort);
System.out.println("--------testNearestRestaurantToHome---------- ");
for(ScoreDoc doc : hits.scoreDocs){
System.out.println("docId:"+doc.doc+"name:"+searcher.doc(doc.doc).get("name"));
}
assertEquals("cloest","El Charro",searcher.doc(hits.scoreDocs[0].doc).get("name"));
assertEquals("second","Nico's Taco Shop",searcher.doc(hits.scoreDocs[1].doc).get("name"));
assertEquals("third","Cafe Poca Cosa",searcher.doc(hits.scoreDocs[2].doc).get("name"));
assertEquals("furthest","Los Betos",searcher.doc(hits.scoreDocs[3].doc).get("name"));
}
public void testNearestRestaurantToWork() throws IOException{
Sort sort = new Sort(new SortField("location",new DistanceComparatorSource(10,10)));
TopFieldDocs docs = searcher.search(query,null,3,sort);
assertEquals(4,docs.totalHits);
assertEquals(3,docs.scoreDocs.length);
FieldDoc fieldDoc = (FieldDoc)docs.scoreDocs[0];
assertEquals("(10,10) -> (9,6) = sqrt(17)",new Float(Math.sqrt(17)),fieldDoc.fields[0]);
Document document = searcher.doc(fieldDoc.doc);
assertEquals("Los Betos", document.get("name"));
}
}