使用Lucene来搜索内容,搜索结果的显示顺序当然是比较重要的.Lucene内置(built-in)的几种排序方式在大多数情况下并不适合我们使用.要适合自己应用程序的场景,就只能自定义排序功能,本节我们就来看看在Lucene中如何实现自定义排序功能.
Lucene中的自定义排序功能和Java集合中的自定义排序的实现方法差不多,都要实现一下比较接口. 在Java中只要实现Comparable接口就可以了.但是在Lucene中要实现SortComparatorSource接口和ScoreDocComparator接口.在了解具体实现方法之前先来看看这两个接口的定义吧.
SortComparatorSource接口的功能是返回一个用来排序ScoreDocs的comparator(Expert: returns a comparator for sorting ScoreDocs).该接口只定义了一个方法.如下:
- public ScoreDocComparator newComparator(IndexReader reader, String fieldname) throws IOException
该方法只是创建一个ScoreDocComparator实例用来实现排序,所以我们还要实现ScoreDocComparator接口.来看看ScoreDocComparator接口:它的功能是比较两个ScoreDoc对象以便排序(Compares two ScoreDoc objects for sorting).里面定义了两个Lucene实现的静态实例,如下:
- public static final ScoreDocComparator RELEVANCE;
-
-
- public static final ScoreDocComparator INDEXORDER;
有3个方法与排序相关,需要我们实现,分别如下:
- public int compare(ScoreDoc i, ScoreDoc j);
-
- public Comparable sortValue(ScoreDoc i);
-
- public int sortType();
看个例子吧!
该例子为Lucene in Action中的一个实现,用来搜索距你最近的餐馆的名字. 餐馆坐标用字符串"x,y"来存储.
package com.nikee.lucene;

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreDocComparator;
import org.apache.lucene.search.SortComparatorSource;
import org.apache.lucene.search.SortField;

- publicclassDistanceComparatorSourceimplementsSortComparatorSource{
- privatestaticfinallongserialVersionUID=1L;
-
-
- privateintx;
- privateinty;
-
- publicDistanceComparatorSource(intx,inty){
- this.x=x;
- this.y=y;
- }
-
-
- publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException{
- returnnewDistanceScoreDocLookupComparator(reader,fieldname,x,y);
- }
-
-
- privatestaticclassDistanceScoreDocLookupComparatorimplementsScoreDocComparator{
- privatefloat[]distances;
-
-
- publicDistanceScoreDocLookupComparator(IndexReaderreader,Stringfieldname,intx,inty)throwsIOException{
- System.out.println("fieldName2="+fieldname);
- finalTermEnumenumerator=reader.terms(newTerm(fieldname,""));
-
- System.out.println("maxDoc="+reader.maxDoc());
- distances=newfloat[reader.maxDoc()];
- if(distances.length>0){
- TermDocstermDocs=reader.termDocs();
- try{
- if(enumerator.term()==null){
- thrownewRuntimeException("notermsinfield"+fieldname);
- }
- inti=0,j=0;
- do{
- System.out.println("indo-while:"+i++);
- Termterm=enumerator.term();
- if(term.field()!=fieldname)
- break;
-
-
-
- termDocs.seek(enumerator);
- while(termDocs.next()){
- System.out.println("inwhile:"+j++);
- System.out.println("inwhile,Term:"+term.toString());
-
- String[]xy=term.text().split(",");
- intdeltax=Integer.parseInt(xy[0])-x;
- intdeltay=Integer.parseInt(xy[1])-y;
-
- distances[termDocs.doc()]=(float)Math.sqrt(deltax*deltax+deltay*deltay);
- }
- }
- while(enumerator.next());
- }finally{
- termDocs.close();
- }
- }
- }
-
- publicintcompare(ScoreDoci,ScoreDocj){
- if(distances[i.doc]<distances[j.doc])
- return-1;
- if(distances[i.doc]>distances[j.doc])
- return1;
- return0;
- }
-
-
- publicComparablesortValue(ScoreDoci){
- returnnewFloat(distances[i.doc]);
- }
-
-
- publicintsortType(){
- returnSortField.FLOAT;
- }
- }
-
- publicStringtoString(){
- return"Distancefrom("+x+","+y+")";
- }
- }
这是分别实现了上面两个接口的两个类,里面带有详细注释,可以看出自定义排序并不是很难.该实现是否正确,我们来看看测试代码能否通过吧.
package com.nikee.lucene.test;

import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.RAMDirectory;
import com.nikee.lucene.DistanceComparatorSource;

- publicclassDistanceComparatorSourceTestextendsTestCase{
- privateRAMDirectorydirectory;
-
- privateIndexSearchersearcher;
- privateQueryquery;
-
-
- protectedvoidsetUp()throwsException{
- directory=newRAMDirectory();
- IndexWriterwriter=newIndexWriter(directory,newWhitespaceAnalyzer(),true);
-
- addPoint(writer,"ElCharro","restaurant",1,2);
- addPoint(writer,"CafePocaCosa","restaurant",5,9);
- addPoint(writer,"LosBetos","restaurant",9,6);
- addPoint(writer,"Nico'sTacoShop","restaurant",3,8);
- writer.close();
- searcher=newIndexSearcher(directory);
- query=newTermQuery(newTerm("type","restaurant"));
- }
-
- privatevoidaddPoint(IndexWriterwriter,Stringname,Stringtype,intx,inty)throwsIOException{
- Documentdoc=newDocument();
- doc.add(newField("name",name,Field.Store.YES,Field.Index.TOKENIZED));
- doc.add(newField("type",type,Field.Store.YES,Field.Index.TOKENIZED));
- doc.add(newField("location",x+","+y,Field.Store.YES,Field.Index.UN_TOKENIZED));
- writer.addDocument(doc);
- }
-
- publicvoidtestNearestRestaurantToHome()throwsException{
-
- Sortsort=newSort(newSortField("location",newDistanceComparatorSource(0,0)));
- Hitshits=searcher.search(query,sort);
-
-
- assertEquals("closest","ElCharro",hits.doc(0).get("name"));
- assertEquals("furthest","LosBetos",hits.doc(3).get("name"));
- }
-
- publicvoidtestNeareastRestaurantToWork()throwsException{
- Sortsort=newSort(newSortField("location",newDistanceComparatorSource(10,10)));
-
-
- TopFieldDocsdocs=searcher.search(query,null,3,sort);
-
- assertEquals(4,docs.totalHits);
- assertEquals(3,docs.scoreDocs.length);
-
-
- FieldDocfieldDoc=(FieldDoc)docs.scoreDocs[0];
- assertEquals("(10,10)->(9,6)=sqrt(17)",newFloat(Math.sqrt(17)),fieldDoc.fields[0]);
- Documentdocument=searcher.doc(fieldDoc.doc);
- assertEquals("LosBetos",document.get("name"));
- dumpDocs(sort,docs);
- }
-
-
- privatevoiddumpDocs(Sortsort,TopFieldDocsdocs)throwsIOException{
- System.out.println("Sortedby:"+sort);
- ScoreDoc[]scoreDocs=docs.scoreDocs;
- for(inti=0;i<scoreDocs.length;i++){
- FieldDocfieldDoc=(FieldDoc)scoreDocs[i];
- Floatdistance=(Float)fieldDoc.fields[0];
- Documentdoc=searcher.doc(fieldDoc.doc);
- System.out.println(""+doc.get("name")+"@("+doc.get("location")+")->"+distance);
- }
- }
- }