Lucene Collector使用例子

Lucene 的 Collector 是一项高级功能,它涉及搜索过程的具体步骤:通过自定义 Collector,既可以改变默认的搜索(结果收集)行为,也可以在收集阶段获取每个命中文档的信息。

 

下面用一个例子演示如何自定义Collector以及如何在自定义的Collector中实现数据收集功能。

 

package test; import java.io.IOException; import java.util.Map; import java.util.Set; import java.util.TreeMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.Collector; import org.apache.lucene.search.HitCollector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.ParallelMultiSearcher; import org.apache.lucene.search.RangeFilter; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Searcher; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.RAMDirectory; import casia.isiteam.plgroup.indexer.commons.AnalyzerFactory; /** * 自定义收集器 * @author [email protected] * */ class MyCollector extends Collector { IndexReader reader = null;//reader用于读取获取文档 //收集信息的map public Map<Integer, Integer> map = new TreeMap<Integer, Integer>(); @Override public boolean acceptsDocsOutOfOrder() { // TODO Auto-generated method stub return true; } /** * 收集 */ @Override public void collect(int doc) throws IOException { System.out.println("doc:" + doc); Document document = reader.document(doc); int id = Integer.parseInt(document.get("id")); int count = Integer.parseInt(document.get("count")); map.put(id, count); System.out.println("put:" + id + " " + count); } @Override public void setNextReader(IndexReader reader, int docBase) throws IOException { this.reader = reader;//假设reader由多个subReader构成,那么本方法将被调用与subReader个数相同的次数 System.out.println("set reader"); } @Override public void setScorer(Scorer scorer) throws IOException { // do nothing } } public class HitCollectorTest { @SuppressWarnings("deprecation") public static void test() throws CorruptIndexException, 
LockObtainFailedException, IOException { // 构建内存索引 RAMDirectory directory = new RAMDirectory(); RAMDirectory directory2 = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, AnalyzerFactory.getMMInstance()); IndexWriter writer2 = new IndexWriter(directory2, AnalyzerFactory.getMMInstance()); Document document = new Document(); Field idField = new Field("id", "1", Store.YES, Index.ANALYZED); Field countFiled = new Field("count", "33", Store.YES, Index.ANALYZED); Field range = new Field("range", "1", Store.YES, Index.ANALYZED);// 用于过滤数据 document.add(idField); document.add(countFiled); document.add(range); writer.addDocument(document); writer.close(); document = new Document(); idField = new Field("id", "2", Store.YES, Index.ANALYZED); countFiled = new Field("count", "211", Store.YES, Index.ANALYZED); range = new Field("range", "2", Store.YES, Index.ANALYZED); document.add(idField); document.add(countFiled); document.add(range); writer2.addDocument(document); writer2.close(); // 使用MatchAllDocsQuery结合filter进行搜索,使用自定义Collector对数据进行收集 Searcher searcher = new IndexSearcher(directory); Searcher searcher2 = new IndexSearcher(directory2); Searcher searcher3=new ParallelMultiSearcher(new Searcher[]{searcher,searcher2}); MyCollector collector = new MyCollector(); RangeFilter filter = new RangeFilter("range", "0", "4", true, true); searcher3.search(new MatchAllDocsQuery(), filter, collector); searcher3.close(); directory.close(); // 取出数据 Map<Integer, Integer> map = collector.map; Set<Integer> keySet = map.keySet(); for (int i : keySet) { System.out.println("<" + i + "," + map.get(i) + ">"); } } public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException { test(); } }  

你可能感兴趣的:(String,filter,Integer,Lucene,Class,文档)