browse()函数的最后部分,是对分组计数结果的一个整理:
finally { //每个Facet对应一个FacetHitCollector for (FacetHitCollector facetCollector : facetHitCollectorList) { String name = facetCollector.facetHandler.getName(); LinkedList<FacetCountCollector> resultcollector=null; //每一个FacetHitCollector维护一个FacetCountCollector列表,每一个IndexReader对应一个FacetCountCollector resultcollector = facetCollector._countCollectorList; if (resultcollector == null || resultcollector.size() == 0){ resultcollector = facetCollector._collectAllCollectorList; } if (resultcollector!=null){ FacetSpec fspec = req.getFacetSpec(name); assert fspec != null; if(resultcollector.size() == 1) { facetMap.put(name, resultcollector.get(0)); } else { ArrayList<FacetAccessible> finalList = new ArrayList<FacetAccessible>(resultcollector.size()); for (FacetCountCollector fc : resultcollector){ finalList.add((FacetAccessible)fc); } //多个IndexReader下的分组计数结果要进行归并,相同的属性的count求和 CombinedFacetAccessible combinedCollector = new CombinedFacetAccessible(fspec, finalList); facetMap.put(name, combinedCollector); } } } }
同一个facet下的多个IndexReader的分组计数结果要进行合并,相同的属性的计数要进行求和。这个过程是通过构造CombinedFacetAccessible(FacetSpec, List<FacetAccessible>) 来实现的,这个类的对外的功能函数是getFacets(),也就是返回最终的分组计数结果:
public List<BrowseFacet> getFacets() { int maxCnt = _fspec.getMaxCount(); if(maxCnt <= 0) maxCnt = Integer.MAX_VALUE; int minHits = _fspec.getMinHitCount(); LinkedList<BrowseFacet> list = new LinkedList<BrowseFacet>(); int cnt = 0; Comparable facet = null; FacetIterator iter = (FacetIterator)this.iterator(); Comparator<BrowseFacet> comparator; if (FacetSortSpec.OrderValueAsc.equals(_fspec.getOrderBy())) { while((facet = iter.next(minHits)) != null) { // find the next facet whose combined hit count obeys minHits list.add(new BrowseFacet(String.valueOf(facet), iter.count)); if(++cnt >= maxCnt) break; } } else if(FacetSortSpec.OrderHitsDesc.equals(_fspec.getOrderBy())) { comparator = new Comparator<BrowseFacet>() { public int compare(BrowseFacet f1, BrowseFacet f2) { int val=f2.getHitCount() - f1.getHitCount(); if (val==0) { val = (f1.getValue().compareTo(f2.getValue())); } return val; } }; if(maxCnt != Integer.MAX_VALUE) { // we will maintain a min heap of size maxCnt // Order by hits in descending order and max count is supplied PriorityQueue queue = createPQ(maxCnt, comparator); int qsize = 0; while( (qsize < maxCnt) && ((facet = iter.next(minHits)) != null) ) { queue.add(new BrowseFacet(String.valueOf(facet), iter.count)); qsize++; } if(facet != null) { BrowseFacet rootFacet = (BrowseFacet)queue.top(); minHits = rootFacet.getHitCount() + 1; // facet count less than top of min heap, it will never be added while(((facet = iter.next(minHits)) != null)) { rootFacet.setValue(String.valueOf(facet)); rootFacet.setHitCount(iter.count); rootFacet = (BrowseFacet) queue.updateTop(); minHits = rootFacet.getHitCount() + 1; } } // at this point, queue contains top maxCnt facets that have hitcount >= minHits while(qsize-- > 0) { // append each entry to the beginning of the facet list to order facets by hits descending list.addFirst((BrowseFacet) queue.pop()); } } else { // no maxCnt specified. 
So fetch all facets according to minHits and sort them later while((facet = iter.next(minHits)) != null) list.add(new BrowseFacet(String.valueOf(facet), iter.count)); Collections.sort(list, comparator); } } else // FacetSortSpec.OrderByCustom.equals(_fspec.getOrderBy() { comparator = _fspec.getCustomComparatorFactory().newComparator(); if(maxCnt != Integer.MAX_VALUE) { PriorityQueue queue = createPQ(maxCnt, comparator); BrowseFacet browseFacet = new BrowseFacet(); int qsize = 0; while( (qsize < maxCnt) && ((facet = iter.next(minHits)) != null) ) { queue.add(new BrowseFacet(String.valueOf(facet), iter.count)); qsize++; } if(facet != null) { while((facet = iter.next(minHits)) != null) { // check with the top of min heap browseFacet.setHitCount(iter.count); browseFacet.setValue(String.valueOf(facet)); browseFacet = (BrowseFacet)queue.insertWithOverflow(browseFacet); } } // remove from queue and add to the list while(qsize-- > 0) list.addFirst((BrowseFacet)queue.pop()); } else { // order by custom but no max count supplied while((facet = iter.next(minHits)) != null) list.add(new BrowseFacet(String.valueOf(facet), iter.count)); Collections.sort(list, comparator); } } return list; }
其中有一行代码:
FacetIterator iter = (FacetIterator)this.iterator();
iterator() 这个函数很重要:它把每个 FacetAccessible.iterator() 返回的分组计数结果迭代器收集到一个列表中,然后用这个列表构造 CombinedFacetIterator(List<FacetIterator>);如果迭代器是 int、long、short、float、double 等数值类型,则构造对应的 CombinedIntFacetIterator 等类型化的合并迭代器:
/**
 * Builds a single iterator that merges the facet iterators of every
 * FacetAccessible held in {@code _list}.
 *
 * <p>Each accessible is asked for its iterator exactly once; {@code null} iterators are
 * skipped. The type of the first remaining iterator selects the combined iterator:
 * int, long, short, float and double iterators get their typed combined counterpart
 * (constructed with the spec's min hit count), anything else gets the generic
 * {@code CombinedFacetIterator}. When no iterator is available at all, an empty
 * {@code CombinedFacetIterator} is returned instead of failing on {@code get(0)}.
 *
 * @return the combined iterator; never {@code null}
 * @throws ClassCastException if the first iterator is of a typed kind and a later
 *         iterator is not of that same kind
 */
public FacetIterator iterator() {
  ArrayList<FacetIterator> iterList = new ArrayList<FacetIterator>(_list.size());
  for (FacetAccessible facetAccessor : _list) {
    // The per-reader count results are exposed through an iterator.
    FacetIterator iter = (FacetIterator) facetAccessor.iterator();
    if (iter != null) {
      iterList.add(iter);
    }
  }

  if (iterList.isEmpty()) {
    // Nothing to merge: the generic combined iterator accepts an empty list.
    return new CombinedFacetIterator(iterList);
  }

  FacetIterator first = iterList.get(0);
  if (first instanceof IntFacetIterator) {
    return new CombinedIntFacetIterator(
        castIterators(iterList, IntFacetIterator.class), _fspec.getMinHitCount());
  }
  if (first instanceof LongFacetIterator) {
    return new CombinedLongFacetIterator(
        castIterators(iterList, LongFacetIterator.class), _fspec.getMinHitCount());
  }
  if (first instanceof ShortFacetIterator) {
    return new CombinedShortFacetIterator(
        castIterators(iterList, ShortFacetIterator.class), _fspec.getMinHitCount());
  }
  if (first instanceof FloatFacetIterator) {
    return new CombinedFloatFacetIterator(
        castIterators(iterList, FloatFacetIterator.class), _fspec.getMinHitCount());
  }
  if (first instanceof DoubleFacetIterator) {
    return new CombinedDoubleFacetIterator(
        castIterators(iterList, DoubleFacetIterator.class), _fspec.getMinHitCount());
  }
  return new CombinedFacetIterator(iterList);
}

/** Copies {@code source} into a new list, casting every element to {@code type}. */
private static <T> ArrayList<T> castIterators(ArrayList<FacetIterator> source, Class<T> type) {
  ArrayList<T> typed = new ArrayList<T>(source.size());
  for (FacetIterator it : source) {
    typed.add(type.cast(it));
  }
  return typed;
}
FacetAccessible 有一个 iterator() 接口,它将分组计数结果由数组转换成队列,并返回对应的 iterator。这里以 DefaultFacetCountCollector 的具体实现为例:
/**
 * Creates an iterator over this collector's counts, choosing the iterator
 * implementation from the element type reported by {@code _dataCache.valArray}.
 *
 * @return a typed iterator for Integer, Long, Short, Float or Double values,
 *         otherwise the generic {@code DefaultFacetIterator}
 */
public FacetIterator iterator() {
  final Class<?> valueType = _dataCache.valArray.getType();

  if (valueType.equals(Integer.class)) {
    return new DefaultIntFacetIterator(
        (TermIntList) _dataCache.valArray, _count, _countlength, false);
  }
  if (valueType.equals(Long.class)) {
    return new DefaultLongFacetIterator(
        (TermLongList) _dataCache.valArray, _count, _countlength, false);
  }
  if (valueType.equals(Short.class)) {
    return new DefaultShortFacetIterator(
        (TermShortList) _dataCache.valArray, _count, _countlength, false);
  }
  if (valueType.equals(Float.class)) {
    return new DefaultFloatFacetIterator(
        (TermFloatList) _dataCache.valArray, _count, _countlength, false);
  }
  if (valueType.equals(Double.class)) {
    return new DefaultDoubleFacetIterator(
        (TermDoubleList) _dataCache.valArray, _count, _countlength, false);
  }
  // Any other value type is served by the generic iterator.
  return new DefaultFacetIterator(_dataCache.valArray, _count, _countlength, false);
}
可以看到是根据facet的属性值的类型来返回iterator的
那么多个IndexReader的分组计数结果是如何进行合并的呢?
/**
 * Creates a combined iterator over the given facet iterators.
 *
 * <p>Every source iterator is advanced once with {@code next(0)}; only those that
 * yield a facet are added to the heap. The combined iterator itself starts with
 * no current facet and a count of zero.
 *
 * @param iterators the per-source facet iterators to merge
 */
public CombinedFacetIterator(final List<FacetIterator> iterators) {
  _iterators = iterators;
  size = 0;
  // One slot more than the number of sources.
  heap = new FacetIterator[1 + iterators.size()];

  for (FacetIterator source : iterators) {
    if (source.next(0) == null) {
      // This source has nothing to contribute.
      continue;
    }
    add(source);
  }

  count = 0;
  facet = null;
}
类CombinedFacetIterator维护着一个最小堆,堆中的元素是FacetIterator。这个最小堆以各FacetIterator当前facet的属性值作为排序依据,因此属性值相同的facet必然会连续地从堆顶弹出;next()将它们的count求和,最后合并成一个facet返回:
public Comparable next(int minHits) { if(size == 0) { facet = null; count = 0; return null; } FacetIterator node = heap[1]; facet = node.facet; count = node.count; int min = (minHits > 0 ? 1 : 0); while(true) { if(node.next(min) != null) { //重新对最小堆排序 downHeap(); node = heap[1]; } else { //heap[1]的iterator已经没有元素,那么将这个it弹出 pop(); if(size > 0) { node = heap[1]; } else { // we reached the end. check if this facet obeys the minHits if(count < minHits) { facet = null; count = 0; } break; } } Comparable next = node.facet; if (next==null) throw new RuntimeException(); if(!next.equals(facet))//当前的facet已经全部弹出、求和完成 { // check if this facet obeys the minHits if(count >= minHits) break; // else, continue iterating to the next facet facet = next; count = node.count; } else//同一个facet,那么对count求和 { count += node.count; } } return format(facet); }