Lucene 创建和查询索引 Demo
package com.git.lucene;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

import com.chenlb.mmseg4j.analysis.MMSegAnalyzer;

/**
 * Small harness for stepping through Lucene's source code while an index is
 * created, updated, deleted from or queried. For now only the query path is
 * being examined.
 *
 * @author songqinghu
 */
public class LuceneSourceReadTest {

    /** Directory that holds the Lucene index. */
    private static final String INDEX_DIR = "E:\\lucene\\index";

    /** Directory whose files are indexed by {@link #fullIndex()}. */
    private static final String FILE_DIR = "E:\\lucene\\file";

    /** Maximum number of characters of each file's content that is indexed. */
    private static final int MAX_CONTENT_LENGTH = 100;

    /**
     * Entry point: runs the query demo against an already-built index.
     *
     * @param args unused
     * @throws Exception if the index cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Uncomment to (re)build the index before querying:
        // fullIndex();
        readIndex();
    }

    /**
     * Reads the index: runs a {@link TermQuery} for the term "apache" in the
     * "content" field and prints the total hit count followed by the doc id,
     * "fileName" field and "content" field of up to 20 hits.
     *
     * @throws IOException if the index cannot be opened or read
     */
    public static void readIndex() throws IOException {
        Directory directory = FSDirectory.open(new File(INDEX_DIR));
        try {
            DirectoryReader reader = DirectoryReader.open(directory);
            try {
                IndexSearcher index = new IndexSearcher(reader);
                Term t = new Term("content", "apache");
                Query query = new TermQuery(t);
                TopDocs topdocs = index.search(query, 20);

                int totalHits = topdocs.totalHits;
                System.out.println("总条目: " + totalHits);

                ScoreDoc[] scoreDocs = topdocs.scoreDocs;
                for (ScoreDoc scoreDoc : scoreDocs) {
                    int docid = scoreDoc.doc;
                    System.out.println(docid);
                    Document doc = index.doc(docid);
                    System.out.println("fileName : " + doc.getField("fileName"));
                    System.out.println("content : " + doc.getField("content"));
                }
            } finally {
                // Release the file handles held by the reader.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Builds the index: creates one document per regular file found directly
     * in {@link #FILE_DIR}, with the fields "fileName", "path", "fileSize" and
     * "content" (only the first {@value #MAX_CONTENT_LENGTH} characters of the
     * file), then adds all documents and commits.
     *
     * <p>If {@link #FILE_DIR} is not a directory, nothing is added and an
     * empty commit is performed.
     *
     * @throws IOException if the index cannot be written, the source directory
     *         cannot be listed, or a file cannot be read
     */
    public static void fullIndex() throws IOException {
        // Where the index is stored. The original called SimpleFSDirectory.open(...),
        // which resolves to the inherited static FSDirectory.open(...); call it
        // directly so the code says what it actually does.
        Directory d = FSDirectory.open(new File(INDEX_DIR));
        try {
            // Analyzer (tokenizer)
            Analyzer analyzer = new MMSegAnalyzer();
            // Writer configuration
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
            // Writer
            IndexWriter indexWriter = new IndexWriter(d, conf);
            try {
                ArrayList<Document> docs = new ArrayList<Document>();
                File filepath = new File(FILE_DIR);
                if (filepath.isDirectory()) {
                    File[] files = filepath.listFiles();
                    if (files == null) {
                        // listFiles() returns null on an I/O error; fail with a clear
                        // message instead of a NullPointerException in the loop.
                        throw new IOException("Unable to list files in " + filepath);
                    }
                    for (File file : files) {
                        if (!file.isFile()) {
                            // Sub-directories cannot be read as text; skip them rather
                            // than aborting the whole import.
                            continue;
                        }
                        docs.add(toDocument(file));
                    }
                }
                indexWriter.addDocuments(docs);
                indexWriter.commit();
            } finally {
                // Always close the writer so the index write lock is released.
                indexWriter.close();
            }
        } finally {
            d.close();
        }
        System.out.println("全量索引导入结束!");
    }

    /**
     * Converts one file into a Lucene document with the stored fields
     * "fileName", "path", "fileSize" and "content".
     *
     * @param file regular file to index
     * @return the document describing {@code file}
     * @throws IOException if the file cannot be read
     */
    private static Document toDocument(File file) throws IOException {
        String fileName = file.getName();
        long fileSize = FileUtils.sizeOf(file);
        // NOTE(review): this overload decodes with the platform default charset —
        // confirm that matches the encoding of the source files.
        String content = FileUtils.readFileToString(file);
        if (content.length() > MAX_CONTENT_LENGTH) {
            content = content.substring(0, MAX_CONTENT_LENGTH);
        }
        String path = file.getPath();

        Document doc = new Document();
        doc.add(new StringField("fileName", fileName, Store.YES));
        // Fix: the original added fileNameField a second time here, so the
        // "path" field was never indexed.
        doc.add(new StringField("path", path, Store.YES));
        doc.add(new LongField("fileSize", fileSize, Store.YES));
        doc.add(new TextField("content", content, Store.YES));
        return doc;
    }
}
今天跟踪了一下 Lucene 查询过程的源码,发现其记录的 docId 是按增量(delta,即与前一个 id 的差值)的方式存储的,读取时逐项累加还原。即:记录的值为 1、1、2,但实际的 docId 为 1、2、4。
下面是跟踪到的源码,索引的查询和记录逻辑就在其中,具体细节明天再仔细分析。
package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; import java.io.IOException; import java.util.Arrays; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.TermState; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; /** * 
Concrete class that reads docId(maybe frq,pos,offset,payloads) list * with postings format. * * @see Lucene41SkipReader for details * @lucene.experimental */ public final class Lucene41PostingsReader extends PostingsReaderBase { private final IndexInput docIn; private final IndexInput posIn; private final IndexInput payIn; private final ForUtil forUtil; // public static boolean DEBUG = false; /** Sole constructor. */ public Lucene41PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException { boolean success = false; IndexInput docIn = null; IndexInput posIn = null; IndexInput payIn = null; try { docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), ioContext); CodecUtil.checkHeader(docIn, Lucene41PostingsWriter.DOC_CODEC, Lucene41PostingsWriter.VERSION_CURRENT, Lucene41PostingsWriter.VERSION_CURRENT); forUtil = new ForUtil(docIn); if (fieldInfos.hasProx()) { posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), ioContext); CodecUtil.checkHeader(posIn, Lucene41PostingsWriter.POS_CODEC, Lucene41PostingsWriter.VERSION_CURRENT, Lucene41PostingsWriter.VERSION_CURRENT); if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) { payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), ioContext); CodecUtil.checkHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, Lucene41PostingsWriter.VERSION_CURRENT, Lucene41PostingsWriter.VERSION_CURRENT); } } this.docIn = docIn; this.posIn = posIn; this.payIn = payIn; success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(docIn, posIn, payIn); } } } @Override public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching postings writer CodecUtil.checkHeader(termsIn, 
Lucene41PostingsWriter.TERMS_CODEC, Lucene41PostingsWriter.VERSION_CURRENT, Lucene41PostingsWriter.VERSION_CURRENT); final int indexBlockSize = termsIn.readVInt(); if (indexBlockSize != BLOCK_SIZE) { throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")"); } } /** * Read values that have been written using variable-length encoding instead of bit-packing. */ static void readVIntBlock(IndexInput docIn, int[] docBuffer, int[] freqBuffer, int num, boolean indexHasFreq) throws IOException { if (indexHasFreq) { for(int i=0;i<num;i++) { final int code = docIn.readVInt(); docBuffer[i] = code >>> 1; if ((code & 1) != 0) { freqBuffer[i] = 1; } else { freqBuffer[i] = docIn.readVInt(); } } } else { for(int i=0;i<num;i++) { docBuffer[i] = docIn.readVInt(); } } } // Must keep final because we do non-standard clone private final static class IntBlockTermState extends BlockTermState { long docStartFP; long posStartFP; long payStartFP; long skipOffset; long lastPosBlockOffset; // docid when there is a single pulsed posting, otherwise -1 // freq is always implicitly totalTermFreq in this case. int singletonDocID; // Only used by the "primary" TermState -- clones don't // copy this (basically they are "transient"): ByteArrayDataInput bytesReader; // TODO: should this NOT be in the TermState...? byte[] bytes; @Override public IntBlockTermState clone() { IntBlockTermState other = new IntBlockTermState(); other.copyFrom(this); return other; } @Override public void copyFrom(TermState _other) { super.copyFrom(_other); IntBlockTermState other = (IntBlockTermState) _other; docStartFP = other.docStartFP; posStartFP = other.posStartFP; payStartFP = other.payStartFP; lastPosBlockOffset = other.lastPosBlockOffset; skipOffset = other.skipOffset; singletonDocID = other.singletonDocID; // Do not copy bytes, bytesReader (else TermState is // very heavy, ie drags around the entire block's // byte[]). 
On seek back, if next() is in fact used // (rare!), they will be re-read from disk. } @Override public String toString() { return super.toString() + " docStartFP=" + docStartFP + " posStartFP=" + posStartFP + " payStartFP=" + payStartFP + " lastPosBlockOffset=" + lastPosBlockOffset + " singletonDocID=" + singletonDocID; } } @Override public IntBlockTermState newTermState() { return new IntBlockTermState(); } @Override public void close() throws IOException { IOUtils.close(docIn, posIn, payIn); } /* Reads but does not decode the byte[] blob holding metadata for the current terms block */ @Override public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException { final IntBlockTermState termState = (IntBlockTermState) _termState; final int numBytes = termsIn.readVInt(); if (termState.bytes == null) { termState.bytes = new byte[ArrayUtil.oversize(numBytes, 1)]; termState.bytesReader = new ByteArrayDataInput(); } else if (termState.bytes.length < numBytes) { termState.bytes = new byte[ArrayUtil.oversize(numBytes, 1)]; } termsIn.readBytes(termState.bytes, 0, numBytes); termState.bytesReader.reset(termState.bytes, 0, numBytes); } @Override public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException { final IntBlockTermState termState = (IntBlockTermState) _termState; final boolean isFirstTerm = termState.termBlockOrd == 0; final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; final boolean fieldHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; final boolean fieldHasPayloads = fieldInfo.hasPayloads(); final DataInput in = termState.bytesReader; if (isFirstTerm) { if (termState.docFreq == 1) { termState.singletonDocID = in.readVInt(); termState.docStartFP = 0; } else { termState.singletonDocID = -1; termState.docStartFP = in.readVLong(); } if (fieldHasPositions) { 
termState.posStartFP = in.readVLong(); if (termState.totalTermFreq > BLOCK_SIZE) { termState.lastPosBlockOffset = in.readVLong(); } else { termState.lastPosBlockOffset = -1; } if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= BLOCK_SIZE) { termState.payStartFP = in.readVLong(); } else { termState.payStartFP = -1; } } } else { if (termState.docFreq == 1) { termState.singletonDocID = in.readVInt(); } else { termState.singletonDocID = -1; termState.docStartFP += in.readVLong(); } if (fieldHasPositions) { termState.posStartFP += in.readVLong(); if (termState.totalTermFreq > BLOCK_SIZE) { termState.lastPosBlockOffset = in.readVLong(); } else { termState.lastPosBlockOffset = -1; } if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= BLOCK_SIZE) { long delta = in.readVLong(); if (termState.payStartFP == -1) { termState.payStartFP = delta; } else { termState.payStartFP += delta; } } } } if (termState.docFreq > BLOCK_SIZE) { termState.skipOffset = in.readVLong(); } else { termState.skipOffset = -1; } } @Override public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException { BlockDocsEnum docsEnum; if (reuse instanceof BlockDocsEnum) { docsEnum = (BlockDocsEnum) reuse; if (!docsEnum.canReuse(docIn, fieldInfo)) { docsEnum = new BlockDocsEnum(fieldInfo); } } else { docsEnum = new BlockDocsEnum(fieldInfo); } return docsEnum.reset(liveDocs, (IntBlockTermState) termState, flags); } // TODO: specialize to liveDocs vs not @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; boolean indexHasPayloads = fieldInfo.hasPayloads(); if ((!indexHasOffsets || (flags & DocsAndPositionsEnum.FLAG_OFFSETS) == 0) && (!indexHasPayloads || (flags 
& DocsAndPositionsEnum.FLAG_PAYLOADS) == 0)) { BlockDocsAndPositionsEnum docsAndPositionsEnum; if (reuse instanceof BlockDocsAndPositionsEnum) { docsAndPositionsEnum = (BlockDocsAndPositionsEnum) reuse; if (!docsAndPositionsEnum.canReuse(docIn, fieldInfo)) { docsAndPositionsEnum = new BlockDocsAndPositionsEnum(fieldInfo); } } else { docsAndPositionsEnum = new BlockDocsAndPositionsEnum(fieldInfo); } return docsAndPositionsEnum.reset(liveDocs, (IntBlockTermState) termState); } else { EverythingEnum everythingEnum; if (reuse instanceof EverythingEnum) { everythingEnum = (EverythingEnum) reuse; if (!everythingEnum.canReuse(docIn, fieldInfo)) { everythingEnum = new EverythingEnum(fieldInfo); } } else { everythingEnum = new EverythingEnum(fieldInfo); } return everythingEnum.reset(liveDocs, (IntBlockTermState) termState, flags); } } final class BlockDocsEnum extends DocsEnum { private final byte[] encoded; private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE]; private final int[] freqBuffer = new int[MAX_DATA_SIZE]; private int docBufferUpto; private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; IndexInput docIn; final boolean indexHasFreq; final boolean indexHasPos; final boolean indexHasOffsets; final boolean indexHasPayloads; private int docFreq; // number of docs in this posting list private long totalTermFreq; // sum of freqs in this posting list (or docFreq when omitted) private int docUpto; // how many docs we've read private int doc; // doc we last read private int accum; // accumulator for doc deltas private int freq; // freq we last read // Where this term's postings start in the .doc file: private long docTermStartFP; // Where this term's skip data starts (after // docTermStartFP) in the .doc file (or -1 if there is // no skip data for this term): private long skipOffset; // docID for next skip point, we won't use skipper if // target docID is not larger than this private int nextSkipDoc; private Bits liveDocs; private 
boolean needsFreq; // true if the caller actually needs frequencies private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 public BlockDocsEnum(FieldInfo fieldInfo) throws IOException { this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = null; indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexHasPayloads = fieldInfo.hasPayloads(); encoded = new byte[MAX_ENCODED_SIZE]; } public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { return docIn == startDocIn && indexHasFreq == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) && indexHasPos == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) && indexHasPayloads == fieldInfo.hasPayloads(); } public DocsEnum reset(Bits liveDocs, IntBlockTermState termState, int flags) throws IOException { this.liveDocs = liveDocs; // if (DEBUG) { // System.out.println(" FPR.reset: termState=" + termState); // } docFreq = termState.docFreq; totalTermFreq = indexHasFreq ? 
termState.totalTermFreq : docFreq; docTermStartFP = termState.docStartFP; skipOffset = termState.skipOffset; singletonDocID = termState.singletonDocID; if (docFreq > 1) { if (docIn == null) { // lazy init docIn = startDocIn.clone(); } docIn.seek(docTermStartFP); } doc = -1; this.needsFreq = (flags & DocsEnum.FLAG_FREQS) != 0; if (!indexHasFreq) { Arrays.fill(freqBuffer, 1); } accum = 0; docUpto = 0; nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block docBufferUpto = BLOCK_SIZE; skipped = false; return this; } @Override public int freq() throws IOException { return freq; } @Override public int docID() { return doc; } private void refillDocs() throws IOException { final int left = docFreq - docUpto; assert left > 0; if (left >= BLOCK_SIZE) { // if (DEBUG) { // System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); // } forUtil.readBlock(docIn, encoded, docDeltaBuffer); if (indexHasFreq) { // if (DEBUG) { // System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); // } if (needsFreq) { forUtil.readBlock(docIn, encoded, freqBuffer); } else { forUtil.skipBlock(docIn); // skip over freqs } } } else if (docFreq == 1) { docDeltaBuffer[0] = singletonDocID; freqBuffer[0] = (int) totalTermFreq; } else { // Read vInts: // if (DEBUG) { // System.out.println(" fill last vInt block from fp=" + docIn.getFilePointer()); // } readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, indexHasFreq); } docBufferUpto = 0; } @Override public int nextDoc() throws IOException { // if (DEBUG) { // System.out.println("\nFPR.nextDoc"); // } while (true) { // if (DEBUG) { // System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto); // } if (docUpto == docFreq) { // if (DEBUG) { // System.out.println(" return doc=END"); // } return doc = NO_MORE_DOCS; } if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } // if (DEBUG) { // System.out.println(" accum=" + accum + " docDeltaBuffer[" + 
docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]); // } accum += docDeltaBuffer[docBufferUpto]; docUpto++; if (liveDocs == null || liveDocs.get(accum)) { doc = accum; freq = freqBuffer[docBufferUpto]; docBufferUpto++; // if (DEBUG) { // System.out.println(" return doc=" + doc + " freq=" + freq); // } return doc; } // if (DEBUG) { // System.out.println(" doc=" + accum + " is deleted; try next doc"); // } docBufferUpto++; } } @Override public int advance(int target) throws IOException { // TODO: make frq block load lazy/skippable // if (DEBUG) { // System.out.println(" FPR.advance target=" + target); // } // current skip docID < docIDs generated from current buffer <= next skip docID // we don't need to skip if target is buffered already if (docFreq > BLOCK_SIZE && target > nextSkipDoc) { // if (DEBUG) { // System.out.println("load skipper"); // } if (skipper == null) { // Lazy init: first time this enum has ever been used for skipping skipper = new Lucene41SkipReader(docIn.clone(), Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, indexHasPos, indexHasOffsets, indexHasPayloads); } if (!skipped) { assert skipOffset != -1; // This is the first time this enum has skipped // since reset() was called; load the skip data: skipper.init(docTermStartFP+skipOffset, docTermStartFP, 0, 0, docFreq); skipped = true; } // always plus one to fix the result, since skip position in Lucene41SkipReader // is a little different from MultiLevelSkipListReader final int newDocUpto = skipper.skipTo(target) + 1; if (newDocUpto > docUpto) { // Skipper moved // if (DEBUG) { // System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer()); // } assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; docUpto = newDocUpto; // Force to read next block docBufferUpto = BLOCK_SIZE; accum = skipper.getDoc(); // actually, this is just lastSkipEntry docIn.seek(skipper.getDocPointer()); // now point to the 
block we want to search } // next time we call advance, this is used to // foresee whether skipper is necessary. nextSkipDoc = skipper.getNextSkipDoc(); } if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } // Now scan... this is an inlined/pared down version // of nextDoc(): while (true) { // if (DEBUG) { // System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto); // } accum += docDeltaBuffer[docBufferUpto]; docUpto++; if (accum >= target) { break; } docBufferUpto++; if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } } if (liveDocs == null || liveDocs.get(accum)) { // if (DEBUG) { // System.out.println(" return doc=" + accum); // } freq = freqBuffer[docBufferUpto]; docBufferUpto++; return doc = accum; } else { // if (DEBUG) { // System.out.println(" now do nextDoc()"); // } docBufferUpto++; return nextDoc(); } } @Override public long cost() { return docFreq; } } final class BlockDocsAndPositionsEnum extends DocsAndPositionsEnum { private final byte[] encoded; private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE]; private final int[] freqBuffer = new int[MAX_DATA_SIZE]; private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE]; private int docBufferUpto; private int posBufferUpto; private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; IndexInput docIn; final IndexInput posIn; final boolean indexHasOffsets; final boolean indexHasPayloads; private int docFreq; // number of docs in this posting list private long totalTermFreq; // number of positions in this posting list private int docUpto; // how many docs we've read private int doc; // doc we last read private int accum; // accumulator for doc deltas private int freq; // freq we last read private int position; // current position // how many positions "behind" we are; nextPosition must // skip these to "catch up": private int posPendingCount; // Lazy pos seek: if != -1 then we must seek to this 
FP // before reading positions: private long posPendingFP; // Where this term's postings start in the .doc file: private long docTermStartFP; // Where this term's postings start in the .pos file: private long posTermStartFP; // Where this term's payloads/offsets start in the .pay // file: private long payTermStartFP; // File pointer where the last (vInt encoded) pos delta // block is. We need this to know whether to bulk // decode vs vInt decode the block: private long lastPosBlockFP; // Where this term's skip data starts (after // docTermStartFP) in the .doc file (or -1 if there is // no skip data for this term): private long skipOffset; private int nextSkipDoc; private Bits liveDocs; private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 public BlockDocsAndPositionsEnum(FieldInfo fieldInfo) throws IOException { this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = null; this.posIn = Lucene41PostingsReader.this.posIn.clone(); encoded = new byte[MAX_ENCODED_SIZE]; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexHasPayloads = fieldInfo.hasPayloads(); } public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) { return docIn == startDocIn && indexHasOffsets == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) && indexHasPayloads == fieldInfo.hasPayloads(); } public DocsAndPositionsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException { this.liveDocs = liveDocs; // if (DEBUG) { // System.out.println(" FPR.reset: termState=" + termState); // } docFreq = termState.docFreq; docTermStartFP = termState.docStartFP; posTermStartFP = termState.posStartFP; payTermStartFP = termState.payStartFP; skipOffset = termState.skipOffset; totalTermFreq = termState.totalTermFreq; singletonDocID = termState.singletonDocID; if (docFreq > 1) { if (docIn == null) { // lazy init docIn = 
startDocIn.clone(); } docIn.seek(docTermStartFP); } posPendingFP = posTermStartFP; posPendingCount = 0; if (termState.totalTermFreq < BLOCK_SIZE) { lastPosBlockFP = posTermStartFP; } else if (termState.totalTermFreq == BLOCK_SIZE) { lastPosBlockFP = -1; } else { lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; } doc = -1; accum = 0; docUpto = 0; nextSkipDoc = BLOCK_SIZE - 1; docBufferUpto = BLOCK_SIZE; skipped = false; return this; } @Override public int freq() throws IOException { return freq; } @Override public int docID() { return doc; } private void refillDocs() throws IOException { final int left = docFreq - docUpto; assert left > 0; if (left >= BLOCK_SIZE) { // if (DEBUG) { // System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); // } forUtil.readBlock(docIn, encoded, docDeltaBuffer); // if (DEBUG) { // System.out.println(" fill freq block from fp=" + docIn.getFilePointer()); // } forUtil.readBlock(docIn, encoded, freqBuffer); } else if (docFreq == 1) { docDeltaBuffer[0] = singletonDocID; freqBuffer[0] = (int) totalTermFreq; } else { // Read vInts: // if (DEBUG) { // System.out.println(" fill last vInt doc block from fp=" + docIn.getFilePointer()); // } readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true); } docBufferUpto = 0; } private void refillPositions() throws IOException { // if (DEBUG) { // System.out.println(" refillPositions"); // } if (posIn.getFilePointer() == lastPosBlockFP) { // if (DEBUG) { // System.out.println(" vInt pos block @ fp=" + posIn.getFilePointer() + " hasPayloads=" + indexHasPayloads + " hasOffsets=" + indexHasOffsets); // } final int count = (int) (totalTermFreq % BLOCK_SIZE); int payloadLength = 0; for(int i=0;i<count;i++) { int code = posIn.readVInt(); if (indexHasPayloads) { if ((code & 1) != 0) { payloadLength = posIn.readVInt(); } posDeltaBuffer[i] = code >>> 1; if (payloadLength != 0) { posIn.seek(posIn.getFilePointer() + payloadLength); } } else { posDeltaBuffer[i] = code; } if 
(indexHasOffsets) { if ((posIn.readVInt() & 1) != 0) { // offset length changed posIn.readVInt(); } } } } else { // if (DEBUG) { // System.out.println(" bulk pos block @ fp=" + posIn.getFilePointer()); // } forUtil.readBlock(posIn, encoded, posDeltaBuffer); } } @Override public int nextDoc() throws IOException { // if (DEBUG) { // System.out.println(" FPR.nextDoc"); // } while (true) { // if (DEBUG) { // System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto); // } if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } // if (DEBUG) { // System.out.println(" accum=" + accum + " docDeltaBuffer[" + docBufferUpto + "]=" + docDeltaBuffer[docBufferUpto]); // } accum += docDeltaBuffer[docBufferUpto]; freq = freqBuffer[docBufferUpto]; posPendingCount += freq; docBufferUpto++; docUpto++; if (liveDocs == null || liveDocs.get(accum)) { doc = accum; position = 0; // if (DEBUG) { // System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount); // } return doc; } // if (DEBUG) { // System.out.println(" doc=" + accum + " is deleted; try next doc"); // } } } @Override public int advance(int target) throws IOException { // TODO: make frq block load lazy/skippable // if (DEBUG) { // System.out.println(" FPR.advance target=" + target); // } if (docFreq > BLOCK_SIZE && target > nextSkipDoc) { // if (DEBUG) { // System.out.println(" try skipper"); // } if (skipper == null) { // Lazy init: first time this enum has ever been used for skipping // if (DEBUG) { // System.out.println(" create skipper"); // } skipper = new Lucene41SkipReader(docIn.clone(), Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, true, indexHasOffsets, indexHasPayloads); } if (!skipped) { assert skipOffset != -1; // This is the first time this enum has skipped // since reset() was called; load the skip data: // if (DEBUG) { // System.out.println(" init skipper"); // } 
skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq); skipped = true; } final int newDocUpto = skipper.skipTo(target) + 1; if (newDocUpto > docUpto) { // Skipper moved // if (DEBUG) { // System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto()); // } assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto; docUpto = newDocUpto; // Force to read next block docBufferUpto = BLOCK_SIZE; accum = skipper.getDoc(); docIn.seek(skipper.getDocPointer()); posPendingFP = skipper.getPosPointer(); posPendingCount = skipper.getPosBufferUpto(); } nextSkipDoc = skipper.getNextSkipDoc(); } if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } // Now scan... this is an inlined/pared down version // of nextDoc(): while (true) { // if (DEBUG) { // System.out.println(" scan doc=" + accum + " docBufferUpto=" + docBufferUpto); // } accum += docDeltaBuffer[docBufferUpto]; freq = freqBuffer[docBufferUpto]; posPendingCount += freq; docBufferUpto++; docUpto++; if (accum >= target) { break; } if (docUpto == docFreq) { return doc = NO_MORE_DOCS; } } if (liveDocs == null || liveDocs.get(accum)) { // if (DEBUG) { // System.out.println(" return doc=" + accum); // } position = 0; return doc = accum; } else { // if (DEBUG) { // System.out.println(" now do nextDoc()"); // } return nextDoc(); } } // TODO: in theory we could avoid loading frq block // when not needed, ie, use skip data to load how far to // seek the pos pointer ... 
// instead of having to load frq
    // blocks only to sum up how many positions to skip

    /**
     * Skips the {@code posPendingCount - freq} position entries that this enum
     * is still "behind" by, so the next read lands on the first position of the
     * current document.
     *
     * <p>If the entries to skip all sit in the currently buffered block, only
     * the buffer cursor is moved. Otherwise whole blocks are skipped in
     * {@code posIn} with {@code forUtil.skipBlock}, the following block is
     * loaded with {@code refillPositions()} and the cursor is placed inside it.
     * {@code position} is reset to 0 on exit.
     *
     * @throws IOException propagated from the reads/skips on {@code posIn}
     */
    private void skipPositions() throws IOException {
      // Skip positions now:
      int toSkip = posPendingCount - freq;

      // Entries remaining in the buffered block (0 when a refill was forced).
      final int leftInBlock = BLOCK_SIZE - posBufferUpto;
      if (toSkip < leftInBlock) {
        posBufferUpto += toSkip;
      } else {
        toSkip -= leftInBlock;
        while(toSkip >= BLOCK_SIZE) {
          // Whole-block skipping must never run into the block recorded in lastPosBlockFP.
          assert posIn.getFilePointer() != lastPosBlockFP;
          forUtil.skipBlock(posIn);
          toSkip -= BLOCK_SIZE;
        }
        refillPositions();
        // The remainder is an index into the block that was just loaded.
        posBufferUpto = toSkip;
      }

      position = 0;
    }

    /**
     * Returns the next position of the current document.
     *
     * <p>Performs the pending lazy seek on {@code posIn} (if any), skips the
     * positions this enum is behind by, refills the position buffer when it is
     * exhausted, and then adds the next buffered delta to {@code position}.
     *
     * @return the accumulated position
     * @throws IOException propagated from the reads on {@code posIn}
     */
    @Override
    public int nextPosition() throws IOException {
      if (posPendingFP != -1) {
        posIn.seek(posPendingFP);
        posPendingFP = -1;

        // Force buffer refill:
        posBufferUpto = BLOCK_SIZE;
      }

      if (posPendingCount > freq) {
        skipPositions();
        posPendingCount = freq;
      }

      if (posBufferUpto == BLOCK_SIZE) {
        refillPositions();
        posBufferUpto = 0;
      }
      position += posDeltaBuffer[posBufferUpto++];
      posPendingCount--;
      return position;
    }

    /** Always -1 in this enum. */
    @Override
    public int startOffset() {
      return -1;
    }

    /** Always -1 in this enum. */
    @Override
    public int endOffset() {
      return -1;
    }

    /** Always {@code null} in this enum. */
    @Override
    public BytesRef getPayload() {
      return null;
    }

    /** Returns {@code docFreq}. */
    @Override
    public long cost() {
      return docFreq;
    }
  }

  /**
   * Postings enum that, in addition to docs, freqs and positions, also decodes
   * payloads and offsets.
   *
   * <p>Doc/freq data is read from {@code docIn}, positions from {@code posIn}
   * and bulk-encoded payload/offset blocks from {@code payIn}. Position,
   * payload and offset reads are lazy: iterating documents only accumulates
   * {@code posPendingCount}, and {@link #nextPosition()} seeks/skips as needed
   * the first time it is called for a document.
   */
  // Also handles payloads + offsets
  final class EverythingEnum extends DocsAndPositionsEnum {

    // Scratch buffer handed to forUtil.readBlock for every bulk block read.
    private final byte[] encoded;

    private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
    private final int[] freqBuffer = new int[MAX_DATA_SIZE];
    private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];

    // The three buffers below are null when the field has no payloads / offsets
    // (see the constructor).
    private final int[] payloadLengthBuffer;
    private final int[] offsetStartDeltaBuffer;
    private final int[] offsetLengthBuffer;

    // Payload bytes of the buffered position block; grown on demand in refillPositions().
    private byte[] payloadBytes;
    // Read cursor into payloadBytes for the next position's payload.
    private int payloadByteUpto;
    // Length of the payload at the position last returned by nextPosition().
    private int payloadLength;

    // Start offset of the previously returned position; offsets are stored as deltas from it.
    private int lastStartOffset;
    private int startOffset;
    private int endOffset;

    // Cursors into the doc/freq buffers and the position buffers.
    private int docBufferUpto;
    private int posBufferUpto;

    // Created lazily on the first advance() that uses skip data.
    private Lucene41SkipReader skipper;
    // True once skipper.init(...) has been called since the last reset().
    private boolean skipped;

    // The enclosing reader's docIn; used for the identity check in canReuse()
    // and as the source of the lazily created clone below.
    final IndexInput startDocIn;

    // Private clone of startDocIn; stays null until reset() sees docFreq > 1.
    IndexInput docIn;
    final IndexInput posIn;
    final IndexInput payIn;
    // Single BytesRef instance that nextPosition() re-points at the current payload.
    final BytesRef payload;

    final boolean indexHasOffsets;
    final boolean indexHasPayloads;

    private int docFreq;                              // number of docs in this posting list
    private long totalTermFreq;                       // number of positions in this posting list
    private int docUpto;                              // how many docs we've read
    private int doc;                                  // doc we last read
    private int accum;                                // accumulator for doc deltas
    private int freq;                                 // freq we last read
    private int position;                             // current position

    // how many positions "behind" we are; nextPosition must
    // skip these to "catch up":
    private int posPendingCount;

    // Lazy pos seek: if != -1 then we must seek to this FP
    // before reading positions:
    private long posPendingFP;

    // Lazy pay seek: if != -1 then we must seek to this FP
    // before reading payloads/offsets:
    private long payPendingFP;

    // Where this term's postings start in the .doc file:
    private long docTermStartFP;

    // Where this term's postings start in the .pos file:
    private long posTermStartFP;

    // Where this term's payloads/offsets start in the .pay
    // file:
    private long payTermStartFP;

    // File pointer where the last (vInt encoded) pos delta
    // block is.  We need this to know whether to bulk
    // decode vs vInt decode the block:
    private long lastPosBlockFP;

    // Where this term's skip data starts (after
    // docTermStartFP) in the .doc file (or -1 if there is
    // no skip data for this term):
    private long skipOffset;

    // advance() consults the skipper only when target is greater than this value.
    private int nextSkipDoc;

    // Documents for which liveDocs.get(docID) is false are not returned; null disables the check.
    private Bits liveDocs;

    private boolean needsOffsets; // true if we actually need offsets
    private boolean needsPayloads; // true if we actually need payloads
    private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

    /**
     * Creates an enum for the given field, cloning the enclosing reader's
     * {@code posIn} and {@code payIn}. {@code docIn} is not cloned here; that
     * happens lazily in {@link #reset}.
     *
     * @param fieldInfo field whose index options and payload flag decide which
     *                  optional buffers are allocated
     * @throws IOException declared by the signature; nothing in this body is
     *                     visibly throwing it
     */
    public EverythingEnum(FieldInfo fieldInfo) throws IOException {
      this.startDocIn = Lucene41PostingsReader.this.docIn;
      this.docIn = null;
      this.posIn = Lucene41PostingsReader.this.posIn.clone();
      this.payIn = Lucene41PostingsReader.this.payIn.clone();
      encoded = new byte[MAX_ENCODED_SIZE];
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      if (indexHasOffsets) {
        offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
        offsetLengthBuffer = new int[MAX_DATA_SIZE];
      } else {
        offsetStartDeltaBuffer = null;
        offsetLengthBuffer = null;
        // nextPosition() only assigns these when indexHasOffsets is true,
        // so without offsets they stay at -1.
        startOffset = -1;
        endOffset = -1;
      }

      indexHasPayloads = fieldInfo.hasPayloads();
      if (indexHasPayloads) {
        payloadLengthBuffer = new int[MAX_DATA_SIZE];
        // Initial capacity only; refillPositions() grows the array when needed.
        payloadBytes = new byte[128];
        payload = new BytesRef();
      } else {
        payloadLengthBuffer = null;
        payloadBytes = null;
        payload = null;
      }
    }

    /**
     * Tells whether this instance can serve the given input and field.
     *
     * @param docIn     candidate doc input; must be the very same object as {@code startDocIn}
     * @param fieldInfo field to be enumerated
     * @return true when {@code docIn} is {@code startDocIn} and the field's
     *         offsets/payloads settings equal the ones captured at construction
     */
    public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
      return docIn == startDocIn &&
        indexHasOffsets == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) &&
        indexHasPayloads == fieldInfo.hasPayloads();
    }

    /**
     * Re-targets this enum at the postings described by {@code termState} and
     * rewinds all iteration state.
     *
     * @param liveDocs  filter consulted by nextDoc()/advance(); may be null
     * @param termState supplies docFreq, totalTermFreq, the start file pointers,
     *                  skipOffset, lastPosBlockOffset and singletonDocID
     * @param flags     bit set tested against {@code FLAG_OFFSETS} and {@code FLAG_PAYLOADS}
     * @return this instance
     * @throws IOException propagated from cloning/seeking {@code docIn}
     */
    public EverythingEnum reset(Bits liveDocs, IntBlockTermState termState, int flags) throws IOException {
      this.liveDocs = liveDocs;
      docFreq = termState.docFreq;
      docTermStartFP = termState.docStartFP;
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      skipOffset = termState.skipOffset;
      totalTermFreq = termState.totalTermFreq;
      singletonDocID = termState.singletonDocID;
      // With docFreq == 1, refillDocs() takes the doc from singletonDocID and
      // never reads docIn, so the clone/seek is skipped.
      if (docFreq > 1) {
        if (docIn == null) {
          // lazy init
          docIn = startDocIn.clone();
        }
        docIn.seek(docTermStartFP);
      }
      // posIn/payIn are not touched here; nextPosition() performs these seeks on first use.
      posPendingFP = posTermStartFP;
      payPendingFP = payTermStartFP;
      posPendingCount = 0;
      // Determine where the vInt-encoded tail block of positions starts
      // (refillPositions() compares posIn's file pointer against this value).
      if (termState.totalTermFreq < BLOCK_SIZE) {
        // Fewer positions than one block: the data at posTermStartFP is vInt-decoded.
        lastPosBlockFP = posTermStartFP;
      } else if (termState.totalTermFreq == BLOCK_SIZE) {
        // NOTE(review): -1 presumably can never equal a real file pointer, so the
        // vInt branch of refillPositions() is not taken for this term - confirm.
        lastPosBlockFP = -1;
      } else {
        lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
      }

      this.needsOffsets = (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0;
      this.needsPayloads = (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0;

      doc = -1;
      accum = 0;
      docUpto = 0;
      nextSkipDoc = BLOCK_SIZE - 1;
      // Buffer marked as exhausted so the first nextDoc()/advance() calls refillDocs().
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
      return this;
    }

    /** Returns the freq read for the current document. */
    @Override
    public int freq() throws IOException {
      return freq;
    }

    /** Returns the document last returned (-1 right after reset()). */
    @Override
    public int docID() {
      return doc;
    }

    /**
     * Loads the next run of doc deltas and freqs into {@code docDeltaBuffer} /
     * {@code freqBuffer} and rewinds {@code docBufferUpto} to 0.
     *
     * @throws IOException propagated from the reads on {@code docIn}
     */
    private void refillDocs() throws IOException {
      final int left = docFreq - docUpto;
      assert left > 0;

      if (left >= BLOCK_SIZE) {
        // A full block remains: bulk-decode doc deltas, then freqs.
        forUtil.readBlock(docIn, encoded, docDeltaBuffer);
        forUtil.readBlock(docIn, encoded, freqBuffer);
      } else if (docFreq == 1) {
        // Single posting: nothing is read from docIn; the one document has
        // all totalTermFreq positions.
        docDeltaBuffer[0] = singletonDocID;
        freqBuffer[0] = (int) totalTermFreq;
      } else {
        // Partial tail block (readVIntBlock is defined outside this excerpt).
        readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
      }
      docBufferUpto = 0;
    }

    /**
     * Loads the next block of position deltas, together with the matching
     * payload lengths/bytes and offset deltas/lengths when the field has them.
     * The tail block at {@code lastPosBlockFP} is decoded from vInts in
     * {@code posIn}; every other block is bulk-decoded, with payload and offset
     * data coming from {@code payIn}.
     *
     * <p>Does not modify {@code posBufferUpto}; callers set it afterwards.
     *
     * @throws IOException propagated from the reads on {@code posIn} / {@code payIn}
     */
    private void refillPositions() throws IOException {
      if (posIn.getFilePointer() == lastPosBlockFP) {
        final int count = (int) (totalTermFreq % BLOCK_SIZE);
        // Local variable: shadows the payloadLength field, which is not modified here.
        int payloadLength = 0;
        int offsetLength = 0;
        payloadByteUpto = 0;
        for(int i=0;i<count;i++) {
          int code = posIn.readVInt();
          if (indexHasPayloads) {
            // Low bit set: a new payload length follows; otherwise the
            // previous entry's length is carried over.
            if ((code & 1) != 0) {
              payloadLength = posIn.readVInt();
            }
            payloadLengthBuffer[i] = payloadLength;
            posDeltaBuffer[i] = code >>> 1;
            if (payloadLength != 0) {
              if (payloadByteUpto + payloadLength > payloadBytes.length) {
                payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
              }
              // Payload bytes are appended back to back in payloadBytes.
              posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
              payloadByteUpto += payloadLength;
            }
          } else {
            posDeltaBuffer[i] = code;
          }

          if (indexHasOffsets) {
            int deltaCode = posIn.readVInt();
            // Same scheme as for payloads: low bit set means a new offset length follows.
            if ((deltaCode & 1) != 0) {
              offsetLength = posIn.readVInt();
            }
            offsetStartDeltaBuffer[i] = deltaCode >>> 1;
            offsetLengthBuffer[i] = offsetLength;
          }
        }
        // Rewind the cursor: it was used as a write index above and is a read
        // index from here on.
        payloadByteUpto = 0;
      } else {
        forUtil.readBlock(posIn, encoded, posDeltaBuffer);

        if (indexHasPayloads) {
          if (needsPayloads) {
            forUtil.readBlock(payIn, encoded, payloadLengthBuffer);
            int numBytes = payIn.readVInt();
            if (numBytes > payloadBytes.length) {
              payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
            }
            payIn.readBytes(payloadBytes, 0, numBytes);
          } else {
            // Payloads were not requested: payIn is only moved past this block, so
            // payloadLengthBuffer and payloadBytes keep whatever they held before.
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over lengths
            int numBytes = payIn.readVInt(); // read length of payloadBytes
            payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
          }
          payloadByteUpto = 0;
        }

        if (indexHasOffsets) {
          if (needsOffsets) {
            forUtil.readBlock(payIn, encoded, offsetStartDeltaBuffer);
            forUtil.readBlock(payIn, encoded, offsetLengthBuffer);
          } else {
            // this works, because when writing a vint block we always force the first length to be written
            forUtil.skipBlock(payIn); // skip over starts
            forUtil.skipBlock(payIn); // skip over lengths
          }
        }
      }
    }

    /**
     * Moves to the next document accepted by {@code liveDocs}.
     *
     * @return the document id, or {@code NO_MORE_DOCS} once {@code docFreq}
     *         documents have been consumed
     * @throws IOException propagated from {@code refillDocs()}
     */
    @Override
    public int nextDoc() throws IOException {
      while (true) {
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
        if (docBufferUpto == BLOCK_SIZE) {
          refillDocs();
        }
        accum += docDeltaBuffer[docBufferUpto];
        freq = freqBuffer[docBufferUpto];
        // Positions are not read here; they are only counted so that
        // nextPosition() can skip them later if they are never consumed.
        posPendingCount += freq;
        docBufferUpto++;
        docUpto++;

        if (liveDocs == null || liveDocs.get(accum)) {
          doc = accum;
          position = 0;
          lastStartOffset = 0;
          return doc;
        }
        // Rejected by liveDocs: loop on to the next document.
      }
    }

    /**
     * Moves to the first document whose id is {@code >= target} and that is
     * accepted by {@code liveDocs}. Skip data is consulted only when
     * {@code docFreq > BLOCK_SIZE} and {@code target > nextSkipDoc}; the rest
     * of the way is a linear scan over the buffered doc deltas.
     *
     * @param target minimum document id to land on
     * @return the document id, or {@code NO_MORE_DOCS} if the postings are exhausted
     * @throws IOException propagated from the skipper, {@code docIn} or {@code refillDocs()}
     */
    @Override
    public int advance(int target) throws IOException {
      // TODO: make frq block load lazy/skippable

      if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
        if (skipper == null) {
          // Lazy init: first time this enum has ever been used for skipping
          skipper = new Lucene41SkipReader(docIn.clone(),
                                           Lucene41PostingsWriter.maxSkipLevels,
                                           BLOCK_SIZE,
                                           true,
                                           indexHasOffsets,
                                           indexHasPayloads);
        }

        if (!skipped) {
          assert skipOffset != -1;
          // This is the first time this enum has skipped
          // since reset() was called; load the skip data:
          skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
          skipped = true;
        }

        final int newDocUpto = skipper.skipTo(target) + 1;

        if (newDocUpto > docUpto) {
          // Skipper moved
          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
          docUpto = newDocUpto;

          // Force to read next block
          docBufferUpto = BLOCK_SIZE;
          accum = skipper.getDoc();
          docIn.seek(skipper.getDocPointer());
          // Position/payload inputs are not moved yet; nextPosition() performs
          // these seeks lazily.
          posPendingFP = skipper.getPosPointer();
          payPendingFP = skipper.getPayPointer();
          // NOTE(review): presumably the number of entries to skip inside the
          // position block at posPendingFP - confirm against Lucene41SkipReader.
          posPendingCount = skipper.getPosBufferUpto();
          lastStartOffset = 0; // new document
          payloadByteUpto = skipper.getPayloadByteUpto();
        }
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docUpto == docFreq) {
        return doc = NO_MORE_DOCS;
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
      }

      // Now scan:
      while (true) {
        accum += docDeltaBuffer[docBufferUpto];
        freq = freqBuffer[docBufferUpto];
        posPendingCount += freq;
        docBufferUpto++;
        docUpto++;

        if (accum >= target) {
          break;
        }
        if (docUpto == docFreq) {
          return doc = NO_MORE_DOCS;
        }
      }

      if (liveDocs == null || liveDocs.get(accum)) {
        position = 0;
        lastStartOffset = 0;
        return doc = accum;
      } else {
        // The document found is rejected by liveDocs: continue with a plain nextDoc().
        return nextDoc();
      }
    }

    /**
     * Skips the {@code posPendingCount - freq} position entries this enum is
     * behind by, keeping {@code payloadByteUpto} and the {@code payIn} file
     * pointer consistent with the skipped entries. Resets {@code position} and
     * {@code lastStartOffset} to 0 on exit.
     *
     * @throws IOException propagated from the reads/skips on {@code posIn} / {@code payIn}
     */
    // TODO: in theory we could avoid loading frq block
    // when not needed, ie, use skip data to load how far to
    // seek the pos pointer ... instead of having to load frq
    // blocks only to sum up how many positions to skip
    private void skipPositions() throws IOException {
      // Skip positions now:
      int toSkip = posPendingCount - freq;

      final int leftInBlock = BLOCK_SIZE - posBufferUpto;
      if (toSkip < leftInBlock) {
        // Everything to skip is already buffered: step over the entries one by
        // one so the payload byte cursor advances past their payloads too.
        int end = posBufferUpto + toSkip;
        while(posBufferUpto < end) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      } else {
        toSkip -= leftInBlock;
        while(toSkip >= BLOCK_SIZE) {
          // Whole-block skipping must never run into the vInt tail block.
          assert posIn.getFilePointer() != lastPosBlockFP;
          forUtil.skipBlock(posIn);

          if (indexHasPayloads) {
            // Skip payloadLength block:
            forUtil.skipBlock(payIn);

            // Skip payloadBytes block:
            int numBytes = payIn.readVInt();
            payIn.seek(payIn.getFilePointer() + numBytes);
          }

          if (indexHasOffsets) {
            // Two blocks per position block: offset start deltas, then offset lengths
            // (same order in which refillPositions() reads them).
            forUtil.skipBlock(payIn);
            forUtil.skipBlock(payIn);
          }
          toSkip -= BLOCK_SIZE;
        }
        refillPositions();
        payloadByteUpto = 0;
        posBufferUpto = 0;
        // Step over the remaining entries inside the freshly loaded block.
        while(posBufferUpto < toSkip) {
          if (indexHasPayloads) {
            payloadByteUpto += payloadLengthBuffer[posBufferUpto];
          }
          posBufferUpto++;
        }
      }

      position = 0;
      lastStartOffset = 0;
    }

    /**
     * Returns the next position of the current document and updates the
     * payload and offset state that {@link #getPayload()},
     * {@link #startOffset()} and {@link #endOffset()} expose.
     *
     * @return the accumulated position
     * @throws IOException propagated from the reads on {@code posIn} / {@code payIn}
     */
    @Override
    public int nextPosition() throws IOException {
      if (posPendingFP != -1) {
        posIn.seek(posPendingFP);
        posPendingFP = -1;

        // The pay seek is only checked together with a pending pos seek.
        if (payPendingFP != -1) {
          payIn.seek(payPendingFP);
          payPendingFP = -1;
        }

        // Force buffer refill:
        posBufferUpto = BLOCK_SIZE;
      }

      if (posPendingCount > freq) {
        skipPositions();
        posPendingCount = freq;
      }

      if (posBufferUpto == BLOCK_SIZE) {
        refillPositions();
        posBufferUpto = 0;
      }
      position += posDeltaBuffer[posBufferUpto];

      if (indexHasPayloads) {
        payloadLength = payloadLengthBuffer[posBufferUpto];
        // Re-point the shared BytesRef at this position's slice of payloadBytes.
        payload.bytes = payloadBytes;
        payload.offset = payloadByteUpto;
        payload.length = payloadLength;
        payloadByteUpto += payloadLength;
      }

      if (indexHasOffsets) {
        // Start offsets are deltas from the previous position's start offset.
        startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
        endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
        lastStartOffset = startOffset;
      }

      posBufferUpto++;
      posPendingCount--;
      return position;
    }

    /** Returns the start offset set by the last nextPosition(); -1 when the field has no offsets. */
    @Override
    public int startOffset() {
      return startOffset;
    }

    /** Returns the end offset set by the last nextPosition(); -1 when the field has no offsets. */
    @Override
    public int endOffset() {
      return endOffset;
    }

    /**
     * Returns the payload of the position last returned by nextPosition().
     *
     * @return {@code null} when the recorded payload length is 0, otherwise the
     *         shared {@code payload} instance, which is overwritten by the next
     *         nextPosition() call
     */
    @Override
    public BytesRef getPayload() {
      if (payloadLength == 0) {
        return null;
      } else {
        return payload;
      }
    }

    /** Returns {@code docFreq}. */
    @Override
    public long cost() {
      return docFreq;
    }
  }
}