DefaultSkipListReader查找docId

DefaultSkipListReader查找docId
MultiLevelSkipListReader
public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
/**每个层的文件文件的IndexInput读取对象,是通过定位到每个文件的相对应的skiptable的位置层的位置,clone下就得到新的IndexInput 对象了
**/
this.skipStream = new IndexInput[maxSkipLevels];
/**
Skiplist每次的
**/
this.skipPointer = new long[maxSkipLevels];
//
this.childPointer = new long[maxSkipLevels];
// 当前层相对原始层跳过的元素个数
this.numSkipped = new int[maxSkipLevels];
// 跳表有多少层
this.maxNumberOfSkipLevels = maxSkipLevels;
// 每层相对于原始层跳表的间隔的元素个数
this.skipInterval = new int[maxSkipLevels];

    this.skipStream [0]= skipStream;
    this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
this.skipInterval[0] = skipInterval;
// 由于skipInterval 是已知的,所以每层的间隔就可以计算出来
    for (int i = 1; i < maxSkipLevels; i++) {
      // cache skip intervals
      this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
}
//记录当前层的docId的
    skipDoc = new int[maxSkipLevels];
  }


skipTo(int target)
扫描skiplist,定位到docId小于target的最后一个skipdata,并返回到该skipdata为止已经跳过的doc的数目
int skipTo(int target) throws IOException {
    if (!haveSkipped) {
      // first time, load skip levels
      loadSkipLevels();
      haveSkipped = true;
    }
 
// skipDoc 记录是当前level遍历到的docId,从最低层向最高层比较,直到找到targt大//于某个level的docId
    // walk up the levels until highest level is found that has a skip
    // for this target
    int level = 0;
    while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
      level++;
    }   
// 查找
    while (level >= 0) {
      if (target > skipDoc[level]) {// 如果target大于level上的docId,读取//下一个skiplist实体skipdata,直到找到大于这个target的docId
        if (!loadNextSkip(level)) {
          continue;
        }
      } else {
        // no more skips on this level, go down one level
        if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
          seekChild(level - 1);
        }
        level--;
      }
    }
    //
    return numSkipped[0] - skipInterval[0] - 1;
  }


loadSkipLevels()
加载level信息:计算skiplist的层数,并为每一层准备各自的读取流和起始位置。
/**
 * Loads the skip levels: computes how many levels exist and prepares one input
 * stream and one start pointer per level.
 *
 * @throws IOException if reading from or seeking in the skip stream fails
 */
  private void loadSkipLevels() throws IOException {

    // The number of levels is derived from docCount (the number of documents containing
    // the term, per the original notes): floor(log(docCount) / log(skipInterval[0])),
    // capped at maxNumberOfSkipLevels.

    numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
    if (numberOfSkipLevels > maxNumberOfSkipLevels) {
      numberOfSkipLevels = maxNumberOfSkipLevels;
}

    // Seek the base stream to skipPointer[0]. NOTE(review): the original notes describe
    // this as the start of the skip data inside the .frq file (figure 2) - confirm.

    skipStream[0].seek(skipPointer[0]);

    // Number of levels that should still be read into memory instead of streamed.
int toBuffer = numberOfLevelsToBuffer;
    // Walk from the highest level down to level 1; level 0 is handled after the loop.
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
      // the length of the current level (see figure 1 of the original article)
      long length = skipStream[0].readVLong();
      // the start pointer of the current level: the position right after its length
      skipPointer[i] = skipStream[0].getFilePointer();
      if (toBuffer > 0) {
        // buffer this level in memory
        // NOTE(review): presumably SkipBuffer consumes `length` bytes from the base
        // stream, leaving it at the start of the next level - verify in SkipBuffer.
        skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
        toBuffer--;
      } else {
        // clone this stream so the level gets its own reader;
        // it is already at the start of the current level
        skipStream[i] = (IndexInput) skipStream[0].clone();
        // a level shorter than the default buffer gets a buffer of exactly its length
        if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
          ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
        }
        // move base stream beyond the current level, i.e. to the start of the next one
        skipStream[0].seek(skipStream[0].getFilePointer() + length);
      }
    }

    // use base stream for the lowest level; it now points at the start of level 0
    skipPointer[0] = skipStream[0].getFilePointer();
  }









loadNextSkip
private boolean loadNextSkip(int level) throws IOException {
/**
设置最后访问层的docId和下个节点的位置
**/
    // we have to skip, the target document is greater than the current
    // skip list entry       
    setLastSkipData(level);
    // 记录跳过的元素的个数,例如跳表的间隔为16,则第0层的第一个元素相对于原数据,跳过了16个元素,第1层相对于第0层跳过了16 个元素,第1层相对于原始层跳过了16*16 个元素,这个地方记录的是相对原始层跳过的元素的个数
    numSkipped[level] += skipInterval[level];
      // 判断某层跳过的document的数目是否大于最大文档数目
    if (numSkipped[level] > docCount) {
      // this skip list is exhausted
      skipDoc[level] = Integer.MAX_VALUE;
      if (numberOfSkipLevels > level) numberOfSkipLevels = level;
      return false;
    }
// 读取跳表中实体的值,返回的docId和前面一个实体的docId的差值,所以正确的值应//该是,返回值加上前面的差值skipDoc[],这个数组记录的是当前level的移动到的实体的//docId
    // read next skip entry
    skipDoc[level] += readSkipData(level, skipStream[level]);
   
if (level != 0) {
// 计算下一个level的起始位置,也就是本层的skipdata在下个层的位置
      // read the child pointer if we are not on the leaf level
      childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
    }
   
    return true;

  }




readSkipData
//读取一个跳表中的实体
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
    int delta;// docId 运营delDa规则存储的
    if (currentFieldStoresPayloads) {
      // the current field stores payloads.
      // if the doc delta is odd then we have
      // to read the current payload length
      // because it differs from the length of the
      // previous payload
      delta = skipStream.readVInt();
      if ((delta & 1) != 0) {
        payloadLength[level] = skipStream.readVInt();
      }
      delta >>>= 1;
    } else {
      delta = skipStream.readVInt();
}
//文档号对应的倒排表中的节点在frq
//中的偏移量,文档号对应的倒排表中的节点在prx中的偏移量。
    freqPointer[level] += skipStream.readVInt();
    proxPointer[level] += skipStream.readVInt();
   
    return delta;
  }

next()
public boolean next() throws IOException {
    while (true) {
      if (count == df)
        return false;
// 读取下一个docId
      final int docCode = freqStream.readVInt();
     
      if (currentFieldOmitTermFreqAndPositions) {
        doc += docCode;
        freq = 1;
      } else {
// 由于使用了DocDelta[, Freq?],规则,所以读到的docId,向左移一位得到和前面skipdata的docId的差值,加上前面的docId的值就是实际的docId的值,由于frg等于1,则docCode 的最后一位是 1,说明frg等于1,不用往后读取frg的值了。

        doc += docCode >>> 1;       // shift off low bit
        if ((docCode & 1) != 0)       // if low bit is set
          freq = 1;         // freq is one
        else
          freq = freqStream.readVInt();     // else read freq
      }
     
      count++;

// 查看docId是否在删除的文档里面     
if (deletedDocs == null || !deletedDocs.get(doc))
        break;
      skippingDoc();
    }
    return true;
  }


图1



图2



你可能感兴趣的:(cache,UP,Go)