Hadoop MapReduce之MapTask任务执行(三)

  前面讲到,KV输出时会先写入KV buffer。当buffer用完或使用量达到一定比例时,spillThread会将buffer中的数据写入磁盘,以便buffer重用。刷新过程并不是直接写入:写入前会先进行一次排序,写入时会把同一个partition的数据写在一起,并为每个partition记录一条索引信息(起始位置和长度);从下面的代码可以看到,当索引占用的内存达到阈值时,这些索引会被写成一个index文件作为标记文件。每次spill都会生成一个新的数据文件,数据文件和索引文件的结构图如下:
[图片:Hadoop MapReduce之MapTask任务执行(三)_第1张图片]
/**
 * Sorts the buffered records in the range [kvstart, endPosition) and writes
 * them, one partition at a time, to a newly created spill file.
 *
 * <p>For each partition the start offset, raw length and compressed length
 * reported by the writer are stored in a {@code SpillRecord}. When
 * {@code combinerRunner} is null the records are appended to the writer
 * directly; otherwise the partition's range is passed to
 * {@code combinerRunner.combine}. After all partitions are processed the
 * index is written to an index file if {@code totalIndexCacheMemory} has
 * reached {@code INDEX_CACHE_MEMORY_LIMIT}; otherwise only the memory
 * counter is increased. Finally {@code numSpills} is incremented.
 *
 * <p>NOTE(review): this listing appears abridged — the {@code Writer}
 * constructor call is cut off and several closing braces / loop bodies are
 * missing — so it does not compile as shown. Confirm against the full source.
 *
 * @throws IOException declared; presumably raised by the file-system and
 *         writer calls — confirm
 * @throws ClassNotFoundException declared; origin not visible in this
 *         listing
 * @throws InterruptedException declared; origin not visible in this listing
 */
private void sortAndSpill() throws IOException, ClassNotFoundException,
                                       InterruptedException {
      //approximate the length of the output file to be the length of the
      //buffer + header lengths for the partitions
      long size = (bufend >= bufstart
          ? bufend - bufstart
          : (bufvoid - bufend) + bufstart) +
                  partitions * APPROX_HEADER_LENGTH;
      FSDataOutputStream out = null;
      try {
        // create the spill file
        final SpillRecord spillRec = new SpillRecord(partitions);
        final Path filename =
            mapOutputFile.getSpillFileForWrite(numSpills, size);
        out = rfs.create(filename);
        // If kvend has not passed kvstart, extend the end position by the
        // array length; indices are taken modulo kvoffsets.length below.
        final int endPosition = (kvend > kvstart)
          ? kvend
          : kvoffsets.length + kvend;
        sorter.sort(MapOutputBuffer.this, kvstart, endPosition, reporter);
        // spindex walks the sorted offsets across all partitions
        int spindex = kvstart;
        IndexRecord rec = new IndexRecord();
        InMemValBytes value = new InMemValBytes();
        for (int i = 0; i < partitions; ++i) {
          IFile.Writer<K, V> writer = null;
          try {
            long segmentStart = out.getPos();// start position of this partition's data in the output stream
            writer = new Writer<K, V>(job, out, keyClass, valClass, codec,
            // NOTE(review): the constructor call above is truncated in this listing
            if (combinerRunner == null) {
              // spill directly
              DataInputBuffer key = new DataInputBuffer();
              // consume consecutive records whose PARTITION field equals i
              while (spindex < endPosition &&
                  kvindices[kvoffsets[spindex % kvoffsets.length]
                            + PARTITION] == i) {
                final int kvoff = kvoffsets[spindex % kvoffsets.length];
                getVBytesForOffset(kvoff, value);
                // key bytes span from KEYSTART up to VALSTART in kvbuffer
                key.reset(kvbuffer, kvindices[kvoff + KEYSTART],
                          (kvindices[kvoff + VALSTART] - 
                           kvindices[kvoff + KEYSTART]));
                writer.append(key, value);
            } else {
              // remember where this partition's records begin
              int spstart = spindex;
              while (spindex < endPosition &&
                  kvindices[kvoffsets[spindex % kvoffsets.length]
                            + PARTITION] == i) {
              // Note: we would like to avoid the combiner if we've fewer
              // than some threshold of records for a partition
              if (spstart != spindex) {
                // run the combiner over the range [spstart, spindex)
                RawKeyValueIterator kvIter =
                  new MRResultIterator(spstart, spindex);
                combinerRunner.combine(kvIter, combineCollector);

            // close the writer

            // record offsets
            rec.startOffset = segmentStart;// start offset of this partition
            rec.rawLength = writer.getRawLength();// total data written this time; per the original author, this includes the end marker
            rec.partLength = writer.getCompressedLength();// total data written for this partition; per the original author, some marker bytes are added for control purposes
            spillRec.putIndex(rec, i);// add the index record for partition i

            writer = null;
          } finally {
            if (null != writer) writer.close();

        if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {// if the memory used by the index has reached the threshold, write an index file
          // create spill index file
          Path indexFilename =
              mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                  * MAP_OUTPUT_INDEX_RECORD_LENGTH);
          spillRec.writeToFile(indexFilename, job);// write the index file
        } else {
          totalIndexCacheMemory +=     // update the index memory accounting; each index entry describes one partition
            spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
        LOG.info("Finished spill " + numSpills);
        ++numSpills; // number of spill files
      } finally {
        if (out != null) out.close();

