《MapReduce 4》--自定义分区、shuffle技术、环形缓冲区(MapOutputBuffer源码解析)、Maptask源码解析


    原理:int getPartition(LongWritable key, Text value, int numPartitions)
               由于 if (partition < 0 || partition >= partitions) {
                           throw new IOException("Illegal partition for " + key + " (" + partition + ")");
                所以分区号只能取 0 ~ partitions-1,其中 partitions == job.getNumReduceTasks();


    《MapReduce 4》--自定义分区、shuffle技术、环形缓冲区(MapOutputBuffer源码解析)、Maptask源码解析_第1张图片



    private int partitions; 
    private Class keyClass;  //在job中定义的map输出端的key类型
    private Class valClass;  //在job中定义的map输出端的value类型

    private Serializer keySerializer; //用来序列化key值,存储到buffer里
    private Serializer valSerializer; //用来序列化value值,存储到buffer里

    // k/v accounting
    private IntBuffer kvmeta; 
    //用来标记元数据的起始位置 (第一次的位置应该是倒数第四个int元素位置)
    int kvstart; 
    int kvend;              // 元数据在int数组的结束位置
    int kvindex;            // 存储下一个元数据的起始位置

    int equator;            // 分隔点的位置
    int bufstart;           // kv对的起始位置
    int bufend;             // kv对的结束位置
    int bufmark;            // kv对的结束位置的标记
    int bufindex;           // 下一个kv对的开始位置
    int bufvoid;            // 缓冲区的长度

    byte[] kvbuffer;        // 环形缓冲区
    private final byte[] b0 = new byte[0];    //零长度数组:用于向缓冲区做一次零长度写入,以触发边界条件检查

    //一个元数据由四部分组成[valstart,keystart,partition,vallen],四部分记录的是kv对的信息
    private static final int VALSTART = 0;  // valstart相对于kvindex的偏移量
    private static final int KEYSTART = 1;  // keystart元数据相对于kvindex的偏移量
    private static final int PARTITION = 2; // partition元数据相对于kvindex的偏移量
    private static final int VALLEN = 3;    // vallen元数据相对于kvindex的偏移量
    private static final int NMETA = 4;     // 一个元数据占4个int
    private static final int METASIZE = NMETA * 4; // 统计一个元数据实际所占字节个数

    final SpillThread spillThread = new SpillThread();//溢写线程

    private FileSystem rfs;//溢写文件系统

 public void init( MapOutputCollector.Context context
                    ) throws IOException, ClassNotFoundException {
      mapOutputFile = mapTask.getMapOutputFile();  //此对象决定map输出文件的位置
      sortPhase = mapTask.getSortPhase();  //设置排序阶段
      partitions = job.getNumReduceTasks();  //获取分区个数
      rfs = ((LocalFileSystem)FileSystem.getLocal(job)).getRaw();

       final float spillper =job.getFloat(
                    JobContext.MAP_SORT_SPILL_PERCENT, (float)0.8);
      final int sortmb = job.getInt(JobContext.IO_SORT_MB, 100);
      sorter = ReflectionUtils.newInstance(job.getClass("map.sort.class",
                                                                   QuickSort.class, IndexedSorter.class), job);
      int maxMemUsage = sortmb << 20;   //  左移20位:将缓冲区大小由MB换算成字节
      maxMemUsage -= maxMemUsage % METASIZE;   //保证maxMemUsage是METASIZE(16)的倍数
      kvbuffer = new byte[maxMemUsage];  // 创建环形缓冲区
      bufvoid = kvbuffer.length;
      //将字节数组buffer 转成int数组视图
      kvmeta = ByteBuffer.wrap(kvbuffer)
      setEquator(0);//  定义分隔点,同时给kvindex进行赋值
      bufstart = bufend = bufindex = equator;
      kvstart = kvend = kvindex;

      // k/v serialization
      comparator = job.getOutputKeyComparator();
      keyClass = (Class)job.getMapOutputKeyClass();
      valClass = (Class)job.getMapOutputValueClass();
      serializationFactory = new SerializationFactory(job);
      keySerializer = serializationFactory.getSerializer(keyClass);
      valSerializer = serializationFactory.getSerializer(valClass);

      spillInProgress = false;
      minSpillsForCombine = job.getInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
      try {
        spillThread.start();    //启动溢写线程
        while (!spillThreadRunning) {   //进行轮询的方式询问

public synchronized void collect(K key, V value, final int partition
                                     ) throws IOException {
      bufferRemaining -= METASIZE;// 每条记录先扣除一个元数据的大小;bufferRemaining的初始值应为溢写阈值(默认100MB × 0.8 = 80MB)
      if (bufferRemaining <= 0) {  // 剩余空间大于0时不进入此分支,即不需要检查溢写
        // start spill if the thread is not running and the soft limit has been
        // reached
        try {
          do {
            if (!spillInProgress) {
              final int kvbidx = 4 * kvindex;
              final int kvbend = 4 * kvend;
              // serialized, unspilled bytes always lie between kvindex and
              // bufindex, crossing the equator. Note that any void space
              // created by a reset must be included in "used" bytes
              final int bUsed = distanceTo(kvbidx, bufindex);
              final boolean bufsoftlimit = bUsed >= softLimit;
              if ((kvbend + METASIZE) % kvbuffer.length !=
                  equator - (equator % METASIZE)) {
                // spill finished, reclaim space
                bufferRemaining = Math.min(
                    distanceTo(bufindex, kvbidx) - 2 * METASIZE,
                    softLimit - bUsed) - METASIZE;
              } else if (bufsoftlimit && kvindex != kvend) {
                // spill records, if any collected; check latter, as it may
                // be possible for metadata alignment to hit spill pcnt
                final int avgRec = (int)
                  (mapOutputByteCounter.getCounter() /
                // leave at least half the split buffer for serialization data
                // ensure that kvindex >= bufindex
                final int distkvi = distanceTo(bufindex, kvbidx);
                final int newPos = (bufindex +
                  Math.max(2 * METASIZE - 1,
                          Math.min(distkvi / 2,
                                   distkvi / (METASIZE + avgRec) * METASIZE)))
                  % kvbuffer.length;
                bufmark = bufindex = newPos;
                final int serBound = 4 * kvend;
                // bytes remaining before the lock must be held and limits
                // checked is the minimum of three arcs: the metadata space, the
                // serialization space, and the soft limit
                bufferRemaining = Math.min(
                    // metadata max
                    distanceTo(bufend, newPos),
                      // serialization max
                      distanceTo(newPos, serBound),
                      // soft limit
                      softLimit)) - 2 * METASIZE;
          } while (false);
        } finally {
      try {
        // serialize key bytes into buffer
        int keystart = bufindex;
        if (bufindex < keystart) {
          // wrapped the key; must make contiguous
          keystart = 0;
        // serialize value bytes into buffer
        final int valstart = bufindex;
        // It's possible for records to have zero length, i.e. the serializer
        // will perform no writes. To ensure that the boundary conditions are
        // checked and that the kvindex invariant is maintained, perform a
        // zero-length write into the buffer. The logic monitoring this could be
        // moved into collect, but this is cleaner and inexpensive. For now, it
        // is acceptable.
        bb.write(b0, 0, 0);

        // the record must be marked after the preceding write, as the metadata
        // for this record are not yet written
        int valend = bb.markRecord();//标记本条记录的结束位置(对应bufmark),返回值作为value的结束位置

            distanceTo(keystart, valend, bufvoid));//计数:kv对的字节长度

        // 写kv对的元数据信息
        kvmeta.put(kvindex + PARTITION, partition);
        kvmeta.put(kvindex + KEYSTART, keystart);
        kvmeta.put(kvindex + VALSTART, valstart);
        kvmeta.put(kvindex + VALLEN, distanceTo(valstart, valend));
        // 移动kvindex,为下一个kvmeta做准备,移动了int数组的四个位置
        kvindex = (kvindex - NMETA + kvmeta.capacity()) % kvmeta.capacity();
      } catch (MapBufferTooSmallException e) {
        LOG.info("Record too large for in-memory buffer: " + e.getMessage());
        spillSingleRecord(key, value, partition);


    -->if (conf.getNumReduceTasks() == 0)
         说明:获取reduceTask的个数;如果是0,则将map阶段设置成100%
    -->  boolean useNewApi = job.getUseNewMapper();
    -->initialize(job, getJobID(), reporter, useNewApi);
    --> runNewMapper(job, splitMetaInfo, umbilical, reporter);

    -->input.initialize(split, mapperContext);(略)


