MapReduce源码解读之ReduceTask

写在前面

MapTask在执行context.write()之后,将结果溢写到磁盘,然后触发Reduce Shuffle,这样Map端的任务就算基本完成了(具体的shuffle过程可以参考《MapReduce shuffle过程详解》,这篇文章讲得通俗易懂)。接下来我们看Reducer端做了哪些工作。跟源码时,可以从Reducer类的run()方法开始分析。

1.入口:Reducer类的run()方法

/**
 * Drives one reduce task over the supplied context: calls setup(context) once,
 * then reduce(...) once for every key reported by context.nextKey(), and always
 * finishes with cleanup(context) because that call sits in a finally block.
 *
 * Per the Reducer class documentation, a reducer's work has three main phases:
 * 1. Shuffle (records with the same key are fetched into the same partition)
 * 2. Sort (the map side has already sorted its output, so every partition the
 *    reducer fetches is ordered, which suits a merge sort well)
 * 3. Reduce
 *
 * @param context the reduce context that is iterated and passed to setup, reduce and cleanup
 * @throws IOException declared by this method; presumably raised by the context or the user hooks
 * @throws InterruptedException declared by this method; presumably raised by the context or the user hooks
 */
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    try {
      // Note the contrast with the map side: the mapper's run() loops on
      // context.nextKeyValue(), whereas the reducer loops on context.nextKey().
      // The mapper therefore decides "is there more data?" per record, while the
      // reducer decides per key group: map() runs once per record, reduce() runs
      // once per group of equal keys.
      // To see how that decision is made, follow ReduceTask.run() next.
      while (context.nextKey()) {
        reduce(context.getCurrentKey(), context.getValues(), context);
        // If a back up store is used, reset it
        Iterator<VALUEIN> iter = context.getValues().iterator();
        if(iter instanceof ReduceContext.ValueIterator) {
          ((ReduceContext.ValueIterator<VALUEIN>)iter).resetBackupStore();        
        }
      }
    } finally {
      cleanup(context);
    }
  }

2. 进入ReduceTask类的run()方法

  /**
   * Runs this reduce task end to end: registers the copy/sort/reduce progress
   * phases, starts the reporter, handles the job-setup / job-cleanup /
   * task-cleanup variants, runs the configured shuffle plugin to obtain a raw
   * key/value iterator, and finally dispatches to the new-API or old-API
   * reducer before closing the plugin and signalling completion.
   *
   * @param job the job configuration
   * @param umbilical the protocol object passed to the reporter, the status update and done()
   * @throws IOException declared by this method
   * @throws InterruptedException declared by this method
   * @throws ClassNotFoundException declared by this method
   */
  public void run(JobConf job, final TaskUmbilicalProtocol umbilical)
    throws IOException, InterruptedException, ClassNotFoundException {
    job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());

    if (isMapOrReduce()) {
      // "copy" phase: copy the data belonging to this reducer's partition out of the map output files
      copyPhase = getProgress().addPhase("copy");
      // "sort" phase: grouping and sorting
      sortPhase  = getProgress().addPhase("sort");
      // "reduce" phase: the reduce computation itself
      reducePhase = getProgress().addPhase("reduce");
    }
    // start thread that will handle communication with parent
    TaskReporter reporter = startReporter(umbilical);
    
    boolean useNewApi = job.getUseNewReducer();
    initialize(job, getJobID(), reporter, useNewApi);

    // check if it is a cleanupJobTask
    if (jobCleanup) {
      runJobCleanupTask(umbilical, reporter);
      return;
    }
    if (jobSetup) {
      runJobSetupTask(umbilical, reporter);
      return;
    }
    if (taskCleanup) {
      runTaskCleanupTask(umbilical, reporter);
      return;
    }
    
    // Initialize the codec
    codec = initCodec();
    RawKeyValueIterator rIter = null;
    ShuffleConsumerPlugin shuffleConsumerPlugin = null;
    
    // A combine collector is only created when a combiner class is configured.
    Class combinerClass = conf.getCombinerClass();
    CombineOutputCollector combineCollector = 
      (null != combinerClass) ? 
     new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;

    // The shuffle implementation is pluggable; Shuffle.class is the default passed to getClass().
    Class<? extends ShuffleConsumerPlugin> clazz =
          job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
					
    shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job);
    LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);

    ShuffleConsumerPlugin.Context shuffleContext = 
      new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical, 
                  super.lDirAlloc, reporter, codec, 
                  combinerClass, combineCollector, 
                  spilledRecordsCounter, reduceCombineInputCounter,
                  shuffledMapsCounter,
                  reduceShuffleBytes, failedShuffleCounter,
                  mergedMapOutputsCounter,
                  taskStatus, copyPhase, sortPhase, this,
                  mapOutputFile, localMapFiles);
    shuffleConsumerPlugin.init(shuffleContext);
		
    // Running the plugin yields the raw key/value iterator used for the rest of the task.
    // (Article note: the iterator pulls the data from disk one record at a time;
    // that behaviour lives inside the plugin and is not visible in this method.)
    rIter = shuffleConsumerPlugin.run();

    // free up the data structures
    mapOutputFilesOnDisk.clear();
     // The sort phase is finished at this point
    sortPhase.complete();                        
    setPhase(TaskStatus.Phase.REDUCE); 
    statusUpdate(umbilical);
    Class keyClass = job.getMapOutputKeyClass();
    Class valueClass = job.getMapOutputValueClass();
	
    // Obtain the grouping comparator used to decide which consecutive keys form one group.
    // (Article note: if the user did not configure a grouping comparator, the job falls
    // back to the sort comparator returned by getOutputKeyComparator(). That fallback is
    // implemented inside JobConf, not here; TODO confirm against the JobConf source.)
    RawComparator comparator = job.getOutputValueGroupingComparator();

    if (useNewApi) {
    // Hand the iterator, comparator, etc. to runNewReducer(); the walkthrough follows this branch
      runNewReducer(job, umbilical, reporter, rIter, comparator, 
                    keyClass, valueClass);
    } else {
      runOldReducer(job, umbilical, reporter, rIter, comparator, 
                    keyClass, valueClass);
    }

    shuffleConsumerPlugin.close();
    done(umbilical, reporter);
  }

3. 进入runNewReducer()方法

  /**
   * Runs a new-API reducer. Wraps the raw iterator so that every next() also
   * reports progress, instantiates the configured Reducer reflectively, builds
   * the tracking record writer and the reduce context, then calls
   * reducer.run(...) and closes the record writer in a finally block.
   *
   * @param job the job configuration
   * @param umbilical the task umbilical (not used directly in the visible body)
   * @param reporter receives progress updates and is passed to the task and reduce contexts
   * @param rIter the raw key/value iterator produced by the shuffle
   * @param comparator the comparator handed to the reduce context
   * @param keyClass the input key class handed to the reduce context
   * @param valueClass the input value class handed to the reduce context
   * @throws IOException declared by this method
   * @throws InterruptedException declared by this method
   * @throws ClassNotFoundException declared by this method
   */
  private <INKEY,INVALUE,OUTKEY,OUTVALUE>
  void runNewReducer(JobConf job,
                     final TaskUmbilicalProtocol umbilical,
                     final TaskReporter reporter,
                     RawKeyValueIterator rIter,
                     RawComparator<INKEY> comparator,
                     Class<INKEY> keyClass,
                     Class<INVALUE> valueClass
                     ) throws IOException,InterruptedException, 
                              ClassNotFoundException {
    // Wrap the incoming iterator in a new one: every call delegates to the
    // original, and next() additionally pushes the current progress to the reporter.
    final RawKeyValueIterator rawIter = rIter;
    rIter = new RawKeyValueIterator() {
      public void close() throws IOException {
        rawIter.close();
      }
      public DataInputBuffer getKey() throws IOException {
        return rawIter.getKey();
      }
      public Progress getProgress() {
        return rawIter.getProgress();
      }
      public DataInputBuffer getValue() throws IOException {
        return rawIter.getValue();
      }
      public boolean next() throws IOException {
        boolean ret = rawIter.next();
        reporter.setProgress(rawIter.getProgress().getProgress());
        return ret;
      }
    };
    // Create a task context; it supplies the reducer class for the reflective instantiation below
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
      new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(job,
          getTaskID(), reporter);
    // Instantiate the user's Reducer implementation via reflection
    org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer =
      (org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>)
        ReflectionUtils.newInstance(taskContext.getReducerClass(), job);
    org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> trackedRW = 
      new NewTrackingRecordWriter<OUTKEY, OUTVALUE>(this, taskContext);
    job.setBoolean("mapred.skip.on", isSkipping());
    job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());
    org.apache.hadoop.mapreduce.Reducer.Context 

         // Build the reducerContext (declaration continues from the line above);
         // it is what gets passed to reducer.run() below
         reducerContext = createReduceContext(reducer, job, getTaskID(),
                                               rIter, reduceInputKeyCounter, 
                                               reduceInputValueCounter, 
                                               trackedRW,
                                               committer,
                                               reporter, comparator, keyClass,
                                               valueClass);
    try {
	
      // The user's reducer executes its run() method here; the walkthrough follows it next
      reducer.run(reducerContext);
    } finally {
      trackedRW.close(reducerContext);
    }
  }

4.进入run()方法

/**
 * Drives the reduce loop: setup(context) once, reduce(...) once per key
 * reported by context.nextKey(), and cleanup(context) in a finally block.
 *
 * @param context the reduce context that is iterated and passed to setup, reduce and cleanup
 * @throws IOException declared by this method
 * @throws InterruptedException declared by this method
 */
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    try {
      // This is the same method shown in step 1 of the article; the next thing
      // to examine is what context.nextKey() does.
      while (context.nextKey()) {
        reduce(context.getCurrentKey(), context.getValues(), context);
        // If a back up store is used, reset it
        Iterator<VALUEIN> iter = context.getValues().iterator();
        if(iter instanceof ReduceContext.ValueIterator) {
          ((ReduceContext.ValueIterator<VALUEIN>)iter).resetBackupStore();        
        }
      }
    } finally {
      cleanup(context);
    }
  }

5. 进入nextKey()方法

 /**
  * Start processing the next unique key.
  *
  * Any records still pending for the current key group are consumed first;
  * then, if input remains, the key counter (when present) is bumped and the
  * first record of the new group is loaded through nextKeyValue().
  *
  * @return false when no input remains, otherwise the result of nextKeyValue()
  * @throws IOException declared by this method; nextKeyValue() declares it as well
  * @throws InterruptedException declared by this method; nextKeyValue() declares it as well
  */
  public boolean nextKey() throws IOException,InterruptedException {
    // While the look-ahead says the following key equals the current one,
    // keep consuming records so the cursor lands on a group boundary.
    while (hasMore && nextKeyIsSame) {
      nextKeyValue();
    }

    // Nothing left to read: there is no further key group.
    if (!hasMore) {
      return false;
    }

    // The counter is optional, so only count the new key when one was supplied.
    if (inputKeyCounter != null) {
      inputKeyCounter.increment(1);
    }

    // Load the first record of the new group and report whether a record was read.
    // (The map side has a method with the same name; the article follows it next.)
    return nextKeyValue();
  }

6.进入nextKeyValue()方法

/**
 * Loads the current raw record from the input into {@code key} / {@code value},
 * then advances the input by one record and remembers whether the following
 * key compares equal to the one just loaded.
 *
 * Article note: the logic is very similar to the map-side nextKeyValue(), but
 * it does one extra thing, namely fetching one key ahead.
 *
 * @return false (with key and value set to null) when no input remains, otherwise true
 * @throws IOException declared by this method
 * @throws InterruptedException declared by this method
 */
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (!hasMore) {
      key = null;
      value = null;
      return false;
    }
    // This record is the first value of a group exactly when the previous
    // look-ahead found a different key.
    firstValue = !nextKeyIsSame;
    
    // Read the key: copy its raw bytes into currentRawKey, then deserialize from the buffer
    DataInputBuffer nextKey = input.getKey();
    currentRawKey.set(nextKey.getData(), nextKey.getPosition(), 
                      nextKey.getLength() - nextKey.getPosition());
    buffer.reset(currentRawKey.getBytes(), 0, currentRawKey.getLength());
    key = keyDeserializer.deserialize(key);

    // Read the value: point the buffer at its raw bytes, then deserialize
    DataInputBuffer nextVal = input.getValue();
    buffer.reset(nextVal.getData(), nextVal.getPosition(), nextVal.getLength()
        - nextVal.getPosition());
    value = valueDeserializer.deserialize(value);

    currentKeyLength = nextKey.getLength() - nextKey.getPosition();
    currentValueLength = nextVal.getLength() - nextVal.getPosition();

    // When a mark is active, the raw record is also written to the backup store.
    if (isMarked) {
      backupStore.write(nextKey, nextVal);
    }

    // Look-ahead: advance the input by one extra record to see the following key
    hasMore = input.next();
    if (hasMore) {
      nextKey = input.getKey();

      // Compare the raw bytes of the current key with those of the following key
      // using the comparator held by this context; a result of 0 means "same group",
      // so nextKeyIsSame becomes true.
      // From here the walkthrough returns to run().
      nextKeyIsSame = comparator.compare(currentRawKey.getBytes(), 0, 
                                     currentRawKey.getLength(),
                                     nextKey.getData(),
                                     nextKey.getPosition(),
                                     nextKey.getLength() - nextKey.getPosition()
                                         ) == 0;
    } else {
      nextKeyIsSame = false;
    }
    inputValueCounter.increment(1);
    return true;
  }

7.回到run()方法

/**
 * Drives the reduce loop: setup(context) once, reduce(...) once per key
 * reported by context.nextKey(), and cleanup(context) in a finally block.
 *
 * @param context the reduce context that is iterated and passed to setup, reduce and cleanup
 * @throws IOException declared by this method
 * @throws InterruptedException declared by this method
 */
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    try {
      while (context.nextKey()) {
		
        // getCurrentKey() supplies the current key (per the article it simply
        // returns the key field); the next thing to examine is getValues().
        reduce(context.getCurrentKey(), context.getValues(), context);
        // If a back up store is used, reset it
        Iterator<VALUEIN> iter = context.getValues().iterator();
        if(iter instanceof ReduceContext.ValueIterator) {
          ((ReduceContext.ValueIterator<VALUEIN>)iter).resetBackupStore();        
        }
      }
    } finally {
      cleanup(context);
    }
  }

8. 进入getValues()方法

  /**
   * Returns the value Iterable held by this context.
   *
   * @return the {@code iterable} field
   * @throws IOException declared by this method; the visible body does not throw it
   * @throws InterruptedException declared by this method; the visible body does not throw it
   */
  public 
  Iterable<VALUEIN> getValues() throws IOException, InterruptedException {
  // Returns an Iterable object that wraps an iterator() method.
  // Per the article, that iterator() hands back a ValueIterator, which is examined next.
  // NOTE(review): the definition of `iterable` is not shown in this excerpt - confirm.
    return iterable;
  }

9. 进入ValueIterator类

/**
*可以看到,这个ValueIterator实现了hasNext()和next()方法
*/
  protected class ValueIterator implements ReduceContext.ValueIterator<VALUEIN> {

    private boolean inReset = false;
    private boolean clearMarkFlag = false;

    /**
     * Reports whether another value can be returned by next().
     *
     * True when the iterator is in reset mode and the backup store still has
     * entries; otherwise true when the current record is the first value of
     * its group or the following key is the same as the current one.
     *
     * @return whether a further value is available
     * @throws RuntimeException wrapping any exception raised while querying the backup store
     */
    @Override
    public boolean hasNext() {
      // Only consult the backup store while in reset mode (short-circuit keeps
      // backupStore untouched otherwise).
      final boolean replayPending;
      try {
        replayPending = inReset && backupStore.hasNext();
      } catch (Exception failure) {
        failure.printStackTrace();
        throw new RuntimeException("hasNext failed", failure);
      }
      return replayPending || firstValue || nextKeyIsSame;
    }

    /**
     * Returns the next value of the current key group.
     *
     * In reset mode, values are replayed from the backup store until it is
     * exhausted, at which point reset mode is left. Otherwise the already
     * loaded value is returned if it is the first of its group, and any later
     * call advances the underlying input through nextKeyValue().
     *
     * @return the next deserialized value
     * @throws NoSuchElementException when this is not the first value and the following key differs
     * @throws RuntimeException wrapping an IOException or InterruptedException raised while advancing
     */
    @Override
    public VALUEIN next() {
      if (inReset) {
        try {
          if (backupStore.hasNext()) {
            // Replay path: deserialize the next value straight from the backup store.
            backupStore.next();
            DataInputBuffer next = backupStore.nextValue();
            buffer.reset(next.getData(), next.getPosition(), next.getLength()
                - next.getPosition());
            value = valueDeserializer.deserialize(value);
            return value;
          } else {
            // Backup store exhausted: leave reset mode and fall through to the live input.
            inReset = false;
            backupStore.exitResetMode();
            if (clearMarkFlag) {
              clearMarkFlag = false;
              isMarked = false;
            }
          }
        } catch (IOException e) {
          e.printStackTrace();
          throw new RuntimeException("next value iterator failed", e);
        }
      } 
      // if this is the first record, we don't need to advance
      if (firstValue) {
        firstValue = false;
        return value;
      }
      // if this isn't the first record and the next key is different, they
      // can't advance it here.
      if (!nextKeyIsSame) {
        throw new NoSuchElementException("iterate past last value");
      }
      // otherwise, go to the next key/value pair
      try {

        // Advancing ultimately goes through nextKeyValue().
        // (Article note: this completes the walkthrough of the reduce-side nextKey();
        // what follows is the execution of the user's own reduce logic.)
        nextKeyValue();
        return value;
      } catch (IOException ie) {
        throw new RuntimeException("next value iterator failed", ie);
      } catch (InterruptedException ie) {
        // this is bad, but we can't modify the exception list of java.util
        // NOTE(review): the thread's interrupt status is not restored before rethrowing.
        throw new RuntimeException("next value iterator interrupted", ie);        
      }
    }

10. 总结

reduce阶段并没有直接将所有数据都一次拉取到内存,而是先进行shuffle和归并排序,将map的输出数据整理成分区有序且分组有序的状态,然后创建一个迭代器rIter。但是,如果将这个迭代器直接传给reduce,那么reduce会将所有分组(key)的数据都读进来,所以又创建了一个新的迭代器iterable(它的iterator()返回ValueIterator)去包装rIter。当reduce()遍历context.getValues()时,hasNext()方法判断nextKeyIsSame(或firstValue),如果为真则调用next()方法,next()方法调用了nextKeyValue()方法,这个方法通过input.next()从rIter中再取一条记录,并更新nextKeyIsSame的值。每传一条数据,都会更新nextKeyIsSame,当两个key不相等时,nextKeyIsSame为false,本组数据的迭代结束,reduce()方法返回,但是rIter还在,下一次调用reduce()方法之前,nextKey()会再次调用nextKeyValue(),这样就开始了新的一组数据的计算。reduce()方法充分应用了迭代器模式,一次I/O就完成了数据的计算过程,规避了OOM的问题。

你可能感兴趣的:(MapReduce)