MapTask 在执行 context.write() 之后,会将结果溢写到磁盘,然后触发 Reduce 端的 Shuffle。具体的 shuffle 过程可以参考《MapReduce shuffle过程详解》,这篇文章讲得通俗易懂。至此,Map 端的任务就算基本完成了。接下来我们看 Reducer 端做了哪些工作。跟踪源码时,可以从 Reducer 类的 run() 方法开始分析。
/**
 * Drives one reduce task from start to finish.
 *
 * As summarized from the Reducer documentation by the original walkthrough,
 * a reducer has three phases:
 * 1. Shuffle - records sharing a key are pulled into the same partition.
 * 2. Sort - the map side has already sorted its output, so each fetched
 *    partition is ordered, which suits a merge sort.
 * 3. Reduce.
 *
 * The method calls setup(context) once, then reduce(...) once for every
 * successful context.nextKey(); cleanup(context) runs in a finally block
 * after setup has returned.
 */
public void run(Context context) throws IOException, InterruptedException {
  setup(context);
  try {
    // The loop advances with context.nextKey(), i.e. per key group rather than
    // per record: reduce() is invoked once for each group of equal keys.
    // (The walkthrough contrasts this with the map-side run(), which is said to
    // loop on context.nextKeyValue(), one map() call per record.)
    // How "same group" is decided is set up in ReduceTask.run().
    while (context.nextKey()) {
      reduce(context.getCurrentKey(), context.getValues(), context);

      // If a backup store is used, reset it before moving to the next key.
      Iterator<VALUEIN> values = context.getValues().iterator();
      if (!(values instanceof ReduceContext.ValueIterator)) {
        continue;
      }
      ReduceContext.ValueIterator<VALUEIN> replayable =
          (ReduceContext.ValueIterator<VALUEIN>) values;
      replayable.resetBackupStore();
    }
  } finally {
    cleanup(context);
  }
}
/**
 * Task-level entry point for the reduce side.
 *
 * Registers the copy/sort/reduce progress phases, starts the reporter,
 * short-circuits for job-setup / job-cleanup / task-cleanup tasks, then runs
 * the configured shuffle plugin to obtain a raw key/value iterator and hands
 * that iterator plus the grouping comparator to the new- or old-API reducer
 * runner. Finally closes the shuffle plugin and calls done(...).
 *
 * @param job       job configuration; also used to look up the shuffle plugin,
 *                  the map output key/value classes and the grouping comparator
 * @param umbilical protocol handle passed to the reporter, status updates and
 *                  the reducer runners
 * @throws IOException            propagated from the calls below
 * @throws InterruptedException   propagated from the calls below
 * @throws ClassNotFoundException propagated from the calls below
 */
public void run(JobConf job, final TaskUmbilicalProtocol umbilical)
throws IOException, InterruptedException, ClassNotFoundException {
job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());
if (isMapOrReduce()) {
// "copy" phase - per the original walkthrough: copy this reducer's partition
// out of the map output files
copyPhase = getProgress().addPhase("copy");
// "sort" phase - per the original walkthrough: grouping / sorting
sortPhase = getProgress().addPhase("sort");
// "reduce" phase - the actual reduce computation
reducePhase = getProgress().addPhase("reduce");
}
// start thread that will handle communication with parent
TaskReporter reporter = startReporter(umbilical);
boolean useNewApi = job.getUseNewReducer();
initialize(job, getJobID(), reporter, useNewApi);
// check if it is a cleanupJobTask
if (jobCleanup) {
runJobCleanupTask(umbilical, reporter);
return;
}
if (jobSetup) {
runJobSetupTask(umbilical, reporter);
return;
}
if (taskCleanup) {
runTaskCleanupTask(umbilical, reporter);
return;
}
// Initialize the codec
codec = initCodec();
RawKeyValueIterator rIter = null;
ShuffleConsumerPlugin shuffleConsumerPlugin = null;
// A combine collector is only created when a combiner class is configured.
Class combinerClass = conf.getCombinerClass();
CombineOutputCollector combineCollector =
(null != combinerClass) ?
new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;
// The shuffle implementation is pluggable; Shuffle.class is the default.
Class<? extends ShuffleConsumerPlugin> clazz =
job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job);
LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
ShuffleConsumerPlugin.Context shuffleContext =
new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical,
super.lDirAlloc, reporter, codec,
combinerClass, combineCollector,
spilledRecordsCounter, reduceCombineInputCounter,
shuffledMapsCounter,
reduceShuffleBytes, failedShuffleCounter,
mergedMapOutputsCounter,
taskStatus, copyPhase, sortPhase, this,
mapOutputFile, localMapFiles);
shuffleConsumerPlugin.init(shuffleContext);
// run() yields the raw key/value iterator; per the original walkthrough it
// pulls the data from disk one record at a time (not visible in this method)
rIter = shuffleConsumerPlugin.run();
// free up the data structures
mapOutputFilesOnDisk.clear();
// the sort phase is finished
sortPhase.complete();
setPhase(TaskStatus.Phase.REDUCE);
statusUpdate(umbilical);
Class keyClass = job.getMapOutputKeyClass();
Class valueClass = job.getMapOutputValueClass();
// Obtain the grouping comparator.
// Per the original walkthrough (fallback logic lives in JobConf, not shown here):
// if no grouping comparator was configured, the user's sort comparator is used,
// and if that is not set either, it defaults to getOutputKeyComparator().
RawComparator comparator = job.getOutputValueGroupingComparator();
if (useNewApi) {
// hand the iterator, comparator, etc. to runNewReducer()
runNewReducer(job, umbilical, reporter, rIter, comparator,
keyClass, valueClass);
} else {
runOldReducer(job, umbilical, reporter, rIter, comparator,
keyClass, valueClass);
}
shuffleConsumerPlugin.close();
done(umbilical, reporter);
}
/**
 * Runs the reduce phase through the new-API
 * (org.apache.hadoop.mapreduce) Reducer.
 *
 * Decorates the raw iterator so that every next() also pushes progress to the
 * reporter, instantiates the configured Reducer class reflectively, builds the
 * Reducer.Context and calls reducer.run(context). The tracking record writer
 * is closed in a finally block.
 *
 * @param job        job configuration
 * @param umbilical  protocol handle of the enclosing task (not used directly here)
 * @param reporter   receives progress updates and is passed to the contexts
 * @param rIter      raw key/value iterator produced by the shuffle
 * @param comparator comparator handed to the reduce context
 * @param keyClass   class of the reduce input keys
 * @param valueClass class of the reduce input values
 */
private <INKEY,INVALUE,OUTKEY,OUTVALUE>
void runNewReducer(JobConf job,
                   final TaskUmbilicalProtocol umbilical,
                   final TaskReporter reporter,
                   RawKeyValueIterator rIter,
                   RawComparator<INKEY> comparator,
                   Class<INKEY> keyClass,
                   Class<INVALUE> valueClass
                   ) throws IOException,InterruptedException,
                            ClassNotFoundException {
  // Wrap the incoming iterator in one that reports progress on each advance.
  final RawKeyValueIterator source = rIter;
  final RawKeyValueIterator progressIter = new RawKeyValueIterator() {
    public boolean next() throws IOException {
      boolean advanced = source.next();
      reporter.setProgress(source.getProgress().getProgress());
      return advanced;
    }

    public DataInputBuffer getKey() throws IOException {
      return source.getKey();
    }

    public DataInputBuffer getValue() throws IOException {
      return source.getValue();
    }

    public Progress getProgress() {
      return source.getProgress();
    }

    public void close() throws IOException {
      source.close();
    }
  };

  // Task context, used below to look up the Reducer class.
  org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
      new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(
          job, getTaskID(), reporter);

  // Instantiate the user's Reducer implementation via reflection.
  org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer =
      (org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>)
          ReflectionUtils.newInstance(taskContext.getReducerClass(), job);

  org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> trackedRW =
      new NewTrackingRecordWriter<OUTKEY, OUTVALUE>(this, taskContext);

  job.setBoolean("mapred.skip.on", isSkipping());
  job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());

  // Build the context that is subsequently passed to reducer.run().
  org.apache.hadoop.mapreduce.Reducer.Context reducerContext =
      createReduceContext(reducer, job, getTaskID(),
                          progressIter, reduceInputKeyCounter,
                          reduceInputValueCounter,
                          trackedRW,
                          committer,
                          reporter, comparator, keyClass,
                          valueClass);
  try {
    // The user-provided reducer takes over from here.
    reducer.run(reducerContext);
  } finally {
    trackedRW.close(reducerContext);
  }
}
/**
 * Reducer driver loop (same method the walkthrough started from).
 *
 * Calls setup(context), then reduce(...) once per successful
 * context.nextKey(), and runs cleanup(context) in a finally block.
 */
public void run(Context context) throws IOException, InterruptedException {
  setup(context);
  try {
    // Back at the starting point of the walkthrough; nextKey() is examined next.
    while (context.nextKey()) {
      reduce(context.getCurrentKey(), context.getValues(), context);

      // If a backup store is used, reset it before moving to the next key.
      Iterator<VALUEIN> values = context.getValues().iterator();
      if (!(values instanceof ReduceContext.ValueIterator)) {
        continue;
      }
      ReduceContext.ValueIterator<VALUEIN> replayable =
          (ReduceContext.ValueIterator<VALUEIN>) values;
      replayable.resetBackupStore();
    }
  } finally {
    cleanup(context);
  }
}
/**
 * Start processing the next unique key.
 *
 * First skips any records that still belong to the current key group, then,
 * if input remains, bumps the input-key counter (when one is present) and
 * loads the first record of the next group via nextKeyValue().
 *
 * @return the result of nextKeyValue() for the new group, or false when no
 *         input remains
 */
public boolean nextKey() throws IOException,InterruptedException {
  // Consume the rest of the current group while the look-ahead key is the same.
  while (hasMore && nextKeyIsSame) {
    nextKeyValue();
  }

  if (!hasMore) {
    return false;
  }

  if (inputKeyCounter != null) {
    inputKeyCounter.increment(1);
  }
  // Load the first key/value pair of the new group.
  return nextKeyValue();
}
/**
 * Advances to the next key/value pair and looks one record ahead.
 *
 * Deserializes the current raw key and value into {@code key} / {@code value},
 * then advances the input once more and compares the upcoming raw key with the
 * current one to set {@code nextKeyIsSame}. (The original walkthrough notes that
 * this resembles the map-side nextKeyValue(), with the extra key look-ahead.)
 *
 * @return false (with key and value set to null) when no input remains,
 *         true otherwise
 */
public boolean nextKeyValue() throws IOException, InterruptedException {
if (!hasMore) {
key = null;
value = null;
return false;
}
// This record opens a new group unless the previous look-ahead saw the same key.
firstValue = !nextKeyIsSame;
// read the key: copy its raw bytes into currentRawKey, then deserialize from them
DataInputBuffer nextKey = input.getKey();
currentRawKey.set(nextKey.getData(), nextKey.getPosition(),
nextKey.getLength() - nextKey.getPosition());
buffer.reset(currentRawKey.getBytes(), 0, currentRawKey.getLength());
key = keyDeserializer.deserialize(key);
// read the value: point the shared buffer at the value bytes and deserialize
DataInputBuffer nextVal = input.getValue();
buffer.reset(nextVal.getData(), nextVal.getPosition(), nextVal.getLength()
- nextVal.getPosition());
value = valueDeserializer.deserialize(value);
currentKeyLength = nextKey.getLength() - nextKey.getPosition();
currentValueLength = nextVal.getLength() - nextVal.getPosition();
// while a mark is active, every record read is also written to the backup store
if (isMarked) {
backupStore.write(nextKey, nextVal);
}
// look-ahead: advance the input by one more record
hasMore = input.next();
if (hasMore) {
nextKey = input.getKey();
// Compare the saved current raw key with the upcoming raw key using the
// comparator supplied to this context; a result of 0 means same group.
// Control then returns to the run() loop.
nextKeyIsSame = comparator.compare(currentRawKey.getBytes(), 0,
currentRawKey.getLength(),
nextKey.getData(),
nextKey.getPosition(),
nextKey.getLength() - nextKey.getPosition()
) == 0;
} else {
nextKeyIsSame = false;
}
// NOTE(review): incremented without a null check, whereas nextKey() guards
// inputKeyCounter against null - confirm inputValueCounter can never be null.
inputValueCounter.increment(1);
return true;
}
/**
 * Reducer driver loop, revisited to follow getCurrentKey() / getValues().
 *
 * Calls setup(context), then reduce(...) once per successful
 * context.nextKey(), and runs cleanup(context) in a finally block.
 */
public void run(Context context) throws IOException, InterruptedException {
  setup(context);
  try {
    while (context.nextKey()) {
      // getCurrentKey() hands over the current key (per the walkthrough it simply
      // returns the key field); getValues() is the next method to look at.
      reduce(context.getCurrentKey(), context.getValues(), context);

      // If a backup store is used, reset it before moving to the next key.
      Iterator<VALUEIN> values = context.getValues().iterator();
      if (!(values instanceof ReduceContext.ValueIterator)) {
        continue;
      }
      ReduceContext.ValueIterator<VALUEIN> replayable =
          (ReduceContext.ValueIterator<VALUEIN>) values;
      replayable.resetBackupStore();
    }
  } finally {
    cleanup(context);
  }
}
/**
 * Returns the values view for the current key.
 *
 * The same {@code iterable} field is returned on every call. Per the original
 * walkthrough, its iterator() yields the ValueIterator examined next (the
 * Iterable's definition is not shown here).
 *
 * @return the Iterable over the current key's values
 */
public Iterable<VALUEIN> getValues() throws IOException, InterruptedException {
  return iterable;
}
/**
*可以看到,这个ValueIterator实现了hasNext()和next()方法
*/
protected class ValueIterator implements ReduceContext.ValueIterator<VALUEIN> {
private boolean inReset = false;
private boolean clearMarkFlag = false;
/**
 * Reports whether another value is available for the current key.
 *
 * True when the iterator is replaying from the backup store and the store has
 * more entries, or when the current record is the first value of its group,
 * or when the look-ahead key equals the current key.
 *
 * @throws RuntimeException wrapping any exception raised while querying the
 *         backup store
 */
@Override
public boolean hasNext() {
  boolean replayPending;
  try {
    replayPending = inReset && backupStore.hasNext();
  } catch (Exception e) {
    e.printStackTrace();
    throw new RuntimeException("hasNext failed", e);
  }
  return replayPending || firstValue || nextKeyIsSame;
}
/**
 * Returns the next value of the current key group.
 *
 * While in reset mode, values are replayed from the backup store; once the
 * store is exhausted, reset mode is left (and the mark is cleared if that was
 * requested) and reading falls through to the live input. On the live input,
 * the first value of a group is already loaded and is returned as-is; later
 * values are loaded through nextKeyValue().
 *
 * @return the next value for the current key
 * @throws NoSuchElementException if the current group has no more values
 * @throws RuntimeException wrapping an IOException or InterruptedException,
 *         since java.util.Iterator#next cannot declare checked exceptions;
 *         on interruption the thread's interrupt status is restored first
 */
@Override
public VALUEIN next() {
  if (inReset) {
    try {
      if (backupStore.hasNext()) {
        backupStore.next();
        DataInputBuffer next = backupStore.nextValue();
        buffer.reset(next.getData(), next.getPosition(), next.getLength()
            - next.getPosition());
        value = valueDeserializer.deserialize(value);
        return value;
      } else {
        // Backup store exhausted: leave reset mode and continue with live input.
        inReset = false;
        backupStore.exitResetMode();
        if (clearMarkFlag) {
          clearMarkFlag = false;
          isMarked = false;
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException("next value iterator failed", e);
    }
  }

  // if this is the first record, we don't need to advance
  if (firstValue) {
    firstValue = false;
    return value;
  }

  // if this isn't the first record and the next key is different, they
  // can't advance it here.
  if (!nextKeyIsSame) {
    throw new NoSuchElementException("iterate past last value");
  }

  // otherwise, go to the next key/value pair
  try {
    // Loads the next record; this is the last step of the values-iteration
    // path, after which the user's reduce() logic consumes the value.
    nextKeyValue();
    return value;
  } catch (IOException ie) {
    throw new RuntimeException("next value iterator failed", ie);
  } catch (InterruptedException ie) {
    // We can't modify the exception list of java.util.Iterator, so the
    // interruption is rethrown unchecked. Restore the interrupt status first
    // so code further up the stack can still observe that the thread was
    // interrupted.
    Thread.currentThread().interrupt();
    throw new RuntimeException("next value iterator interrupted", ie);
  }
}
reduce 阶段并没有一次性把所有数据都拉取到内存,而是先进行 shuffle 和归并排序,将 map 的输出数据整理成分区有序且分组有序的状态,然后创建一个迭代器 rIter。但是,如果将这个迭代器直接传给 reduce,那么 reduce 会把整个分区(所有分组)的数据都读进来,所以又创建了一个新的迭代器 iterable 去包装 rIter,并通过 nextKeyValue() 方法从 rIter 中逐条读取数据。当遍历 context.getValues() 返回的 iterable 时,hasNext() 方法会判断 nextKeyIsSame(以及 firstValue),如果为真则调用 next() 方法;next() 方法又会调用 nextKeyValue() 方法,这个方法会更新 nextKeyIsSame 的值——每读取一条数据,都会更新一次 nextKeyIsSame。当前后两个 key 不相等时,nextKeyIsSame 为 false,本次 reduce() 方法的迭代结束,但是 rIter 还在;下一次调用 reduce() 方法时,nextKeyValue() 方法会被新一轮的 iterable 调用,这样就开始了新的一组数据的计算。reduce() 方法充分应用了迭代器模式,一次 I/O 就完成了数据的计算过程,规避了 OOM 的问题。