Hadoop MapReduce之任务启动(一)

Hadoop MapReduce之任务启动(一)
TaskTracker在启动好后会通过主线程不停发送心跳包,以报告自身状态。如果JobTracker有了新作业,会把相应的任务随心跳响应返回给TaskTracker,此时TT就会解析返回值,并将其转换为自身要执行的任务。框架流程比较简单,这里我们看下TT接收到返回值后到任务执行前所做的操作。TT接收的任务会通过新启动的JVM来执行,并不是由TT自身来执行的,这样也是为了减轻MR框架自身的负载,不同任务之间互不影响,即使抛出异常也只影响该任务所在的JVM,提高了整个系统的可用性。
TT发送心跳 -> 获得执行任务 -> 解析返回并构建TIP -> 任务注册并放入执行队列 -> 唤醒TaskLauncher取出任务执行。

TT发送心跳的流程如下:
/**
 * Excerpt of the TaskTracker heartbeat loop body (elided parts are marked "...").
 * Sends one heartbeat via transmitHeartBeat(now) and dispatches every action
 * contained in the response.
 *
 * @return State.STALE when reinitTaskTracker(actions) reports true; other
 *         return paths are elided in this excerpt
 * @throws Exception declared by the signature; the throwing sites are not all
 *         visible in this excerpt
 */
State offerService() throws Exception {
		...
		// Send the heartbeat and receive the JobTracker's instructions
		HeartbeatResponse heartbeatResponse = transmitHeartBeat(now);
		// Record the time of the most recent heartbeat
		lastHeartbeat = System.currentTimeMillis();
		...
		TaskTrackerAction[] actions = heartbeatResponse.getActions();
		if (LOG.isDebugEnabled()) {
			LOG
					.debug("Got heartbeatResponse from JobTracker with responseId: "
							+ heartbeatResponse.getResponseId()
							+ " and "
							+ ((actions != null) ? actions.length : 0)
							+ " actions");
		}
		// Check whether the TaskTracker has to be reinitialized
		if (reinitTaskTracker(actions)) {
			return State.STALE;
		}


		// resetting heartbeat interval from the response.
		heartbeatInterval = heartbeatResponse.getHeartbeatInterval();
		justStarted = false;
		justInited = false;
		if (actions != null) {
			// Process the actions returned by the JobTracker
			for (TaskTrackerAction action : actions) {
				if (action instanceof LaunchTaskAction) {
					addToTaskQueue((LaunchTaskAction) action);// queue the task for launching first
				} else if (action instanceof CommitTaskAction) {
					CommitTaskAction commitAction = (CommitTaskAction) action;
					// Only record a commit response once per task ID
					if (!commitResponses.contains(commitAction
							.getTaskID())) {
						LOG.info("Received commit task action for "
								+ commitAction.getTaskID());
						commitResponses.add(commitAction.getTaskID());
					}
				} else {
					// Every other action type is handed to the cleanup queue
					tasksToCleanup.put(action);
				}
			}
		}
		...
}
下面是发送心跳的过程
/**
 * Builds (or reuses) the TaskTrackerStatus, sends it to the JobTracker through
 * jobClient.heartbeat(...) and returns the response.
 *
 * After a successful call the method stores the new response id, removes
 * finished tasks from runningTasks, clears transient task status and resets
 * 'status' to null so that it is rebuilt on the next invocation.
 *
 * @param now current time; compared with previousUpdate + COUNTER_UPDATE_INTERVAL
 *            to decide whether counters are sent, and stored in previousUpdate
 *            when they are
 * @return the HeartbeatResponse returned by jobClient.heartbeat(...)
 * @throws IOException declared by the signature; presumably raised by the
 *         heartbeat call -- TODO confirm
 */
HeartbeatResponse transmitHeartBeat(long now) throws IOException {
	// Decide whether counters should be sent with this heartbeat
	boolean sendCounters;
	if (now > (previousUpdate + COUNTER_UPDATE_INTERVAL)) {
		sendCounters = true;
		previousUpdate = now;
	} else {
		sendCounters = false;
	}


	// 'status' is reset to null at the end of a successful heartbeat, so a
	// null value means a fresh status must be built; a non-null value means
	// the previous status is being sent again
	if (status == null) {
		synchronized (this) {
			status = new TaskTrackerStatus(taskTrackerName, localHostname,
					httpPort,
					cloneAndResetRunningTaskStatuses(sendCounters),
					failures, maxMapSlots, maxReduceSlots);
		}
	} else {
		LOG.info("Resending 'status' to '" + jobTrackAddr.getHostName()
				+ "' with reponseId '" + heartbeatResponseId);
	}


	// Decide whether new tasks can be accepted: at least one map or reduce
	// slot must be unoccupied and acceptNewTasks must be set
	boolean askForNewTask;
	long localMinSpaceStart;
	synchronized (this) {
		askForNewTask = ((status.countOccupiedMapSlots() < maxMapSlots || status
				.countOccupiedReduceSlots() < maxReduceSlots) && acceptNewTasks);
		localMinSpaceStart = minSpaceStart;
	}
	// The disk, memory and CPU figures below are stored in status and sent to
	// the JobTracker with the heartbeat.
	// NOTE(review): the original author states the scheduler can use these
	// figures and that they can be collected by a pluggable component -- not
	// visible in this excerpt, confirm
	if (askForNewTask) {
		// Also require enough free local space (the threshold is minSpaceStart;
		// reportedly configured via mapred.local.dir.minspacestart -- confirm)
		askForNewTask = enoughFreeSpace(localMinSpaceStart);
		// Free local disk space (reportedly under mapred.local.dir -- confirm)
		long freeDiskSpace = getFreeSpace();
		// Total virtual memory
		long totVmem = getTotalVirtualMemoryOnTT();
		// Total physical memory
		long totPmem = getTotalPhysicalMemoryOnTT();
		// Available virtual memory
		long availableVmem = getAvailableVirtualMemoryOnTT();
		// Available physical memory
		long availablePmem = getAvailablePhysicalMemoryOnTT();
		// Cumulative CPU time
		long cumuCpuTime = getCumulativeCpuTimeOnTT();
		// CPU frequency
		long cpuFreq = getCpuFrequencyOnTT();
		// Number of processors
		int numCpu = getNumProcessorsOnTT();
		// CPU usage
		float cpuUsage = getCpuUsageOnTT();


		status.getResourceStatus().setAvailableSpace(freeDiskSpace);
		status.getResourceStatus().setTotalVirtualMemory(totVmem);
		status.getResourceStatus().setTotalPhysicalMemory(totPmem);
		status.getResourceStatus().setMapSlotMemorySizeOnTT(
				mapSlotMemorySizeOnTT);
		status.getResourceStatus().setReduceSlotMemorySizeOnTT(
				reduceSlotSizeMemoryOnTT);
		status.getResourceStatus().setAvailableVirtualMemory(availableVmem);
		status.getResourceStatus()
				.setAvailablePhysicalMemory(availablePmem);
		status.getResourceStatus().setCumulativeCpuTime(cumuCpuTime);
		status.getResourceStatus().setCpuFrequency(cpuFreq);
		status.getResourceStatus().setNumProcessors(numCpu);
		status.getResourceStatus().setCpuUsage(cpuUsage);
	}
	// Health information of this TaskTracker
	TaskTrackerHealthStatus healthStatus = status.getHealthStatus();
	synchronized (this) {
		if (healthChecker != null) {
			healthChecker.setHealthStatus(healthStatus);
		} else {
			// No health checker configured: report the node as healthy
			healthStatus.setNodeHealthy(true);
			healthStatus.setLastReported(0L);
			healthStatus.setHealthReport("");
		}
	}
	//
	// Send the heartbeat. Payload: the TaskTrackerStatus, the justStarted and
	// justInited flags, whether new tasks are requested, and the response id.
	// jobClient is presumably a client-side proxy for the JobTracker -- confirm
	//
	HeartbeatResponse heartbeatResponse = jobClient.heartbeat(status,
			justStarted, justInited, askForNewTask, heartbeatResponseId);


	//
	// The heartbeat got through successfully!
	//
	heartbeatResponseId = heartbeatResponse.getResponseId();
	// Walk the task reports that were just sent: a task that is neither
	// RUNNING, UNASSIGNED nor COMMIT_PENDING and is not in its task-cleanup
	// phase is removed from runningTasks and the matching map/reduce counter
	// is decremented
	synchronized (this) {
		for (TaskStatus taskStatus : status.getTaskReports()) {
			if (taskStatus.getRunState() != TaskStatus.State.RUNNING
					&& taskStatus.getRunState() != TaskStatus.State.UNASSIGNED
					&& taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING
					&& !taskStatus.inTaskCleanupPhase()) {
				if (taskStatus.getIsMap()) {
					mapTotal--;
				} else {
					reduceTotal--;
				}
				myInstrumentation.completeTask(taskStatus.getTaskID());
				runningTasks.remove(taskStatus.getTaskID());
			}
		}


		// Clear transient status information which should only
		// be sent once to the JobTracker
		for (TaskInProgress tip : runningTasks.values()) {
			tip.getStatus().clearStatus();
		}
	}


	// Force a rebuild of 'status' on the next iteration
	status = null;
	// Hand the response back to the caller
	return heartbeatResponse;
}
心跳发送完后会收到返回的任务,首先会将需要运行的任务进行注册,然后放入队列,过程如下:
/**
 * Routes a launch action to the launcher that matches its task type:
 * map tasks go to mapLauncher, everything else to reduceLauncher.
 *
 * @param action the launch action received from the JobTracker
 */
private void addToTaskQueue(LaunchTaskAction action) {
	// Non-map tasks are handled by the reduce launcher
	if (!action.getTask().isMapTask()) {
		reduceLauncher.addToTaskQueue(action);
		return;
	}
	// Map tasks are handled by the map launcher
	mapLauncher.addToTaskQueue(action);
}


/**
 * Registers the task carried by the action, appends it to tasksToLaunch and
 * wakes every thread waiting on that list. All three steps run while holding
 * the tasksToLaunch monitor.
 *
 * @param action the launch action whose task is to be queued
 */
public void addToTaskQueue(LaunchTaskAction action) {
	synchronized (tasksToLaunch) {
		// Register first, then enqueue the resulting TaskInProgress
		tasksToLaunch.add(registerTask(action, this));
		// Wake up threads blocked in tasksToLaunch.wait()
		tasksToLaunch.notifyAll();
	}
}
注册过程如下:
/**
 * Wraps the action's task in a new TaskInProgress, records it in the tasks and
 * runningTasks maps and increments the map or reduce task counter.
 *
 * @param action   the launch action carrying the task
 * @param launcher the launcher passed to the TaskInProgress constructor
 * @return the newly created TaskInProgress
 */
private TaskInProgress registerTask(LaunchTaskAction action,
		TaskLauncher launcher) {
	final Task task = action.getTask();
	// Log the task id together with the state it arrived in
	LOG.info("LaunchTaskAction (registerTask): " + task.getTaskID()
			+ " task's state:" + task.getState());
	final TaskInProgress registered =
			new TaskInProgress(task, this.fConf, launcher);
	synchronized (this) {
		// Book-keeping maps and counters are updated under the same lock
		tasks.put(task.getTaskID(), registered);
		runningTasks.put(task.getTaskID(), registered);
		if (!task.isMapTask()) {
			reduceTotal++;
		} else {
			mapTotal++;
		}
	}
	return registered;
}
TaskLauncher线程会一直监控tasksToLaunch集合,一旦有任务到达,便取出尝试执行
/**
 * Launcher loop: repeatedly takes the first TaskInProgress from tasksToLaunch,
 * waits until numFreeSlots covers the slots the task requires, reserves them,
 * and calls startNewTask(tip).
 *
 * A task for which tip.canBeLaunched() is false is skipped; if its slots were
 * already reserved they are given back through addFreeSlots(...).
 * The loop ends when the thread's interrupt flag is set or an
 * InterruptedException is caught; any other Throwable is logged and the loop
 * continues.
 */
public void run() {
		while (!Thread.interrupted()) {
			try {
				TaskInProgress tip;
				Task task;
				// Block for as long as the launch queue is empty
				synchronized (tasksToLaunch) {
					while (tasksToLaunch.isEmpty()) {
						tasksToLaunch.wait();
					}
					// The queue is non-empty here, so a task has arrived
					tip = tasksToLaunch.remove(0);
					// Take the task out and log the launch attempt
					task = tip.getTask();
					LOG.info("Trying to launch : "
							+ tip.getTask().getTaskID() + " which needs "
							+ task.getNumSlotsRequired() + " slots");
				}
				// Wait until enough slots are free
				synchronized (numFreeSlots) {
					boolean canLaunch = true;
					while (numFreeSlots.get() < task.getNumSlotsRequired()) {
						// Make sure that there is no kill task action for
						// this task!
						// We are not locking tip here, because it would
						// reverse the
						// locking order!
						// Also, Lock for the tip is not required here!
						// because :
						// 1. runState of TaskStatus is volatile
						// 2. Any notification is not missed because
						// notification is
						// synchronized on numFreeSlots. So, while we are
						// doing the check,
						// if the tip is half way through the kill(), we
						// don't miss
						// notification for the following wait().
						// (Article author's note) The comment above is from the
						// original source. In production, the log lines below show
						// which tasks were killed externally and which are waiting
						// for slots, which helps when tuning.
						if (!tip.canBeLaunched()) {
							// got killed externally while still in the
							// launcher queue
							LOG
									.info("Not blocking slots for "
											+ task.getTaskID()
											+ " as it got killed externally. Task's state is "
											+ tip.getRunState());
							canLaunch = false;
							break;
						}
						LOG.info("TaskLauncher : Waiting for "
								+ task.getNumSlotsRequired()
								+ " to launch " + task.getTaskID()
								+ ", currently we have "
								+ numFreeSlots.get() + " free slots");
						numFreeSlots.wait();// not enough free slots: wait for a notification
					}
					if (!canLaunch) {
						// No slots were reserved for this task; move on to the next one
						continue;
					}
					// Log the slot situation again, then reserve the slots
					LOG.info("In TaskLauncher, current free slots : "
							+ numFreeSlots.get() + " and trying to launch "
							+ tip.getTask().getTaskID() + " which needs "
							+ task.getNumSlotsRequired() + " slots");
					numFreeSlots.set(numFreeSlots.get()
							- task.getNumSlotsRequired());
					assert (numFreeSlots.get() >= 0);
				}
				synchronized (tip) {
					// Re-check under the tip lock that the task was not killed
					if (!tip.canBeLaunched()) {
						// got killed externally while still in the launcher
						// queue
						LOG.info("Not launching task " + task.getTaskID()
								+ " as it got"
								+ " killed externally. Task's state is "
								+ tip.getRunState());
						// Give back the slots reserved above
						addFreeSlots(task.getNumSlotsRequired());
						continue;
					}
					tip.slotTaken = true;
				}
				// Slots are reserved; the task can now be started
				startNewTask(tip);
			} catch (InterruptedException e) {
				return; // ALL DONE
			} catch (Throwable th) {
				LOG.error("TaskLauncher error "
						+ StringUtils.stringifyException(th));
			}
		}
	}
}

你可能感兴趣的:(Hadoop MapReduce之任务启动(一))