客户端构建好RDD的DAG以后,会提交至DAGScheduler来处理,这是一个Stage级别的调度器,他首先会把作业切分为一个个Stage,每个Stage由一组相同运算的tasks组成,然后会以taskset的形式提交给TaskScheduler。DS还会跟踪stage的输出与物化情况、检测task运行时的最优位置,重新提交失败的stage。
DAGSheduler接收到JobSubmitted的消息后,进入作业提交逻辑
override def onReceive(event: DAGSchedulerEvent): Unit = event match {
case JobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) =>
//作业在此提交
dagScheduler.handleJobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite,
listener, properties)
提交过程中会创建finalStage,根据该stage创建ActiveJob,最后启动该Job
private[scheduler] def handleJobSubmitted(jobId: Int,
finalRDD: RDD[_],
func: (TaskContext, Iterator[_]) => _,
partitions: Array[Int],
allowLocal: Boolean,
callSite: CallSite,
listener: JobListener,
properties: Properties) {
var finalStage: Stage = null
try {
finalStage = newStage(finalRDD, partitions.size, None, jobId, callSite)
} catch {
....
}
if (finalStage != null) {
val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties)
clearCacheLocs()
......
......
if (shouldRunLocally) {
...
} else {
jobIdToActiveJob(jobId) = job
activeJobs += job
finalStage.resultOfJob = Some(job)
val stageIds = jobIdToStageIds(jobId).toArray
val stageInfos = stageIds.flatMap(id => stageIdToStage.get(id).map(_.latestInfo))
listenerBus.post(
SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos, properties))
submitStage(finalStage)
}
}
submitWaitingStages()
} <span style="font-family: 'Courier New'; background-color: rgb(255, 255, 255);"> </span>
在RDD的依赖关系中,有两种依赖:宽依赖和窄依赖,RDD遇到窄依赖会归到一个Stage中,形成pipeline,遇到宽依赖则切分stage,通常有shuffle就会形成宽依赖,所以shuffle成了stage切分的边界,newStage函数会生产stage的DAG图,该图记录了当前作业中各stage的依赖情况
private def newStage(
rdd: RDD[_],
numTasks: Int,
shuffleDep: Option[ShuffleDependency[_, _, _]],
jobId: Int,
callSite: CallSite)
: Stage =
{
//获得父stage
val parentStages = getParentStages(rdd, jobId)
//获得stage id
val id = nextStageId.getAndIncrement()
//创建新的stage,会传入父stage,形成依赖关系,组成DAG图
val stage = new Stage(id, rdd, numTasks, shuffleDep, parentStages, jobId, callSite)
stageIdToStage(id) = stage
updateJobIdStageIdMaps(jobId, stage)
stage
}
获取父stage的流程如下,注意visit函数,从该段逻辑中可以看出,只有碰到shuffle依赖才会切割stage
private def getParentStages(rdd: RDD[_], jobId: Int): List[Stage] = {
val parents = new HashSet[Stage]
val visited = new HashSet[RDD[_]]
// We are manually maintaining a stack here to prevent StackOverflowError
// caused by recursively visiting
val waitingForVisit = new Stack[RDD[_]]
def visit(r: RDD[_]) {
if (!visited(r)) {
visited += r
// 注意此处,两个stage之间的依赖必定是有shuffle依赖
//这里会一直向上查找,直到看到shuffle依赖,如果没有,那么这个作业就一个stage
for (dep <- r.dependencies) {
dep match {
case shufDep: ShuffleDependency[_, _, _] =>
parents += getShuffleMapStage(shufDep, jobId)
case _ =>
waitingForVisit.push(dep.rdd)
}
}
}
}
waitingForVisit.push(rdd)
//这里会不停的向上遍历
while (!waitingForVisit.isEmpty) {
visit(waitingForVisit.pop())
}
parents.toList
}
下面是父stage,也就是shuffle stage的创建流程
private def getShuffleMapStage(shuffleDep: ShuffleDependency[_, _, _], jobId: Int): Stage = {
shuffleToMapStage.get(shuffleDep.shuffleId) match {
case Some(stage) => stage
case None =>
// We are going to register ancestor shuffle dependencies
registerShuffleDependencies(shuffleDep, jobId)
// Then register current shuffleDep
val stage =
newOrUsedStage(
shuffleDep.rdd, shuffleDep.rdd.partitions.size, shuffleDep, jobId,
shuffleDep.rdd.creationSite)
shuffleToMapStage(shuffleDep.shuffleId) = stage
stage
}
} <span style="font-family: 'Courier New'; background-color: rgb(255, 255, 255);"> </span>
最終通过递归方式提交stage
private def submitStage(stage: Stage) {
val jobId = activeJobForStage(stage)
if (jobId.isDefined) {
logDebug("submitStage(" + stage + ")")
if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) {
val missing = getMissingParentStages(stage).sortBy(_.id)
logDebug("missing: " + missing)
if (missing == Nil) {
logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")
submitMissingTasks(stage, jobId.get)
} else {
for (parent <- missing) {
submitStage(parent)
}
waitingStages += stage
}
}
} else {
abortStage(stage, "No active job for stage " + stage.id)
}
}