Spark 1.1.0 Source Code Reading: DAGScheduler and Stage

1. RDD action -> SparkContext.runJob -> DAGScheduler.runJob

      def runJob[T, U: ClassTag](
          rdd: RDD[T],
          func: (TaskContext, Iterator[T]) => U,
          partitions: Seq[Int],
          callSite: String,
          allowLocal: Boolean,
          resultHandler: (Int, U) => Unit,
          properties: Properties = null)
      {
        val waiter = submitJob(rdd, func, partitions, callSite, allowLocal, resultHandler, properties)
        waiter.awaitResult() match {
          case JobSucceeded => {}
          case JobFailed(exception: Exception) =>
            logInfo("Failed to run " + callSite)
            throw exception
        }
      }
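
To make the call chain concrete, here is a minimal driver program (a hypothetical example, not part of the Spark source) whose action walks exactly this path: collect() calls SparkContext.runJob, which delegates to the DAGScheduler.runJob shown above, yielding one ShuffleMapStage for the reduceByKey and one final ResultStage. It is used as a running example in the notes below.

    import org.apache.spark.SparkContext._   // needed for reduceByKey on pre-1.3 Spark
    import org.apache.spark.{SparkConf, SparkContext}

    // Hypothetical word-count driver used as a running example below.
    object StageDemo {
      def main(args: Array[String]) {
        val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("stage-demo"))
        val counts = sc.textFile("README.md")   // HadoopRDD -> narrow dependencies
          .flatMap(_.split(" "))
          .map(word => (word, 1))
          .reduceByKey(_ + _)                   // ShuffleDependency => a parent ShuffleMapStage
        counts.collect()                        // RDD action -> sc.runJob -> dagScheduler.runJob
        sc.stop()
      }
    }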

2. submitJob

      /**
       * Submit a job to the job scheduler and get a JobWaiter object back. The JobWaiter object
       * can be used to block until the job finishes executing or can be used to cancel the job.
       */
      def submitJob[T, U](
          rdd: RDD[T],
          func: (TaskContext, Iterator[T]) => U,
          partitions: Seq[Int],
          callSite: String,
          allowLocal: Boolean,
          resultHandler: (Int, U) => Unit,
          properties: Properties = null): JobWaiter[U] =
      {
        // Check to make sure we are not launching a task on a partition that does not exist.
        val maxPartitions = rdd.partitions.length
        partitions.find(p => p >= maxPartitions || p < 0).foreach { p =>
          throw new IllegalArgumentException(
            "Attempting to access a non-existent partition: " + p + ". " +
              "Total number of partitions: " + maxPartitions)
        }

        val jobId = nextJobId.getAndIncrement()
        if (partitions.size == 0) {
          return new JobWaiter[U](this, jobId, 0, resultHandler)
        }

        assert(partitions.size > 0)
        val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
        val waiter = new JobWaiter(this, jobId, partitions.size, resultHandler)
        // Send a JobSubmitted message to eventProcessActor. Open question from the original notes:
        // how does the rdd become part of the message -- is its metadata (partition locations,
        // etc.) serialized?
        eventProcessActor ! JobSubmitted(
          jobId, rdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties)
        waiter
      }
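
submitJob returns immediately; the blocking happens back in runJob, in waiter.awaitResult(). A minimal sketch of the synchronization idea behind JobWaiter (a hypothetical SimpleJobWaiter, not the real class):

    // Hypothetical sketch of the idea behind JobWaiter: the scheduler reports each finished
    // partition, and awaitResult() blocks until all of them have arrived or the job fails.
    class SimpleJobWaiter[U](totalTasks: Int, resultHandler: (Int, U) => Unit) {
      private var finishedTasks = 0
      private var failure: Option[Exception] = None

      def taskSucceeded(index: Int, result: U): Unit = synchronized {
        resultHandler(index, result)   // hand the partition's result to the caller's handler
        finishedTasks += 1
        if (finishedTasks == totalTasks) notifyAll()
      }

      def jobFailed(e: Exception): Unit = synchronized {
        failure = Some(e)
        notifyAll()
      }

      def awaitResult(): Unit = synchronized {
        while (finishedTasks < totalTasks && failure.isEmpty) wait()
        failure.foreach(e => throw e)
      }
    }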

3. DAGSchedulerEventProcessActor

    private[scheduler] class DAGSchedulerEventProcessActor(dagScheduler: DAGScheduler)
      extends Actor with Logging {

      override def preStart() {
        // set DAGScheduler for taskScheduler to ensure eventProcessActor is always
        // valid when the messages arrive
        dagScheduler.taskScheduler.setDAGScheduler(dagScheduler)
      }

      /**
       * The main event loop of the DAG scheduler.
       */
      def receive = {
        case JobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) =>
          dagScheduler.handleJobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite,
            listener, properties)

        case StageCancelled(stageId) =>
          dagScheduler.handleStageCancellation(stageId)

        case JobCancelled(jobId) =>
          dagScheduler.handleJobCancellation(jobId)

        case JobGroupCancelled(groupId) =>
          dagScheduler.handleJobGroupCancelled(groupId)

        case AllJobsCancelled =>
          dagScheduler.doCancelAllJobs()

        // ... (remaining cases and the closing braces are omitted in this excerpt)

4. The actor invokes handleJobSubmitted

      private[scheduler] def handleJobSubmitted(jobId: Int,
          finalRDD: RDD[_],
          func: (TaskContext, Iterator[_]) => _,
          partitions: Array[Int],
          allowLocal: Boolean,
          callSite: String,
          listener: JobListener,
          properties: Properties = null)
      {
        var finalStage: Stage = null
        try {
          // New stage creation may throw an exception if, for example, jobs are run on a
          // HadoopRDD whose underlying HDFS files have been deleted.
          finalStage = newStage(finalRDD, partitions.size, None, jobId, Some(callSite))
        } catch {
          case e: Exception =>
            logWarning("Creating new stage failed due to exception - job: " + jobId, e)
            listener.jobFailed(e)
            return
        }
        if (finalStage != null) {
          val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties)
          clearCacheLocs()
          logInfo("Got job %s (%s) with %d output partitions (allowLocal=%s)".format(
            job.jobId, callSite, partitions.length, allowLocal))
          logInfo("Final stage: " + finalStage + "(" + finalStage.name + ")")
          logInfo("Parents of final stage: " + finalStage.parents)
          logInfo("Missing parents: " + getMissingParentStages(finalStage))
          if (allowLocal && finalStage.parents.size == 0 && partitions.length == 1) {
            // Compute very short actions like first() or take() with no parent stages locally.
            listenerBus.post(SparkListenerJobStart(job.jobId, Array[Int](), properties))
            // Only one partition, no parent stages, and allowLocal: run the job in a dedicated thread.
            runLocally(job)
          } else {
            jobIdToActiveJob(jobId) = job
            activeJobs += job
            resultStageToJob(finalStage) = job
            listenerBus.post(SparkListenerJobStart(job.jobId, jobIdToStageIds(jobId).toArray,
              properties))
            submitStage(finalStage)
          }
        }
        submitWaitingStages()
      }
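
The allowLocal branch above is what keeps very small actions cheap. For example (a hypothetical snippet reusing the sc from the StageDemo example), first() boils down to a single-partition job submitted with allowLocal = true, and a parallelized collection has no parent stages, so it should go through runLocally rather than the TaskScheduler:

    // Hypothetical illustration of the local-execution branch: one partition requested,
    // no parent stages, allowLocal = true  =>  handleJobSubmitted calls runLocally(job).
    val firstElement = sc.parallelize(1 to 100, 4).first()
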
      /**
       * Create a Stage -- either directly for use as a result stage, or as part of the (re)-creation
       * of a shuffle map stage in newOrUsedStage.  The stage will be associated with the provided
       * jobId. Production of shuffle map stages should always use newOrUsedStage, not newStage
       * directly.
       */
      private def newStage(
          rdd: RDD[_],
          numTasks: Int,
          shuffleDep: Option[ShuffleDependency[_,_]],
          jobId: Int,
          callSite: Option[String] = None)
        : Stage =
      {
        val id = nextStageId.getAndIncrement()
        val stage =
          new Stage(id, rdd, numTasks, shuffleDep, getParentStages(rdd, jobId), jobId, callSite)
        stageIdToStage(id) = stage
        updateJobIdStageIdMaps(jobId, stage)
        stageToInfos(stage) = StageInfo.fromStage(stage)
        stage
      }
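
newStage delegates the dependency walk to getParentStages, which is not quoted in this post. A simplified sketch of the idea (not the exact 1.1.0 source; it assumes DAGScheduler members shown elsewhere here, such as getShuffleMapStage): traverse the lineage of rdd, treat every ShuffleDependency as a stage boundary whose map side becomes a parent stage, and keep following narrow dependencies inside the current stage.

    // Simplified sketch of the parent-stage walk (not the real getParentStages).
    def parentStagesSketch(rdd: RDD[_], jobId: Int): List[Stage] = {
      val parents = new HashSet[Stage]
      val visited = new HashSet[RDD[_]]
      def visit(r: RDD[_]) {
        if (!visited(r)) {
          visited += r
          for (dep <- r.dependencies) {
            dep match {
              case shufDep: ShuffleDependency[_, _] =>
                parents += getShuffleMapStage(shufDep, jobId)   // shuffle => stage boundary
              case _ =>
                visit(dep.rdd)                                  // narrow => same stage, keep walking
            }
          }
        }
      }
      visit(rdd)
      parents.toList
    }
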
      /**
       * Run a job on an RDD locally, assuming it has only a single partition and no dependencies.
       * We run the operation in a separate thread just in case it takes a bunch of time, so that we
       * don't block the DAGScheduler event loop or other concurrent jobs.
       */
      protected def runLocally(job: ActiveJob) {
        logInfo("Computing the requested partition locally")
        new Thread("Local computation of job " + job.jobId) {
          override def run() {
            runLocallyWithinThread(job)
          }
        }.start()
      }

5. submitStage: if any parent stages are missing, they are submitted recursively first

      /** Submits stage, but first recursively submits any missing parents. */
      private def submitStage(stage: Stage) {
        val jobId = activeJobForStage(stage)
        if (jobId.isDefined) {
          logDebug("submitStage(" + stage + ")")
          if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) {
            val missing = getMissingParentStages(stage).sortBy(_.id)
            logDebug("missing: " + missing)
            if (missing == Nil) {
              logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")
              submitMissingTasks(stage, jobId.get)
              runningStages += stage
            } else {
              for (parent <- missing) {
                submitStage(parent)
              }
              waitingStages += stage
            }
          }
        } else {
          abortStage(stage, "No active job for stage " + stage.id)
        }
      }
      private def getMissingParentStages(stage: Stage): List[Stage] = {
        val missing = new HashSet[Stage]
        val visited = new HashSet[RDD[_]]
        def visit(rdd: RDD[_]) {
          if (!visited(rdd)) {
            visited += rdd
            // If cacheLocs contains Nil, this rdd is treated as missing (not fully cached).
            if (getCacheLocs(rdd).contains(Nil)) {
              for (dep <- rdd.dependencies) {
                // Two cases: a ShuffleDependency yields a (possibly missing) shuffle map stage,
                // while a NarrowDependency is followed recursively within the same stage.
                dep match {
                  case shufDep: ShuffleDependency[_,_] =>
                    val mapStage = getShuffleMapStage(shufDep, stage.jobId)
                    if (!mapStage.isAvailable) {
                      missing += mapStage
                    }
                  case narrowDep: NarrowDependency[_] =>
                    visit(narrowDep.rdd)
                }
              }
            }
          }
        }
        visit(stage.rdd)
        missing.toList
      }
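
Putting this section and the next one together with the hypothetical StageDemo job from section 1, the recursion plays out roughly as follows (an illustrative trace, not real scheduler output):

    // Hypothetical trace for the StageDemo word-count job:
    // submitStage(ResultStage 1)                           // final stage for collect()
    //   getMissingParentStages -> List(ShuffleMapStage 0)  // reduceByKey boundary, no map output yet
    //   submitStage(ShuffleMapStage 0)                     // recurse into the missing parent first
    //     missing == Nil -> submitMissingTasks(ShuffleMapStage 0); runningStages += it
    //   waitingStages += ResultStage 1                     // parked until the map stage completes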

6. submitMissingTasks

      /** Called when stage's parents are available and we can now do its task. */
      private def submitMissingTasks(stage: Stage, jobId: Int) {
        logDebug("submitMissingTasks(" + stage + ")")
        // Get our pending tasks and remember them in our pendingTasks entry
        val myPending = pendingTasks.getOrElseUpdate(stage, new HashSet)
        myPending.clear()
        var tasks = ArrayBuffer[Task[_]]()
        if (stage.isShuffleMap) {
          // Create a ShuffleMapTask for every partition in this stage whose outputLocs is Nil.
          for (p <- 0 until stage.numPartitions if stage.outputLocs(p) == Nil) {
            val locs = getPreferredLocs(stage.rdd, p)
            tasks += new ShuffleMapTask(stage.id, stage.rdd, stage.shuffleDep.get, p, locs)
          }
        } else {
          // This is a final stage; figure out its job's missing partitions
          val job = resultStageToJob(stage)
          for (id <- 0 until job.numPartitions if !job.finished(id)) {
            val partition = job.partitions(id)
            val locs = getPreferredLocs(stage.rdd, partition)
            tasks += new ResultTask(stage.id, stage.rdd, job.func, partition, locs, id) // create a ResultTask
          }
        }

        val properties = if (jobIdToActiveJob.contains(jobId)) {
          jobIdToActiveJob(stage.jobId).properties
        } else {
          // this stage will be assigned to "default" pool
          null
        }

        // must run the listener before a possible NotSerializableException
        // should be "StageSubmitted" first and then "JobEnded"
        listenerBus.post(SparkListenerStageSubmitted(stageToInfos(stage), properties))

        if (tasks.size > 0) {
          // Preemptively serialize a task to make sure it can be serialized. We are catching this
          // exception here because it would be fairly hard to catch the non-serializable exception
          // down the road, where we have several different implementations for local scheduler and
          // cluster schedulers.
          try {
            SparkEnv.get.closureSerializer.newInstance().serialize(tasks.head)
          } catch {
            case e: NotSerializableException =>
              abortStage(stage, "Task not serializable: " + e.toString)
              runningStages -= stage
              return
          }

          logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")")
          myPending ++= tasks
          logDebug("New pending tasks: " + myPending)
          // Wrap the tasks in a TaskSet and hand it to taskScheduler.submitTasks.
          taskScheduler.submitTasks(
            new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties))
          stageToInfos(stage).submissionTime = Some(System.currentTimeMillis())
        } else {
          logDebug("Stage " + stage + " is actually done; %b %d %d".format(
            stage.isAvailable, stage.numAvailableOutputs, stage.numPartitions))
          runningStages -= stage
        }
      }
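
The pre-serialization of tasks.head above is what turns an unserializable closure into an early, readable failure instead of an obscure error on the executors. A minimal user-level reproduction (hypothetical names, reusing a local SparkContext as in StageDemo):

    // Hypothetical reproduction of the failure caught by the pre-serialization check:
    // the map closure captures `helper`, which is not Serializable, so serializing the
    // first task fails and the stage is aborted with a "Task not serializable" error.
    class NonSerializableHelper { def double(x: Int): Int = x * 2 }

    val helper = new NonSerializableHelper
    sc.parallelize(1 to 10).map(x => helper.double(x)).collect()   // job fails with a serialization error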

7. TaskSet: when the steps above find that some partitions of an RDD are missing, a task is generated for each missing partition, and those tasks are scheduled together as a TaskSet.

    /**
     * A set of tasks submitted together to the low-level TaskScheduler, usually representing
     * missing partitions of a particular stage.
     */
    private[spark] class TaskSet(
        val tasks: Array[Task[_]],
        val stageId: Int,
        val attempt: Int,
        val priority: Int,
        val properties: Properties) {
      val id: String = stageId + "." + attempt

      def kill(interruptThread: Boolean) {
        tasks.foreach(_.kill(interruptThread))
      }

      override def toString: String = "TaskSet " + id
    }

8. taskScheduler.submitTasks

      override def submitTasks(taskSet: TaskSet) {
        val tasks = taskSet.tasks
        logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks")
        this.synchronized {
          val manager = new TaskSetManager(this, taskSet, maxTaskFailures)
          activeTaskSets(taskSet.id) = manager
          schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)

          if (!isLocal && !hasReceivedTask) {
            starvationTimer.scheduleAtFixedRate(new TimerTask() {
              override def run() {
                if (!hasLaunchedTask) {
                  logWarning("Initial job has not accepted any resources; " +
                    "check your cluster UI to ensure that workers are registered " +
                    "and have sufficient memory")
                } else {
                  this.cancel()
                }
              }
            }, STARVATION_TIMEOUT, STARVATION_TIMEOUT)
          }
          hasReceivedTask = true
        }
        backend.reviveOffers()
      }
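
What happens after backend.reviveOffers() is beyond the scope of this post, but roughly (a sketch of the 1.x flow from memory, not quoted code): the backend collects resource offers from the executors, TaskSchedulerImpl matches the queued TaskSetManagers against them in pool order, and the chosen tasks are launched.

    // Rough flow after reviveOffers() (sketch, not exact 1.1.0 code):
    // CoarseGrainedSchedulerBackend receives ReviveOffers
    //   -> TaskSchedulerImpl.resourceOffers(offers)                    // one WorkerOffer per executor
    //     -> rootPool.getSortedTaskSetQueue                            // FIFO or FAIR ordering
    //       -> TaskSetManager.resourceOffer(execId, host, maxLocality) // pick a task, respecting locality
    //   -> the backend sends LaunchTask messages to the chosen executors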

 
