Spark Internals Source Code Analysis, Part 7: How the Worker Works


Worker.scala

case LaunchDriver(driverId, driverDesc) => {
      logInfo(s"Asked to launch driver $driverId")
      val driver = new DriverRunner(
        conf,
        driverId,
        workDir,
        sparkHome,
        driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
        self,
        akkaUrl)
      drivers(driverId) = driver
      // Start the DriverRunner thread
      driver.start()

      coresUsed += driverDesc.cores
      memoryUsed += driverDesc.mem
    }
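
For context, this LaunchDriver message is sent by the master when it schedules a waiting driver onto this worker. Below is a simplified sketch of the sending side, based on the Master.scala of the same Spark 1.x era (field names may differ slightly between versions):

// Master.scala (simplified sketch): schedule a driver onto a worker
def launchDriver(worker: WorkerInfo, driver: DriverInfo) {
  logInfo("Launching driver " + driver.id + " on worker " + worker.id)
  // Record the driver in the worker's bookkeeping state
  worker.addDriver(driver)
  driver.worker = Some(worker)
  // Tell the worker's actor to launch the driver; this is the message
  // handled by the LaunchDriver case above
  worker.actor ! LaunchDriver(driver.id, driver.desc)
  driver.state = DriverState.RUNNING
}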

-----------------------------------------------------------------------------------
case DriverStateChanged(driverId, state, exception) => {
      state match {
        case DriverState.ERROR =>
          logWarning(s"Driver $driverId failed with unrecoverable exception: ${exception.get}")
        case DriverState.FAILED =>
          logWarning(s"Driver $driverId exited with failure")
        case DriverState.FINISHED =>
          logInfo(s"Driver $driverId exited successfully")
        case DriverState.KILLED =>
          logInfo(s"Driver $driverId was killed by user")
        case _ =>
          logDebug(s"Driver $driverId changed state to $state")
      }
      // When the driver finishes, the DriverRunner thread sends a state update to the worker;
      // the worker then forwards the DriverStateChanged message to the master, which handles the state change
      master ! DriverStateChanged(driverId, state, exception)
      // Remove the driver from the local cache
      val driver = drivers.remove(driverId).get
      // Move the driver into the finished-drivers collection
      finishedDrivers(driverId) = driver
      // Release the memory and CPU cores the driver was using
      memoryUsed -= driver.driverDesc.mem
      coresUsed -= driver.driverDesc.cores
    }
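
On the receiving end, the master treats any terminal driver state as a signal to clean the driver up. A condensed sketch of the master's handler from the same 1.x era (the exact match arms may differ by version):

// Master.scala (simplified sketch): handle the state forwarded by the worker
case DriverStateChanged(driverId, state, exception) => {
  state match {
    // All terminal states lead to the driver being removed from the master's state
    case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED =>
      removeDriver(driverId, state, exception)
    case _ =>
      throw new Exception(s"Received unexpected state update for driver $driverId: $state")
  }
}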

----------------------------------------------------------------------------------------
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
      if (masterUrl != activeMasterUrl) {
        logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
      } else {
        try {
          logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

          // Create the executor's working directory
          val executorDir = new File(workDir, appId + "/" + execId)
          if (!executorDir.mkdirs()) {
            throw new IOException("Failed to create directory " + executorDir)
          }

          // Create local dirs for the executor. These are passed to the executor via the
          // SPARK_LOCAL_DIRS environment variable, and deleted by the Worker when the
          // application finishes.
          val appLocalDirs = appDirectories.get(appId).getOrElse {
            Utils.getOrCreateLocalRootDirs(conf).map { dir =>
              Utils.createDirectory(dir).getAbsolutePath()
            }.toSeq
          }
          appDirectories(appId) = appLocalDirs
          val manager = new ExecutorRunner(
            appId,
            execId,
            appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
            cores_,
            memory_,
            self,
            workerId,
            host,
            webUi.boundPort,
            publicAddress,
            sparkHome,
            executorDir,
            akkaUrl,
            conf,
            appLocalDirs, ExecutorState.LOADING)
          // Put the ExecutorRunner into the local cache
          executors(appId + "/" + execId) = manager
          // Start the ExecutorRunner
          manager.start()
          // Account for the resources the executor will use
          coresUsed += cores_
          memoryUsed += memory_
          // Report an ExecutorStateChanged message back to the master
          master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
        } catch {
          case e: Exception => {
            logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
            if (executors.contains(appId + "/" + execId)) {
              executors(appId + "/" + execId).kill()
              executors -= appId + "/" + execId
            }
            master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
              Some(e.toString), None)
          }
        }
      }
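
ExecutorRunner works much like DriverRunner: manager.start() spawns a thread that builds the executor's launch command (a java command for CoarseGrainedExecutorBackend), starts the process, waits for it to exit, and reports the result back to the worker. A heavily condensed sketch of the 1.x ExecutorRunner, with stream redirection and error handling omitted:

// ExecutorRunner.scala (condensed sketch)
def start() {
  workerThread = new Thread("ExecutorRunner for " + fullId) {
    override def run() { fetchAndRunExecutor() }
  }
  workerThread.start()
}

def fetchAndRunExecutor() {
  // Build the executor's launch command and start the process in executorDir
  val builder = CommandUtils.buildProcessBuilder(appDesc.command, memory,
    sparkHome.getAbsolutePath, substituteVariables)
  builder.directory(executorDir)
  process = builder.start()
  // Block until the executor process exits, then report the terminal state
  val exitCode = process.waitFor()
  state = ExecutorState.EXITED
  worker ! ExecutorStateChanged(appId, execId, state, Some("exit code " + exitCode), Some(exitCode))
}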


DriverRunner: manages the execution of a single driver, including automatically restarting the driver if it fails (a sketch of that retry loop follows the start() method below). At the moment this mechanism only applies to standalone cluster deploy mode.

DriverRunner.scala

 /** Starts a thread to run and manage the driver. */
  def start() = {
    new Thread("DriverRunner for " + driverId) {
      override def run() {
        try {
          // Step 1: create the driver's working directory
          val driverDir = createWorkingDirectory()
          // Step 2: download the user-uploaded jar (the Spark application)
          val localJarFilename = downloadUserJar(driverDir)

          def substituteVariables(argument: String): String = argument match {
            case "{{WORKER_URL}}" => workerUrl
            case "{{USER_JAR}}" => localJarFilename
            case other => other
          }

          // TODO: If we add ability to submit multiple jars they should also be added here
          // Build a ProcessBuilder with the driver's launch command, the memory it needs, and other settings
          val builder = CommandUtils.buildProcessBuilder(driverDesc.command, driverDesc.mem,
            sparkHome.getAbsolutePath, substituteVariables)
          launchDriver(builder, driverDir, driverDesc.supervise)
        }
        catch {
          case e: Exception => finalException = Some(e)
        }
        // Translate the driver's exit status into a DriverState
        val state =
          if (killed) {
            DriverState.KILLED
          } else if (finalException.isDefined) {
            DriverState.ERROR
          } else {
            finalExitCode match {
              case Some(0) => DriverState.FINISHED
              case _ => DriverState.FAILED
            }
          }

        finalState = Some(state)
        // The DriverRunner thread sends a DriverStateChanged event to the actor of the worker
        // it belongs to (see the DriverStateChanged handler in Worker.scala above)
        worker ! DriverStateChanged(driverId, state, finalException)
      }
    }.start()
  }
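
launchDriver is where the restart-on-failure behavior mentioned above lives: when driverDesc.supervise is set, the 1.x DriverRunner relaunches a failed driver process with an exponential back-off. A condensed sketch of the retry loop (logging and synchronization omitted):

// DriverRunner.scala (condensed sketch of the supervise/retry loop)
def runCommandWithRetry(command: ProcessBuilderLike, initialize: Process => Unit,
    supervise: Boolean) {
  var waitSeconds = 1                 // time to wait between retries
  val successfulRunDuration = 5       // a run of this many seconds resets the back-off
  var keepTrying = !killed
  while (keepTrying) {
    process = Some(command.start())
    initialize(process.get)           // e.g. redirect stdout/stderr to files
    val processStart = clock.currentTimeMillis()
    val exitCode = process.get.waitFor()
    // A run that lasted long enough resets the back-off
    if (clock.currentTimeMillis() - processStart > successfulRunDuration * 1000) {
      waitSeconds = 1
    }
    // Only retry when supervision is on, the driver failed, and nobody killed it
    keepTrying = supervise && exitCode != 0 && !killed
    if (keepTrying) {
      sleeper.sleep(waitSeconds)
      waitSeconds = waitSeconds * 2   // exponential back-off
    }
    finalExitCode = Some(exitCode)
  }
}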

------------------------------------------------------------------------------------
// Download the user's jar into the supplied directory (the driver working directory created above) and return its local path on the worker

private def downloadUserJar(driverDir: File): String = {
    // Use the Path class from the Hadoop jar
    val jarPath = new Path(driverDesc.jarUrl)
    // Get the Hadoop configuration
    val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
    // Get the FileSystem for the jar's URL (HDFS here)
    val jarFileSystem = jarPath.getFileSystem(hadoopConf)
    // Build the destination path inside the driver's working directory
    val destPath = new File(driverDir.getAbsolutePath, jarPath.getName)
    val jarFileName = jarPath.getName
    val localJarFile = new File(driverDir, jarFileName)
    val localJarFilename = localJarFile.getAbsolutePath
    // If the jar does not exist locally yet
    if (!localJarFile.exists()) { // May already exist if running multiple workers on one node
      logInfo(s"Copying user jar $jarPath to $destPath")
      // Copy the jar to the local filesystem with Hadoop's FileUtil
      FileUtil.copy(jarFileSystem, jarPath, destPath, false, hadoopConf)
    }

    if (!localJarFile.exists()) { // Verify copy succeeded
      throw new Exception(s"Did not see expected jar $jarFileName in $driverDir")
    }

    localJarFilename
  }
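
The jar URL itself comes from the DriverDescription that the master passed along with LaunchDriver; Hadoop's Path/FileSystem API picks the concrete filesystem implementation from the URL's scheme. A small self-contained illustration of the same API calls, with a hypothetical URL and destination path:

import java.io.File
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileUtil, Path}

// Hypothetical jar location; hdfs://, file:// and other schemes all work here
val hadoopConf = new Configuration()
val jarPath = new Path("hdfs://namenode:8020/user/alice/myapp.jar")
// getFileSystem inspects the URI scheme and returns the matching FileSystem
val jarFileSystem = jarPath.getFileSystem(hadoopConf)
// copy(srcFS, src, dst, deleteSource, conf): deleteSource = false keeps the original
FileUtil.copy(jarFileSystem, jarPath, new File("/tmp/driver-workdir/myapp.jar"), false, hadoopConf)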
