Worker.scala
// Handles a request from the master to launch a driver on this worker.
case LaunchDriver(driverId, driverDesc) => {
logInfo(s"Asked to launch driver $driverId")
// Build a DriverRunner that manages the lifecycle of this driver process.
val driver = new DriverRunner(
conf,
driverId,
workDir,
sparkHome,
// Rewrite the driver command so SSL settings from this worker's conf are applied.
driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
self,
akkaUrl)
// Cache the runner locally so later messages (e.g. a kill request) can look it up by id.
drivers(driverId) = driver
// Start the DriverRunner (spawns a dedicated thread that launches the driver process).
driver.start()
// Account for the cores and memory this driver occupies on the worker.
coresUsed += driverDesc.cores
memoryUsed += driverDesc.mem
}
-----------------------------------------------------------------------------------
// Sent by a DriverRunner thread once the driver it manages reaches a terminal state.
case DriverStateChanged(driverId, state, exception) => {
// Log at a severity matching the terminal state.
state match {
case DriverState.ERROR =>
// NOTE(review): exception.get assumes ERROR always carries Some(exception) — verify senders.
logWarning(s"Driver $driverId failed with unrecoverable exception: ${exception.get}")
case DriverState.FAILED =>
logWarning(s"Driver $driverId exited with failure")
case DriverState.FINISHED =>
logInfo(s"Driver $driverId exited successfully")
case DriverState.KILLED =>
logInfo(s"Driver $driverId was killed by user")
case _ =>
logDebug(s"Driver $driverId changed state to $state")
}
// After the driver finishes, the DriverRunner thread sends this state to the worker;
// the worker then forwards the DriverStateChanged message to the master, which
// performs the actual state-change handling.
master ! DriverStateChanged(driverId, state, exception)
// Remove the driver from the local cache.
// NOTE(review): .get throws if the id is unknown — assumes the runner was registered at launch.
val driver = drivers.remove(driverId).get
// Move the driver into the finished-drivers map.
finishedDrivers(driverId) = driver
// Release the memory and cores the driver was using.
memoryUsed -= driver.driverDesc.mem
coresUsed -= driver.driverDesc.cores
}
----------------------------------------------------------------------------------------
// Handles a request from the (active) master to launch an executor for an application.
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
// Ignore launch requests from a master that is not the one we are registered with.
if (masterUrl != activeMasterUrl) {
logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
} else {
try {
logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
// Create the executor's working directory
val executorDir = new File(workDir, appId + "/" + execId)
if (!executorDir.mkdirs()) {
throw new IOException("Failed to create directory " + executorDir)
}
// Create local dirs for the executor. These are passed to the executor via the
// SPARK_LOCAL_DIRS environment variable, and deleted by the Worker when the
// application finishes.
val appLocalDirs = appDirectories.get(appId).getOrElse {
Utils.getOrCreateLocalRootDirs(conf).map { dir =>
Utils.createDirectory(dir).getAbsolutePath()
}.toSeq
}
appDirectories(appId) = appLocalDirs
// Build an ExecutorRunner that manages the executor process; the command is
// rewritten so this worker's SSL settings are applied.
val manager = new ExecutorRunner(
appId,
execId,
appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
cores_,
memory_,
self,
workerId,
host,
webUi.boundPort,
publicAddress,
sparkHome,
executorDir,
akkaUrl,
conf,
appLocalDirs, ExecutorState.LOADING)
// Add the ExecutorRunner to the local cache, keyed by "appId/execId".
executors(appId + "/" + execId) = manager
// Start the ExecutorRunner.
manager.start()
// Account for the resources the executor will use.
coresUsed += cores_
memoryUsed += memory_
// Report an ExecutorStateChanged message back to the master.
master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
} catch {
case e: Exception => {
// Roll back: kill and drop the runner if it was registered, then tell the
// master the launch FAILED so it can reschedule.
logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
if (executors.contains(appId + "/" + execId)) {
executors(appId + "/" + execId).kill()
executors -= appId + "/" + execId
}
master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
Some(e.toString), None)
}
}
}
DriverRunner: manages the execution of a single driver, including automatically restarting the driver when it fails. Currently this mechanism applies only to the standalone cluster deploy mode.
DriverRunner.scala
/** Starts a thread to run and manage the driver. */
/** Starts a thread to run and manage the driver. */
def start() = {
new Thread("DriverRunner for " + driverId) {
override def run() {
try {
// Step 1: create the driver's working directory.
val driverDir = createWorkingDirectory()
// Step 2: download the user-uploaded jar (the Spark application) into it.
val localJarFilename = downloadUserJar(driverDir)
// Substitutes placeholder tokens in command arguments with concrete values.
def substituteVariables(argument: String): String = argument match {
case "{{WORKER_URL}}" => workerUrl
case "{{USER_JAR}}" => localJarFilename
case other => other
}
// TODO: If we add ability to submit multiple jars they should also be added here
// Build the ProcessBuilder, passing in the driver's launch command, the
// required memory size, and other launch information.
val builder = CommandUtils.buildProcessBuilder(driverDesc.command, driverDesc.mem,
sparkHome.getAbsolutePath, substituteVariables)
// Launch (and, if supervise is set, re-launch on failure) the driver process.
launchDriver(builder, driverDir, driverDesc.supervise)
}
catch {
// Record the failure; it is translated into DriverState.ERROR below.
case e: Exception => finalException = Some(e)
}
// Derive the driver's terminal state from how the process ended:
// killed by user > error during launch > exit code (0 = FINISHED, else FAILED).
val state =
if (killed) {
DriverState.KILLED
} else if (finalException.isDefined) {
DriverState.ERROR
} else {
finalExitCode match {
case Some(0) => DriverState.FINISHED
case _ => DriverState.FAILED
}
}
finalState = Some(state)
// This DriverRunner thread sends a DriverStateChanged event to the worker actor
// that owns it (see the DriverStateChanged handler in Worker.scala above).
worker ! DriverStateChanged(driverId, state, finalException)
}
}.start()
}
------------------------------------------------------------------------------------
//将用户jar包下载到提供的目录中(之前创建的driver工作目录),并返回它在worker本地的路径
/**
 * Downloads the user jar into the supplied directory (the driver working directory
 * created earlier) and returns its path on this worker's local filesystem.
 *
 * The copy is skipped if the jar already exists locally (multiple workers on one
 * node may share the directory), and the result is verified afterwards.
 *
 * @param driverDir the driver's working directory to download into
 * @return absolute local path of the downloaded jar
 * @throws Exception if the jar is still missing after the copy attempt
 */
private def downloadUserJar(driverDir: File): String = {
  // Hadoop Path handles whatever scheme jarUrl uses (e.g. hdfs://, file://).
  val jarPath = new Path(driverDesc.jarUrl)
  // Hadoop configuration derived from the Spark conf.
  val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
  // FileSystem implementation for the jar's scheme (e.g. HDFS).
  val jarFileSystem = jarPath.getFileSystem(hadoopConf)
  val jarFileName = jarPath.getName
  // Single destination File. The original computed the same destination twice
  // (once from driverDir.getAbsolutePath, once from driverDir), which could even
  // yield different string forms for a relative driverDir; unified here using the
  // absolute form so the copy target, the existence checks, and the returned path
  // all refer to one value.
  val localJarFile = new File(driverDir.getAbsolutePath, jarFileName)
  val localJarFilename = localJarFile.getAbsolutePath
  if (!localJarFile.exists()) { // May already exist if running multiple workers on one node
    logInfo(s"Copying user jar $jarPath to $localJarFile")
    // Copy the jar from the remote filesystem to the local working directory.
    FileUtil.copy(jarFileSystem, jarPath, localJarFile, false, hadoopConf)
  }
  if (!localJarFile.exists()) { // Verify copy succeeded
    throw new Exception(s"Did not see expected jar $jarFileName in $driverDir")
  }
  localJarFilename
}