Source code walkthrough
1 spark-submit.sh
#!/usr/bin/env bash
if [ -z "${SPARK_HOME}" ]; then
source "$(dirname "$0")"/find-spark-home
fi
# disable randomized hash for string in Python 3.3+
export PYTHONHASHSEED=0
exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
2 Invoking the main function
- Enter org.apache.spark.deploy.SparkSubmit.scala
override def main(args: Array[String]): Unit = {
val submit = new SparkSubmit() {
self =>
override protected def parseArguments(args: Array[String]): SparkSubmitArguments = {
new SparkSubmitArguments(args) {
override protected def logInfo(msg: => String): Unit = self.logInfo(msg)
override protected def logWarning(msg: => String): Unit = self.logWarning(msg)
}
}
override protected def logInfo(msg: => String): Unit = printMessage(msg)
override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg")
override def doSubmit(args: Array[String]): Unit = {
try {
super.doSubmit(args)
} catch {
case e: SparkUserAppException =>
exitFn(e.exitCode)
}
}
}
submit.doSubmit(args)
}
- Create a SparkSubmit instance;
- Call SparkSubmit.doSubmit() to submit the job.
3 The Application submission process
- Enter org.apache.spark.deploy.SparkSubmit.scala
def doSubmit(args: Array[String]): Unit = {
// Initialize logging if it hasn't been done yet. Keep track of whether logging needs to
// be reset before the application starts.
val uninitLog = initializeLogIfNecessary(true, silent = true)
val appArgs = parseArguments(args)
if (appArgs.verbose) {
logInfo(appArgs.toString)
}
appArgs.action match {
case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
case SparkSubmitAction.KILL => kill(appArgs)
case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs)
case SparkSubmitAction.PRINT_VERSION => printVersion()
}
}
- Parse the arguments;
- Match SparkSubmitAction.SUBMIT and take the corresponding branch to submit the job (a rough sketch of this dispatch follows below).
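As a rough, hypothetical sketch (this is not the SparkSubmitArguments parsing code), the mapping from command-line flags to a SparkSubmitAction looks roughly like this: plain submission is the default, while --kill, --status and --version select the other actions.

// Sketch only: a simplified mapping from spark-submit flags to an action.
object ActionDispatchSketch {
  sealed trait Action
  case object Submit extends Action
  case object Kill extends Action
  case object RequestStatus extends Action
  case object PrintVersion extends Action

  def actionOf(args: Seq[String]): Action =
    if (args.contains("--kill")) Kill
    else if (args.contains("--status")) RequestStatus
    else if (args.contains("--version")) PrintVersion
    else Submit

  def main(args: Array[String]): Unit =
    println(actionOf(Seq("--class", "com.example.MyApp", "app.jar"))) // Submit
}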
- Enter org.apache.spark.deploy.SparkSubmit.scala
/**
* Submit the application using the provided parameters, ensuring to first wrap
* in a doAs when --proxy-user is specified.
*/
@tailrec
private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
def doRunMain(): Unit = {
if (args.proxyUser != null) {
val proxyUser = UserGroupInformation.createProxyUser(args.proxyUser,
UserGroupInformation.getCurrentUser())
try {
proxyUser.doAs(new PrivilegedExceptionAction[Unit]() {
override def run(): Unit = {
runMain(args, uninitLog)
}
})
} catch {
case e: Exception =>
// Hadoop's AuthorizationException suppresses the exception's stack trace, which
// makes the message printed to the output by the JVM not very helpful. Instead,
// detect exceptions with empty stack traces here, and treat them differently.
if (e.getStackTrace().length == 0) {
error(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
} else {
throw e
}
}
} else {
runMain(args, uninitLog)
}
}
// In standalone cluster mode, there are two submission gateways:
// (1) The traditional RPC gateway using o.a.s.deploy.Client as a wrapper
// (2) The new REST-based gateway introduced in Spark 1.3
// The latter is the default behavior as of Spark 1.3, but Spark submit will fail over
// to use the legacy gateway if the master endpoint turns out to be not a REST server.
if (args.isStandaloneCluster && args.useRest) {
try {
logInfo("Running Spark using the REST application submission protocol.")
doRunMain()
} catch {
// Fail over to use the legacy submission gateway
case e: SubmitRestConnectionException =>
logWarning(s"Master endpoint ${args.master} was not a REST server. " +
"Falling back to legacy submission gateway instead.")
args.useRest = false
submit(args, false)
}
// In all other modes, just run the main class as prepared
} else {
doRunMain()
}
}
- Enter org.apache.spark.deploy.SparkSubmit.scala
/**
* Run the main method of the child class using the submit arguments.
*
* This runs in two steps. First, we prepare the launch environment by setting up
* the appropriate classpath, system properties, and application arguments for
* running the child main class based on the cluster manager and the deploy mode.
* Second, we use this launch environment to invoke the main method of the child
* main class.
*
* Note that this main class will not be the one provided by the user if we're
* running cluster deploy mode or python applications.
*/
private def runMain(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
val (childArgs, childClasspath, sparkConf, childMainClass) = prepareSubmitEnvironment(args)
// Let the main class re-initialize the logging system once it starts.
if (uninitLog) {
Logging.uninitialize()
}
if (args.verbose) {
logInfo(s"Main class:\n$childMainClass")
logInfo(s"Arguments:\n${childArgs.mkString("\n")}")
// sysProps may contain sensitive information, so redact before printing
logInfo(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).mkString("\n")}")
logInfo(s"Classpath elements:\n${childClasspath.mkString("\n")}")
logInfo("\n")
}
val loader =
if (sparkConf.get(DRIVER_USER_CLASS_PATH_FIRST)) {
new ChildFirstURLClassLoader(new Array[URL](0),
Thread.currentThread.getContextClassLoader)
} else {
new MutableURLClassLoader(new Array[URL](0),
Thread.currentThread.getContextClassLoader)
}
Thread.currentThread.setContextClassLoader(loader)
for (jar <- childClasspath) {
addJarToClasspath(jar, loader)
}
var mainClass: Class[_] = null
try {
mainClass = Utils.classForName(childMainClass)
} catch {
case e: ClassNotFoundException =>
logWarning(s"Failed to load $childMainClass.", e)
if (childMainClass.contains("thriftserver")) {
logInfo(s"Failed to load main class $childMainClass.")
logInfo("You need to build Spark with -Phive and -Phive-thriftserver.")
}
throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS)
case e: NoClassDefFoundError =>
logWarning(s"Failed to load $childMainClass: ${e.getMessage()}")
if (e.getMessage.contains("org/apache/hadoop/hive")) {
logInfo(s"Failed to load hive class.")
logInfo("You need to build Spark with -Phive and -Phive-thriftserver.")
}
throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS)
}
val app: SparkApplication = if (classOf[SparkApplication].isAssignableFrom(mainClass)) {
mainClass.newInstance().asInstanceOf[SparkApplication]
} else {
// SPARK-4170
if (classOf[scala.App].isAssignableFrom(mainClass)) {
logWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.")
}
new JavaMainApplication(mainClass)
}
@tailrec
def findCause(t: Throwable): Throwable = t match {
case e: UndeclaredThrowableException =>
if (e.getCause() != null) findCause(e.getCause()) else e
case e: InvocationTargetException =>
if (e.getCause() != null) findCause(e.getCause()) else e
case e: Throwable =>
e
}
try {
app.start(childArgs.toArray, sparkConf)
} catch {
case t: Throwable =>
throw findCause(t)
}
}
- Call prepareSubmitEnvironment to prepare the environment for submitting the app; depending on the clusterManager and deployMode derived from the arguments, it yields different values of (childArgs, childClasspath, sparkConf, childMainClass);
- Load childMainClass via reflection and obtain a SparkApplication instance;
- Start the SparkApplication by calling app.start().
When prepareSubmitEnvironment is called:
- if deployMode is Client, childMainClass is the mainClass passed on the command line (spark-submit --class mainClass);
- if deployMode is Cluster (Standalone), childMainClass is RestSubmissionClientApp when REST is used, or ClientApp when it is not (sketched below).
Taking (Standalone, Cluster) mode as the example and assuming REST is not used:
childMainClass: org.apache.spark.deploy.ClientApp
childArgs: the arguments given at submission time plus a few extra ones (memory, cores, mainClass, etc.)
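To make the branch concrete, here is a small self-contained sketch (not Spark source) of the childMainClass choice for the cases discussed in this article; the REST client's package name is an assumption stated from memory and should be checked against the source tree.

// Sketch only: which childMainClass prepareSubmitEnvironment ends up with for
// client mode versus standalone cluster mode with/without the REST gateway.
object ChildMainClassSketch {
  def childMainClass(deployMode: String, useRest: Boolean, userMainClass: String): String =
    deployMode match {
      case "client"             => userMainClass // run the user's class directly
      case "cluster" if useRest => "org.apache.spark.deploy.rest.RestSubmissionClientApp"
      case "cluster"            => "org.apache.spark.deploy.ClientApp" // legacy RPC gateway
    }

  def main(args: Array[String]): Unit =
    println(childMainClass("cluster", useRest = false, "com.example.MyApp"))
}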
- Enter ClientApp, defined in org.apache.spark.deploy.Client.scala
private[spark] class ClientApp extends SparkApplication {
override def start(args: Array[String], conf: SparkConf): Unit = {
val driverArgs = new ClientArguments(args)
if (!conf.contains("spark.rpc.askTimeout")) {
conf.set("spark.rpc.askTimeout", "10s")
}
Logger.getRootLogger.setLevel(driverArgs.logLevel)
val rpcEnv =
RpcEnv.create("driverClient", Utils.localHostName(), 0, conf, new SecurityManager(conf))
val masterEndpoints = driverArgs.masters.map(RpcAddress.fromSparkURL).
map(rpcEnv.setupEndpointRef(_, Master.ENDPOINT_NAME))
rpcEnv.setupEndpoint("client", new ClientEndpoint(rpcEnv, driverArgs, masterEndpoints, conf))
rpcEnv.awaitTermination()
}
}
- Create an RpcEnv (named "driverClient") on the current node;
- Create a ClientEndpoint (an RpcEndpoint) and register it with that RpcEnv.
When an RpcEndpoint is registered with an RpcEnv, an OnStart message is dispatched to it, which in turn invokes RpcEndpoint.onStart() (see the toy sketch below).
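To illustrate the OnStart mechanism without Spark's internals, here is a toy, self-contained analogy (it is not Spark's Dispatcher/Inbox code): registering an endpoint enqueues a control message ahead of everything else, so onStart() runs before any ordinary message is handled.

// Toy analogy of the RpcEndpoint lifecycle described above (not Spark code).
object EndpointLifecycleSketch {
  sealed trait Message
  case object OnStart extends Message
  final case class UserMessage(body: String) extends Message

  trait Endpoint {
    def onStart(): Unit = ()
    def receive(msg: String): Unit
  }

  final class Inbox(endpoint: Endpoint) {
    // OnStart is enqueued at registration time, before any user message.
    private val queue = scala.collection.mutable.Queue[Message](OnStart)
    def post(body: String): Unit = queue.enqueue(UserMessage(body))
    def drain(): Unit = while (queue.nonEmpty) queue.dequeue() match {
      case OnStart        => endpoint.onStart()
      case UserMessage(b) => endpoint.receive(b)
    }
  }

  def main(args: Array[String]): Unit = {
    val inbox = new Inbox(new Endpoint {
      override def onStart(): Unit = println("onStart: send RequestSubmitDriver")
      override def receive(msg: String): Unit = println(s"received: $msg")
    })
    inbox.post("SubmitDriverResponse")
    inbox.drain() // onStart runs first, then the queued message
  }
}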
- Enter ClientEndpoint.onStart(), defined in org.apache.spark.deploy.Client.scala
override def onStart(): Unit = {
driverArgs.cmd match {
case "launch" =>
// TODO: We could add an env variable here and intercept it in `sc.addJar` that would
// truncate filesystem paths similar to what YARN does. For now, we just require
// people call `addJar` assuming the jar is in the same directory.
val mainClass = "org.apache.spark.deploy.worker.DriverWrapper"
val classPathConf = "spark.driver.extraClassPath"
val classPathEntries = getProperty(classPathConf, conf).toSeq.flatMap { cp =>
cp.split(java.io.File.pathSeparator)
}
val libraryPathConf = "spark.driver.extraLibraryPath"
val libraryPathEntries = getProperty(libraryPathConf, conf).toSeq.flatMap { cp =>
cp.split(java.io.File.pathSeparator)
}
val extraJavaOptsConf = "spark.driver.extraJavaOptions"
val extraJavaOpts = getProperty(extraJavaOptsConf, conf)
.map(Utils.splitCommandString).getOrElse(Seq.empty)
val sparkJavaOpts = Utils.sparkJavaOpts(conf)
val javaOpts = sparkJavaOpts ++ extraJavaOpts
val command = new Command(mainClass,
Seq("{{WORKER_URL}}", "{{USER_JAR}}", driverArgs.mainClass) ++ driverArgs.driverOptions,
sys.env, classPathEntries, libraryPathEntries, javaOpts)
val driverDescription = new DriverDescription(
driverArgs.jarUrl,
driverArgs.memory,
driverArgs.cores,
driverArgs.supervise,
command)
asyncSendToMasterAndForwardReply[SubmitDriverResponse](
RequestSubmitDriver(driverDescription))
case "kill" =>
val driverId = driverArgs.driverId
asyncSendToMasterAndForwardReply[KillDriverResponse](RequestKillDriver(driverId))
}
}
Match "launch" and enter the corresponding branch.
- Build a Command from the various configuration values; the mainClass here is org.apache.spark.deploy.worker.DriverWrapper;
- Build a DriverDescription;
- Call asyncSendToMasterAndForwardReply to send a RequestSubmitDriver message to the Master and wait for the response.
The RPC send/receive flow is the same as before and is not repeated here.
4 Registering the Driver
- Enter org.apache.spark.deploy.master.Master.scala
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
case RequestSubmitDriver(description) =>
if (state != RecoveryState.ALIVE) {
val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
"Can only accept driver submissions in ALIVE state."
context.reply(SubmitDriverResponse(self, false, None, msg))
} else {
logInfo("Driver submitted " + description.command.mainClass)
val driver = createDriver(description)
persistenceEngine.addDriver(driver)
waitingDrivers += driver
drivers.add(driver)
schedule()
// TODO: It might be good to instead have the submission client poll the master to determine
// the current status of the driver. For now it's simply "fire and forget".
context.reply(SubmitDriverResponse(self, true, Some(driver.id),
s"Driver successfully submitted as ${driver.id}"))
}
}
- Match RequestSubmitDriver and enter the corresponding branch;
- Call createDriver(description) to build a DriverInfo;
- Add the new DriverInfo to the Master's waitingDrivers;
- Call schedule() to assign resources to the drivers waiting in Master.waitingDrivers; a waiting driver that is granted resources gets launched;
- schedule() also calls startExecutorsOnWorkers(), which walks Master.waitingApps and starts Executors for the waiting apps. At this point we have only just registered the Driver and Master.waitingApps is empty, so no Executors are started yet; this part is skipped here and analyzed in the follow-up walkthrough on starting the TaskScheduler, when the application is started after it has been registered;
- Reply with the result (the overall handling order is sketched below).
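A condensed sketch (not Spark source) of that handling order, with the persistence and scheduling steps stubbed out; the driver id is a placeholder:

// Sketch only: the order of operations in the RequestSubmitDriver branch above.
object RegisterDriverSketch {
  final case class DriverInfo(id: String)

  private val waitingDrivers = scala.collection.mutable.ArrayBuffer[DriverInfo]()

  def handleSubmit(id: String, persist: DriverInfo => Unit, schedule: () => Unit): String = {
    val driver = DriverInfo(id)
    persist(driver)          // written to the persistence engine for Master recovery
    waitingDrivers += driver // picked up by schedule()
    schedule()               // may launch the driver right away if a worker fits
    s"Driver successfully submitted as ${driver.id}"
  }

  def main(args: Array[String]): Unit =
    println(handleSubmit("driver-20240101120000-0000", _ => (), () => ()))
}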
5 Launching the Driver
- Enter org.apache.spark.deploy.master.Master.scala
/**
* Schedule the currently available resources among waiting apps. This method will be called
* every time a new app joins or resource availability changes.
*/
private def schedule(): Unit = {
if (state != RecoveryState.ALIVE) {
return
}
// Drivers take strict precedence over executors
val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
val numWorkersAlive = shuffledAliveWorkers.size
var curPos = 0
for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
// We assign workers to each waiting driver in a round-robin fashion. For each driver, we
// start from the last worker that was assigned a driver, and continue onwards until we have
// explored all alive workers.
var launched = false
var numWorkersVisited = 0
while (numWorkersVisited < numWorkersAlive && !launched) {
val worker = shuffledAliveWorkers(curPos)
numWorkersVisited += 1
if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
launchDriver(worker, driver)
waitingDrivers -= driver
launched = true
}
curPos = (curPos + 1) % numWorkersAlive
}
}
startExecutorsOnWorkers()
}
- Select all ALIVE workers from workers and shuffle them randomly;
- Iterate over waitingDrivers (all drivers waiting for resources); for each driver, walk the shuffled workers to find one whose free cores and free memory satisfy the driver's requirements, and call launchDriver(worker, driver) on it (a simplified sketch of this placement loop follows below);
- Call startExecutorsOnWorkers() to start Executors on the Workers.
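Stripped of Spark's bookkeeping, the placement loop reduces to the following self-contained sketch (not Spark source): walk the shuffled alive workers in a circle, starting where the previous driver's search left off, until one has enough free memory and cores.

// Sketch only: round-robin driver placement over the shuffled alive workers.
object RoundRobinPlacementSketch {
  final case class Worker(id: String, memFree: Int, coresFree: Int)
  final case class Driver(id: String, mem: Int, cores: Int)

  /** Returns the chosen worker plus the position to start from for the next driver. */
  def place(workers: IndexedSeq[Worker], driver: Driver, startPos: Int): Option[(Worker, Int)] = {
    val n = workers.size
    var pos = startPos
    var visited = 0
    while (visited < n) {
      val w = workers(pos)
      visited += 1
      val next = (pos + 1) % n
      if (w.memFree >= driver.mem && w.coresFree >= driver.cores) return Some((w, next))
      pos = next
    }
    None // no alive worker currently has enough free memory and cores
  }

  def main(args: Array[String]): Unit = {
    val workers = IndexedSeq(Worker("w1", memFree = 512, coresFree = 1),
      Worker("w2", memFree = 4096, coresFree = 4))
    println(place(workers, Driver("driver-0", mem = 1024, cores = 2), startPos = 0)) // picks w2
  }
}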
- Enter org.apache.spark.deploy.master.Master.scala
private def launchDriver(worker: WorkerInfo, driver: DriverInfo) {
logInfo("Launching driver " + driver.id + " on worker " + worker.id)
worker.addDriver(driver)
driver.worker = Some(worker)
worker.endpoint.send(LaunchDriver(driver.id, driver.desc))
driver.state = DriverState.RUNNING
}
- Add the DriverInfo to the WorkerInfo;
- Add the WorkerInfo to the DriverInfo;
- Asynchronously send a LaunchDriver message through the worker's endpoint reference to launch the Driver.
The RPC send/receive flow is the same as before and is not repeated here.
- Enter org.apache.spark.deploy.worker.Worker.scala
override def receive: PartialFunction[Any, Unit] = synchronized {
case LaunchDriver(driverId, driverDesc) =>
logInfo(s"Asked to launch driver $driverId")
val driver = new DriverRunner(
conf,
driverId,
workDir,
sparkHome,
driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
self,
workerUri,
securityMgr)
drivers(driverId) = driver
driver.start()
coresUsed += driverDesc.cores
memoryUsed += driverDesc.mem
// remaining cases omitted
}
- Create a DriverRunner;
- Call driver.start() to launch the Driver;
- Add the cores and memory occupied by the Driver to the Worker's used cores and used memory.
- Enter org.apache.spark.deploy.worker.DriverRunner.scala
/** Starts a thread to run and manage the driver. */
private[worker] def start() = {
new Thread("DriverRunner for " + driverId) {
override def run() {
var shutdownHook: AnyRef = null
try {
shutdownHook = ShutdownHookManager.addShutdownHook { () =>
logInfo(s"Worker shutting down, killing driver $driverId")
kill()
}
// prepare driver jars and run driver
val exitCode = prepareAndRunDriver()
// set final state depending on if forcibly killed and process exit code
finalState = if (exitCode == 0) {
Some(DriverState.FINISHED)
} else if (killed) {
Some(DriverState.KILLED)
} else {
Some(DriverState.FAILED)
}
} catch {
case e: Exception =>
kill()
finalState = Some(DriverState.ERROR)
finalException = Some(e)
} finally {
if (shutdownHook != null) {
ShutdownHookManager.removeShutdownHook(shutdownHook)
}
}
// notify worker of final driver state, possible exception
worker.send(DriverStateChanged(driverId, finalState.get, finalException))
}
}.start()
}
- Start a new thread;
- In that thread, register a shutdown hook (sketched below), prepare the driver jars, and run the driver;
- Set the Driver's final state and report it back to the Worker via DriverStateChanged.
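The shutdown-hook bracket in start() is a standard pattern; here is a minimal sketch using the plain JDK API instead of Spark's ShutdownHookManager:

// Sketch only: register a hook so an abrupt Worker shutdown also kills the
// driver process, and unregister it when the driver finishes normally.
object ShutdownHookSketch {
  def main(args: Array[String]): Unit = {
    val hook = new Thread(() => println("worker shutting down, killing driver"))
    Runtime.getRuntime.addShutdownHook(hook)
    try {
      println("prepareAndRunDriver() analogue: run the driver process here")
    } finally {
      Runtime.getRuntime.removeShutdownHook(hook) // normal completion: hook no longer needed
    }
  }
}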
- Enter org.apache.spark.deploy.worker.DriverRunner.scala
private[worker] def prepareAndRunDriver(): Int = {
val driverDir = createWorkingDirectory()
val localJarFilename = downloadUserJar(driverDir)
def substituteVariables(argument: String): String = argument match {
case "{{WORKER_URL}}" => workerUrl
case "{{USER_JAR}}" => localJarFilename
case other => other
}
// TODO: If we add ability to submit multiple jars they should also be added here
val builder = CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)
runDriver(builder, driverDir, driverDesc.supervise)
}
- Create the Driver's working directory;
- Download the user jar into that directory;
- Build a ProcessBuilder (the launch command) from the given parameters; the {{WORKER_URL}} and {{USER_JAR}} placeholders are resolved here via substituteVariables (sketched below);
- Call runDriver to start the Driver.
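One detail worth calling out: the Command built back in ClientEndpoint.onStart carries the literal tokens "{{WORKER_URL}}" and "{{USER_JAR}}", and only here, on the Worker, are they replaced with concrete values. A self-contained sketch of that step (the URLs and paths are placeholders):

// Sketch only: how the {{WORKER_URL}} / {{USER_JAR}} tokens are resolved when
// the driver command is finally built on the Worker.
object SubstituteVariablesSketch {
  def substitute(workerUrl: String, localJar: String)(argument: String): String = argument match {
    case "{{WORKER_URL}}" => workerUrl
    case "{{USER_JAR}}"   => localJar
    case other            => other
  }

  def main(args: Array[String]): Unit = {
    val rawArgs = Seq("{{WORKER_URL}}", "{{USER_JAR}}", "com.example.MyApp", "--input", "/data")
    val resolved = rawArgs.map(substitute("spark://Worker@worker-host:34567", "/work/driver-0/my-app.jar"))
    println(resolved.mkString(" "))
  }
}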
- Enter org.apache.spark.deploy.worker.DriverRunner.scala
private def runDriver(builder: ProcessBuilder, baseDir: File, supervise: Boolean): Int = {
builder.directory(baseDir)
def initialize(process: Process): Unit = {
// Redirect stdout and stderr to files
val stdout = new File(baseDir, "stdout")
CommandUtils.redirectStream(process.getInputStream, stdout)
val stderr = new File(baseDir, "stderr")
val formattedCommand = builder.command.asScala.mkString("\"", "\" \"", "\"")
val header = "Launch Command: %s\n%s\n\n".format(formattedCommand, "=" * 40)
Files.append(header, stderr, StandardCharsets.UTF_8)
CommandUtils.redirectStream(process.getErrorStream, stderr)
}
runCommandWithRetry(ProcessBuilderLike(builder), initialize, supervise)
}
private[worker] def runCommandWithRetry(
command: ProcessBuilderLike, initialize: Process => Unit, supervise: Boolean): Int = {
var exitCode = -1
// Time to wait between submission retries.
var waitSeconds = 1
// A run of this many seconds resets the exponential back-off.
val successfulRunDuration = 5
var keepTrying = !killed
while (keepTrying) {
logInfo("Launch Command: " + command.command.mkString("\"", "\" \"", "\""))
synchronized {
if (killed) { return exitCode }
process = Some(command.start())
initialize(process.get)
}
val processStart = clock.getTimeMillis()
exitCode = process.get.waitFor()
// check if attempting another run
keepTrying = supervise && exitCode != 0 && !killed
if (keepTrying) {
if (clock.getTimeMillis() - processStart > successfulRunDuration * 1000L) {
waitSeconds = 1
}
logInfo(s"Command exited with status $exitCode, re-launching after $waitSeconds s.")
sleeper.sleep(waitSeconds)
waitSeconds = waitSeconds * 2 // exponential back-off
}
}
exitCode
}
- Call process = Some(command.start()), i.e. ProcessBuilder.start(), to launch the Command as a new process;
- Call initialize to redirect the process's stdout and stderr into the stdout and stderr files;
- If supervise is set and the process exits with a non-zero code, runCommandWithRetry relaunches it with exponential back-off (sketched below).
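The supervise/retry logic in runCommandWithRetry boils down to the loop below (a self-contained sketch, not Spark source): keep relaunching while the process exits non-zero, doubling the wait each time, and reset the back-off after a run that lasted longer than the 5-second threshold.

// Sketch only: the exponential back-off retry loop used to supervise a driver.
object RetryWithBackoffSketch {
  def runWithRetry(run: () => Int, supervise: Boolean): Int = {
    var exitCode = -1
    var waitSeconds = 1
    var keepTrying = true
    while (keepTrying) {
      val start = System.currentTimeMillis()
      exitCode = run()
      keepTrying = supervise && exitCode != 0
      if (keepTrying) {
        if (System.currentTimeMillis() - start > 5000L) waitSeconds = 1 // long run: reset back-off
        Thread.sleep(waitSeconds * 1000L)
        waitSeconds *= 2
      }
    }
    exitCode
  }

  def main(args: Array[String]): Unit = {
    var attempts = 0
    // Fails twice, succeeds on the third attempt.
    println(runWithRetry(() => { attempts += 1; if (attempts < 3) 1 else 0 }, supervise = true))
  }
}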
- Enter org.apache.spark.deploy.worker.DriverWrapper (the mainClass defined in the Command)
/**
* Utility object for launching driver programs such that they share fate with the Worker process.
* This is used in standalone cluster mode only.
*/
object DriverWrapper extends Logging {
def main(args: Array[String]) {
args.toList match {
/*
* IMPORTANT: Spark 1.3 provides a stable application submission gateway that is both
* backward and forward compatible across future Spark versions. Because this gateway
* uses this class to launch the driver, the ordering and semantics of the arguments
* here must also remain consistent across versions.
*/
case workerUrl :: userJar :: mainClass :: extraArgs =>
val conf = new SparkConf()
val host: String = Utils.localHostName()
val port: Int = sys.props.getOrElse("spark.driver.port", "0").toInt
val rpcEnv = RpcEnv.create("Driver", host, port, conf, new SecurityManager(conf))
logInfo(s"Driver address: ${rpcEnv.address}")
rpcEnv.setupEndpoint("workerWatcher", new WorkerWatcher(rpcEnv, workerUrl))
val currentLoader = Thread.currentThread.getContextClassLoader
val userJarUrl = new File(userJar).toURI().toURL()
val loader =
if (sys.props.getOrElse("spark.driver.userClassPathFirst", "false").toBoolean) {
new ChildFirstURLClassLoader(Array(userJarUrl), currentLoader)
} else {
new MutableURLClassLoader(Array(userJarUrl), currentLoader)
}
Thread.currentThread.setContextClassLoader(loader)
setupDependencies(loader, userJar)
// Delegate to supplied main class
val clazz = Utils.classForName(mainClass)
val mainMethod = clazz.getMethod("main", classOf[Array[String]])
mainMethod.invoke(null, extraArgs.toArray[String])
rpcEnv.shutdown()
case _ =>
// scalastyle:off println
System.err.println("Usage: DriverWrapper [options]")
// scalastyle:on println
System.exit(-1)
}
}
}
- Create the driver's RpcEnv on the current Worker;
- Create a WorkerWatcher and register it with that RpcEnv;
- Load args.mainClass via reflection to get the class containing the app's main function, i.e. the mainClass from spark-submit --class mainClass (sketched below);
- Invoke mainClass.main(), and our Spark application finally starts running.
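The final hand-off is plain Java reflection. A minimal sketch of the same invocation, using a stand-in class (DemoApp, a placeholder) instead of a real user application:

// A stand-in for the user's application class (top level, so the compiled
// class carries a static main forwarder just like a normal main class).
object DemoApp {
  def main(args: Array[String]): Unit = println(s"DemoApp started with: ${args.mkString(", ")}")
}

// Sketch only: invoke the static main(String[]) of a class loaded through the
// given class loader, the same way DriverWrapper hands control to the user app.
object ReflectiveMainSketch {
  def invokeMain(className: String, args: Array[String], loader: ClassLoader): Unit = {
    val clazz = Class.forName(className, true, loader)
    val mainMethod = clazz.getMethod("main", classOf[Array[String]])
    mainMethod.invoke(null, args) // null receiver: main is static
  }

  def main(args: Array[String]): Unit =
    invokeMain("DemoApp", Array("arg1", "arg2"), Thread.currentThread.getContextClassLoader)
}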
6 Summary
In (Standalone, Cluster) mode, assuming REST is not used:
- The application is submitted with the spark-submit command line;
- Execution enters SparkSubmit, which calls doSubmit();
- prepareSubmitEnvironment prepares the environment for submitting the app; depending on the clusterManager and deployMode it returns different values of (childArgs, childClasspath, sparkConf, childMainClass);
- In (Standalone, Cluster) mode without REST, the returned childMainClass is org.apache.spark.deploy.ClientApp, and childArgs are the submission-time arguments (including memory, cores, mainClass, etc.);
- childMainClass is loaded via reflection and execution continues in ClientApp;
- In the ClientApp flow a Command is built; the Command carries its own mainClass, org.apache.spark.deploy.worker.DriverWrapper;
- ClientApp creates a client-side RpcEnv and registers a ClientEndpoint (an RpcEndpoint) with it;
- ClientEndpoint.onStart() sends a RequestSubmitDriver message to the Master to register the Driver;
- On the Master, once the Driver has been registered, schedule() is called;
- A Worker whose free resources satisfy the Driver's requirements is chosen, and a LaunchDriver message is sent to it asynchronously to launch the Driver;
- On the Worker, a new thread runs the previously built Command as a new process, i.e. executes the DriverWrapper.main() declared as its mainClass, which launches the Driver;
- DriverWrapper parses its arguments (args), loads args.mainClass via reflection and invokes its main() method, and our own application starts running.