CommandUtils是Spark中最常用的工具类之一,其作用是构建用于启动进程的ProcessBuilder。即使不太关心它的具体实现,也不影响对Spark源码的阅读和对原理的学习。我们要介绍的方法如下:
/**
 * Create a `ProcessBuilder` for launching the given command.
 *
 * The command is first turned into a "local" command via `buildLocalCommand`, then
 * expanded into an argument list via `buildCommandSeq`. Every entry of the local
 * command's environment is finally written into the builder's environment map.
 *
 * @param command             description of the process to launch
 * @param securityMgr         security manager forwarded to `buildLocalCommand`
 * @param memory              memory amount forwarded to `buildCommandSeq`
 * @param sparkHome           Spark home directory forwarded to `buildCommandSeq`
 * @param substituteArguments function applied to each command argument
 * @param classPaths          extra class path entries appended to those of the command
 * @param env                 environment consulted when building the local command
 * @return a configured, not yet started, `ProcessBuilder`
 */
def buildProcessBuilder(
    command: Command,
    securityMgr: SecurityManager,
    memory: Int,
    sparkHome: String,
    substituteArguments: String => String,
    classPaths: Seq[String] = Seq[String](),
    env: Map[String, String] = sys.env): ProcessBuilder = {
  val resolved = buildLocalCommand(command, securityMgr, substituteArguments, classPaths, env)
  val processBuilder = new ProcessBuilder(buildCommandSeq(resolved, memory, sparkHome): _*)

  // Copy the local command's variables into the environment of the process to be started.
  val childEnvironment = processBuilder.environment()
  resolved.environment.foreach { case (name, value) =>
    childEnvironment.put(name, value)
  }

  processBuilder
}
/**
 * Build a copy of `command` adapted to the supplied environment.
 *
 * The returned command differs from the input as follows:
 *  - arguments are passed through `substituteArguments`;
 *  - when the command has library path entries and the library path variable name is
 *    non-empty, that variable is set to the command's entries, followed by the value the
 *    command already had for it (if any), followed by the value from `env` (if any),
 *    joined with `File.pathSeparator`;
 *  - when authentication is enabled, the secret key is added under
 *    `SecurityManager.ENV_AUTH_SECRET`;
 *  - `classPath` is appended to the command's class path entries;
 *  - the library path entries are emptied, since they now live in the environment;
 *  - Java options starting with `"-D" + SecurityManager.SPARK_AUTH_SECRET_CONF` are dropped.
 */
private def buildLocalCommand(
    command: Command,
    securityMgr: SecurityManager,
    substituteArguments: String => String,
    classPath: Seq[String] = Seq[String](),
    env: Map[String, String]): Command = {
  val libPathVar = Utils.libraryPathEnvName
  val extraLibraryPaths = command.libraryPathEntries
  val libraryPathFromCommand = command.environment.get(libPathVar)

  val envWithLibraryPath =
    if (extraLibraryPaths.isEmpty || libPathVar.isEmpty) {
      command.environment
    } else {
      val joined = (extraLibraryPaths ++ libraryPathFromCommand ++ env.get(libPathVar))
        .mkString(File.pathSeparator)
      command.environment + ((libPathVar, joined))
    }

  val localEnvironment =
    if (securityMgr.isAuthenticationEnabled) {
      envWithLibraryPath + (SecurityManager.ENV_AUTH_SECRET -> securityMgr.getSecretKey)
    } else {
      envWithLibraryPath
    }

  // Options carrying the auth secret are removed from the Java options.
  val secretOptionPrefix = "-D" + SecurityManager.SPARK_AUTH_SECRET_CONF
  val sanitizedJavaOpts = command.javaOpts.filterNot(_.startsWith(secretOptionPrefix))

  Command(
    command.mainClass,
    command.arguments.map(substituteArguments),
    localEnvironment,
    command.classPathEntries ++ classPath,
    Seq[String](), // library path already captured in environment variable
    sanitizedJavaOpts)
}
/**
 * Assemble the full argument list used to launch `command`.
 *
 * The list is the output of `WorkerCommandBuilder.buildCommand()`, followed by the
 * command's main class, followed by the command's arguments.
 */
private def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = {
  val javaInvocation = new WorkerCommandBuilder(sparkHome, memory, command).buildCommand().asScala
  (javaInvocation :+ command.mainClass) ++ command.arguments
}
介绍完CommandUtils命令工具类之后正式进入主题。其实Executor进程的启动,就是通过调用CommandUtils.buildProcessBuilder方法生成ProcessBuilder,然后调用其start方法来启动进程。这里着重分析cmd是如何构建的。在buildProcessBuilder中调用了buildCommandSeq方法,从此方法的最后一行可以很明显地看到命令(commandSeq)的结构:([javaOpt + classPath] + mainClass + args)。接下来分析WorkerCommandBuilder的buildCommand方法。
/**
 * Command builder used by CommandUtils.
 *
 * It relies on some package-private APIs in SparkLauncher. Because Java has no equivalent
 * of `private[spark]` and that class should not become public, this class has to live in
 * the same package as the rest of the launcher library.
 *
 * On construction the command's environment is copied into `childEnv`, and
 * `CommandBuilderUtils.ENV_SPARK_HOME` is then set to `sparkHome`.
 */
private[spark] class WorkerCommandBuilder(sparkHome: String, memoryMb: Int, command: Command)
  extends AbstractCommandBuilder {

  // SPARK_HOME is written after the bulk copy, so it replaces any value the command supplied.
  childEnv.putAll(command.environment.asJava)
  childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, sparkHome)

  /**
   * Build the command list: the result of `buildJavaCommand` for the command's class path
   * entries, then `-Xmx<memoryMb>M`, then the command's Java options in order.
   *
   * @param env not read by this implementation
   */
  override def buildCommand(env: JMap[String, String]): JList[String] = {
    val extraClassPath = command.classPathEntries.mkString(File.pathSeparator)
    val javaCmd = buildJavaCommand(extraClassPath)
    javaCmd.add(s"-Xmx${memoryMb}M")
    for (opt <- command.javaOpts) {
      javaCmd.add(opt)
    }
    javaCmd
  }

  /** Convenience overload that calls `buildCommand(env)` with a fresh, empty map. */
  def buildCommand(): JList[String] = buildCommand(new JHashMap[String, String]())
}
可以看到,在构建cmd命令时拼接了 -Xmx${memoryMb}M,而这里的memoryMb就是我们设置的executor-memory(对应配置项spark.executor.memory)的值。分析到这里就真相大白了:我们设置的executor-memory最终被用作启动Executor进程时JVM的最大堆内存(-Xmx)。