Spark启动Executor进程时堆内存的指定

CommandUtils是Spark中最常用的工具类之一,其作用是为了构建进程。如果不太关心其实现也不影响对Spark源码的阅读和原理的学习。我们要介绍的方法如下:

buildProcessBuilder

  def buildProcessBuilder(
      command: Command,
      securityMgr: SecurityManager,
      memory: Int,
      sparkHome: String,
      substituteArguments: String => String,
      classPaths: Seq[String] = Seq[String](),
      env: Map[String, String] = sys.env): ProcessBuilder = {
    val localCommand = buildLocalCommand(
      command, securityMgr, substituteArguments, classPaths, env)
    val commandSeq = buildCommandSeq(localCommand, memory, sparkHome)
    val builder = new ProcessBuilder(commandSeq: _*)
    val environment = builder.environment()
    for ((key, value) <- localCommand.environment) {
      environment.put(key, value)
    }
    builder
  }

buildLocalCommand

  private def buildLocalCommand(
      command: Command,
      securityMgr: SecurityManager,
      substituteArguments: String => String,
      classPath: Seq[String] = Seq[String](),
      env: Map[String, String]): Command = {
    val libraryPathName = Utils.libraryPathEnvName
    val libraryPathEntries = command.libraryPathEntries
    val cmdLibraryPath = command.environment.get(libraryPathName)
 
    var newEnvironment = if (libraryPathEntries.nonEmpty && libraryPathName.nonEmpty) {
      val libraryPaths = libraryPathEntries ++ cmdLibraryPath ++ env.get(libraryPathName)
      command.environment + ((libraryPathName, libraryPaths.mkString(File.pathSeparator)))
    } else {
      command.environment
    }
 
    if (securityMgr.isAuthenticationEnabled) {
      newEnvironment += (SecurityManager.ENV_AUTH_SECRET -> securityMgr.getSecretKey)
    }
 
    Command(
      command.mainClass,
      command.arguments.map(substituteArguments),
      newEnvironment,
      command.classPathEntries ++ classPath,
      Seq[String](), // library path already captured in environment variable
      command.javaOpts.filterNot(_.startsWith("-D" + SecurityManager.SPARK_AUTH_SECRET_CONF)))
  }

buildCommandSeq

  private def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = {
    val cmd = new WorkerCommandBuilder(sparkHome, memory, command).buildCommand()
    cmd.asScala ++ Seq(command.mainClass) ++ command.arguments
  }

介绍完CommandUtils命令工具类之后正式进入主题,其实Executor进程的启动是通过调用CommandUtils.buildProcessBuilder方法生成ProcessBuilder,然后执行其start方法启动ProcessBuilder,生成进程。这里着重分析cmd如何构建。在buildProcessBuilder中调用了buildCommandSep方法,此方法的最后一行已经很明显可以看到命令(CommandSeq)的结构([javaOpt + classPath] + mainclass  + args),接下来分析WorkerCommandBuilder的buildCommand方法。

/**
 * This class is used by CommandUtils. It uses some package-private APIs in SparkLauncher, and since
 * Java doesn't have a feature similar to `private[spark]`, and we don't want that class to be
 * public, needs to live in the same package as the rest of the library.
 */
private[spark] class WorkerCommandBuilder(sparkHome: String, memoryMb: Int, command: Command)
    extends AbstractCommandBuilder {

  childEnv.putAll(command.environment.asJava)
  childEnv.put(CommandBuilderUtils.ENV_SPARK_HOME, sparkHome)

  override def buildCommand(env: JMap[String, String]): JList[String] = {
    val cmd = buildJavaCommand(command.classPathEntries.mkString(File.pathSeparator))
    cmd.add(s"-Xmx${memoryMb}M")
    command.javaOpts.foreach(cmd.add)
    cmd
  }

  def buildCommand(): JList[String] = buildCommand(new JHashMap[String, String]())

}

可以看到在构建cmd命令时拼接了 -Xmx${memoryMb}M ,而这里的memoryMb就是SparkConf中的executor-memory值,分析到这里就真相大白了,我们设置的executor-memory最终用在了启动Executor进程指定最大堆。

你可能感兴趣的:(大数据)