Submitting a Spark job with spark-submit (based on Spark 2.4.4)
./bin/spark-submit \
--class <main-class> \
--master <master-url> \
--deploy-mode <deploy-mode> \
--conf <key>=<value> \
... # other options
<application-jar> \
[application-arguments]
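As a concrete example, the bundled SparkPi application can be submitted locally like this (the examples jar path and Scala version suffix depend on your build, so treat them as placeholders):
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master local[4] \
examples/jars/spark-examples_2.11-2.4.4.jar \
100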
The script above hands our application off to whatever environment the --master option points at. So how does Spark actually submit and launch it internally?
// The last line of bin/spark-submit clearly points at a class that must have a main method; "$@" in the script simply forwards all of the arguments passed at launch.
// That class is org.apache.spark.deploy.SparkSubmit, and it is what starts the whole process.
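For reference, the tail of bin/spark-submit in Spark 2.4.x looks roughly like this (abridged; the exec line is the one that matters):
exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"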
1.1 Into the Spark source code
// main() does nothing more than create a SparkSubmit instance and call doSubmit(), passing the arguments through
def main(args: Array[String]): Unit = {
  val submit = new SparkSubmit()
  submit.doSubmit(args)
}
def doSubmit(args: Array[String]): Unit = {
  // Initialize logging if needed, and remember whether it must be reset before the application's own logging takes over
  val uninitLog = initializeLogIfNecessary(true, silent = true)
  // Parse the command-line arguments
  val appArgs = parseArguments(args)
  // If --verbose was given, print the parsed arguments
  if (appArgs.verbose) {
    logInfo(appArgs.toString)
  }
  // Dispatch to the matching method based on the requested action
  appArgs.action match {
    case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
    case SparkSubmitAction.KILL => kill(appArgs)
    case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs)
    case SparkSubmitAction.PRINT_VERSION => printVersion()
  }
}
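The KILL and REQUEST_STATUS branches are also reached from the command line: for applications submitted in standalone (or Mesos) cluster mode, spark-submit accepts --kill and --status with a submission id, roughly like this (host and id are placeholders):
./bin/spark-submit --kill <submission-id> --master spark://<host>:6066
./bin/spark-submit --status <submission-id> --master spark://<host>:6066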
2.1 A quick walk through the argument-parsing method above:
protected def parseArguments(args: Array[String]): SparkSubmitArguments = {
  new SparkSubmitArguments(args)
}
/**
* Parses and encapsulates arguments from the spark-submit script.
* The env argument is used for testing.
*/
private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, String] = sys.env)
  extends SparkSubmitArgumentsParser with Logging {
  // The fields below should look familiar: one per spark-submit option
  var master: String = null
  var deployMode: String = null
  var executorMemory: String = null
  var executorCores: String = null
  var totalExecutorCores: String = null
  var propertiesFile: String = null
  var driverMemory: String = null
  var driverExtraClassPath: String = null
  var driverExtraLibraryPath: String = null
  var driverExtraJavaOptions: String = null
  var queue: String = null
  var numExecutors: String = null
  var files: String = null
  var mainClass: String = null
  var name: String = null
  // ...

  // Validate the arguments (for submit, kill, status requests, etc.); the checks are short and easy to follow
  validateArguments()
  // ...
}
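These fields are populated by the option parser the class inherits from: every recognized flag is routed to a field in a handle() callback (the flag constants live in the Java SparkSubmitOptionParser). A simplified sketch of the idea, not the full Spark source:
// Sketch only: the real handle() covers every spark-submit option
override protected def handle(opt: String, value: String): Boolean = {
  opt match {
    case MASTER => master = value                   // --master
    case CLASS => mainClass = value                 // --class
    case DEPLOY_MODE => deployMode = value          // --deploy-mode
    case EXECUTOR_MEMORY => executorMemory = value  // --executor-memory
    case _ => // every other flag is handled the same way
  }
  true
}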
2.2 Back to the doSubmit() method
// Since we are submitting an application, the SUBMIT branch is the one taken
case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
// Submit the application using the provided parameters; when a proxy user is specified, make sure the work is wrapped in a doAs() call first
private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
  def doRunMain(): Unit = {
    // The proxy user is usually not set
    if (args.proxyUser != null) {
      val proxyUser = UserGroupInformation.createProxyUser(args.proxyUser,
        UserGroupInformation.getCurrentUser())
      try {
        proxyUser.doAs(new PrivilegedExceptionAction[Unit]() {
          override def run(): Unit = {
            runMain(args, uninitLog)
          }
        })
      } catch {
        // ...
      }
    } else {
      // Either way, the call ends up in runMain()
      runMain(args, uninitLog)
    }
  }
  // ...
}
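The proxyUser branch is only taken when the job was submitted with --proxy-user (Hadoop impersonation). For example (user, class, and jar names here are made up):
./bin/spark-submit \
--proxy-user etl_user \
--class com.example.MyApp \
--master yarn \
my-app.jar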
2.2.1 runMain, the key method
private def runMain(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
  // Prepare the submission environment: child args, classpath, conf, and the class to launch
  val (childArgs, childClasspath, sparkConf, childMainClass) = prepareSubmitEnvironment(args)
  val loader =
    if (sparkConf.get(DRIVER_USER_CLASS_PATH_FIRST)) {
      new ChildFirstURLClassLoader(new Array[URL](0),
        Thread.currentThread.getContextClassLoader)
    } else {
      new MutableURLClassLoader(new Array[URL](0),
        Thread.currentThread.getContextClassLoader)
    }
  Thread.currentThread.setContextClassLoader(loader)
  // Add the user-supplied jars/files to the classpath
  for (jar <- childClasspath) {
    addJarToClasspath(jar, loader)
  }
  var mainClass: Class[_] = null
  try {
    // Core step: load the main class by name
    mainClass = Utils.classForName(childMainClass)
    /** classForName is just plain old reflection:
      * def classForName(className: String): Class[_] = {
      *   Class.forName(className, true, getContextOrSparkClassLoader)
      * }
      */
  } catch {
    // ... exception handling
  }
  // Instantiate the application: either the class already implements SparkApplication,
  // or it is a plain main class and gets wrapped in a JavaMainApplication
  val app: SparkApplication = if (classOf[SparkApplication].isAssignableFrom(mainClass)) {
    mainClass.newInstance().asInstanceOf[SparkApplication]
  } else {
    // SPARK-4170
    if (classOf[scala.App].isAssignableFrom(mainClass)) {
      logWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.")
    }
    new JavaMainApplication(mainClass)
  }
  // Start the application; for JavaMainApplication this means looking up and invoking main()
  app.start(childArgs.toArray, sparkConf)
  // ...
}
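A side note on the classloader choice above: the DRIVER_USER_CLASS_PATH_FIRST entry corresponds to the spark.driver.userClassPathFirst setting, so the child-first loader is selected when the job is submitted with that config. For example (class and jar names are made up):
./bin/spark-submit --conf spark.driver.userClassPathFirst=true --class com.example.MyApp my-app.jar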
The app.start() method seen above (here, the JavaMainApplication implementation):
override def start(args: Array[String], conf: SparkConf): Unit = {
  // Look up the main(String[]) method via reflection
  val mainMethod = klass.getMethod("main", new Array[String](0).getClass)
  if (!Modifier.isStatic(mainMethod.getModifiers)) {
    throw new IllegalStateException("The main method in the given main class must be static")
  }
  // Copy the SparkConf entries into system properties
  val sysProps = conf.getAll.toMap
  sysProps.foreach { case (k, v) =>
    sys.props(k) = v
  }
  // Invoke main(); the receiver is null because the method is static
  mainMethod.invoke(null, args)
}
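That is all JavaMainApplication really does: plain JDK reflection. A minimal standalone sketch of the same pattern (HelloMain and the argument values are made up for illustration; this is not Spark code):
import java.lang.reflect.Modifier

// Hypothetical target class: a top-level Scala object gets a static main() forwarder,
// so to reflection it looks like an ordinary Java main class
object HelloMain {
  def main(args: Array[String]): Unit =
    println("Hello from reflected main, args = " + args.mkString(", "))
}

object ReflectionDemo {
  def main(args: Array[String]): Unit = {
    // Same steps as Utils.classForName + JavaMainApplication.start
    val klass = Class.forName("HelloMain")
    val mainMethod = klass.getMethod("main", classOf[Array[String]])
    require(Modifier.isStatic(mainMethod.getModifiers), "main must be static")
    // null receiver because main is static; the array is passed as the single String[] argument
    mainMethod.invoke(null, Array("first-arg", "second-arg"))
  }
}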
// Drill in level by level and the whole chain turns out to be quite simple:
SparkSubmit.main {
  submit.doSubmit(args) {                    // args: everything passed on the command line
    super.doSubmit(args) {
      val appArgs = parseArguments(args)
      submit(appArgs, uninitLog) {
        doRunMain() {
          runMain(args, uninitLog) {
            val app = mainClass.newInstance()
            app.start(childArgs.toArray, sparkConf) {
              val mainMethod = klass.getMethod("main"...)
              mainMethod.invoke(null, args)
            }
          }
        }
      }
    }
  }
}
If you are curious, try walking Flink's startup path with the same approach and see whether it looks similar. It does!