Note: java -cp is the same as -classpath; it specifies the classpath on which the classes the program depends on are found.
java -cp => start a JVM => start a Process (CliFrontend) => program entry CliFrontend.main
In IDEA, search globally (Ctrl + R) for org.apache.flink.client.cli.CliFrontend, open the CliFrontend class, and locate its main method.
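Conceptually, that is all the bin/flink shell script does at the end: exec a java -cp command. A minimal sketch of the same launch in plain Java via ProcessBuilder (the classpath and arguments below are hypothetical, for illustration only):

import java.util.List;

public class LaunchSketch {
    public static void main(String[] args) throws Exception {
        // Fork a new JVM process whose entry point is CliFrontend.main().
        Process p = new ProcessBuilder(List.of(
                "java",
                "-cp", "/opt/flink/lib/*",                     // hypothetical classpath
                "org.apache.flink.client.cli.CliFrontend",     // the client entry point
                "run", "WordCount.jar"))                       // hypothetical action + arguments
            .inheritIO()
            .start();
        System.exit(p.waitFor());
    }
}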
/**
 * Submits the job based on the arguments.
 */
public static void main(final String[] args) {
EnvironmentInformation.logEnvironmentInfo(LOG, "Command Line Client", args);
// 1. find the configuration directory
/*TODO get the path of Flink's conf directory*/
final String configurationDirectory = getConfigurationDirectoryFromEnv();
// 2. load the global configuration
/*TODO load the configuration from the conf path*/
final Configuration configuration = GlobalConfiguration.loadConfiguration(configurationDirectory);
// 3. load the custom command lines
/*TODO wrap the command-line clients, in order: Generic, Yarn, Default*/
final List<CustomCommandLine> customCommandLines = loadCustomCommandLines(
configuration,
configurationDirectory);
try {
final CliFrontend cli = new CliFrontend(
configuration,
customCommandLines);
SecurityUtils.install(new SecurityConfiguration(cli.configuration));
int retCode = SecurityUtils.getInstalledContext()
.runSecured(() -> cli.parseAndRun(args));
System.exit(retCode);
}
catch (Throwable t) {
final Throwable strippedThrowable = ExceptionUtils.stripException(t, UndeclaredThrowableException.class);
LOG.error("Fatal error while running command line interface.", strippedThrowable);
strippedThrowable.printStackTrace();
System.exit(31);
}
}
CliFrontend.java
/**
 * Submits the job based on the arguments.
 */
public static void main(final String[] args) {
... ... (code omitted)
final CliFrontend cli = new CliFrontend(
configuration,
customCommandLines);
SecurityUtils.install(new SecurityConfiguration(cli.configuration));
int retCode = SecurityUtils.getInstalledContext()
.runSecured(() -> cli.parseAndRun(args));
System.exit(retCode);
... ... (code omitted)
}
/**
* Parses the command line arguments and starts the requested action.
*
* @param args command line arguments of the client.
* @return The return code of the program
*/
public int parseAndRun(String[] args) {
// check for action
if (args.length < 1) {
CliFrontendParser.printHelp(customCommandLines);
System.out.println("Please specify an action.");
return 1;
}
// get action
String action = args[0];
// remove action from parameters
final String[] params = Arrays.copyOfRange(args, 1, args.length);
try {
// do action
switch (action) {
case ACTION_RUN:
run(params);
return 0;
case ACTION_RUN_APPLICATION:
runApplication(params);
return 0;
case ACTION_LIST:
list(params);
return 0;
case ACTION_INFO:
info(params);
return 0;
case ACTION_CANCEL:
cancel(params);
return 0;
case ACTION_STOP:
stop(params);
return 0;
case ACTION_SAVEPOINT:
savepoint(params);
return 0;
case "-h":
case "--help":
CliFrontendParser.printHelp(customCommandLines);
return 0;
case "-v":
case "--version":
String version = EnvironmentInformation.getVersion();
String commitID = EnvironmentInformation.getRevisionInformation().commitId;
System.out.print("Version: " + version);
System.out.println(commitID.equals(EnvironmentInformation.UNKNOWN) ? "" : ", Commit ID: " + commitID);
return 0;
default:
System.out.printf("\"%s\" is not a valid action.\n", action);
System.out.println();
System.out.println("Valid actions are \"run\", \"list\", \"info\", \"savepoint\", \"stop\", or \"cancel\".");
System.out.println();
System.out.println("Specify the version option (-v or --version) to print Flink version.");
System.out.println();
System.out.println("Specify the help option (-h or --help) to get help on the command.");
return 1;
}
} catch (CliArgsException ce) {
return handleArgException(ce);
} catch (ProgramParametrizationException ppe) {
return handleParametrizationException(ppe);
} catch (ProgramMissingJobException pmje) {
return handleMissingJobException();
} catch (Exception e) {
return handleError(e);
}
}
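As a concrete illustration (the command is hypothetical): flink run -c com.example.WordCount app.jar arrives at parseAndRun with args as below; the first token selects the action, and the rest are handed to run():

import java.util.Arrays;

public class ActionSplitDemo {
    public static void main(String[] ignored) {
        String[] args = {"run", "-c", "com.example.WordCount", "app.jar"};
        String action = args[0];                                    // "run"
        String[] params = Arrays.copyOfRange(args, 1, args.length); // passed on to run(params)
        System.out.println(action + " -> " + Arrays.toString(params));
    }
}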
CliFrontend.java
/**
* Executes the run action.
*
* @param args Command line arguments for the run action.
*/
protected void run(String[] args) throws Exception {
LOG.info("Running 'run' command.");
/*TODO get the default options for the "run" action*/
final Options commandOptions = CliFrontendParser.getRunCommandOptions();
/*TODO parse the options specified by the user*/
final CommandLine commandLine = getCommandLine(commandOptions, args, true);
// evaluate help flag
if (commandLine.hasOption(HELP_OPTION.getOpt())) {
CliFrontendParser.printHelpForRun(customCommandLines);
return;
}
/*TODO check isActive one by one, in the order added earlier: Generic, Yarn, Default*/
final CustomCommandLine activeCommandLine =
validateAndGetActiveCommandLine(checkNotNull(commandLine));
final ProgramOptions programOptions = ProgramOptions.create(commandLine);
/*TODO get the user's JAR and its other dependencies*/
final List<URL> jobJars = getJobJarAndDependencies(programOptions);
/*TODO get the effective configuration: HA id, target (session, per-job), JobManager memory, TaskManager memory, slots per TM, ...*/
final Configuration effectiveConfiguration = getEffectiveConfiguration(
activeCommandLine, commandLine, programOptions, jobJars);
LOG.debug("Effective executor configuration: {}", effectiveConfiguration);
final PackagedProgram program = getPackagedProgram(programOptions, effectiveConfiguration);
try {
/*TODO execute the program*/
executeProgram(effectiveConfiguration, program);
} finally {
program.deleteExtractedLibraries();
}
}
DefaultParser.java
/*TODO get the default options for the "run" action*/
final Options commandOptions = CliFrontendParser.getRunCommandOptions();
/**
 * A simple command line parser (based on Apache Commons CLI) that extracts command
 * line options.
 */
public class CliFrontendParser {
static final Option HELP_OPTION = new Option("h", "help", false,
"Show the help message for the CLI Frontend or the action.");
static final Option JAR_OPTION = new Option("j", "jarfile", true, "Flink program JAR file.");
static final Option CLASS_OPTION = new Option("c", "class", true,
"Class with the program entry point (\"main()\" method). Only needed if the " +
"JAR file does not specify the class in its manifest.");
static final Option CLASSPATH_OPTION = new Option("C", "classpath", true, "Adds a URL to each user code " +
"classloader on all nodes in the cluster. The paths must specify a protocol (e.g. file://) and be " +
"accessible on all nodes (e.g. by means of a NFS share). You can use this option multiple " +
"times for specifying more than one URL. The protocol must be supported by the " +
"{@link java.net.URLClassLoader}.");
......
/*TODO parse the options specified by the user*/
final CommandLine commandLine = getCommandLine(commandOptions, args, true);
public CommandLine getCommandLine(final Options commandOptions, final String[] args, final boolean stopAtNonOptions) throws CliArgsException {
final Options commandLineOptions = CliFrontendParser.mergeOptions(commandOptions, customCommandLineOptions);
return CliFrontendParser.parse(commandLineOptions, args, stopAtNonOptions);
}
public CommandLine parse(Options options, String[] arguments, Properties properties, boolean stopAtNonOption) throws ParseException {
this.options = options;
this.stopAtNonOption = stopAtNonOption;
this.skipParsing = false;
this.currentOption = null;
this.expectedOpts = new ArrayList(options.getRequiredOptions());
// reset any previously selected option groups (decompiled loops rewritten as for-each)
for (OptionGroup group : options.getOptionGroups()) {
    group.setSelected(null);
}
this.cmd = new CommandLine();
if (arguments != null) {
    for (String argument : arguments) {
        this.handleToken(argument);
    }
}
this.checkRequiredArgs();
this.handleProperties(properties);
this.checkRequiredOptions();
return this.cmd;
}
private void handleToken(String token) throws ParseException {
this.currentToken = token;
if (this.skipParsing) {
this.cmd.addArg(token);
} else if ("--".equals(token)) {
this.skipParsing = true;
} else if (this.currentOption != null && this.currentOption.acceptsArg() && this.isArgument(token)) {
// append the token as the current option's argument value
this.currentOption.addValueForProcessing(Util.stripLeadingAndTrailingQuotes(token));
} else if (token.startsWith("--")) {
// parse an option name of the "--" form
this.handleLongOption(token);
} else if (token.startsWith("-") && !"-".equals(token)) {
// parse an option name of the "-" form
this.handleShortAndLongOption(token);
} else {
this.handleUnknownToken(token);
}
if (this.currentOption != null && !this.currentOption.acceptsArg()) {
this.currentOption = null;
}
}
private void handleLongOption(String token) throws ParseException
{
if (token.indexOf('=') == -1)
{
// parse options of the form --L, -L, --l, -l (without =)
handleLongOptionWithoutEqual(token);
}
else
{
// parse options of the form --L=V, -L=V, --l=V, -l=V (with =)
handleLongOptionWithEqual(token);
}
}
The parsing logic for each case is largely the same: strip the - or -- prefix and validate the option. Take one of them as an example:
/**
* Handles the following tokens:
*
* --L
* -L
* --l
* -l
*
* @param token the command line token to handle
*/
private void handleLongOptionWithoutEqual(String token) throws ParseException
{
// check whether the token matches a known option
List matchingOpts = options.getMatchingOptions(token);
if (matchingOpts.isEmpty())
{
handleUnknownToken(currentToken);
}
else if (matchingOpts.size() > 1)
{
throw new AmbiguousOptionException(token, matchingOpts);
}
else
{
// add the matched option to the parsed command line
handleOption(options.getOption(matchingOpts.get(0)));
}
}
Options.java:
// check whether the token matches a known option
List<String> matchingOpts = options.getMatchingOptions(token);
/**
* Returns the options with a long name starting with the name specified.
*
* @param opt the partial name of the option
* @return the options matching the partial name specified, or an empty list if none matches
* @since 1.3
*/
public List<String> getMatchingOptions(String opt)
{
// strip the - or -- prefix
opt = Util.stripLeadingHyphens(opt);
List<String> matchingOpts = new ArrayList<>();
// for a perfect match return the single option only
if (longOpts.keySet().contains(opt))
{
return Collections.singletonList(opt);
}
for (String longOpt : longOpts.keySet())
{
if (longOpt.startsWith(opt))
{
matchingOpts.add(longOpt);
}
}
return matchingOpts;
}
// add the matched option to the parsed command line
handleOption(options.getOption(matchingOpts.get(0)));
private void handleOption(Option option) throws ParseException
{
// check the previous option before handling the next one
checkRequiredArgs();
option = (Option) option.clone();
updateRequiredOptions(option);
cmd.addOption(option);
if (option.hasArg())
{
currentOption = option;
}
else
{
currentOption = null;
}
}
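To see the parser behave end to end, here is a minimal, self-contained Commons CLI example (the options and arguments are made up for illustration, not Flink's real ones). "--class=..." takes the long-option-with-equal path, "-p 4" the short-option path, and with stopAtNonOption=true the first unknown token ("app.jar") ends option parsing, just as handleUnknownToken() does above:

import org.apache.commons.cli.*;

public class CliParseDemo {
    public static void main(String[] ignored) throws ParseException {
        Options options = new Options();
        options.addOption(new Option("c", "class", true, "program entry class"));
        options.addOption(new Option("p", "parallelism", true, "default parallelism"));

        CommandLine cmd = new DefaultParser().parse(
                options,
                new String[] {"--class=com.example.WordCount", "-p", "4", "app.jar"},
                true); // stopAtNonOption, as CliFrontend passes

        System.out.println(cmd.getOptionValue("class")); // com.example.WordCount
        System.out.println(cmd.getOptionValue("p"));     // 4
        System.out.println(cmd.getArgList());            // [app.jar]
    }
}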
Three command-line clients are added here, in order: Generic, Yarn, and Default (later, isActive() is checked in the same order to pick one):
// 3. load the custom command lines
/*TODO wrap the command-line clients, in order: Generic, Yarn, Default*/
final List<CustomCommandLine> customCommandLines = loadCustomCommandLines(
configuration,
configurationDirectory);
public static List<CustomCommandLine> loadCustomCommandLines(Configuration configuration, String configurationDirectory) {
List<CustomCommandLine> customCommandLines = new ArrayList<>();
// 1. Generic
customCommandLines.add(new GenericCLI(configuration, configurationDirectory));
// Command line interface of the YARN session, with a special initialization here
// to prefix all options with y/yarn.
final String flinkYarnSessionCLI = "org.apache.flink.yarn.cli.FlinkYarnSessionCli";
try {
// 2. Yarn
customCommandLines.add(
loadCustomCommandLine(flinkYarnSessionCLI,
configuration,
configurationDirectory,
"y",
"yarn"));
} catch (NoClassDefFoundError | Exception e) {
final String errorYarnSessionCLI = "org.apache.flink.yarn.cli.FallbackYarnSessionCli";
try {
LOG.info("Loading FallbackYarnSessionCli");
customCommandLines.add(
loadCustomCommandLine(errorYarnSessionCLI, configuration));
} catch (Exception exception) {
LOG.warn("Could not load CLI class {}.", flinkYarnSessionCLI, e);
}
}
// Tips: DefaultCLI must be added at last, because getActiveCustomCommandLine(..) will get the
// active CustomCommandLine in order and DefaultCLI isActive always return true.
// 3. Default
customCommandLines.add(new DefaultCLI());
return customCommandLines;
}
Inside run(), the client is selected:
/*TODO check isActive one by one, in the order added earlier: Generic, Yarn, Default*/
final CustomCommandLine activeCommandLine =
validateAndGetActiveCommandLine(checkNotNull(commandLine));
/**
* Gets the custom command-line for the arguments.
* @param commandLine The input to the command-line.
* @return custom command-line which is active (may only be one at a time)
*/
public CustomCommandLine validateAndGetActiveCommandLine(CommandLine commandLine) {
LOG.debug("Custom commandlines: {}", customCommandLines);
for (CustomCommandLine cli : customCommandLines) {
LOG.debug("Checking custom commandline {}, isActive: {}", cli, cli.isActive(commandLine));
// When FlinkYarnSessionCli is active, it is returned first.
// DefaultCLI's isActive always returns true, so it matches last.
if (cli.isActive(commandLine)) {
return cli;
}
}
throw new IllegalStateException("No valid command-line found.");
}
FlinkYarnSessionCli.java => the Yarn client's isActive logic:
@Override
public boolean isActive(CommandLine commandLine) {
final String jobManagerOption = commandLine.getOptionValue(addressOption.getOpt(), null);
/*TODO ID is the fixed string "yarn-cluster"*/
final boolean yarnJobManager = ID.equals(jobManagerOption);
/*TODO check whether an AppID of a Yarn session exists*/
final boolean hasYarnAppId = commandLine.hasOption(applicationId.getOpt())
|| configuration.getOptional(YarnConfigOptions.APPLICATION_ID).isPresent();
final boolean hasYarnExecutor = YarnSessionClusterExecutor.NAME.equalsIgnoreCase(configuration.get(DeploymentOptions.TARGET))
|| YarnJobClusterExecutor.NAME.equalsIgnoreCase(configuration.get(DeploymentOptions.TARGET));
/*TODO active if -m yarn-cluster was given, a yarn appID exists (or was specified on the command line), or the executor is a yarn one*/
return hasYarnExecutor || yarnJobManager || hasYarnAppId;
}
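The selection above is a small chain of responsibility: the first client whose isActive() returns true wins, which is why the always-active DefaultCLI must be registered last. A minimal sketch of the pattern with hypothetical types (not Flink's interfaces):

import java.util.List;

interface CommandLineClient {
    boolean isActive(String[] args);
}

class ClientSelectionSketch {
    // Mirrors validateAndGetActiveCommandLine(): walk the list in registration order.
    static CommandLineClient select(List<CommandLineClient> clients, String[] args) {
        for (CommandLineClient client : clients) {
            if (client.isActive(args)) {
                return client;
            }
        }
        throw new IllegalStateException("No valid command-line found.");
    }
}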
Inside run(), the effective configuration is obtained:
/*TODO get the effective configuration: HA id, target (session, per-job), JobManager memory, TaskManager memory, slots per TM, ...*/
final Configuration effectiveConfiguration = getEffectiveConfiguration(
activeCommandLine, commandLine, programOptions, jobJars);
FlinkYarnSessionCli.java
@Override
public Configuration toConfiguration(CommandLine commandLine) throws FlinkException {
// we ignore the addressOption because it can only contain "yarn-cluster"
final Configuration effectiveConfiguration = new Configuration();
applyDescriptorOptionToConfig(commandLine, effectiveConfiguration);
final ApplicationId applicationId = getApplicationId(commandLine);
if (applicationId != null) {
final String zooKeeperNamespace;
if (commandLine.hasOption(zookeeperNamespace.getOpt())){
zooKeeperNamespace = commandLine.getOptionValue(zookeeperNamespace.getOpt());
} else {
zooKeeperNamespace = effectiveConfiguration.getString(HA_CLUSTER_ID, applicationId.toString());
}
effectiveConfiguration.setString(HA_CLUSTER_ID, zooKeeperNamespace);
effectiveConfiguration.setString(YarnConfigOptions.APPLICATION_ID, ConverterUtils.toString(applicationId));
effectiveConfiguration.setString(DeploymentOptions.TARGET, YarnSessionClusterExecutor.NAME);
} else {
// TARGET is execution.target, the target executor.
// It decides which type of executor submits the job later: yarn-session or yarn-per-job.
effectiveConfiguration.setString(DeploymentOptions.TARGET, YarnJobClusterExecutor.NAME);
}
if (commandLine.hasOption(jmMemory.getOpt())) {
String jmMemoryVal = commandLine.getOptionValue(jmMemory.getOpt());
if (!MemorySize.MemoryUnit.hasUnit(jmMemoryVal)) {
jmMemoryVal += "m";
}
effectiveConfiguration.set(JobManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.parse(jmMemoryVal));
}
if (commandLine.hasOption(tmMemory.getOpt())) {
String tmMemoryVal = commandLine.getOptionValue(tmMemory.getOpt());
if (!MemorySize.MemoryUnit.hasUnit(tmMemoryVal)) {
tmMemoryVal += "m";
}
effectiveConfiguration.set(TaskManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.parse(tmMemoryVal));
}
if (commandLine.hasOption(slots.getOpt())) {
effectiveConfiguration.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, Integer.parseInt(commandLine.getOptionValue(slots.getOpt())));
}
dynamicPropertiesEncoded = encodeDynamicProperties(commandLine);
if (!dynamicPropertiesEncoded.isEmpty()) {
Map<String, String> dynProperties = getDynamicProperties(dynamicPropertiesEncoded);
for (Map.Entry<String, String> dynProperty : dynProperties.entrySet()) {
effectiveConfiguration.setString(dynProperty.getKey(), dynProperty.getValue());
}
}
if (isYarnPropertiesFileMode(commandLine)) {
return applyYarnProperties(effectiveConfiguration);
} else {
return effectiveConfiguration;
}
}
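Note the unit defaulting for the memory options above: a bare number such as -yjm 1024 gets an "m" appended and is read as mebibytes. A small sketch of that behavior using Flink's MemorySize (the option value is hypothetical):

import org.apache.flink.configuration.MemorySize;

public class MemoryArgDemo {
    public static void main(String[] args) {
        String jmMemoryVal = "1024"; // hypothetical value of the jobmanager-memory option
        if (!MemorySize.MemoryUnit.hasUnit(jmMemoryVal)) {
            jmMemoryVal += "m"; // same defaulting as toConfiguration() above
        }
        System.out.println(MemorySize.parse(jmMemoryVal).getMebiBytes()); // 1024
    }
}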
Inside run(), the program is executed:
/*TODO execute the program*/
executeProgram(effectiveConfiguration, program);
public static void executeProgram(
PipelineExecutorServiceLoader executorServiceLoader,
Configuration configuration,
PackagedProgram program,
boolean enforceSingleJobExecution,
boolean suppressSysout) throws ProgramInvocationException {
checkNotNull(executorServiceLoader);
final ClassLoader userCodeClassLoader = program.getUserCodeClassLoader();
final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
// set the context classloader to the user-code classloader
Thread.currentThread().setContextClassLoader(userCodeClassLoader);
LOG.info("Starting program (detached: {})", !configuration.getBoolean(DeploymentOptions.ATTACHED));
/*TODO set up the environment context; getExecutionEnvironment in the user code will pick up this environment info*/
ContextEnvironment.setAsContext(
executorServiceLoader,
configuration,
userCodeClassLoader,
enforceSingleJobExecution,
suppressSysout);
StreamContextEnvironment.setAsContext(
executorServiceLoader,
configuration,
userCodeClassLoader,
enforceSingleJobExecution,
suppressSysout);
try {
// invoke the user code's main method
program.invokeInteractiveModeForExecution();
} finally {
ContextEnvironment.unsetAsContext();
StreamContextEnvironment.unsetAsContext();
}
} finally {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
/**
 * This method assumes that the context environment is prepared, or the execution
 * will be a local execution by default.
 */
public void invokeInteractiveModeForExecution() throws ProgramInvocationException {
callMainMethod(mainClass, args);
}
private static void callMainMethod(Class<?> entryClass, String[] args) throws ProgramInvocationException
{
... ...
mainMethod = entryClass.getMethod("main", String[].class);
... ...
// invoke the main method via reflection
mainMethod.invoke(null, (Object) args);
... ...
}
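The dispatch above is plain JDK reflection. A self-contained sketch of the same call (the entry class and arguments are hypothetical):

import java.lang.reflect.Method;

public class ReflectMainDemo {
    public static void main(String[] args) throws Exception {
        Class<?> entryClass = Class.forName("com.example.WordCount"); // hypothetical user class
        Method mainMethod = entryClass.getMethod("main", String[].class);
        // The (Object) cast passes the whole String[] as the single argument of
        // main(String[]), instead of expanding it into varargs.
        mainMethod.invoke(null, (Object) new String[] {"--input", "file:///tmp/in"});
    }
}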
StreamExecutionEnvironment.java
/**
* Triggers the program execution. The environment will execute all parts of
* the program that have resulted in a "sink" operation. Sink operations are
* for example printing results or forwarding them to a message queue.
*
* The program execution will be logged and displayed with a generated
* default name.
*
 * @return The result of the job execution, containing elapsed time and accumulators.
 * @throws Exception which occurs during job execution.
*/
public JobExecutionResult execute() throws Exception {
return execute(getJobName());
}
/**
* Triggers the program execution. The environment will execute all parts of
* the program that have resulted in a "sink" operation. Sink operations are
* for example printing results or forwarding them to a message queue.
*
* The program execution will be logged and displayed with the provided name
*
* @param jobName
* Desired name of the job
* @return The result of the job execution, containing elapsed time and accumulators.
* @throws Exception which occurs during job execution.
*/
public JobExecutionResult execute(String jobName) throws Exception {
Preconditions.checkNotNull(jobName, "Streaming Job name should not be null.");
/*TODO build the StreamGraph, then continue execution with it*/
return execute(getStreamGraph(jobName));
}
public JobExecutionResult execute(StreamGraph streamGraph) throws Exception {
final JobClient jobClient = executeAsync(streamGraph);
... ...
}
public JobClient executeAsync(StreamGraph streamGraph) throws Exception {
... ...
// choose the matching factory based on the submission mode
final PipelineExecutorFactory executorFactory = executorServiceLoader.getExecutorFactory(configuration);
... ...
// pick the appropriate executor to submit the job
CompletableFuture<JobClient> jobClientFuture = executorFactory
.getExecutor(configuration)
.execute(streamGraph, configuration);
... ...
}
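The executorServiceLoader lookup is Java SPI underneath: Flink's DefaultExecutorServiceLoader enumerates the PipelineExecutorFactory implementations on the classpath and picks the one compatible with execution.target. A simplified sketch of that pattern, with hypothetical types in place of Flink's:

import java.util.ServiceLoader;

interface ExecutorFactorySketch {
    boolean isCompatibleWith(String target);
}

class FactoryLookupSketch {
    static ExecutorFactorySketch find(String target) { // e.g. "yarn-per-job"
        for (ExecutorFactorySketch factory : ServiceLoader.load(ExecutorFactorySketch.class)) {
            if (factory.isCompatibleWith(target)) {
                return factory;
            }
        }
        throw new IllegalStateException("No executor found for " + target);
    }
}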
PipelineExecutor -> YarnJobClusterExecutor -> AbstractJobClusterExecutor
@Override
public CompletableFuture<JobClient> execute(@Nonnull final Pipeline pipeline, @Nonnull final Configuration configuration, @Nonnull final ClassLoader userCodeClassloader) throws Exception {
/*TODO convert the StreamGraph into a JobGraph*/
final JobGraph jobGraph = PipelineExecutorUtils.getJobGraph(pipeline, configuration);
/*TODO cluster descriptor: creates and starts the YarnClient; holds some yarn and flink configuration and environment info*/
try (final ClusterDescriptor<ClusterID> clusterDescriptor = clusterClientFactory.createClusterDescriptor(configuration)) {
final ExecutionConfigAccessor configAccessor = ExecutionConfigAccessor.fromConfiguration(configuration);
/*TODO cluster-specific resource config: JobManager memory, TaskManager memory, slots per TM*/
final ClusterSpecification clusterSpecification = clusterClientFactory.getClusterSpecification(configuration);
final ClusterClientProvider<ClusterID> clusterClientProvider = clusterDescriptor
.deployJobCluster(clusterSpecification, jobGraph, configAccessor.getDetachedMode());
LOG.info("Job has been submitted with JobID " + jobGraph.getJobID());
return CompletableFuture.completedFuture(
new ClusterClientJobClientAdapter<>(clusterClientProvider, jobGraph.getJobID(), userCodeClassloader));
}
}
YarnClusterClientFactory.java
@Override
public YarnClusterDescriptor createClusterDescriptor(Configuration configuration) {
checkNotNull(configuration);
final String configurationDirectory =
configuration.get(DeploymentOptionsInternal.CONF_DIR);
YarnLogConfigUtil.setLogConfigFileInConfig(configuration, configurationDirectory);
return getClusterDescriptor(configuration);
}
private YarnClusterDescriptor getClusterDescriptor(Configuration configuration) {
/*TODO create the YarnClient*/
final YarnClient yarnClient = YarnClient.createYarnClient();
final YarnConfiguration yarnConfiguration = new YarnConfiguration();
/*TODO initialize and start the YarnClient*/
yarnClient.init(yarnConfiguration);
yarnClient.start();
return new YarnClusterDescriptor(
configuration,
yarnConfiguration,
yarnClient,
YarnClientYarnClusterInformationRetriever.create(yarnClient),
false);
}
/*TODO cluster-specific resource config: JobManager memory, TaskManager memory, slots per TM*/
final ClusterSpecification clusterSpecification = clusterClientFactory.getClusterSpecification(configuration);
@Override
public ClusterSpecification getClusterSpecification(Configuration configuration) {
checkNotNull(configuration);
final int jobManagerMemoryMB = JobManagerProcessUtils.processSpecFromConfigWithNewOptionToInterpretLegacyHeap(
configuration,
JobManagerOptions.TOTAL_PROCESS_MEMORY)
.getTotalProcessMemorySize()
.getMebiBytes();
final int taskManagerMemoryMB = TaskExecutorProcessUtils
.processSpecFromConfig(TaskExecutorProcessUtils.getConfigurationMapLegacyTaskManagerHeapSizeToConfigOption(
configuration, TaskManagerOptions.TOTAL_PROCESS_MEMORY))
.getTotalProcessMemorySize()
.getMebiBytes();
int slotsPerTaskManager = configuration.getInteger(TaskManagerOptions.NUM_TASK_SLOTS);
return new ClusterSpecification.ClusterSpecificationBuilder()
.setMasterMemoryMB(jobManagerMemoryMB)
.setTaskManagerMemoryMB(taskManagerMemoryMB)
.setSlotsPerTaskManager(slotsPerTaskManager)
.createClusterSpecification();
}
final ClusterClientProvider<ClusterID> clusterClientProvider = clusterDescriptor
.deployJobCluster(clusterSpecification, jobGraph, configAccessor.getDetachedMode());
AbstractJobClusterExecutor -> ClusterDescriptor -> YarnClusterDescriptor
@Override
public ClusterClientProvider<ApplicationId> deployJobCluster(
ClusterSpecification clusterSpecification,
JobGraph jobGraph,
boolean detached) throws ClusterDeploymentException {
try {
return deployInternal(
clusterSpecification,
"Flink per-job cluster",
getYarnJobClusterEntrypoint(), // get YarnJobClusterEntrypoint, the entry point that starts the AM
jobGraph,
detached);
} catch (Exception e) {
throw new ClusterDeploymentException("Could not deploy Yarn job cluster.", e);
}
}
1.3.3.1 Uploading the JAR and configuration files to HDFS
YarnClusterDescriptor.java
return deployInternal(
clusterSpecification,
"Flink per-job cluster",
getYarnJobClusterEntrypoint(), // get YarnJobClusterEntrypoint, the entry point that starts the AM
jobGraph,
detached);
/**
* This method will block until the ApplicationMaster/JobManager have been deployed on YARN.
*
* @param clusterSpecification Initial cluster specification for the Flink cluster to be deployed
* @param applicationName name of the Yarn application to start
* @param yarnClusterEntrypoint Class name of the Yarn cluster entry point.
* @param jobGraph A job graph which is deployed with the Flink cluster, {@code null} if none
* @param detached True if the cluster should be started in detached mode
*/
private ClusterClientProvider<ApplicationId> deployInternal(
ClusterSpecification clusterSpecification,
String applicationName,
String yarnClusterEntrypoint,
@Nullable JobGraph jobGraph,
boolean detached) throws Exception {
final UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
if (HadoopUtils.isKerberosSecurityEnabled(currentUser)) {
boolean useTicketCache = flinkConfiguration.getBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE);
if (!HadoopUtils.areKerberosCredentialsValid(currentUser, useTicketCache)) {
throw new RuntimeException("Hadoop security with Kerberos is enabled but the login user " +
"does not have Kerberos credentials or delegation tokens!");
}
}
/*TODO pre-deployment checks: JAR path, conf path, yarn max vcores, ...*/
isReadyForDeployment(clusterSpecification);
// ------------------ Check if the specified queue exists --------------------
/*TODO check that the specified yarn queue exists*/
checkYarnQueues(yarnClient);
// ------------------ Check if the YARN ClusterClient has the requested resources --------------
/*TODO check whether yarn has enough resources*/
// Create application via yarnClient
final YarnClientApplication yarnApplication = yarnClient.createApplication();
final GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();
Resource maxRes = appResponse.getMaximumResourceCapability();
final ClusterResourceDescription freeClusterMem;
try {
freeClusterMem = getCurrentFreeClusterResources(yarnClient);
} catch (YarnException | IOException e) {
failSessionDuringDeployment(yarnClient, yarnApplication);
throw new YarnDeploymentException("Could not retrieve information about free cluster resources.", e);
}
final int yarnMinAllocationMB = yarnConfiguration.getInt(
YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
if (yarnMinAllocationMB <= 0) {
throw new YarnDeploymentException("The minimum allocation memory "
+ "(" + yarnMinAllocationMB + " MB) configured via '" + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
+ "' should be greater than 0.");
}
final ClusterSpecification validClusterSpecification;
try {
validClusterSpecification = validateClusterResources(
clusterSpecification,
yarnMinAllocationMB,
maxRes,
freeClusterMem);
} catch (YarnDeploymentException yde) {
failSessionDuringDeployment(yarnClient, yarnApplication);
throw yde;
}
LOG.info("Cluster specification: {}", validClusterSpecification);
final ClusterEntrypoint.ExecutionMode executionMode = detached ?
ClusterEntrypoint.ExecutionMode.DETACHED
: ClusterEntrypoint.ExecutionMode.NORMAL;
flinkConfiguration.setString(ClusterEntrypoint.EXECUTION_MODE, executionMode.toString());
/*TODO start launching the AM*/
ApplicationReport report = startAppMaster(
flinkConfiguration,
applicationName,
yarnClusterEntrypoint,
jobGraph,
yarnClient,
yarnApplication,
validClusterSpecification);
// print the application id for user to cancel themselves.
if (detached) {
final ApplicationId yarnApplicationId = report.getApplicationId();
logDetachedClusterInformation(yarnApplicationId, LOG);
}
setClusterEntrypointInfoToConfig(report);
return () -> {
try {
return new RestClusterClient<>(flinkConfiguration, report.getApplicationId());
} catch (Exception e) {
throw new RuntimeException("Error while creating RestClusterClient.", e);
}
};
}
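Before stepping into startAppMaster(...), it helps to see the bare YarnClient lifecycle that deployInternal() drives, stripped of Flink's checks and uploads. A minimal sketch against the Hadoop YARN client API (not Flink code; error handling and the real container spec omitted):

import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class YarnSubmitSketch {
    public static void main(String[] args) throws Exception {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(new YarnConfiguration());
        yarnClient.start();
        try {
            // createApplication() obtains an ApplicationId from the ResourceManager.
            YarnClientApplication app = yarnClient.createApplication();
            ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
            appContext.setApplicationName("sketch"); // plus resources, AM container spec, ...
            yarnClient.submitApplication(appContext);
        } finally {
            yarnClient.stop();
        }
    }
}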
private ApplicationReport startAppMaster(
Configuration configuration,
String applicationName,
String yarnClusterEntrypoint,
JobGraph jobGraph,
YarnClient yarnClient,
YarnClientApplication yarnApplication,
ClusterSpecification clusterSpecification) throws Exception {
// ------------------ Initialize the file systems -------------------------
/*TODO initialize and create Hadoop's FileSystem*/
org.apache.flink.core.fs.FileSystem.initialize(
configuration,
PluginUtils.createPluginManagerFromRootFolder(configuration));
final FileSystem fs = FileSystem.get(yarnConfiguration);
// hard coded check for the GoogleHDFS client because its not overriding the getScheme() method.
if (!fs.getClass().getSimpleName().equals("GoogleHadoopFileSystem") &&
fs.getScheme().startsWith("file")) {
LOG.warn("The file system scheme is '" + fs.getScheme() + "'. This indicates that the "
+ "specified Hadoop configuration path is wrong and the system is using the default Hadoop configuration values."
+ "The Flink YARN client needs to store its files in a distributed file system");
}
ApplicationSubmissionContext appContext = yarnApplication.getApplicationSubmissionContext();
final List<Path> providedLibDirs = Utils.getQualifiedRemoteSharedPaths(configuration, yarnConfiguration);
/*TODO the Yarn application file uploader: the FS and the corresponding HDFS path.
 * Used to upload: the user JAR, Flink's dependencies, and Flink's config files.
 * (The next ~300 lines can be skipped; jump straight to fileUploader.close().)
 */
final YarnApplicationFileUploader fileUploader = YarnApplicationFileUploader.from(
fs,
getStagingDir(fs),
providedLibDirs,
appContext.getApplicationId(),
getFileReplication());
// The files need to be shipped and added to classpath.
Set<File> systemShipFiles = new HashSet<>(shipFiles.size());
for (File file : shipFiles) {
systemShipFiles.add(file.getAbsoluteFile());
}
final String logConfigFilePath = configuration.getString(YarnConfigOptionsInternal.APPLICATION_LOG_CONFIG_FILE);
if (logConfigFilePath != null) {
systemShipFiles.add(new File(logConfigFilePath));
}
// Set-up ApplicationSubmissionContext for the application
final ApplicationId appId = appContext.getApplicationId();
// ------------------ Add Zookeeper namespace to local flinkConfiguraton ------
String zkNamespace = getZookeeperNamespace();
// no user specified cli argument for namespace?
if (zkNamespace == null || zkNamespace.isEmpty()) {
// namespace defined in config? else use applicationId as default.
zkNamespace = configuration.getString(HighAvailabilityOptions.HA_CLUSTER_ID, String.valueOf(appId));
setZookeeperNamespace(zkNamespace);
}
configuration.setString(HighAvailabilityOptions.HA_CLUSTER_ID, zkNamespace);
/*TODO HA config: number of application attempts, default 2*/
if (HighAvailabilityMode.isHighAvailabilityModeActivated(configuration)) {
// activate re-execution of failed applications
appContext.setMaxAppAttempts(
configuration.getInteger(
YarnConfigOptions.APPLICATION_ATTEMPTS.key(),
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
activateHighAvailabilitySupport(appContext);
} else {
// without HA, the number of application attempts is 1
// set number of application retries to 1 in the default case
appContext.setMaxAppAttempts(
configuration.getInteger(
YarnConfigOptions.APPLICATION_ATTEMPTS.key(),
1));
}
// Several upload-to-HDFS calls follow, covering:
// => systemShipFiles: the log config files and the JARs under lib/ except flink-dist
// => shipOnlyFiles: the files under plugins/
// => userJarFiles: the user code JARs
fileUploader.registerMultipleLocalResources(... ...);
// Upload and register the ApplicationMaster's JAR: flink-dist*.jar
final YarnLocalResourceDescriptor localResourceDescFlinkJar = fileUploader.uploadFlinkDist(flinkJarPath);
... ...
// Write the JobGraph to a tmp file, add it to the local resources, and upload it to HDFS
fileUploader.registerSingleLocalResource(
    jobGraphFilename,
    new Path(tmpJobGraphFile.toURI()), "",
    true, false);
... ...
// Upload the Flink configuration file
String flinkConfigKey = "flink-conf.yaml";
fileUploader.registerSingleLocalResource(
    flinkConfigKey,
    new Path(tmpConfigurationFile.getAbsolutePath()), "",
    true, true);
... ...
final JobManagerProcessSpec processSpec = JobManagerProcessUtils.processSpecFromConfigWithNewOptionToInterpretLegacyHeap(
flinkConfiguration, JobManagerOptions.TOTAL_PROCESS_MEMORY);
// Build the Java command that launches the AM container
final ContainerLaunchContext amContainer = setupApplicationMasterContainer(
    yarnClusterEntrypoint,
    hasKrb5,
    processSpec);
... ...
ContainerLaunchContext setupApplicationMasterContainer(
        String yarnClusterEntrypoint,
        boolean hasKrb5,
        JobManagerProcessSpec processSpec) {
    // respect custom JVM options in the YAML file
    String javaOpts = flinkConfiguration.getString(CoreOptions.FLINK_JVM_OPTIONS);
    if (flinkConfiguration.getString(CoreOptions.FLINK_JM_JVM_OPTIONS).length() > 0) {
        javaOpts += " " + flinkConfiguration.getString(CoreOptions.FLINK_JM_JVM_OPTIONS);
    }
    // applicable only for YarnMiniCluster secure test run
    // krb5.conf file will be available as local resource in JM/TM container
    if (hasKrb5) {
        javaOpts += " -Djava.security.krb5.conf=krb5.conf";
    }
    // create the AM's container launch context
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
    final Map<String, String> startCommandValues = new HashMap<>();
    startCommandValues.put("java", "$JAVA_HOME/bin/java");
    String jvmHeapMem = JobManagerProcessUtils.generateJvmParametersStr(processSpec, flinkConfiguration);
    startCommandValues.put("jvmmem", jvmHeapMem);
    startCommandValues.put("jvmopts", javaOpts);
    startCommandValues.put("logging", YarnLogConfigUtil.getLoggingYarnCommand(flinkConfiguration));
    startCommandValues.put("class", yarnClusterEntrypoint);
    startCommandValues.put("redirects",
        "1> " + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.out " +
        "2> " + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/jobmanager.err");
    startCommandValues.put("args", "");
    final String commandTemplate = flinkConfiguration
        .getString(ConfigConstants.YARN_CONTAINER_START_COMMAND_TEMPLATE,
            ConfigConstants.DEFAULT_YARN_CONTAINER_START_COMMAND_TEMPLATE);
    final String amCommand = BootstrapTools.getStartCommand(commandTemplate, startCommandValues);
    amContainer.setCommands(Collections.singletonList(amCommand));
    LOG.debug("Application Master start command: " + amCommand);
    return amContainer;
}
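BootstrapTools.getStartCommand simply substitutes the %key% placeholders in the template with the values collected above. A self-contained sketch of that substitution, assuming Flink's default template string (the memory flags are hypothetical):

import java.util.HashMap;
import java.util.Map;

public class StartCommandSketch {
    public static void main(String[] args) {
        String template = "%java% %jvmmem% %jvmopts% %logging% %class% %args% %redirects%";
        Map<String, String> values = new HashMap<>();
        values.put("java", "$JAVA_HOME/bin/java");
        values.put("jvmmem", "-Xmx1073741824 -Xms1073741824"); // hypothetical heap sizes
        values.put("jvmopts", "");
        values.put("logging", "-Dlog.file=<LOG_DIR>/jobmanager.log");
        values.put("class", "org.apache.flink.yarn.entrypoint.YarnJobClusterEntrypoint");
        values.put("args", "");
        values.put("redirects", "1> <LOG_DIR>/jobmanager.out 2> <LOG_DIR>/jobmanager.err");
        String command = template;
        for (Map.Entry<String, String> e : values.entrySet()) {
            command = command.replace("%" + e.getKey() + "%", e.getValue());
        }
        System.out.println(command.trim().replaceAll("\\s+", " "));
    }
}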
Package the AM's parameters and submit the application:
private ApplicationReport startAppMaster(
        Configuration configuration,
        String applicationName,
        String yarnClusterEntrypoint,
        JobGraph jobGraph,
        YarnClient yarnClient,
        YarnClientApplication yarnApplication,
        ClusterSpecification clusterSpecification) throws Exception {
    ... ...
    final ContainerLaunchContext amContainer = setupApplicationMasterContainer(
        yarnClusterEntrypoint, hasKrb5, processSpec);
    ... ...
    // package the AM's classpath and environment variables
    final Map<String, String> appMasterEnv = new HashMap<>();
    // set user specified app master environment variables
    appMasterEnv.putAll(ConfigurationUtils.getPrefixedKeyValuePairs(
        ResourceManagerOptions.CONTAINERIZED_MASTER_ENV_PREFIX, configuration));
    // set Flink app class path
    appMasterEnv.put(YarnConfigKeys.ENV_FLINK_CLASSPATH, classPathBuilder.toString());
    // set Flink on YARN internal configuration values
    appMasterEnv.put(YarnConfigKeys.FLINK_DIST_JAR, localResourceDescFlinkJar.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_APP_ID, appId.toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_HOME_DIR, fileUploader.getHomeDir().toString());
    appMasterEnv.put(YarnConfigKeys.ENV_CLIENT_SHIP_FILES,
        encodeYarnLocalResourceDescriptorListToString(fileUploader.getEnvShipResourceList()));
    appMasterEnv.put(YarnConfigKeys.ENV_ZOOKEEPER_NAMESPACE, getZookeeperNamespace());
    appMasterEnv.put(YarnConfigKeys.FLINK_YARN_FILES, fileUploader.getApplicationDir().toUri().toString());
    // https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md#identity-on-an-insecure-cluster-hadoop_user_name
    appMasterEnv.put(YarnConfigKeys.ENV_HADOOP_USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
    if (localizedKeytabPath != null) {
        appMasterEnv.put(YarnConfigKeys.LOCAL_KEYTAB_PATH, localizedKeytabPath);
        String principal = configuration.getString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL);
        appMasterEnv.put(YarnConfigKeys.KEYTAB_PRINCIPAL, principal);
        if (remotePathKeytab != null) {
            appMasterEnv.put(YarnConfigKeys.REMOTE_KEYTAB_PATH, remotePathKeytab.toString());
        }
    }
    // to support Yarn Secure Integration Test Scenario
    if (remoteYarnSiteXmlPath != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_YARN_SITE_XML_PATH, remoteYarnSiteXmlPath.toString());
    }
    if (remoteKrb5Path != null) {
        appMasterEnv.put(YarnConfigKeys.ENV_KRB5_PATH, remoteKrb5Path.toString());
    }
    // set classpath from YARN configuration
    Utils.setupYarnClassPath(yarnConfiguration, appMasterEnv);
    // set the AM's environment
    amContainer.setEnvironment(appMasterEnv);
    ... ...
    yarnClient.submitApplication(appContext);
    ... ...
}
/*TODO after all the uploads and environment setup above, the application can finally be submitted*/
yarnClient.submitApplication(appContext);
YarnClientImpl.java
public ApplicationId submitApplication(ApplicationSubmissionContext appContext) throws YarnException,
IOException {
ApplicationId applicationId = appContext.getApplicationId();
... ...
SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class);
request.setApplicationSubmissionContext(appContext);
// TODO: YARN-1763. Handle RM failovers during the submitApplication call.
rmClient.submitApplication(request);
... ...
}
ApplicationClientProtocolPBClientImpl.java
public SubmitApplicationResponse submitApplication(SubmitApplicationRequest request) throws YarnException, IOException {
    // extract the protobuf message from the request
    SubmitApplicationRequestProto requestProto = ((SubmitApplicationRequestPBImpl) request).getProto();
    // send the message to the server and wrap the returned result into a response
    try {
        return new SubmitApplicationResponsePBImpl(proxy.submitApplication(null, requestProto));
    } catch (ServiceException e) {
        RPCUtil.unwrapAndThrowException(e);
        return null;
    }
}
ApplicationClientProtocolPBServiceImpl.java
public SubmitApplicationResponseProto submitApplication(RpcController arg0, SubmitApplicationRequestProto proto) throws ServiceException {
    // the server rebuilds the message from the protobuf request
    SubmitApplicationRequestPBImpl request = new SubmitApplicationRequestPBImpl(proto);
    ... ...
    SubmitApplicationResponse response = real.submitApplication(request);
    return ((SubmitApplicationResponsePBImpl) response).getProto();
    ... ...
}
ClientRMService.java
public SubmitApplicationResponse submitApplication(SubmitApplicationRequest request) throws
YarnException {
... ...
// hand the submission over to Yarn's RMAppManager, which actually submits the application
this.rmAppManager.submitApplication(submissionContext, System.currentTimeMillis(), user);
... ...
}
In per-job mode, the entry point that the AM container loads and runs is the main() method of YarnJobClusterEntrypoint:
YarnJobClusterEntrypoint.java