Flink源码-2-Flink提交任务分析

代码示例

public class WorldCount {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> dataStream = env
                .socketTextStream("localhost", Integer.parseInt(args[0]))
                .flatMap(new Splitter())
                .keyBy(0)
                .timeWindow(Time.seconds(5))
                .sum(1);

        dataStream.print();

        env.execute("Window WordCount");
    }

    public static class Splitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String sentence, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word : sentence.split(" ")) {
                out.collect(new Tuple2<String, Integer>(word, 1));
            }
        }
    }
}

启动

bin/flink run -p 2 /home/hadoop/testjar/flink-test-1.9-1.0-SNAPSHOT.jar 9010

入口

org.apache.flink.client.cli.CliFrontend

public static void main(final String[] args) {
		EnvironmentInformation.logEnvironmentInfo(LOG, "Command Line Client", args);

		//这是部分主要是解析命令

		try {
			final CliFrontend cli = new CliFrontend(
				configuration,
				customCommandLines);

			SecurityUtils.install(new SecurityConfiguration(cli.configuration));
			int retCode = SecurityUtils.getInstalledContext()
					.runSecured(() -> cli.parseParameters(args));
			System.exit(retCode);
		}
		catch (Throwable t) {
			final Throwable strippedThrowable = ExceptionUtils.stripException(t, UndeclaredThrowableException.class);
			LOG.error("Fatal error while running command line interface.", strippedThrowable);
			strippedThrowable.printStackTrace();
			System.exit(31);
		}
	}

根据参数走到run这一步

 */
	protected void run(String[] args) throws Exception {
		LOG.info("Running 'run' command.");

		final Options commandOptions = CliFrontendParser.getRunCommandOptions();
		final CommandLine commandLine = getCommandLine(commandOptions, args, true);

		final ProgramOptions programOptions = new ProgramOptions(commandLine);

		// evaluate help flag
		if (commandLine.hasOption(HELP_OPTION.getOpt())) {
			CliFrontendParser.printHelpForRun(customCommandLines);
			return;
		}

		if (!programOptions.isPython()) {
			// Java program should be specified a JAR file
			if (programOptions.getJarFilePath() == null) {
				throw new CliArgsException("Java program should be specified a JAR file.");
			}
		}

		final PackagedProgram program;
		try {
			LOG.info("Building program from JAR file");
			program = buildProgram(programOptions);
		}
		catch (FileNotFoundException e) {
			throw new CliArgsException("Could not build the program from JAR file.", e);
		}

		final List<URL> jobJars = program.getJobJarAndDependencies();
		final Configuration effectiveConfiguration =
				getEffectiveConfiguration(commandLine, programOptions, jobJars);

		LOG.debug("Effective executor configuration: {}", effectiveConfiguration);

		try {
			executeProgram(effectiveConfiguration, program);
		} finally {
			program.deleteExtractedLibraries();
		}
	}

executeProgram方法

得到程序jar包,找到主类,反射调用方法main执行


public static void executeProgram(
			PipelineExecutorServiceLoader executorServiceLoader,
			Configuration configuration,
			PackagedProgram program) throws ProgramInvocationException {
		checkNotNull(executorServiceLoader);
		final ClassLoader userCodeClassLoader = program.getUserCodeClassLoader();
		final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
		try {
			Thread.currentThread().setContextClassLoader(userCodeClassLoader);

			LOG.info("Starting program (detached: {})", !configuration.getBoolean(DeploymentOptions.ATTACHED));

			ContextEnvironmentFactory factory = new ContextEnvironmentFactory(
					executorServiceLoader,
					configuration,
					userCodeClassLoader);
			ContextEnvironment.setAsContext(factory);

			try {
				program.invokeInteractiveModeForExecution();
			} finally {
				ContextEnvironment.unsetContext();
			}
		} finally {
			Thread.currentThread().setContextClassLoader(contextClassLoader);
		}
	}

开始执行自己的代码

 env.execute("Window WordCount");

点进来之后
StreamContextEnvironment 这个类

@Override
	public JobExecutionResult execute(StreamGraph streamGraph) throws Exception {
		transformations.clear();

		// execute the programs
		if (ctx instanceof DetachedEnvironment) {
			LOG.warn("Job was executed in detached mode, the results will be available on completion.");
			((DetachedEnvironment) ctx).setDetachedPlan(streamGraph);
			return DetachedEnvironment.DetachedJobExecutionResult.INSTANCE;
		} else {
			return ctx
				.getClient()
				.run(streamGraph, ctx.getJars(), ctx.getClasspaths(), ctx.getUserCodeClassLoader(), ctx.getSavepointRestoreSettings())
				.getJobExecutionResult();
		}
	}

然后绕到
AbstractSessionClusterExecutor 这个类
提交的数据结构是这个JobGraph

public CompletableFuture<JobClient> execute(@Nonnull final Pipeline pipeline, @Nonnull final Configuration configuration) throws Exception {
		final JobGraph jobGraph = ExecutorUtils.getJobGraph(pipeline, configuration);

		try (final ClusterDescriptor<ClusterID> clusterDescriptor = clusterClientFactory.createClusterDescriptor(configuration)) {
			final ClusterID clusterID = clusterClientFactory.getClusterId(configuration);
			checkState(clusterID != null);

			final ClusterClientProvider<ClusterID> clusterClientProvider = clusterDescriptor.retrieve(clusterID);
			ClusterClient<ClusterID> clusterClient = clusterClientProvider.getClusterClient();
			return clusterClient
			        //开始提交任务
					.submitJob(jobGraph)
					.thenApplyAsync(jobID -> (JobClient) new ClusterClientJobClientAdapter<>(
							clusterClientProvider,
							jobID))
					.whenComplete((ignored1, ignored2) -> clusterClient.close());
		}
	}

最后绕到这个
RestClusterClient类,提交

@Override
	public CompletableFuture<JobID> submitJob(@Nonnull JobGraph jobGraph) {
	//请求出去
	    final CompletableFuture<JobSubmitResponseBody> submissionFuture = requestFuture.thenCompose(
			requestAndFileUploads -> sendRetriableRequest(
				JobSubmitHeaders.getInstance(),
				EmptyMessageParameters.getInstance(),
				requestAndFileUploads.f0,
				requestAndFileUploads.f1,
				isConnectionProblemOrServiceUnavailable())
		);
	}

总结

根据一系统的命令解析,最后反射调用自己的代码,任务信息通过JobGraph这个数据结构send出去

你可能感兴趣的:(Flink,入门到实践,java,flink,stream)