FLINK源码阅读-FLINK LOCAL 模式启动过程

跟进 StreamExecutionEnvironment 可以看到,它的实现类里有一个 LocalStreamEnvironment,这就是 local 模式启动的入口。从 StreamExecutionEnvironment.execute() 进去:

/**
 * Triggers execution of the program under the default job name.
 *
 * <p>Delegates to {@code execute(String)} with {@code DEFAULT_JOB_NAME}.
 *
 * @return the result produced by {@code execute(String)}
 * @throws Exception whatever the delegated call throws
 */
public JobExecutionResult execute() throws Exception {
	return execute(DEFAULT_JOB_NAME);
}

/**
 * Triggers execution of the program under the given job name.
 *
 * <p>The name is checked for null first; then the stream graph obtained from
 * {@code getStreamGraph(jobName)} is handed to the {@code execute} overload that
 * accepts a stream graph.
 *
 * @param jobName name of the job; must not be null
 * @return the result of executing the stream graph built for {@code jobName}
 * @throws Exception whatever graph construction or the delegated call throws
 */
public JobExecutionResult execute(String jobName) throws Exception {
	Preconditions.checkNotNull(jobName, "Streaming Job name should not be null.");
	return execute(getStreamGraph(jobName));
}

可以看到 getStreamGraph(jobName) 先构建 StreamGraph(有关 Graph 的构建之后再说明)。
接着跟进 execute(getStreamGraph(jobName)),进入 LocalStreamEnvironment 的如下方法:

/**
 * Runs the given stream graph on an embedded {@code MiniCluster} and blocks until
 * {@code executeJobBlocking} returns.
 *
 * <p>The effective configuration is assembled in layers: the job graph's own
 * configuration first, then the managed-memory override, then this environment's
 * configuration on top. The cluster is always closed and the recorded transformations
 * are always cleared, whether the job succeeds or fails.
 *
 * @param streamGraph the stream graph to translate into a job graph and run
 * @return the result returned by the mini cluster for the job
 * @throws Exception if starting the cluster or running the job fails
 */
@Override
public JobExecutionResult execute(StreamGraph streamGraph) throws Exception {
	final JobGraph graph = streamGraph.getJobGraph();
	graph.setAllowQueuedScheduling(true);

	final Configuration effectiveConfig = new Configuration();
	effectiveConfig.addAll(graph.getJobConfiguration());
	effectiveConfig.setString(TaskManagerOptions.MANAGED_MEMORY_SIZE, "0");
	// Applied last so that the settings defined on this environment override the ones above.
	effectiveConfig.addAll(this.configuration);

	if (!effectiveConfig.contains(RestOptions.BIND_PORT)) {
		effectiveConfig.setString(RestOptions.BIND_PORT, "0");
	}

	// Slot count per TaskManager: read from the configuration, passing the job's
	// maximum parallelism as the fallback value.
	final int slotsPerTaskManager = effectiveConfig.getInteger(
		TaskManagerOptions.NUM_TASK_SLOTS,
		graph.getMaximumParallelism());

	final MiniClusterConfiguration clusterConfig = new MiniClusterConfiguration.Builder()
		.setConfiguration(effectiveConfig)
		.setNumSlotsPerTaskManager(slotsPerTaskManager)
		.build();

	if (LOG.isInfoEnabled()) {
		LOG.info("Running job on local embedded Flink mini cluster");
	}

	// Create the local mini cluster.
	final MiniCluster cluster = new MiniCluster(clusterConfig);

	try {
		cluster.start();

		// Record the port of the cluster's REST address in the configuration.
		final int restPort = cluster.getRestAddress().get().getPort();
		effectiveConfig.setInteger(RestOptions.PORT, restPort);

		return cluster.executeJobBlocking(graph);
	} finally {
		transformations.clear();
		cluster.close();
	}
}

具体从miniCluster.start()启动cluster开始,整个核心启动流程就在这块。

/**
 * Starts the mini cluster.
 *
 * <p>While holding {@code lock}, this brings up, in order: the metric registry, the RPC
 * service(s), the metric query service, the I/O executor, the high-availability services,
 * the blob server, the heartbeat services, the blob cache, the TaskManagers, the
 * dispatcher/resource-manager components, and finally the leader retrievers. Only after
 * all of that succeeds is a fresh termination future created and {@code running} set.
 *
 * @throws Exception if any startup step fails; {@code close()} is attempted first and any
 *     exception it throws is attached to the original one as suppressed
 */
public void start() throws Exception {
	synchronized (lock) {
		// Reject a second start while the cluster is already marked as running.
		checkState(!running, "MiniCluster is already running");
		LOG.info("Starting Flink Mini Cluster");
		LOG.debug("Using configuration {}", miniClusterConfiguration);

		final Configuration configuration = miniClusterConfiguration.getConfiguration();
		// True when the configured sharing mode is SHARED, i.e. all components use one RPC service.
		final boolean useSingleRpcService = miniClusterConfiguration.getRpcServiceSharing() == RpcServiceSharing.SHARED;

		try {
			initializeIOFormatClasses(configuration);
			LOG.info("Starting Metrics Registry");
			metricRegistry = createMetricRegistry(configuration);
				// bring up all the RPC services
			LOG.info("Starting RPC Service(s)");

			AkkaRpcServiceConfiguration akkaRpcServiceConfig = AkkaRpcServiceConfiguration.fromConfiguration(configuration);

			// Assigned exactly once in either branch below.
			final RpcServiceFactory dispatcherResourceManagreComponentRpcServiceFactory;

			if (useSingleRpcService) {
				// we always need the 'commonRpcService' for auxiliary calls
				// NOTE(review): the second argument is false here and true in the other branch;
				// presumably it toggles remote access - confirm against createRpcService.
				commonRpcService = createRpcService(akkaRpcServiceConfig, false, null);
				// The same factory (wrapping the common service) serves both the TaskManagers
				// and the dispatcher/resource-manager components.
				final CommonRpcServiceFactory commonRpcServiceFactory = new CommonRpcServiceFactory(commonRpcService);
				taskManagerRpcServiceFactory = commonRpcServiceFactory;
				dispatcherResourceManagreComponentRpcServiceFactory = commonRpcServiceFactory;
			} else {
				// we always need the 'commonRpcService' for auxiliary calls
				commonRpcService = createRpcService(akkaRpcServiceConfig, true, null);

				// start a new service per component, possibly with custom bind addresses
				final String jobManagerBindAddress = miniClusterConfiguration.getJobManagerBindAddress();
				final String taskManagerBindAddress = miniClusterConfiguration.getTaskManagerBindAddress();

				dispatcherResourceManagreComponentRpcServiceFactory = new DedicatedRpcServiceFactory(akkaRpcServiceConfig, jobManagerBindAddress);
				taskManagerRpcServiceFactory = new DedicatedRpcServiceFactory(akkaRpcServiceConfig, taskManagerBindAddress);
			}

			// Start a separate RPC service for metric queries, using the common RPC
			// service's address, and attach it to the metric registry.
			RpcService metricQueryServiceRpcService = MetricUtils.startMetricsRpcService(
				configuration,
				commonRpcService.getAddress());
			metricRegistry.startQueryService(metricQueryServiceRpcService, null);

			// Fixed-size thread pool with one thread per reported CPU core; it is handed
			// to the high-availability services created next.
			ioExecutor = Executors.newFixedThreadPool(
				Hardware.getNumberCPUCores(),
				new ExecutorThreadFactory("mini-cluster-io"));
			haServices = createHighAvailabilityServices(configuration, ioExecutor);

			blobServer = new BlobServer(configuration, haServices.createBlobStore());
			blobServer.start();

			heartbeatServices = HeartbeatServices.fromConfiguration(configuration);

			// The blob cache is pointed at the blob server started above: local host
			// address plus the server's port.
			blobCacheService = new BlobCacheService(
				configuration, haServices.createBlobStore(), new InetSocketAddress(InetAddress.getLocalHost(), blobServer.getPort())
			);

			// TaskManagers are started before the dispatcher/resource-manager components.
			startTaskManagers();

			MetricQueryServiceRetriever metricQueryServiceRetriever = new RpcMetricQueryServiceRetriever(metricRegistry.getMetricQueryServiceRpcService());

			dispatcherResourceManagerComponents.addAll(createDispatcherResourceManagerComponents(
				configuration,
				dispatcherResourceManagreComponentRpcServiceFactory,
				haServices,
				blobServer,
				heartbeatServices,
				metricRegistry,
				metricQueryServiceRetriever,
				new ShutDownFatalErrorHandler()
			));

			// Obtain the leader retrieval services from the HA services.
			resourceManagerLeaderRetriever = haServices.getResourceManagerLeaderRetriever();
			dispatcherLeaderRetriever = haServices.getDispatcherLeaderRetriever();
			webMonitorLeaderRetrievalService = haServices.getWebMonitorLeaderRetriever();

			// NOTE(review): the literals 20 and 20 ms are presumably a retry count and a
			// retry delay - confirm against the RpcGatewayRetriever constructor.
			dispatcherGatewayRetriever = new RpcGatewayRetriever<>(
				commonRpcService,
				DispatcherGateway.class,
				DispatcherId::fromUuid,
				20,
				Time.milliseconds(20L));
			resourceManagerGatewayRetriever = new RpcGatewayRetriever<>(
				commonRpcService,
				ResourceManagerGateway.class,
				ResourceManagerId::fromUuid,
				20,
				Time.milliseconds(20L));
			webMonitorLeaderRetriever = new LeaderRetriever();

			// Start each leader retrieval service with its gateway retriever as the listener.
			resourceManagerLeaderRetriever.start(resourceManagerGatewayRetriever);
			dispatcherLeaderRetriever.start(dispatcherGatewayRetriever);
			webMonitorLeaderRetrievalService.start(webMonitorLeaderRetriever);
		}
		catch (Exception e) {
			// cleanup everything
			try {
				close();
			} catch (Exception ee) {
				// Keep the startup failure as the primary exception; the cleanup failure rides along.
				e.addSuppressed(ee);
			}
			throw e;
		}

		// create a new termination future
		terminationFuture = new CompletableFuture<>();

		// now officially mark this as running
		running = true;

		LOG.info("Flink Mini Cluster started successfully");
	}
}

前面的代码在配置并启动一些 RPC 服务,如 metric、HA 等。接着 startTaskManagers() 负责启动 TaskManager(此处只是启动了 TaskManager 的 RPC 服务):

/**
 * Starts one task executor per TaskManager requested by the mini cluster configuration.
 *
 * @throws Exception if starting any task executor fails; executors started before the
 *     failure are not touched here
 */
@GuardedBy("lock")
private void startTaskManagers() throws Exception {
	final int taskManagerCount = miniClusterConfiguration.getNumTaskManagers();
	LOG.info("Starting {} TaskManger(s)", taskManagerCount);

	int started = 0;
	while (started < taskManagerCount) {
		startTaskExecutor();
		started++;
	}
}

然后是 Dispatcher、ResourceManager 等服务的启动。回到 LocalStreamEnvironment.execute():miniCluster 启动完成之后,通过 miniCluster.executeJobBlocking(jobGraph) 将 jobGraph 提交给 cluster 运行。跟进去可以发现如下代码:

// Once both the jar upload and the dispatcher gateway lookup have completed, submit the
// job graph through the gateway. submitJob itself returns a future, so the combined
// result is a future of a future; thenCompose(Function.identity()) flattens it.
final CompletableFuture<Acknowledge> acknowledgeCompletableFuture = jarUploadFuture
			.thenCombine(
				dispatcherGatewayFuture,
				(Void ack, DispatcherGateway dispatcherGateway) -> dispatcherGateway.submitJob(jobGraph, rpcTimeout))
			.thenCompose(Function.identity());

这里提交了 jobGraph。
看到这里发现一个问题:TaskManager、Dispatcher、ResourceManager 都启动了,那 JobManager 呢?
继续往下跟代码,可以发现:

/**
 * Submits the given job graph and reports the outcome as an acknowledgement.
 *
 * <p>{@code persistAndRunJob} is passed to {@code waitForTerminatingJobManager} together
 * with the job id and the job graph. If the resulting future fails, the job's data is
 * cleaned up and the failure is rethrown wrapped in a {@code JobSubmissionException}.
 *
 * @param jobGraph the job to submit
 * @return a future that completes with an acknowledgement on success, or exceptionally
 *     with a {@code CompletionException} wrapping a {@code JobSubmissionException}
 */
private CompletableFuture<Acknowledge> internalSubmitJob(JobGraph jobGraph) {
	log.info("Submitting job {} ({}).", jobGraph.getJobID(), jobGraph.getName());

	// Submit the job graph and run the JobMaster.
	final CompletableFuture<Acknowledge> submissionFuture =
		waitForTerminatingJobManager(jobGraph.getJobID(), jobGraph, this::persistAndRunJob)
			.thenApply(ignored -> Acknowledge.get());

	return submissionFuture.handleAsync(
		(ack, failure) -> {
			if (failure == null) {
				return ack;
			}

			cleanUpJobData(jobGraph.getJobID(), true);

			// Strip the CompletionException wrapper before logging and rethrowing.
			final Throwable cause = ExceptionUtils.stripCompletionException(failure);
			log.error("Failed to submit job {}.", jobGraph.getJobID(), cause);
			throw new CompletionException(
				new JobSubmissionException(jobGraph.getJobID(), "Failed to submit job.", cause));
		},
		getRpcService().getExecutor());
}

重点在这一行:final CompletableFuture<Acknowledge> persistAndRunFuture = waitForTerminatingJobManager(jobGraph.getJobID(), jobGraph, this::persistAndRunJob)。看看 this::persistAndRunJob 方法做了什么操作:

/**
 * Stores the job graph in the job graph store and then runs the job.
 *
 * <p>If the run future completes exceptionally, the job graph is removed from the
 * store again.
 *
 * @param jobGraph the job to persist and run
 * @return the future returned by {@code runJob}, with the rollback step attached
 * @throws Exception if storing the job graph fails
 */
private CompletableFuture<Void> persistAndRunJob(JobGraph jobGraph) throws Exception {
	// Persist first; the job is only started once the graph has been stored.
	jobGraphStore.putJobGraph(jobGraph);

	return runJob(jobGraph).whenComplete(
		BiConsumerWithException.unchecked((Object unused, Throwable failure) -> {
			if (failure == null) {
				return;
			}
			// The run failed: undo the store entry written above.
			jobGraphStore.removeJobGraph(jobGraph.getJobID());
		}));
}

/**
 * Creates the job manager runner for the given job and registers its future under the
 * job id.
 *
 * <p>A job id that is already registered fails the state check. If creating the runner
 * fails, the registration is removed again on the main thread executor.
 *
 * @param jobGraph the job to run
 * @return a value-less future that completes when the runner future completes
 */
private CompletableFuture<Void> runJob(JobGraph jobGraph) {
	Preconditions.checkState(!jobManagerRunnerFutures.containsKey(jobGraph.getJobID()));

	final CompletableFuture<JobManagerRunner> runnerFuture = createJobManagerRunner(jobGraph);
	jobManagerRunnerFutures.put(jobGraph.getJobID(), runnerFuture);

	return runnerFuture
		.thenApply(FunctionUtils.nullFn())
		.whenCompleteAsync(
			(unused, failure) -> {
				if (failure == null) {
					return;
				}
				// Runner creation failed: drop the entry registered above.
				jobManagerRunnerFutures.remove(jobGraph.getJobID());
			},
			getMainThreadExecutor());
}

看到这里也差不多了,启动JobManager运行jobGraph。

个人博客

你可能感兴趣的:(大数据,Flink,Flink,源码)