Flink 源码解析 Flink-Connector (Kafka)

 
<dependency>
  <groupId>org.apache.flink</groupId>
  <artifactId>flink-connector-kafka_2.11</artifactId>
  <version>1.11.1</version>
</dependency>

FlinkKafkaConsumerBase:
所有 Flink Kafka Consumer 数据源的基类。这个类实现了所有 Kafka 版本的公共行为。
Flink 源码解析 Flink-Connector (Kafka)_第1张图片

open方法

	/**
	 * Prepares this consumer subtask before it runs: determines the offset commit mode,
	 * creates and opens the partition discoverer, and fills
	 * {@code subscribedPartitionsToStartOffsets} either from restored state or from the
	 * configured startup mode. Finally opens the deserializer.
	 *
	 * @param configuration the configuration passed to this function (not read here)
	 * @throws Exception if partition discovery or opening the deserializer fails
	 */
	@Override
	public void open(Configuration configuration) throws Exception {
		// determine the offset commit mode
		this.offsetCommitMode = OffsetCommitModes.fromConfiguration(
				getIsAutoCommitEnabled(),
				enableCommitOnCheckpoints,
				((StreamingRuntimeContext) getRuntimeContext()).isCheckpointingEnabled());

		// create the partition discoverer
		this.partitionDiscoverer = createPartitionDiscoverer(
				topicsDescriptor,
				getRuntimeContext().getIndexOfThisSubtask(),
				getRuntimeContext().getNumberOfParallelSubtasks());
		// open the discoverer so that it can query Kafka for topic / partition metadata
		this.partitionDiscoverer.open();

		subscribedPartitionsToStartOffsets = new HashMap<>();
		// discover the partitions that this subtask should subscribe to
		final List<KafkaTopicPartition> allPartitions = partitionDiscoverer.discoverPartitions();
		// restoredState is non-null only when the consumer was restored from a checkpoint
		if (restoredState != null) {
			for (KafkaTopicPartition partition : allPartitions) {
				// partitions that are missing from the restored state are read from EARLIEST_OFFSET
				if (!restoredState.containsKey(partition)) {
					restoredState.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET);
				}
			}

			for (Map.Entry<KafkaTopicPartition, Long> restoredStateEntry : restoredState.entrySet()) {
				// seed the partition discoverer with the union state while filtering out
				// restored partitions that should not be subscribed by this subtask
				// (assign() returns the index of the subtask that owns the partition)
				if (KafkaTopicPartitionAssigner.assign(
					restoredStateEntry.getKey(), getRuntimeContext().getNumberOfParallelSubtasks())
						== getRuntimeContext().getIndexOfThisSubtask()){
					// key = topic partition, value = offset to resume reading from
					subscribedPartitionsToStartOffsets.put(
						restoredStateEntry.getKey(), restoredStateEntry.getValue());
				}
			}
			// drop restored partitions whose topic no longer matches the current topics descriptor
			if (filterRestoredPartitionsWithCurrentTopicsDescriptor) {
				subscribedPartitionsToStartOffsets.entrySet().removeIf(entry -> {
					if (!topicsDescriptor.isMatchingTopic(entry.getKey().getTopic())) {
						LOG.warn(
							"{} is removed from subscribed partitions since it is no longer associated with topics descriptor of current execution.",
							entry.getKey());
						return true;
					}
					return false;
				});
			}

			LOG.info("Consumer subtask {} will start reading {} partitions with offsets in restored state: {}",
				getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets);
		} else {
			// use the partition discoverer to fetch the initial seed partitions,
			// and set their initial offsets depending on the startup mode.
			// for SPECIFIC_OFFSETS and TIMESTAMP modes, we set the specific offsets now;
			// for other modes (EARLIEST, LATEST, and GROUP_OFFSETS), the offset is lazily determined
			// when the partition is actually read.
			switch (startupMode) {
				// use the offsets the user supplied per partition
				case SPECIFIC_OFFSETS:
					if (specificStartupOffsets == null) {
						throw new IllegalStateException(
							"Startup mode for the consumer set to " + StartupMode.SPECIFIC_OFFSETS +
								", but no specific offsets were specified.");
					}

					for (KafkaTopicPartition seedPartition : allPartitions) {
						Long specificOffset = specificStartupOffsets.get(seedPartition);
						if (specificOffset != null) {
							// since the specified offsets represent the next record to read, we subtract
							// it by one so that the initial state of the consumer will be correct
							subscribedPartitionsToStartOffsets.put(seedPartition, specificOffset - 1);
						} else {
							// default to group offset behaviour if the user-provided specific offsets
							// do not contain a value for this partition
							subscribedPartitionsToStartOffsets.put(seedPartition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
						}
					}

					break;
				// resolve the start offset of every partition from the configured timestamp
				case TIMESTAMP:
					if (startupOffsetsTimestamp == null) {
						throw new IllegalStateException(
							"Startup mode for the consumer set to " + StartupMode.TIMESTAMP +
								", but no startup timestamp was specified.");
					}

					for (Map.Entry<KafkaTopicPartition, Long> partitionToOffset
							: fetchOffsetsWithTimestamp(allPartitions, startupOffsetsTimestamp).entrySet()) {
						subscribedPartitionsToStartOffsets.put(
							partitionToOffset.getKey(),
							(partitionToOffset.getValue() == null)
									// if an offset cannot be retrieved for a partition with the given timestamp,
									// we default to using the latest offset for the partition
									? KafkaTopicPartitionStateSentinel.LATEST_OFFSET
									// since the specified offsets represent the next record to read, we subtract
									// it by one so that the initial state of the consumer will be correct
									: partitionToOffset.getValue() - 1);
					}

					break;
				default:
					// remaining modes (EARLIEST, LATEST, GROUP_OFFSETS): store the mode's sentinel value;
					// the actual offset is determined lazily when the partition is read
					for (KafkaTopicPartition seedPartition : allPartitions) {
						subscribedPartitionsToStartOffsets.put(seedPartition, startupMode.getStateSentinel());
					}
			}
			// log what this subtask is going to read, if it was given any partition at all
			if (!subscribedPartitionsToStartOffsets.isEmpty()) {
				switch (startupMode) {
					case EARLIEST:
						LOG.info("Consumer subtask {} will start reading the following {} partitions from the earliest offsets: {}",
							getRuntimeContext().getIndexOfThisSubtask(),
							subscribedPartitionsToStartOffsets.size(),
							subscribedPartitionsToStartOffsets.keySet());
						break;
					case LATEST:
						LOG.info("Consumer subtask {} will start reading the following {} partitions from the latest offsets: {}",
							getRuntimeContext().getIndexOfThisSubtask(),
							subscribedPartitionsToStartOffsets.size(),
							subscribedPartitionsToStartOffsets.keySet());
						break;
					case TIMESTAMP:
						LOG.info("Consumer subtask {} will start reading the following {} partitions from timestamp {}: {}",
							getRuntimeContext().getIndexOfThisSubtask(),
							subscribedPartitionsToStartOffsets.size(),
							startupOffsetsTimestamp,
							subscribedPartitionsToStartOffsets.keySet());
						break;
					case SPECIFIC_OFFSETS:
						LOG.info("Consumer subtask {} will start reading the following {} partitions from the specified startup offsets {}: {}",
							getRuntimeContext().getIndexOfThisSubtask(),
							subscribedPartitionsToStartOffsets.size(),
							specificStartupOffsets,
							subscribedPartitionsToStartOffsets.keySet());

						List<KafkaTopicPartition> partitionsDefaultedToGroupOffsets = new ArrayList<>(subscribedPartitionsToStartOffsets.size());
						for (Map.Entry<KafkaTopicPartition, Long> subscribedPartition : subscribedPartitionsToStartOffsets.entrySet()) {
							// collect the partitions for which no specific offset was supplied
							// (they were given the GROUP_OFFSET sentinel above)
							if (subscribedPartition.getValue() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
								partitionsDefaultedToGroupOffsets.add(subscribedPartition.getKey());
							}
						}

						if (partitionsDefaultedToGroupOffsets.size() > 0) {
							LOG.warn("Consumer subtask {} cannot find offsets for the following {} partitions in the specified startup offsets: {}" +
									"; their startup offsets will be defaulted to their committed group offsets in Kafka.",
								getRuntimeContext().getIndexOfThisSubtask(),
								partitionsDefaultedToGroupOffsets.size(),
								partitionsDefaultedToGroupOffsets);
						}
						break;
					case GROUP_OFFSETS:
						LOG.info("Consumer subtask {} will start reading the following {} partitions from the committed group offsets in Kafka: {}",
							getRuntimeContext().getIndexOfThisSubtask(),
							subscribedPartitionsToStartOffsets.size(),
							subscribedPartitionsToStartOffsets.keySet());
				}
			} else {
				// no partition was assigned to this subtask
				LOG.info("Consumer subtask {} initially has no partitions to read from.",
					getRuntimeContext().getIndexOfThisSubtask());
			}
		}

		this.deserializer.open(
				RuntimeContextInitializationContextAdapters.deserializationAdapter(
						getRuntimeContext(),
						metricGroup -> metricGroup.addGroup("user")
				)
		);
	}

(1)指定offset的提交模式

OffsetCommitMode:表示偏移量如何从外部提交回 Kafka brokers / Zookeeper 的行为,它的确切值是在运行时由消费者子任务确定的。
在使用 Kafka 时,一般都会启用 checkpoint,并在 checkpoint 完成时提交 offset。

	/**
	 * Derives the offset commit mode from the three relevant switches.
	 *
	 * @param enableAutoCommit whether auto commit is enabled in the Kafka properties
	 * @param enableCommitOnCheckpoint whether committing offsets on checkpoints is enabled
	 * @param enableCheckpointing whether checkpointing is enabled for the job
	 * @return the resulting offset commit mode
	 */
	public static OffsetCommitMode fromConfiguration(
			boolean enableAutoCommit,
			boolean enableCommitOnCheckpoint,
			boolean enableCheckpointing) {
		if (!enableCheckpointing) {
			// without checkpointing, only the Kafka auto-commit setting matters
			if (enableAutoCommit) {
				return OffsetCommitMode.KAFKA_PERIODIC;
			}
			return OffsetCommitMode.DISABLED;
		}

		// with checkpointing, only the commit-on-checkpoint setting matters
		if (enableCommitOnCheckpoint) {
			return OffsetCommitMode.ON_CHECKPOINTS;
		}
		return OffsetCommitMode.DISABLED;
	}

在这里解释OffsetCommitMode

/**
 * The ways in which offsets can be committed back to Kafka.
 */
public enum OffsetCommitMode {

	/** Offset committing is switched off entirely. */
	DISABLED,

	/** Offsets are committed back to Kafka only once a checkpoint has completed. */
	ON_CHECKPOINTS,

	/** Offsets are committed periodically by the auto commit functionality of the internal Kafka client. */
	KAFKA_PERIODIC
}

(2)接下来创建和启动分区发现工具

	/**
	 * Creates the partition discoverer that is used to find new partitions for this subtask.
	 *
	 * @param topicsDescriptor Descriptor that describes whether we are discovering partitions for fixed topics or a topic pattern.
	 * @param indexOfThisSubtask The index of this consumer subtask.
	 * @param numParallelSubtasks The total number of parallel consumer subtasks (i.e. the parallelism).
	 * @return The instantiated partition discoverer
	 */
	protected abstract AbstractPartitionDiscoverer createPartitionDiscoverer(
			KafkaTopicsDescriptor topicsDescriptor,
			int indexOfThisSubtask,
			int numParallelSubtasks);

(3) 打开分区发现程序,初始化所有需要的Kafka连接。创建出KafkaConsumer对象。

	/**
	 * Opens the partition discoverer, initializing all required Kafka connections.
	 *
	 * 

NOTE: thread-safety is not guaranteed. 这个是线程不安全的 */ public void open() throws Exception { closed = false; initializeConnections(); } /** Establish the required connections in order to fetch topics and partitions metadata. * 建立连接以获取主题和分区元数据 * */ protected abstract void initializeConnections() throws Exception; @Override protected void initializeConnections() { //创建kafkaConsumer对象 this.kafkaConsumer = new KafkaConsumer<>(kafkaProperties); }

(4)已订阅的分区列表,这里将它初始化

// the partitions subscribed by this subtask, mapped to the offset each one starts reading from
private Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets;
// initialised to an empty map here
subscribedPartitionsToStartOffsets = new HashMap<>();

(5)获取每个SubTask分配消费的topic以及对应分区

final List<KafkaTopicPartition> allPartitions = partitionDiscoverer.discoverPartitions();


	/**
	 * Runs one round of partition discovery.
	 *
	 * @return the partitions found in this round that were not discovered before and that
	 *         belong to this subtask
	 * @throws WakeupException if the discoverer was woken up before or during the call
	 * @throws ClosedException if the discoverer is closed
	 */
	public List<KafkaTopicPartition> discoverPartitions() throws WakeupException, ClosedException {
		// only proceed when the discoverer is neither closed nor woken up
		if (!closed && !wakeup) {
			try {
				List<KafkaTopicPartition> newDiscoveredPartitions;

				// (1) get all possible partitions, based on whether we are subscribed to fixed topics or a topic pattern
				if (topicsDescriptor.isFixedTopics()) {
					// fixed topics: fetch the partition metadata of exactly those topics
					newDiscoveredPartitions = getAllPartitionsForTopics(topicsDescriptor.getFixedTopics());
				} else {
					List<String> matchedTopics = getAllTopics();

					// retain topics that match the pattern
					Iterator<String> iter = matchedTopics.iterator();
					while (iter.hasNext()) {
						if (!topicsDescriptor.isMatchingTopic(iter.next())) {
							iter.remove();
						}
					}
					// fetch partitions only when at least one topic matched
					if (matchedTopics.size() != 0) {
						// get partitions only for matched topics
						newDiscoveredPartitions = getAllPartitionsForTopics(matchedTopics);
					} else {
						// nothing matched; handled by the null check below
						newDiscoveredPartitions = null;
					}
				}

				// (2) eliminate partition that are old partitions or should not be subscribed by this subtask
				if (newDiscoveredPartitions == null || newDiscoveredPartitions.isEmpty()) {
					throw new RuntimeException("Unable to retrieve any partitions with KafkaTopicsDescriptor: " + topicsDescriptor);
				} else {
					Iterator<KafkaTopicPartition> iter = newDiscoveredPartitions.iterator();
					KafkaTopicPartition nextPartition;
					while (iter.hasNext()) {
						nextPartition = iter.next();
						// keep the partition only if it is new and assigned to this subtask
						if (!setAndCheckDiscoveredPartition(nextPartition)) {
							iter.remove();
						}
					}
				}
				// what remains is the set of partitions this subtask should subscribe to
				return newDiscoveredPartitions;
			} catch (WakeupException e) {
				// the actual topic / partition metadata fetching methods
				// may be woken up midway; reset the wakeup flag and rethrow
				wakeup = false;
				throw e;
			}
		} else if (!closed && wakeup) {
			// may have been woken up before the method call
			wakeup = false;
			throw new WakeupException();
		} else {
			throw new ClosedException();
		}
	}

	/**
	 * Fetches the partitions of all given topics from Kafka.
	 *
	 * @param topics the topics to look up
	 * @return one {@code KafkaTopicPartition} per partition of every given topic
	 * @throws WakeupException if the Kafka consumer was woken up during the lookup
	 * @throws RuntimeException if the partitions of a topic could not be fetched
	 */
	@Override
	protected List<KafkaTopicPartition> getAllPartitionsForTopics(List<String> topics) throws WakeupException, RuntimeException {
		final List<KafkaTopicPartition> partitions = new LinkedList<>();

		try {
			for (String topic : topics) {
				// fetch the partition metadata of the topic
				final List<PartitionInfo> kafkaPartitions = kafkaConsumer.partitionsFor(topic);

				if (kafkaPartitions == null) {
					throw new RuntimeException(String.format("Could not fetch partitions for %s. Make sure that the topic exists.", topic));
				}

				for (PartitionInfo partitionInfo : kafkaPartitions) {
					partitions.add(new KafkaTopicPartition(partitionInfo.topic(), partitionInfo.partition()));
				}
			}
		} catch (org.apache.kafka.common.errors.WakeupException e) {
			// rethrow our own wakeup exception
			throw new WakeupException();
		}

		return partitions;
	}


    /**
	 * Records a partition as discovered and reports whether this subtask should subscribe to it.
	 *
	 * @param partition the partition to record and check
	 * @return {@code true} only if the partition was not discovered before and is assigned to this subtask
	 */
    public boolean setAndCheckDiscoveredPartition(KafkaTopicPartition partition) {
		// already known partitions are never reported again
		if (!isUndiscoveredPartition(partition)) {
			return false;
		}

		discoveredPartitions.add(partition);

		final int owningSubtask = KafkaTopicPartitionAssigner.assign(partition, numParallelSubtasks);
		return owningSubtask == indexOfThisSubtask;
	}

	/**
	 * Returns the index of the subtask that a given Kafka partition is assigned to.
	 *
	 * @param partition the Kafka partition to assign
	 * @param numParallelSubtasks the total number of parallel subtasks
	 * @return the index of the target subtask
	 */
	public static int assign(KafkaTopicPartition partition, int numParallelSubtasks) {
		// masking with 0x7FFFFFFF clears the sign bit, so the start index is never negative
		final int nonNegativeTopicHash = (partition.getTopic().hashCode() * 31) & 0x7FFFFFFF;
		final int startIndex = nonNegativeTopicHash % numParallelSubtasks;

		// here, the assumption is that the id of Kafka partitions are always ascending
		// starting from 0, and therefore can be used directly as the offset clockwise from the start index
		final int shifted = startIndex + partition.getPartition();
		return shifted % numParallelSubtasks;
	}



    // Returns the index of the target subtask that a specific Kafka partition should be assigned to.
    // Partitions of one topic are handed out round-robin, starting at a subtask derived from the topic name.
	public static int assign(KafkaTopicPartition partition, int numParallelSubtasks) {
		final int topicHash = partition.getTopic().hashCode() * 31;
		// clear the sign bit so that the modulo result cannot be negative
		final int startIndex = (topicHash & 0x7FFFFFFF) % numParallelSubtasks;
		final int partitionId = partition.getPartition();

		// here, the assumption is that the id of Kafka partitions are always ascending
		// starting from 0, and therefore can be used directly as the offset clockwise from the start index
		return (startIndex + partitionId) % numParallelSubtasks;
	}

(6)restoredState的赋值  FlinkKafkaConsumerBase实现了CheckpointedFunction所以肯定会有snapshotState()和initializeState()方法 此时直接看initializeState()

public abstract class FlinkKafkaConsumerBase extends RichParallelSourceFunction implements
		CheckpointListener,
		ResultTypeQueryable,
		CheckpointedFunction




	/**
	 * Registers the union list state holding the partition offsets and, when the function is
	 * being restored, copies the restored offsets into {@code restoredState}.
	 *
	 * @param context gives access to the operator state store and the restore flag
	 * @throws Exception if the state cannot be accessed
	 */
	@Override
	public final void initializeState(FunctionInitializationContext context) throws Exception {

		OperatorStateStore stateStore = context.getOperatorStateStore();

		this.unionOffsetStates = stateStore.getUnionListState(new ListStateDescriptor<>(OFFSETS_STATE_NAME,
			createStateSerializer(getRuntimeContext().getExecutionConfig())));
		// isRestored() is true when the state was restored from a previous snapshot
		if (context.isRestored()) {
			restoredState = new TreeMap<>(new KafkaTopicPartition.Comparator());

			// populate actual holder for restored state
			for (Tuple2<KafkaTopicPartition, Long> kafkaOffset : unionOffsetStates.get()) {
				restoredState.put(kafkaOffset.f0, kafkaOffset.f1);
			}

			LOG.info("Consumer subtask {} restored state: {}.", getRuntimeContext().getIndexOfThisSubtask(), restoredState);
		} else {
			LOG.info("Consumer subtask {} has no restore state.", getRuntimeContext().getIndexOfThisSubtask());
		}
	}

Run方法

	/**
	 * Main work method of the source: sets up the commit metrics and the commit callback,
	 * creates the fetcher, and then runs the fetch loop, with or without partition discovery.
	 *
	 * @param sourceContext the context used to emit records and to mark the source idle
	 * @throws Exception if the partitions were never set, or if the fetcher fails
	 */
	@Override
	public void run(SourceContext sourceContext) throws Exception {
		// the map of subscribed partitions must have been created before run() is called
		if (subscribedPartitionsToStartOffsets == null) {
			throw new Exception("The partitions were not set for the consumer");
		}

		// initialize commit metrics and default offset callback method
		// counter for successful Kafka offset commits
		this.successfulCommits = this.getRuntimeContext().getMetricGroup().counter(COMMITS_SUCCEEDED_METRICS_COUNTER);
		// counter for failed Kafka offset commits
		this.failedCommits =  this.getRuntimeContext().getMetricGroup().counter(COMMITS_FAILED_METRICS_COUNTER);
		// index of the current subtask, used in the log message below
		final int subtaskIndex = this.getRuntimeContext().getIndexOfThisSubtask();
		// commit callback: bump the success counter on success, log and bump the failure counter on error
		this.offsetCommitCallback = new KafkaCommitCallback() {
			@Override
			public void onSuccess() {
				successfulCommits.inc();
			}

			@Override
			public void onException(Throwable cause) {
				LOG.warn(String.format("Consumer subtask %d failed async Kafka commit.", subtaskIndex), cause);
				failedCommits.inc();
			}
		};

		// mark the subtask as temporarily idle if there are no initial seed partitions;
		// once this subtask discovers some partitions and starts collecting records, the subtask's
		// status will automatically be triggered back to be active.
		// NOTE(review): presumably this keeps an idle subtask from holding back downstream watermarks - confirm
		if (subscribedPartitionsToStartOffsets.isEmpty()) {
			sourceContext.markAsTemporarilyIdle();
		}

		LOG.info("Consumer subtask {} creating fetcher with offsets {}.",
			getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets);
		// from this point forward:
		//   - 'snapshotState' will draw offsets from the fetcher,
		//     instead of being built from `subscribedPartitionsToStartOffsets`
		//   - 'notifyCheckpointComplete' will start to do work (i.e. commit offsets to
		//     Kafka through the fetcher, if configured to do so)
		// create the fetcher that is started by the fetch loop below
		this.kafkaFetcher = createFetcher(
				sourceContext,
				subscribedPartitionsToStartOffsets,
				watermarkStrategy,
				(StreamingRuntimeContext) getRuntimeContext(),
				offsetCommitMode,
				getRuntimeContext().getMetricGroup().addGroup(KAFKA_CONSUMER_METRICS_GROUP),
				useMetrics);
		// return right away if the source is no longer running
		if (!running) {
			return;
		}

		// depending on whether we were restored with the current state version (1.3),
		// remaining logic branches off into 2 paths:
		//  1) New state - partition discovery loop executed as separate thread, with this
		//                 thread running the main fetcher loop
		//  2) Old state - partition discovery is disabled and only the main fetcher loop is executed
		// with discovery disabled only the fetch loop runs; otherwise the discovery loop is started as well
		if (discoveryIntervalMillis == PARTITION_DISCOVERY_DISABLED) {
			// run the loop that keeps pulling records
			kafkaFetcher.runFetchLoop();
		} else {
			runWithPartitionDiscovery();
		}
	}

(1)createFetcher()在创建Fetcher对象时会通过offsetCommitMode的模式判断是否关闭offset的自动提交。

	@Override
	protected AbstractFetcher createFetcher(
		SourceContext sourceContext,
		Map assignedPartitionsWithInitialOffsets,
		SerializedValue> watermarkStrategy,
		StreamingRuntimeContext runtimeContext,
		OffsetCommitMode offsetCommitMode,
		MetricGroup consumerMetricGroup,
		boolean useMetrics) throws Exception {

		// make sure that auto commit is disabled when our offset commit mode is ON_CHECKPOINTS;
		// this overwrites whatever setting the user configured in the properties
		adjustAutoCommitConfig(properties, offsetCommitMode);

}
        // NOTE(review): the original note says these statements come from "the parent class method";
        // presumably they are part of the fetcher's constructor - confirm
        // keep the deserializer
		this.deserializer = deserializer;
        // hand-over of data and exceptions between the consumer thread and the task thread
		this.handover = new Handover();
        // the thread that runs the KafkaConsumer and passes batches of records on to the fetcher
		this.consumerThread = new KafkaConsumerThread(
			LOG,
			handover,
			kafkaProperties,
			unassignedPartitionsQueue,
			getFetcherName() + " for " + taskNameWithSubtasks,
			pollTimeout,
			useMetrics,
			consumerMetricGroup,
			subtaskMetricGroup);
        // collector that gathers the deserialized records before they are emitted
		this.kafkaCollector = new KafkaCollector();

(2)如果配置了分区发现(即 discoveryIntervalMillis 不等于 PARTITION_DISCOVERY_DISABLED;分区发现默认是关闭的),则会启动分区发现线程

	/**
	 * Starts the partition discovery thread, runs the fetch loop on the calling thread and,
	 * once the fetch loop returns, stops the discovery thread and rethrows any error it recorded.
	 *
	 * @throws Exception if the fetch loop fails, or wrapping the error of the discovery loop
	 */
	private void runWithPartitionDiscovery() throws Exception {
		final AtomicReference<Exception> discoveryLoopErrorRef = new AtomicReference<>();
		// create and start the partition discovery thread
		createAndStartDiscoveryLoop(discoveryLoopErrorRef);
		// run the loop that keeps pulling records; blocks until the fetcher stops
		kafkaFetcher.runFetchLoop();

		// make sure that the partition discoverer is waked up so that
		// the discoveryLoopThread exits
		partitionDiscoverer.wakeup();
		// wait for the discovery thread to finish
		joinDiscoveryLoopThread();

		// rethrow any fetcher errors
		final Exception discoveryLoopError = discoveryLoopErrorRef.get();
		if (discoveryLoopError != null) {
			throw new RuntimeException(discoveryLoopError);
		}
	}

	/**
	 * Creates and starts the thread that repeatedly discovers new partitions and hands them
	 * to the fetcher.
	 *
	 * @param discoveryLoopErrorRef receives the exception that terminated the loop, if any
	 */
	private void createAndStartDiscoveryLoop(AtomicReference<Exception> discoveryLoopErrorRef) {
		discoveryLoopThread = new Thread(() -> {
			try {
				// --------------------- partition discovery loop ---------------------

				// throughout the loop, we always eagerly check if we are still running before
				// performing the next operation, so that we can escape the loop as soon as possible

				while (running) {
					if (LOG.isDebugEnabled()) {
						LOG.debug("Consumer subtask {} is trying to discover new partitions ...", getRuntimeContext().getIndexOfThisSubtask());
					}

					final List<KafkaTopicPartition> discoveredPartitions;
					try {
						// newly discovered partitions that this subtask should subscribe to
						discoveredPartitions = partitionDiscoverer.discoverPartitions();
					} catch (AbstractPartitionDiscoverer.WakeupException | AbstractPartitionDiscoverer.ClosedException e) {
						// the partition discoverer may have been closed or woken up before or during the discovery;
						// this would only happen if the consumer was canceled; simply escape the loop
						break;
					}

					// no need to add the discovered partitions if we were closed during the meantime
					if (running && !discoveredPartitions.isEmpty()) {
						// hand the new partitions to the fetcher
						kafkaFetcher.addDiscoveredPartitions(discoveredPartitions);
					}

					// do not waste any time sleeping if we're not running anymore
					if (running && discoveryIntervalMillis != 0) {
						try {
							Thread.sleep(discoveryIntervalMillis);
						} catch (InterruptedException iex) {
							// may be interrupted if the consumer was canceled midway; simply escape the loop
							break;
						}
					}
				}
			} catch (Exception e) {
				discoveryLoopErrorRef.set(e);
			} finally {
				// calling cancel will also let the fetcher loop escape
				// (if not running, cancel() was already called)
				if (running) {
					cancel();
				}
			}
		}, "Kafka Partition Discovery for " + getRuntimeContext().getTaskNameWithSubtasks());
		// start the discovery thread
		discoveryLoopThread.start();
	}

(3)runFetchLoop()循环拉取数据

	/**
	 * Starts the consumer thread and, while running, takes record batches from the handover
	 * and passes the records of each subscribed partition to
	 * {@code partitionConsumerRecordsHandler}. Shuts the consumer thread down on exit.
	 *
	 * @throws Exception if the handover reports an error from the consumer thread, or if
	 *                   handling the records fails
	 */
	@Override
	public void runFetchLoop() throws Exception {
		try {
			// kick off the actual Kafka consumer
			consumerThread.start();

			while (running) {
				// this blocks until we get the next records
				// it automatically re-throws exceptions encountered in the consumer thread
				final ConsumerRecords<byte[], byte[]> records = handover.pollNext();

				// get the records for each topic partition
				for (KafkaTopicPartitionState<T, TopicPartition> partition : subscribedPartitionStates()) {
					// the records of this batch that belong to the current partition
					List<ConsumerRecord<byte[], byte[]>> partitionRecords =
						records.records(partition.getKafkaPartitionHandle());

					partitionConsumerRecordsHandler(partitionRecords, partition);
				}
			}
		}
		finally {
			// this signals the consumer thread that no more work is to be done
			consumerThread.shutdown();
		}

		// on a clean exit, wait for the runner thread
		try {
			consumerThread.join();
		}
		catch (InterruptedException e) {
			// may be the result of a wake-up interruption after an exception.
			// we ignore this here and only restore the interruption state
			Thread.currentThread().interrupt();
		}
	}

(4)consumerThread.start() consumerThread线程的启动

	@Override
	public void run() {
		// early exit check
		if (!running) {
			return;
		}

		// this is the means to talk to FlinkKafkaConsumer's main thread
		//handover 这是与FlinkKafkaConsumer的main保持会话
		final Handover handover = this.handover;

		// This method initializes the KafkaConsumer and guarantees it is torn down properly.
		// This is important, because the consumer has multi-threading issues,
		// including concurrent 'close()' calls.
		try {
			this.consumer = getConsumer(kafkaProperties);
		}
		catch (Throwable t) {
			handover.reportError(t);
			return;
		}

		// from here on, the consumer is guaranteed to be closed properly
		try {
			// register Kafka's very own metrics in Flink's metric reporters
			if (useMetrics) {
				// register Kafka metrics to Flink
				Map metrics = consumer.metrics();
				if (metrics == null) {
					// MapR's Kafka implementation returns null here.
					log.info("Consumer implementation does not support metrics");
				} else {
					// we have Kafka metrics, register them
					for (Map.Entry metric: metrics.entrySet()) {
						consumerMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));

						// TODO this metric is kept for compatibility purposes; should remove in the future
						subtaskMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
					}
				}
			}

			// early exit check
			if (!running) {
				return;
			}

			// the latest bulk of records. May carry across the loop if the thread is woken up
			// from blocking on the handover
			ConsumerRecords records = null;

			// reused variable to hold found unassigned new partitions.
			// found partitions are not carried across loops using this variable;
			// they are carried across via re-adding them to the unassigned partitions queue
			List> newPartitions;

			// main fetch loop
			while (running) {

				// check if there is something to commit
				if (!commitInProgress) {
					// get and reset the work-to-be committed, so we don't repeatedly commit the same
					final Tuple2, KafkaCommitCallback> commitOffsetsAndCallback =
							nextOffsetsToCommit.getAndSet(null);

					if (commitOffsetsAndCallback != null) {
						log.debug("Sending async offset commit request to Kafka broker");

						// also record that a commit is already in progress
						// the order here matters! first set the flag, then send the commit command.
						commitInProgress = true;
						consumer.commitAsync(commitOffsetsAndCallback.f0, new CommitCallback(commitOffsetsAndCallback.f1));
					}
				}

				try {
					if (hasAssignedPartitions) {
						newPartitions = unassignedPartitionsQueue.pollBatch();
					}
					else {
						// if no assigned partitions block until we get at least one
						// instead of hot spinning this loop. We rely on a fact that
						// unassignedPartitionsQueue will be closed on a shutdown, so
						// we don't block indefinitely
						newPartitions = unassignedPartitionsQueue.getBatchBlocking();
					}
					if (newPartitions != null) {
						reassignPartitions(newPartitions);
					}
				} catch (AbortedReassignmentException e) {
					continue;
				}

				if (!hasAssignedPartitions) {
					// Without assigned partitions KafkaConsumer.poll will throw an exception
					continue;
				}

				// get the next batch of records, unless we did not manage to hand the old batch over
				if (records == null) {
					try {
						records = consumer.poll(pollTimeout);
					}
					catch (WakeupException we) {
						continue;
					}
				}

				try {
					//handover线程获取到了consumer消费的数据然后将数据发送
					handover.produce(records);
					records = null;
				}
				catch (Handover.WakeupException e) {
					// fall through the loop
				}
			}
			// end main fetch loop
		}
		catch (Throwable t) {
			// let the main thread know and exit
			// it may be that this exception comes because the main thread closed the handover, in
			// which case the below reporting is irrelevant, but does not hurt either
			handover.reportError(t);
		}
		finally {
			// make sure the handover is closed if it is not already closed or has an error
			handover.close();

			// make sure the KafkaConsumer is closed
			try {
				consumer.close();
			}
			catch (Throwable t) {
				log.warn("Error while closing Kafka consumer", t);
			}
		}
	}

(5)partitionConsumerRecordsHandler()数据发送和收尾

	protected void partitionConsumerRecordsHandler(
			List> partitionRecords,
			KafkaTopicPartitionState partition) throws Exception {

		for (ConsumerRecord record : partitionRecords) {
			//反序列化record 将数据交给kafkaCollector 已备数据往下发送
			deserializer.deserialize(record, kafkaCollector);

			// emit the actual records. this also updates offset state atomically and emits
			// watermarks
			//发送数据 更新offset 生产timestamp和watermarks
			emitRecordsWithTimestamps(
				kafkaCollector.getRecords(),
				partition,
				record.offset(),
				record.timestamp());
			//如果数据源已经到末尾了(收到了流结束信号),停止fetcher循环
			if (kafkaCollector.isEndOfStreamSignalled()) {
				// end of stream signaled
				running = false;
				break;
			}
		}
	}

(6)emitRecordsWithTimestamps()发出一个附加时间戳的记录。

	/**
	 * Emits all queued records with a timestamp attached and then stores the offset in the
	 * partition state, all while holding the checkpoint lock.
	 *
	 * @param records the records to emit; the queue is drained
	 * @param partitionState the state of the partition the records came from
	 * @param offset the offset to store in the partition state
	 * @param kafkaEventTimestamp the timestamp of the Kafka record
	 */
	protected void emitRecordsWithTimestamps(
			Queue<T> records,
			KafkaTopicPartitionState<T, KPH> partitionState,
			long offset,
			long kafkaEventTimestamp) {
		// emit the records, using the checkpoint lock to guarantee
		// atomicity of record emission and offset state update
		synchronized (checkpointLock) {
			T record;
			while ((record = records.poll()) != null) {
				long timestamp = partitionState.extractTimestamp(record, kafkaEventTimestamp);
				// emit the record
				sourceContext.collectWithTimestamp(record, timestamp);

				// this might emit a watermark, so do it after emitting the record
				partitionState.onEvent(record, timestamp);
			}
			// update the offset of the partition
			partitionState.setOffset(offset);
		}
	}

initializeState()方法

//访问操作符状态后端中的状态
private transient ListState> unionOffsetStates;
//如果消费者从检查点恢复状态,则恢复到的偏移量。
private transient volatile TreeMap restoredState;
	/**
	 * Registers the union list state holding the partition offsets and, when the function is
	 * being restored, copies the restored offsets into {@code restoredState}.
	 *
	 * @param context gives access to the operator state store and the restore flag
	 * @throws Exception if the state cannot be accessed
	 */
	@Override
	public final void initializeState(FunctionInitializationContext context) throws Exception {

		OperatorStateStore stateStore = context.getOperatorStateStore();

		this.unionOffsetStates = stateStore.getUnionListState(new ListStateDescriptor<>(OFFSETS_STATE_NAME,
			createStateSerializer(getRuntimeContext().getExecutionConfig())));
		// check whether the state was restored from a previous snapshot
		if (context.isRestored()) {
			// restoring: create restoredState, which open() later checks for null
			restoredState = new TreeMap<>(new KafkaTopicPartition.Comparator());

			// populate actual holder for restored state
			for (Tuple2<KafkaTopicPartition, Long> kafkaOffset : unionOffsetStates.get()) {
				restoredState.put(kafkaOffset.f0, kafkaOffset.f1);
			}

			LOG.info("Consumer subtask {} restored state: {}.", getRuntimeContext().getIndexOfThisSubtask(), restoredState);
		} else {
			LOG.info("Consumer subtask {} has no restore state.", getRuntimeContext().getIndexOfThisSubtask());
		}
	}

snapshotState()

	/** Data for pending but uncommitted offsets. */
   // filled by snapshotState() with one entry per checkpoint id when offsets are committed on checkpoints
	private final LinkedMap pendingOffsetsToCommit = new LinkedMap();

	/**
	 * Writes the current partition offsets into the union list state. When offsets are
	 * committed on checkpoints, the offsets are also remembered under the checkpoint id.
	 *
	 * @param context provides the id of the checkpoint being taken
	 * @throws Exception if the state cannot be updated
	 */
	@Override
	public final void snapshotState(FunctionSnapshotContext context) throws Exception {
		if (!running) {
			LOG.debug("snapshotState() called on closed source");
		} else {
			// clear the previous contents so that the state can be refilled
			unionOffsetStates.clear();

			final AbstractFetcher<?, ?> fetcher = this.kafkaFetcher;
			if (fetcher == null) {
				// the fetcher has not yet been initialized, which means we need to return the
				// originally restored offsets or the assigned partitions
				for (Map.Entry<KafkaTopicPartition, Long> subscribedPartition : subscribedPartitionsToStartOffsets.entrySet()) {
					// store the offsets that were determined in open()
					unionOffsetStates.add(Tuple2.of(subscribedPartition.getKey(), subscribedPartition.getValue()));
				}

				if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) {
					// the map cannot be asynchronously updated, because only one checkpoint call can happen
					// on this function at a time: either snapshotState() or notifyCheckpointComplete()
					// remember the restored offsets under this checkpoint id
					pendingOffsetsToCommit.put(context.getCheckpointId(), restoredState);
				}
			} else {
				// the current offset of every partition, as tracked by the fetcher
				HashMap<KafkaTopicPartition, Long> currentOffsets = fetcher.snapshotCurrentState();

				if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) {
					// the map cannot be asynchronously updated, because only one checkpoint call can happen
					// on this function at a time: either snapshotState() or notifyCheckpointComplete()
					// remember the current offsets under this checkpoint id
					pendingOffsetsToCommit.put(context.getCheckpointId(), currentOffsets);
				}

				for (Map.Entry<KafkaTopicPartition, Long> kafkaTopicPartitionLongEntry : currentOffsets.entrySet()) {
					// store the latest offset of each partition in the state
					unionOffsetStates.add(
							Tuple2.of(kafkaTopicPartitionLongEntry.getKey(), kafkaTopicPartitionLongEntry.getValue()));
				}
			}

			if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) {
				// truncate the map of pending offsets to commit, to prevent infinite growth
				while (pendingOffsetsToCommit.size() > MAX_NUM_PENDING_CHECKPOINTS) {
					pendingOffsetsToCommit.remove(0);
				}
			}
		}
	}

到此重要的一些方法都已经分析了。

你可能感兴趣的:(Flink)