<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.11.1</version>
</dependency>
FlinkKafkaConsumerBase:
The base class for all Flink Kafka Consumer data sources. It implements the behavior that is common across all Kafka versions.
@Override
public void open(Configuration configuration) throws Exception {
    // determine the offset commit mode
    this.offsetCommitMode = OffsetCommitModes.fromConfiguration(
            getIsAutoCommitEnabled(),
            enableCommitOnCheckpoints,
            ((StreamingRuntimeContext) getRuntimeContext()).isCheckpointingEnabled());

    // create the partition discoverer
    this.partitionDiscoverer = createPartitionDiscoverer(
            topicsDescriptor,
            getRuntimeContext().getIndexOfThisSubtask(),
            getRuntimeContext().getNumberOfParallelSubtasks());
    // initializes the discoverer's internal KafkaConsumer
    this.partitionDiscoverer.open();

    subscribedPartitionsToStartOffsets = new HashMap<>();
    // determine the topics and partitions this subtask will consume
    final List<KafkaTopicPartition> allPartitions = partitionDiscoverer.discoverPartitions();
    // check whether we are restoring from a checkpoint
    if (restoredState != null) {
        for (KafkaTopicPartition partition : allPartitions) {
            // if a discovered partition is not in the checkpointed state,
            // start reading it from EARLIEST_OFFSET
            if (!restoredState.containsKey(partition)) {
                restoredState.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET);
            }
        }

        for (Map.Entry<KafkaTopicPartition, Long> restoredStateEntry : restoredState.entrySet()) {
            // seed the partition discoverer with the union state while filtering out
            // restored partitions that should not be subscribed by this subtask;
            // assign() returns the index of the subtask a given partition belongs to
            if (KafkaTopicPartitionAssigner.assign(
                    restoredStateEntry.getKey(), getRuntimeContext().getNumberOfParallelSubtasks())
                    == getRuntimeContext().getIndexOfThisSubtask()) {
                // store each restored (topic, partition) with its start offset in
                // subscribedPartitionsToStartOffsets: the entry key is a partition of
                // some topic, the value is the offset to resume reading from
                subscribedPartitionsToStartOffsets.put(
                        restoredStateEntry.getKey(), restoredStateEntry.getValue());
            }
        }
        // drop partitions whose topic no longer matches the topicsDescriptor
        // (fixed topics or topic pattern) of the current execution
        if (filterRestoredPartitionsWithCurrentTopicsDescriptor) {
            subscribedPartitionsToStartOffsets.entrySet().removeIf(entry -> {
                if (!topicsDescriptor.isMatchingTopic(entry.getKey().getTopic())) {
                    LOG.warn(
                            "{} is removed from subscribed partitions since it is no longer associated with topics descriptor of current execution.",
                            entry.getKey());
                    return true;
                }
                return false;
            });
        }

        LOG.info("Consumer subtask {} will start reading {} partitions with offsets in restored state: {}",
                getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets.size(), subscribedPartitionsToStartOffsets);
    } else {
        // use the partition discoverer to fetch the initial seed partitions,
        // and set their initial offsets depending on the startup mode.
        // for SPECIFIC_OFFSETS and TIMESTAMP modes, we set the specific offsets now;
        // for other modes (EARLIEST, LATEST, and GROUP_OFFSETS), the offset is lazily determined
        // when the partition is actually read.
        // the startup mode decides each partition's starting offset; the default is
        // GROUP_OFFSETS: start from the offsets committed for this consumer group
        // in ZK / the Kafka brokers
        switch (startupMode) {
            // start from explicitly specified offsets per partition
            case SPECIFIC_OFFSETS:
                if (specificStartupOffsets == null) {
                    throw new IllegalStateException(
                            "Startup mode for the consumer set to " + StartupMode.SPECIFIC_OFFSETS +
                                    ", but no specific offsets were specified.");
                }

                for (KafkaTopicPartition seedPartition : allPartitions) {
                    Long specificOffset = specificStartupOffsets.get(seedPartition);
                    if (specificOffset != null) {
                        // since the specified offsets represent the next record to read, we subtract
                        // it by one so that the initial state of the consumer will be correct
                        subscribedPartitionsToStartOffsets.put(seedPartition, specificOffset - 1);
                    } else {
                        // default to group offset behaviour if the user-provided specific offsets
                        // do not contain a value for this partition
                        subscribedPartitionsToStartOffsets.put(seedPartition, KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
                    }
                }
                break;
            // determine each partition's starting offset from a timestamp
            case TIMESTAMP:
                if (startupOffsetsTimestamp == null) {
                    throw new IllegalStateException(
                            "Startup mode for the consumer set to " + StartupMode.TIMESTAMP +
                                    ", but no startup timestamp was specified.");
                }

                for (Map.Entry<KafkaTopicPartition, Long> partitionToOffset
                        : fetchOffsetsWithTimestamp(allPartitions, startupOffsetsTimestamp).entrySet()) {
                    subscribedPartitionsToStartOffsets.put(
                            partitionToOffset.getKey(),
                            (partitionToOffset.getValue() == null)
                                    // if an offset cannot be retrieved for a partition with the given timestamp,
                                    // we default to using the latest offset for the partition
                                    ? KafkaTopicPartitionStateSentinel.LATEST_OFFSET
                                    // since the specified offsets represent the next record to read, we subtract
                                    // it by one so that the initial state of the consumer will be correct
                                    : partitionToOffset.getValue() - 1);
                }
                break;
            default:
                // EARLIEST, LATEST and GROUP_OFFSETS are recorded as sentinel values;
                // the real offsets are resolved lazily when the partitions are first read
                for (KafkaTopicPartition seedPartition : allPartitions) {
                    subscribedPartitionsToStartOffsets.put(seedPartition, startupMode.getStateSentinel());
                }
        }
        // log what this subtask will read, provided it was assigned any partitions at all:
        // how many partitions, and which topic/partition pairs
        if (!subscribedPartitionsToStartOffsets.isEmpty()) {
            switch (startupMode) {
                case EARLIEST:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the earliest offsets: {}",
                            getRuntimeContext().getIndexOfThisSubtask(),
                            subscribedPartitionsToStartOffsets.size(),
                            subscribedPartitionsToStartOffsets.keySet());
                    break;
                case LATEST:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the latest offsets: {}",
                            getRuntimeContext().getIndexOfThisSubtask(),
                            subscribedPartitionsToStartOffsets.size(),
                            subscribedPartitionsToStartOffsets.keySet());
                    break;
                case TIMESTAMP:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from timestamp {}: {}",
                            getRuntimeContext().getIndexOfThisSubtask(),
                            subscribedPartitionsToStartOffsets.size(),
                            startupOffsetsTimestamp,
                            subscribedPartitionsToStartOffsets.keySet());
                    break;
                case SPECIFIC_OFFSETS:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the specified startup offsets {}: {}",
                            getRuntimeContext().getIndexOfThisSubtask(),
                            subscribedPartitionsToStartOffsets.size(),
                            specificStartupOffsets,
                            subscribedPartitionsToStartOffsets.keySet());

                    List<KafkaTopicPartition> partitionsDefaultedToGroupOffsets = new ArrayList<>(subscribedPartitionsToStartOffsets.size());
                    for (Map.Entry<KafkaTopicPartition, Long> subscribedPartition : subscribedPartitionsToStartOffsets.entrySet()) {
                        // collect the partitions whose offsets were not specified
                        // (or could not be used) and thus fall back to group offsets
                        if (subscribedPartition.getValue() == KafkaTopicPartitionStateSentinel.GROUP_OFFSET) {
                            partitionsDefaultedToGroupOffsets.add(subscribedPartition.getKey());
                        }
                    }

                    if (partitionsDefaultedToGroupOffsets.size() > 0) {
                        LOG.warn("Consumer subtask {} cannot find offsets for the following {} partitions in the specified startup offsets: {}" +
                                        "; their startup offsets will be defaulted to their committed group offsets in Kafka.",
                                getRuntimeContext().getIndexOfThisSubtask(),
                                partitionsDefaultedToGroupOffsets.size(),
                                partitionsDefaultedToGroupOffsets);
                    }
                    break;
                case GROUP_OFFSETS:
                    LOG.info("Consumer subtask {} will start reading the following {} partitions from the committed group offsets in Kafka: {}",
                            getRuntimeContext().getIndexOfThisSubtask(),
                            subscribedPartitionsToStartOffsets.size(),
                            subscribedPartitionsToStartOffsets.keySet());
            }
        } else {
            // this subtask was not assigned any partitions
            LOG.info("Consumer subtask {} initially has no partitions to read from.",
                    getRuntimeContext().getIndexOfThisSubtask());
        }
    }

    this.deserializer.open(
            RuntimeContextInitializationContextAdapters.deserializationAdapter(
                    getRuntimeContext(),
                    metricGroup -> metricGroup.addGroup("user")
            )
    );
}
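For reference, here is a sketch of how the startup modes handled in open() are selected from the user-facing API; the broker address, group id and topic name are assumed placeholders:

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092"); // assumed broker address
props.setProperty("group.id", "demo-group");              // assumed consumer group

FlinkKafkaConsumer<String> consumer =
        new FlinkKafkaConsumer<>("demo-topic", new SimpleStringSchema(), props);

// exactly one startup mode is effective; the last setter wins
consumer.setStartFromGroupOffsets();               // default: GROUP_OFFSETS
// consumer.setStartFromEarliest();                // EARLIEST
// consumer.setStartFromLatest();                  // LATEST
// consumer.setStartFromTimestamp(1598918400000L); // TIMESTAMP (epoch millis, assumed value)
// consumer.setStartFromSpecificOffsets(specificOffsets); // SPECIFIC_OFFSETS (assumed map)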
OffsetCommitMode:
Describes how offsets are committed back externally to the Kafka brokers / ZooKeeper. Its exact value is determined at runtime in each consumer subtask.
In practice, Kafka is usually consumed with checkpointing enabled and offsets committed when checkpoints complete.
public static OffsetCommitMode fromConfiguration(
        boolean enableAutoCommit,
        boolean enableCommitOnCheckpoint,
        boolean enableCheckpointing) {

    if (enableCheckpointing) {
        // if checkpointing is enabled, the mode depends only on whether
        // committing on checkpoints is enabled
        return (enableCommitOnCheckpoint) ? OffsetCommitMode.ON_CHECKPOINTS : OffsetCommitMode.DISABLED;
    } else {
        // else, the mode depends only on whether auto committing is enabled
        // in the provided Kafka properties
        return (enableAutoCommit) ? OffsetCommitMode.KAFKA_PERIODIC : OffsetCommitMode.DISABLED;
    }
}
public enum OffsetCommitMode {

    /** Completely disable offset committing. */
    DISABLED,

    /** Commit offsets back to Kafka only when checkpoints are completed. */
    ON_CHECKPOINTS,

    /** Commit offsets periodically back to Kafka, using the auto commit functionality of internal Kafka clients. */
    KAFKA_PERIODIC;
}
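Putting the decision table together, a small sketch (the booleans are in the order enableAutoCommit, enableCommitOnCheckpoint, enableCheckpointing):

// checkpointing on,  commit-on-checkpoint on   -> ON_CHECKPOINTS
OffsetCommitMode a = OffsetCommitModes.fromConfiguration(true, true, true);
// checkpointing on,  commit-on-checkpoint off  -> DISABLED
OffsetCommitMode b = OffsetCommitModes.fromConfiguration(true, false, true);
// checkpointing off, enable.auto.commit=true   -> KAFKA_PERIODIC
OffsetCommitMode c = OffsetCommitModes.fromConfiguration(true, true, false);
// checkpointing off, enable.auto.commit=false  -> DISABLED
OffsetCommitMode d = OffsetCommitModes.fromConfiguration(false, true, false);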
/**
 * Creates the partition discoverer that is used to find new partitions for this subtask.
 *
 * @param topicsDescriptor Descriptor that describes whether we are discovering partitions
 *                         for fixed topics or a topic pattern.
 * @param indexOfThisSubtask The index of this consumer subtask.
 * @param numParallelSubtasks The total number of parallel consumer subtasks (the parallelism).
 * @return The instantiated partition discoverer
 */
protected abstract AbstractPartitionDiscoverer createPartitionDiscoverer(
        KafkaTopicsDescriptor topicsDescriptor,
        int indexOfThisSubtask,
        int numParallelSubtasks);
/**
 * Opens the partition discoverer, initializing all required Kafka connections.
 *
 * NOTE: thread-safety is not guaranteed.
 */
public void open() throws Exception {
    closed = false;
    initializeConnections();
}
/** Establish the required connections in order to fetch topics and partitions metadata. */
protected abstract void initializeConnections() throws Exception;

The Kafka implementation of the discoverer:

@Override
protected void initializeConnections() {
    // create the internal KafkaConsumer used only for metadata requests
    this.kafkaConsumer = new KafkaConsumer<>(kafkaProperties);
}
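For context, a metadata-only KafkaConsumer like the one above needs roughly the following properties (all values assumed; in the connector they are derived from the Properties handed to the FlinkKafkaConsumer constructor, which pins byte-array deserializers itself):

Properties kafkaProperties = new Properties();
kafkaProperties.setProperty("bootstrap.servers", "localhost:9092"); // assumed broker address
kafkaProperties.setProperty("group.id", "demo-group");              // assumed group id
// a KafkaConsumer cannot be constructed without deserializers, even though
// partition discovery only issues metadata requests
kafkaProperties.setProperty("key.deserializer",
        "org.apache.kafka.common.serialization.ByteArrayDeserializer");
kafkaProperties.setProperty("value.deserializer",
        "org.apache.kafka.common.serialization.ByteArrayDeserializer");

KafkaConsumer<byte[], byte[]> kafkaConsumer = new KafkaConsumer<>(kafkaProperties);
List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor("demo-topic"); // assumed topic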
// the (topic, partition) -> start offset map this subtask subscribes to,
// initialized in open()
private Map<KafkaTopicPartition, Long> subscribedPartitionsToStartOffsets;

subscribedPartitionsToStartOffsets = new HashMap<>();
final List<KafkaTopicPartition> allPartitions = partitionDiscoverer.discoverPartitions();

The partition discovery procedure:
public List<KafkaTopicPartition> discoverPartitions() throws WakeupException, ClosedException {
    // proceed only if this discoverer is neither closed nor woken up
    if (!closed && !wakeup) {
        try {
            List<KafkaTopicPartition> newDiscoveredPartitions;

            // (1) get all possible partitions, based on whether we are subscribed
            // to fixed topics or a topic pattern; either way, the goal is to fetch
            // the partition metadata of the matching topics
            if (topicsDescriptor.isFixedTopics()) {
                newDiscoveredPartitions = getAllPartitionsForTopics(topicsDescriptor.getFixedTopics());
            } else {
                List<String> matchedTopics = getAllTopics();

                // retain topics that match the pattern
                Iterator<String> iter = matchedTopics.iterator();
                while (iter.hasNext()) {
                    if (!topicsDescriptor.isMatchingTopic(iter.next())) {
                        iter.remove();
                    }
                }

                if (matchedTopics.size() != 0) {
                    // get partitions only for matched topics
                    newDiscoveredPartitions = getAllPartitionsForTopics(matchedTopics);
                } else {
                    // no topics matched the pattern, so there are no partitions either
                    newDiscoveredPartitions = null;
                }
            }

            // (2) eliminate partitions that are old partitions or should not be
            // subscribed by this subtask
            if (newDiscoveredPartitions == null || newDiscoveredPartitions.isEmpty()) {
                throw new RuntimeException("Unable to retrieve any partitions with KafkaTopicsDescriptor: " + topicsDescriptor);
            } else {
                Iterator<KafkaTopicPartition> iter = newDiscoveredPartitions.iterator();
                KafkaTopicPartition nextPartition;
                while (iter.hasNext()) {
                    nextPartition = iter.next();
                    // keep the partition only if this subtask should subscribe to it
                    if (!setAndCheckDiscoveredPartition(nextPartition)) {
                        iter.remove();
                    }
                }
            }

            // the topics and partitions this subtask should subscribe to
            return newDiscoveredPartitions;
        } catch (WakeupException e) {
            // the actual topic / partition metadata fetching methods
            // may be woken up midway; reset the wakeup flag and rethrow
            wakeup = false;
            throw e;
        }
    } else if (!closed && wakeup) {
        // may have been woken up before the method call
        wakeup = false;
        throw new WakeupException();
    } else {
        throw new ClosedException();
    }
}
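When the job subscribes with a topic pattern instead of fixed topics, topicsDescriptor.isFixedTopics() is false and the regex branch above runs. A minimal sketch of such a subscription (the pattern and the properties are assumed, props as in the earlier sketch):

import java.util.regex.Pattern;

// subscribe to every topic matching the (assumed) pattern demo-topic-<n>
FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(
        Pattern.compile("demo-topic-[0-9]+"),
        new SimpleStringSchema(),
        props);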
@Override
protected List<KafkaTopicPartition> getAllPartitionsForTopics(List<String> topics) throws WakeupException, RuntimeException {
    final List<KafkaTopicPartition> partitions = new LinkedList<>();

    try {
        for (String topic : topics) {
            // fetch the partition metadata for this topic
            final List<PartitionInfo> kafkaPartitions = kafkaConsumer.partitionsFor(topic);

            if (kafkaPartitions == null) {
                throw new RuntimeException(String.format("Could not fetch partitions for %s. Make sure that the topic exists.", topic));
            }

            for (PartitionInfo partitionInfo : kafkaPartitions) {
                partitions.add(new KafkaTopicPartition(partitionInfo.topic(), partitionInfo.partition()));
            }
        }
    } catch (org.apache.kafka.common.errors.WakeupException e) {
        // rethrow our own wakeup exception
        throw new WakeupException();
    }

    return partitions;
}
public boolean setAndCheckDiscoveredPartition(KafkaTopicPartition partition) {
    if (isUndiscoveredPartition(partition)) {
        discoveredPartitions.add(partition);
        // true only if the partition is assigned to this subtask
        return KafkaTopicPartitionAssigner.assign(partition, numParallelSubtasks) == indexOfThisSubtask;
    }
    return false;
}
// Returns the index of the target subtask that a specific Kafka partition is
// assigned to: partitions are distributed evenly across subtasks, round-robin.
public static int assign(KafkaTopicPartition partition, int numParallelSubtasks) {
    int startIndex = ((partition.getTopic().hashCode() * 31) & 0x7FFFFFFF) % numParallelSubtasks;

    // here, the assumption is that the id of Kafka partitions are always ascending
    // starting from 0, and therefore can be used directly as the offset clockwise from the start index
    return (startIndex + partition.getPartition()) % numParallelSubtasks;
}
public abstract class FlinkKafkaConsumerBase<T> extends RichParallelSourceFunction<T> implements
        CheckpointListener,
        ResultTypeQueryable<T>,
        CheckpointedFunction {
@Override
public final void initializeState(FunctionInitializationContext context) throws Exception {

    OperatorStateStore stateStore = context.getOperatorStateStore();

    this.unionOffsetStates = stateStore.getUnionListState(new ListStateDescriptor<>(OFFSETS_STATE_NAME,
            createStateSerializer(getRuntimeContext().getExecutionConfig())));

    // true when the job is recovering from a failure
    if (context.isRestored()) {
        restoredState = new TreeMap<>(new KafkaTopicPartition.Comparator());

        // populate actual holder for restored state
        for (Tuple2<KafkaTopicPartition, Long> kafkaOffset : unionOffsetStates.get()) {
            restoredState.put(kafkaOffset.f0, kafkaOffset.f1);
        }

        LOG.info("Consumer subtask {} restored state: {}.", getRuntimeContext().getIndexOfThisSubtask(), restoredState);
    } else {
        LOG.info("Consumer subtask {} has no restore state.", getRuntimeContext().getIndexOfThisSubtask());
    }
}
@Override
public void run(SourceContext<T> sourceContext) throws Exception {
    // the partitions and their start offsets must have been set in open()
    if (subscribedPartitionsToStartOffsets == null) {
        throw new Exception("The partitions were not set for the consumer");
    }

    // initialize commit metrics and default offset callback method;
    // the two counters track successful and failed Kafka offset commits
    this.successfulCommits = this.getRuntimeContext().getMetricGroup().counter(COMMITS_SUCCEEDED_METRICS_COUNTER);
    this.failedCommits = this.getRuntimeContext().getMetricGroup().counter(COMMITS_FAILED_METRICS_COUNTER);

    // the index of the current subtask
    final int subtaskIndex = this.getRuntimeContext().getIndexOfThisSubtask();

    // register a commit callback: on success, increment the success counter;
    // on failure, increment the failure counter
    this.offsetCommitCallback = new KafkaCommitCallback() {
        @Override
        public void onSuccess() {
            successfulCommits.inc();
        }

        @Override
        public void onException(Throwable cause) {
            LOG.warn(String.format("Consumer subtask %d failed async Kafka commit.", subtaskIndex), cause);
            failedCommits.inc();
        }
    };

    // mark the subtask as temporarily idle if there are no initial seed partitions;
    // once this subtask discovers some partitions and starts collecting records, the subtask's
    // status will automatically be triggered back to be active.
    // this matters for watermarks: a source's watermark is the minimum over its
    // partitions, so an idle subtask must be marked as such or it would hold
    // the watermark back forever
    if (subscribedPartitionsToStartOffsets.isEmpty()) {
        sourceContext.markAsTemporarilyIdle();
    }
LOG.info("Consumer subtask {} creating fetcher with offsets {}.",
getRuntimeContext().getIndexOfThisSubtask(), subscribedPartitionsToStartOffsets);
// from this point forward:
// - 'snapshotState' will draw offsets from the fetcher,
// instead of being built from `subscribedPartitionsToStartOffsets`
// - 'notifyCheckpointComplete' will start to do work (i.e. commit offsets to
// Kafka through the fetcher, if configured to do so)
//如果是快照状态则分区offsets直接从fetcher中获取
//如果是通知检查点完成状态则通过fetcher提交offsets
//创建一个KafkaFetcher,借助KafkaConsumer API从Kafka的broker拉取数据
this.kafkaFetcher = createFetcher(
sourceContext,
subscribedPartitionsToStartOffsets,
watermarkStrategy,
(StreamingRuntimeContext) getRuntimeContext(),
offsetCommitMode,
getRuntimeContext().getMetricGroup().addGroup(KAFKA_CONSUMER_METRICS_GROUP),
useMetrics);
//如果是非运行 则直接返回
if (!running) {
return;
}
// depending on whether we were restored with the current state version (1.3),
// remaining logic branches off into 2 paths:
// 1) New state - partition discovery loop executed as separate thread, with this
// thread running the main fetcher loop
// 2) Old state - partition discovery is disabled and only the main fetcher loop is executed
//根据分区发现间隔时间,来确定是否启动分区定时发现任务
//如果没有配置分区定时发现时间间隔,则直接启动获取数据任务;否则,启动定期分区发现任务和数据获取任务
if (discoveryIntervalMillis == PARTITION_DISCOVERY_DISABLED) {
//开启循环拉取数据
kafkaFetcher.runFetchLoop();
} else {
runWithPartitionDiscovery();
}
}
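The discovery interval is read from the consumer properties; setting it is what routes run() into runWithPartitionDiscovery(). A sketch (the interval value is assumed):

props.setProperty(
        FlinkKafkaConsumerBase.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, // "flink.partition-discovery.interval-millis"
        "30000"); // check for new partitions every 30 seconds (assumed value)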
@Override
protected AbstractFetcher<T, ?> createFetcher(
        SourceContext<T> sourceContext,
        Map<KafkaTopicPartition, Long> assignedPartitionsWithInitialOffsets,
        SerializedValue<WatermarkStrategy<T>> watermarkStrategy,
        StreamingRuntimeContext runtimeContext,
        OffsetCommitMode offsetCommitMode,
        MetricGroup consumerMetricGroup,
        boolean useMetrics) throws Exception {

    // make sure that auto commit is disabled when our offset commit mode is ON_CHECKPOINTS;
    // this overwrites whatever setting the user configured in the properties
    adjustAutoCommitConfig(properties, offsetCommitMode);

    // ... (construction and return of the KafkaFetcher omitted in this excerpt)
}
Inside the fetcher's constructor (reached from createFetcher):

// the deserializer for incoming records
this.deserializer = deserializer;

// hands over data and exceptions between the consumer thread and the task thread
this.handover = new Handover();

// the thread that runs the KafkaConsumer and hands record batches to the fetcher
this.consumerThread = new KafkaConsumerThread<>(
        LOG,
        handover,
        kafkaProperties,
        unassignedPartitionsQueue,
        getFetcherName() + " for " + taskNameWithSubtasks,
        pollTimeout,
        useMetrics,
        consumerMetricGroup,
        subtaskMetricGroup);

// the collector that emits deserialized records downstream in bundles
this.kafkaCollector = new KafkaCollector();
private void runWithPartitionDiscovery() throws Exception {
    final AtomicReference<Exception> discoveryLoopErrorRef = new AtomicReference<>();

    // create and start the periodic partition discovery task
    createAndStartDiscoveryLoop(discoveryLoopErrorRef);

    // run the fetch loop
    kafkaFetcher.runFetchLoop();

    // make sure that the partition discoverer is waked up so that
    // the discoveryLoopThread exits
    partitionDiscoverer.wakeup();

    // wait for the discovery loop thread to finish
    joinDiscoveryLoopThread();

    // rethrow any fetcher errors
    final Exception discoveryLoopError = discoveryLoopErrorRef.get();
    if (discoveryLoopError != null) {
        throw new RuntimeException(discoveryLoopError);
    }
}
private void createAndStartDiscoveryLoop(AtomicReference<Exception> discoveryLoopErrorRef) {
    discoveryLoopThread = new Thread(() -> {
        try {
            // --------------------- partition discovery loop ---------------------

            // throughout the loop, we always eagerly check if we are still running before
            // performing the next operation, so that we can escape the loop as soon as possible
            while (running) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Consumer subtask {} is trying to discover new partitions ...", getRuntimeContext().getIndexOfThisSubtask());
                }

                final List<KafkaTopicPartition> discoveredPartitions;
                try {
                    // run partition discovery; returns the newly discovered
                    // partitions this subtask should subscribe to
                    discoveredPartitions = partitionDiscoverer.discoverPartitions();
                } catch (AbstractPartitionDiscoverer.WakeupException | AbstractPartitionDiscoverer.ClosedException e) {
                    // the partition discoverer may have been closed or woken up before or during the discovery;
                    // this would only happen if the consumer was canceled; simply escape the loop
                    break;
                }

                // no need to add the discovered partitions if we were closed during the meantime
                if (running && !discoveredPartitions.isEmpty()) {
                    // hand the newly discovered partitions to the kafkaFetcher
                    kafkaFetcher.addDiscoveredPartitions(discoveredPartitions);
                }

                // do not waste any time sleeping if we're not running anymore
                if (running && discoveryIntervalMillis != 0) {
                    try {
                        Thread.sleep(discoveryIntervalMillis);
                    } catch (InterruptedException iex) {
                        // may be interrupted if the consumer was canceled midway; simply escape the loop
                        break;
                    }
                }
            }
        } catch (Exception e) {
            discoveryLoopErrorRef.set(e);
        } finally {
            // calling cancel will also let the fetcher loop escape
            // (if not running, cancel() was already called)
            if (running) {
                cancel();
            }
        }
    }, "Kafka Partition Discovery for " + getRuntimeContext().getTaskNameWithSubtasks());

    // start the periodic partition discovery thread
    discoveryLoopThread.start();
}
@Override
public void runFetchLoop() throws Exception {
    try {
        // kick off the actual Kafka consumer: the thread periodically hands the
        // batches it polls over to this fetcher via the handover object created
        // in createFetcher()
        consumerThread.start();

        while (running) {
            // this blocks until we get the next records
            // it automatically re-throws exceptions encountered in the consumer thread
            final ConsumerRecords<byte[], byte[]> records = handover.pollNext();

            // get the records for each topic partition
            for (KafkaTopicPartitionState<T, TopicPartition> partition : subscribedPartitionStates()) {
                // the records polled for this partition
                List<ConsumerRecord<byte[], byte[]>> partitionRecords =
                        records.records(partition.getKafkaPartitionHandle());

                partitionConsumerRecordsHandler(partitionRecords, partition);
            }
        }
    }
    finally {
        // this signals the consumer thread that no more work is to be done
        consumerThread.shutdown();
    }

    // on a clean exit, wait for the runner thread
    try {
        consumerThread.join();
    }
    catch (InterruptedException e) {
        // may be the result of a wake-up interruption after an exception.
        // we ignore this here and only restore the interruption state
        Thread.currentThread().interrupt();
    }
}
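The fetch loop above spends most of its time blocked in handover.pollNext(). Flink's Handover is essentially a one-element exchange point between the consumer thread and the fetcher thread; the sketch below illustrates only that blocking exchange (the real class additionally propagates errors via reportError() and supports wakeup/close):

// Simplified single-slot handover between a producer thread (KafkaConsumerThread)
// and a consumer thread (the fetch loop); a sketch, not Flink's actual class.
final class SimpleHandover<E> {
    private final Object lock = new Object();
    private E next; // the single slot

    public void produce(E element) throws InterruptedException {
        synchronized (lock) {
            while (next != null) {   // wait until the slot is free
                lock.wait();
            }
            next = element;
            lock.notifyAll();        // wake a blocked pollNext()
        }
    }

    public E pollNext() throws InterruptedException {
        synchronized (lock) {
            while (next == null) {   // block until an element arrives
                lock.wait();
            }
            E result = next;
            next = null;
            lock.notifyAll();        // wake a blocked produce()
            return result;
        }
    }
}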
@Override
public void run() {
    // early exit check
    if (!running) {
        return;
    }

    // this is the means to talk to FlinkKafkaConsumer's main thread
    final Handover handover = this.handover;

    // This method initializes the KafkaConsumer and guarantees it is torn down properly.
    // This is important, because the consumer has multi-threading issues,
    // including concurrent 'close()' calls.
    try {
        this.consumer = getConsumer(kafkaProperties);
    }
    catch (Throwable t) {
        handover.reportError(t);
        return;
    }

    // from here on, the consumer is guaranteed to be closed properly
    try {
        // register Kafka's very own metrics in Flink's metric reporters
        if (useMetrics) {
            // register Kafka metrics to Flink
            Map<MetricName, ? extends Metric> metrics = consumer.metrics();
            if (metrics == null) {
                // MapR's Kafka implementation returns null here.
                log.info("Consumer implementation does not support metrics");
            } else {
                // we have Kafka metrics, register them
                for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                    consumerMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));

                    // TODO this metric is kept for compatibility purposes; should remove in the future
                    subtaskMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
                }
            }
        }

        // early exit check
        if (!running) {
            return;
        }

        // the latest bulk of records. May carry across the loop if the thread is woken up
        // from blocking on the handover
        ConsumerRecords<byte[], byte[]> records = null;

        // reused variable to hold found unassigned new partitions.
        // found partitions are not carried across loops using this variable;
        // they are carried across via re-adding them to the unassigned partitions queue
        List<KafkaTopicPartitionState<T, TopicPartition>> newPartitions;

        // main fetch loop
        while (running) {

            // check if there is something to commit
            if (!commitInProgress) {
                // get and reset the work-to-be committed, so we don't repeatedly commit the same
                final Tuple2<Map<TopicPartition, OffsetAndMetadata>, KafkaCommitCallback> commitOffsetsAndCallback =
                        nextOffsetsToCommit.getAndSet(null);
                // ... (the remainder of the fetch loop — committing offsets, picking up
                // newly assigned partitions, polling records and handing them over to
                // the handover — is omitted in this excerpt)
protected void partitionConsumerRecordsHandler(
        List<ConsumerRecord<byte[], byte[]>> partitionRecords,
        KafkaTopicPartitionState<T, TopicPartition> partition) throws Exception {

    for (ConsumerRecord<byte[], byte[]> record : partitionRecords) {
        // deserialize the record and buffer it in kafkaCollector, ready to be emitted
        deserializer.deserialize(record, kafkaCollector);

        // emit the actual records. this also updates offset state atomically and emits
        // watermarks: the records go downstream, the offset is updated, and
        // timestamps and watermarks are generated
        emitRecordsWithTimestamps(
                kafkaCollector.getRecords(),
                partition,
                record.offset(),
                record.timestamp());

        // if the deserializer signalled the end of the stream, stop the fetch loop
        if (kafkaCollector.isEndOfStreamSignalled()) {
            // end of stream signaled
            running = false;
            break;
        }
    }
}
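The deserializer here is the user-provided KafkaDeserializationSchema; its isEndOfStream() result is what makes kafkaCollector.isEndOfStreamSignalled() return true above. A minimal sketch of such a schema (UTF-8 values, with an assumed "END" sentinel):

import java.nio.charset.StandardCharsets;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import org.apache.kafka.clients.consumer.ConsumerRecord;

public class StringWithEndMarkerSchema implements KafkaDeserializationSchema<String> {

    @Override
    public String deserialize(ConsumerRecord<byte[], byte[]> record) {
        return new String(record.value(), StandardCharsets.UTF_8);
    }

    @Override
    public boolean isEndOfStream(String nextElement) {
        return "END".equals(nextElement); // assumed end-of-stream sentinel
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return Types.STRING;
    }
}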
protected void emitRecordsWithTimestamps(
        Queue<T> records,
        KafkaTopicPartitionState<T, KPH> partitionState,
        long offset,
        long kafkaEventTimestamp) {
    // emit the records, using the checkpoint lock to guarantee
    // atomicity of record emission and offset state update
    synchronized (checkpointLock) {
        T record;
        while ((record = records.poll()) != null) {
            long timestamp = partitionState.extractTimestamp(record, kafkaEventTimestamp);
            // emit the record downstream
            sourceContext.collectWithTimestamp(record, timestamp);

            // this might emit a watermark, so do it after emitting the record
            partitionState.onEvent(record, timestamp);
        }
        // update the offset in the partition state
        partitionState.setOffset(offset);
    }
}
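partitionState.extractTimestamp() and onEvent() evaluate the per-partition watermark strategy the user attached to the consumer. A sketch of wiring one up (the 5-second out-of-orderness bound is assumed):

import java.time.Duration;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;

consumer.assignTimestampsAndWatermarks(
        WatermarkStrategy
                .<String>forBoundedOutOfOrderness(Duration.ofSeconds(5))
                .withTimestampAssigner((event, kafkaTimestamp) -> kafkaTimestamp));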
// accesses the offsets state in the operator state backend
private transient ListState<Tuple2<KafkaTopicPartition, Long>> unionOffsetStates;

// the offsets restored from a checkpoint, if the consumer recovered its state
private transient volatile TreeMap<KafkaTopicPartition, Long> restoredState;
@Override
public final void initializeState(FunctionInitializationContext context) throws Exception {

    OperatorStateStore stateStore = context.getOperatorStateStore();

    this.unionOffsetStates = stateStore.getUnionListState(new ListStateDescriptor<>(OFFSETS_STATE_NAME,
            createStateSerializer(getRuntimeContext().getExecutionConfig())));

    // check whether we are recovering from the last snapshot
    if (context.isRestored()) {
        // when restoring from a checkpoint, initialize restoredState,
        // which open() later reads
        restoredState = new TreeMap<>(new KafkaTopicPartition.Comparator());

        // populate actual holder for restored state: copy the snapshot data
        // into restoredState, matching what open() expects
        for (Tuple2<KafkaTopicPartition, Long> kafkaOffset : unionOffsetStates.get()) {
            restoredState.put(kafkaOffset.f0, kafkaOffset.f1);
        }

        LOG.info("Consumer subtask {} restored state: {}.", getRuntimeContext().getIndexOfThisSubtask(), restoredState);
    } else {
        LOG.info("Consumer subtask {} has no restore state.", getRuntimeContext().getIndexOfThisSubtask());
    }
}
/** Data for pending but uncommitted offsets: offsets whose checkpoint has not completed yet. */
private final LinkedMap pendingOffsetsToCommit = new LinkedMap();
@Override
public final void snapshotState(FunctionSnapshotContext context) throws Exception {
    if (!running) {
        LOG.debug("snapshotState() called on closed source");
    } else {
        // clear the state so it can be repopulated below
        unionOffsetStates.clear();

        final AbstractFetcher<?, ?> fetcher = this.kafkaFetcher;
        if (fetcher == null) {
            // the fetcher has not yet been initialized (it is created in run()),
            // which means we need to return the originally restored offsets
            // or the assigned partitions
            for (Map.Entry<KafkaTopicPartition, Long> subscribedPartition : subscribedPartitionsToStartOffsets.entrySet()) {
                // store the offsets that were determined in open()
                unionOffsetStates.add(Tuple2.of(subscribedPartition.getKey(), subscribedPartition.getValue()));
            }

            if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) {
                // the map cannot be asynchronously updated, because only one checkpoint call can happen
                // on this function at a time: either snapshotState() or notifyCheckpointComplete()
                // remember the offsets of the in-flight checkpoint
                // (restoredState is a plain map here, not a state object)
                pendingOffsetsToCommit.put(context.getCheckpointId(), restoredState);
            }
        } else {
            // take the current per-partition offsets from the fetcher
            HashMap<KafkaTopicPartition, Long> currentOffsets = fetcher.snapshotCurrentState();

            if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) {
                // the map cannot be asynchronously updated, because only one checkpoint call can happen
                // on this function at a time: either snapshotState() or notifyCheckpointComplete()
                // remember the offsets of the in-flight checkpoint
                pendingOffsetsToCommit.put(context.getCheckpointId(), currentOffsets);
            }

            for (Map.Entry<KafkaTopicPartition, Long> kafkaTopicPartitionLongEntry : currentOffsets.entrySet()) {
                // store the latest (topic, partition) -> offset mapping in the state
                unionOffsetStates.add(
                        Tuple2.of(kafkaTopicPartitionLongEntry.getKey(), kafkaTopicPartitionLongEntry.getValue()));
            }
        }

        if (offsetCommitMode == OffsetCommitMode.ON_CHECKPOINTS) {
            // truncate the map of pending offsets to commit, to prevent infinite growth
            while (pendingOffsetsToCommit.size() > MAX_NUM_PENDING_CHECKPOINTS) {
                pendingOffsetsToCommit.remove(0);
            }
        }
    }
}
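For completeness, here is a sketch of the job-level configuration (intervals assumed) under which snapshotState() actually runs and offsets are committed ON_CHECKPOINTS:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.enableCheckpointing(60_000); // snapshotState() is now invoked every 60s (assumed interval)

consumer.setCommitOffsetsOnCheckpoints(true); // the default; combined with checkpointing,
                                              // this yields OffsetCommitMode.ON_CHECKPOINTS
env.addSource(consumer).print();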
This concludes the walkthrough of the important methods.