Flink Source Code Analysis: FlinkKafkaProducer011

1. Constructors

1.1 Code

public FlinkKafkaProducer011(
            String brokerList,
            String topicId,
            SerializationSchema<IN> serializationSchema);

public FlinkKafkaProducer011(
            String topicId,
            SerializationSchema<IN> serializationSchema,
            Properties producerConfig);

public FlinkKafkaProducer011(
            String topicId,
            SerializationSchema<IN> serializationSchema,
            Properties producerConfig,
            Optional<FlinkKafkaPartitioner<IN>> customPartitioner);

public FlinkKafkaProducer011(
            String brokerList,
            String topicId,
            KeyedSerializationSchema<IN> serializationSchema);

public FlinkKafkaProducer011(
            String topicId,
            KeyedSerializationSchema<IN> serializationSchema,
            Properties producerConfig);

public FlinkKafkaProducer011(
            String topicId,
            KeyedSerializationSchema<IN> serializationSchema,
            Properties producerConfig,
            Semantic semantic);

public FlinkKafkaProducer011(
            String defaultTopicId,
            KeyedSerializationSchema<IN> serializationSchema,
            Properties producerConfig,
            Optional<FlinkKafkaPartitioner<IN>> customPartitioner);

public FlinkKafkaProducer011(
            String defaultTopicId,
            KeyedSerializationSchema<IN> serializationSchema,
            Properties producerConfig,
            Optional<FlinkKafkaPartitioner<IN>> customPartitioner,
            Semantic semantic,
            int kafkaProducersPoolSize);

1.2 Constructor Parameters

topicId: the target Kafka topic that records are pushed to

brokerList: the list of Kafka brokers (comma-separated host:port pairs)

SerializationSchema / KeyedSerializationSchema: how records are serialized; the keyed variants can additionally produce a message key and a per-record target topic

Properties: the Kafka producer configuration; at minimum the broker address (bootstrap.servers), plus any other producer settings
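
For example, a minimal job wiring the sink into a stream; the broker address and topic name here are placeholders, not taken from the source:

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper;

public class KafkaSinkDemo {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		Properties props = new Properties();
		props.setProperty("bootstrap.servers", "broker1:9092"); // assumed broker address

		DataStream<String> stream = env.fromElements("a", "b", "c");

		// matches the (topicId, schema, config, semantic) constructor above
		stream.addSink(new FlinkKafkaProducer011<>(
				"my-topic",                                       // assumed topic name
				new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
				props,
				FlinkKafkaProducer011.Semantic.AT_LEAST_ONCE));

		env.execute("kafka-producer-demo");
	}
}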

1.3 FlinkKafkaProducer011 Attributes

FlinkKafkaPartitioner: the abstract base class for partitioners. Its open() method is called once on each parallel instance of the Kafka producer sink; parallelInstanceId is the id of that parallel subtask, and parallelInstances is the parallelism (the total number of parallel subtasks). This parallelism maps onto the partitions of the Kafka topic, so the ideal configuration sets parallelInstances equal to the topic's partition count.
@PublicEvolving
public abstract class FlinkKafkaPartitioner<T> implements Serializable {

	private static final long serialVersionUID = -9086719227828020494L;

	/**
	 * Called once on each parallel instance of the Flink Kafka producer sink.
	 *
	 * @param parallelInstanceId id of this parallel sink instance of the Flink Kafka producer
	 * @param parallelInstances the parallelism (total number of parallel instances)
	 */
	public void open(int parallelInstanceId, int parallelInstances) {
		// overwrite this method if needed.
	}

	/**
	 * Determine the id of the partition that the record should be written to.
	 *
	 * @param record the record value
	 * @param key serialized key of the record
	 * @param value serialized value of the record
	 * @param targetTopic target topic for the record
	 * @param partitions found partitions for the target topic
	 *
	 * @return the id of the target partition
	 */
	public abstract int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions);
}
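
As a concrete illustration, here is a hedged sketch of a custom partitioner that routes records by a hash of the serialized key; the class name and the hashing choice are mine, not from the Flink sources:

import java.util.Arrays;

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

public class KeyHashPartitioner<T> extends FlinkKafkaPartitioner<T> {

	private static final long serialVersionUID = 1L;

	@Override
	public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
		// Records without a key all go to the first listed partition.
		if (key == null) {
			return partitions[0];
		}
		// Stable mapping: equal keys always land on the same partition.
		// |hash % length| < length, so Math.abs cannot overflow here.
		return partitions[Math.abs(Arrays.hashCode(key) % partitions.length)];
	}
}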

If the user does not define a partitioning rule of their own, the default FlinkFixedPartitioner is used: each parallel sink subtask writes to exactly one partition (subtask i writes to partition i % partitionCount):
@PublicEvolving
public class FlinkFixedPartitioner<T> extends FlinkKafkaPartitioner<T> {

	private static final long serialVersionUID = -3785320239953858777L;

	private int parallelInstanceId;

	@Override
	public void open(int parallelInstanceId, int parallelInstances) {
		Preconditions.checkArgument(parallelInstanceId >= 0, "Id of this subtask cannot be negative.");
		Preconditions.checkArgument(parallelInstances > 0, "Number of subtasks must be larger than 0.");

		this.parallelInstanceId = parallelInstanceId;
	}

	@Override
	public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
		Preconditions.checkArgument(
			partitions != null && partitions.length > 0,
			"Partitions of the target topic is empty.");

		return partitions[parallelInstanceId % partitions.length];
	}

	@Override
	public boolean equals(Object o) {
		return this == o || o instanceof FlinkFixedPartitioner;
	}

	@Override
	public int hashCode() {
		return FlinkFixedPartitioner.class.hashCode();
	}
}
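
To use a custom partitioner instead of the default, pass it through the Optional parameter of the matching constructor. A sketch reusing props from the earlier example and the hypothetical KeyHashPartitioner above (requires java.util.Optional):

stream.addSink(new FlinkKafkaProducer011<>(
		"my-topic",                          // assumed topic name, as above
		new SimpleStringSchema(),
		props,
		Optional.of(new KeyHashPartitioner<String>())));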

Performing the push:

FlinkKafkaProducerBase.java
	/**
	 * Initializes the connection to Kafka.
	 */
	@Override
	public void open(Configuration configuration) {
		producer = getKafkaProducer(this.producerConfig);

		RuntimeContext ctx = getRuntimeContext();

		if (null != flinkKafkaPartitioner) {
			if (flinkKafkaPartitioner instanceof FlinkKafkaDelegatePartitioner) {
				((FlinkKafkaDelegatePartitioner<IN>) flinkKafkaPartitioner).setPartitions(
						getPartitionsByTopic(this.defaultTopicId, this.producer));
			}
			flinkKafkaPartitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks());
		}

		LOG.info("Starting FlinkKafkaProducer ({}/{}) to produce into default topic {}",
				ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);

		// register Kafka metrics to Flink accumulators
		if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
			Map<MetricName, ? extends Metric> metrics = this.producer.metrics();

			if (metrics == null) {
				// MapR's Kafka implementation returns null here.
				LOG.info("Producer implementation does not support metrics");
			} else {
				final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
				for (Map.Entry<MetricName, ? extends Metric> metric: metrics.entrySet()) {
					kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
				}
			}
		}

		if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
			LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
			flushOnCheckpoint = false;
		}

		if (logFailuresOnly) {
			callback = new Callback() {
				@Override
				public void onCompletion(RecordMetadata metadata, Exception e) {
					if (e != null) {
						LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
					}
					acknowledgeMessage();
				}
			};
		}
		else {
			callback = new Callback() {
				@Override
				public void onCompletion(RecordMetadata metadata, Exception exception) {
					if (exception != null && asyncException == null) {
						asyncException = exception;
					}
					acknowledgeMessage();
				}
			};
		}
	}
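
	// Note: the second callback above only records the first asynchronous
	// send failure in asyncException; checkErroneous() (called at the top of
	// invoke() below and again in close()) rethrows it on the task thread,
	// so a failed send fails the job instead of being dropped silently.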

	/**
	 * Called when new data arrives to the sink, and forwards it to Kafka.
	 *
	 * @param next
	 * 		The incoming data
	 */
	@Override
	public void invoke(IN next, Context context) throws Exception {
		// propagate asynchronous errors
		checkErroneous();

		byte[] serializedKey = schema.serializeKey(next);
		byte[] serializedValue = schema.serializeValue(next);
		String targetTopic = schema.getTargetTopic(next);
		if (targetTopic == null) {
			targetTopic = defaultTopicId;
		}

		int[] partitions = this.topicPartitionsMap.get(targetTopic);
		if (null == partitions) {
			partitions = getPartitionsByTopic(targetTopic, producer);
			this.topicPartitionsMap.put(targetTopic, partitions);
		}

		ProducerRecord<byte[], byte[]> record;
		if (flinkKafkaPartitioner == null) {
			record = new ProducerRecord<>(targetTopic, serializedKey, serializedValue);
		} else {
			record = new ProducerRecord<>(
					targetTopic,
					flinkKafkaPartitioner.partition(next, serializedKey, serializedValue, targetTopic, partitions),
					serializedKey,
					serializedValue);
		}
		if (flushOnCheckpoint) {
			synchronized (pendingRecordsLock) {
				pendingRecords++;
			}
		}
		producer.send(record, callback);
	}
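
	// acknowledgeMessage() (called from the callbacks registered in open())
	// decrements pendingRecords and wakes up flush(), which blocks during
	// checkpointing until all in-flight records are acknowledged.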

	@Override
	public void close() throws Exception {
		if (producer != null) {
			producer.close();
		}

		// make sure we propagate pending errors
		checkErroneous();
	}
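
The pendingRecords counter incremented in invoke() is decremented in acknowledgeMessage() as the send callbacks fire. At checkpoint time the base class blocks until the counter drains to zero, which is what gives the sink its at-least-once guarantee when flushOnCheckpoint is set. A condensed sketch of that hook, abbreviated from FlinkKafkaProducerBase (error message shortened):

	@Override
	public void snapshotState(FunctionSnapshotContext ctx) throws Exception {
		// propagate asynchronous errors before flushing
		checkErroneous();

		if (flushOnCheckpoint) {
			// flush() waits until the callbacks have drained pendingRecords to zero
			flush();
			synchronized (pendingRecordsLock) {
				if (pendingRecords != 0) {
					throw new IllegalStateException("Pending record count must be zero at this point: " + pendingRecords);
				}
				// double-check for errors that surfaced during the flush
				checkErroneous();
			}
		}
	}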
