public FlinkKafkaProducer011(
        String brokerList,
        String topicId,
        SerializationSchema<IN> serializationSchema);

public FlinkKafkaProducer011(
        String topicId,
        SerializationSchema<IN> serializationSchema,
        Properties producerConfig);

public FlinkKafkaProducer011(
        String topicId,
        SerializationSchema<IN> serializationSchema,
        Properties producerConfig,
        Optional<FlinkKafkaPartitioner<IN>> customPartitioner);

public FlinkKafkaProducer011(
        String brokerList,
        String topicId,
        KeyedSerializationSchema<IN> serializationSchema);

public FlinkKafkaProducer011(
        String topicId,
        KeyedSerializationSchema<IN> serializationSchema,
        Properties producerConfig);

public FlinkKafkaProducer011(
        String topicId,
        KeyedSerializationSchema<IN> serializationSchema,
        Properties producerConfig,
        Semantic semantic);

public FlinkKafkaProducer011(
        String defaultTopicId,
        KeyedSerializationSchema<IN> serializationSchema,
        Properties producerConfig,
        Optional<FlinkKafkaPartitioner<IN>> customPartitioner);

public FlinkKafkaProducer011(
        String defaultTopicId,
        KeyedSerializationSchema<IN> serializationSchema,
        Properties producerConfig,
        Optional<FlinkKafkaPartitioner<IN>> customPartitioner,
        Semantic semantic,
        int kafkaProducersPoolSize);
topicId: the target Kafka topic to push to
brokerList: the list of Kafka brokers
SerializationSchema: how each record is serialized
Properties: client configuration, such as the Kafka broker addresses (and, on the consumer side, the ZooKeeper address and consumer group id)
FlinkKafkaPartitioner: the base class for partitioning. Its open() method is called once on each parallel Kafka producer sink instance, where parallelInstanceId is the id of that parallel sink instance and parallelInstances is the parallelism (i.e. the number of sink subtasks). This parallelism corresponds to the partitions of the Kafka topic; the best configuration is to make parallelInstances equal to the number of partitions on the Kafka side.
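For reference, a minimal sketch using the simplest Properties-based constructor. It assumes an existing StreamExecutionEnvironment named env; the broker address and topic name are placeholders:

// Placeholder broker address and topic; adjust to your environment.
Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");

DataStream<String> stream = env.fromElements("a", "b", "c");

stream.addSink(new FlinkKafkaProducer011<>(
        "my-topic",                // topicId: target Kafka topic
        new SimpleStringSchema(),  // SerializationSchema for the values
        props));                   // producerConfig: Kafka client settings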
@PublicEvolving
public abstract class FlinkKafkaPartitioner<T> implements Serializable {

    private static final long serialVersionUID = -9086719227828020494L;

    /**
     * Called once on each parallel Kafka producer sink instance.
     *
     * @param parallelInstanceId id of this parallel Flink Kafka producer sink instance
     * @param parallelInstances the parallelism (total number of parallel instances)
     */
    public void open(int parallelInstanceId, int parallelInstances) {
        // overwrite this method if needed.
    }

    /**
     * Determine the id of the partition that the record should be written to.
     *
     * @param record the record value
     * @param key serialized key of the record
     * @param value serialized value of the record
     * @param targetTopic target topic for the record
     * @param partitions found partitions for the target topic
     *
     * @return the id of the target partition
     */
    public abstract int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions);
}
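As an illustration (not part of Flink), a custom partitioner that routes records by the hash of their serialized key could look like this:

// Hypothetical example: pick a partition from the hash of the serialized key,
// falling back to the first partition when the record has no key.
public class KeyHashPartitioner<T> extends FlinkKafkaPartitioner<T> {

    private static final long serialVersionUID = 1L;

    @Override
    public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        if (key == null) {
            return partitions[0];
        }
        // Mask the sign bit instead of Math.abs, which overflows for Integer.MIN_VALUE.
        return partitions[(java.util.Arrays.hashCode(key) & Integer.MAX_VALUE) % partitions.length];
    }
}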
If the user does not define a custom partition rule, the default partitioner is used:
@PublicEvolving
public class FlinkFixedPartitioner<T> extends FlinkKafkaPartitioner<T> {

    private static final long serialVersionUID = -3785320239953858777L;

    private int parallelInstanceId;

    @Override
    public void open(int parallelInstanceId, int parallelInstances) {
        Preconditions.checkArgument(parallelInstanceId >= 0, "Id of this subtask cannot be negative.");
        Preconditions.checkArgument(parallelInstances > 0, "Number of subtasks must be larger than 0.");
        this.parallelInstanceId = parallelInstanceId;
    }

    @Override
    public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        Preconditions.checkArgument(
            partitions != null && partitions.length > 0,
            "Partitions of the target topic is empty.");

        return partitions[parallelInstanceId % partitions.length];
    }

    @Override
    public boolean equals(Object o) {
        return this == o || o instanceof FlinkFixedPartitioner;
    }

    @Override
    public int hashCode() {
        return FlinkFixedPartitioner.class.hashCode();
    }
}
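Note what partitions[parallelInstanceId % partitions.length] implies: each sink subtask always writes to the same single partition. With 4 subtasks and 2 partitions, subtasks 0 and 2 write to partition 0 while subtasks 1 and 3 write to partition 1; with 2 subtasks and 4 partitions, partitions 2 and 3 receive no data at all, which is why matching the sink parallelism to the partition count is recommended above. To pass a partitioner explicitly (reusing props and stream from the earlier sketch):

// Optional.of(...) installs the partitioner; Optional.empty() here would
// instead let Kafka partition records by their key.
stream.addSink(new FlinkKafkaProducer011<>(
        "my-topic",
        new SimpleStringSchema(),
        props,
        Optional.of(new FlinkFixedPartitioner<>())));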
Executing the push operation:
FlinkKafkaProducerBase.java
/**
 * Initializes the connection to Kafka.
 */
@Override
public void open(Configuration configuration) {
    producer = getKafkaProducer(this.producerConfig);

    RuntimeContext ctx = getRuntimeContext();

    if (null != flinkKafkaPartitioner) {
        if (flinkKafkaPartitioner instanceof FlinkKafkaDelegatePartitioner) {
            ((FlinkKafkaDelegatePartitioner) flinkKafkaPartitioner).setPartitions(
                getPartitionsByTopic(this.defaultTopicId, this.producer));
        }
        flinkKafkaPartitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks());
    }

    LOG.info("Starting FlinkKafkaProducer ({}/{}) to produce into default topic {}",
        ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);

    // register Kafka metrics to Flink accumulators
    if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
        Map<MetricName, ? extends Metric> metrics = this.producer.metrics();

        if (metrics == null) {
            // MapR's Kafka implementation returns null here.
            LOG.info("Producer implementation does not support metrics");
        } else {
            final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
            for (Map.Entry<MetricName, ? extends Metric> metric: metrics.entrySet()) {
                kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
            }
        }
    }

    if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
        LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
        flushOnCheckpoint = false;
    }

    if (logFailuresOnly) {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
                }
                acknowledgeMessage();
            }
        };
    }
    else {
        callback = new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null && asyncException == null) {
                    asyncException = exception;
                }
                acknowledgeMessage();
            }
        };
    }
}
/**
 * Called when new data arrives to the sink, and forwards it to Kafka.
 *
 * @param next
 *        The incoming data
 */
@Override
public void invoke(IN next, Context context) throws Exception {
    // propagate asynchronous errors
    checkErroneous();

    byte[] serializedKey = schema.serializeKey(next);
    byte[] serializedValue = schema.serializeValue(next);

    String targetTopic = schema.getTargetTopic(next);
    if (targetTopic == null) {
        targetTopic = defaultTopicId;
    }

    int[] partitions = this.topicPartitionsMap.get(targetTopic);
    if (null == partitions) {
        partitions = getPartitionsByTopic(targetTopic, producer);
        this.topicPartitionsMap.put(targetTopic, partitions);
    }

    ProducerRecord<byte[], byte[]> record;
    if (flinkKafkaPartitioner == null) {
        record = new ProducerRecord<>(targetTopic, serializedKey, serializedValue);
    } else {
        record = new ProducerRecord<>(
            targetTopic,
            flinkKafkaPartitioner.partition(next, serializedKey, serializedValue, targetTopic, partitions),
            serializedKey,
            serializedValue);
    }

    if (flushOnCheckpoint) {
        synchronized (pendingRecordsLock) {
            pendingRecords++;
        }
    }
    producer.send(record, callback);
}
@Override
public void close() throws Exception {
    if (producer != null) {
        producer.close();
    }

    // make sure we propagate pending errors
    checkErroneous();
}