Table of Contents
- 1. Goal
- 2. Dependencies
- 3. Consumer
- 3.1 Initializing the Consumer
- 3.2 Subscribing to topics (multiple topics and regex patterns supported)
- 3.3 Polling
- 3.4 Complete demo
- 3.5 Committing offsets
- 3.5.1 Automatic offset commit
- 3.5.2 Manual offset commit (pick one of three)
- 3.5.3 Manual offset commit (sync and async combined)
- 3.5.4 Committing a specific offset
- 4. Producer
- 4.1 Simple default send
- 4.2 Sending to a specific partition
- 4.3 Sending with a specific key
- 4.4 Specifying both partition and key
- 5. Anatomy of the producer send code
- 5.1 Initializing KafkaProducer
- 5.2 The send message format
- 5.3 Inside the send method
- 6. Handling the producer's return value
- 6.1 Send and get the result
- 6.2 Send and get the result (asynchronous)
- == Extras ==
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.12</artifactId>
    <version>2.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>2.3.1</version>
</dependency>
public class KafkaConsumer<K, V> implements Consumer<K, V> {
    public KafkaConsumer(Properties properties) {
        this(properties, null, null);
    }
}
public static KafkaConsumer<byte[], byte[]> getInstance() {
    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092,localhost:9093,localhost:9094");
    props.put("group.id", "group01");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);
    return consumer;
}
consumer.subscribe(Collections.singletonList("topic01"));
Regex subscription (note that subscribe takes a java.util.regex.Pattern):
consumer.subscribe(Pattern.compile("test.*"));
Polling is the heart of the consumer API: the consumer requests data from the server through a simple poll loop. Once the consumer has subscribed to topics, polling handles all the details, including group coordination, partition rebalancing, sending heartbeats, and fetching data, so the developer only needs a small set of APIs to process the records returned from each partition.
If no data is available, poll() waits at most 100 ms before returning, and the loop polls again.
try {
    while (true) {
        // Poll for records, waiting up to 100 ms if none are available.
        ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(100));
        for (ConsumerRecord<byte[], byte[]> record : records) {
            System.out.println(record);
            System.out.println("msgContent :" + new String(record.value()));
        }
    }
} finally {
    consumer.close();
}
package indi.sword.kafka;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.junit.Test;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
/**
 * @author jeb_lin
 * 3:57 PM 2020/7/31
 */
public class MQConsumer {
    private static final String TOPIC = "testCluster2";
    /**
     * Build the basic config and return a consumer.
     */
    public static KafkaConsumer<byte[], byte[]> getInstance() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092,localhost:9093,localhost:9094");
        props.put("group.id", "group01");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        return new KafkaConsumer<>(props);
    }
    @Test
    public void testConsume() {
        KafkaConsumer<byte[], byte[]> consumer = MQConsumer.getInstance();
        // Subscribe to the given topic.
        consumer.subscribe(Collections.singletonList(TOPIC));
        try {
            while (true) {
                // Poll for records, waiting up to 10 ms if none are available.
                ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(10));
                for (ConsumerRecord<byte[], byte[]> record : records) {
                    System.out.println(record);
                    System.out.println("msgContent :" + new String(record.value()));
                }
            }
        } finally {
            consumer.close();
        }
    }
}
ConsumerRecord(topic = testCluster2, partition = 1, leaderEpoch = 9, offset = 42, CreateTime = 1596187207508, serialized key size = 1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = [B@3e2e18f2, value = [B@470f1802)
msgContent :msg-9
The output shows which partition each record was fetched from as well as its content.
With automatic commit enabled, the consumer commits offsets for you. Suppose the commit interval (auto.commit.interval.ms) is 5 s: on each call to poll() the consumer checks whether 5 s have elapsed since the last commit, and if so it commits the largest offset returned by the previous poll(). The commit is tied to poll(), not to how far the processing loop has progressed: if a poll returned 10 records and only 5 of them have been processed when a commit covering all 10 goes through, a crash at that point means the 5 unprocessed records will not be re-delivered. This is the main trade-off of automatic commits.
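A minimal sketch of a consumer configuration with auto-commit turned on; the property names are the standard Kafka client settings, and the interval shown is simply the default value written out explicitly:
Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092,localhost:9093,localhost:9094");
props.put("group.id", "group01");
// Auto-commit is on by default; shown here for clarity.
props.put("enable.auto.commit", "true");
// Commit at most once every 5 seconds (the default interval).
props.put("auto.commit.interval.ms", "5000");
props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);
For the manual commit examples in the next sections, enable.auto.commit should be set to false so that the automatic commit does not interfere.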
/**
 * Test the three manual offset commit options.
 */
@Test
public void testCommitOffset() {
    KafkaConsumer<byte[], byte[]> consumer = MQConsumer.getInstance();
    // Subscribe to the given topic.
    consumer.subscribe(Collections.singletonList(TOPIC));
    try {
        while (true) {
            // Poll for records, waiting up to 10 ms if none are available.
            ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(10));
            for (ConsumerRecord<byte[], byte[]> record : records) {
                System.out.println(record);
                System.out.println("msgContent :" + new String(record.value()));
                try {
                    consumer.commitSync(); // Manual synchronous commit (option 1 of 3)
                    consumer.commitAsync(); // Manual asynchronous commit (option 2 of 3)
                    // Manual asynchronous commit with a callback (option 3 of 3)
                    consumer.commitAsync(new OffsetCommitCallback() {
                        @Override
                        public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception e) {
                            if (e != null) {
                                System.out.println("commit failed for offsets -> " + offsets + ", e -> " + e);
                            }
                        }
                    });
                } catch (CommitFailedException e) {
                    System.out.println("commit failed" + e);
                }
            }
        }
    } finally {
        consumer.close();
    }
}
/**
 * Synchronous and asynchronous commits combined.
 */
@Test
public void testCommitOffsetBoth() {
    KafkaConsumer<byte[], byte[]> consumer = MQConsumer.getInstance();
    // Subscribe to the given topic.
    consumer.subscribe(Collections.singletonList(TOPIC));
    try {
        while (true) {
            // Poll for records, waiting up to 10 ms if none are available.
            ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(10));
            for (ConsumerRecord<byte[], byte[]> record : records) {
                System.out.println(record);
                System.out.println("msgContent :" + new String(record.value()));
                try {
                    consumer.commitAsync(); // Asynchronous commit on the normal path
                } catch (CommitFailedException e) {
                    System.out.println("commit failed" + e);
                } finally {
                    consumer.commitSync(); // Synchronous commit as the final safety net
                }
            }
        }
    } finally {
        consumer.close();
    }
}
public final class TopicPartition {
private int hash = 0;
private final int partition;
private final String topic;
}
public class OffsetAndMetadata implements Serializable {
    private final long offset;
    private final String metadata;
    ...
}
Committing a specific offset:
/**
 * Commit a specific offset.
 */
@Test
public void testCommitSpecificOffset() {
    KafkaConsumer<byte[], byte[]> consumer = MQConsumer.getInstance();
    // Subscribe to the given topic.
    consumer.subscribe(Collections.singletonList(TOPIC));
    Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();
    int count = 0;
    try {
        while (true) {
            // Poll for records, waiting up to 10 ms if none are available.
            ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(10));
            for (ConsumerRecord<byte[], byte[]> record : records) {
                System.out.println(record);
                System.out.println("msgContent :" + new String(record.value()));
                // Commit the offset of the next record to be read, hence offset() + 1.
                currentOffsets.put(new TopicPartition(record.topic(), record.partition()),
                        new OffsetAndMetadata(record.offset() + 1, "no metadata"));
                // Commit every 8 records.
                if (count % 8 == 0) {
                    consumer.commitAsync(currentOffsets, null);
                }
                count++;
            }
        }
    } finally {
        consumer.close();
    }
}
package indi.sword.kafka;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.utils.Utils;
import org.junit.Test;
import java.util.Properties;
/**
 * @author jeb_lin
 * 3:50 PM 2020/7/31
 */
public class MQProducer {
    private static final String TOPIC = "testCluster2";
    private static KafkaProducer<byte[], byte[]> kafkaProducer = null;
    /**
     * Get the producer instance.
     */
    public static KafkaProducer<byte[], byte[]> getInstance() {
        if (kafkaProducer == null) {
            kafkaProducer = new KafkaProducer<>(getDefaultProductProperties());
            System.out.println("kafka message producer start .....");
        }
        return kafkaProducer;
    }
    /**
     * Default producer configuration.
     */
    private static Properties getDefaultProductProperties() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092,localhost:9093,localhost:9094");
        props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
        return props;
    }
}
/**
 * Simple send: only the topic and the value are specified.
 * @throws Exception
 */
@Test
public void testSendSimple() throws Exception {
    KafkaProducer<byte[], byte[]> producer = MQProducer.getInstance();
    for (int i = 0; i < 10; i++) {
        String msg = "msg-" + i;
        System.out.println("send msg : " + msg);
        producer.send(new ProducerRecord<>(TOPIC, msg.getBytes()));
        Thread.sleep(1000);
    }
}
kafka message producer start .....
send msg : msg-0
...
send msg : msg-9
ConsumerRecord(topic = testCluster2, partition = 1, leaderEpoch = 15, offset = 104, CreateTime = 1596260977187, serialized key size = -1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = [B@6a01e23)
msgContent :msg-0
......
ConsumerRecord(topic = testCluster2, partition = 2, leaderEpoch = 19, offset = 67, CreateTime = 1596260986210, serialized key size = -1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = [B@23986957)
msgContent :msg-9
/**
 * Send to a specific partition.
 * @throws Exception
 */
@Test
public void testSendSpecificPartition() throws Exception {
    KafkaProducer<byte[], byte[]> producer = MQProducer.getInstance();
    for (int i = 0; i < 10; i++) {
        String msg = "msg-" + i;
        System.out.println("send msg : " + msg);
        // Partition 1 is specified explicitly; the key is null.
        producer.send(new ProducerRecord<>(TOPIC, 1, null, msg.getBytes()));
        Thread.sleep(1000);
    }
}
ConsumerRecord(topic = testCluster2, partition = 1, leaderEpoch = 15, offset = 107, CreateTime = 1596261140966, serialized key size = -1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = [B@2a32de6c)
msgContent :msg-0
...
ConsumerRecord(topic = testCluster2, partition = 1, leaderEpoch = 15, offset = 116, CreateTime = 1596261150001, serialized key size = -1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = [B@2fd6b6c7)
msgContent :msg-9
/**
 * Send with a specific key.
 * @throws Exception
 */
@Test
public void testSendSpecificKey() throws Exception {
    KafkaProducer<byte[], byte[]> producer = MQProducer.getInstance();
    // The key is hashed to pick the partition; this is the formula the default partitioner
    // uses under the hood (4 is the topic's partition count).
    System.out.println("specificKey :" + Utils.toPositive(Utils.murmur2("1".getBytes())) % 4);
    for (int i = 0; i < 10; i++) {
        String msg = "msg-" + i;
        System.out.println("send msg : " + msg);
        producer.send(new ProducerRecord<>(TOPIC, null, "1".getBytes(), msg.getBytes()));
        Thread.sleep(1000);
    }
}
kafka message producer start .....
specificKey :3
send msg : msg-0
send msg : msg-1
...
send msg : msg-9
ConsumerRecord(topic = testCluster2, partition = 3, leaderEpoch = 16, offset = 135, CreateTime = 1596261903368, serialized key size = 1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = [B@358ee631, value = [B@ec756bd)
msgContent :msg-0
...
ConsumerRecord(topic = testCluster2, partition = 3, leaderEpoch = 16, offset = 144, CreateTime = 1596261912403, serialized key size = 1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = [B@30b7c004, value = [B@79efed2d)
msgContent :msg-9
/**
 * Specify both partition and key; only the partition takes effect.
 * @throws Exception
 */
@Test
public void testSendSpecificPartitionAndKey() throws Exception {
    KafkaProducer<byte[], byte[]> producer = MQProducer.getInstance();
    // The key alone would hash to partition 3 ...
    System.out.println("specificKey :" + Utils.toPositive(Utils.murmur2("1".getBytes())) % 4);
    for (int i = 0; i < 10; i++) {
        String msg = "msg-" + i;
        System.out.println("send msg : " + msg);
        // ... but the explicitly specified partition 1 wins.
        producer.send(new ProducerRecord<>(TOPIC, 1, "1".getBytes(), msg.getBytes()));
        Thread.sleep(1000);
    }
}
ConsumerRecord(topic = testCluster2, partition = 1, leaderEpoch = 15, offset = 117, CreateTime = 1596262013527, serialized key size = 1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = [B@2928854b, value = [B@27ae2fd0)
msgContent :msg-0
...
ConsumerRecord(topic = testCluster2, partition = 1, leaderEpoch = 15, offset = 126, CreateTime = 1596262022569, serialized key size = 1, serialized value size = 5, headers = RecordHeaders(headers = [], isReadOnly = false), key = [B@309e345f, value = [B@56a6d5a6)
msgContent :msg-9
Interface: org.apache.kafka.clients.producer.Producer
Concrete implementation: org.apache.kafka.clients.producer.KafkaProducer
public class KafkaProducer<K, V> implements Producer<K, V> {
    public KafkaProducer(Properties properties) {
        this(new ProducerConfig(properties), null, null);
    }
}
private static KafkaProducer<byte[], byte[]> kafkaProducer = null;
public static synchronized Producer<byte[], byte[]> getProducer() {
    if (kafkaProducer == null) {
        kafkaProducer = new KafkaProducer<>(getDefaultProductProperties());
        LOG.info("kafka message producer start .....");
    }
    return kafkaProducer;
}
/**
* Default producer configuration.
*/
private static Properties getDefaultProductProperties() {
Properties props = new Properties();
props.put("bootstrap.servers", "host01:port,host02:port,host03:port");
props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
return props;
}
- bootstrap.servers: the list of broker addresses in host:port format. It is recommended to list at least two brokers so the producer can still bootstrap if one of them goes down.
- key.serializer: brokers expect message keys and values as byte arrays. The Kafka client ships with ByteArraySerializer, StringSerializer, and IntegerSerializer out of the box.
- value.serializer: same as above, but for the message value (see the sketch below).
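As a sketch (not part of the original demo), if the keys and values were plain strings instead of byte arrays, the built-in StringSerializer could be plugged in, and the producer's generic types would change accordingly; the topic name and values below are placeholders reusing the demo's topic:
Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092,localhost:9093,localhost:9094");
// Use the built-in StringSerializer instead of ByteArraySerializer.
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// The producer's generic types must match the configured serializers.
KafkaProducer<String, String> producer = new KafkaProducer<>(props);
producer.send(new ProducerRecord<>("testCluster2", "key-1", "hello"));
producer.close();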
public class KafkaProducer<K, V> implements Producer<K, V> {
    @Override
    public Future<RecordMetadata> send(ProducerRecord<K, V> record) {
        return send(record, null);
    }
}
public final class ProducerRecord<K, V> {
    private final String topic;
    private final Integer partition;
    private final K key;
    private final V value;
}
The target partition is determined by the following rules (each rule maps to a ProducerRecord constructor, as the sketch right after this list shows):
1. If only a partition is specified, the record goes to that partition.
2. If only a key is specified, the record is routed by hashing the key to a partition.
3. If both a partition and a key are specified, the key is ignored for routing and the explicit partition wins.
4. If neither a partition nor a key is specified, records are spread across partitions (effectively round-robin starting from a random value, as the nextValue analysis below shows).
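A compact sketch mapping each rule to the corresponding ProducerRecord constructor; the topic name, keys, and values are placeholders, and producer is assumed to be a KafkaProducer<byte[], byte[]> as in the demos above:
// Rule 1: only the partition is given (partition 1, key null).
producer.send(new ProducerRecord<>("testCluster2", 1, null, "v".getBytes()));
// Rule 2: only the key is given; the partition is chosen by hashing the key.
producer.send(new ProducerRecord<>("testCluster2", "k".getBytes(), "v".getBytes()));
// Rule 3: both partition and key are given; the explicit partition wins, the key is only stored.
producer.send(new ProducerRecord<>("testCluster2", 1, "k".getBytes(), "v".getBytes()));
// Rule 4: neither partition nor key; records are spread across partitions.
producer.send(new ProducerRecord<>("testCluster2", "v".getBytes()));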
Return value: org.apache.kafka.clients.producer.RecordMetadata
public final class RecordMetadata {
private final long offset;
private final TopicPartition topicPartition;
...
}
public Future<RecordMetadata> send(ProducerRecord<K, V> record) {
    return this.send(record, (Callback) null);
}
public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
    ProducerRecord<K, V> interceptedRecord = this.interceptors.onSend(record);
    return this.doSend(interceptedRecord, callback);
}
private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback callback) {
    ...
    int partition = this.partition(record, serializedKey, serializedValue, cluster);
    tp = new TopicPartition(record.topic(), partition);
    ...
}
private int partition(ProducerRecord<K, V> record, byte[] serializedKey, byte[] serializedValue, Cluster cluster) {
    Integer partition = record.partition();
    return partition != null ? partition : this.partitioner.partition(record.topic(), record.key(), serializedKey, record.value(), serializedValue, cluster);
}
As you can see, it first checks whether a partition was specified explicitly; only if it was not does it fall through to the partitioner's partition() method:
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
int numPartitions = partitions.size();
if (keyBytes == null) {
int nextValue = this.nextValue(topic);
List<PartitionInfo> availablePartitions = cluster.availablePartitionsForTopic(topic);
if (availablePartitions.size() > 0) {
int part = Utils.toPositive(nextValue) % availablePartitions.size();
return ((PartitionInfo)availablePartitions.get(part)).partition();
} else {
return Utils.toPositive(nextValue) % numPartitions;
}
} else {
return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
}
}
- First, if a key is specified, the partition is computed directly from the key hash: Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions.
- Otherwise, if no key is specified, the partitioner fetches a counter value via nextValue(topic) and maps it onto an available partition. A look at the nextValue method below shows where the "randomness" comes from.
private int nextValue(String topic) {
AtomicInteger counter = (AtomicInteger)this.topicCounterMap.get(topic);
if (null == counter) {
counter = new AtomicInteger(ThreadLocalRandom.current().nextInt());
AtomicInteger currentCounter = (AtomicInteger)this.topicCounterMap.putIfAbsent(topic, counter);
if (currentCounter != null) {
counter = currentCounter;
}
}
return counter.getAndIncrement();
}
As ThreadLocalRandom.current().nextInt() shows, the counter is seeded with a random value; since it is incremented on every send, keyless records are in effect distributed round-robin across partitions, starting from a random offset.
1. Fire-and-forget: send the record and don't wait for or check the result.
producer.send(record);
2. Synchronous send: call send(), which returns a Future, then call get() on the Future to block until the result comes back.
producer.send(record).get();
3. Asynchronous send: call send() with a callback (org.apache.kafka.clients.producer.Callback); the client invokes the callback when the broker's response arrives.
producer.send(record, new DemoProducerCallBack());
The producer is thread-safe, so you can increase throughput by sending from more threads while keeping a single producer instance; if that is still not enough, add more producer instances.
All of the demos listed above use the first style, fire-and-forget.
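The DemoProducerCallBack referenced above is not shown in the original; here is a minimal sketch of what such a callback might look like (the class name and printed messages are assumptions, only the Callback interface itself comes from the Kafka client):
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.RecordMetadata;

public class DemoProducerCallBack implements Callback {
    @Override
    public void onCompletion(RecordMetadata metadata, Exception exception) {
        if (exception != null) {
            // The send failed; real code would log the error or retry.
            exception.printStackTrace();
        } else {
            // The send succeeded; metadata tells us where the record landed.
            System.out.println("sent to partition " + metadata.partition() + ", offset " + metadata.offset());
        }
    }
}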
Return value: RecordMetadata:
public final class RecordMetadata {
    public static final int UNKNOWN_PARTITION = -1;
    private final long offset; // position within the partition
    private final long timestamp; // record timestamp
    private final int serializedKeySize;
    private final int serializedValueSize;
    private final TopicPartition topicPartition; // topic and partition
    private volatile Long checksum; // checksum, used for validation
}
@Test
public void testSendAndGet() throws Exception {
    KafkaProducer<byte[], byte[]> producer = MQProducer.getInstance();
    for (int i = 0; i < 10; i++) {
        String msg = "msg-" + i;
        System.out.println("send msg : " + msg);
        // Block on get() until the broker acknowledges the record.
        RecordMetadata recordMetadata = producer.send(new ProducerRecord<>(TOPIC, msg.getBytes())).get();
        System.out.println("partition : " + recordMetadata.partition() + " ,offset :" + recordMetadata.offset() + " ,serializedKeySize: " +
                recordMetadata.serializedKeySize() + " ,serializedValueSize: " + recordMetadata.serializedValueSize() + " ,timestamp: " + recordMetadata.timestamp() + " ,checksum: " + recordMetadata.checksum());
        Thread.sleep(1000);
    }
}
kafka message producer start .....
send msg : msg-0
partition : 1 ,offset :205 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596281246172 ,checksum: 3225500964
...
send msg : msg-9
partition : 2 ,offset :80 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596281255240 ,checksum: 4027041389
@Test
public void testSendAndAsyncGet() throws Exception {
    KafkaProducer<byte[], byte[]> producer = MQProducer.getInstance();
    List<Future<RecordMetadata>> futureList = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        String msg = "msg-" + i;
        System.out.println("send msg : " + msg);
        Future<RecordMetadata> future = producer.send(new ProducerRecord<>(TOPIC, msg.getBytes()), new Callback() {
            @Override
            public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interruptedException) {
                    interruptedException.printStackTrace();
                }
                System.out.println("partition : " + recordMetadata.partition() + " ,offset :" + recordMetadata.offset() + " ,serializedKeySize: " +
                        recordMetadata.serializedKeySize() + " ,serializedValueSize: " + recordMetadata.serializedValueSize() + " ,timestamp: " + recordMetadata.timestamp() + " ,checksum: " + recordMetadata.checksum());
            }
        });
        futureList.add(future);
        System.out.println("send Msg finish ");
    }
    // Wait for all callbacks to complete before the test exits.
    for (Future<RecordMetadata> future : futureList) {
        future.get();
    }
}
kafka message producer start .....
send msg : msg-0
send Msg finish
send msg : msg-1
send Msg finish
send msg : msg-2
send Msg finish
send msg : msg-3
send Msg finish
send msg : msg-4
send Msg finish
send msg : msg-5
send Msg finish
send msg : msg-6
send Msg finish
send msg : msg-7
send Msg finish
send msg : msg-8
send Msg finish
send msg : msg-9
send Msg finish
partition : 2 ,offset :93 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329819 ,checksum: 2836644770
partition : 2 ,offset :94 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
partition : 2 ,offset :95 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
partition : 0 ,offset :89 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
partition : 0 ,offset :90 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
partition : 1 ,offset :219 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329813 ,checksum: 574499175
partition : 1 ,offset :220 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
partition : 1 ,offset :221 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
partition : 3 ,offset :173 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
partition : 3 ,offset :174 ,serializedKeySize: -1 ,serializedValueSize: 5 ,timestamp: 1596282329820 ,checksum: 1853033912
Previous article: [Kafka] Consumer Group (Part 3)