在Kafka的消费理念中还有一层消费组(Consumer Group)的概念,每个消费者都有一个对应的消费组。当消息发布到主题后,只会被投递给订阅它的每个消费组中的一个消费者。
每个消费者只能消费所分配到的分区中的消息。换言之,每一个分区只能被一个消费组中的一个消费者所消费。
发布订阅模式定义了如何向一个内容节点发布和订阅消息,这个内容节点称为主题(Topic),主题可以认为是消息传递的中介,消息发布者将消息发布到某个主题,而消息订阅者从主题中订阅消息。
主题使得消息的订阅者和发布者互相保持独立,不需要进行接触即可保证消息的传递,发布/订阅模式在消息的一对多广播时采用。
如果所有的消费者都隶属于同一个消费组,那么所有的消息都会被均衡地投递给每一个消费者,即每条消息只会被一个消费者处理,这就相当于点对点模式的应用。
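下面用一段示意代码说明两种效果的差别(仅为假设性的示例,类名 GroupDemo 为假设,broker 地址沿用下文示例中的占位地址):group.id 相同的多个消费者共同分摊分区,起到点对点的效果;group.id 不同的消费者各自都能收到全量消息,起到广播的效果。
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
public class GroupDemo {
//构造消费者配置:groupId 相同的多个消费者分摊分区(点对点效果),groupId 不同的消费者各自消费全量消息(广播效果)
static Properties consumerProps(String groupId) {
Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "ip:9093"); //占位地址
props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
return props;
}
}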
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.Properties;
/**
* @Author shu
* @Date: 2021/10/25 15:09
* @Description 消费者
**/
public class MySimpleConsumer {
//主题名
private final static String TOPIC_NAME = "my-replicated-topic";
//分组
private final static String CONSUMER_GROUP_NAME = "testGroup";
public static void main(String[] args) {
Properties props = new Properties();
//Kafka 集群地址(此处为占位地址)
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "ip:9093");
//分组
props.put(ConsumerConfig.GROUP_ID_CONFIG, CONSUMER_GROUP_NAME);
//反序列化器
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
//自动提交:拉取到消息之后,立刻把偏移量提交到内部主题 __consumer_offsets;实现简单,但消息可能还没处理完就已提交,存在消息丢失的风险
// // 是否⾃动提交offset,默认就是true
// props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
// // ⾃动提交offset的间隔时间
// props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//手动提交:等消费者把消息处理完毕之后,再提交偏移量
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//⼀次poll最⼤拉取消息的条数,可以根据消费速度的快慢来设置
// props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 500);
//props.put(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, 30 * 1000);
//1.创建⼀个消费者的客户端
KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
//2. 消费者订阅主题列表
// consumer.assign();
//consumer.subscribe(Arrays.asList(TOPIC_NAME));
TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, 0);
consumer.assign(Arrays.asList(topicPartition));
while (true) {
/*
* 3.poll() API 是拉取消息的⻓轮询
*/
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
for (ConsumerRecord<String, String> record : records) {
//4.打印消息
System.out.printf("收到消息:partition = %d,offset = %d, key = %s, value = %s%n", record.partition(),
record.offset(), record.key(), record.value());
}
//所有的消息已消费完
if (records.count() > 0) {//有消息
// ⼿动同步提交offset,当前线程会阻塞直到offset提交成功
// ⼀般使⽤同步提交,因为提交之后⼀般也没有什么逻辑代码了
consumer.commitSync();//=======阻塞=== 提交成功
}
long position = consumer.position(topicPartition);
System.out.println("下一个消费的位置"+position);
}
}
}
bootstrap.servers:该参数用来指定消费者客户端连接 Kafka 集群所需的 broker 地址清单,具体的内容格式为 host1:port1,host2:port2,可以设置一个或多个地址,中间以逗号隔开。
key.deserializer 和 value.deserializer:消费者从 broker 端拉取到的消息以字节数组(byte[])的形式存在,这两个参数分别用来指定 key 和 value 反序列化操作的反序列化器,无默认值。注意这里必须填写反序列化器的全限定名。
enable-auto-commit:是否自动提交消费位移,默认为 true。
group.id:消费者隶属的消费组的名称,默认值为""。
auto-offset-reset:当找不到已提交的消费位移(或位移越界)时从何处开始消费,可选 earliest(从分区起始处消费)、latest(默认,从分区末尾消费)、none(抛出异常)。
ack-mode:Spring Kafka 的确认模式,设置为 manual_immediate 时,手动调用 Acknowledgment.acknowledge() 后立即提交位移。
#########consumer#############
# 关闭自动提交
spring.kafka.consumer.enable-auto-commit=false
# 消费组
spring.kafka.consumer.group-id=MyGroup1
# 没有已提交的位移时从头开始消费,之后接着已提交的位移继续消费
spring.kafka.consumer.auto-offset-reset=earliest
# 反序列化
spring.kafka.consumer.key-deserializer=org.apache.kafka.common.serialization.StringDeserializer
spring.kafka.consumer.value-deserializer=org.apache.kafka.common.serialization.StringDeserializer
# 一次 poll 最大拉取的消息条数
spring.kafka.consumer.max-poll-records=500
# 手动调用 Acknowledgment.acknowledge() 后立即提交
spring.kafka.listener.ack-mode=manual_immediate
# 监听的主题不存在时是否让容器启动失败(false 表示不报错)
spring.kafka.listener.missing-topics-fatal=false
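配合上面 manual_immediate 的确认模式,下面给出一个示意性的监听器写法(类名 MyKafkaListener 为假设,主题与消费组沿用上面的配置):业务处理完成后手动调用 Acknowledgment.acknowledge() 提交位移。
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.support.Acknowledgment;
import org.springframework.stereotype.Component;
@Component
public class MyKafkaListener {
//监听 my-replicated-topic,手动确认模式下必须调用 acknowledge() 才会提交位移
@KafkaListener(topics = "my-replicated-topic", groupId = "MyGroup1")
public void listen(ConsumerRecord<String, String> record, Acknowledgment ack) {
System.out.printf("收到消息:partition = %d, offset = %d, value = %s%n", record.partition(), record.offset(), record.value());
//业务处理完毕后手动确认,对应 ack-mode=manual_immediate,调用后立即提交
ack.acknowledge();
}
}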
//1.创建⼀个消费者的客户端
KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
//2. 消费者订阅主题列表
consumer.subscribe(Arrays.asList(TOPIC_NAME));
public void subscribe(Collection<String> topics, ConsumerRebalanceListener listener) {}
public void subscribe(Collection<String> topics) {}
public void subscribe(Pattern pattern, ConsumerRebalanceListener listener) {}
public void subscribe(Pattern pattern) {}
//subscribe() 内部最终委托给 SubscriptionState 的 subscribe() 方法
if (this.subscriptions.subscribe(new HashSet<>(topics), listener))
metadata.requestUpdateForNewTopics();
public synchronized boolean subscribe(Set<String> topics, ConsumerRebalanceListener listener) {
registerRebalanceListener(listener);
setSubscriptionType(SubscriptionType.AUTO_TOPICS);
return changeSubscription(topics);
}
/* 用户请求的主题列表 */
private Set<String> subscription;
private boolean changeSubscription(Set<String> topicsToSubscribe) {
if (subscription.equals(topicsToSubscribe))
return false;
subscription = topicsToSubscribe;
return true;
}
KafkaConsumer 中的 partitionsFor() 方法可以用来查询指定主题的元数据信息,从中可以得到主题的分区信息:
public class PartitionInfo {
private final String topic;//主题
private final int partition;//分区
private final Node leader;//领导节点
private final Node[] replicas;//AR集合
private final Node[] inSyncReplicas;//ISR集合
private final Node[] offlineReplicas;//OSR集合
}
//获取有关给定主题的分区的元数据
public List<PartitionInfo> partitionsFor(String topic, Duration timeout) {
acquireAndEnsureOpen();
try {
Cluster cluster = this.metadata.fetch();
List<PartitionInfo> parts = cluster.partitionsForTopic(topic);
if (!parts.isEmpty())
return parts;
Timer timer = time.timer(timeout);
Map<String, List<PartitionInfo>> topicMetadata = fetcher.getTopicMetadata(
new MetadataRequest.Builder(Collections.singletonList(topic), metadata.allowAutoTopicCreation()), timer);
return topicMetadata.get(topic);
} finally {
release();
}
}
assign():直接订阅指定的分区集合,并从这些分区拉取消息:
public void assign(Collection<TopicPartition> partitions) {
if (this.subscriptions.assignFromUser(new HashSet<>(partitions)))
metadata.requestUpdateForNewTopics();
}
//将分配更改为用户提供的指定分区。注意这与 assignFromSubscribed(Collection) 不同,后者的输入分区由所订阅的主题提供。
public synchronized boolean assignFromUser(Set<TopicPartition> partitions) {
setSubscriptionType(SubscriptionType.USER_ASSIGNED);
if (this.assignment.partitionSet().equals(partitions))
return false;
assignmentId++;
Set<String> manualSubscribedTopics = new HashSet<>();
Map<TopicPartition, TopicPartitionState> partitionToState = new HashMap<>();
for (TopicPartition partition : partitions) {
TopicPartitionState state = assignment.stateValue(partition);
if (state == null)
state = new TopicPartitionState();
partitionToState.put(partition, state);
manualSubscribedTopics.add(partition.topic());
}
this.assignment.set(partitionToState);
return changeSubscription(manualSubscribedTopics);
}
private boolean changeSubscription(Set<String> topicsToSubscribe) {
if (subscription.equals(topicsToSubscribe))
return false;
subscription = topicsToSubscribe;
return true;
}
private enum SubscriptionType { NONE, AUTO_TOPICS, AUTO_PATTERN, USER_ASSIGNED }
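结合 partitionsFor() 与 assign() 可以订阅某个主题的全部分区,下面是一个示意写法(沿用上文示例中的 consumer 与 TOPIC_NAME):
List<PartitionInfo> partitionInfos = consumer.partitionsFor(TOPIC_NAME);
List<TopicPartition> partitions = new ArrayList<>();
if (partitionInfos != null) {
for (PartitionInfo info : partitionInfos) {
//把查询到的每个分区转成 TopicPartition
partitions.add(new TopicPartition(info.topic(), info.partition()));
}
}
consumer.assign(partitions);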
subscribe() 的重载方法中有带 ConsumerRebalanceListener 类型参数的方法,而 assign() 方法却没有,这也说明通过 assign() 订阅分区时不具备消费者自动再均衡的功能。
while (true) {
/*
* 3.poll() API 是拉取消息的⻓轮询
*/
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
for (ConsumerRecord<String, String> record : records) {
//4.打印消息
System.out.printf("收到消息:partition = %d,offset = %d, key = %s, value = %s%n", record.partition(), record.offset(), record.key(), record.value());
}
}
poll() 方法的超时参数类型为 Duration,可以使用 ofMillis()、ofSeconds()、ofMinutes()、ofHours() 等多种不同的方法指定不同的时间单位,灵活性更强。
//拉取到的消息
public class ConsumerRecord<K, V> {
public static final long NO_TIMESTAMP = RecordBatch.NO_TIMESTAMP;
public static final int NULL_SIZE = -1;
public static final int NULL_CHECKSUM = -1;
private final String topic; //主题名
private final int partition; //分区
private final long offset; //偏移量
private final long timestamp; //时间戳
private final TimestampType timestampType;
private final int serializedKeySize; //key 序列化后的大小
private final int serializedValueSize; //value 序列化后的大小
private final Headers headers; //头部信息
private final K key; //key
private final V value; //值
private final Optional<Integer> leaderEpoch;
private volatile Long checksum;
}
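ConsumerRecords 中的消息是按分区组织的,除了直接迭代整批消息,也可以先通过 partitions() 取得分区集合,再用 records(TopicPartition) 按分区处理,下面是一个示意写法(沿用上文的 records 变量):
for (TopicPartition tp : records.partitions()) {
for (ConsumerRecord<String, String> record : records.records(tp)) {
//按分区逐条处理
System.out.printf("partition = %d, offset = %d, value = %s%n", tp.partition(), record.offset(), record.value());
}
}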
Kafka 客户端自带了 ByteBufferDeserializer、ByteArrayDeserializer、BytesDeserializer、DoubleDeserializer、FloatDeserializer、IntegerDeserializer、LongDeserializer、ShortDeserializer、StringDeserializer,分别用于 ByteBuffer、ByteArray、Bytes、Double、Float、Integer、Long、Short 及 String 类型的反序列化,这些反序列化器都实现了 Deserializer 接口:
public interface Deserializer<T> extends Closeable {
//初始化配置
default void configure(Map<String, ?> configs, boolean isKey) {
// intentionally left blank
}
T deserialize(String topic, byte[] data);
//用来执行反序列化
default T deserialize(String topic, Headers headers, byte[] data) {
return deserialize(topic, data);
}
@Override
default void close() {
// intentionally left blank
}
}
//自定义反序列化器示例:使用 fastjson 将字节数组反序列化为 Person 对象
public class PersonJsonDeserializer implements Deserializer<Person> {
@Override
public void configure(Map<String, ?> configs, boolean isKey) {
}
@Override
public Person deserialize(String topic, byte[] data) {
return JSON.parseObject(data, Person.class);
}
@Override
public void close() {
}
}
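自定义的反序列化器需要通过消费者参数指定才会生效,下面是一个示意写法(value 使用上面的 PersonJsonDeserializer,key 仍使用 StringDeserializer):
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, PersonJsonDeserializer.class.getName());
KafkaConsumer<String, Person> consumer = new KafkaConsumer<>(props);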
消费者消费到此分区消息的最大偏移量为268,对应的消费位移 lastConsumedOffset 也就是268。在消费完之后执行同步提交,但最终结果显示所提交的位移 committed offset 为269。这是因为提交的位移并不是当前已消费到的位置,而是下一条需要拉取的消息的位置,即 lastConsumedOffset + 1。
对于位移提交的具体时机的把握也很有讲究,有可能会造成重复消费和消息丢失的现象。
在 Kafka 中,默认的消费位移提交方式是自动提交,由消费者客户端参数 enable.auto.commit 配置,默认值为 true。当然这个默认的自动提交不是每消费一条消息就提交一次,而是定期提交,这个定期的周期时间由客户端参数 auto.commit.interval.ms 配置,默认值为5秒。手动提交又分为同步提交和异步提交,分别对应 KafkaConsumer 中的 commitSync() 和 commitAsync() 两种类型的方法。
同步提交
//所有的消息已消费完
if (records.count() > 0) {//有消息
// ⼿动同步提交offset,当前线程会阻塞直到offset提交成功
// ⼀般使⽤同步提交,因为提交之后⼀般也没有什么逻辑代码了
consumer.commitSync();//=======阻塞=== 提交成功
}
long position = consumer.position(topicPartition);
System.out.println("下一个消费的位置"+position);
同步提交也存在重复消费的问题:如果在业务逻辑处理完之后、同步位移提交之前,程序出现了崩溃,那么待恢复之后又只能从上一次位移提交的地方拉取消息,由此在两次位移提交的窗口中出现了重复消费的现象。
commitSync() 方法会根据 poll() 方法拉取的最新位移来进行提交(注意提交的值对应于 position 的位置,即下一条要拉取的消息的偏移量),只要没有发生不可恢复的错误(Unrecoverable Error),它就会阻塞消费者线程直至位移提交完成。
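如果想进一步缩小重复消费的窗口,可以使用带参数的 commitSync(Map<TopicPartition, OffsetAndMetadata>) 重载,按更细的粒度提交位移。下面是一个示意写法(沿用上文的 consumer 与 records,注意提交过于频繁会影响性能):
for (ConsumerRecord<String, String> record : records) {
//……业务处理……
TopicPartition tp = new TopicPartition(record.topic(), record.partition());
//提交的位移是下一条需要拉取的消息的位置,因此为当前 offset + 1
consumer.commitSync(Collections.singletonMap(tp, new OffsetAndMetadata(record.offset() + 1)));
}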
异步提交
//所有的消息已消费完
if (records.count() > 0) {//有消息
consumer.commitAsync(new OffsetCommitCallback() {
@Override
public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
//提交失败时,可与外部存储(如 Redis)中记录的最近一次消费位移比较,若已有更大的位移提交成功,则无需重试
}
});
}
long position = consumer.position(topicPartition);
System.out.println("下一个消费的位置"+position);
与 commitSync() 方法相反,异步提交的方式(commitAsync())在执行的时候消费者线程不会被阻塞,可能在提交消费位移的结果还未返回之前就开始了新一次的拉取操作。
设置一个递增的序号来维护异步提交的顺序,每次位移提交之后就增加序号相对应的值。在遇到位移提交失败需要重试的时候,可以检查所提交的位移和序号的值的大小,如果前者小于后者,则说明有更大的位移已经提交了,不需要再进行本次重试;如果两者相同,则说明可以进行重试提交。
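下面给出一个按此思路的简化示意(字段名 commitSeq 为假设,仅演示用递增序号规避乱序重试):
private final AtomicLong commitSeq = new AtomicLong(0); //每次发起异步提交前递增的序号
//在消费循环中:
final long seq = commitSeq.incrementAndGet();
consumer.commitAsync(new OffsetCommitCallback() {
@Override
public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
if (exception != null && seq == commitSeq.get()) {
//只有在本次之后没有发起过更大位移的提交时才值得重试,这里以同步提交兜底
consumer.commitSync(offsets);
}
}
});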
在有些应用场景下我们可能需要暂停某些分区的消费而先消费其他分区,当达到一定条件时再恢复这些分区的消费。
KafkaConsumer中使用pause()和resume()方法来分别实现暂停某些分区在拉取操作时返回数据给客户端和恢复某些分区向客户端返回数据的操作。
consumer.pause(Arrays.asList(topicPartition));
consumer.resume(Arrays.asList(topicPartition));
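一个示意性的用法如下(backlog 为假设的本地缓冲队列,consumer 与 topicPartition 沿用上文示例):当本地积压超过阈值时暂停这些分区的数据返回,处理完再恢复。
Collection<TopicPartition> parts = Arrays.asList(topicPartition);
if (backlog.size() > 1000) {
consumer.pause(parts); //之后的 poll() 不再返回这些分区的数据
} else if (!consumer.paused().isEmpty()) {
consumer.resume(parts); //恢复这些分区的数据返回
}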
//consumer.subscribe(Arrays.asList(TOPIC_NAME));
TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, 0);
consumer.assign(Arrays.asList(topicPartition));
//指定位移消费:assign() 之后分区即已分配,可直接调用 seek() 从该分区的偏移量 10 处开始消费
//(若在循环内对每条消息都调用 seek(),会反复回到同一位置造成重复消费)
consumer.seek(topicPartition, 10);
while (true) {
/*
* 3.poll() API 是拉取消息的⻓轮询
*/
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
for (ConsumerRecord<String, String> record : records) {
//4.打印消息
System.out.printf("收到消息:partition = %d,offset = %d, key = %s, value = %s%n", record.partition(),
record.offset(), record.key(), record.value());
}
//所有的消息已消费完
if (records.count() > 0) {//有消息
// ⼿动同步提交offset,当前线程会阻塞直到offset提交成功
// ⼀般使⽤同步提交,因为提交之后⼀般也没有什么逻辑代码了
consumer.commitSync();//=======阻塞=== 提交成功
}
long position = consumer.position(topicPartition);
System.out.println("下一个消费的位置"+position);
}
再均衡监听器可以通过 subscribe() 的重载方法传入:
public void subscribe(Pattern pattern, ConsumerRebalanceListener listener) {
}
// 再均衡监听器
public interface ConsumerRebalanceListener {
//这个方法会在再均衡开始之前和消费者停止读取消息之后被调用。可以通过这个回调方法来处理消费位移的提交,以此来避免一些不必要的重复消费现象的发生。
void onPartitionsRevoked(Collection<TopicPartition> partitions);
//这个方法会在重新分配分区之后和消费者开始读取消息之前被调用。参数partitions表示再均衡后所分配到的分区。
void onPartitionsAssigned(Collection<TopicPartition> partitions);
default void onPartitionsLost(Collection<TopicPartition> partitions) {
onPartitionsRevoked(partitions);
}
}
public class HandlerRebalance implements ConsumerRebalanceListener {
private final Map<TopicPartition, OffsetAndMetadata> currOffsets;
private final KafkaConsumer<String,String> consumer;
//private final Transaction tr; //事务类的实例(示意)
public HandlerRebalance(Map<TopicPartition, OffsetAndMetadata> currOffsets,
KafkaConsumer<String, String> consumer) {
this.currOffsets = currOffsets;
this.consumer = consumer;
}
/*模拟一个保存分区偏移量的数据库表*/
public final static ConcurrentHashMap<TopicPartition,Long>
partitionOffsetMap = new ConcurrentHashMap<>();
//分区再均衡之前
public void onPartitionsRevoked(
Collection<TopicPartition> partitions) {
final String id = Thread.currentThread().getId()+"";
System.out.println(id+"-onPartitionsRevoked参数值为:"+partitions);
System.out.println(id+"-服务器准备分区再均衡,提交偏移量。当前偏移量为:"
+currOffsets);
//开始事务
//偏移量写入数据库
System.out.println("分区偏移量表中:"+partitionOffsetMap);
for(TopicPartition topicPartition:partitions){
partitionOffsetMap.put(topicPartition,
currOffsets.get(topicPartition).offset());
}
consumer.commitSync(currOffsets);
//提交业务数据和偏移量入库 tr.commit()
}
//分区再均衡完成以后
public void onPartitionsAssigned(
Collection<TopicPartition> partitions) {
final String id = "" + Thread.currentThread().getId();
System.out.println(id+"-再均衡完成,onPartitionsAssigned参数值为:"+partitions);
System.out.println("分区偏移量表中:"+partitionOffsetMap);
for(TopicPartition topicPartition:partitions){
System.out.println(id+"-topicPartition"+topicPartition);
//模拟从数据库中取得上次的偏移量
Long offset = partitionOffsetMap.get(topicPartition);
if(offset==null) continue;
//从特定偏移量处开始记录 (从指定分区中的指定偏移量开始消费)
//这样就可以确保分区再均衡中的数据不错乱
consumer.seek(topicPartition,partitionOffsetMap.get(topicPartition));
}
}
}
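下面是一个示意性的接入方式(沿用上文的命名):订阅时注册 HandlerRebalance,并在消费循环中维护 currOffsets,这样再均衡发生时就能在 onPartitionsRevoked() 中提交到已经处理的位置。
Map<TopicPartition, OffsetAndMetadata> currOffsets = new HashMap<>();
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList(TOPIC_NAME), new HandlerRebalance(currOffsets, consumer));
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
for (ConsumerRecord<String, String> record : records) {
//每处理一条消息就更新待提交位移(提交的是下一条要消费的位置,即 offset + 1)
currOffsets.put(new TopicPartition(record.topic(), record.partition()), new OffsetAndMetadata(record.offset() + 1));
}
if (!records.isEmpty()) {
consumer.commitSync(currOffsets);
}
}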
private final ConsumerInterceptors<K, V> interceptors;
//拉取方法
private ConsumerRecords<K, V> poll(final Timer timer, final boolean includeMetadataInTimeout) {
final Map<TopicPartition, List<ConsumerRecord<K, V>>> records = pollForFetches(timer);
if (!records.isEmpty()) {
// before returning the fetched records, we can send off the next round of fetches
// and avoid block waiting for their responses to enable pipelining while the user
// is handling the fetched records.
//
// NOTE: since the consumed position has already been updated, we must not allow
// wakeups or any other errors to be triggered prior to returning the fetched records.
if (fetcher.sendFetches() > 0 || client.hasPendingRequests()) {
client.transmitSends();
}
//进入消费者拦截器
return this.interceptors.onConsume(new ConsumerRecords<>(records));
}
//……(此处省略该方法的其余部分)
}
public class ConsumerInterceptors<K, V> implements Closeable {
private static final Logger log = LoggerFactory.getLogger(ConsumerInterceptors.class);
private final List<ConsumerInterceptor<K, V>> interceptors;
public ConsumerInterceptors(List<ConsumerInterceptor<K, V>> interceptors) {
this.interceptors = interceptors;
}
public ConsumerRecords<K, V> onConsume(ConsumerRecords<K, V> records) {
ConsumerRecords<K, V> interceptRecords = records;
for (ConsumerInterceptor<K, V> interceptor : this.interceptors) {
try {
interceptRecords = interceptor.onConsume(interceptRecords);
} catch (Exception e) {
// do not propagate interceptor exception, log and continue calling other interceptors
log.warn("Error executing interceptor onConsume callback", e);
}
}
return interceptRecords;
}
/**
KafkaConsumer会在提交完消费位移之后调用拦截器的onCommit()方法,可以使用这个方法来记录跟踪所提交的位移信息,比如当消费者使用commitSync的无参方法时,我们不知道提交的消费位移的具体细节,而使用拦截器的onCommit()方法却可以做到这一点。
**/
public void onCommit(Map<TopicPartition, OffsetAndMetadata> offsets) {
for (ConsumerInterceptor<K, V> interceptor : this.interceptors) {
try {
interceptor.onCommit(offsets);
} catch (Exception e) {
// do not propagate interceptor exception, just log
log.warn("Error executing interceptor onCommit callback", e);
}
}
}
@Override
public void close() {
for (ConsumerInterceptor<K, V> interceptor : this.interceptors) {
try {
interceptor.close();
} catch (Exception e) {
log.error("Failed to close consumer interceptor ", e);
}
}
}
}
@Component
public class MyConsumerInterceptor implements ConsumerInterceptor<String,String> {
@Override
public ConsumerRecords<String, String> onConsume(ConsumerRecords<String, String> records) {
Map<TopicPartition, List<ConsumerRecord<String, String>>> newRecords = new HashMap<>();
for(TopicPartition partition:records.partitions()){
List<ConsumerRecord<String, String>> recs = records.records(partition);
List<ConsumerRecord<String, String>> newRecs = new ArrayList<>();
for(ConsumerRecord<String,String> rec:recs){
String newValue = "interceptor-"+rec.value();
ConsumerRecord<String,String> newRec = new ConsumerRecord<>(rec.topic(),
rec.partition(),rec.offset(),rec.key(),newValue);
newRecs.add(newRec);
}
newRecords.put(partition,newRecs);
}
return new ConsumerRecords<>(newRecords);
}
@Override
public void onCommit(Map<TopicPartition, OffsetAndMetadata> offsets) {
offsets.forEach((tp,offsetAndMetadata) -> {
System.out.println(tp+" : "+offsetAndMetadata.offset());
});
}
@Override
public void close() {
}
@Override
public void configure(Map<String, ?> configs) {
}
}
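自定义拦截器需要通过 interceptor.classes 参数注册才会被调用,下面是一个示意写法(在 Spring Boot 中一般也可以通过 spring.kafka.consumer.properties.interceptor.classes 传入,以实际版本为准):
props.put(ConsumerConfig.INTERCEPTOR_CLASSES_CONFIG, MyConsumerInterceptor.class.getName());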
KafkaConsumer 是非线程安全的,它在内部通过 acquire() 方法检测当前是否只有一个线程在操作,若检测到有其他线程正在操作则直接抛出 ConcurrentModificationException:
private void acquire() {
long threadId = Thread.currentThread().getId();
if (threadId != currentThread.get() && !currentThread.compareAndSet(NO_CURRENT_THREAD, threadId))
throw new ConcurrentModificationException("KafkaConsumer is not safe for multi-threaded access");
refcount.incrementAndGet();
}