目录
目标
KAFKA官方API
实战
生产者发送消息
消费者消费消息
把消费者组对应的主题内未消费完的数据导入到文件中
生产者核心参数
acks
retries&&retry.backoff.ms
buffer.memory&&batch.size&&linger.ms
消费者核心参数
enable.auto.commit&&auto.commit.interval.ms
max.poll.records
max.poll.interval.ms
heartbeat.interval.ms
session.timeout.ms
auto.offset.reset
消费者核心方法
assign
subscribe
kafka生产者文档:https://kafka.apache.org/31/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html
kafka消费者文档:https://kafka.apache.org/31/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html
相关依赖
<dependency>
    <groupId>org.springframework.kafka</groupId>
    <artifactId>spring-kafka</artifactId>
</dependency>
生产者发送消息代码
package com.kafka;
import org.apache.kafka.clients.producer.*;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
/**
* KAFKA生产者官方文档:
* https://kafka.apache.org/31/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html
*/
public class MyProducer {
//如果是集群,则用逗号分隔。
public static final String KAFKA_BROKER_LIST="kafka服务地址:端口";
//主题
public static final String TOPIC_NAME = "liNingShoesTopic";
public static void main(String[] args) throws Exception{
new MyProducer().synSend();
}
/**
* 同步发送消息:发送成功以后才发下一条消息。
*/
public void synSend() throws ExecutionException, InterruptedException {
Properties props = new Properties();
//向kafka服务器发消息,多个服务器用逗号隔开。
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//网络传输,所以要把发送的数据的key转成字节数组的格式
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
//网络传输,所以要把发送的数据的value转成字节数组的格式
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
Producer<String, String> producer = new KafkaProducer<>(props);
for (int i = 0; i < 3; i++){
Future<RecordMetadata> send = producer.send(new ProducerRecord<>(TOPIC_NAME, "这里是我发送的key。", "这里是我发送的内容。"));
RecordMetadata recordMetadata = send.get();
System.out.println("主题="+recordMetadata.topic()+";偏移量="+recordMetadata.offset()+";分区="+recordMetadata.partition());
}
producer.close();
}
/**
* 异步发送消息&&回调
* 注意:异步发送时要保证在消息全部发送完成之前,producer不会被关闭。
*/
public void asySend() throws ExecutionException, InterruptedException {
Properties props = new Properties();
//向kafka服务器发消息,多个服务器用逗号隔开。
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//网络传输,所以要把发送的数据的key转成字节数组的格式
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
//网络传输,所以要把发送的数据的value转成字节数组的格式
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
Producer<String, String> producer = new KafkaProducer<>(props);
int msgNum = 3;
//因为是异步发送,为了避免消息还没发送完producer就被关闭,所以用了CountDownLatch计数。
CountDownLatch countDownLatch = new CountDownLatch(msgNum);
for (int i = 0; i < msgNum; i++){
producer.send(new ProducerRecord<>(TOPIC_NAME, "这里是我发送的key。", "这里是我发送的内容。"), new Callback() {
@Override
public void onCompletion(RecordMetadata recordMetadata, Exception exception) {
if (exception != null) {
System.err.println("发送消息失败:" + exception.getStackTrace());
}
if (recordMetadata != null) {
System.out.println("主题="+recordMetadata.topic()+";偏移量="+recordMetadata.offset()+";分区="+recordMetadata.partition());
}
countDownLatch.countDown();
}
});
}
countDownLatch.await(5, TimeUnit.SECONDS);
producer.close();
}
/**
* 指定分区发送消息。
* 如果不指定分区:key不为空时,默认分区器对key做hash运算并对分区总数取模来确定分区;key为空时使用粘性分区策略。自定义分区器的示例见本类代码之后。
* org.apache.kafka.clients.producer.internals.DefaultPartitioner.class
* public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster, int numPartitions) {
* return keyBytes == null ? this.stickyPartitionCache.partition(topic, cluster) : Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
* }
*/
public void choosePartitionSend() throws ExecutionException, InterruptedException {
Properties props = new Properties();
//向kafka服务器发消息,多个服务器用逗号隔开。
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//网络传输,所以要把发送的数据的key转成字节数组的格式
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
//网络传输,所以要把发送的数据的value转成字节数组的格式
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
Producer<String, String> producer = new KafkaProducer<>(props);
for (int i = 0; i < 3; i++){
//我这里指定分区1,把消息发送到分区1。
Future<RecordMetadata> send = producer.send(new ProducerRecord<>(TOPIC_NAME, 1, "这里是我发送的key。", "这里是我发送的内容。"));
RecordMetadata recordMetadata = send.get();
System.out.println("主题="+recordMetadata.topic()+";偏移量="+recordMetadata.offset()+";分区="+recordMetadata.partition());
}
producer.close();
}
}
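作为对上面DefaultPartitioner说明的补充,下面给出一个自定义分区器的简单示意(类名MyPartitioner是假设的,逻辑仅演示按key的hash取模,与默认分区器的思路类似,并非本文原有代码):
package com.kafka;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.utils.Utils;
import java.util.Map;
public class MyPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        //获取该主题的分区总数
        int numPartitions = cluster.partitionCountForTopic(topic);
        //key为空时固定发到0分区,否则对key的hash值取正后对分区总数取模
        return keyBytes == null ? 0 : Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
    }
    @Override
    public void close() {}
    @Override
    public void configure(Map<String, ?> configs) {}
}
使用时在生产者配置中指定该分区器即可:props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, MyPartitioner.class.getName());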
相关依赖
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>3.1.1-SNAPSHOT</version>
</dependency>
消费者消费消息代码
package com.ctx.com.simple;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.*;
/**
* KAFKA消费者官方文档:
* https://kafka.apache.org/31/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html
*/
public class MyConsumer {
//主题
public static final String TOPIC_NAME = "liNingShoesTopic";
//消费者组
public static final String GROUP_ID_CONFIG = "liNingGroup";
//如果是集群,则用逗号分隔。
public static final String KAFKA_BROKER_LIST = "kafka服务地址:端口";
public static void main(String[] args) {
//消费这个时间段内0、1、2分区内的所有消息,并生成文件。
new Thread(()->{
new CreateKafkaFile().assignByTime2("2023-03-02 18:00:00", "2023-03-02 23:00:00",0);
}).start();
new Thread(()->{
new CreateKafkaFile().assignByTime2("2023-03-02 18:00:00", "2023-03-02 23:00:00",1);
}).start();
new Thread(()->{
new CreateKafkaFile().assignByTime2("2023-03-02 18:00:00", "2023-03-02 23:00:00",2);
}).start();
}
/**
* 消费消息后自动偏移提交
*/
public void autoCommitOffset() {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//订阅给定的主题列表以获取动态分配的分区。
consumer.subscribe(Arrays.asList(TOPIC_NAME));
while (true) {
/*
* poll()是拉取消息的长轮询,我这里设置0.1秒循环一次。主要作用:
* 1.判断消费者是否还活着;
* 2.消费者会存活在自己所在的消费者组中,并且持续的消费指定的分区的消息。
*/
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
if (records != null && records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.println("偏移量=" + record.offset() + ";" + "key=" + record.key() + ";" + "value=" + record.value());
}
}
}
}
/**
* 手动同步提交偏移量
*/
public void manualCommitSync() {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//订阅给定的主题列表以获取动态分配的分区。
consumer.subscribe(Arrays.asList(TOPIC_NAME));
List<ConsumerRecord<String, String>> buffer = new ArrayList<>();
while (true) {
//100毫秒循环一次。
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
if (records != null && records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.println("偏移量=" + record.offset() + ";" + "key=" + record.key() + ";" + "value=" + record.value());
buffer.add(record);
}
}
//每当消费消息>=5就提交一次偏移量。
if (buffer.size() >= 5) {
System.out.println("提交偏移量。");
//一般可以在这里加try,在catch里面重复提交。
consumer.commitSync();
buffer.clear();
}
}
}
/**
* 手动异步提交偏移量
*/
public void manualCommitAsy() {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//订阅给定的主题列表以获取动态分配的分区。
consumer.subscribe(Arrays.asList(TOPIC_NAME));
List<ConsumerRecord<String, String>> buffer = new ArrayList<>();
while (true) {
//100毫秒循环一次。
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
if (records != null && records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.println("偏移量=" + record.offset() + ";" + "key=" + record.key() + ";" + "value=" + record.value());
buffer.add(record);
}
}
//每当消费消息>=5就提交一次偏移量。
if (buffer.size() >= 5) {
System.out.println("提交偏移量。");
consumer.commitAsync(new OffsetCommitCallback() {
@Override
public void onComplete(Map<TopicPartition, OffsetAndMetadata> map, Exception e) {
//e!=null表示异步提交失败,此时可以再次提交。
//生产上一般用同步提交。
if (e != null) {
System.out.println("异步提交消息失败。");
}
}
});
buffer.clear();
}
}
}
/**
* 对订阅的主题轮询按照分区的粒度来消费,手动设置并提交偏移量。
*/
public void manualCommitOffsetByPartition() {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//订阅给定的主题列表以获取动态分配的分区。
consumer.subscribe(Arrays.asList(TOPIC_NAME));
List<ConsumerRecord<String, String>> buffer = new ArrayList<>();
try {
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(Long.MAX_VALUE));
if (records != null && records.count() > 0) {
//拉取消息以后根据消息获取所有分区。
for (TopicPartition partition : records.partitions()) {
//获取给定分区的记录
List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
//循环输出每个分区的记录
for (ConsumerRecord<String, String> record : partitionRecords) {
System.out.println(record.offset() + ": " + record.value());
}
//最终消费到的偏移量
long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
//提交指定主题和分区列表的指定偏移量。
consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
}
}
}
} finally {
consumer.close();
}
}
/**
* 指定分区来消费消息
* 指定分区有哪些优点?
* 官网给出的评价:消费者失败不会导致分配的分区重新平衡。每个消费者独立行动,即使它与另一个消费者共享一个 groupId。
*/
public void assign() {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//手动将分区列表分配给此使用者。
consumer.assign(Arrays.asList(new TopicPartition(TOPIC_NAME, 0)));
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
if (records != null && records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.println("偏移量=" + record.offset() + ";" + "key=" + record.key() + ";" + "value=" + record.value());
}
}
}
}
/**
* 消费限定时间内,指定的分区消息。这里我设定从一个小时前开始消费消息。
*/
public void assignByTime() {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
List<PartitionInfo> topicPartitions = consumer.partitionsFor(TOPIC_NAME);
//从1小时前开始消费
long fetchDataTime = new Date().getTime() - 1000 * 60 * 60;
Map<TopicPartition, Long> map = new HashMap<>();
for (PartitionInfo par : topicPartitions) {
map.put(new TopicPartition(TOPIC_NAME, par.partition()), fetchDataTime);
}
Map<TopicPartition, OffsetAndTimestamp> parMap = consumer.offsetsForTimes(map);
//assign会整体替换上一次的分配,所以先一次性分配所有分区,再逐个seek到目标偏移量。
consumer.assign(parMap.keySet());
for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : parMap.entrySet()) {
TopicPartition key = entry.getKey();
OffsetAndTimestamp value = entry.getValue();
if (key == null || value == null) {
continue;
}
long offset = value.offset();
System.out.println("指定时间内的分区=" + key.partition() + ";偏移量=" + offset);
//根据消息里的timestamp确定offset
consumer.seek(key, offset);
}
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
if (records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.printf("收到消息:分区 = %d,偏移量 = %d, key = %s, 数据 = %s,时间戳=%d%n", record.partition(),
record.offset(), record.key(), record.value(), record.timestamp());
}
}
}
}
/**
* 消费限定时间内,指定的分区消息。
*
* @param time 如:"2022-08-08 12:00:00"表示从(按照消息的创建时间开始计算)"2022-08-08 12:00:00"消费消息,一直消费到现在。
*/
public void assignByTime(String time) {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
List<PartitionInfo> topicPartitions = consumer.partitionsFor(TOPIC_NAME);
long fetchDataTime = DateUtils.strToDate(time).getTime();
Map<TopicPartition, Long> map = new HashMap<>();
for (PartitionInfo par : topicPartitions) {
map.put(new TopicPartition(TOPIC_NAME, par.partition()), fetchDataTime);
}
Map<TopicPartition, OffsetAndTimestamp> parMap = consumer.offsetsForTimes(map);
//assign会整体替换上一次的分配,所以先一次性分配所有分区,再逐个seek到目标偏移量。
consumer.assign(parMap.keySet());
for (Map.Entry<TopicPartition, OffsetAndTimestamp> entry : parMap.entrySet()) {
TopicPartition key = entry.getKey();
OffsetAndTimestamp value = entry.getValue();
if (key == null || value == null) {
continue;
}
long offset = value.offset();
System.out.println("指定时间内的分区=" + key.partition() + ";偏移量=" + offset);
//根据消息里的timestamp确定offset
consumer.seek(key, offset);
}
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
if (records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.printf("收到消息:分区 = %d,偏移量 = %d, key = %s, 数据 = %s,时间戳=%d%n", record.partition(),
record.offset(), record.key(), record.value(), record.timestamp());
}
}
}
}
/**
* 消费限定时间内,指定的分区消息。
*注意:只能消费一个分区,startMap添加多个分区无效。
* @param startTime 如:"2022-08-08 12:00:00"
* @param endTime 如:"2022-08-08 22:00:00"
*/
public void assignByTime2(String startTime, String endTime,int partition ) {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//开始时间时间戳
long startTimeStamp = DateUtils.strToDate(startTime).getTime();
Map<TopicPartition, Long> startMap = new HashMap<>();
startMap.put(new TopicPartition(TOPIC_NAME, partition), startTimeStamp);
Map<TopicPartition, OffsetAndTimestamp> parStartMap = consumer.offsetsForTimes(startMap);
for (Map.Entry<TopicPartition, OffsetAndTimestamp> startEntry : parStartMap.entrySet()) {
TopicPartition key = startEntry.getKey();
OffsetAndTimestamp value = startEntry.getValue();
if (key == null || value == null) {
continue;
}
Long offset = value.offset();
System.out.println("指定时间内的分区=" + key.partition() + ";偏移量=" + offset);
//根据消息里的timestamp确定offset
if (value != null) {
consumer.assign(Arrays.asList(key));
consumer.seek(key, offset);
}
}
//结束时间时间戳(加999毫秒是为了应对相同年月日时分秒,但毫秒值大于限定值的情况。)
long endTimeStamp = DateUtils.strToDate(endTime).getTime()+999;
System.out.println("============"+endTimeStamp);
String pathName = "C:\\Users\\20203\\Desktop\\kafka数据\\" + TOPIC_NAME + "_" + DateUtils.dateToStr(new Date(),"yyyy-MM-dd-HH-mm-ss") + ".txt";
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
if (records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.timestamp());
//小于等于设定的结束时间
if(record.timestamp()<=endTimeStamp ){
createTmpFileFromCharset("分区="+record.partition()+";"+record.value(), pathName);
System.out.printf("收到消息:分区 = %d,偏移量 = %d, key = %s, 数据 = %s,时间戳=%d%n", record.partition(),
record.offset(), record.key(), record.value(), record.timestamp());
}else{
//已经超过设定的结束时间,关闭消费者并退出。
consumer.close();
return;
}
}
}
}
}
/**
* 从最早的未提交的偏移量开始消费
*/
public void consumeFromBeginning() {
Properties props = new Properties();
//设置从最早的未提交的偏移量开始消费
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//订阅给定的主题列表以获取动态分配的分区。
consumer.subscribe(Arrays.asList(TOPIC_NAME));
while (true) {
/*
* poll()是拉取消息的长轮询,我这里设置0.1秒循环一次。主要作用:
* 1.判断消费者是否还活着;
* 2.消费者会存活在自己所在的消费者组中,并且持续的消费指定的分区的消息。
*/
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
if (records != null && records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.println("偏移量=" + record.offset() + ";" + "key=" + record.key() + ";" + "value=" + record.value());
}
}
}
}
/**
* 指定分区,指定偏移量消费。
*/
public void consumeFromOffset() {
Properties props = new Properties();
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//手动将分区列表分配给此使用者。
consumer.assign(Arrays.asList(new TopicPartition(TOPIC_NAME, 0)));
//指定0分区从第20个偏移量开始消费,即跳过0至19的偏移量开始消费。
consumer.seek(new TopicPartition(TOPIC_NAME, 0), 20);
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
if (records != null && records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
System.out.println("偏移量=" + record.offset() + ";" + "key=" + record.key() + ";" + "value=" + record.value());
}
}
}
}
}
package com.ctx.com.simple;
import org.apache.kafka.clients.consumer.*;
import java.io.*;
import java.time.Duration;
import java.util.*;
public class CreateKafkaFile {
private static volatile FileWriter fileWriter = null;
//主题
public static final String TOPIC_NAME = "liNingShoesTopic";
//消费者组
public static final String GROUP_ID_CONFIG = "liNingGroup";
//如果是集群,则用逗号分隔。
public static final String KAFKA_BROKER_LIST = "127.0.0.1:6667";
public static void main(String[] args) {
new CreateKafkaFile().consumeFromBeginning();
}
/**
* 从最早的未提交的偏移量开始消费
*/
public void consumeFromBeginning() {
Properties props = new Properties();
//设置从最早的未提交的偏移量开始消费
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
//kafka服务器地址和端口
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER_LIST);
//设置消费者组
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, GROUP_ID_CONFIG);
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
//将key和value反序列化
props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//订阅给定的主题列表以获取动态分配的分区。
consumer.subscribe(Arrays.asList(TOPIC_NAME));
//kafka数据保存的文件的路径
String pathName = "C:\\Users\\20203\\Desktop\\kafka数据\\" + TOPIC_NAME + "_" + DateUtils.dateToStr(new Date(),"yyyy-MM-dd-HH-mm-ss") + ".txt";
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
if (records != null && records.count() > 0) {
for (ConsumerRecord<String, String> record : records) {
if (record.value() != null ) {
createTmpFileFromCharset(record.value(), pathName);
System.out.println("偏移量=" + record.offset() + ";" + "key=" + record.key() + ";" + "value=" + record.value());
}
}
}
}
}
public static synchronized FileWriter getFileWriter(String pathname, boolean boo) {
if (fileWriter == null) {
synchronized (FileWriter.class) {
if (fileWriter == null) {
//默认false表示不追加,true追加
try {
fileWriter = new FileWriter(pathname, boo);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
}
return fileWriter;
}
/**
* 指定数据和文件的绝对路径,生成并写入数据。
*
* @param data
* @param pathname
* @return
*/
public void createTmpFileFromCharset(String data, String pathname) {
FileWriter fileWriter = null;
try {
//获取字符输出流
fileWriter = getFileWriter(pathname, true);
char[] chars = data.toCharArray();
fileWriter.write(chars);
fileWriter.write("\r\n");
fileWriter.flush();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
/*try {
if (fileWriter != null) {
fileWriter.close();
}
} catch (IOException e) {
e.printStackTrace();
}*/
}
}
}
官网文档
acks
The number of acknowledgments the producer requires the leader to have received before considering a request complete. This controls the durability of records that are sent. The following settings are allowed:
acks=0
If set to zero then the producer will not wait for any acknowledgment from the server at all. The record will be immediately added to the socket buffer and considered sent. No guarantee can be made that the server has received the record in this case, and the retries configuration will not take effect (as the client won't generally know of any failures). The offset given back for each record will always be set to -1.
acks=1
This will mean the leader will write the record to its local log but will respond without awaiting full acknowledgement from all followers. In this case should the leader fail immediately after acknowledging the record but before the followers have replicated it then the record will be lost.
acks=all
This means the leader will wait for the full set of in-sync replicas to acknowledge the record. This guarantees that the record will not be lost as long as at least one in-sync replica remains alive. This is the strongest available guarantee. This is equivalent to the acks=-1 setting. Note that enabling idempotence requires this config value to be 'all'. If conflicting configurations are set and idempotence is not explicitly enabled, idempotence is disabled.
Type: string Default: all Valid Values: [all, -1, 0, 1] Importance: low
min.insync.replicas
When a producer sets acks to "all" (or "-1"), min.insync.replicas specifies the minimum number of replicas that must acknowledge a write for the write to be considered successful. If this minimum cannot be met, then the producer will raise an exception (either NotEnoughReplicas or NotEnoughReplicasAfterAppend).
When used together, min.insync.replicas and acks allow you to enforce greater durability guarantees. A typical scenario would be to create a topic with a replication factor of 3, set min.insync.replicas to 2, and produce with acks of "all". This will ensure that the producer raises an exception if a majority of replicas do not receive a write.
Type: int Default: 1 Valid Values: [1,...] Importance: high Update Mode: cluster-wide
官网文档释义
acks=all或者acks=-1
此时要看min.insync.replicas(最小同步副本数量,默认等于1)参数设定的值。例如:min.insync.replicas=2,表示需要有2个kafka节点写入数据才会返回客户端提示成功。推荐大于等于2。
acks=0
生产者不需要等待kafka节点回复确认收到消息,就可以继续发送下一条消息。
acks=1
消息写入Leader(主节点)成功即返回,不等待其他Follower(从节点)同步,就可以继续发送下一条消息。如果Leader在Follower同步到该消息之前宕机,则消息丢失。可靠性和性能介于acks=0和acks=all之间。
综上所述:acks=0性能最好但最容易丢消息;acks=1是性能与可靠性的折中;acks=all(或-1)配合min.insync.replicas>=2可靠性最高。
实现
第一步:设置kafka服务器配置文件中同步Broker的数量。
min.insync.replicas=2
第二步:设置acks。
props.put(ProducerConfig.ACKS_CONFIG,"all");
注意:如果acks=all或者acks=-1,则需要在kafka的server.properties配置文件中设置min.insync.replicas的值。
官网文档
retries
Setting a value greater than zero will cause the client to resend any record whose send fails with a potentially transient error. Note that this retry is no different than if the client resent the record upon receiving the error. Produce requests will be failed before the number of retries has been exhausted if the timeout configured by delivery.timeout.ms expires first before successful acknowledgement. Users should generally prefer to leave this config unset and instead use delivery.timeout.ms to control retry behavior. Enabling idempotence requires this config value to be greater than 0. If conflicting configurations are set and idempotence is not explicitly enabled, idempotence is disabled.
Allowing retries while setting enable.idempotence to false and max.in.flight.requests.per.connection to greater than 1 will potentially change the ordering of records because if two batches are sent to a single partition, and the first fails and is retried but the second succeeds, then the records in the second batch may appear first.
Type: int Default: 2147483647 Valid Values: [0,...,2147483647] Importance: high
retry.backoff.ms
The amount of time to wait before attempting to retry a failed request to a given topic partition. This avoids repeatedly sending requests in a tight loop under some failure scenarios.
Type: long Default: 100 Valid Values: [0,...] Importance: low
enable.idempotence
When set to 'true', the producer will ensure that exactly one copy of each message is written in the stream. If 'false', producer retries due to broker failures, etc., may write duplicates of the retried message in the stream. Note that enabling idempotence requires max.in.flight.requests.per.connection to be less than or equal to 5 (with message ordering preserved for any allowable value), retries to be greater than 0, and acks must be 'all'.
Idempotence is enabled by default if no conflicting configurations are set. If conflicting configurations are set and idempotence is not explicitly enabled, idempotence is disabled. If idempotence is explicitly enabled and conflicting configurations are set, a ConfigException is thrown.
Type: boolean Default: true Valid Values: Importance: low
官网文档释义
retries
生产者向Broker发送消息失败后允许重试的次数,取值范围:[0,2147483647]。注意:开启幂等性(enable.idempotence=true)要求retries大于0。
retry.backoff.ms
重试间隔时间,单位毫秒,默认100毫秒。
实现
/**
* 重试3次,注意:可能会因为网络延迟问题导致发送消息成功但回复客户端不及时的问题,所以会导致消息重复发送,
* 因此需要在消费者端控制重复消费的问题。
*/
props.put(ProducerConfig.RETRIES_CONFIG,3);
//重试间隔:500毫秒
props.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG,500);
注意:enable.idempotence是生产者端的配置(默认值为true),开启幂等性要求retries大于0且acks必须为all(或-1);如果显式设置enable.idempotence=false并允许重试,需要自行处理消息重复和乱序的问题。
官网文档
buffer.memory
The total bytes of memory the producer can use to buffer records waiting to be sent to the server. If records are sent faster than they can be delivered to the server the producer will block for max.block.ms after which it will throw an exception.
This setting should correspond roughly to the total memory the producer will use, but is not a hard bound since not all memory the producer uses is used for buffering. Some additional memory will be used for compression (if compression is enabled) as well as for maintaining in-flight requests.
Type: long Default: 33554432 Valid Values: [0,...] Importance: high
batch.size
The producer will attempt to batch records together into fewer requests whenever multiple records are being sent to the same partition. This helps performance on both the client and the server. This configuration controls the default batch size in bytes.
No attempt will be made to batch records larger than this size.
Requests sent to brokers will contain multiple batches, one for each partition with data available to be sent.
A small batch size will make batching less common and may reduce throughput (a batch size of zero will disable batching entirely). A very large batch size may use memory a bit more wastefully as we will always allocate a buffer of the specified batch size in anticipation of additional records.
Note: This setting gives the upper bound of the batch size to be sent. If we have fewer than this many bytes accumulated for this partition, we will 'linger' for the linger.ms time waiting for more records to show up. This linger.ms setting defaults to 0, which means we'll immediately send out a record even the accumulated batch size is under this batch.size setting.
Type: int Default: 16384 Valid Values: [0,...] Importance: medium
linger.ms
The producer groups together any records that arrive in between request transmissions into a single batched request. Normally this occurs only under load when records arrive faster than they can be sent out. However in some circumstances the client may want to reduce the number of requests even under moderate load. This setting accomplishes this by adding a small amount of artificial delay—that is, rather than immediately sending out a record, the producer will wait for up to the given delay to allow other records to be sent so that the sends can be batched together. This can be thought of as analogous to Nagle's algorithm in TCP. This setting gives the upper bound on the delay for batching: once we get batch.size worth of records for a partition it will be sent immediately regardless of this setting, however if we have fewer than this many bytes accumulated for this partition we will 'linger' for the specified time waiting for more records to show up. This setting defaults to 0 (i.e. no delay). Setting linger.ms=5, for example, would have the effect of reducing the number of requests sent but would add up to 5ms of latency to records sent in the absence of load.
Type: long Default: 0 Valid Values: [0,...] Importance: medium
官网文档释义
buffer.memory
消息会先发送到本地缓冲区,可以提高消息发送性能,默认值是33554432,即32MB
batch.size
kafka本地线程会从缓冲区取数据批量发送,默认值是16384,即16kb/次。
linger.ms
默认值是0,即消息立即发送,但这样会影响吞吐量,一般设置100毫秒左右:如果100毫秒内batch满了则立即发送;如果没满,到了100毫秒也必须把消息发送出去。
实现
//消息会先发送到本地缓冲区,可以提高消息发送性能,默认值是33554432,即32MB
props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432);
//kafka本地线程会从缓冲区取数据批量发送,默认值是16384,即16kb/次。
props.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
//默认值是0,即消息立即发送,但这样会影响吞吐量,一般设置100毫秒左右:如果100毫秒内batch满了则立即发送;如果没满,到了100毫秒也必须把消息发送出去。
props.put(ProducerConfig.LINGER_MS_CONFIG, 100);
官网文档
enable.auto.commit
If true the consumer's offset will be periodically committed in the background.
Type: boolean Default: true Valid Values: Importance: medium
auto.commit.interval.ms
The frequency in milliseconds that the consumer offsets are auto-committed to Kafka if enable.auto.commit is set to true.
Type: int Default: 5000 (5 seconds) Valid Values: [0,...] Importance: low
官网文档释义
enable.auto.commit
如果设置为true,则消费者的偏移量会在后台按一定的频率自动提交。默认值为true。
auto.commit.interval.ms
如果enable.auto.commit=true,则该配置生效,其值表示消费者自动提交偏移量的频率,单位毫秒。默认时间是5秒(5000毫秒)。
实现
//如果为true,则将在后台定期提交使用者的偏移量。
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
//如果开启了自动提交偏移量功能,则每隔1秒提交一次偏移量。
props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
消费者丢失消息的情况:假如设置自动提交,时间间隔是2秒。消费者拿到消息后处理业务用了超过2秒还没有完成,此时处理业务的服务器宕机。由于偏移量已经自动提交,而消息并没有处理完,下次消费时就消费不到这条消息了。
消费者重复消费的情况:假如设置自动提交,时间间隔是2秒。消费者1秒钟就处理完了业务,但还没来得及自动提交偏移量服务器就宕机了。下次消费时仍然会消费这条已经消费过的消息。
综上所述:自动提交无法同时避免消息丢失和重复消费,对可靠性要求高的场景建议关闭自动提交,在业务处理完成后手动提交偏移量,示例见下方代码。
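下面是一个先处理业务、再手动同步提交偏移量的简单示意(props沿用前文的消费者配置,doBusiness()是假设的业务处理方法,仅作占位):
props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList(TOPIC_NAME));
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
    for (ConsumerRecord<String, String> record : records) {
        //假设的业务处理方法:只有这一批消息全部处理完,才会走到下面的提交
        doBusiness(record);
    }
    if (!records.isEmpty()) {
        //业务处理完成后同步提交:宕机时最多重复消费这一批消息,不会丢消息
        consumer.commitSync();
    }
}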
官网文档
max.poll.records
The maximum number of records returned in a single call to poll(). Note, that max.poll.records does not impact the underlying fetching behavior. The consumer will cache the records from each fetch request and returns them incrementally from each poll.
Type: int Default: 500 Valid Values: [1,...] Importance: medium
官网文档释义
消费者拉取消息时,最多一次批量拉取的数量,默认500条。
实现
//每次最多拉取200条消息
props.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG,"200");
官网文档
max.poll.interval.ms
The maximum delay between invocations of poll() when using consumer group management. This places an upper bound on the amount of time that the consumer can be idle before fetching more records. If poll() is not called before expiration of this timeout, then the consumer is considered failed and the group will rebalance in order to reassign the partitions to another member. For consumers using a non-null group.instance.id which reach this timeout, partitions will not be immediately reassigned. Instead, the consumer will stop sending heartbeats and partitions will be reassigned after expiration of session.timeout.ms. This mirrors the behavior of a static consumer which has shutdown.
Type: int Default: 300000 (5 minutes) Valid Values: [1,...] Importance: medium
官网文档释义
两次调用poll()之间允许的最大间隔,也可以理解为消费者处理一批消息的超时时间:从poll到消息开始计时,如果因为处理业务耗时过长而超过该时间没有再次poll,则该消费者被认为已失败,消费者组将重新平衡,把分区分配给其他消费者。默认值是5分钟。
实现
//消费者处理消息最大限定为120秒,超时后消费者组重新平衡,该消费者将被剔除出消费者组,分区重新分配给其他消费者。
props.setProperty(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG,"120000");
注意
如果消费者拉取到消息后处理业务消耗时间较长,为了避免消费者被剔除出消费者组,可以用两个方案解决:1.调大max.poll.interval.ms;2.调小max.poll.records,减少单次拉取的消息数量。配置示意见下方代码。
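下面是这两个方案对应的配置示意(数值仅为假设,需要根据单条消息的实际处理耗时评估):
//方案一:调大两次poll()之间允许的最大间隔,例如10分钟
props.setProperty(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, "600000");
//方案二:调小单次poll()拉取的消息条数,例如50条,缩短一批消息的总处理时间
props.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "50");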
官网文档
heartbeat.interval.ms
The expected time between heartbeats to the group coordinator when using Kafka's group management facilities. Heartbeats are used to ensure that the worker's session stays active and to facilitate rebalancing when new members join or leave the group. The value must be set lower than session.timeout.ms, but typically should be set no higher than 1/3 of that value. It can be adjusted even lower to control the expected time for normal rebalances.
Type: int Default: 3000 (3 seconds) Valid Values: Importance: high
官网文档释义
消费者通过心跳机制与组协调器(Group Coordinator)保持会话,heartbeat.interval.ms表示发送心跳的间隔时间。如果Broker在session.timeout.ms内收不到心跳,则消费者组重新平衡,该消费者会被剔除出消费者组,分区将重新分配。这个值必须小于session.timeout.ms,通常不超过其三分之一,默认值为3秒。
实现
//心跳时间为5秒
props.setProperty(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG,"5000");
官网文档
session.timeout.ms
The timeout used to detect client failures when using Kafka's group management facility. The client sends periodic heartbeats to indicate its liveness to the broker. If no heartbeats are received by the broker before the expiration of this session timeout, then the broker will remove this client from the group and initiate a rebalance. Note that the value must be in the allowable range as configured in the broker configuration by group.min.session.timeout.ms and group.max.session.timeout.ms.
Type: int Default: 45000 (45 seconds) Valid Values: Importance: high
官网文档释义
Broker在设定时间内感知不到该消费者,则消费者组重新平衡,消费者被消费者组剔除,分区重新分配给其他消费者。默认值为45秒,这个值的设定范围在group.min.session.timeout.ms和group.max.session.timeout.ms之间,即消费者的最大最小会话时间,这两个默认值分别是6秒和30分钟。
实现
//Broker20秒感知不到该消费者,则消费者组重新平衡,消费者被消费者组剔除,分区重新分配给其他消费者。
props.setProperty(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG,"20000");
官网文档
auto.offset.reset
What to do when there is no initial offset in Kafka or if the current offset does not exist any more on the server (e.g. because that data has been deleted):
- earliest: automatically reset the offset to the earliest offset
- latest: automatically reset the offset to the latest offset
- none: throw exception to the consumer if no previous offset is found for the consumer's group
- anything else: throw exception to the consumer.
Type: string Default: latest Valid Values: [latest, earliest, none] Importance: medium
官网文档释义
如果kafka中没有该消费者组的初始偏移量,或者当前偏移量在服务器上已不存在(例如数据被删除),则按该参数决定从哪里开始消费:earliest重置到最早的偏移量,latest重置到最新的偏移量,none抛出异常。
例如:生产者生产了100条消息,然后创建新的消费者组,此时如果设定值为earliest,则组内的消费者从头开始消费这100条消息;
如果设定值为latest,则组内的消费者消费不到这100条消息,而是只消费后续产生的消息。
实现
props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
官网文档
assign
public void assign(Collection<TopicPartition> partitions)
Manually assign a list of partitions to this consumer. This interface does not allow for incremental assignment and will replace the previous assignment (if there is one). If the given list of topic partitions is empty, it is treated the same as unsubscribe().
Manual topic assignment through this method does not use the consumer's group management functionality. As such, there will be no rebalance operation triggered when group membership or cluster and topic metadata change. Note that it is not possible to use both manual partition assignment with assign(Collection) and group assignment with subscribe(Collection, ConsumerRebalanceListener).
If auto-commit is enabled, an async commit (based on the old assignment) will be triggered before the new assignment replaces the old one.
Specified by:
assign in interface Consumer<K, V>
Parameters:
partitions - The list of partitions to assign this consumer
Throws:
IllegalArgumentException - If partitions is null or contains null or empty topics
IllegalStateException - If subscribe() is called previously with topics or pattern (without a subsequent call to unsubscribe())
See Also:
assign(Collection)
官网文档释义
手动把指定的分区列表分配给该消费者。这种方式不走消费者组的分区管理:组成员变化或集群、主题元数据变化时不会触发再平衡,即使多个消费者共用同一个groupId,它们也各自独立消费。assign不是增量的,每次调用都会整体替换上一次的分配;不能与subscribe混用。如果开启了自动提交,在新分配替换旧分配之前,会基于旧分配触发一次异步提交。用法示意见下方代码。
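下面是assign用法的简单示意(props沿用前文的消费者配置,分区编号0、1仅为假设):
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
//一次性分配多个分区;再次调用assign会整体替换上一次的分配,而不是增量添加。
consumer.assign(Arrays.asList(
        new TopicPartition(TOPIC_NAME, 0),
        new TopicPartition(TOPIC_NAME, 1)));
//可以配合seekToBeginning/seek,从指定位置开始消费当前分配到的分区。
consumer.seekToBeginning(consumer.assignment());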
官网文档
subscribe
public void subscribe(Collection<String> topics)
Subscribe to the given list of topics to get dynamically assigned partitions. Topic subscriptions are not incremental. This list will replace the current assignment (if there is one). It is not possible to combine topic subscription with group management with manual partition assignment through assign(Collection). If the given list of topics is empty, it is treated the same as unsubscribe().
This is a short-hand for subscribe(Collection, ConsumerRebalanceListener), which uses a no-op listener. If you need the ability to seek to particular offsets, you should prefer subscribe(Collection, ConsumerRebalanceListener), since group rebalances will cause partition offsets to be reset. You should also provide your own listener if you are doing your own offset management since the listener gives you an opportunity to commit offsets before a rebalance finishes.
Specified by:
subscribe in interface Consumer<K, V>
Parameters:
topics - The list of topics to subscribe to
Throws:
IllegalArgumentException - If topics is null or contains null or empty elements
IllegalStateException - If subscribe() is called previously with pattern, or assign is called previously (without a subsequent call to unsubscribe()), or if not configured at-least one partition assignment strategy
See Also:
subscribe(Collection)
官网文档释义
订阅给定的主题列表,由消费者组动态分配分区。订阅不是增量的,新的订阅列表会替换之前的订阅;不能与assign手动分配分区混用。该方法等价于subscribe(Collection, ConsumerRebalanceListener)传入一个空实现的监听器;如果需要自己管理偏移量,或者希望在再平衡完成前提交偏移量,应该使用带ConsumerRebalanceListener的重载,示例见下方代码。
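下面是带ConsumerRebalanceListener的订阅示意(props沿用前文的消费者配置,适用于手动提交偏移量的场景):
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList(TOPIC_NAME), new ConsumerRebalanceListener() {
    @Override
    public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
        //再平衡开始、分区被回收之前,先提交已处理消息的偏移量,减少重复消费。
        consumer.commitSync();
    }
    @Override
    public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
        System.out.println("分配到的分区:" + partitions);
    }
});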