kafka API https://kafka.apache.org/0100/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaConsumer.html
知乎关于多线程的回答https://www.zhihu.com/question/57483708/answer/153185829
1、高级新api消费者,一个线程一个消费者。
import com.atguigu.datacosumer.util.PropertyUtil;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.Arrays;
import java.util.Properties;
/**
* @author wade
* @create 2019-03-20 12:26
*/
public class MyTestThreads {
public static void main(String[] args) {
/*
a进行了消费ddd分区是0偏移量是15
b进行了消费eee分区是2偏移量是17
a进行了消费wade分区是1偏移量是17
a进行了消费pual分区是0偏移量是16
*/
//因为KafkaConsumer不是线程安全的,使用一个对象会报异常
//ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
new Thread(new MyConsumerThread(),"a").start();
new Thread(new MyConsumerThread(),"b").start();
}
}
/**
 * One consumer per thread: each Runnable owns a private KafkaConsumer instance,
 * because KafkaConsumer is not safe for multi-threaded access.
 */
class MyConsumerThread implements Runnable {
    // Typed <String, String>; the raw type loses compile-time checking of record key/value types.
    final KafkaConsumer<String, String> consumer;

    public MyConsumerThread() {
        // Connection/group settings are loaded from an external properties file.
        Properties properties = PropertyUtil.properties;
        consumer = new KafkaConsumer<>(properties);
    }

    @Override
    public void run() {
        // Subscribe to topic "dai"; the group coordinator assigns partitions among the threads.
        consumer.subscribe(Arrays.asList("dai"));
        while (true) {
            // Block for up to 1000 ms waiting for new records.
            ConsumerRecords<String, String> records = consumer.poll(1000);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(Thread.currentThread().getName()+"进行了消费"+record.value()+"分区是"+record.partition()+"偏移量是"+record.offset());
            }
        }
    }
    //./kafka-consumer-groups.sh --bootstrap-server hadoop103:9092 --new-consumer --group g1 --describe
    /*
    TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID
    dai 0 19 19 0 consumer-1-de94a73e-78ab-4097-9a4b-05a44b0efdfa /192.168.11.1 consumer-1
    dai 1 20 20 0 consumer-1-de94a73e-78ab-4097-9a4b-05a44b0efdfa /192.168.11.1 consumer-1
    dai 2 20 20 0 consumer-2-da47cf63-fcc8-44b1-ae9e-760dba4df284 /192.168.11.1
    */
}
2、高级旧api消费者,一个消费者3个线程
import com.atguigu.datacosumer.util.PropertyUtil;
import kafka.consumer.*;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
import org.apache.commons.lang.ObjectUtils;
import scala.Int;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
* @author wade
* @create 2019-03-20 15:50
*/
/**
 * Old (pre-0.9) high-level consumer API: one ConsumerConnector fans out to
 * several KafkaStreams, each consumed by its own thread.
 */
public class MyTestThreads2 {
    public static void main(String[] args) {
        // The old high-level API still connects through ZooKeeper.
        Properties properties = PropertyUtil.properties;
        ConsumerConfig config = new ConsumerConfig(properties);
        ConsumerConnector connector = Consumer.createJavaConsumerConnector(config);
        // topic -> number of streams (consumer threads) to create for that topic.
        // Generics restored here: the original raw `HashMap map` / `Map>>` would not compile.
        HashMap<String, Integer> map = new HashMap<>();
        map.put("dai", 3);
        Map<String, List<KafkaStream<byte[], byte[]>>> messageStreams = connector.createMessageStreams(map);
        List<KafkaStream<byte[], byte[]>> kafkaStreams = messageStreams.get("dai");
        // One thread per stream; 3 streams over 3 partitions of topic "dai".
        for (KafkaStream<byte[], byte[]> stream : kafkaStreams) {
            new Thread(new MyThreads(stream)).start();
        }
    }
}
/**
 * Worker for the old high-level API: drains exactly one KafkaStream.
 * Keys/values arrive as raw byte arrays in this API.
 */
class MyThreads implements Runnable {
    // Generics restored; the original raw KafkaStream forced unchecked iteration.
    final KafkaStream<byte[], byte[]> stream;

    public MyThreads(KafkaStream<byte[], byte[]> stream) {
        this.stream = stream;
    }

    @Override
    public void run() {
        ConsumerIterator<byte[], byte[]> iterator = stream.iterator();
        // hasNext() blocks until the next message arrives (or the connector shuts down).
        while (iterator.hasNext()) {
            MessageAndMetadata<byte[], byte[]> metadata = iterator.next();
            System.out.println(
                    Thread.currentThread().getName() + "消费了 =>>" + new String(metadata.message())+
                            "主题=>"+metadata.topic()+
                            "分区=>" +metadata.partition() +
                            "偏移量=>" + metadata.offset()
            );
        }
    }
}
// ./kafka-consumer-groups.sh --zookeeper hadoop103:2181 --group g1 --describe
/*
TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID
dai 0 19 20 1 g1_DESKTOP-HGSVH26-1553070141911-a078e09d
dai 1 20 22 2 g1_DESKTOP-HGSVH26-1553070141911-a078e09d
dai 2 20 21 1 g1_DESKTOP-HGSVH26-1553070141911-a078e09d
*/
3、高级新api实现消费和处理解耦
import com.atguigu.datacosumer.util.PropertyUtil;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import sun.applet.Main;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.Properties;
import java.util.concurrent.LinkedBlockingQueue;
/**
* @author wade
* @create 2019-03-20 18:04
*/
/**
 * Decoupled consume/process model: a single consumer thread polls Kafka and
 * hands ConsumerRecords batches to processor threads via a blocking queue.
 */
public class MyThreadsTest4 {
    public static void main(String[] args) throws InterruptedException {
        Properties properties = PropertyUtil.properties;
        // Generics restored; the original `LinkedBlockingQueue> list` would not compile.
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        consumer.subscribe(Arrays.asList("dai"));
        /*
         * Thread-safe handoff, similar to an in-process message queue:
         * take() blocks when empty, put() blocks when full.
         */
        LinkedBlockingQueue<ConsumerRecords<String, String>> list = new LinkedBlockingQueue<>();
        new Thread(new MyThread4(list), "bb").start();
        new Thread(new MyThread4(list), "aa").start();
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(1000);
            list.put(records);
            // Consider enabling a manual commit here:
            // auto-commit can commit offsets late, causing re-consumption after a restart,
            // and it cannot prevent data loss for records still sitting in the queue.
            // consumer.commitAsync();
        }
    }
}
/**
* 消费和处理解耦
* 一个或多个消费者线程来做所有的数据消费,把ConsumerRecords实例存到一个被多个处理线程或线程池
* 消费的阻塞队列
* 好处:不限制消费和处理的线程,让 一个消费者来满足多个处理线程,避免了线程数被分区数所限制
* 理解 :(因为 不解耦的情况下,消费和处理在一起,offset提交的原因,消费线程被分区数限制,多的线程都是空转。
* 而解耦了,处理线程完全不受限制,消费线程仍然限制
* )
* 坏处 : 顺序是一个问题, 多个处理线程顺序无法保证,先从阻塞队列获得的数据 可能比后面获得的数据处理时间晚
* 坏处 : 手动提交offset变得很难,可能数据丢失和重复消费
*
* 2. Decouple Consumption and Processing
* Another alternative is to have one or more consumer threads that do all data consumption and hands off ConsumerRecords instances to a blocking queue consumed by a pool of processor threads that actually handle the record processing.
* This option likewise has pros and cons:
* PRO: This option allows independently scaling the number of consumers and processors.
* This makes it possible to have a single consumer that feeds many processor threads, avoiding any limitation on partitions.
* CON: Guaranteeing order across the processors requires particular care as the threads will execute independently an earlier chunk of data may actually be processed after a later chunk of data just due to the luck of thread execution timing.
* For processing that has no ordering requirements this is not a problem.
* CON: Manually committing the position becomes harder as it requires that all threads co-ordinate to ensure that processing is complete for that partition.
* There are many possible variations on this approach. For example each processor thread can have its own queue,
* and the consumer threads can hash into these queues using the TopicPartition to ensure in-order consumption and simplify commit.
*/
class MyThread4 implements Runnable {
LinkedBlockingQueue> list ;
public MyThread4 (LinkedBlockingQueue> list){
this.list = list;
}
@Override
public void run() {
while (true) {
ConsumerRecords consumerRecords;
try {
consumerRecords = list.take();
for (ConsumerRecord consumerRecord : consumerRecords) {
System.out.println(Thread.currentThread().getName()
+"消费了:" + consumerRecord.value()
+" 分区:"+consumerRecord.partition()
+"偏移量是:" + consumerRecord.offset()
);
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}