Kafka official site: http://kafka.apache.org/quickstart
Contents
A brief introduction to Kafka:
Implementation
1: Kafka partitions
2: Results
3: Kafka consumer code
4: Kafka producer
Prerequisites:
Kafka setup:
> bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 5 --topic TEST
A topic must have multiple partitions (--partitions 5) before multiple consumers can consume it in parallel. Note: the number of consumers in a group should not exceed the number of partitions, otherwise the extra consumers sit idle.
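To check how the partitions are actually assigned across the consumers in a group, Kafka ships a consumer-groups tool; a usage sketch with the group id 12334 and the broker address used later in this post:

> bin/kafka-consumer-groups.sh --describe --bootstrap-server 180.108.64.146:9099 --group 12334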
Common pitfall: sharing one consumer across multiple threads. In Kafka, a consumer used from multiple threads is not safe.
Q: KafkaConsumer is not thread-safe, so how do we achieve multi-threaded consumption?
A: Give each thread its own consumer instance (method two, sketched further below).
How does Kafka guarantee message ordering?
Messages within each partition are written in order, and during consumption each partition is read by only one consumer in a group, so consumption within a partition is ordered as well.
Ordering across the topic as a whole is not guaranteed.
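A practical consequence: if per-key ordering matters, produce with a message key, since records with the same key land in the same partition. A minimal sketch against the Java client (broker address and topic are the ones used throughout this post; the "user-42" key is a made-up example):

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KeyedProducerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "180.108.64.146:9099");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // All records with key "user-42" hash to the same partition,
            // so this user's events are consumed in the order they were sent.
            for (int i = 0; i < 3; i++) {
                producer.send(new ProducerRecord<>("TEST", "user-42", "event-" + i));
            }
        }
    }
}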
How does the partitioner route records?
The partitioner decides which partition a record goes to based on its key: the default partitioner hashes a non-null key (so equal keys always land in the same partition) and distributes records with a null key round-robin. You can also implement the Partitioner interface to supply custom routing logic.
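A sketch of that interface (the class name is invented for illustration; the Partitioner interface itself is part of the Java client used in this post):

import java.util.Arrays;
import java.util.Map;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

// Hypothetical custom partitioner: hash the key bytes over the partition count.
public class KeyHashPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes,
                         Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionsForTopic(topic).size();
        if (keyBytes == null) {
            return 0; // all null-key records go to partition 0 in this toy example
        }
        // mask the sign bit so the result is always a valid partition index
        return (Arrays.hashCode(keyBytes) & 0x7fffffff) % numPartitions;
    }

    @Override
    public void close() {}

    @Override
    public void configure(Map<String, ?> configs) {}
}

It is enabled on the producer with props.put("partitioner.class", "your.package.KeyHashPartitioner").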
Implementation approaches:
Method one: start multiple consumer processes, and inside each process hand records to a thread pool for asynchronous business processing.
Method two: multiple Consumers, each owning its own thread (a sketch follows this list).
This post mainly covers method one; for method two, see this excellent write-up: http://www.cnblogs.com/qizhelongdeyang/p/7355309.html
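For completeness, a minimal sketch of method two, assuming the same broker, group id, and topic as the rest of this post (the class name is invented): each thread constructs, uses, and closes its own KafkaConsumer, so no instance is ever shared across threads.

import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class ConsumerPerThread implements Runnable {
    @Override
    public void run() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "180.108.64.146:9099");
        props.put("group.id", "12334");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // The consumer lives entirely inside this thread.
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Arrays.asList("TEST"));
            while (!Thread.currentThread().isInterrupted()) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(Thread.currentThread().getName() + " -> " + record.value());
                }
            }
        }
    }

    public static void main(String[] args) {
        // One consumer per thread; at most as many threads as partitions (5 here).
        for (int i = 0; i < 5; i++) {
            new Thread(new ConsumerPerThread(), "consumer-" + i).start();
        }
    }
}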
> bin/kafka-topics.sh --describe --zookeeper localhost:2181
Start 5 such processes. The code for a single process:
Kafka_Consumer.java
import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public final class Kafka_Consumer {

    /**
     * KafkaConsumer is not thread-safe: only this class's poll loop touches it.
     */
    private final KafkaConsumer<String, String> consumer;

    private ExecutorService executorService;

    public Kafka_Consumer() {
        Properties props = new Properties();
        props.put("bootstrap.servers",
                "180.108.64.146:9099"); // Kafka broker host and port
        props.put("group.id", "12334");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "100");
        props.put("session.timeout.ms", "30000");
        props.put("auto.offset.reset", "latest");
        props.put("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList("TEST"));
    }

    public void execute() {
        executorService = Executors.newFixedThreadPool(6); // thread pool for asynchronous processing
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            if (!records.isEmpty()) { // poll() never returns null, but the batch may be empty
                executorService.submit(new ConsumerThread(records));
            }
        }
    }

    public void shutdown() {
        try {
            if (consumer != null) {
                consumer.close();
            }
            if (executorService != null) {
                executorService.shutdown();
                if (!executorService.awaitTermination(10, TimeUnit.SECONDS)) {
                    System.out.println("Timeout");
                }
            }
        } catch (InterruptedException ignored) {
            Thread.currentThread().interrupt();
        }
    }
}

/**
 * Business processing runs on the thread pool, decoupled from the Kafka poll loop.
 */
class ConsumerThread implements Runnable {

    private final ConsumerRecords<String, String> records;

    public ConsumerThread(ConsumerRecords<String, String> records) {
        this.records = records;
    }

    @Override
    public void run() {
        for (ConsumerRecord<String, String> record : records) {
            System.out.println("thread: " + Thread.currentThread() + ", "
                    + "offset: " + record.offset() + ", topic: "
                    + record.topic() + ", partition: " + record.partition()
                    + ", value: " + record.value());
        }
    }
}
ConsumerMain.java
public class ConsumerMain {
    public static void main(String[] args) {
        Kafka_Consumer kafka_Consumer = new Kafka_Consumer();
        try {
            // execute() blocks in its poll loop; shutdown() only runs
            // if the loop exits with an exception.
            kafka_Consumer.execute();
        } finally {
            kafka_Consumer.shutdown();
        }
    }
}
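One caveat with method one as written: enable.auto.commit=true means offsets may be committed before the thread pool finishes processing a batch, so a crash can drop in-flight records. If at-least-once delivery matters, a common variant (sketched below under the same configuration, with auto-commit switched off) processes synchronously and commits afterwards; process() stands in for your business logic and is a hypothetical method.

// Variant of the poll loop with manual offset commits (at-least-once).
// Assumes props.put("enable.auto.commit", "false") in the constructor.
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(100);
    if (!records.isEmpty()) {
        process(records);       // business logic; hypothetical method
        consumer.commitSync();  // commit only after processing succeeded
    }
}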
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>data</groupId>
  <artifactId>analyticCore</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>analyticCore</name>
  <url>http://maven.apache.org</url>

  <properties>
    <drools.version>5.3.1.Final</drools.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.eclipse.paho</groupId>
      <artifactId>org.eclipse.paho.client.mqttv3</artifactId>
      <version>1.0.2</version>
    </dependency>
    <dependency>
      <groupId>org.drools</groupId>
      <artifactId>drools-core</artifactId>
      <version>${drools.version}</version>
    </dependency>
    <dependency>
      <groupId>org.drools</groupId>
      <artifactId>drools-compiler</artifactId>
      <version>${drools.version}</version>
    </dependency>
    <dependency>
      <groupId>org.drools</groupId>
      <artifactId>drools-spring</artifactId>
      <version>${drools.version}</version>
    </dependency>
    <dependency>
      <groupId>com.thoughtworks.xstream</groupId>
      <artifactId>xstream</artifactId>
      <version>1.2.2</version>
    </dependency>
    <dependency>
      <groupId>com.google.code.gson</groupId>
      <artifactId>gson</artifactId>
      <version>2.2.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.12</artifactId>
      <version>1.0.1</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.47</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>druid</artifactId>
      <version>1.1.9</version>
    </dependency>
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.35</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.25</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.25</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <dependency>
      <groupId>org.json</groupId>
      <artifactId>json</artifactId>
      <version>20180130</version>
    </dependency>
    <dependency>
      <groupId>redis.clients</groupId>
      <artifactId>jedis</artifactId>
      <version>2.3.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpcore</artifactId>
      <version>4.4.5</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.3.5</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpcore-nio</artifactId>
      <version>4.4.5</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpasyncclient</artifactId>
      <version>4.1.1</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-resources-plugin</artifactId>
        <version>3.1.0</version>
        <configuration>
          <encoding>utf-8</encoding>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.0</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
          <encoding>utf-8</encoding>
          <!-- the following flag names are assumed: the source listing only
               preserved four boolean values (true) and the 128m/512m settings -->
          <fork>true</fork>
          <debug>true</debug>
          <optimize>true</optimize>
          <showDeprecation>true</showDeprecation>
          <meminitial>128m</meminitial>
          <maxmem>512m</maxmem>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.2.1</version>
        <configuration>
          <mainClass>data.analyticCore.consumerMain</mainClass>
        </configuration>
      </plugin>
    </plugins>
    <resources>
      <resource>
        <directory>src/main/rules</directory>
      </resource>
      <resource>
        <directory>src/main/resources</directory>
      </resource>
    </resources>
  </build>
</project>
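With the exec plugin configured as above, each consumer process can be started from its own terminal (five terminals for the five processes); a usage sketch with the standard exec-maven-plugin goal, which runs the configured mainClass:

> mvn compile exec:java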
Python producer code:
from kafka import KafkaProducer
import json
import time
import threading

producer = KafkaProducer(
    value_serializer=lambda v: json.dumps(v).encode('utf-8'),
    bootstrap_servers=['180.108.64.146:9099']
)

# start timestamp, captured when the script is loaded
sj = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))


def send_msg(Num):
    for i in range(Num):
        time.sleep(1)
        data = {
            "name": "李四",
            "age": 23,
            "gender": "男",
            "id": i
        }
        producer.send('TEST', data)
        print("============%s" % i)
    end = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print("=== start time: %s" % sj)
    print("=== end time: %s" % end)
    # closes the module-level producer; with thread_start(), all threads
    # share this one producer, so the first thread to finish closes it for all
    producer.close()


def thread_start():
    num = 1
    Num = 2000
    threads = []
    for i in range(num):
        threads.append(threading.Thread(target=send_msg, args=(Num,)))
    for t in threads:
        # t.setDaemon(True)
        t.start()


if __name__ == "__main__":
    send_msg(100000)