http://www.biglittleant.cn/2016/12/28/kafka-python/
kafka-python-client-example
Install kafka-python
Installing with pip
pip install kafka-python
Installing from source
### pip
git clone https://github.com/dpkp/kafka-python
pip install ./kafka-python
### Setuptools
git clone https://github.com/dpkp/kafka-python
easy_install ./kafka-python
### setup
git clone https://github.com/dpkp/kafka-python
cd kafka-python
python setup.py install
To enable compression support, install these two extra modules:
pip install lz4tools
pip install xxhash
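With these modules in place, compression can be switched on when the producer is created. A minimal sketch, assuming a broker that accepts lz4 at the same 192.168.56.12:9092 address used in the examples below:

from kafka import KafkaProducer

# compression_type may be 'gzip', 'snappy' or 'lz4'; 'lz4' relies on the
# lz4tools and xxhash modules installed above
producer = KafkaProducer(bootstrap_servers=['192.168.56.12:9092'],
                         compression_type='lz4')
producer.send('topic1', b'compressed message')
producer.flush()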
Usage
Kafka producer
Step 1: connect to the brokers
from kafka import KafkaProducer
from kafka.errors import KafkaError

## connect to the brokers
producer = KafkaProducer(bootstrap_servers=['192.168.56.12:9092'])
Step 2: send a simple message
import datetime

## by default, send a plain message; the value must be bytes
datenow = datetime.datetime.now().strftime('%Y-%m-%d:%H-%M-%s')
my_bytes = bytes(source=datenow, encoding='utf-8')
future = producer.send('topic1', my_bytes)

### OR block until the send completes and read back the metadata
try:
    record_metadata = future.get(timeout=10)
except KafkaError:
    # Decide what to do if produce request failed...
    # log.exception()
    pass

# Successful result returns assigned partition and offset
print(record_metadata.topic)      ## which topic the message was written to
print(record_metadata.partition)  ## which partition the message landed in
print(record_metadata.offset)     ## the message's offset within the partition
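Instead of blocking on future.get(), callbacks can be attached to the returned future so the send stays fully asynchronous. A sketch based on the Future API that kafka-python's send() returns:

def on_send_success(record_metadata):
    # runs once the broker acknowledges the message
    print(record_metadata.topic, record_metadata.partition, record_metadata.offset)

def on_send_error(excp):
    # runs if the produce request ultimately fails
    print('send failed:', excp)

producer.send('topic1', b'async message').add_callback(on_send_success).add_errback(on_send_error)
producer.flush()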
Step 3: send JSON-formatted data
import json
import msgpack

# produce keyed messages to enable hashed partitioning
producer.send('my-topic', key=b'foo', value=b'bar')

# encode objects via msgpack
## msgpack is a third-party binary serialization library (installed separately), used here as the value serializer
producer = KafkaProducer(value_serializer=msgpack.dumps)
producer.send('msgpack-topic', {'key': 'value'})

# produce json messages
producer = KafkaProducer(value_serializer=lambda m: json.dumps(m).encode('utf-8'),
                         bootstrap_servers=['192.168.56.12:9092'])
producer.send('json-topic1', {'key': 'value'})

# produce asynchronously
for _ in range(100):
    producer.send('my-topic', b'msg')

# block until all async messages are sent
producer.flush()
## flush() blocks until every buffered message has been delivered before moving on

# configure multiple retries
producer = KafkaProducer(retries=5)
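A key_serializer can be configured in the same way, so keyed messages do not have to be pre-encoded to bytes by hand. A small sketch under that assumption, reusing the json-topic1 topic from above:

import json
from kafka import KafkaProducer

# serialize the key as UTF-8 text and the value as UTF-8 JSON
producer = KafkaProducer(bootstrap_servers=['192.168.56.12:9092'],
                         key_serializer=lambda k: k.encode('utf-8'),
                         value_serializer=lambda v: json.dumps(v).encode('utf-8'))
producer.send('json-topic1', key='user-1', value={'clicks': 3})
producer.flush()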
Kafka consumer
Real-time Kafka consumer
Consumes only newly written messages, not older ones.
from kafka import KafkaConsumer

# To consume latest messages and auto-commit offsets
consumer = KafkaConsumer('my-topic',
                         group_id='my-group',   ## define a consumer group; the group tracks the committed offset position
                         bootstrap_servers=['localhost:9092'])
for message in consumer:
    # message value and key are raw bytes -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))
Consuming older messages from the beginning of the topic
consumer = KafkaConsumer('topic1',
                         auto_offset_reset='earliest',
                         enable_auto_commit=False,
                         bootstrap_servers=['192.168.56.12:9092'])
for message in consumer:
    # message value and key are raw bytes -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))

### Output
topic1:0:0: key=None value=b'11-16-19:11-2016-00'
topic1:0:1: key=None value=b'11-16-19:11-2016-02'
topic1:0:2: key=None value=b'11-16-19:11-2016-03'
topic1:0:3: key=None value=b'11-16-19:11-2016-03'
topic1:0:4: key=None value=b'2016-11-19:11-05-1479524731'
Custom deserialization of the consumed values
import json

consumer = KafkaConsumer('json-topic1',
                         value_deserializer=lambda m: json.loads(m.decode('utf-8')),
                         auto_offset_reset='earliest',   ## or 'latest'
                         enable_auto_commit=False,       ## when False, the offset position is not committed automatically
                         bootstrap_servers=['192.168.56.12:9092'])
for message in consumer:
    # the value has already been decoded by value_deserializer; the key is still raw bytes
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))

### Output
json-topic1:0:0: key=None value={'key': 'value'}
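With enable_auto_commit=False the consumer never records its position, so it will re-read the same messages the next time it starts unless offsets are committed manually. A minimal sketch using KafkaConsumer.commit(), assuming the same topic and a group_id (a group is required for committing); process_message is a hypothetical handler:

consumer = KafkaConsumer('json-topic1',
                         group_id='my-group',
                         enable_auto_commit=False,
                         bootstrap_servers=['192.168.56.12:9092'])
for message in consumer:
    process_message(message)   # hypothetical processing step
    consumer.commit()          # synchronously commit the offsets consumed so far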
Other parameters
# Stop iterating if no message arrives within 1 second.
KafkaConsumer(consumer_timeout_ms=1000)

# Subscribe to topics matching a regular expression.
consumer = KafkaConsumer()
consumer.subscribe(pattern='^awesome.*')

# Run several clients to consume messages in parallel.
# Use multiple consumers in parallel w/ 0.9 kafka brokers
# typically you would run each on a different server / process / CPU
consumer1 = KafkaConsumer('my-topic',
                          group_id='my-group',
                          bootstrap_servers='my.server.com')
consumer2 = KafkaConsumer('my-topic',
                          group_id='my-group',
                          bootstrap_servers='my.server.com')
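A consumer can also be pinned to specific partitions instead of subscribing by topic name or pattern. A short sketch using TopicPartition from kafka-python, assuming topic1 has a partition 0 as in the earlier output:

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers=['192.168.56.12:9092'])
# read only partition 0 of topic1; no consumer-group rebalancing is involved
consumer.assign([TopicPartition('topic1', 0)])
msg = next(consumer)
print(msg.offset, msg.value)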
Example
- Write the contents of the file a.txt into Kafka.
- Have a consumer in a group named my-group consume the data from Kafka.
Step 1: write a producer that produces the messages.
from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=['192.168.56.12:9092'])
with open('a.txt', 'rb') as file:
    for n in file:
        future = producer.send('topic1', n)
producer.flush()
Step 2: write a consumer that consumes the messages.
from kafka import KafkaConsumer

consumer = KafkaConsumer('topic1',
                         group_id='my-group',
                         bootstrap_servers=['192.168.56.12:9092'])
for message in consumer:
    # message value and key are raw bytes -- decode if necessary!
    # e.g., for unicode: `message.value.decode('utf-8')`
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))
Documentation
kafka-python official reference