The original articles are from:
https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+Producer+Example
https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+SimpleConsumer+Example
The Producer class is used to create messages and send them to a specific Topic and an optional Partition.
Required imports:
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
First we define a set of properties that tell the Producer how to find the cluster, how to serialize messages, and, if needed, how to route a message to a specific Partition.
Properties props = new Properties();
props.put("metadata.broker.list", "broker1:9092,broker2:9092");
props.put("serializer.class", "kafka.serializer.StringEncoder");
props.put("partitioner.class", "example.producer.SimplePartitioner");
props.put("request.required.acks", "1");
ProducerConfig config = new ProducerConfig(props);
/**
 * When defining the Producer we must supply two type parameters: the first is the type of the
 * Partition key, the second is the type of the Message. In this example both are String.
 */
Producer<String, String> producer = new Producer<String, String>(config);
KeyedMessage<String, String> data = new KeyedMessage<String, String>("myTopic", "partition_key", message);
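The message is then sent with producer.send(data), and producer.close() is called once all messages have been sent. Here is the full example: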
import java.util.*;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
public class TestProducer {
    public static void main(String[] args) {
        long events = Long.parseLong(args[0]);
        Random rnd = new Random();

        Properties props = new Properties();
        props.put("metadata.broker.list", "broker1:9092,broker2:9092");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("partitioner.class", "example.producer.SimplePartitioner");
        props.put("request.required.acks", "1");
        ProducerConfig config = new ProducerConfig(props);
        Producer<String, String> producer = new Producer<String, String>(config);

        for (long nEvents = 0; nEvents < events; nEvents++) {
            long runtime = new Date().getTime();
            String ip = "192.168.2." + rnd.nextInt(255);
            String msg = runtime + ",www.example.com," + ip;
            KeyedMessage<String, String> data = new KeyedMessage<String, String>("page_visits", ip, msg);
            producer.send(data);
        }
        producer.close();
    }
}
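Note that each message is keyed by the client IP, so the SimplePartitioner below routes all events from the same host to the same partition.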
Partitioning Code:
import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

public class SimplePartitioner implements Partitioner {
    public SimplePartitioner(VerifiableProperties props) {}

    // Assign the partition by taking the last octet of the IP key modulo the partition count
    public int partition(Object key, int a_numPartitions) {
        int partition = 0;
        String stringKey = (String) key;
        int offset = stringKey.lastIndexOf('.');
        if (offset > 0) {
            partition = Integer.parseInt(stringKey.substring(offset + 1)) % a_numPartitions;
        }
        return partition;
    }
}
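For example, with the 2 partitions created below, the key "192.168.2.75" maps to partition 75 % 2 = 1, while "192.168.2.30" maps to partition 0.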
Create the topic:
bin/kafka-create-topic.sh --topic page_visits --replica 1 --zookeeper localhost:2181 --partition 2
Run the Java program we wrote (with the Kafka jars and their dependencies on the classpath):
java TestProducer 100
Check what we produced, using the console consumer:
bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic page_visits --from-beginning
One point deserves special mention: if you use Kafka's default configuration file and the Producer does not run on the same machine as Kafka, you must set the host.name parameter in config/server.properties, because by default Kafka only accepts local requests. For example, set host.name=192.168.8.240 (the machine's own IP).
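The change in config/server.properties looks like this (the IP below is this article's example machine; use your broker's own address):
# config/server.properties
host.name=192.168.8.240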
That covers the producer; now for the consumer. In the example above we used Kafka's bundled console consumer to view the messages we sent; now we will write our own consumer to implement specific business logic.
Note that besides SimpleConsumer, the Consumer class (the high-level API) can also act as a consumer. We use SimpleConsumer here because it gives finer control over consumption at the partition level.
SimpleConsumer requires a good deal of work that is unnecessary with Consumer Groups: you must track offsets yourself, determine the lead Broker for each topic/partition, and handle Broker leader changes.
package com.test.simple;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.ErrorMapping;
import kafka.common.TopicAndPartition;
import kafka.javaapi.*;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.MessageAndOffset;
import java.nio.ByteBuffer;
import java.util.*;
public class SimpleConsumerClientDemo {
    public static void main(String args[]) {
        SimpleConsumerClientDemo example = new SimpleConsumerClientDemo();
        // Maximum number of messages to read in this test
        long maxReads = 1000000;
        // Topic to consume
        String topic = "test_topic";
        // Partitions to consume
        int[] partitions = {0, 1};
        // Seed broker list
        List<String> seeds = new ArrayList<String>();
        seeds.add("192.168.8.240");
        // Broker port
        int port = 9092;
        try {
            example.run(maxReads, topic, partitions, seeds, port);
        } catch (Exception e) {
            System.out.println("Oops:" + e);
            e.printStackTrace();
        }
    }

    private List<String> m_replicaBrokers = new ArrayList<String>();

    public SimpleConsumerClientDemo() {
        m_replicaBrokers = new ArrayList<String>();
    }
    public void run(long a_maxReads, String topic, int[] partitions, List<String> a_seedBrokers, int a_port) throws Exception {
        /**
         * Find the lead broker
         */
        PartitionMetadata metadata = findLeader(a_seedBrokers, a_port, topic, partitions[0]);
        if (metadata == null) {
            System.out.println("Can't find metadata for Topic and Partition. Exiting");
            return;
        }
        if (metadata.leader() == null) {
            System.out.println("Can't find Leader for Topic and Partition. Exiting");
            return;
        }
        String leadBrokerHost = metadata.leader().host();
        int leadBrokerPort = metadata.leader().port();
        String clientName = "client-" + topic;
        System.out.println("leader broker host: " + leadBrokerHost + ", port: " + leadBrokerPort);
        /**
         * Create the SimpleConsumer: (host, port, socket timeout in ms, buffer size in bytes, client id)
         */
        SimpleConsumer consumer = new SimpleConsumer(leadBrokerHost, leadBrokerPort, 100000, 64 * 1024, clientName);
        /**
         * Read the starting offset of each partition; here we start from the beginning (EarliestTime).
         */
        long readOffsets[] = new long[partitions.length];
        for (int i = 0; i < partitions.length; i++) {
            readOffsets[i] = getLastOffset(consumer, topic, partitions[i], kafka.api.OffsetRequest.EarliestTime(), clientName);
        }
        int numErrors = 0;
        while (a_maxReads > 0) {
            if (consumer == null) {
                consumer = new SimpleConsumer(leadBrokerHost, leadBrokerPort, 100000, 64 * 1024, clientName);
            }
            /**
             * Loop over the partitions and fetch messages from each
             */
            long numRead = 0;
            for (int i = 0; i < partitions.length; i++) {
                FetchRequest req = new FetchRequestBuilder()
                        .clientId(clientName)
                        .addFetch(topic, partitions[i], readOffsets[i], 100000) // Note: this fetchSize of 100000 might need to be increased if large batches are written to Kafka
                        .build();
                FetchResponse fetchResponse = consumer.fetch(req);
                if (fetchResponse.hasError()) {
                    numErrors++;
                    // Something went wrong!
                    short code = fetchResponse.errorCode(topic, partitions[i]);
                    System.out.println("Error fetching data from the Broker:" + leadBrokerHost + " Reason: " + code);
                    if (numErrors > 5) break;
                    if (code == ErrorMapping.OffsetOutOfRangeCode()) {
                        // We asked for an invalid offset. For the simple case, ask for the last element to reset
                        readOffsets[i] = getLastOffset(consumer, topic, partitions[i], kafka.api.OffsetRequest.LatestTime(), clientName);
                        continue;
                    }
                    consumer.close();
                    consumer = null;
                    leadBrokerHost = findNewLeader(leadBrokerHost, topic, partitions[i], leadBrokerPort);
                    continue;
                }
                numErrors = 0;
                for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(topic, partitions[i])) {
                    long currentOffset = messageAndOffset.offset();
                    if (currentOffset < readOffsets[i]) {
                        System.out.println("Found an old offset: " + currentOffset + " Expecting: " + readOffsets[i]);
                        continue;
                    }
                    readOffsets[i] = messageAndOffset.nextOffset();
                    ByteBuffer payload = messageAndOffset.message().payload();
                    byte[] bytes = new byte[payload.limit()];
                    payload.get(bytes);
                    System.out.println(partitions[i] + ":" + String.valueOf(messageAndOffset.offset()) + ": " + new String(bytes, "UTF-8"));
                    numRead++;
                    a_maxReads--;
                }
            }
            if (numRead == 0) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                }
            }
        }
        if (consumer != null) consumer.close();
    }
    public static long getLastOffset(SimpleConsumer consumer, String topic, int partition,
                                     long whichTime, String clientName) {
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 2));
        kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
                requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
        OffsetResponse response = consumer.getOffsetsBefore(request);
        if (response.hasError()) {
            System.out.println("Error fetching offset data from the Broker. Reason: " + response.errorCode(topic, partition));
            return 0;
        }
        long[] offsets = response.offsets(topic, partition);
        return offsets[0];
    }
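    // Note: the whichTime argument chooses which offset to look up: kafka.api.OffsetRequest.EarliestTime()
    // returns the oldest offset still on the broker (used by run() above to start from the beginning),
    // while LatestTime() returns the offset of the next message to be produced (used in the
    // OffsetOutOfRange error-recovery path).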
    private String findNewLeader(String a_oldLeader, String a_topic, int a_partition, int a_port) throws Exception {
        for (int i = 0; i < 3; i++) {
            boolean goToSleep = false;
            PartitionMetadata metadata = findLeader(m_replicaBrokers, a_port, a_topic, a_partition);
            if (metadata == null) {
                goToSleep = true;
            } else if (metadata.leader() == null) {
                goToSleep = true;
            } else if (a_oldLeader.equalsIgnoreCase(metadata.leader().host()) && i == 0) {
                // first time through, if the leader hasn't changed, give ZooKeeper a second to recover;
                // second time, assume the broker did recover before failover, or it was a non-Broker issue
                goToSleep = true;
            } else {
                return metadata.leader().host();
            }
            if (goToSleep) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                }
            }
        }
        System.out.println("Unable to find new leader after Broker failure. Exiting");
        throw new Exception("Unable to find new leader after Broker failure. Exiting");
    }
    /**
     * An open question from the original author: can different partitions have their leaders on
     * different brokers? (They can: each partition has its own leader. This demo only looks up the
     * leader of partitions[0] and fetches every partition from it, so it assumes both partitions
     * happen to share the same leader.)
     */
    private PartitionMetadata findLeader(List<String> a_seedBrokers, int a_port, String a_topic, int a_partition) {
        PartitionMetadata returnMetaData = null;
        loop:
        for (String seed : a_seedBrokers) {
            SimpleConsumer consumer = null;
            try {
                consumer = new SimpleConsumer(seed, a_port, 100000, 64 * 1024, "leaderLookup");
                List<String> topics = Collections.singletonList(a_topic);
                TopicMetadataRequest req = new TopicMetadataRequest(topics);
                kafka.javaapi.TopicMetadataResponse resp = consumer.send(req);
                List<TopicMetadata> metaData = resp.topicsMetadata();
                for (TopicMetadata item : metaData) {
                    System.out.println(item);
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        if (part.partitionId() == a_partition) {
                            returnMetaData = part;
                            break loop;
                        }
                    }
                }
            } catch (Exception e) {
                System.out.println("Error communicating with Broker [" + seed + "] to find Leader for [" + a_topic
                        + ", " + a_partition + "] Reason: " + e);
            } finally {
                if (consumer != null) consumer.close();
            }
        }
        if (returnMetaData != null) {
            m_replicaBrokers.clear();
            for (kafka.cluster.Broker replica : returnMetaData.replicas()) {
                m_replicaBrokers.add(replica.host());
            }
        }
        return returnMetaData;
    }
}
Kafka's consumer interface comes in two versions.
One is the high-level version, which is simpler: you don't need to manage offsets yourself, since it automatically reads the Consumer group's last offset from ZooKeeper. A few things to keep in mind:
1. If there are more consumers than partitions, the extra consumers are wasted, because Kafka's design does not allow concurrent consumption of a single partition; so the consumer count should not exceed the partition count.
2. If there are fewer consumers than partitions, one consumer will be assigned multiple partitions. Balance the consumer and partition counts carefully, or data will be consumed unevenly across partitions. Ideally the partition count is an integer multiple of the consumer count, which makes the choice of partition count important: 24, for example, makes it easy to pick a consumer count.
3. If a consumer reads from multiple partitions, there is no ordering guarantee across them; Kafka only guarantees order within a single partition, so across partitions the order depends on the order in which you read.
4. Adding or removing consumers, brokers, or partitions triggers a rebalance, after which a consumer's partition assignments can change.
5. The high-level interface blocks when no data is available.
The other version is the SimpleConsumer used above.
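For comparison, here is a minimal sketch of the high-level consumer reading the page_visits topic. The group.id value and the single-stream setup are assumptions for illustration; the classes used (ConsumerConnector, KafkaStream, ConsumerIterator) are the 0.8.0 Java API:

import java.util.*;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

public class HighLevelConsumerDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Offsets are tracked per consumer group in ZooKeeper, so no manual offset management is needed
        props.put("zookeeper.connect", "localhost:2181");
        props.put("group.id", "page_visits_group"); // assumed group name for this sketch
        props.put("auto.commit.interval.ms", "1000");
        ConsumerConnector consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));

        // Request one stream (one consuming thread) for the topic; Kafka assigns partitions to streams
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put("page_visits", 1);
        Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);

        // hasNext() blocks while no data is available (point 5 above)
        ConsumerIterator<byte[], byte[]> it = streams.get("page_visits").get(0).iterator();
        while (it.hasNext()) {
            System.out.println(new String(it.next().message()));
        }
    }
}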