public static Properties getProducerProperties() {
    // Build the configuration for a new-API (org.apache.kafka.clients) producer.
    Properties props = new Properties();

    // Broker bootstrap list (dev/test environment). The producer discovers the
    // rest of the cluster dynamically; metadata.max.age.ms controls how often
    // cluster metadata is refreshed.
    // props.put("bootstrap.servers", "172.16.1.248:9092,172.16.1.248:9093");
    // props.put("bootstrap.servers", "172.16.30.13:9093");
    props.put("bootstrap.servers", "172.16.30.13:9095,172.16.30.13:9096");
    // props.put("bootstrap.servers", "dev.kafka1.cnhz.shishike.com:9092");

    // acks=0: no broker acknowledgement (retries never trigger);
    // acks=1: the partition leader acknowledges the write (recommended);
    // acks=all: the leader waits for all in-sync replicas.
    props.put("acks", "1");

    // Automatic retry on transient send failures — equivalent to a manual resend.
    props.put("retries", 3);

    // Records are batched per partition; a full batch (batch.size bytes)
    // triggers a send regardless of linger.ms.
    props.put("batch.size", 16384);

    // How long to wait for a batch to fill before sending it anyway.
    // NOTE(review): 60000 ms = 60 s, which is unusually long; the original
    // comment claimed 5 s — confirm the intended value.
    props.put("linger.ms", 60000);

    // Total memory the producer may use for buffering, compression, etc.
    props.put("buffer.memory", 33554432);

    // Supported compression codecs include gzip and snappy.
    // props.put("compression.type", gzip);

    // Producer serializers must match the consumer deserializers.
    props.put("key.serializer",
            "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer",
            "org.apache.kafka.common.serialization.StringSerializer");
    return props;
}
@Test
public void check_CallBack() throws Exception {
    // Sends one record and waits for the broker acknowledgement through the
    // producer Callback; the latch is released when onCompletion fires.
    try {
        final CountDownLatch latch = new CountDownLatch(1);
        String topicName = "page_visits8";
        Producer producer = ProductUtils.getProducer();
        producer.send(new ProducerRecord(topicName,
                "1", "ddddddddd洪10002" + 5), new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null) {
                    // Send failed: keep the stack trace in a dedicated log for tracing lost messages.
                    logger.error("find send exception:", exception);
                } else {
                    logger.info("callback completion:" + metadata);
                }
                latch.countDown();
            }
        });
        logger.info("have send info");
        producer.flush();
        logger.info(" flush");
        // Bounded wait so the test cannot hang forever if the broker is unreachable.
        if (!latch.await(30, java.util.concurrent.TimeUnit.SECONDS)) {
            logger.error("timed out waiting for send callback");
        }
        logger.info(" callback");
        System.out.println("Message sent successfully");
        producer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * The default partitioning strategy:
 *
 * - If a partition is specified in the record, use it
 *
 * - If no partition is specified but a key is present choose a partition based on a hash of the key
 *
 * - If no partition or key is present choose a partition in a round-robin fashion
 */
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
    // Keyed records are pinned to a partition by hashing the key bytes;
    // unkeyed records are spread round-robin over available partitions.
    List partitions = cluster.partitionsForTopic(topic);
    int numPartitions = partitions.size();
    if (keyBytes != null) {
        // murmur2 of the key, mapped into [0, numPartitions).
        return DefaultPartitioner.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
    }
    // No key: use a monotonically increasing counter for round-robin.
    int seq = counter.getAndIncrement();
    List available = cluster.availablePartitionsForTopic(topic);
    if (available.isEmpty()) {
        // Nothing currently available — fall back to any partition of the topic.
        return DefaultPartitioner.toPositive(seq) % numPartitions;
    }
    int idx = DefaultPartitioner.toPositive(seq) % available.size();
    return available.get(idx).partition();
}
我通过如下测试实验，topic page_visits5有两个partition。producer通过key的hash指定partition。构建两个consumer指定读取不同的partition。通过实验可以知道offset是对某一个partition自增长的。
testCustomerByPartitionOne的日志
ConsumerRecord(topic = page_visits5, partition = 0, offset = 605, CreateTime = 1499752027859, checksum = 2595474072, serialized key size = 1, serialized value size = 19, key = 0, value = ddddddddd洪1000245)
ConsumerRecord(topic = page_visits5, partition = 0, offset = 606, CreateTime = 1499752027859, checksum = 62561058, serialized key size = 1, serialized value size = 19, key = 0, value = ddddddddd洪1000246)
ConsumerRecord(topic = page_visits5, partition = 0, offset = 607, CreateTime = 1499752027859, checksum = 1958587316, serialized key size = 1, serialized value size = 19, key = 0, value = ddddddddd洪1000247)
ConsumerRecord(topic = page_visits5, partition = 0, offset = 608, CreateTime = 1499752027859, checksum = 3825382949, serialized key size = 1, serialized value size = 19, key = 0, value = ddddddddd洪1000248)
ConsumerRecord(topic = page_visits5, partition = 0, offset = 609, CreateTime = 1499752027860, checksum = 1633914638, serialized key size = 1, serialized value size = 19, key = 0, value = ddddddddd洪1000249)
ConsumerRecord(topic = page_visits5, partition = 1, offset = 604, CreateTime = 1499752027859, checksum = 1821482793, serialized key size = 1, serialized value size = 19, key = 1, value = ddddddddd洪1000244)
ConsumerRecord(topic = page_visits5, partition = 1, offset = 605, CreateTime = 1499752027859, checksum = 462860223, serialized key size = 1, serialized value size = 19, key = 1, value = ddddddddd洪1000245)
ConsumerRecord(topic = page_visits5, partition = 1, offset = 606, CreateTime = 1499752027859, checksum = 2191523333, serialized key size = 1, serialized value size = 19, key = 1, value = ddddddddd洪1000246)
ConsumerRecord(topic = page_visits5, partition = 1, offset = 607, CreateTime = 1499752027859, checksum = 4120432275, serialized key size = 1, serialized value size = 19, key = 1, value = ddddddddd洪1000247)
ConsumerRecord(topic = page_visits5, partition = 1, offset = 608, CreateTime = 1499752027860, checksum = 2537675455, serialized key size = 1, serialized value size = 19, key = 1, value = ddddddddd洪1000248)
ConsumerRecord(topic = page_visits5, partition = 1, offset = 609, CreateTime = 1499752027860, checksum = 3762743849, serialized key size = 1, serialized value size = 19, key = 1, value = ddddddddd洪1000249)
@Test
public void assignPartitionByKey() throws Exception {
    // Publishes 50 rounds of two records each; the record key ("0"/"1")
    // determines the target partition via the key-hash strategy.
    try {
        String topicName = "page_visits5";
        Producer producer = getProducer();
        for (int round = 0; round < 50; round++) {
            for (int keyIdx = 0; keyIdx < 2; keyIdx++) {
                producer.send(new ProducerRecord(topicName,
                        Integer.toString(keyIdx), "ddddddddd洪10002" + round));
                System.out.println("Message sent successfully");
            }
        }
        producer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
@Test
public void testCustomerByPartitionOne() throws Exception {
    // Manually assigns partition 0 of the topic (no group rebalancing) and
    // prints every record received. Runs until the test process is killed.
    String topicName = "page_visits5";
    Properties props = new Properties();
    KafkaConsumer<String, String> consumer = getKafkaConsumer(props);
    // assign() pins this consumer to an explicit partition, unlike subscribe().
    TopicPartition partition0 = new TopicPartition(topicName, 0);
    consumer.assign(Arrays.asList(partition0));
    System.out.println("Subscribed to topic " + topicName);
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        if (!records.isEmpty()) {
            System.out.println("======one===================");
        }
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record);
        }
    }
}
@Test
public void testCustomerByPartitionTwo() throws Exception {
    // Mirror of testCustomerByPartitionOne but pinned to partition 1, so the
    // two tests together cover both partitions of the topic.
    Properties props = new Properties();
    KafkaConsumer<String, String> consumer = getKafkaConsumer(props);
    String topic = "page_visits5";
    TopicPartition partition1 = new TopicPartition(topic, 1);
    consumer.assign(Arrays.asList(partition1));
    System.out.println("Subscribed to topic " + topic);
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(100);
        if (!records.isEmpty()) {
            System.out.println("======two===================");
        }
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record);
        }
    }
}
如果某consumer group中consumer数量少于partition数量,则至少有一个consumer会消费多个partition的数据,如果consumer 的数量与partition数量相同,则正好一个consumer消费一个partition的数据,而如果consumer的数量多于 partition的数量时,会有部分consumer无法消费该topic下任何一条消息。 partition.assignment.strategy = [org.apache.kafka.clients.consumer.RangeAssignor]
参考文档:http://www.cnblogs.com/coprince/p/5893066.html ,
/**
 * Plain subscribe()-based consumer example (group-managed assignment).
 * http://www.tutorialspoint.com/apache_kafka/apache_kafka_simple_producer_example.htm
 *
 * @throws Exception on consumer failure
 */
// @Test
public void testCustomer() throws Exception {
    String topicName = "page_visits5";
    Properties props = new Properties();
    KafkaConsumer consumer = getKafkaConsumer(props);
    // subscribe() accepts multiple topics; the original list repeated the
    // same topic twice, which is redundant (and rejected as a duplicate by
    // newer client versions).
    consumer.subscribe(Arrays.asList(topicName));
    System.out.println("Subscribed to topic " + topicName);
    while (true) {
        ConsumerRecords records = consumer.poll(100);
        if (!records.isEmpty()) {
            System.out.println("=========================");
        }
        for (ConsumerRecord record : records) {
            System.out.println(record);
        }
    }
}
/**
 * Populates the given Properties in place and builds a String/String consumer
 * against the shared test cluster.
 *
 * @param props configuration holder supplied by the caller (mutated here)
 * @return a ready-to-use KafkaConsumer
 */
private KafkaConsumer<String, String> getKafkaConsumer(Properties props) {
    props.put("bootstrap.servers", "172.16.1.248:9092,172.16.1.248:9093");
    // props.put("bootstrap.servers", "172.16.30.13:9095,172.16.30.13:9096");
    props.put("group.id", "group-2");
    props.put("enable.auto.commit", "true");
    props.put("auto.commit.interval.ms", "1000");
    // Every poll() call doubles as the client -> server heartbeat.
    props.put("session.timeout.ms", "30000");
    // max.poll.records caps how many records one poll() returns; by default
    // there is essentially no limit.
    // props.put("max.poll.records", "2");
    props.put("key.deserializer",
            "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer",
            "org.apache.kafka.common.serialization.StringDeserializer");
    return new KafkaConsumer<>(props);
}
@Test
public void testCustomerByOneByOne() throws Exception {
    // Consumes with auto-commit disabled and commits each record's offset
    // individually via commitSync.
    Properties props = new Properties();
    KafkaConsumer<String, String> consumer = getKafkaConsumer(props, false);
    String topic = "page_visits5";
    // TopicPartition partition1 = new TopicPartition(topic, 1);
    // consumer.assign(Arrays.asList(partition1));
    consumer.subscribe(Arrays.asList(topic));
    System.out.println("Subscribed to topic " + topic);
    try {
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(1000);
            for (TopicPartition partition : records.partitions()) {
                List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                for (ConsumerRecord<String, String> record : partitionRecords) {
                    System.out.println(record);
                    // One-by-one commit: the committed offset is the NEXT
                    // offset to consume, hence offset + 1.
                    consumer.commitSync(Collections.singletonMap(
                            partition, new OffsetAndMetadata(record.offset() + 1)));
                }
                // Batch alternative: commit once per partition with the last
                // record's offset (interacts with max.poll.records).
                // long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
                // consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        consumer.close();
    }
}
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$ ./bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --zookeeper 172.16.30.13:2181 --group group-new --topic page_visits5
[2017-07-12 10:32:25,130] WARN WARNING: ConsumerOffsetChecker is deprecated and will be dropped in releases following 0.9.0. Use ConsumerGroupCommand instead. (kafka.tools.ConsumerOffsetChecker$)
Group Topic Pid Offset logSize Lag Owner
group-new page_visits5 0 300 400 100 none
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$ ./bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --zookeeper 172.16.30.13:2181 --group group-new --topic page_visits5
[2017-07-12 10:33:02,699] WARN WARNING: ConsumerOffsetChecker is deprecated and will be dropped in releases following 0.9.0. Use ConsumerGroupCommand instead. (kafka.tools.ConsumerOffsetChecker$)
Group Topic Pid Offset logSize Lag Owner
group-new page_visits5 0 300 400 100 none
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$ ./bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --zookeeper 172.16.30.13:2181 --group group-new --topic page_visits5
[2017-07-12 10:35:10,992] WARN WARNING: ConsumerOffsetChecker is deprecated and will be dropped in releases following 0.9.0. Use ConsumerGroupCommand instead. (kafka.tools.ConsumerOffsetChecker$)
Group Topic Pid Offset logSize Lag Owner
group-new page_visits5 0 310 400 90 none
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$ ./bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --zookeeper 172.16.30.13:2181 --group group-new --topic page_visits5
[2017-07-12 11:01:30,047] WARN WARNING: ConsumerOffsetChecker is deprecated and will be dropped in releases following 0.9.0. Use ConsumerGroupCommand instead. (kafka.tools.ConsumerOffsetChecker$)
Group Topic Pid Offset logSize Lag Owner
group-new page_visits5 0 311 500 189 none
[work@iZbp14iiauukqckkhyphv9Z kafka_2.10-0.10.0.1]$ ./bin/kafka-run-class.sh kafka.tools.ConsumerOffsetChecker --zookeeper 172.16.30.13:2181 --group group-new --topic page_visits5
[2017-07-12 11:01:46,404] WARN WARNING: ConsumerOffsetChecker is deprecated and will be dropped in releases following 0.9.0. Use ConsumerGroupCommand instead. (kafka.tools.ConsumerOffsetChecker$)
Group Topic Pid Offset logSize Lag Owner
group-new page_visits5 0 315 500 185 none
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import java.util.Properties;
// Legacy (pre-0.9 Scala) producer utility: builds one shared Producer from the
// old kafka.javaapi.producer API at class-load time.
// NOTE(review): the class body continues past this excerpt.
public abstract class ProducerUtil {
private static Properties props = new Properties();// producer configuration
private static Producer producer = null;// shared producer instance
private static Producer oldProducer = null;// secondary producer (unused in this excerpt)
static {
if (producer == null) {
// props.put("metadata.broker.list", "localhost:9092,");
props.put("metadata.broker.list", "172.16.1.248:9092,172.16.1.248:9093");
// test-environment broker — presumably "ce" meant 测试; TODO confirm
// props.put("metadata.broker.list", "172.16.1.248:9095");
props.put("serializer.class", "kafka.serializer.StringEncoder");
props.put("key.serializer.class", "kafka.serializer.StringEncoder");
// key.serializer.class defaults to serializer.class when omitted
// props.put("partitioner.class", "com.magic.cd.test.PartitionerDemo");
// optional: without partitioner.class the default partitioner is used
props.put("request.required.acks", "1");
producer = new Producer(new ProducerConfig(props));
}
}
// Basic send benchmark: pushes 10 messages through the legacy producer and
// prints the elapsed wall-clock time in milliseconds.
@Test
public void testProducer2() throws Exception {
    try {
        long startMs = System.currentTimeMillis();
        int sent = 0;
        while (sent < 10) {
            ProducerUtil.sendMsg("page_visits4", "bbk", "你好一般测试!" + sent);
            sent++;
        }
        long elapsedMs = System.currentTimeMillis() - startMs;
        System.out.println("时间:" + elapsedMs);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
private static Properties props = new Properties();// consumer configuration
private static Producer producer = null;// producer handle (unused in this excerpt)
private static int topicCount = 3;// number of KafkaStreams opened per topic
/**
 * Consumer worker pool: one thread per stream.
 */
private ExecutorService executor = Executors.newFixedThreadPool(topicCount);
/**
 * 初始化消费者，8-5暂停该类的消费
 */
@PostConstruct
public void initCustomer() {
    // Start the background consumer thread once the bean is constructed;
    // do nothing when consumption is switched off in configuration.
    logger.debug("=======System start init consumer client.===================");
    if (KafkaConfigUtils.IS_START_CONSUMER) {
        new Thread(new CustomerKafka()).start();
    } else {
        logger.debug("System do not start consumer client.");
    }
}
/**
* 消费者启动线程
*
* @author my
* @Date 2016年3月30日 上午11:09:11
*/
public class CustomerKafka implements Runnable {
public void run() {
logger.debug("system starting listern kafka message.");
ConsumerConnector consumer = ConsumerConnector();
while (consumer == null) {
try {
Thread.sleep(600000); // 60 second
} catch (InterruptedException e) {
e.printStackTrace();
}
consumer = ConsumerConnector(); // reconnect
}
logger.debug(" kafka connector success.");
snapshotService.initSnapshotRule();
logger.debug(" snapshot rule init success ");
// 3.通过消费者获取流
Map topicCountMap = new HashMap();
topicCountMap.put(KafkaConfigUtils.ORDER_TOPIC_NAME, topicCount);// 数字表示通过几个流执行
Mapbyte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
final Listbyte[], byte[]>> streams = consumerMap.get(KafkaConfigUtils.ORDER_TOPIC_NAME);
logger.debug("read streams ");
// 4.读取消息
for (int i = 0; i < streams.size(); i++) {
final KafkaStream stream = streams.get(i);
executor.execute(new Runnable() {
public void run() {
ConsumerIterator<byte[], byte[]> it = stream.iterator();
while (it.hasNext()) {
byte[] each = it.next().message();
String message = new String(each);
logger.info("receive order message:" + StringUtils.substring(message, 0, 500) + "........");
// logger.debug("receive order message:" + message);
// saveOrderMessageTwo(message);
saveOrderMessageThree(message);
}
}
});
}
}
/**
 * Builds the configuration map for a new-API KafkaConsumer.
 *
 * @param commitFlag whether offsets are auto-committed (enable.auto.commit)
 * @return the consumer configuration properties
 */
public static Map<String, Object> getKafkaConsumer(boolean commitFlag) {
    Map<String, Object> props = new HashMap<>();
    // props.put("bootstrap.servers", "172.16.1.248:9092,172.16.1.248:9093");
    // props.put("bootstrap.servers", "172.16.30.13:9093");
    // dev/test environment brokers
    props.put("bootstrap.servers", "172.16.30.13:9095,172.16.30.13:9096");
    props.put("group.id", "group-new");
    props.put("enable.auto.commit", String.valueOf(commitFlag));
    // Auto-commit interval: on the next poll() the previous poll's offsets
    // are committed and the heartbeat is sent (scheduled internally via a
    // delayed-task queue).
    props.put("auto.commit.interval.ms", "1000");
    // Heartbeat interval should be at most 1/3 of session.timeout.ms.
    // props.put("heartbeat.interval.ms", "1000");
    props.put("session.timeout.ms", "30000");
    // auto.offset.reset controls behaviour when no valid initial offset exists:
    //   earliest/smallest -> start from the beginning of the log;
    //   latest/largest    -> start from the end (a brand-new group then sees
    //                        nothing until fresh data is produced);
    //   anything else     -> throw an exception.
    props.put("auto.offset.reset", "earliest");
    // Cap the records returned per poll; offset commits and heartbeats
    // piggyback on poll(), so keep this modest (100-400 suggested, lower for
    // slow handlers).
    props.put("max.poll.records", "10");
    props.put("key.deserializer",
            "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer",
            "org.apache.kafka.common.serialization.StringDeserializer");
    // return new KafkaConsumer(props);
    return props;
}
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.10.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.10</artifactId>
    <version>0.10.0.0</version>
</dependency>
kafka producer都是异步发送。 每次发送数据时，Producer都是send()之后就认为已经发送出去了，但其实大多数情况下消息还在内存的RecordAccumulator当中。这时候如果Producer挂掉，那就会出现丢数据的情况。或者发送失败，客户端不能明确知道消息是否成功。
解决办法: ack机制,一般设置为acks=1,消息只需要被Leader接受并确认即可,这样同时保证了可靠性和效率。对于我们想明确指定消息是否成功,请在producer.send 实现其callback方法,callback方法是在明确server返回后的回调机制。请用单独日志文件打印来跟踪丢失的消息。
1. 我通过测试例子来观察，2个partition的topic。只有一个consumer a instance的时候，两个partition都会由a实例轮询消费。
2. 再添加第2个消费 b instance后,partition会立即自动重新分配,由a,b应用各自负责一个partition消费。
3. 再关掉b 实例后,partition又会立即自动重新分配,由a 实例轮询消费partition。