(Adapted from http://chengjianxiaoxue.iteye.com/blog/2190488)
Producer code:
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.StringEncoder;

public class kafkaProducer extends Thread {
    private String topic;

    public kafkaProducer(String topic) {
        super();
        this.topic = topic;
    }

    @Override
    public void run() {
        Producer<String, String> producer = createProducer();
        int i = 0;
        while (true) {
            producer.send(new KeyedMessage<String, String>(topic, "message: " + i++));
            try {
                TimeUnit.SECONDS.sleep(1);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    private Producer<String, String> createProducer() {
        Properties properties = new Properties();
        properties.put("zookeeper.connect", "192.168.205.153:2181");        // ZooKeeper address
        properties.put("serializer.class", StringEncoder.class.getName());
        properties.put("metadata.broker.list", "192.168.205.153:9092");     // Kafka broker list
        return new Producer<String, String>(new ProducerConfig(properties));
    }

    public static void main(String[] args) {
        new kafkaProducer("test").start();   // use the topic "test" already created on the Kafka cluster
    }
}
Consumer code:
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

/**
 * Receives data; sample output:
 * Received: message: 10
 * Received: message: 11
 * Received: message: 12
 * Received: message: 13
 * Received: message: 14
 */
public class kafkaConsumer extends Thread {
    private String topic;

    public kafkaConsumer(String topic) {
        super();
        this.topic = topic;
    }

    @Override
    public void run() {
        ConsumerConnector consumer = createConsumer();
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, 1);   // 1 is the number of consumer threads for this topic
        Map<String, List<KafkaStream<byte[], byte[]>>> messageStreams = consumer.createMessageStreams(topicCountMap);
        KafkaStream<byte[], byte[]> stream = messageStreams.get(topic).get(0);   // the single stream for this topic
        ConsumerIterator<byte[], byte[]> iterator = stream.iterator();
        while (iterator.hasNext()) {
            String message = new String(iterator.next().message());
            System.out.println("Received: " + message);
        }
    }

    private ConsumerConnector createConsumer() {
        Properties properties = new Properties();
        properties.put("zookeeper.connect", "192.168.205.153:2181");   // ZooKeeper address
        properties.put("group.id", "group1");   // consumer group id; consumers sharing a group id split the topic's messages between them instead of each receiving every message
        return Consumer.createJavaConsumerConnector(new ConsumerConfig(properties));
    }

    public static void main(String[] args) {
        new kafkaConsumer("test").start();   // use the topic "test" already created on the Kafka cluster
    }
}
Note: the code above runs against both kafka_2.10-0.8.2.0 and kafka_2.11-0.9.0.1. Newer versions are backward compatible with older ones, so code written against a newer version will not necessarily run on an older one, while code written against an older version generally runs on a newer one.
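All of the examples in this post read from and write to a topic named test, which must already exist on the cluster. A minimal sketch of creating it with the CLI tools shipped with these Kafka versions (the ZooKeeper address is taken from the config above; adjust it for your own setup):
bin/kafka-topics.sh --create --zookeeper 192.168.205.153:2181 --replication-factor 1 --partitions 1 --topic test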
Example 1: (code adapted from http://blog.csdn.net/lnho2015/article/details/51353936)
Producer code:
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class KafkaProducerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "h153:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i < 100; i++) {
            // producer.send(new ProducerRecord<>("test", Integer.toString(i), Integer.toString(i)));
            producer.send(new ProducerRecord<>("test", "Hello"));
        }
        producer.close();
    }
}
Consumer code:
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

public class KafkaConsumerExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "h153:9092");
        props.put("group.id", "test");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList("test"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(100);
            for (ConsumerRecord<String, String> record : records)
                System.out.printf("offset = %d, key = %s, value = %s\n", record.offset(), record.key(), record.value());
        }
    }
}
Note 1: At first I imported the kafka_2.10-0.8.2.0 jars in MyEclipse and everything worked. After switching to the kafka_2.11-0.9.0.1 jars the producer reported an error but still produced data; with the kafka_2.10-0.10.1.0 and kafka_2.11-0.11.0 jars it failed outright and produced no data. With the kafka_2.11-0.9.0.1 jars the error was:
[2017-10-30 19:08:39,779] ERROR Uncaught error in kafka producer I/O thread: (org.apache.kafka.clients.producer.internals.Sender:143)
org.apache.kafka.common.protocol.types.SchemaException: Error reading field 'throttle_time_ms': java.nio.BufferUnderflowException
at org.apache.kafka.common.protocol.types.Schema.read(Schema.java:71)
at org.apache.kafka.clients.NetworkClient.handleCompletedReceives(NetworkClient.java:439)
at org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:265)
at org.apache.kafka.clients.producer.internals.Sender.run(Sender.java:216)
at org.apache.kafka.clients.producer.internals.Sender.run(Sender.java:141)
at java.lang.Thread.run(Thread.java:724)
Cause: the jars imported into MyEclipse were from kafka_2.11-0.9.0.1, while the broker installed in the Linux VM was kafka_2.10-0.8.2.0.
Note 2: When I compiled the producer code directly on Linux, it failed with:
[hadoop@h32 q1]$ javac KafkaProducerExample.java
KafkaProducerExample.java:1: error: package org.apache.kafka.clients.producer does not exist
import org.apache.kafka.clients.producer.KafkaProducer;
^
KafkaProducerExample.java:2: error: package org.apache.kafka.clients.producer does not exist
import org.apache.kafka.clients.producer.Producer;
^
KafkaProducerExample.java:3: error: package org.apache.kafka.clients.producer does not exist
import org.apache.kafka.clients.producer.ProducerRecord;
^
KafkaProducerExample.java:19: error: cannot find symbol
Producer producer = new KafkaProducer<>(props);
^
symbol: class Producer
location: class KafkaProducerExample
KafkaProducerExample.java:19: error: cannot find symbol
Producer producer = new KafkaProducer<>(props);
^
symbol: class KafkaProducer
location: class KafkaProducerExample
KafkaProducerExample.java:21: error: cannot find symbol
producer.send(new ProducerRecord<>("test", Integer.toString(i), Integer.toString(i)));
^
symbol: class ProducerRecord
location: class KafkaProducerExample
6 errors
Fix: the installed JDK was jdk1.8.0_91; I switched to jdk1.7.0_25 and added the Kafka jars to the CLASSPATH in ~/.bash_profile, as follows:
export CLASSPATH=.:/home/hadoop/hbase-1.0.0/lib/*:/home/hadoop/kafka_2.11-0.9.0.1/libs/*
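For the new CLASSPATH to take effect in the current shell, the profile has to be re-sourced before compiling and running again; a sketch of the assumed workflow, using the same file as above:
[hadoop@h32 q1]$ source ~/.bash_profile
[hadoop@h32 q1]$ javac KafkaProducerExample.java
[hadoop@h32 q1]$ java KafkaProducerExample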
Example 2: (adapted from http://blog.csdn.net/zero__007/article/details/51068165)
Producer code:
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

import java.util.Properties;

public class Produce {
    public static void main(String[] args) {
        String topic = "test";

        Properties props = new Properties();
        props.put("bootstrap.servers", "h153:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("acks", "all");
        props.put("retries", 1);

        Producer<String, String> producer = new KafkaProducer<>(props);
        producer.send(new ProducerRecord<String, String>(topic, "Hello"));
        producer.send(new ProducerRecord<String, String>(topic, "World"), new Callback() {
            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    e.printStackTrace();
                } else {
                    System.out.println(metadata.toString());   // org.apache.kafka.clients.producer.RecordMetadata@1d89e2b5
                    System.out.println(metadata.offset());     // 1
                }
            }
        });
        producer.flush();
        producer.close();
    }
}
Consumer code:
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.util.Arrays;
import java.util.Properties;

public class Consumer {
    public static void main(String[] args) {
        String topic = "test";

        Properties props = new Properties();
        props.put("bootstrap.servers", "h153:9092");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.setProperty("group.id", "0");
        props.setProperty("enable.auto.commit", "true");
        props.setProperty("auto.offset.reset", "earliest");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList(topic));
        for (int i = 0; i < 2; i++) {
            ConsumerRecords<String, String> records = consumer.poll(1000);
            System.out.println(records.count());
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record);
                // consumer.seekToBeginning(new TopicPartition(record.topic(), record.partition()));
            }
        }
    }
}
Note: auto.offset.reset is set to earliest here so that the consumer can read the topic from the beginning, i.e. starting at offset 0. org.apache.kafka.clients.consumer.ConsumerConfig describes the setting as follows: "What to do when there is no initial offset in Kafka or if the current offset does not exist any more on the server (e.g. because that data has been deleted): earliest: automatically reset the offset to the earliest offset; latest: automatically reset the offset to the latest offset; none: throw exception to the consumer if no previous offset is found for the consumer's group; anything else: throw exception to the consumer." consumer.seekToBeginning can also reposition the offset, but following the source code shows: "This function evaluates lazily, seeking to the final offset in all partitions only when {@link #poll(long)} or {@link #position(TopicPartition)} are called." In other words, for seekToBeginning() to take effect it must be called after poll or position has been invoked.
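To make the lazy-evaluation point concrete, here is a minimal sketch (not from the referenced posts) of re-reading a topic from the start with the subscribe() API: the first poll() triggers the rebalance that hands the consumer its partitions, and only after that does seekToBeginning() have anything to act on. It assumes a 0.10.1+ client (whose seekToBeginning takes a Collection), reuses the h153:9092 broker and test topic from the examples above, and uses a made-up group id:
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.Arrays;
import java.util.Properties;
public class SeekToBeginningExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "h153:9092");    // broker assumed from the examples above
        props.put("group.id", "seek-demo");             // hypothetical group id
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList("test"));
        // The first poll() joins the group and assigns partitions to this consumer;
        // calling seekToBeginning() before any poll() would have no assigned partitions to act on.
        consumer.poll(100);
        consumer.seekToBeginning(consumer.assignment());
        // The next poll() now starts from offset 0 of every assigned partition.
        ConsumerRecords<String, String> records = consumer.poll(1000);
        for (ConsumerRecord<String, String> record : records) {
            System.out.printf("offset = %d, value = %s%n", record.offset(), record.value());
        }
        consumer.close();
    }
}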
1. See also this article: http://blog.csdn.net/lishuangzhe7047/article/details/74530417
earliest
If a partition has a committed offset, consume from that offset; if there is no committed offset, consume from the beginning of the partition.
latest
If a partition has a committed offset, consume from that offset; if there is no committed offset, consume only data newly produced to that partition.
none
If every partition of the topic has a committed offset, consume from those offsets; if any partition has no committed offset, throw an exception.
Before Kafka 0.10.1.X, auto.offset.reset took the values smallest and largest (offsets were stored in ZooKeeper).
From Kafka 0.10.1.X on, auto.offset.reset takes the values earliest, latest, and none (offsets are stored in a special Kafka topic named __consumer_offsets).
2. Implementing the equivalent of --from-beginning in code
subscribe:
props.put("auto.offset.reset", "earliest");            // same effect as --from-beginning
// Group id: use a fresh, random group so that no committed offset exists yet
props.put("group.id", UUID.randomUUID().toString());
assign:
consumer.assign(Arrays.asList(new TopicPartition(topicName, 0)));
consumer.seekToBeginning(Arrays.asList(new TopicPartition(topicName, 0)));
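For completeness, here is a minimal runnable sketch of the assign variant (again not from the referenced posts; the test topic, partition 0, the h153:9092 broker, and a 0.10.1+ client with the Collection-based seekToBeginning are all assumptions carried over from the examples above):
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import java.util.Arrays;
import java.util.Properties;
public class AssignFromBeginningExample {
    public static void main(String[] args) {
        String topicName = "test";                      // topic assumed from the examples above
        Properties props = new Properties();
        props.put("bootstrap.servers", "h153:9092");    // broker assumed from the examples above
        props.put("enable.auto.commit", "false");       // this demo does not commit offsets
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        // assign() pins the consumer to partition 0 directly, bypassing group rebalancing,
        // so seekToBeginning() can be called immediately: the partition is already assigned.
        TopicPartition partition = new TopicPartition(topicName, 0);
        consumer.assign(Arrays.asList(partition));
        consumer.seekToBeginning(Arrays.asList(partition));
        // The seek itself is lazy; the next poll() actually moves to offset 0 and reads from there.
        ConsumerRecords<String, String> records = consumer.poll(1000);
        System.out.println("read " + records.count() + " records from the beginning");
        for (ConsumerRecord<String, String> record : records) {
            System.out.println(record);
        }
        consumer.close();
    }
}
Compared with the subscribe approach, assign needs no group.id at all (none is set here since auto commit is disabled); the trade-off is that you pick the partitions yourself and get no automatic rebalancing.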