Kafka 有时候访问时,程序日志中得到的 initoffset 为 -1,并且收不到消息,可以调大 ZooKeeper 会话超时来解决,例如:kafka.zookeeper.session.timeout.ms=1000000
使用时遇到:Failed to send messages after 3 tries.
maven 包
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.8.0</artifactId>
<version>0.8.1.1</version>
</dependency>
1:生产者代码
import java.util.Properties; import kafka.javaapi.producer.Producer; import kafka.producer.KeyedMessage; import kafka.producer.ProducerConfig; public class ProducerSample { public static void main(String[] args) { Properties props = new Properties(); props.put("zk.connect", "183.57.57.76:2181/YRFS/test-datacenter/test-server"); //与kafkaserver.properties的zookeeper.connect有关 props.put("serializer.class", "kafka.serializer.StringEncoder"); props.put("metadata.broker.list", "183.57.57.76:9092");//你要向哪台kafka上保存消息 ProducerConfig config = new ProducerConfig(props); Producer<String, String> producer = new Producer<String, String>(config); KeyedMessage<String, String> data = new KeyedMessage<String, String>("gbz", "test-message2"); //topic名称与消息内容 producer.send(data); producer.close(); //用完关闭,否则kafka后台会报错,可从logs/server.log查到 } }
2:消费者代码
import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import kafka.consumer.ConsumerConfig; import kafka.consumer.ConsumerIterator; import kafka.consumer.KafkaStream; import kafka.javaapi.consumer.ConsumerConnector; public class ConsumerTest extends Thread { private final ConsumerConnector consumer; private final String topic; public static void main(String[] args) { ConsumerTest consumerThread = new ConsumerTest("gbz"); consumerThread.start(); } public ConsumerTest(String topic) { consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig()); this.topic = topic; } private static ConsumerConfig createConsumerConfig() { Properties props = new Properties(); props.put("zookeeper.connect", "183.57.57.76:2181/YRFS/test-datacenter/test-server"); props.put("group.id", "0"); //相同消费者分组 props.put("zookeeper.session.timeout.ms", "10000"); return new ConsumerConfig(props); } public void run() { Map<String, Integer> topicCountMap = new HashMap<String, Integer>(); topicCountMap.put(topic, new Integer(1)); Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap); KafkaStream<byte[], byte[]> stream = consumerMap.get(topic).get(0); ConsumerIterator<byte[], byte[]> it = stream.iterator(); while (it.hasNext()) System.out.println(new String(it.next().message())); } }
注:consumerMap.get(topic) 得到该topic的一组partition流,它们各自有自己的offset;每个流对应一个consumer线程
3:生产者(多个partition)
可参考官网:https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+Producer+Example
import java.util.*; import kafka.javaapi.producer.Producer; import kafka.producer.KeyedMessage; import kafka.producer.ProducerConfig; public class TestProducer { public static void main(String[] args) { long events = Long.parseLong(args[0]); Random rnd = new Random(); Properties props = new Properties(); props.put("metadata.broker.list", "broker1:9092,broker2:9092 ");//接收者代理,负责把信息传给不同的partition props.put("serializer.class", "kafka.serializer.StringEncoder"); props.put("partitioner.class", "example.producer.SimplePartitioner");//分割数据到不同的节点的partition props.put("request.required.acks", "1"); //生产者得到一个回复即确定消息发送成功 ProducerConfig config = new ProducerConfig(props); Producer<String, String> producer = new Producer<String, String>(config); for (long nEvents = 0; nEvents < events; nEvents++) { long runtime = new Date().getTime(); String ip = “192.168.2.” + rnd.nextInt(255); String msg = runtime + “,www.example.com,” + ip; KeyedMessage<String, String> data = new KeyedMessage<String, String>("page_visits", ip, msg);//参数为:topic,key(可为空,此时消息随机分发不同的partition中),message producer.send(data); } producer.close(); } }
自定义的分割类
import kafka.producer.Partitioner; import kafka.utils.VerifiableProperties; public class SimplePartitioner implements Partitioner<String> {//这里会报错,去掉泛型 public SimplePartitioner (VerifiableProperties props) { } public int partition(String key, int a_numPartitions) { int partition = 0; int offset = key.lastIndexOf('.'); if (offset > 0) { partition = Integer.parseInt( key.substring(offset+1)) % a_numPartitions; } return partition; } }
4:消费者 (处理多个partition)
可参考官网:https://cwiki.apache.org/confluence/display/KAFKA/Consumer+Group+Example
package com.test.groups;

import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * High-level consumer example that reads one topic with several threads,
 * one {@code ConsumerTest} worker per stream.
 */
public class ConsumerGroupExample {
    private final ConsumerConnector consumer;
    private final String topic;
    private ExecutorService executor;

    /**
     * Creates the consumer connector.
     *
     * @param a_zookeeper ZooKeeper connection string
     * @param a_groupId consumer group id
     * @param a_topic topic to consume
     */
    public ConsumerGroupExample(String a_zookeeper, String a_groupId, String a_topic) {
        consumer = kafka.consumer.Consumer.createJavaConsumerConnector(
                createConsumerConfig(a_zookeeper, a_groupId));
        this.topic = a_topic;
    }

    /**
     * Shuts down the connector and the worker pool, waiting up to five seconds
     * for the workers to finish.
     */
    public void shutdown() {
        if (consumer != null) {
            consumer.shutdown();
        }
        if (executor != null) {
            executor.shutdown();
            try {
                if (!executor.awaitTermination(5000, TimeUnit.MILLISECONDS)) {
                    System.out.println(
                            "Timed out waiting for consumer threads to shut down, exiting uncleanly");
                }
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller instead of swallowing it.
                Thread.currentThread().interrupt();
                System.out.println("Interrupted during shutdown, exiting uncleanly");
            }
        }
    }

    /**
     * Requests {@code a_numThreads} streams for the topic and submits one worker per stream.
     *
     * @param a_numThreads number of streams and worker threads
     */
    public void run(int a_numThreads) {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, Integer.valueOf(a_numThreads));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap =
                consumer.createMessageStreams(topicCountMap);
        List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);

        // now launch all the threads
        executor = Executors.newFixedThreadPool(a_numThreads);

        // now create an object to consume the messages
        int threadNumber = 0;
        for (final KafkaStream<byte[], byte[]> stream : streams) {
            executor.submit(new ConsumerTest(stream, threadNumber));
            threadNumber++;
        }
    }

    /**
     * Builds the consumer configuration.
     *
     * @param a_zookeeper ZooKeeper connection string
     * @param a_groupId consumer group id
     * @return the configuration used to create the connector
     */
    private static ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId) {
        Properties props = new Properties();
        props.put("zookeeper.connect", a_zookeeper);
        props.put("group.id", a_groupId);
        // NOTE(review): 400 ms is a very short session timeout - confirm it suits the cluster.
        props.put("zookeeper.session.timeout.ms", "400");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        return new ConsumerConfig(props);
    }

    /**
     * Runs the example for ten seconds, then shuts down.
     *
     * @param args zookeeper connection string, group id, topic, number of threads
     * @throws NumberFormatException if the thread count is not a valid integer
     */
    public static void main(String[] args) {
        if (args.length < 4) {
            System.err.println("Usage: ConsumerGroupExample <zookeeper> <groupId> <topic> <threads>");
            return;
        }
        String zooKeeper = args[0];
        String groupId = args[1];
        String topic = args[2];
        int threads = Integer.parseInt(args[3]);

        ConsumerGroupExample example = new ConsumerGroupExample(zooKeeper, groupId, topic);
        example.run(threads);

        try {
            Thread.sleep(10000);
        } catch (InterruptedException ie) {
            // Restore the flag and fall through to shutdown.
            Thread.currentThread().interrupt();
        }
        example.shutdown();
    }
}
线程数多于partition数时,有些线程将得不到消息;线程数少于partition数时,一个线程会处理多个partition;最好让线程数与partition数一致
import java.nio.charset.Charset;

import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
/**
 * Worker that reads one Kafka stream and prints every message, prefixed with
 * the worker's thread number.
 */
public class ConsumerTest implements Runnable {

    /** Explicit charset for decoding payloads, so output does not depend on the platform default. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    private final KafkaStream<byte[], byte[]> m_stream;
    private final int m_threadNumber;

    /**
     * @param a_stream stream this worker consumes
     * @param a_threadNumber number used to label this worker's output
     */
    public ConsumerTest(KafkaStream<byte[], byte[]> a_stream, int a_threadNumber) {
        m_threadNumber = a_threadNumber;
        m_stream = a_stream;
    }

    /**
     * Prints each message until the stream's iterator reports no more messages,
     * then prints a shutdown notice.
     */
    @Override
    public void run() {
        ConsumerIterator<byte[], byte[]> it = m_stream.iterator();
        while (it.hasNext()) {
            System.out.println("Thread " + m_threadNumber + ": "
                    + new String(it.next().message(), UTF_8));
        }
        System.out.println("Shutting down Thread: " + m_threadNumber);
    }
}
props.put("auto.commit.enable", "false"); 消费者不会自动提交offset
转载
一 PRODUCER的API
1.Producer的创建,依赖于ProducerConfig
public Producer(ProducerConfig config);
2.单个或是批量的消息发送
public void send(KeyedMessage<K,V> message);
public void send(List<KeyedMessage<K,V>> messages);
3.关闭Producer到所有broker的连接
public void close();
二 CONSUMER的高层API
主要是Consumer和ConsumerConnector,这里的Consumer是ConsumerConnector的静态工厂类
class Consumer {
public static kafka.javaapi.consumer.ConsumerConnector createJavaConsumerConnector(config: ConsumerConfig);
}
具体的消息的消费都是在ConsumerConnector中
创建一个消息处理的流,包含所有的topic,并根据指定的Decoder
public <K,V> Map<String, List<KafkaStream<K,V>>>
createMessageStreams(Map<String, Integer> topicCountMap, Decoder<K> keyDecoder, Decoder<V> valueDecoder);
创建一个消息处理的流,包含所有的topic,使用默认的Decoder
public Map<String, List<KafkaStream<byte[], byte[]>>> createMessageStreams(Map<String, Integer> topicCountMap);
根据TopicFilter获取匹配的topic的消息流,并使用指定的Decoder
public <K,V> List<KafkaStream<K,V>>
createMessageStreamsByFilter(TopicFilter topicFilter, int numStreams, Decoder<K> keyDecoder, Decoder<V> valueDecoder);
根据TopicFilter获取匹配的topic的消息流,使用默认的Decoder
public List<KafkaStream<byte[], byte[]>> createMessageStreamsByFilter(TopicFilter topicFilter);
提交偏移量到这个消费者连接的topic
public void commitOffsets();
关闭消费者
public void shutdown();
高层的API中比较常用的就是public List<KafkaStream<byte[], byte[]>> createMessageStreamsByFilter(TopicFilter topicFilter);和public void commitOffsets();
三 CONSUMER的简单API–SIMPLECONSUMER
批量获取消息
public FetchResponse fetch(request: kafka.javaapi.FetchRequest);
获取topic的元信息
public kafka.javaapi.TopicMetadataResponse send(request: kafka.javaapi.TopicMetadataRequest);
获取目前可用的偏移量
public kafka.javaapi.OffsetResponse getOffsetsBefore(request: OffsetRequest);
关闭连接
public void close();
对于大部分应用来说,高层API就已经足够使用了,但是若是想做更进一步的控制的话,可以使用简单的API,例如消费者重启的情况下,希望得到最新的offset,就该使用SimpleConsumer.
四 KAFKA HADOOP CONSUMER API
提供了一个可水平伸缩的解决方案,用于结合hadoop使用,参见:
https://github.com/linkedin/camus/tree/camus-kafka-0.8/
五 实战
maven依赖:
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.10</artifactId>
<version>0.8.0</version>
</dependency>
生产者代码:
import kafka.javaapi.producer.Producer; import kafka.producer.KeyedMessage; import kafka.producer.ProducerConfig; import java.util.Properties; /** * <pre> * Created by zhaoming on 14-5-4 下午3:23 * </pre> */ public class KafkaProductor { public static void main(String[] args) throws InterruptedException { Properties properties = new Properties(); properties.put("zk.connect", "127.0.0.1:2181"); properties.put("metadata.broker.list", "localhost:9092"); properties.put("serializer.class", "kafka.serializer.StringEncoder"); ProducerConfig producerConfig = new ProducerConfig(properties); Producer<String, String> producer = new Producer<String, String>(producerConfig); // 构建消息体 KeyedMessage<String, String> keyedMessage = new KeyedMessage<String, String>("test-topic", "test-message"); producer.send(keyedMessage); Thread.sleep(1000); producer.close(); } }
消费端代码
import java.io.UnsupportedEncodingException; import java.util.List; import java.util.Properties; import java.util.concurrent.TimeUnit; import kafka.consumer.*; import kafka.javaapi.consumer.ConsumerConnector; import kafka.message.MessageAndMetadata; import org.apache.commons.collections.CollectionUtils; /** * <pre> * Created by zhaoming on 14-5-4 下午3:32 * </pre> */ public class kafkaConsumer { public static void main(String[] args) throws InterruptedException, UnsupportedEncodingException { Properties properties = new Properties(); properties.put("zookeeper.connect", "127.0.0.1:2181"); properties.put("auto.commit.enable", "true"); properties.put("auto.commit.interval.ms", "60000"); properties.put("group.id", "test-group"); ConsumerConfig consumerConfig = new ConsumerConfig(properties); ConsumerConnector javaConsumerConnector = Consumer.createJavaConsumerConnector(consumerConfig); //topic的过滤器 Whitelist whitelist = new Whitelist("test-topic"); List<KafkaStream<byte[], byte[]>> partitions = javaConsumerConnector.createMessageStreamsByFilter(whitelist); if (CollectionUtils.isEmpty(partitions)) { System.out.println("empty!"); TimeUnit.SECONDS.sleep(1); } //消费消息 for (KafkaStream<byte[], byte[]> partition : partitions) { ConsumerIterator<byte[], byte[]> iterator = partition.iterator(); while (iterator.hasNext()) { MessageAndMetadata<byte[], byte[]> next = iterator.next(); System.out.println("partiton:" + next.partition()); System.out.println("offset:" + next.offset()); System.out.println("message:" + new String(next.message(), "utf-8")); } } } }