package com.iflytek.spark.test;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import com.iflytek.spark.bean.SysCode;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;
/**
 * Multi-threaded consumption within the same consumer group can be implemented in two ways:
 *
 * 1. Implement a single-threaded client and start several instances to consume in parallel.
 *
 * 2. In the client's createMessageStreams call, request more than one stream for the topic, then start
 *    one thread to process each stream (see the runWithMultipleStreams sketch after run() below).
 */
public class JConsumer extends Thread {
private ConsumerConnector consumer;
private String topic;
private final int SLEEP = 20;
public JConsumer(String topic) {
consumer = Consumer.createJavaConsumerConnector(this.consumerConfig());
this.topic = topic;
}
private ConsumerConfig consumerConfig() {
Properties props = new Properties();
props.put("zookeeper.connect", SysCode.CDH_ZKHOST);
props.put("group.id", "jd-group");
props.put("auto.commit.enable", "true");// 默认为true,让consumer定期commit offset,zookeeper会将offset持久化,否则只在内存,若故障则再消费时会从最后一次保存的offset开始
props.put("auto.commit.interval.ms", 1000+"");// 经过INTERVAL时间提交一次offset
props.put("auto.offset.reset", "largest");// What to do when there is no initial offset in ZooKeeper or if an offset is out of range
props.put("zookeeper.session.timeout.ms", 4000 + "");
props.put("zookeeper.sync.time.ms", "200");
props.put("auto.offset.reset", "largest");
return new ConsumerConfig(props);
}
@Override
public void run() {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(1));// number of consumer threads for this topic
Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);
KafkaStream<byte[], byte[]> stream = streams.get(topic).get(0);// if more than one thread is configured above, start one thread per stream to do the processing below
ConsumerIterator<byte[], byte[]> it = stream.iterator();
MessageAndMetadata<byte[], byte[]> messageAndMetaData = null;
while (it.hasNext()) {
messageAndMetaData = it.next();
System.out.println(MessageFormat.format("Receive->[ message:{0} , partition:{1} , offset:{2} ]",
new String(messageAndMetaData.message()),
messageAndMetaData.partition() + "", messageAndMetaData.offset() + ""));
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
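// A minimal sketch of approach 2 from the class comment; the method name and thread count are
// illustrative assumptions, not part of the original code. Note that createMessageStreams may
// only be called once per ConsumerConnector, so call this instead of run(), not in addition to it.
public void runWithMultipleStreams(final int threadNum) {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, Integer.valueOf(threadNum));// one stream per worker thread
Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);
for (final KafkaStream<byte[], byte[]> stream : streams.get(topic)) {
new Thread(new Runnable() {
@Override
public void run() {
ConsumerIterator<byte[], byte[]> it = stream.iterator();
while (it.hasNext()) {
MessageAndMetadata<byte[], byte[]> mm = it.next();
System.out.println(MessageFormat.format("Receive->[ message:{0} , partition:{1} , offset:{2} ]",
new String(mm.message()), mm.partition() + "", mm.offset() + ""));
}
}
}).start();
}
}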
public static void main(String[] args) {
JConsumer con = new JConsumer("102011");
con.start();
}
}
package com.iflytek.spark.test;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import com.iflytek.spark.bean.SysCode;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
/**
 * A rule (a key plus a partitioner function) can be specified so that messages go to particular partitions:
 *
 * 1. If a message is sent without a key, Kafka picks a partition at random.
 *
 * 2. Otherwise, if a partitioner function is configured (via partitioner.class), that function maps the key
 *    to a partition (see the MyPartitioner sketch after run() below).
 *
 * 3. Otherwise, Kafka writes to the partition given by hash(key) % partitionNum.
 */
public class JProducer extends Thread {
private Producer<String, String> producer;
private String topic;
private final int SLEEP = 10;
private final int msgNum = 1000;
public JProducer(String topic) {
Properties props = new Properties();
props.put("metadata.broker.list", SysCode.KAFKAHOST);// 如192.168.6.127:9092,192.168.6.128:9092
// request.required.acks
// 0, which means that the producer never waits for an acknowledgement from the broker (the same behavior as 0.7). This option provides the lowest latency but the weakest durability guarantees
// (some data will be lost when a server fails).
// 1, which means that the producer gets an acknowledgement after the leader replica has received the data. This option provides better durability as the client waits until the server
// acknowledges the request as successful (only messages that were written to the now-dead leader but not yet replicated will be lost).
// -1, which means that the producer gets an acknowledgement after all in-sync replicas have received the data. This option provides the best durability, we guarantee that no messages will be
// lost as long as at least one in sync replica remains.
props.put("request.required.acks", "-1");
// serializer class for message values
props.put("serializer.class", "kafka.serializer.StringEncoder");
// serializer class for message keys
props.put("key.serializer.class", "kafka.serializer.StringEncoder");
// a custom partitioner can be supplied to route messages to partitions; if none is given, Kafka uses hash(messageKey) % partitionNum
// props.put("partitioner.class", "com.zsm.kfkdemo.MyPartitioner");
producer = new Producer<String, String>(new ProducerConfig(props));
this.topic = topic;
}
@Override
public void run() {
boolean isBatchWriteMode = false;
System.out.println("isBatchWriteMode: " + isBatchWriteMode);
if (isBatchWriteMode) {
// batch send
int batchSize = 100;
List<KeyedMessage<String, String>> msgList = new ArrayList<KeyedMessage<String, String>>(batchSize);
for (int i = 0; i < msgNum; i++) {
String msg = "Message_" + i;
msgList.add(new KeyedMessage<String, String>(topic, i + "", msg));
// msgList.add(new KeyedMessage<String, String>(topic, msg));// no key given, so Kafka picks a partition itself
if (msgList.size() == batchSize) {
producer.send(msgList);
System.out.println("Send->[" + msgList + "]");
msgList.clear();
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
if (!msgList.isEmpty()) {
producer.send(msgList);// flush the final partial batch
}
} else {
// send one message at a time
for (int i = 0; i < msgNum; i++) {
KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, i + "", "Message_" + i);
// KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, "Message_" + i);// no key given, so Kafka picks a partition itself
producer.send(msg);
System.out.println("Send->[" + msg + "]");
try {
sleep(SLEEP);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
System.out.println("send done");
}
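// A hedged sketch of the partitioner.class contract mentioned in the constructor; this nested class
// is an illustrative assumption (the original references com.zsm.kfkdemo.MyPartitioner, whose source
// is not shown) and assumes the 0.8.x producer API. Kafka instantiates the class reflectively with a
// VerifiableProperties argument and calls partition() with the message key.
public static class MyPartitioner implements kafka.producer.Partitioner {
public MyPartitioner(kafka.utils.VerifiableProperties props) {
// no-op; this constructor signature is required for Kafka's reflective instantiation
}
@Override
public int partition(Object key, int numPartitions) {
// same rule as the default: hash(key) % partitionNum, with the sign bit masked off
return (key.hashCode() & 0x7fffffff) % numPartitions;
}
}
// To enable it: props.put("partitioner.class", JProducer.MyPartitioner.class.getName());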
public static void main(String[] args) {
JProducer pro = new JProducer("test1");
pro.start();
}
}
package com.iflytek.spark.test;
import com.iflytek.spark.bean.SysCode;
import kafka.admin.TopicCommand;
public class JTopic {
public static void createTopic(String zkAddr, String topicName, int partition, int replication) {
String[] options = new String[] { "--create", "--zookeeper", zkAddr, "--topic", topicName, "--partitions",
partition + "", "--replication-factor", replication + "" };
TopicCommand.main(options);
}
public static void listTopic(String zkAddr) {
String[] options = new String[] { "--list", "--zookeeper", zkAddr };
TopicCommand.main(options);
}
public static void describeTopic(String zkAddr, String topicName) {
String[] options = new String[] { "--describe", "--zookeeper", zkAddr, "--topic", topicName, };
TopicCommand.main(options);
}
public static void alterTopic(String zkAddr, String topicName) {
String[] options = new String[] { "--alter", "--zookeeper", zkAddr, "--topic", topicName, "--partitions", "5" };
TopicCommand.main(options);
}
// deletes the topic by removing its path in ZooKeeper; only the ZooKeeper metadata is removed, the actual data on the Kafka brokers is not deleted
public static void deleteTopic(String zkAddr, String topicName) {
String[] options = new String[] { "--delete", "--zookeeper", zkAddr, "--topic", topicName };
TopicCommand.main(options);
}
public static void main(String[] args) {
String myTestTopic = "test2";
int myPartition = 4;
int myReplication = 1;
// createTopic(SysCode.ZKHOST, myTestTopic, myPartition, myReplication);
listTopic(SysCode.ZKHOST);
// describeTopic(SysCode.ZKHOST, myTestTopic);
// alterTopic(SysCode.ZKHOST, myTestTopic);
// deleteTopic(SysCode.ZKHOST, myTestTopic);
}
}
package com.iflytek.spark.test;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import kafka.admin.AdminUtils;
import kafka.server.ConfigType;
import kafka.utils.ZkUtils;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.security.JaasUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.iflytek.spark.bean.SysCode;
import kafka.admin.RackAwareMode;
import kafka.admin.TopicCommand;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;
public class KafkaTest {
private static final Logger LOG = LoggerFactory.getLogger(KafkaTest.class);
//
// // configuration
// private Properties prop;
//
// // topic name
// private final String topic;
//
// // number of threads, equal to the number of Kafka partitions
// private final int threadNum;
//
// private int key = 0;
//
// public KafkaTest(Properties prop) {
//
// this.prop = prop;
// topic = prop.getProperty("kafka.topic");
// threadNum = Integer.parseInt(prop.getProperty("thread.count"));
//
// }
/**
 * Create a topic.
 * @param topic
 * @param zkHost
 */
public static void createTopic(String topic,String zkHost){
ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
// create the topic with a single partition and a single replica
AdminUtils.createTopic(zkUtils, topic, 1, 1, new Properties(), RackAwareMode.Enforced$.MODULE$);
zkUtils.close();
}
/**
 * Delete a topic.
 * @param topic
 * @param zkHost
 */
public static void deleteTopic(String topic,String zkHost){
ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
// mark the topic for deletion
AdminUtils.deleteTopic(zkUtils, topic);
zkUtils.close();
}
/**
 * Query a topic's topic-level configuration.
 * @param topic
 * @param zkHost
 */
public static void queryTopic(String topic,String zkHost){
ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
// fetch the topic-level config of the given topic
Properties props = AdminUtils.fetchEntityConfig(zkUtils, ConfigType.Topic(), topic);
// iterate over the topic-level properties
Iterator<Entry<Object, Object>> it = props.entrySet().iterator();
while(it.hasNext()){
Entry<Object, Object> entry = it.next();
Object key = entry.getKey();
Object value = entry.getValue();
System.out.println(key + " = " + value);
}
zkUtils.close();
}
/**
 * Update a topic's configuration.
 * @param topic
 * @param zkHost
 */
public static void updateTopic(String topic,String zkHost){
ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
Properties props = AdminUtils.fetchEntityConfig(zkUtils, ConfigType.Topic(), topic);
// add a topic-level property
props.put("min.cleanable.dirty.ratio", "0.3");
// remove a topic-level property
props.remove("max.message.bytes");
// apply the modified config to the topic
AdminUtils.changeTopicConfig(zkUtils, topic, props);
zkUtils.close();
}
/**
 * Consume messages from a topic with the high-level consumer.
 * @param topic
 * @param props
 */
public static void consume(String topic,Properties props) {
ConsumerConfig config = new ConsumerConfig(props);
ConsumerConnector consumer = Consumer.createJavaConsumerConnector(config);
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(1));
StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());
Map<String, List<KafkaStream<String, String>>> consumerMap =
consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);
KafkaStream<String, String> stream = consumerMap.get(topic).get(0);
ConsumerIterator<String, String> it = stream.iterator();
while (it.hasNext()){
System.out.println(it.next().message());
}
}
/**
 * Kafka consumer: build the consumer properties and start consuming.
 * @param topic
 * @param zkHost
 */
public static void kafkaConsumer(String topic,String zkHost) {
Properties props = new Properties();
// ZooKeeper configuration
props.put("zookeeper.connect", zkHost);
// the consumer group this consumer belongs to
props.put("group.id", "jd-group");
// ZooKeeper session timeout
props.put("zookeeper.session.timeout.ms", "4000");
props.put("zookeeper.sync.time.ms", "200");
props.put("auto.commit.interval.ms", "1000");
props.put("auto.offset.reset", "smallest");
// serializer class
props.put("serializer.class", "kafka.serializer.StringEncoder");
consume(topic,props);
}
/** ================================ The code below has issues and needs improvement ========================================== **/
public static void producer(String topic, String key, String value, Properties props) {
// instantiate the producer
KafkaProducer<String, String> kafkaProducer = new KafkaProducer<String, String>(props);
ProducerRecord<String, String> producerRecord = new ProducerRecord<String, String>(topic, key, value);
for (int i = 0; i < 10; i++) {
kafkaProducer.send(new ProducerRecord<String, String>(topic, i + ""));
System.out.println(i);
}
// kafkaProducer.send(producerRecord, new Callback() {
//
// @Override
// public void onCompletion(RecordMetadata recordMetadata, Exception exception) {
// if (null != exception) {
// LOG.info(String.format("error while sending data: %s", exception.getMessage()));
// }
// }
// });
kafkaProducer.close();
}
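// A minimal sketch (not part of the original class) of the asynchronous send-with-callback pattern
// that the commented-out block in producer() gestures at: the callback fires once the broker
// acknowledges or rejects the record, so failures can be logged without blocking the send loop.
public static void sendWithCallback(KafkaProducer<String, String> kafkaProducer, String topic, String key, String value) {
kafkaProducer.send(new ProducerRecord<String, String>(topic, key, value), new Callback() {
@Override
public void onCompletion(RecordMetadata metadata, Exception exception) {
if (exception != null) {
LOG.error("failed to send record: " + exception.getMessage(), exception);
} else {
LOG.info("record sent to partition " + metadata.partition() + " at offset " + metadata.offset());
}
}
});
}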
public static void kafkaProducer(String topic, String key, String value) {
Properties props = new Properties();
props.put("bootstrap.servers", SysCode.KAFKAHOST);
props.put("acks", "all");
props.put("retries",0);
props.put("batch.size",16384);
props.put("linger.ms",1);
props.put("buffer.memory", 33554432);
props.put("key.serializer","org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer","org.apache.kafka.common.serialization.StringSerializer");
producer(topic, key, value,props);
}
/**
 * List all topics by capturing TopicCommand's console output.
 * @param zkHost
 */
public static List<String> queryAllTopics(String zkHost){
String [] options = new String[]{
"--list",
"--zookeeper",
zkHost
};
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(1024 * 3);
PrintStream printStream = new PrintStream(byteArrayOutputStream);
PrintStream oldPrintStream = System.out;
System.setOut(printStream);
TopicCommand.main(options);
System.setOut(oldPrintStream);
String message = byteArrayOutputStream.toString();
List<String> list = Arrays.asList(message.split("\n"));
for (String topicLine : list) {
System.out.println(topicLine);
}
return list;
}
/**
 * Send messages to Kafka (key is null).
 */
// public void simpleAddQueue(String... msgs) {
// Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(prop));
// List<KeyedMessage<String, String>> data = new ArrayList<KeyedMessage<String, String>>();
// for (String msg : msgs) {
// data.add(new KeyedMessage<String, String>(topic, msg));
// LOG.debug("queued for kafka: topic[" + topic + "]; message[" + msg + "]");
// }
// if (!data.isEmpty()) {
// producer.send(data);
// LOG.debug("sent to kafka successfully!");
// }
// // close the producer
// producer.close();
// }
/**
 * Send key/value messages to Kafka.
 */
// public void addQueue(Map<String, List<String>> msgs) {
// Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(prop));
// List<KeyedMessage<String, String>> data = new ArrayList<KeyedMessage<String, String>>();
// for (Entry<String, List<String>> entry : msgs.entrySet()) {
// for (String msg : entry.getValue()) {
// data.add(new KeyedMessage<String, String>(topic, entry.getKey(), msg));
// LOG.debug("queued for kafka: topic[" + topic + "]; key[" + entry.getKey() + "]; message[" + msg + "]");
// }
// }
// if (!data.isEmpty()) {
// producer.send(data);
// LOG.debug("sent to kafka successfully!");
// }
// producer.close();
// }
/**
 * Distribute messages evenly across the Kafka partitions according to threadNum.
 */
// public void addQueue(String... msgs) {
// Map<String, List<String>> map = new HashMap<String, List<String>>();
// for (String msg : msgs) {
// key = key >= threadNum ? 0 : key;
// if (!map.containsKey(key + "")) {
// map.put(key + "", new ArrayList<String>());
// }
// map.get(key + "").add(msg);
// key++;
// if (key > Integer.MAX_VALUE / 2) {
// key = 0;
// }
// }
// addQueue(map);
// }
/**
 * Get the default list of Kafka consumer streams.
 */
// public List<KafkaStream<byte[], byte[]>> getStream() {
// ConsumerConnector consumerConnector = Consumer
// .createJavaConsumerConnector(new ConsumerConfig(prop));
//
// Map<String, Integer> map = new HashMap<String, Integer>();
// map.put(topic, threadNum);
// Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumerConnector
// .createMessageStreams(map);
// return consumerMap.get(topic);
// }
/**
 * Get the list of Kafka consumer streams for a given groupId.
 */
// public List<KafkaStream<byte[], byte[]>> getStream(String groupId) {
// prop.setProperty("group.id", groupId);
// return getStream();
// }
//
/**
 * Get the topic.
 */
// public String getTopic() {
// return topic;
// }
//
// /**
// * Get the thread count, equal to the number of Kafka partitions.
// */
// public int getThreadNum() {
// return threadNum;
// }
/**
 * Test sending.
 */
// public static void testSendKfk(){
//
// Properties conf = new Properties();
// conf.put("metadata.broker.list", "10.30.15.55:39091,10.30.15.56:39091,10.30.15.57:39091");
// conf.put("kafka.topic", "SparkTest");
// conf.put("serializer.class", "kafka.serializer.StringEncoder");
// conf.put("key.serializer.class", "kafka.serializer.StringEncoder");
// conf.put("thread.count", "1");
//
// KafkaTest kfk = new KafkaTest(conf);
// //for(int i=0;i<10;i++){
// kfk.addQueue("test1","test2","test3","test4","test5");
// kfk.addQueue("test1","test2","test3","test4","test5");
// kfk.addQueue("test1","test2","test3","test4","test5");
// kfk.addQueue("test1","test2","test3","test4","test5");
// kfk.addQueue("test1","test2","test3","test4","test5");
// LOG.info("发送完毕");
// //}
// }
//
// /**
// * Test consuming.
// */
// public static void testConsumer(){
//
// Properties conf = new Properties();
// conf.put("kafka.topic", "SparkTest");
// conf.put("thread.count", "1");
// conf.put("zookeeper.connect", SysCode.ZKHOST);
// conf.put("zookeeper.connectiontimeout.ms", "30000");
// conf.put("zookeeper.session.timeout.ms", "800");
// conf.put("zookeeper.sync.time.ms", "200");
// conf.put("auto.commit.interval.ms", "1000");
// conf.put("auto.offset.reset", "smallest");
// conf.put("kafka.topic", "SparkTest");
//
//
//
// KafkaTest kfk = new KafkaTest(conf);
//
// List<KafkaStream<byte[], byte[]>> result = kfk.getStream();
// // thread pool
// ExecutorService executor = Executors.newFixedThreadPool(kfk.getThreadNum());
// for (final KafkaStream<byte[], byte[]> stream : result) {
// executor.submit(new ConsumerThread(stream));
//
// }
// }
public static void main(String[] args) {
// createTopic("SparkTest", SysCode.ZKHOST);
// deleteTopic("test1", SysCode.CDH_ZKHOST);
// queryTopic("test",SysCode.CDH_ZKHOST);
// updateTopic("test",SysCode.CDH_ZKHOST);
// kafkaProducer("test", "hmwang", "22222");//有问题
kafkaConsumer("test1", SysCode.ZKHOST);
}
}