Commonly used Kafka code

1. Consumer code

package com.iflytek.spark.test;

import java.text.MessageFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;



import com.iflytek.spark.bean.SysCode;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

/**
 * Multi-threaded consumption within the same consumer group can be implemented in two ways:
 *
 * 1. Implement a single-threaded client and start several instances of it to consume.
 *
 * 2. In the client's createMessageStreams, request more than one thread (stream) for the topic,
 *    then start one thread to process each stream (a sketch of this approach follows the class below).
 */
public class JConsumer extends Thread {
    private ConsumerConnector consumer;
    private String topic;
    private final int SLEEP = 20;

    public JConsumer(String topic) {
        consumer = Consumer.createJavaConsumerConnector(this.consumerConfig());
        this.topic = topic;
    }

    private ConsumerConfig consumerConfig() {
        Properties props = new Properties();
        props.put("zookeeper.connect", SysCode.CDH_ZKHOST);
        props.put("group.id", "jd-group");
        // Defaults to true: the consumer commits offsets periodically and ZooKeeper persists them;
        // otherwise offsets stay in memory only, and after a failure consumption resumes from the last saved offset.
        props.put("auto.commit.enable", "true");
        props.put("auto.commit.interval.ms", 1000 + "");// commit offsets once per interval
        props.put("auto.offset.reset", "largest");// what to do when there is no initial offset in ZooKeeper or the offset is out of range
        props.put("zookeeper.session.timeout.ms", 4000 + "");
        props.put("zookeeper.sync.time.ms", "200");
        return new ConsumerConfig(props);
    }

    @Override
    public void run() {
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, new Integer(1));// number of streams (threads)
        Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);
        // If more than one stream was requested above, start one thread per stream and run the loop below in each.
        KafkaStream<byte[], byte[]> stream = streams.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        MessageAndMetadata<byte[], byte[]> messageAndMetaData = null;
        while (it.hasNext()) {
            messageAndMetaData = it.next();
            System.out.println(MessageFormat.format("Receive->[ message:{0} , partition:{1} , offset:{2} ]",
                    new String(messageAndMetaData.message()), messageAndMetaData.partition() + "",
                    messageAndMetaData.offset() + ""));
            try {
                sleep(SLEEP);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        JConsumer con = new JConsumer("102011");
        con.start();
    }
}
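
The class above consumes with a single stream. For the second approach mentioned in the comment (more than one stream per topic), a minimal sketch is shown below, assuming the same 0.8 high-level consumer API; the class name, group id, and thread count here are illustrative, not part of the original code.

package com.iflytek.spark.test;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

// Sketch of the "multiple streams + thread pool" consumption approach.
public class JMultiStreamConsumer {

    public static void consume(String zkHost, String topic, int threadNum) {
        Properties props = new Properties();
        props.put("zookeeper.connect", zkHost);
        props.put("group.id", "jd-group");
        ConsumerConnector consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));

        // Request threadNum streams for the topic.
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, threadNum);
        Map<String, List<KafkaStream<byte[], byte[]>>> streams = consumer.createMessageStreams(topicCountMap);

        // One worker thread per stream.
        ExecutorService executor = Executors.newFixedThreadPool(threadNum);
        for (final KafkaStream<byte[], byte[]> stream : streams.get(topic)) {
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    ConsumerIterator<byte[], byte[]> it = stream.iterator();
                    while (it.hasNext()) {
                        System.out.println("Receive->" + new String(it.next().message()));
                    }
                }
            });
        }
    }
}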

2. Producer code

package com.iflytek.spark.test;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

import com.iflytek.spark.bean.SysCode;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

/**
 * A rule (a key plus a partitioning function) can be set so that messages are written to a specific partition:
 *
 * 1. If the message is sent without a key, Kafka picks a partition at random.
 *
 * 2. Otherwise, if a partitioning function is configured (via partitioner.class), that function takes
 *    the key as its argument and decides which partition to write to (a sketch of a custom partitioner
 *    follows the class below).
 *
 * 3. Otherwise, Kafka writes to the partition given by hash(key) % partitionNum.
 */
public class JProducer extends Thread {
    private Producer<String, String> producer;
    private String topic;
    private final int SLEEP = 10;
    private final int msgNum = 1000;

    public JProducer(String topic) {
        Properties props = new Properties();
        props.put("metadata.broker.list", SysCode.KAFKAHOST);// e.g. 192.168.6.127:9092,192.168.6.128:9092
        // request.required.acks
        //  0: the producer never waits for an acknowledgement from the broker (same behavior as 0.7).
        //     Lowest latency but weakest durability guarantees (some data will be lost when a server fails).
        //  1: the producer gets an acknowledgement after the leader replica has received the data.
        //     Better durability, as the client waits until the server acknowledges the request as successful
        //     (only messages written to the now-dead leader but not yet replicated will be lost).
        // -1: the producer gets an acknowledgement after all in-sync replicas have received the data.
        //     Best durability: no messages are lost as long as at least one in-sync replica remains.
        props.put("request.required.acks", "-1");
        // Serializer class for the message value
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        // Serializer class for the message key
        props.put("key.serializer.class", "kafka.serializer.StringEncoder");
        // A custom partitioning function can route messages to specific partitions;
        // if none is given, Kafka uses hash(messageKey) % partitionNum.
        // props.put("partitioner.class", "com.zsm.kfkdemo.MyPartitioner");
        producer = new Producer<String, String>(new ProducerConfig(props));
        this.topic = topic;
    }

    @Override
    public void run() {
        boolean isBatchWriteMode = false;
        System.out.println("isBatchWriteMode: " + isBatchWriteMode);
        if (isBatchWriteMode) {
            // Batch send
            int batchSize = 100;
            List<KeyedMessage<String, String>> msgList = new ArrayList<KeyedMessage<String, String>>(batchSize);
            for (int i = 0; i < msgNum; i++) {
                String msg = "Message_" + i;
                msgList.add(new KeyedMessage<String, String>(topic, i + "", msg));
                // msgList.add(new KeyedMessage<String, String>(topic, msg));// no key: Kafka picks a partition itself
                if (i % batchSize == 0) {
                    producer.send(msgList);
                    System.out.println("Send->[" + msgList + "]");
                    msgList.clear();
                    try {
                        sleep(SLEEP);
                    } catch (Exception ex) {
                        ex.printStackTrace();
                    }
                }
            }
            producer.send(msgList);
        } else {
            // Send one message at a time
            for (int i = 0; i < msgNum; i++) {
                KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, i + "", "Message_" + i);
                // KeyedMessage<String, String> msg = new KeyedMessage<String, String>(topic, "Message_" + i);// no key: Kafka picks a partition itself
                producer.send(msg);
                System.out.println("Send->[" + msg + "]");
                try {
                    sleep(SLEEP);
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
            }
        }
        System.out.println("send done");
    }

    public static void main(String[] args) {
        JProducer pro = new JProducer("test1");
        pro.start();
    }
}
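
The commented-out partitioner.class setting above refers to a custom partitioner (com.zsm.kfkdemo.MyPartitioner, not shown in the original). A minimal sketch of what such a class might look like, assuming the old kafka.producer.Partitioner interface used by this 0.8 producer, is:

package com.zsm.kfkdemo;

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

// Minimal custom partitioner for the old producer API: routes messages by hash of the key.
public class MyPartitioner implements Partitioner {

    // The old API instantiates the partitioner reflectively with a VerifiableProperties argument.
    public MyPartitioner(VerifiableProperties props) {
    }

    @Override
    public int partition(Object key, int numPartitions) {
        // Non-negative hash of the key, modulo the partition count.
        return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}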

3. Topic admin code

package com.iflytek.spark.test;

import com.iflytek.spark.bean.SysCode;

import kafka.admin.TopicCommand;


public class JTopic {
    public static void createTopic(String zkAddr, String topicName, int partition, int replication) {
        String[] options = new String[] { "--create", "--zookeeper", zkAddr, "--topic", topicName, "--partitions",
                partition + "", "--replication-factor", replication + "" };
        TopicCommand.main(options);
    }

    public static void listTopic(String zkAddr) {
        String[] options = new String[] { "--list", "--zookeeper", zkAddr };
        TopicCommand.main(options);
    }

    public static void describeTopic(String zkAddr, String topicName) {
        String[] options = new String[] { "--describe", "--zookeeper", zkAddr, "--topic", topicName, };
        TopicCommand.main(options);
    }

    public static void alterTopic(String zkAddr, String topicName) {
        String[] options = new String[] { "--alter", "--zookeeper", zkAddr, "--topic", topicName, "--partitions", "5" };
        TopicCommand.main(options);
    }

    // Deletes the topic by removing the corresponding path in ZooKeeper; only the ZooKeeper
    // metadata is removed, the actual log data on the Kafka brokers is not deleted.
    public static void deleteTopic(String zkAddr, String topicName) {
        String[] options = new String[] { "--delete", "--zookeeper", zkAddr, "--topic", topicName };
        TopicCommand.main(options);
    }

    public static void main(String[] args) {

        String myTestTopic = "test2";
        int myPartition = 4;
        int myreplication = 1;

//        createTopic(SysCode.ZKHOST, myTestTopic, myPartition, myreplication);
        listTopic(SysCode.ZKHOST);
//        describeTopic(SysCode.ZKHOST, myTestTopic);
//         alterTopic(SysCode.ZKHOST, myTestTopic);
//         deleteTopic(SysCode.ZKHOST, myTestTopic);
    }

}

4. Supplementary utility code

package com.iflytek.spark.test;


import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;  
import java.util.Arrays;
import java.util.HashMap;  
import java.util.Iterator;
import java.util.List;  
import java.util.Map;  
import java.util.Map.Entry;  
import java.util.Properties;  
import java.util.concurrent.ExecutorService;  
import java.util.concurrent.Executors;  

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.security.JaasUtils;
import org.slf4j.Logger;  
import org.slf4j.LoggerFactory;  

import com.iflytek.spark.bean.SysCode;

import kafka.admin.AdminUtils;
import kafka.admin.RackAwareMode;
import kafka.admin.TopicCommand;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;  
import kafka.consumer.ConsumerIterator;  
import kafka.consumer.KafkaStream;  
import kafka.javaapi.consumer.ConsumerConnector;  
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;  
import kafka.producer.ProducerConfig;  
import kafka.serializer.StringDecoder;
import kafka.server.ConfigType;
import kafka.utils.VerifiableProperties;
import kafka.utils.ZkUtils;

public class KafkaTest {  

    private static final Logger LOG = LoggerFactory.getLogger(KafkaTest.class);  
//
//    // Configuration
//    private Properties prop;
//
//    // Topic name
//    private final String topic;
//
//    // Thread count, same as the number of Kafka partitions
//    private final int threadNum;
//
//    private int key = 0;
//
//    public KafkaTest(Properties prop) {
//
//        this.prop = prop;
//        topic = prop.getProperty("kafka.topic");
//        threadNum = Integer.parseInt(prop.getProperty("thread.count"));
//
//    }

   /**
    * 创建topic
    * @param topic
    * @param zkHost
    */
    public static void createTopic(String topic,String zkHost){

        ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
//       Create the topic with a single partition and a single replica
        AdminUtils.createTopic(zkUtils, topic, 1, 1, new Properties(), RackAwareMode.Enforced$.MODULE$);
        zkUtils.close();
    } 

    /**
     * 删除topic
     * @param topic
     * @param zkHost
     */
    public static void deleteTopic(String topic,String zkHost){

        ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
//       Delete the topic
        AdminUtils.deleteTopic(zkUtils, topic);
        zkUtils.close();
    }

    /**
     * 查询topic
     * @param topic
     * @param zkHost
     */
    public static void queryTopic(String topic, String zkHost) {

        ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
        // Fetch the topic-level configuration for the given topic
        Properties props = AdminUtils.fetchEntityConfig(zkUtils, ConfigType.Topic(), topic);
        // Print every topic-level property
        Iterator<Map.Entry<Object, Object>> it = props.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<Object, Object> entry = it.next();
            Object key = entry.getKey();
            Object value = entry.getValue();
            System.out.println(key + " = " + value);
        }
        zkUtils.close();
    }

    /**
     * 更新topic
     * @param topic
     * @param zkHost
     */
    public static void updateTopic(String topic,String zkHost){

        ZkUtils zkUtils = ZkUtils.apply(zkHost, 30000, 30000, JaasUtils.isZkSecurityEnabled());
        Properties props = AdminUtils.fetchEntityConfig(zkUtils, ConfigType.Topic(), topic);
        // Add a topic-level property
        props.put("min.cleanable.dirty.ratio", "0.3");
        // Remove a topic-level property
        props.remove("max.message.bytes");
        // Apply the modified configuration to the topic
        AdminUtils.changeTopicConfig(zkUtils, topic, props);
        zkUtils.close();
    }

    /**
     * 消费者
     * @param topic
     * @param props
     */
    public static void consume(String topic, Properties props) {
        ConsumerConfig config = new ConsumerConfig(props);
        ConsumerConnector consumer = Consumer.createJavaConsumerConnector(config);

        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, new Integer(1));

        StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
        StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());

        Map<String, List<KafkaStream<String, String>>> consumerMap =
                consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);
        KafkaStream<String, String> stream = consumerMap.get(topic).get(0);
        ConsumerIterator<String, String> it = stream.iterator();
        while (it.hasNext()) {
            System.out.println(it.next().message());
        }
    }


    /**
     * kafka消费者
     * @param topic
     * @param zkHost
     */
    public static void kafkaConsumer(String topic,String zkHost) {
        Properties props = new Properties();
        // ZooKeeper connection
        props.put("zookeeper.connect", zkHost);
        // group.id identifies a consumer group
        props.put("group.id", "jd-group");
        // ZooKeeper session timeout
        props.put("zookeeper.session.timeout.ms", "4000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "smallest");
        // Serializer class
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        consume(topic,props);
    }

    /** ================================ The code below has issues and needs improvement ========================================== **/

    public static void producer(String topic, String key, String value, Properties props) {

        // Instantiate the producer
        KafkaProducer kafkaProducer = new KafkaProducer(props);

        ProducerRecord producerRecord = new ProducerRecord(topic, key , value);

        for(int i = 0 ; i < 10 ; i++){
            kafkaProducer.send(new ProducerRecord(topic, i+""));
            System.out.println(i);
        }
//      kafkaProducer.send(producerRecord, new Callback() {
//          
//          @Override
//          public void onCompletion(RecordMetadata recordMetadata, Exception exception) {
//              // TODO Auto-generated method stub
//              if(null != exception){
//                  LOG.info(String.format("Failed to send data, error: %s", exception.getMessage()));
//              }
//          }
//      });
        kafkaProducer.close();
    }


    public static void kafkaProducer(String topic, String key, String value) {

        Properties props = new Properties();
        props.put("bootstrap.servers", SysCode.KAFKAHOST); 
        props.put("acks", "all"); 
        props.put("retries",0);
        props.put("batch.size",16384);
        props.put("linger.ms",1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer","org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer","org.apache.kafka.common.serialization.StringSerializer");
        producer(topic, key, value,props);
    }
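
    // The divider above marks producer()/kafkaProducer() as problematic. The method below is a
    // hedged, assumed fix (not the original author's code): it uses the new
    // org.apache.kafka.clients.producer API with typed generics, a per-record key, and a send
    // callback, and it expects the same StringSerializer properties built in kafkaProducer().
    public static void fixedProducer(String topic, Properties props) {
        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<String, String>(props);
        try {
            for (int i = 0; i < 10; i++) {
                ProducerRecord<String, String> record =
                        new ProducerRecord<String, String>(topic, i + "", "Message_" + i);
                kafkaProducer.send(record, new Callback() {
                    @Override
                    public void onCompletion(RecordMetadata metadata, Exception exception) {
                        if (exception != null) {
                            LOG.error("Failed to send record", exception);
                        }
                    }
                });
            }
        } finally {
            kafkaProducer.close(); // blocks until previously sent records are flushed
        }
    }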

    /**
     * 查询所有的topic
     * @param zkHost
     */
    public static List<String> queryAllTopics(String zkHost) {
        String [] options = new String[]{
                "--list",
                "--zookeeper",
                zkHost
        };
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(1024*3);

        PrintStream printStream = new PrintStream(byteArrayOutputStream);

        PrintStream oldPrintStream = System.out;

        System.setOut(printStream);

        TopicCommand.main(options);

        String message = byteArrayOutputStream.toString();

        List<String> list = new ArrayList<String>();

        String[] ss = message.split("\n");

        list = Arrays.asList(ss);

        System.setOut(oldPrintStream);

        for(int i = 0 ; i < ss.length ; i++){
            System.out.println(list.get(i));
        }

        return list;
    }



    /**
     * Send messages to Kafka (key is null)
     */
//    public void simpleAddQueue(String... msgs) {
//        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(prop));
//        List<KeyedMessage<String, String>> data = new ArrayList<KeyedMessage<String, String>>();
//        for (String msg : msgs) {
//            data.add(new KeyedMessage<String, String>(topic, msg));
//            LOG.debug("Added to Kafka queue: topic[" + topic + "]; message[" + msg + "]");
//        }
//        if (!data.isEmpty()) {
//            producer.send(data);
//            LOG.debug("Sent to Kafka successfully!");
//        }
//        // Close the producer
//        producer.close();
//    }

    /**
     * Send messages to Kafka as key/value pairs
     */
//    public void addQueue(Map<String, List<String>> msgs) {
//        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(prop));
//        List<KeyedMessage<String, String>> data = new ArrayList<KeyedMessage<String, String>>();
//        for (Entry<String, List<String>> entry : msgs.entrySet()) {
//            for (String msg : entry.getValue()) {
//                data.add(new KeyedMessage<String, String>(topic, entry.getKey(), msg));
//                LOG.debug("Added to Kafka queue: topic[" + topic + "]; key[" + entry.getKey() + "]; message[" + msg + "]");
//            }
//        }
//        if (!data.isEmpty()) {
//            producer.send(data);
//            LOG.debug("Sent to Kafka successfully!");
//        }
//        producer.close();
//    }

    /**
     * Distribute messages evenly across the Kafka partitions according to threadNum
     */
//    public void addQueue(String... msgs) {
//        Map<String, List<String>> map = new HashMap<String, List<String>>();
//        for (String msg : msgs) {
//            key = key >= threadNum ? 0 : key;
//            if (!map.containsKey(key + "")) {
//                map.put(key + "", new ArrayList<String>());
//            }
//            map.get(key + "").add(msg);
//            key++;
//            if (key > Integer.MAX_VALUE / 2) {
//                key = 0;
//            }
//        }
//        addQueue(map);
//    }

    /**
     * Get the default list of Kafka consumer streams
     */
//    public List<KafkaStream<byte[], byte[]>> getStream() {
//        ConsumerConnector consumerConnector = Consumer
//                .createJavaConsumerConnector(new ConsumerConfig(prop));
//
//        Map<String, Integer> map = new HashMap<String, Integer>();
//        map.put(topic, threadNum);
//        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumerConnector
//                .createMessageStreams(map);
//        return consumerMap.get(topic);
//    }

    /**
     * Get the list of Kafka consumer streams for the given groupId
     */
//    public List<KafkaStream<byte[], byte[]>> getStream(String groupId) {
//        prop.setProperty("group.id", groupId);
//        return getStream();
//    }
//
    /**
     * Get the topic
     */
//    public String getTopic() {
//        return topic;
//    }
//
//    /**
//     * Get the thread count, equal to the number of Kafka partitions
//     */
//    public int getThreadNum() {
//        return threadNum;
//    }

    /**
     * Send test
     */
//    public static void testSendKfk() {
//
//        Properties conf = new Properties();
//        conf.put("metadata.broker.list", "10.30.15.55:39091,10.30.15.56:39091,10.30.15.57:39091");
//        conf.put("kafka.topic", "SparkTest");
//        conf.put("serializer.class", "kafka.serializer.StringEncoder");
//        conf.put("key.serializer.class", "kafka.serializer.StringEncoder");
//        conf.put("thread.count", "1");
//
//        KafkaTest kfk = new KafkaTest(conf);
//        //for (int i = 0; i < 10; i++) {
//        kfk.addQueue("test1", "test2", "test3", "test4", "test5");
//        kfk.addQueue("test1", "test2", "test3", "test4", "test5");
//        kfk.addQueue("test1", "test2", "test3", "test4", "test5");
//        kfk.addQueue("test1", "test2", "test3", "test4", "test5");
//        kfk.addQueue("test1", "test2", "test3", "test4", "test5");
//        LOG.info("Done sending");
//        //}
//    }
//
//    /**
//     * Consume test
//     */
//    public static void testConsumer() {
//
//        Properties conf = new Properties();
//        conf.put("kafka.topic", "SparkTest");
//        conf.put("thread.count", "1");
//        conf.put("zookeeper.connect", SysCode.ZKHOST);
//        conf.put("zookeeper.connectiontimeout.ms", "30000");
//        conf.put("zookeeper.session.timeout.ms", "800");
//        conf.put("zookeeper.sync.time.ms", "200");
//        conf.put("auto.commit.interval.ms", "1000");
//        conf.put("auto.offset.reset", "smallest");
//        conf.put("kafka.topic", "SparkTest");
//
//        KafkaTest kfk = new KafkaTest(conf);
//
//        List<KafkaStream<byte[], byte[]>> result = kfk.getStream();
//        // Thread pool
//        ExecutorService executor = Executors.newFixedThreadPool(kfk.getThreadNum());
//        for (final KafkaStream<byte[], byte[]> stream : result) {
//            executor.submit(new ConsumerThread(stream));
//
//        }
//    }

    public static void main(String args[]){  
//        createTopic("SparkTest", SysCode.ZKHOST);
//        deleteTopic("test1", SysCode.CDH_ZKHOST);
//        queryTopic("test",SysCode.CDH_ZKHOST);
//        updateTopic("test",SysCode.CDH_ZKHOST);
//        kafkaProducer("test", "hmwang", "22222");// has known issues
        kafkaConsumer("test1", SysCode.ZKHOST);
    }  

}  
