Kafka Producers and Consumers

This post walks through a small Kafka demo: a producer and a consumer, where the consumer side uses a thread pool to start multiple consumer threads. By making the number of consumers greater than, equal to, and then less than the number of partitions, it verifies Kafka's consumer-side partition assignment (load-balancing) algorithm, described here: http://blog.csdn.net/qq_20641565/article/details/59746101

Create a Maven project; the structure of the program is as follows:

[Figure 1: project structure]

The pom file:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>kafka-new</groupId>
    <artifactId>kafka-new</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.10.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.0.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.0.3</version>
        </dependency>
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.7</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.3.6</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

The MyProducer class:

package com.lijie.kafka;

import java.util.Properties;
import java.util.UUID;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

/**
 * 
 * @author Lijie
 *
 */
public class MyProducer {
     

    public static void main(String[] args) throws Exception {
        produce();
    }

    public static void produce() throws Exception {

        // topic to send to
        String topic = "mytopic";

        // producer configuration
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "192.168.80.123:9092");

        // serializer classes for key and value
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // create the producer
        KafkaProducer<String, String> pro = new KafkaProducer<>(properties);
        while (true) {

            // simulate a message
            String value = UUID.randomUUID().toString();

            // wrap the message in a record
            ProducerRecord<String, String> pr = new ProducerRecord<>(topic, value);

            // send the message
            pro.send(pr);

            // sleep for one second
            Thread.sleep(1000);
        }
    }
}
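The demo fires and forgets. In the 0.10.x producer API, send() also accepts a Callback that is invoked once the broker has acknowledged the record, which makes it easy to see where each record landed. A minimal sketch, assuming the same pro and pr variables as in the loop above (this is my addition, not part of the original demo):

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.RecordMetadata;

// Sketch only: send with a callback so we can log the partition and
// offset each record was written to.
pro.send(pr, new Callback() {
    @Override
    public void onCompletion(RecordMetadata metadata, Exception exception) {
        if (exception != null) {
            exception.printStackTrace();
        } else {
            System.out.println("sent to partition " + metadata.partition()
                    + ", offset " + metadata.offset());
        }
    }
});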

The MyConsumer class:

package com.lijie.kafka;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

/**
 * 
 * @author Lijie
 *
 */
public class MyConsumer {
     
    public static void main(String[] args) {
        consumer();
    }

    public static void consumer() {

        String topic = "mytopic";

        // consumer configuration
        Properties properties = new Properties();
        properties.put("group.id", "lijieGroup");
        properties.put("zookeeper.connect", "192.168.80.123:2181");
        properties.put("auto.offset.reset", "largest");
        properties.put("auto.commit.interval.ms", "1000");

        // build the consumer config
        ConsumerConfig config = new ConsumerConfig(properties);

        // create the consumer connector
        ConsumerConnector conn = Consumer.createJavaConsumerConnector(config);

        // key is the topic, value is the number of streams to create
        // (here, one per partition)
        Map<String, Integer> map = new HashMap<String, Integer>();

        // map the topic to its partition count
        map.put(topic, 3);

        // get the streams: key is the topic name, value holds one stream per
        // partition; this topic has three partitions, so the list has three streams
        Map<String, List<KafkaStream<byte[], byte[]>>> createMessageStreams = conn
            .createMessageStreams(map);

        // take the list of streams for our topic
        List<KafkaStream<byte[], byte[]>> list = createMessageStreams.get(topic);

        // use a fixed thread pool to run three consumers
        ExecutorService executor = Executors.newFixedThreadPool(3);

        // start consuming
        for (int i = 0; i < list.size(); i++) {
            executor.execute(new ConsumerThread("consumer" + (i + 1), list.get(i)));
        }

    }
}
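The demo runs until the process is killed. If you need to stop cleanly, the old high-level consumer's connector exposes a shutdown() method; a minimal sketch, assuming access to the conn and executor variables above (again my addition, not part of the original demo):

// Sketch only: stop cleanly instead of running forever. Shutting the
// connector down first makes the blocked stream iterators end, so the
// pool threads can finish their run() methods.
conn.shutdown();
executor.shutdown();
// awaitTermination declares InterruptedException, so the caller must
// handle or propagate it
executor.awaitTermination(5, java.util.concurrent.TimeUnit.SECONDS);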

The ConsumerThread class:

package com.lijie.kafka;

import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.message.MessageAndMetadata;

/**
 * 
 * @author Lijie
 *
 */
public class ConsumerThread implements Runnable {
     

    // name of this consumer
    private String consumerName;

    // the stream this consumer reads from
    private KafkaStream<byte[], byte[]> stream;

    // constructor
    public ConsumerThread(String consumerName, KafkaStream<byte[], byte[]> stream) {
        super();
        this.consumerName = consumerName;
        this.stream = stream;
    }

    @Override
    public void run() {

        // get an iterator over this consumer's stream
        ConsumerIterator<byte[], byte[]> iterator = stream.iterator();

        // consume messages; hasNext() blocks until the next message arrives
        while (iterator.hasNext()) {

            // take the next message
            MessageAndMetadata<byte[], byte[]> next = iterator.next();

            // topic name
            String topic = next.topic();

            // partition number
            int partitionNum = next.partition();

            // offset
            long offset = next.offset();

            // message body
            String message = new String(next.message());

            // print for testing
            System.out.println("consumerName: " + consumerName + " ,topic: " + topic
                    + " ,partitionNum: " + partitionNum + " ,offset: " + offset
                    + " ,message: " + message);
        }
    }

}

Execution result:
[Figure 2: console output of the three consumers]

From the output above you can see that the topic is mytopic with 3 partitions: consumer 1 consumes partition 0, consumer 2 consumes partition 1, and consumer 3 consumes partition 2, so each consumer gets exactly one partition.
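For reference, a topic with three partitions like this one can be created with the kafka-topics.sh tool that ships with Kafka 0.10; the exact command is not shown in the original post, and the ZooKeeper address here is assumed to match the consumer config:

bin/kafka-topics.sh --create --zookeeper 192.168.80.123:2181 \
    --replication-factor 1 --partitions 3 --topic mytopic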

What happens if I raise the number of consumers to 4 while still keeping only 3 partitions?
As shown below:

[Figure 3: console output with 4 consumers and 3 partitions]

The result is the same as above, and consumer 4 never receives anything.

But what if I set the number of consumers below the number of partitions, say 2?
As shown below:

[Figure 4: console output with 2 consumers and 3 partitions]

You can see that consumer 1 consumes two partitions, partition 1 and partition 0, while consumer 2 consumes only partition 2.

This behavior matches exactly the consumer-to-partition load-balancing (range assignment) algorithm from my previous post; for details see: http://blog.csdn.net/qq_20641565/article/details/59746101
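To make the arithmetic behind these three experiments concrete, here is a small self-contained sketch of that range-style assignment. It is my own illustration of the linked algorithm, not code from the demo: partitions are split into contiguous ranges, and the first nPartitions % nConsumers consumers each receive one extra partition.

public class RangeAssignmentDemo {

    public static void main(String[] args) {
        assign(3, 3); // one partition per consumer, as in Figure 2
        assign(3, 4); // consumer4 gets an empty range, as in Figure 3
        assign(3, 2); // consumer1 gets [0, 1], consumer2 gets [2], as in Figure 4
    }

    // Sketch of range assignment over partitions 0..nPartitions-1.
    public static void assign(int nPartitions, int nConsumers) {
        int perConsumer = nPartitions / nConsumers;
        int extra = nPartitions % nConsumers;
        for (int i = 0; i < nConsumers; i++) {
            // consumers before this one already took min(i, extra) extra partitions
            int start = i * perConsumer + Math.min(i, extra);
            int count = perConsumer + (i < extra ? 1 : 0);
            StringBuilder parts = new StringBuilder();
            for (int p = start; p < start + count; p++) {
                parts.append(p);
                if (p < start + count - 1) {
                    parts.append(", ");
                }
            }
            System.out.println("consumer" + (i + 1) + " -> partitions [" + parts + "]");
        }
        System.out.println();
    }
}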

Of course, by default the Kafka producer balances records evenly across the partitions. If we need to route particular messages to a particular partition, we have to define a custom partitioner, as sketched below.
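The original post ends here, but a minimal sketch of such a partitioner against the 0.10.x producer API might look like the following. The class name MyPartitioner and the key-hash routing rule are my own illustration, not code from the post:

package com.lijie.kafka;

import java.util.Map;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

/**
 * Sketch of a custom partitioner: records with the same key always land
 * on the same partition; records without a key go to partition 0.
 */
public class MyPartitioner implements Partitioner {

    @Override
    public int partition(String topic, Object key, byte[] keyBytes,
            Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionsForTopic(topic).size();
        if (keyBytes == null) {
            // keep the sketch simple: unkeyed records go to partition 0
            return 0;
        }
        // mask off the sign bit so the modulo result is never negative
        return (key.hashCode() & 0x7fffffff) % numPartitions;
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }

    @Override
    public void close() {
    }
}

To use it, the producer would register the class and send keyed records, for example:

properties.put("partitioner.class", "com.lijie.kafka.MyPartitioner");
ProducerRecord<String, String> pr = new ProducerRecord<>(topic, "someKey", value);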
