Custom Flume sink for Kafka


    1. Create an agent; the sink type must be set to the custom sink's fully qualified class name
        vi /usr/local/flume/conf/agent3.conf
        agent3.sources=as1
        agent3.channels=c1
        agent3.sinks=s1

        agent3.sources.as1.type=avro
        agent3.sources.as1.bind=0.0.0.0
        agent3.sources.as1.port=41414
        agent3.sources.as1.channels=c1

        agent3.channels.c1.type=memory

        agent3.sinks.s1.type=storm.test.kafka.TestKafkaSink
        agent3.sinks.s1.channel=c1
    2. Create the custom Kafka sink (the custom sink wraps a Kafka producer); the code is shown below
        //See the Flume developer guide for writing custom sinks: http://flume.apache.org/FlumeDeveloperGuide.html#sink
        //A custom Kafka sink must extend the AbstractSink class and implement the Configurable interface
        //The Kafka topic this sink writes to (test111) must already exist; a creation command is sketched after the sink code

package storm.test.kafka;

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.StringEncoder;

import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;

public class TestKafkaSink extends AbstractSink implements Configurable {

    Producer<String, String> producer;
    String topic = "test111";

    @Override
    public Status process() throws EventDeliveryException {
        Status status = null;
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        transaction.begin();
        try {
            Event event = channel.take();
            if (event == null) {
                // Nothing available in the channel: roll back and ask Flume to back off
                transaction.rollback();
                status = Status.BACKOFF;
                return status;
            }
            byte[] body = event.getBody();
            final String msg = new String(body);
            // Forward the event body to Kafka as a string message on the configured topic
            final KeyedMessage<String, String> message = new KeyedMessage<String, String>(topic, msg);
            producer.send(message);
            transaction.commit();
            status = Status.READY;
        } catch (Exception e) {
            transaction.rollback();
            status = Status.BACKOFF;
        } finally {
            transaction.close();
        }

        return status;
    }

    @Override
    public void configure(Context context) {
        // Build the Kafka 0.8 producer from fixed connection properties
        Properties prop = new Properties();
        prop.put("zookeeper.connect", "h5:2181,h6:2181,h7:2181");
        prop.put("metadata.broker.list", "h5:9092,h6:9092,h7:9092");
        prop.put("serializer.class", StringEncoder.class.getName());
        producer = new Producer<String, String>(new ProducerConfig(prop));
    }

}
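
        Because the sink writes to the topic test111, that topic must exist before the agent is started. A minimal sketch of creating it on one of the Kafka nodes (the replication factor and partition count here are assumptions, not taken from the original setup):

            [root@h5 kafka]# bin/kafka-topics.sh --create --zookeeper h5:2181 --replication-factor 1 --partitions 1 --topic test111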

        Package the code as kafkasink.jar and copy it to the flume/lib directory on the Flume node. In addition, copy these four jars to flume/lib on the same node: kafka_2.10-0.8.2.0.jar, kafka-clients-0.8.2.0.jar, metrics-core-2.2.0.jar, and scala-library-2.10.4.jar.
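
        For reference, a minimal sketch of that copy step, assuming the Kafka jars are available under a local Kafka installation at /usr/local/kafka/libs (the paths are illustrative, not part of the original setup):

            # illustrative paths -- adjust to where the jars actually live
            cp kafkasink.jar /usr/local/flume/lib/
            cp /usr/local/kafka/libs/kafka_2.10-0.8.2.0.jar /usr/local/flume/lib/
            cp /usr/local/kafka/libs/kafka-clients-0.8.2.0.jar /usr/local/flume/lib/
            cp /usr/local/kafka/libs/metrics-core-2.2.0.jar /usr/local/flume/lib/
            cp /usr/local/kafka/libs/scala-library-2.10.4.jar /usr/local/flume/lib/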
    3. Start the Flume agent that uses the custom Kafka sink
        [root@h5 ~]# cd /usr/local/flume/
        [root@h5 flume]# bin/flume-ng agent --conf conf/ --conf-file conf/agent3.conf --name agent3 -Dflume.root.logger=INFO,console
    4. Write log output to the Flume agent; the configuration and code are shown below
        log4j.properties
            log4j.rootLogger=INFO,flume
            log4j.appender.flume = org.apache.flume.clients.log4jappender.Log4jAppender
            log4j.appender.flume.Hostname = 192.168.1.35
            log4j.appender.flume.Port = 41414
            log4j.appender.flume.UnsafeMode = true
        The Java code that writes logs to Flume is as follows

package com.mengyao.flume;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;

public class FlumeProducer {

    private static List<String> getLines() {
        List<String> lines = null;
        try {
            // Read the lines of the first file found directly under D:/ (non-recursive)
            final Collection<File> listFiles = FileUtils.listFiles(new File("D:/"), null, false);
            for (File file : listFiles) {
                lines = FileUtils.readLines(file);
                break;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        return lines;
    }

    public static void main(String[] args) throws Exception {
        final List<String> lines = getLines();
        final Logger logger = Logger.getLogger(FlumeProducer.class);
        for (String line : lines) {
            // Each log call is forwarded to the Flume agent by the Log4jAppender
            logger.info(line + "\t" + System.currentTimeMillis());
            Thread.sleep(1000);
        }
    }
}


            The flume-ng-log4jappender-1.5.0-cdh5.1.3-jar-with-dependencies.jar dependency must be on this program's classpath.
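            A minimal launch sketch with that jar on the classpath (jar names, versions, and locations are illustrative assumptions; on Windows the classpath separator is ';'):

                java -cp .;log4j-1.2.17.jar;commons-io-2.4.jar;flume-ng-log4jappender-1.5.0-cdh5.1.3-jar-with-dependencies.jar com.mengyao.flume.FlumeProducer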
    5. Use a Kafka consumer to consume the data produced by Flume (the custom Kafka sink wraps a Kafka producer)
        1. Consumer shell command
            [root@h7 kafka]# bin/kafka-console-consumer.sh --zookeeper h7:2181 --topic test111 --from-beginning        ##The Kafka cluster is h5, h6, h7 and the ZooKeeper cluster is h5, h6, h7; the consumer behaves the same on any Kafka node
        
        2. Consumer code (Java)

package storm.test.kafka;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.serializer.StringEncoder;

public class TestConsumer {

    static final String topic = "test111";

    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put("zookeeper.connect", "h5:2181,h6:2181,h7:2181");
        prop.put("serializer.class", StringEncoder.class.getName());
        prop.put("metadata.broker.list", "h5:9092,h6:9092,h7:9092");
        prop.put("group.id", "group1");
        ConsumerConnector consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(prop));
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, 1);
        // One KafkaStream per topic and thread; here a single stream for test111
        Map<String, List<KafkaStream<byte[], byte[]>>> messageStreams = consumer.createMessageStreams(topicCountMap);
        final KafkaStream<byte[], byte[]> kafkaStream = messageStreams.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> iterator = kafkaStream.iterator();
        while (iterator.hasNext()) {
            String msg = new String(iterator.next().message());
            System.out.println("Received message: " + msg);
        }
    }

}

 

Reposted from: https://www.cnblogs.com/mengyao/p/4526058.html
