Flink Consuming Kafka Data

package flinkjava.Connecter.Kafka;

import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class KafkaProducer {
    public static void main(String[] args) {
        String brokerList = "114.116.219.197:5008,114.116.199.154:5008,114.116.220.98:5008";
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
        properties.put(ProducerConfig.ACKS_CONFIG, "all");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);

        // Fully qualified name to avoid a clash with this class's own name
        org.apache.kafka.clients.producer.KafkaProducer<String, String> kafkaProducer =
                new org.apache.kafka.clients.producer.KafkaProducer<>(properties);

        // Send one message every 3 seconds; the value is a comma-separated string like "0id,0name,0"
        for (int i = 0; i < 1000; i++) {
            kafkaProducer.send(new ProducerRecord<>("flinktestkafka", Integer.toString(i), i + "id," + i + "name," + i));
            try {
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        kafkaProducer.close();
    }
}

Basic consumption


package flinkjava.Connecter.Kafka;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Properties;

public class KafkaSource {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);
        // Kafka source configuration
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","114.116.219.197:5008");
        properties.setProperty("group.id","test");


        FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>("flinktestkafka", new SimpleStringSchema(), properties);
        DataStreamSource<String> streamFromKafka = env.addSource(flinkKafkaConsumer);
        // Parse each record into a KafkaElement (a sketch of that class follows this listing) and print its fields
        SingleOutputStreamOperator<KafkaElement> mapresult = streamFromKafka.map(new MapFunction<String, KafkaElement>() {
            @Override
            public KafkaElement map(String value) throws Exception {
                String[] sp = value.split(",");
                System.out.println(sp[0]+"  "+sp[1]+"  "+sp[2]);
                return new KafkaElement(sp[0], sp[1], Integer.valueOf(sp[2]));
            }
        });
        try {
            env.execute("this is kafkaflink job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
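
Both consumers in this post construct a KafkaElement, but the class itself is not shown. Below is a minimal sketch of what such a POJO might look like, with field names and types inferred purely from how it is used here; the public no-argument constructor and getters/setters are what let Flink treat it as a POJO and keyBy("id") later:

package flinkjava.Connecter.Kafka;

// Hypothetical POJO reconstructed from usage in this post; adjust to your actual class.
public class KafkaElement {
    private String id;
    private String name;
    private long age; // also used later as the event-time timestamp

    // Flink's POJO serializer requires a public no-arg constructor
    public KafkaElement() {}

    public KafkaElement(String id, String name, long age) {
        this.id = id;
        this.name = name;
        this.age = age;
    }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public long getAge() { return age; }
    public void setAge(long age) { this.age = age; }

    @Override
    public String toString() {
        return "KafkaElement{id='" + id + "', name='" + name + "', age=" + age + "}";
    }
}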

Consuming from specified partitions and offsets

package flinkjava.Connecter.Kafka;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class KafkaFlinkTest1 {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);
        // Kafka source configuration
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","114.116.219.197:5008");
        properties.setProperty("group.id","KafkaFlinkTest1a");


        FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>("flinktestkafka", new SimpleStringSchema(), properties);
        Map<KafkaTopicPartition, Long> specificStartOffsets = new HashMap<>();
        specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka", 0), 10L);
//        specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka", 1), 0L);
//        specificStartOffsets.put(new KafkaTopicPartition("flinktestkafka", 2), 0L);
        flinkKafkaConsumer.setStartFromSpecificOffsets(specificStartOffsets);
        DataStreamSource<String> streamFromKafka = env.addSource(flinkKafkaConsumer);

        // Parse and print each record; the mapped value is not consumed downstream in this example
        streamFromKafka.map(new MapFunction<String, Object>() {
            @Override
            public Object map(String value) throws Exception {
                String[] sp = value.split(",");
                System.out.println(sp[0] + "  " + sp[1] + "  " + sp[2]);
                return value;
            }
        });

        try {
            env.execute("this is kafkaflink job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
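
Besides setStartFromSpecificOffsets, FlinkKafkaConsumer also exposes a few other start-position settings. A quick sketch of the common ones (note that when the job restores from a checkpoint, the offsets stored in the checkpoint take precedence over these settings):

// Start from the earliest offset available in each partition
flinkKafkaConsumer.setStartFromEarliest();

// Start from the latest offset, so only newly produced records are read
flinkKafkaConsumer.setStartFromLatest();

// Start from the consumer group's committed offsets (the default behaviour)
flinkKafkaConsumer.setStartFromGroupOffsets();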

This is the output from consuming with a specific partition set. As you can see, each message written to Kafka can end up in a different partition; the choice is normally driven by the message key, although here the assignment looks more or less random. That is not the focus of this post, though.
[Figure 1: console output of the run described above]
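
If you want the producer to pin messages to one partition instead of relying on the default key-based partitioner, ProducerRecord has an overload that takes the partition index explicitly. A small sketch, reusing the topic and value format from the producer example above:

// Send every record to partition 0 of the topic, bypassing the key-based partitioner
kafkaProducer.send(new ProducerRecord<>("flinktestkafka", 0, Integer.toString(i), i + "id," + i + "name," + i));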

Handling late data with a side output

package flinkjava.Connecter.Kafka;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import javax.annotation.Nullable;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

public class KafkaFlinkTest1 {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//        env.enableCheckpointing(5000);
        // Kafka source configuration
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","114.116.219.197:5008");
        properties.setProperty("group.id","KafkaFlinkTest1a");


        FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>("flinktestkafka8", new SimpleStringSchema(), properties);
//        Map specificStartOfssets = new HashMap<>();
//        specificStartOfssets.put(new KafkaTopicPartition("flinktestkafka4",0),0L);
//        specificStartOfssets.put(new KafkaTopicPartition("flinktestkafka4",1),0L);
//        specificStartOfssets.put(new KafkaTopicPartition("flinktestkafka4",2),0L);
//        flinkKafkaConsumer.setStartFromSpecificOffsets(specificStartOfssets);
        DataStreamSource<String> streamFromKafka = env.addSource(flinkKafkaConsumer);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Parse each record into a KafkaElement; the third field (age) is used below as the event timestamp
        SingleOutputStreamOperator<KafkaElement> mapresult = streamFromKafka.map(new MapFunction<String, KafkaElement>() {
            @Override
            public KafkaElement map(String value) throws Exception {
                String[] splits = value.split(",");
                System.out.println(Thread.currentThread() + " received: " + value);
                return new KafkaElement(splits[0], splits[1], Long.valueOf(splits[2]));
            }
        });

        // Assign event timestamps and periodic watermarks that allow up to 5 seconds of out-of-orderness
        SingleOutputStreamOperator<KafkaElement> watermarksstream = mapresult.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<KafkaElement>() {
            Long currentMaxTimestamp = 0L;
            Long maxOutOfOrderness = 5000L;


            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
            }

            @Override
            public long extractTimestamp(KafkaElement element, long previousElementTimestamp) {
                long timestamp = element.getAge();
                currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
                return timestamp;
            }
        });
        /** Output tag used to mark late data */
        OutputTag<KafkaElement> outputTag = new OutputTag<KafkaElement>("late-data"){};

        SingleOutputStreamOperator<KafkaOut> winstream = watermarksstream.keyBy("id")
                .window(TumblingEventTimeWindows.of(Time.seconds(3)))
                .sideOutputLateData(outputTag)//j解决迟到数据
                .apply(new WindowFunctionTest());

        /**
         * Late-data handling.
         * The late records printed here should be the original elements, which have not gone
         * through the window/apply processing.
         * */
        DataStream<KafkaElement> sideOutput = winstream.getSideOutput(outputTag);
        sideOutput.print();
        /**
         * On-time (non-late) data handling.
         * */
        winstream.map(new MapFunction<KafkaOut, Object>() {
            @Override
            public Object map(KafkaOut value) throws Exception {
                System.out.println(value);
                return value;
            }
        }).print();


        try {
            env.execute("this is kafkaflink job");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // A custom WindowFunction that concatenates the names of all elements in each window
    static class WindowFunctionTest implements WindowFunction<KafkaElement,KafkaOut, Tuple, TimeWindow>{
        /**
         * Called when the window fires.
         * input iterates over all elements assigned to this window.
         * */
        @Override
        public void apply(Tuple tuple, TimeWindow window, Iterable<KafkaElement> input, Collector<KafkaOut> out) throws Exception {
            System.out.println("string s是:"+tuple.toString());
            Iterator<KafkaElement> iterators = input.iterator();
            StringBuilder stringBuilder = new StringBuilder();
            while(iterators.hasNext()){
                KafkaElement tmpkafkaElement = iterators.next();
                stringBuilder.append(tmpkafkaElement.getName());
            }
            out.collect(new KafkaOut(Integer.valueOf(tuple.getField(0)), stringBuilder.toString()));
        }



    }

}
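
The window function emits a KafkaOut type that is also not shown in the post. Below is a minimal guess at its shape, based only on the constructor call new KafkaOut(int, String) and the fact that it gets printed:

package flinkjava.Connecter.Kafka;

// Hypothetical result type reconstructed from usage; adjust to your actual class.
public class KafkaOut {
    private int id;
    private String names; // concatenated names of all elements in the window

    public KafkaOut() {}

    public KafkaOut(int id, String names) {
        this.id = id;
        this.names = names;
    }

    public int getId() { return id; }
    public String getNames() { return names; }

    @Override
    public String toString() {
        return "KafkaOut{id=" + id + ", names='" + names + "'}";
    }
}

One more note: AssignerWithPeriodicWatermarks is deprecated from Flink 1.11 onwards; the newer equivalent is WatermarkStrategy.forBoundedOutOfOrderness(Duration.ofSeconds(5)) combined with withTimestampAssigner, but the code above keeps the older API used in this post.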
