Flink 1.11: testing watermarks under multiple parallelism

For the single-parallelism source case, see https://blog.csdn.net/xu470438000/article/details/83271123

Conclusions:

### Window trigger conditions
1. The watermark has reached the end of the window: watermark >= window_end_time.
2. At least one record exists in [window_start_time, window_end_time). (Worked example below.)
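
For example, with the 3-second tumbling windows and 10-second bounded out-of-orderness used in the code below, record 0001,1538359882000 is assigned to window [1538359881000, 1538359884000); that window first fires once the watermark reaches 1538359884000, i.e. after a record with event time 1538359894000 has been seen (1538359894000 - 10 s = 1538359884000). This matches the parallelism-1 column of the table below.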

### With allowedLateness(Time.seconds(N)) set
1. The window first fires when watermark >= window_end_time.
2. It fires again (possibly several times) whenever a late record for that window arrives while watermark < window_end_time + allowedLateness.

Under multiple parallelism, watermark alignment takes the minimum watermark across all input channels (minimal sketch below).
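
A minimal sketch of that rule, using hypothetical per-channel watermark values (plain Java for illustration, not Flink's internal API; inside Flink the runtime does this bookkeeping):

```java
// Sketch: a multi-input operator's event-time clock only advances to the
// minimum watermark reported across its input channels.
public class WatermarkAlignmentSketch {
    public static void main(String[] args) {
        // hypothetical watermarks currently reported by 3 upstream subtasks
        long[] channelWatermarks = {1538359884000L, 1538359883000L, 1538359876000L};

        long aligned = Long.MAX_VALUE;
        for (long w : channelWatermarks) {
            aligned = Math.min(aligned, w); // the slowest channel wins
        }
        System.out.println(aligned); // 1538359876000: the slowest channel's watermark
    }
}
```

This is why, in the table below, the parallelism-3 watermark lags the parallelism-1 watermark for the same input.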

The test data is pasted here to save you typing it in. The parallelism-1 watermark walkthrough is in the blog linked above, so it is not repeated here.

| data | watermark (parallelism = 1) | watermark (parallelism = 3) |
| --- | --- | --- |
| 0001,1538359882000 | 1538359872000 | |
| 0001,1538359886000 | 1538359876000 | |
| 0001,1538359892000 | 1538359882000 | 1538359872000 |
| 0001,1538359893000 | 1538359883000 | 1538359876000 |
| 0001,1538359894000 (first window firing at parallelism 1) | 1538359884000 | 1538359882000 |
| 0001,1538359896000 | | 1538359883000 |
| 0001,1538359897000 (first window firing at parallelism 3) | | 1538359884000 |
| 0001,1538359899000 | | |
| 0001,1538359891000 | | |
| 0001,1538359903000 | | |
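
Why does parallelism 3 fire two records later? Each of the three channels has seen only a subset of the input (the parallelism-3 column trails the parallelism-1 column by two rows, consistent with the records being spread across the three subtasks), and the window operator only advances to the minimum watermark of the three channels, so it does not reach 1538359884000 until 0001,1538359897000 arrives.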

### Test data for late records

```
0001,1538359890000
0001,1538359903000
0001,1538359890000
0001,1538359891000
0001,1538359892000
0001,1538359904000
0001,1538359890000
0001,1538359891000
0001,1538359892000
0001,1538359905000
0001,1538359890000
0001,1538359891000
0001,1538359892000
```
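
Reading this list against the rules above (with allowedLateness(Time.seconds(2)) enabled, at parallelism 1): 0001,1538359903000 pushes the watermark to roughly 1538359893000 and fires window [1538359890000, 1538359893000) for the first time. The following 890/891/892 records are late, but each still re-fires that window because the watermark is below 1538359893000 + 2 s. 0001,1538359904000 raises the watermark to roughly 1538359894000, still inside the lateness bound, so the next three late records fire the window again. Once 0001,1538359905000 lifts the watermark to roughly 1538359895000 = window_end + allowedLateness, the final 890/891/892 records are dropped.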

The code is essentially the same as the single-source version; only the source and the watermark generation were changed (the latter uses the WatermarkStrategy API introduced in Flink 1.11).

```java
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.util.Collector;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

public class StreamingWindowWatermark {
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        FlinkKafkaConsumer010<String> kafkaConsumer =
                new FlinkKafkaConsumer010<>("zjf_topic_003", new SimpleStringSchema(), getKafkaProperties());

        SingleOutputStreamOperator<String> text = env.addSource(kafkaConsumer).uid("gateway-source").setParallelism(1);
        text.setParallelism(3); // overrides the setParallelism(1) above: the source runs with 3 subtasks

        // use event time (the default is processing time)
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // set the job parallelism to 3 (the default is the number of CPU cores on this machine)
        env.setParallelism(3);

        // alternative: read the input from a socket
//        DataStream<String> text = env.socketTextStream("172.31.120.110", 8999, "\n");

        // split each line into (key, timestamp)
        SingleOutputStreamOperator<Tuple2<String, Long>> input = text.map(new MapFunction<String, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(String value) throws Exception {
                String[] arr = value.split(",");
                return new Tuple2<>(arr[0], Long.parseLong(arr[1]));
            }
        });

        // generate watermarks with the WatermarkStrategy API introduced in Flink 1.11
        SingleOutputStreamOperator<Tuple2<String, Long>> watermarks = input.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Tuple2<String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(10))
                        .withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
                            @Override
                            public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
                                return element.f1; // the field that carries the event time
                            }
                        })
        );

        SingleOutputStreamOperator<String> window = watermarks.keyBy(0)
                .window(TumblingEventTimeWindows.of(Time.seconds(3))) // assign windows by event time; same effect as timeWindow(Time.seconds(3))
//                .allowedLateness(Time.seconds(2))
                .apply(new WindowFunction<Tuple2<String, Long>, String, Tuple, TimeWindow>() {
                    /**
                     * Sorts the records inside the window so the output is ordered.
                     */
                    @Override
                    public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<String, Long>> input, Collector<String> out) throws Exception {
                        String key = tuple.toString();
                        List<Long> arrayList = new ArrayList<>();

                        Iterator<Tuple2<String, Long>> it = input.iterator();
                        while (it.hasNext()) {
                            Tuple2<String, Long> next = it.next();
                            arrayList.add(next.f1);
                        }
                        Collections.sort(arrayList);
                        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
                        String result = key + "---------" + arrayList.size() + "-----------"
                                + sdf.format(arrayList.get(0)) + "," + sdf.format(arrayList.get(arrayList.size() - 1))
                                + "," + sdf.format(window.getStart()) + "," + sdf.format(window.getEnd());
                        out.collect(result);
                    }
                });

        window.print();

        env.execute("watermark demo");
    }

    private static Properties getKafkaProperties() {
        Properties properties = new Properties();
        // 2020-01-09: switched to the VPC environment
//        properties.setProperty("bootstrap.servers", "172.21.164.59:9092,172.21.147.215:9092,172.21.243.86:9092");
        properties.setProperty("bootstrap.servers", "172.31.117.101:9092");
        properties.setProperty("group.id", "flink_01");
        return properties;
    }
}
```
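
To feed the test lines into zjf_topic_003, a throwaway producer can be used. This sketch relies on the kafka-clients dependency from the pom below; the class name and structure are illustrative, only the broker address and topic name come from the job above:

```java
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class TestDataProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "172.31.117.101:9092"); // same broker as getKafkaProperties()
        props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // paste the test lines from the tables above, one element per record
        String[] lines = {
                "0001,1538359882000",
                "0001,1538359886000",
                "0001,1538359892000"
                // ...
        };

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (String line : lines) {
                producer.send(new ProducerRecord<>("zjf_topic_003", line));
            }
        } // close() flushes any buffered records
    }
}
```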

pom file (dependencies):

```xml
<dependencies>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-jdbc_2.11</artifactId>
        <version>1.11.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-dbcp2</artifactId>
        <version>2.1.1</version>
    </dependency>

    <dependency>
        <groupId>com.alibaba.ververica</groupId>
        <artifactId>flink-format-changelog-json</artifactId>
        <version>1.0.0</version>
    </dependency>

    <dependency>
        <groupId>com.alibaba.ververica</groupId>
        <artifactId>flink-connector-mysql-cdc</artifactId>
        <version>1.0.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-cep_${scala.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>com.google.guava</groupId>
        <artifactId>guava</artifactId>
        <version>22.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-scala_${scala.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>2.11.8</version>
    </dependency>

    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api-scala_2.11</artifactId>
        <version>11.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.8.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-api</artifactId>
        <version>2.8.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-scala-bridge_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-common</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-elasticsearch7_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.10</version>
    </dependency>

    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>2.3.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.12</artifactId>
        <version>2.3.0</version>
    </dependency>

    <dependency>
        <groupId>com.typesafe</groupId>
        <artifactId>config</artifactId>
        <version>1.2.1</version>
    </dependency>

    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>

    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.38</version>
    </dependency>

    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>1.7.25</version>
    </dependency>

    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-simple</artifactId>
        <version>1.7.25</version>
    </dependency>

    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>easyexcel</artifactId>
        <version>2.2.6</version>
    </dependency>

    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>RELEASE</version>
    </dependency>

    <dependency>
        <groupId>ru.ivi.opensource</groupId>
        <artifactId>flink-clickhouse-sink</artifactId>
        <version>1.1.0</version>
        <exclusions>
            <exclusion>
                <artifactId>flink-java</artifactId>
                <groupId>org.apache.flink</groupId>
            </exclusion>
            <exclusion>
                <artifactId>flink-core</artifactId>
                <groupId>org.apache.flink</groupId>
            </exclusion>
            <exclusion>
                <artifactId>flink-hadoop-fs</artifactId>
                <groupId>org.apache.flink</groupId>
            </exclusion>
            <exclusion>
                <artifactId>flink-streaming-java_2.11</artifactId>
                <groupId>org.apache.flink</groupId>
            </exclusion>
        </exclusions>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-runtime-web_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>

</dependencies>
```
