Flink 窗口函数简单案例

文章目录

      • SensorReading
      • window1
      • window2
      • apply
      • window3
      • window4

SensorReading

package kgc.kb11.beans;

import java.util.ArrayList;

/**
 * Plain data bean describing a single sensor measurement.
 *
 * <p>It carries three boxed values: the sensor {@code id}, a {@code timestamp} and a
 * {@code temperature}. The class offers a public no-argument constructor and one
 * getter/setter pair per field, so every field may be {@code null}.
 *
 * @author zhouhu
 */
public class SensorReading {
    private String id;
    private Long timestamp;
    private Double temperature;

    /** Creates an empty reading; every field stays {@code null} until a setter is called. */
    public SensorReading() {
    }

    /**
     * Creates a fully populated reading.
     *
     * @param id          sensor identifier
     * @param timestamp   time value attached to the measurement
     * @param temperature measured temperature
     */
    public SensorReading(String id, Long timestamp, Double temperature) {
        this.id = id;
        this.timestamp = timestamp;
        this.temperature = temperature;
    }

    /** @return the sensor identifier, possibly {@code null} */
    public String getId() {
        return this.id;
    }

    /** @param id the sensor identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the time value of the measurement, possibly {@code null} */
    public Long getTimestamp() {
        return this.timestamp;
    }

    /** @param timestamp the time value to store */
    public void setTimestamp(Long timestamp) {
        this.timestamp = timestamp;
    }

    /** @return the measured temperature, possibly {@code null} */
    public Double getTemperature() {
        return this.temperature;
    }

    /** @param temperature the temperature to store */
    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    /**
     * Renders the reading as {@code SensorReading{id='...', timestamp=..., temperature=...}};
     * unset fields are printed as {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SensorReading{");
        text.append("id='").append(id).append('\'');
        text.append(", timestamp=").append(timestamp);
        text.append(", temperature=").append(temperature);
        text.append('}');
        return text.toString();
    }

}

window1

package kgc.kb11.window;

import kgc.kb11.beans.SensorReading;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Properties;

/**
 * Sliding count-window example.
 *
 * <p>Reads comma-separated lines from the Kafka topic {@code sensor}, parses each line into a
 * {@link SensorReading}, keys the stream by the {@code id} field and reduces every count
 * window to one reading whose temperature is the sum of the temperatures in that window.
 *
 * @author zhouhu
 */
public class Window1 {
    /**
     * Builds the pipeline and submits it.
     *
     * @param args command-line arguments (not read)
     * @throws Exception propagated from {@code env.execute}
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka consumer configuration.
        Properties prop = new Properties();
        prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.119.125:9092");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "sensor_group1");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");

        DataStreamSource<String> inputStream = env.addSource(new FlinkKafkaConsumer011<String>(
                "sensor",
                new SimpleStringSchema(),
                prop
        ));

        // Each line is expected to look like "id,timestamp,temperature". A line with fewer
        // than three fields or with non-numeric values makes the map function throw.
        SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(new MapFunction<String, SensorReading>() {
            @Override
            public SensorReading map(String s) throws Exception {
                String[] splits = s.split(",");
                return new SensorReading(
                        splits[0],
                        Long.parseLong(splits[1]),
                        Double.parseDouble(splits[2])
                );
            }
        });

        // Window variants that can be used in place of countWindow(6, 2) below:
        //   .timeWindow(Time.seconds(15))                               tumbling time window
        //   .timeWindow(Time.seconds(15), Time.seconds(5))              sliding time window
        //   .countWindow(6)                                             tumbling count window
        //   .window(TumblingEventTimeWindows.of(Time.seconds(5)))       event time (when the event happened)
        //   .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))  processing time
        // and .max("temperature") can replace the reduce to get the maximum instead of the sum.
        SingleOutputStreamOperator<SensorReading> sum = mapStream.keyBy("id")
                // Windows of 6 elements, evaluated every 2 elements.
                .countWindow(6, 2)
                .reduce(new ReduceFunction<SensorReading>() {
                    @Override
                    public SensorReading reduce(SensorReading value1, SensorReading value2) throws Exception {
                        // Keep id and timestamp of the first operand; add up the temperatures.
                        return new SensorReading(value1.getId(),
                                value1.getTimestamp(),
                                value1.getTemperature() + value2.getTemperature()
                        );
                    }
                });

        // The stream carries temperature sums, so it is printed under "sum"
        // (it was previously labelled "max", which did not match the reduce above).
        sum.print("sum");

        env.execute("finkwindow");
    }
}

window2

package kgc.kb11.window;

import kgc.kb11.beans.SensorReading;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Properties;

/**
 * Sliding count-window example using an {@link AggregateFunction}.
 *
 * <p>Reads comma-separated lines from the Kafka topic {@code sensor}, parses each line into a
 * {@link SensorReading}, keys the stream by the {@code id} field and emits the average
 * temperature of every count window.
 *
 * @author zhouhu
 */
public class Window2 {
    /**
     * Builds the pipeline and submits it.
     *
     * @param args command-line arguments (not read)
     * @throws Exception propagated from {@code env.execute}
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka consumer configuration.
        Properties prop = new Properties();
        prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.119.125:9092");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "sensor_group1");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");

        DataStreamSource<String> inputStream = env.addSource(new FlinkKafkaConsumer011<String>(
                "sensor",
                new SimpleStringSchema(),
                prop
        ));

        // Each line is expected to look like "id,timestamp,temperature". A line with fewer
        // than three fields or with non-numeric values makes the map function throw.
        SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(new MapFunction<String, SensorReading>() {
            @Override
            public SensorReading map(String s) throws Exception {
                String[] splits = s.split(",");
                return new SensorReading(
                        splits[0],
                        Long.parseLong(splits[1]),
                        Double.parseDouble(splits[2])
                );
            }
        });

        SingleOutputStreamOperator<Double> aggregate = mapStream.keyBy("id")
                // Windows of 6 elements, evaluated every 2 elements.
                .countWindow(6, 2)
                .aggregate(new MyAvgFunction());

        // The stream carries window averages, so it is printed under "avg"
        // (it was previously labelled "max", which did not match MyAvgFunction).
        aggregate.print("avg");

        env.execute("finkwindow");
    }

    /**
     * Averages the temperatures of the readings it receives.
     *
     * <p>The accumulator is a pair: {@code f0} is the running temperature sum and {@code f1}
     * the number of readings added so far.
     */
    private static class MyAvgFunction implements AggregateFunction<SensorReading,
            Tuple2<Double, Integer>, Double> {
        /** Starts from a sum of {@code 0.0} over {@code 0} readings. */
        @Override
        public Tuple2<Double, Integer> createAccumulator() {
            return new Tuple2<>(0.0, 0);
        }

        /** Returns a new accumulator with the reading's temperature added and the count incremented. */
        @Override
        public Tuple2<Double, Integer> add(SensorReading value, Tuple2<Double, Integer> accumulator) {
            double sum = value.getTemperature() + accumulator.f0;
            int count = 1 + accumulator.f1;
            return new Tuple2<>(sum, count);
        }

        /** Returns the accumulated sum divided by the accumulated count. */
        @Override
        public Double getResult(Tuple2<Double, Integer> accumulator) {
            return accumulator.f0 / accumulator.f1;
        }

        /** Combines two partial accumulators by adding their sums and their counts. */
        @Override
        public Tuple2<Double, Integer> merge(Tuple2<Double, Integer> a, Tuple2<Double, Integer> b) {
            double sum = a.f0 + b.f0;
            int count = a.f1 + b.f1;
            return new Tuple2<>(sum, count);
        }
    }
}

apply(WindowFunction 示例,代码与下文 window4 相同)

package kgc.kb11.window;

import kgc.kb11.beans.SensorReading;
import org.apache.commons.collections.IteratorUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Iterator;
import java.util.Properties;

/**
 * @author zhouhu
 * @Date
 * @Desription
 */

public class Window4 {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        Properties prop = new Properties();
        prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.119.125:9092");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "sensor_group2");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");

        DataStreamSource<String> inputStream = env.addSource(new FlinkKafkaConsumer011<String>(
                "sensor",
                new SimpleStringSchema(),
                prop
        ));


        SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(new MapFunction<String, SensorReading>() {
            @Override
            public SensorReading map(String s) throws Exception {
                String[] splits = s.split(",");
                return new SensorReading(
                        splits[0],
                        Long.parseLong(splits[1]),
                        Double.parseDouble(splits[2])
                );
            }
        })
                .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<SensorReading>(Time.seconds(0)) {
                    @Override
                    public long extractTimestamp(SensorReading element) {
                        return element.getTimestamp() * 1000L;
                    }
                });


        SingleOutputStreamOperator<Tuple4<String, Long, Long, Integer>> apply = mapStream.keyBy("id")
                .timeWindow(Time.seconds(5)) //,Time.seconds(5)
                .apply(new WindowFunction<SensorReading, Tuple4<String, Long, Long, Integer>, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple tuple, TimeWindow window, Iterable<SensorReading> input, Collector<Tuple4<String, Long, Long, Integer>> out) throws Exception {
                        String key = tuple.getField(0);
                        long start = window.getStart();
                        long end = window.getEnd();
                        Iterator<SensorReading> iterator = input.iterator();
                        int size = IteratorUtils.toList(iterator).size();
                        Tuple4<String, Long, Long, Integer> returnValue = new Tuple4<>(key, start, end, size);
                        out.collect(returnValue);
                    }
                });
        apply.print();


        env.execute("finkwindow1");
    }

}

window3

package kgc.kb11.window;

import kgc.kb11.beans.SensorReading;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.OutputTag;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.ArrayList;
import java.util.Properties;

import static jdk.nashorn.internal.objects.NativeMath.max;

/**
 * Event-time window example with allowed lateness and a late-data side output.
 *
 * <p>Reads comma-separated lines from a socket, turns them into {@link SensorReading}s with
 * event-time timestamps, groups them by {@code id} into 2-second tumbling event-time windows
 * and prints the reading with the maximum temperature per window. Windows accept late
 * records for another 30 seconds; records that arrive later than that are sent to the
 * {@code late} side output, which is printed as well.
 *
 * @author zhouhu
 */
public class Window3 {
    /**
     * Builds the pipeline and submits it.
     *
     * @param args command-line arguments (not read)
     * @throws Exception propagated from {@code env.execute}
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Text lines come from a socket. To read from Kafka instead, build consumer
        // Properties and use
        //   env.addSource(new FlinkKafkaConsumer011<String>("sensor", new SimpleStringSchema(), prop))
        DataStreamSource<String> inputStream = env.socketTextStream("192.168.119.125", 7777);

        // Each line is expected to look like "id,timestamp,temperature". A line with fewer
        // than three fields or with non-numeric values makes the map function throw.
        SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(new MapFunction<String, SensorReading>() {
            @Override
            public SensorReading map(String s) throws Exception {
                String[] splits = s.split(",");
                return new SensorReading(
                        splits[0],
                        Long.parseLong(splits[1]),
                        Double.parseDouble(splits[2])
                );
            }
        })
                // Maximum out-of-orderness of zero seconds.
                .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<SensorReading>(Time.seconds(0)) {
                    @Override
                    public long extractTimestamp(SensorReading element) {
                        // Scaled by 1000 - presumably the input carries seconds and
                        // milliseconds are needed here; confirm against the data producer.
                        return element.getTimestamp() * 1000L;
                    }
                });

        // The tag must be an anonymous subclass (note the trailing braces) so that Flink can
        // extract the element type; a plain "new OutputTag<>(...)" fails at runtime.
        OutputTag<SensorReading> outputTag = new OutputTag<SensorReading>("late") {
        };

        SingleOutputStreamOperator<SensorReading> max = mapStream.keyBy("id")
                // Equivalent shortcut: .timeWindow(Time.seconds(2)); add a second argument
                // for a sliding window.
                .window(TumblingEventTimeWindows.of(Time.seconds(2)))
                .allowedLateness(Time.seconds(30))
                .sideOutputLateData(outputTag)
                .max("temperature");

        // Consume both results; without these sinks the job produced no output at all.
        max.print("max");
        max.getSideOutput(outputTag).print("late");

        env.execute("finkwindow1");
    }

}

window4

package kgc.kb11.window;

import kgc.kb11.beans.SensorReading;
import org.apache.commons.collections.IteratorUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Iterator;
import java.util.Properties;

/**
 * @author zhouhu
 * @Date
 * @Desription
 */

public class Window4 {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        Properties prop = new Properties();
        prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.119.125:9092");
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "sensor_group2");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
        prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");

        DataStreamSource<String> inputStream = env.addSource(new FlinkKafkaConsumer011<String>(
                "sensor",
                new SimpleStringSchema(),
                prop
        ));


        SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(new MapFunction<String, SensorReading>() {
            @Override
            public SensorReading map(String s) throws Exception {
                String[] splits = s.split(",");
                return new SensorReading(
                        splits[0],
                        Long.parseLong(splits[1]),
                        Double.parseDouble(splits[2])
                );
            }
        })
                .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<SensorReading>(Time.seconds(0)) {
                    @Override
                    public long extractTimestamp(SensorReading element) {
                        return element.getTimestamp() * 1000L;
                    }
                });


        SingleOutputStreamOperator<Tuple4<String, Long, Long, Integer>> apply = mapStream.keyBy("id")
                .timeWindow(Time.seconds(5)) //,Time.seconds(5)
                .apply(new WindowFunction<SensorReading, Tuple4<String, Long, Long, Integer>, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple tuple, TimeWindow window, Iterable<SensorReading> input, Collector<Tuple4<String, Long, Long, Integer>> out) throws Exception {
                        String key = tuple.getField(0);
                        long start = window.getStart();
                        long end = window.getEnd();
                        Iterator<SensorReading> iterator = input.iterator();
                        int size = IteratorUtils.toList(iterator).size();
                        Tuple4<String, Long, Long, Integer> returnValue = new Tuple4<>(key, start, end, size);
                        out.collect(returnValue);
                    }
                });
        apply.print();


        env.execute("finkwindow1");
    }

}

你可能感兴趣的:(flink 窗口函数简单案例)