Flink 1.12 stream window Top-N

  1. Window Top-N in Flink 1.12
package com.cn.stream;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;

import org.apache.flink.util.Collector;


import java.time.Duration;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.stream.Collectors;


public class WindowTopN {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        /*
         * One way to create a Kafka source (the new KafkaSource builder API):
         */
     /*   KafkaSource kafkaSource = KafkaSource.builder()
                .setBootstrapServers("localhost:9092")
                .setGroupId("a")
                .setTopics("kafkaS")
                .setStartingOffsets(OffsetsInitializer.latest())
                .setDeserializer(KafkaRecordDeserializer.valueOnly(SimpleStringSchema.class))
                .build();

        DataStreamSource Input = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafka source");*/
       /* Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        properties.setProperty("group.id", "tesst");

        FlinkKafkaConsumer myConsumer =
                new FlinkKafkaConsumer<>("kafkaS", new SimpleStringSchema(), properties);
        DataStreamSource Input = env.addSource(myConsumer);*/
        DataStreamSource<String> Input = env.readTextFile("E:\\大数据相关-学员参考\\flinkdemo\\src\\main\\resources\\ok.txt");
        SingleOutputStreamOperator<Tuple3<Long, Float, String>> InputMap = Input.map(new MapFunction<String, Tuple3<Long, Float, String>>() {

                    @Override
                    public Tuple3<Long, Float, String> map(String value) throws Exception {
                        String[] v = value.split(",");
                        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm");
                        long timestamp = LocalDateTime.parse(v[0], formatter).toInstant(ZoneOffset.of("+8")).toEpochMilli();
                        return new Tuple3<Long, Float, String>(timestamp, Float.valueOf(v[1]), v[2]);
                    }
                }).uid("001-zlg-map01").name("parse source records")
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy
                                .<Tuple3<Long, Float, String>>forBoundedOutOfOrderness(
                                        Duration.ofSeconds(5))
                                .withTimestampAssigner((event, timestamp) -> event.f0)
                                .withIdleness(Duration.ofSeconds(1)));


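        /*
         * Pipeline: key by supplier name -> 10-minute tumbling event-time window
         * -> sum the amounts per supplier (aggregations) and attach the window end
         *    time (processWindowsA) -> key by window end -> keywd buffers the
         *    per-window results in ListState and emits the top 3 when its
         *    event-time timer fires.
         */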
        SingleOutputStreamOperator<List<Tuple3<Float, String, Long>>> process = InputMap
                .rebalance()
                .keyBy(new KeySelector<Tuple3<Long, Float, String>, String>() {
                    @Override
                    public String getKey(Tuple3<Long, Float, String> value) throws Exception {
                        return value.f2;
                    }
                })
                .window(TumblingEventTimeWindows.of(Time.minutes(10)))
                .aggregate(new aggregations(), new processWindowsA()).uid("aggregation").name("window aggregation")
                .keyBy(new KeySelector<Tuple3<Float, String, Long>, Long>() {
                    @Override
                    public Long getKey(Tuple3<Float, String, Long> value) throws Exception {
                        return value.f2;
                    }
                })
                .process(new keywd()).uid("topc").name("compute top-n");

        process.print();
        env.execute("top n");

    }
}

class aggregations implements AggregateFunction<Tuple3<Long, Float, String>, Float, Float> {
    // Create the initial accumulator (the running sum)
    @Override
    public Float createAccumulator() {
        return 0f;
    }

    // Accumulation rule: add each record's amount to the running sum
    @Override
    public Float add(Tuple3<Long, Float, String> value, Float accumulator) {
        return accumulator + value.f1;
    }

    // Return the aggregated result
    @Override
    public Float getResult(Float accumulator) {
        return accumulator;
    }

    // Merge partial sums computed on different nodes
    @Override
    public Float merge(Float a, Float b) {
        return a + b;
    }
}

class processWindowsA implements WindowFunction<Float, Tuple3<Float, String, Long>, String, TimeWindow> {

    // Attach the window's end time to the aggregated value; it is used downstream to register the top-N timer
    @Override
    public void apply(String s, TimeWindow window, Iterable<Float> input, Collector<Tuple3<Float, String, Long>> out) throws Exception {
        Float next = input.iterator().next();
        // The window end time triggers onTimer downstream, where sorting and top-N selection happen
        long end = window.getEnd();
        out.collect(new Tuple3<Float, String, Long>(next, s, end));
    }
}

class keywd extends KeyedProcessFunction<Long, Tuple3<Float, String, Long>, List<Tuple3<Float, String, Long>>> {
    ListState<Tuple3<Float, String, Long>> listState = null;

    @Override
    public void open(Configuration parameters) throws Exception {
        // Initialize the state descriptor and list state before any elements are processed
        ListStateDescriptor<Tuple3<Float, String, Long>> tuple3ListStateDescriptor =
                new ListStateDescriptor<>("top-n",
                        TypeInformation.of(new TypeHint<Tuple3<Float, String, Long>>() {}));
        listState = getRuntimeContext().getListState(tuple3ListStateDescriptor);
    }

    @Override
    public void onTimer(long timestamp, KeyedProcessFunction<Long, Tuple3<Float, String, Long>, List<Tuple3<Float, String, Long>>>.OnTimerContext ctx, Collector<List<Tuple3<Float, String, Long>>> out) throws Exception {
        // Copy the buffered elements out of state into a list, then clear the state
        List<Tuple3<Float, String, Long>> list = new ArrayList<>();
        Iterator<Tuple3<Float, String, Long>> iterator = listState.get().iterator();
        while (iterator.hasNext()) {
            Tuple3<Float, String, Long> next = iterator.next();
            list.add(next);
        }
        listState.clear();
        // Sort by amount in descending order and keep the top 3
        List<Tuple3<Float, String, Long>> collect = list.stream()
                .sorted((o1, o2) -> Float.compare(o2.f0, o1.f0))
                .limit(3)
                .collect(Collectors.toList());

        out.collect(collect);

    }


    @Override
    public void processElement(Tuple3<Float, String, Long> value, KeyedProcessFunction<Long, Tuple3<Float, String, Long>, List<Tuple3<Float, String, Long>>>.Context ctx, Collector<List<Tuple3<Float, String, Long>>> out) throws Exception {
        // Buffer the incoming element in the list state
        listState.add(value);
        // Register an event-time timer for 1 ms after the window end; it fires once the watermark passes that time
        ctx.timerService().registerEventTimeTimer(value.f2 + 1);
    }
}

The sample input text file (ok.txt):

2020-04-15 08:05,4.00,supplier1
2020-04-15 08:06,4.00,supplier2
2020-04-15 08:07,2.00,supplier1
2020-04-15 08:08,2.00,supplier3
2020-04-15 08:09,5.00,supplier4
2020-04-15 08:11,2.00,supplier3
2020-04-15 08:13,1.00,supplier1
2020-04-15 08:15,3.00,supplier2
2020-04-15 08:17,6.00,supplier5
2020-04-15 08:25,6.00,supplier5
2020-04-15 08:30,6.00,supplier5
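
As a sanity check, the first 10-minute window [08:00, 08:10) contains supplier1 = 4.00 + 2.00 = 6.00, supplier2 = 4.00, supplier3 = 2.00 and supplier4 = 5.00, so its top 3 should come out as supplier1, supplier4 and supplier2 once the watermark passes the timer at 08:10:00.001.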

A few things to note. First, imports: Flink ships both a Java and a Scala API, so check carefully that you import from the right package, otherwise the aggregation will fail with confusing errors. Second, types: Tuple types are prone to type-inference (type-erasure) problems; the explicit TypeInformation/TypeHint handling used in the program above largely avoids them, at the cost of slightly more verbose code. A lambda-based alternative is sketched below.
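
If you prefer lambdas over anonymous MapFunction classes, the erased Tuple3 type parameters can be supplied explicitly with returns(...). A minimal sketch (the class name and sample element are illustrative, not part of the original program):

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TupleTypeHintSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> source = env.fromElements("2020-04-15 08:05,4.00,supplier1");

        // With a lambda the Tuple3 generic parameters are erased, so Flink cannot
        // infer them; returns(Types.TUPLE(...)) supplies the missing type information.
        SingleOutputStreamOperator<Tuple3<Long, Float, String>> parsed = source
                .map(value -> {
                    String[] v = value.split(",");
                    return Tuple3.of(0L, Float.valueOf(v[1]), v[2]); // timestamp parsing omitted
                })
                .returns(Types.TUPLE(Types.LONG, Types.FLOAT, Types.STRING));

        parsed.print();
        env.execute("tuple type hint sketch");
    }
}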
