flink滚动窗口和滑动窗口

文章目录

  • 滚动窗口
  • 滑动窗口

滚动窗口

  • 窗口之间互不重叠,只有一个时间参数(窗口大小),即每隔多长时间滚动一次
  • 滚动窗口又分为滚动事件时间窗口,滚动处理时间窗口
  • 事件时间指数据中的时间字段
  • 处理时间指系统时间

TumblingProcess

package com.hehe.window;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
// Word count over tumbling processing-time windows: one result per key every
// 10 seconds, with no overlap between consecutive windows.
public class TumblingProcess {

    /**
     * Reads comma-separated text from the socket {@code master:8888}, emits a
     * {@code (token, 1)} pair per token, and prints the per-token sum for each
     * 10-second tumbling processing-time window.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> lines = env.socketTextStream("master", 8888);

        SingleOutputStreamOperator<Tuple2<String, Integer>> ones = lines.flatMap(new CommaSplitter());
        SingleOutputStreamOperator<Tuple2<String, Integer>> counts = ones
                .keyBy(0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
                .sum(1);

        // Print through a single sink task so the output is not interleaved.
        counts.print().setParallelism(1);

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Splits each input line on commas and emits {@code (token, 1)} for every token. */
    private static class CommaSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
            String[] tokens = line.split(",");
            for (int i = 0; i < tokens.length; i++) {
                out.collect(Tuple2.of(tokens[i], 1));
            }
        }
    }
}

TumblingEvent

package com.hehe.window;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;


/**
 * Use case: count how often each event/word occurs inside a fixed event-time
 * window (e.g. hot-item statistics).
 *
 * <p>Each sample element is {@code (weight, word, eventTime)}. The job sums the
 * weights per word over 10-second tumbling event-time windows.
 */
public class TumblingEvent {

    /**
     * Builds and runs the tumbling event-time window job on a fixed set of
     * sample elements and prints the per-window sums.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment for the Flink streaming program.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);  // assign windows by event time

        // 2. Create the data source; the third field is the event time, which
        //    appears to be in epoch seconds (values around 1588491228).
        SingleOutputStreamOperator<Tuple3<Long, String, Long>> input = env.fromElements(
                Tuple3.of(1L, "hh", 1588491228L),
                Tuple3.of(1L, "hh", 1588491229L),
                Tuple3.of(1L, "66", 1588491238L),
                Tuple3.of(1L, "yy", 1588491248L),
                Tuple3.of(2L, "kk", 1588491258L),
                Tuple3.of(2L, "java", 1588491268L),
                Tuple3.of(2L, "java", 1588491270L)).assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple3<Long, String, Long>>() {
            @Override
            public long extractAscendingTimestamp(Tuple3<Long, String, Long> element) {
                // Flink timestamps are milliseconds since the epoch. Returning the raw
                // seconds value would squeeze all elements into a 42 ms span, i.e. into
                // a single 10-second window, so convert seconds to milliseconds here.
                return element.f2 * 1000L;
            }
        });

        // 3. Drop the timestamp field and keep (word, weight) for aggregation.
        SingleOutputStreamOperator<Tuple2<String, Long>> wordWeights = input.map(new MapFunction<Tuple3<Long, String, Long>, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(Tuple3<Long, String, Long> element) {
                return Tuple2.of(element.f1, element.f0);
            }
        });

        // 4. Key by word and sum the weights per 10-second tumbling event-time window.
        SingleOutputStreamOperator<Tuple2<String, Long>> sum = wordWeights
                .keyBy(0)
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .sum(1);

        sum.print();
        env.execute();
    }
}

滑动窗口

  • 滑动窗口可以重叠,有两个时间参数,前面的时间指窗口大小,后面的时间指滑动步长
  • 滑动窗口也分为事件时间和处理时间

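SlidingEvent(注意:下面贴出的代码实际上是基于处理时间的 SlidingProcess;事件时间版本应改用 SlidingEventTimeWindows,并像 TumblingEvent 那样为数据分配时间戳)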

package com.hehe.window;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
// Word count over sliding processing-time windows: window size 10 seconds,
// sliding every 5 seconds, so consecutive windows overlap.
public class SlidingProcess {

    /**
     * Reads comma-separated text from the socket {@code 192.168.154.123:8888},
     * emits a {@code (token, 1)} pair per token, and prints the per-token sum
     * for each sliding window (size 10 s, slide 5 s).
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> stringDataStreamSource = env.socketTextStream("192.168.154.123", 8888);
        SingleOutputStreamOperator<Tuple2<String, Integer>> tuple2SingleOutputStreamOperator = stringDataStreamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) {
                for (String str : s.split(",")) {
                    // Emit the individual token, not the whole line `s`; emitting `s`
                    // would key the counts by the entire input line once per token.
                    collector.collect(Tuple2.of(str, 1));
                }

            }
        });
        SingleOutputStreamOperator<Tuple2<String, Integer>> sum = tuple2SingleOutputStreamOperator.keyBy(0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(10), Time.seconds(5))).sum(1);
        sum.print();
        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}

SlidingProcess

package com.hehe.window;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
// Word count over sliding processing-time windows: window size 10 seconds,
// sliding every 5 seconds, so consecutive windows overlap.
public class SlidingProcess {

    /**
     * Reads comma-separated text from the socket {@code 192.168.154.123:8888},
     * emits a {@code (token, 1)} pair per token, and prints the per-token sum
     * for each sliding window (size 10 s, slide 5 s).
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> stringDataStreamSource = env.socketTextStream("192.168.154.123", 8888);
        SingleOutputStreamOperator<Tuple2<String, Integer>> tuple2SingleOutputStreamOperator = stringDataStreamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) {
                for (String str : s.split(",")) {
                    // Emit the individual token, not the whole line `s`; emitting `s`
                    // would key the counts by the entire input line once per token.
                    collector.collect(Tuple2.of(str, 1));
                }

            }
        });
        SingleOutputStreamOperator<Tuple2<String, Integer>> sum = tuple2SingleOutputStreamOperator.keyBy(0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(10), Time.seconds(5))).sum(1);
        sum.print();
        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}

你可能感兴趣的:(Hadoop,flink,java,window)