Flink计算(工作记录)-多流合并

/**
 * Entry point of the live-room statistics job: merges three source streams into one,
 * aggregates them per group in 5-minute event-time windows, and emits the result.
 *
 * <p>Pipeline, in order:
 * <ol>
 *   <li>Build three source streams (IM, WS, user action), each created through
 *       {@code AliRocketMQSource.createMQSource} and passed through its own process function.</li>
 *   <li>Union the three streams. {@code DataStream#union} requires every input to have the
 *       same element type, so all three are declared as {@code LiveRoomUnionInfo}, the type
 *       the downstream watermark strategy and key selector operate on.</li>
 *   <li>Assign event-time timestamps and watermarks (10 s bounded out-of-orderness).</li>
 *   <li>Key by {@code groupId}, window into 5-minute tumbling event-time windows, reduce.</li>
 *   <li>Print the result and hand it to the Elasticsearch sink helper.</li>
 * </ol>
 *
 * @param args command-line arguments, forwarded to {@code ExecutionEnvUtil.createParameterTool}
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
    // Parameter tool used to read configuration from the command-line arguments.
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    // 1. Create the streaming execution environment.
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    env.setParallelism(1); // run the job with parallelism 1
    env.enableCheckpointing(5 * 60 * 1000); // checkpoint every 5 minutes (interval is in milliseconds)

    // IM stream: read from the IM topic and normalise records with ImStreamProcess.
    SingleOutputStreamOperator<LiveRoomUnionInfo> imStream =
            env.addSource(AliRocketMQSource.createMQSource(parameterTool,
                    new RocketMQConnectConfig(
                            PropertiesConstants.ROCKET_LIVE_IM_TOPIC,
                            PropertiesConstants.ROCKET_LIVE_IM_GROUP,
                            PropertiesConstants.ROCKET_LIVE_IM_TAG)))
                    .process(new ImStreamProcess(), TypeInformation.of(LiveRoomUnionInfo.class))
                    .name("imStream");

    // WS stream: read from the WS topic and normalise records with WsStreamProcess.
    SingleOutputStreamOperator<LiveRoomUnionInfo> wsStream =
            env.addSource(AliRocketMQSource.createMQSource(parameterTool,
                    new RocketMQConnectConfig(
                            PropertiesConstants.ROCKET_LIVE_WS_TOPIC,
                            PropertiesConstants.ROCKET_LIVE_WS_GROUP,
                            PropertiesConstants.ROCKET_LIVE_WS_TAG)))
                    .process(new WsStreamProcess(), TypeInformation.of(LiveRoomUnionInfo.class))
                    .name("wsStream");

    // User-action stream: read from the event topic and normalise records with ActionStreamProcess.
    SingleOutputStreamOperator<LiveRoomUnionInfo> actionStream =
            env.addSource(AliRocketMQSource.createMQSource(parameterTool,
                    new RocketMQConnectConfig(
                            PropertiesConstants.ROCKET_LIVE_EVENT_TOPIC,
                            PropertiesConstants.ROCKET_LIVE_EVENT_GROUP,
                            PropertiesConstants.ROCKET_LIVE_EVENT_TAG)))
                    .process(new ActionStreamProcess(), TypeInformation.of(LiveRoomUnionInfo.class))
                    .name("actionStream");

    // Merge the three streams into one, then window and aggregate per group.
    SingleOutputStreamOperator<DIndex> reduceStream = imStream.union(wsStream, actionStream)
            .assignTimestampsAndWatermarks(WatermarkStrategy
                    // Out-of-order stream: watermark trails the max seen event time by 10 s.
                    .<LiveRoomUnionInfo>forBoundedOutOfOrderness(Duration.ofSeconds(10))
                    .withTimestampAssigner(new SerializableTimestampAssigner<LiveRoomUnionInfo>() {
                        @Override
                        public long extractTimestamp(LiveRoomUnionInfo element, long recordTimestamp) {
                            if (element == null) {
                                // NOTE(review): falls back to wall-clock time for a null record, which
                                // mixes processing time into an event-time pipeline; a null record
                                // would also fail later at keyBy(getGroupId). Confirm whether the
                                // process functions can actually emit null.
                                return System.currentTimeMillis();
                            }
                            // Use the record's eventTime as its event-time timestamp.
                            return element.getEventTime();
                        }
                    })
                    // Idle-source timeout: an input with no records for 2 s is marked idle so it
                    // does not hold back the watermark. This is NOT allowed lateness.
                    .withIdleness(Duration.ofSeconds(2)))
            .keyBy(LiveRoomUnionInfo::getGroupId) // partition by groupId
            // 5-minute tumbling (non-overlapping) event-time windows with zero offset.
            .window(TumblingEventTimeWindows.of(Time.minutes(5), Time.seconds(0)))
            // Custom trigger replacing the default event-time trigger.
            // NOTE(review): the meaning and unit of 1200 are defined in LastWindowCloseTrigger — confirm.
            .trigger(new LastWindowCloseTrigger(1200))
            // Incrementally reduce the window's records, then post-process the reduced value per window.
            .reduce(new LiveRoomReduceFunction(), new LiveRoomReduceProcessWindowFun());

    // Print the aggregated results to stdout with an identifying prefix.
    reduceStream.print("LiveRoomStatisticCountJob reduceStream");

    // Attach the Elasticsearch sink for the aggregated results.
    // NOTE(review): the two leading 1s are defined by the sink helper's signature — confirm their meaning.
    ElasticSearch6SinkLiveActionIndex.addSink(1, 1, reduceStream, parameterTool);

    // Submit the job, using the job class's simple name as the job name.
    env.execute(LiveRoomStatisticsJob.class.getSimpleName());
}

你可能感兴趣的:(flink,大数据)