侧输出流SideOutput

主要功能是通过侧输出流(SideOutput)对数据流进行拆分(分流)。

代码示例

package com.gwm.driver;

import com.alibaba.flink.connectors.datahub.datastream.source.DatahubSourceFunction;
import com.aliyun.datahub.client.model.RecordEntry;
import com.gwm.pojo.Aecollectordata;
import com.gwm.process.GetJsonObject;
import com.gwm.utils.ConfigPropUtils;
import com.gwm.utils.getString;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.util.List;

/**
 * @author yangyingchun
 * @version 1.0
 * @date 2023/8/28 16:32
 */
public class SideOutput {
    
    private static String endPoint = "your 's endPoint ";
    //private static String endPoint ="public endpoint";//公网访问(填写内网Endpoint,就不用填写公网Endpoint)。
    private static String projectName = "projectName ";
    private static String topicSourceName =  "topicSourceName";
    private static String topicSinkName =  ConfigPropUtils.get("datahub_sink_topic");
    private static String accessId = "accessId ";
    private static String accessKey = "accessKey ";
    //设置消费的启动位点对应的时间。TimeToStampUtil.timeToStamp("2021-12-21") 此时间至少为当前时间
    private static Long datahubStartInMs = System.currentTimeMillis();
    //    private static Long datahubStartInMs = System.currentTimeMillis();
    private static Long datahubEndInMs=Long.MAX_VALUE;
    public static void main(String[] args) throws Exception {
        //todo 1.获取执行环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(3600000L);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));
        env.setParallelism(8);
        
        //todo 2.获取数据
        DataStreamSource> aedata =  env.addSource(
                new DatahubSourceFunction(
                        endPoint,
                        projectName,
                        topicSourceName,
                        accessId,
                        accessKey,
                        datahubStartInMs,
                        datahubEndInMs,
                        20L,
                        1000L,
                        1000
                ));

        //todo 3.进行数据转换
        DataStream aecollectordataDataStream = aedata.flatMap(new FlatMapFunction, Aecollectordata>() {
            @Override
            public void flatMap(List value, Collector out) throws Exception {
                for (RecordEntry recordEntry : value) {

                    String timestamp = getString.getString(recordEntry, "timestamp");
                    String message = getString.getString(recordEntry, "message");

                    Aecollectordata aecollectordata = GetJsonObject.getAeCollectFastJson(message, timestamp);
                    out.collect(aecollectordata);

                }
            }
        });


        //todo 4.设置侧流数据了标记
        //8715891475D0454AAAFED0E901C10178 全员营销
        OutputTag allSaleTag = new OutputTag("allSaleTag"){};
        //AA5DE272584E436E84207D6FF897B472 O-APP
        OutputTag oAppTag = new OutputTag("oAppTag"){};
        //6B5D1224809D4BA3B2D397099F870F41 p-APP
        OutputTag pAppTag = new OutputTag("pAppTag"){};
        //dirty
        OutputTag dirtyTag = new OutputTag("dirtyTag"){};


        //todo 5.拆分数据流
        SingleOutputStreamOperator SideDs = aecollectordataDataStream.process(new ProcessFunction() {
            @Override
            public void processElement(Aecollectordata value, Context ctx, Collector out) throws Exception {

                if ("8715891475D0454AAAFED0E901C10178".equals(value.getAppKey())) {
                    ctx.output(allSaleTag, value.toString());
                } else if ("AA5DE272584E436E84207D6FF897B472".equals(value.getAppKey())) {
                    ctx.output(oAppTag, value.toString());
                } else if ("6B5D1224809D4BA3B2D397099F870F41".equals(value.getAppKey())) {
                    ctx.output(pAppTag, value.toString());
                } else if (value.getAppKey() == null) {
                    ctx.output(dirtyTag, value.toString());
                } else {
                    out.collect(value.toString());
                }

            }
        });


        //todo 6.获取侧数据流并打印
        DataStream allSaleSide = SideDs.getSideOutput(allSaleTag);
        DataStream oraAppSide = SideDs.getSideOutput(oAppTag);
        DataStream pickupAppSide = SideDs.getSideOutput(pAppTag);
        DataStream dirtySide = SideDs.getSideOutput(dirtyTag);


        SideDs.print("主流>>>>>>>>>>>>>");
        allSaleSide.print("allSaleSide>>>>>>>>>>>");
        oraAppSide.print("oAppTag>>>>>>>>>>>");
        pickupAppSide.print("pAppSide>>>>>>>>>>>");
        dirtySide.print("dirtySide>>>>>>>>>>>");

        //todo 7.启动
        env.execute("SideOutput");

    }
}

你可能感兴趣的:(flink,阿里云,侧输出流,flink)