Flink 数据保存 Redis(自定义Redis Sink ) 实现exactly-once

根据 Flink 官方文档(Fault Tolerance Guarantees of Data Sources and Sinks 一节),数据保存到 Redis 的容错保证是 at least once。因此我们通过幂等写入——即用新数据覆盖同一 key 的旧数据——来达到 exactly-once 的效果。

1.代码部分

1.1 config.properties配置文件

redis.host=192.168.204.210
redis.port=6379
redis.password=123456
redis.timeout=5000
redis.db=0

1.2 FlinkUtils工具类

package cn.huimin100.bigdata.tools;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class FlinkUtils {
   


    // Shared execution environment for the whole job; created once at class-load
    // time and handed out via getEnv() so all streams attach to the same environment.
    private static final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    /**
     * Returns the shared {@link StreamExecutionEnvironment} held by this utility class.
     *
     * @return the singleton execution environment used for all streams created here
     */
    public static StreamExecutionEnvironment getEnv() {
        return FlinkUtils.env;
    }


    /**
     * 创建一个kafka streaming
     *
     * @param topics topic
     *               //     * @param parameterPath 参数文件路劲
     * @param clazz  序列化类
     * @param 
     * @return DataStream
     * @throws Exception
     */
    public static <T> DataStream<T> createKafkaStreaming(ParameterTool parameters, String topics, String groupId, Class<? extends DeserializationSchema<T>> clazz) throws Exception {
   


        //设置全局变量
        env.getConfig().setGlobalJobParameters(parameters);

        //设置checkPoint 确保一次语义,10scheckpoint 一次
        //设置Checkpoint模式(与Kafka整合,一定要设置Checkpoint模式为Exactly_Once)
        env.enableCheckpointing(parameters.getLong("checkpoint.interval", 10000L), CheckpointingMode.EXACTLY_ONCE);

        //系统异常退出或人为 Cancel 掉,不删除checkpoint数据
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        //此处设置重启策略为:出现异常重启6次,隔5秒一次(你也可以在flink-conf.yaml配置文件中写死。此处配置会覆盖配置文件中的)
        String restartAttempts = parameters.get("restartAttempts","6");
        String delayInterval = parameters.get("delayInterval","5000");
//        String restartAttempts = parameters.getRequired("restartAttempts");
//        String delayInterval = parameters.getRequired("delayInterval");
        env.getConfig(

你可能感兴趣的:(flink, redis, 大数据)