Flink in Practice: An Exactly-Once Kafka Consumption Template (Base Environment)

Introduction

Template code for quickly standing up a Flink job that consumes from Kafka with exactly-once semantics.

Getting Started

Parent pom


<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <flink.version>1.13.6</flink.version>
    <java.version>1.8</java.version>
    <scala.binary.version>2.12</scala.binary.version>
    <slf4j.version>1.7.30</slf4j.version>
    <hadoop.version>3.1.2</hadoop.version>
</properties>

<dependencyManagement>
    <dependencies>
        <!-- Flink core -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Kafka connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-base</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Logging -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <!-- Table API (not required by this template, kept for convenience) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.62</version>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.24</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- RocksDB state backend; keep the Scala binary version and Flink
             version consistent with the other Flink artifacts -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- stream-lib, used for the Bloom-filter deduplication variant -->
        <dependency>
            <groupId>com.clearspring.analytics</groupId>
            <artifactId>stream</artifactId>
            <version>2.7.0</version>
        </dependency>

        <!-- Local Flink web UI; version aligned with flink.version -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
    </dependencies>
</dependencyManagement>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
                <source>8</source>
                <target>8</target>
            </configuration>
        </plugin>

        <!-- Build a fat jar with all dependencies for submission -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>3.0.0</version>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

<repositories>
    <repository>
        <id>nexus-aliyun</id>
        <name>nexus-aliyun</name>
        <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
        <releases>
            <enabled>true</enabled>
        </releases>
        <snapshots>
            <enabled>false</enabled>
        </snapshots>
    </repository>
</repositories>

Child pom

Dependencies of the `bigdata` module (versions are managed by the parent pom):

<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-base</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
    </dependency>

    <!-- flink-clients is needed to execute the job from the IDE -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_${scala.binary.version}</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
    </dependency>

    <!-- Logging -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
    </dependency>

    <dependency>
        <groupId>com.clearspring.analytics</groupId>
        <artifactId>stream</artifactId>
    </dependency>

    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
    </dependency>
</dependencies>

log4j.properties

log4j.rootLogger=ERROR, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/flink.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

CommonConfig

public class CommonConfig {
    /**
     * Kafka bootstrap servers
     */
    public static final String BOOTSTRAP_SERVERS = "";

    /**
     * Source topic
     */
    public static final String SOURCE_TOPIC = "";

    /**
     * Target topic
     */
    public static final String TARGET_TOPIC = "";

    /**
     * Application name
     */
    public static final String APP_NAME = "";

    /**
     * State backend (checkpoint) path
     */
    public static final String STATE_BACKEND_PATH = "hdfs://master1:8020/checkpoint";
}

KafkaConsumerUtil

import java.util.Properties;

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;

public class KafkaConsumerUtil {
    static String BOOTSTRAP_SERVERS = CommonConfig.BOOTSTRAP_SERVERS;

    public static FlinkKafkaConsumer<String> getKafkaConsumer(String topic, String groupId) {
        Properties prop = new Properties();
        prop.setProperty("bootstrap.servers", BOOTSTRAP_SERVERS);
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        return new FlinkKafkaConsumer<>(topic,
                // The default string deserializer fails on records whose value is null,
                // so use a custom one that maps null values to an empty string
                new KafkaDeserializationSchema<String>() {
                    @Override
                    public boolean isEndOfStream(String nextElement) {
                        return false;
                    }

                    @Override
                    public String deserialize(ConsumerRecord<byte[], byte[]> record) throws Exception {
                        if (record == null || record.value() == null) {
                            return "";
                        }
                        return new String(record.value(), "UTF-8");
                    }

                    @Override
                    public TypeInformation<String> getProducedType() {
                        return BasicTypeInfo.STRING_TYPE_INFO;
                    }
                }, prop);
    }
}
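One note on the exactly-once story on the source side: once checkpointing is enabled, the consumer's offsets are stored in Flink's checkpoint state and restored from it on recovery; committing offsets back to Kafka only serves external monitoring tools. A minimal usage sketch (the group id here is a placeholder, not from the template):

// Offsets stored in checkpoints drive recovery; the Kafka-side commit is informational only
FlinkKafkaConsumer<String> consumer =
        KafkaConsumerUtil.getKafkaConsumer(CommonConfig.SOURCE_TOPIC, "my-group"); // placeholder group id
consumer.setCommitOffsetsOnCheckpoints(true); // the default when checkpointing is enabled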

KafkaProductUtil

import java.nio.charset.StandardCharsets;
import java.util.Properties;

import javax.annotation.Nullable;

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KafkaProductUtil {

    public static FlinkKafkaProducer<String> getKafkaProduct(String targetTopic) {
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", CommonConfig.BOOTSTRAP_SERVERS);

        // If the Kafka transaction times out before Flink finishes a checkpoint, the
        // commit fails. The transaction timeout must therefore be at least as large as
        // the checkpoint interval/duration. 15 minutes here, which is also the broker's
        // default upper bound (transaction.max.timeout.ms).
        properties.setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, String.valueOf(15 * 60 * 1000));

        KafkaSerializationSchema<String> serializationSchema = new KafkaSerializationSchema<String>() {
            @Override
            public ProducerRecord<byte[], byte[]> serialize(String element, @Nullable Long timestamp) {
                return new ProducerRecord<>(
                        targetTopic,                               // target topic
                        element.getBytes(StandardCharsets.UTF_8)); // record contents
            }
        };

        return new FlinkKafkaProducer<>(
                targetTopic,                               // default topic
                serializationSchema,                       // serialization schema
                properties,                                // producer config
                FlinkKafkaProducer.Semantic.EXACTLY_ONCE); // fault tolerance
    }
}
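Because the sink writes with EXACTLY_ONCE semantics, records land in Kafka inside transactions and only become visible once the enclosing checkpoint completes. Any downstream consumer that must not see aborted or not-yet-committed records has to read with isolation.level=read_committed; Kafka's default is read_uncommitted. A minimal consumer-properties sketch (the group id is a placeholder):

Properties props = new Properties();
props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CommonConfig.BOOTSTRAP_SERVERS);
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "verify-group"); // placeholder group id
// Only deliver records from committed Kafka transactions
props.setProperty(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");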

FlinkMessageUnique

import java.util.concurrent.TimeUnit;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

public class FlinkMessageUnique {
    private final static String GROUP_ID = FlinkMessageUnique.class.getSimpleName();

    public static void main(String[] args) throws Exception {
        // To run with a local Flink web UI (requires flink-runtime-web), uncomment:
        // Configuration configuration = new Configuration();
        // configuration.setInteger(RestOptions.PORT, 8082);
        // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(configuration);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Keep the parallelism consistent with the number of Kafka partitions
        env.setParallelism(1);

        // TODO 1. Checkpoint / state backend settings
        env.enableCheckpointing(3000L, CheckpointingMode.EXACTLY_ONCE);
        // Checkpoint timeout
        env.getCheckpointConfig().setCheckpointTimeout(60 * 1000L);
        // Minimum pause between checkpoints: wait at least 3s after one checkpoint
        // completes before starting the next one
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(3000L);
        env.getCheckpointConfig().enableExternalizedCheckpoints(
                // ExternalizedCheckpointCleanup controls how externalized checkpoints
                // are cleaned up when the job is cancelled:
                // DELETE_ON_CANCELLATION deletes the externalized state on cancel;
                // RETAIN_ON_CANCELLATION keeps it, so the job can be restored later
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION
        );
        env.setRestartStrategy(RestartStrategies.failureRateRestart(
                // Maximum number of failures allowed within the measuring interval: 10
                10,
                // Measuring interval for the failure rate
                Time.of(1L, TimeUnit.MINUTES),
                // Delay between two consecutive restart attempts
                Time.of(3L, TimeUnit.MINUTES)
        ));
        // Set the state backend. This can be an HDFS path (as configured in
        // CommonConfig) or a local path for easier testing.
        env.setStateBackend(new RocksDBStateBackend(CommonConfig.STATE_BACKEND_PATH, true));
        // env.setStateBackend(new RocksDBStateBackend("hdfs://master1:8020/fink-checkpoints", true));
        // env.getCheckpointConfig().setCheckpointStorage("hdfs://master1:8020/bigdata/ck");
        System.setProperty("HADOOP_USER_NAME", "bigdata");

        DataStreamSource<String> data = env.addSource(
                KafkaConsumerUtil.getKafkaConsumer(CommonConfig.SOURCE_TOPIC, GROUP_ID));

        // TODO deduplicate with keyed state (see the Bloom-filter variant below)
        SingleOutputStreamOperator<String> deduplicated = data.keyBy(new KeySelector<String, String>() {
            @Override
            public String getKey(String s) throws Exception {
                // Key by the message itself, so state is scoped per distinct message
                return s;
            }
        }).process(new KeyedProcessFunction<String, String, String>() {
            private transient ValueState<Integer> uniqueCount;

            @Override
            public void open(Configuration parameters) throws Exception {
                ValueStateDescriptor<Integer> descriptor =
                        new ValueStateDescriptor<>(
                                "uniqueCount", // the state name
                                TypeInformation.of(new TypeHint<Integer>() {}), // type information
                                0); // default value of the state, if nothing was set
                uniqueCount = getRuntimeContext().getState(descriptor);
            }

            @Override
            public void processElement(String s, Context context, Collector<String> collector) throws Exception {
                Integer count = uniqueCount.value();
                // Emit only the first occurrence of each distinct message
                if (count == 0) {
                    collector.collect(s);
                    uniqueCount.update(1);
                }
            }
            // An explicit uid makes the operator's state addressable, which eases
            // recovery from checkpoints/savepoints after the job graph changes
        }).uid("process_unique_count");

        // Write the deduplicated stream back to the target Kafka topic
        deduplicated.addSink(KafkaProductUtil.getKafkaProduct(CommonConfig.TARGET_TOPIC));
        env.execute(CommonConfig.APP_NAME);
    }
}
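The clearspring `stream` dependency in the poms exists for the Bloom-filter variant the TODO hints at: instead of one ValueState entry per distinct message, route all records to a single key and keep one Bloom filter in keyed state. This bounds memory at the cost of exactness, since a false positive drops a non-duplicate. A minimal sketch, assuming stream-lib's `BloomFilter` with its `add`/`isPresent` API and Flink's Kryo fallback for snapshotting the filter; the capacity and false-positive rate are illustrative:

import com.clearspring.analytics.stream.membership.BloomFilter;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

public class BloomDedupFunction extends KeyedProcessFunction<String, String, String> {
    private transient ValueState<BloomFilter> filterState;

    @Override
    public void open(Configuration parameters) throws Exception {
        // The filter is snapshotted via Flink's generic (Kryo) serialization
        filterState = getRuntimeContext().getState(
                new ValueStateDescriptor<>("bloomFilter", BloomFilter.class));
    }

    @Override
    public void processElement(String s, Context ctx, Collector<String> out) throws Exception {
        BloomFilter filter = filterState.value();
        if (filter == null) {
            // Illustrative sizing: ~1M expected elements, 0.01% false-positive rate
            filter = new BloomFilter(1_000_000, 0.0001);
        }
        if (!filter.isPresent(s)) { // definitely not seen before
            out.collect(s);
            filter.add(s);
            filterState.update(filter);
        }
    }
}

It would be wired in with something like data.keyBy(s -> "all").process(new BloomDedupFunction()); the constant key funnels every record through a single subtask and a single shared filter, which is the setting where this variant (and the template's original fixed key) makes sense.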

Job Submission

/home/bigdata/module/flink-1.13.6/bin/flink run \
-d \
-m yarn-cluster \
-yqu default \
-ynm <application-name-on-yarn> \
-c com.bigdata.FlinkMessageUnique \
SNAPSHOT-jar-with-dependencies.jar

State Recovery

After resubmission the job generates a new random checkpoint directory (named after the new job id) and continues numbering from the restored checkpoint: restored from chk-1560, the next checkpoint is, for example, chk-1561. List the checkpoint directory (e.g. with hdfs dfs -ls) to find the latest chk-N subdirectory to pass to -s.

/home/bigdata/module/flink-1.13.6/bin/flink run \
-d \
-s hdfs://master1:8020/checkpoint/bigdata/ck/d5e28ce894fbd7ea9d25a52c1972892d/chk-1560 \
-m yarn-cluster \
-yqu default \
-ynm <application-name-on-yarn> \
-c com.bigdata.FlinkMessageUnique \
SNAPSHOT-jar-with-dependencies.jar
