Flink: Reading Data from Kafka and Writing to Redis with Exactly-Once Semantics

pom.xml



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
		 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>org.myorg.quickstart</groupId>
	<artifactId>quickstart</artifactId>
	<version>0.1</version>
	<packaging>jar</packaging>

	<name>Flink Quickstart Job</name>
	<url>http://www.myorganization.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<flink.version>1.9.1</flink.version>
		<java.version>1.8</java.version>
		<scala.binary.version>2.11</scala.binary.version>
		<maven.compiler.source>${java.version}</maven.compiler.source>
		<maven.compiler.target>${java.version}</maven.compiler.target>
	</properties>

	<repositories>
		<repository>
			<id>apache.snapshots</id>
			<name>Apache Development Snapshot Repository</name>
			<url>https://repository.apache.org/content/repositories/snapshots/</url>
			<releases>
				<enabled>false</enabled>
			</releases>
			<snapshots>
				<enabled>true</enabled>
			</snapshots>
		</repository>
	</repositories>

	<dependencies>
		<!-- Apache Flink dependencies -->
		<!-- These dependencies are provided, because they should not be packaged into the JAR file. -->
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-java</artifactId>
			<version>${flink.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
			<scope>provided</scope>
		</dependency>

		<!-- Add logging framework, to produce console output when running in the IDE. -->
		<!-- These dependencies are excluded from the application JAR by default. -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.7</version>
			<scope>runtime</scope>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>1.2.17</version>
			<scope>runtime</scope>
		</dependency>

		<!-- Async HTTP client -->
		<dependency>
			<groupId>org.apache.httpcomponents</groupId>
			<artifactId>httpasyncclient</artifactId>
			<version>4.1.4</version>
		</dependency>

		<!-- JSON parsing -->
		<dependency>
			<groupId>com.alibaba</groupId>
			<artifactId>fastjson</artifactId>
			<version>1.2.51</version>
		</dependency>

		<!-- Kafka connector -->
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
			<version>${flink.version}</version>
		</dependency>

		<!-- Redis connector -->
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-connector-redis_${scala.binary.version}</artifactId>
			<version>1.1.5</version>
		</dependency>
	</dependencies>

	<build>
		<plugins>
			<!-- Java Compiler -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.1</version>
				<configuration>
					<source>${java.version}</source>
					<target>${java.version}</target>
				</configuration>
			</plugin>

			<!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
			<!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-shade-plugin</artifactId>
				<version>3.0.0</version>
				<executions>
					<!-- Run shade goal on package phase -->
					<execution>
						<phase>package</phase>
						<goals>
							<goal>shade</goal>
						</goals>
						<configuration>
							<artifactSet>
								<excludes>
									<exclude>org.apache.flink:force-shading</exclude>
									<exclude>com.google.code.findbugs:jsr305</exclude>
									<exclude>org.slf4j:*</exclude>
									<exclude>log4j:*</exclude>
								</excludes>
							</artifactSet>
							<filters>
								<filter>
									<!-- Do not copy the signatures in the META-INF folder.
									Otherwise, this might cause SecurityExceptions when using the JAR. -->
									<artifact>*:*</artifact>
									<excludes>
										<exclude>META-INF/*.SF</exclude>
										<exclude>META-INF/*.DSA</exclude>
										<exclude>META-INF/*.RSA</exclude>
									</excludes>
								</filter>
							</filters>
							<transformers>
								<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
									<mainClass>org.myorg.quickstart.StreamingJob</mainClass>
								</transformer>
							</transformers>
						</configuration>
					</execution>
				</executions>
			</plugin>
		</plugins>

		<pluginManagement>
			<plugins>
				<!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
				<plugin>
					<groupId>org.eclipse.m2e</groupId>
					<artifactId>lifecycle-mapping</artifactId>
					<version>1.0.0</version>
					<configuration>
						<lifecycleMappingMetadata>
							<pluginExecutions>
								<pluginExecution>
									<pluginExecutionFilter>
										<groupId>org.apache.maven.plugins</groupId>
										<artifactId>maven-shade-plugin</artifactId>
										<versionRange>[3.0.0,)</versionRange>
										<goals>
											<goal>shade</goal>
										</goals>
									</pluginExecutionFilter>
									<action>
										<ignore/>
									</action>
								</pluginExecution>
								<pluginExecution>
									<pluginExecutionFilter>
										<groupId>org.apache.maven.plugins</groupId>
										<artifactId>maven-compiler-plugin</artifactId>
										<versionRange>[3.1,)</versionRange>
										<goals>
											<goal>testCompile</goal>
											<goal>compile</goal>
										</goals>
									</pluginExecutionFilter>
									<action>
										<ignore/>
									</action>
								</pluginExecution>
							</pluginExecutions>
						</lifecycleMappingMetadata>
					</configuration>
				</plugin>
			</plugins>
		</pluginManagement>
	</build>

	<!-- This profile helps to make things run out of the box in IntelliJ. -->
	<!-- It adds Flink's core classes to the runtime class path. -->
	<!-- Otherwise they are missing in IntelliJ, because the dependency is 'provided'. -->
	<profiles>
		<profile>
			<id>add-dependencies-for-IDEA</id>

			<activation>
				<property>
					<name>idea.version</name>
				</property>
			</activation>

			<dependencies>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-java</artifactId>
					<version>${flink.version}</version>
					<scope>compile</scope>
				</dependency>
				<dependency>
					<groupId>org.apache.flink</groupId>
					<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
					<version>${flink.version}</version>
					<scope>compile</scope>
				</dependency>
			</dependencies>
		</profile>
	</profiles>
</project>

flink-config.properties (it is best to keep this configuration file out of the project itself; put it in its own directory on the server so it is easy to modify)

topics=test
group.id=lzc
bootstrap.servers=bigdata1:9092,bigdata2:9092
auto.offset.reset=earliest
enable.auto.commit=false
checkpoint.interval=10000
redis.host=localhost
redis.pwd=123456
redis.db=0
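
The job receives the path to this file as its first program argument and loads it with ParameterTool (see FlinkKafkaToRedis below). A minimal sketch of that hand-off, taken from inside main; the path is a placeholder:

// Placeholder path: wherever the file actually lives on the server
ParameterTool parameters = ParameterTool.fromPropertiesFile("/opt/conf/flink-config.properties");
String topics = parameters.getRequired("topics");                 // "test"
long interval = parameters.getLong("checkpoint.interval", 5000L); // 10000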
Main class: FlinkKafkaToRedis
package org.myorg.quickstart.kafka;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.util.Collector;

public class FlinkKafkaToRedis {

    public static void main(String[] args) throws Exception {
        // Pass the path of the configuration file as the first program argument
        ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]);

        DataStream<String> lines = FlinkUtil.createKafkaStream(parameters, SimpleStringSchema.class);

        SingleOutputStreamOperator<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> collector) throws Exception {
                for (String word : line.split(" ")) {
                    collector.collect(word);
                }
            }
        });

        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) throws Exception {
                return new Tuple2<>(word, 1);
            }
        });

        // The keyed sum lives in Flink state, so counting resumes where it left off after a failure
        SingleOutputStreamOperator<Tuple2<String, Integer>> sum = wordAndOne.keyBy(0).sum(1);

        sum.map(new MapFunction<Tuple2<String, Integer>, Tuple3<String, String, String>>() {
            @Override
            public Tuple3<String, String, String> map(Tuple2<String, Integer> tp) throws Exception {
                return Tuple3.of("word_count", tp.f0, tp.f1.toString());
            }
        }).addSink(new MyRedisSink());

        FlinkUtil.getEnv().execute("kafkaSource");
    }

}
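
Exactly-once here comes from three pieces working together: the Kafka source replays from the offsets stored in the last successful checkpoint, the keyed sum state is restored from that same checkpoint, and the Redis HSET write is idempotent, so replayed updates overwrite rather than double-count. To sanity-check the word-count wiring without a Kafka cluster, a minimal local sketch (the fromElements source and printed output are stand-ins for Kafka and Redis; this class is not part of the original job):

package org.myorg.quickstart.kafka;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

// Hypothetical local driver: verifies the same flatMap -> map -> keyBy -> sum pipeline on bounded input
public class LocalWordCountCheck {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements("a b", "a c")
                .flatMap((String line, Collector<String> out) -> {
                    for (String w : line.split(" ")) {
                        out.collect(w);
                    }
                }).returns(Types.STRING) // lambdas need explicit result types due to erasure
                .map(w -> Tuple2.of(w, 1)).returns(Types.TUPLE(Types.STRING, Types.INT))
                .keyBy(0)
                .sum(1)
                .print(); // running counts: (a,1) (b,1) (a,2) (c,1)
        env.execute("local-word-count-check");
    }
}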
Custom utility class: FlinkUtil
package org.myorg.quickstart.kafka;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class FlinkUtil {

    private static StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    /**
     * Creates a Kafka source stream.
     *
     * @param parameters job parameters loaded from the properties file
     * @param clazz      the DeserializationSchema class used to decode Kafka records
     * @param <T>        the record type produced by the schema
     * @return a DataStream of deserialized Kafka records
     * @throws Exception if the schema cannot be instantiated
     */
    public static <T> DataStream<T> createKafkaStream(ParameterTool parameters, Class<? extends DeserializationSchema<T>> clazz) throws Exception {
        env.getConfig().setGlobalJobParameters(parameters);
        // Needed when reading HDFS from a local environment; not needed on the cluster
        //System.setProperty("HADOOP_USER_NAME", "root");

        // Checkpointing is disabled by default; enable it here
        env.enableCheckpointing(parameters.getLong("checkpoint.interval", 5000L), CheckpointingMode.EXACTLY_ONCE);
        // Set the restart strategy (by default Flink restarts indefinitely)
        env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000));

        // Set the state backend (better configured in the Flink configuration file)
        //env.setStateBackend(new FsStateBackend("hdfs://namenode:40010/flink/checkpoints"));

        // Keep checkpoint data when the job crashes or is cancelled manually (it is deleted by default)
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", parameters.getRequired("bootstrap.servers"));
        properties.setProperty("group.id", parameters.getRequired("group.id"));
        // If no offsets have been recorded yet, consume from the earliest offset on the first run
        properties.setProperty("auto.offset.reset", parameters.get("auto.offset.reset", "earliest"));
        // The Kafka consumer does not auto-commit offsets; Flink manages them through checkpoints
        properties.setProperty("enable.auto.commit", parameters.get("enable.auto.commit", "false"));

        String topics = parameters.getRequired("topics");
        List<String> topicList = Arrays.asList(topics.split(","));

        // Source: read data from Kafka
        FlinkKafkaConsumer<T> kafkaConsumer = new FlinkKafkaConsumer<>(
                topicList,
                clazz.newInstance(),
                properties);

        // After a successful checkpoint, also write the offsets back to Kafka's internal offsets topic (defaults to true)
        kafkaConsumer.setCommitOffsetsOnCheckpoints(true);

        return env.addSource(kafkaConsumer);

    }

    /**
     * Returns the shared execution environment.
     */
    public static StreamExecutionEnvironment getEnv() {
        return env;
    }
}
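
createKafkaStream is generic over the deserialization schema, which is why it takes a Class and calls newInstance(). Since the pom already ships fastjson, a JSON schema could be plugged in the same way as SimpleStringSchema; a hypothetical sketch (the SensorReading POJO and its fields are assumptions, not part of the original job):

package org.myorg.quickstart.kafka;

import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import java.nio.charset.StandardCharsets;

// Hypothetical schema: parses each Kafka record as a JSON-encoded SensorReading via fastjson
public class SensorReadingSchema implements DeserializationSchema<SensorReading> {

    @Override
    public SensorReading deserialize(byte[] message) {
        return JSON.parseObject(new String(message, StandardCharsets.UTF_8), SensorReading.class);
    }

    @Override
    public boolean isEndOfStream(SensorReading nextElement) {
        return false; // Kafka topics are unbounded
    }

    @Override
    public TypeInformation<SensorReading> getProducedType() {
        return TypeInformation.of(SensorReading.class);
    }
}

// Usage (SensorReading is an assumed POJO with a no-arg constructor):
// DataStream<SensorReading> readings = FlinkUtil.createKafkaStream(parameters, SensorReadingSchema.class);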
Custom Redis sink: MyRedisSink
package org.myorg.quickstart.kafka;

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import redis.clients.jedis.Jedis;

public class MyRedisSink extends RichSinkFunction<Tuple3<String, String, String>> {

    private transient Jedis jedis;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // Fetch the global job parameters registered in FlinkUtil
        ParameterTool params = (ParameterTool) getRuntimeContext().getExecutionConfig().getGlobalJobParameters();
        String host = params.getRequired("redis.host");
        String pwd = params.getRequired("redis.pwd");
        int db = params.getInt("redis.db", 0);
        jedis = new Jedis(host, 6379, 5000);
        jedis.auth(pwd);
        jedis.select(db);
    }

    @Override
    public void invoke(Tuple3<String, String, String> value, Context context) throws Exception {
        if (!jedis.isConnected()) {
            jedis.connect();
        }
        // HSET is idempotent: replaying the same (key, field, value) after a restart overwrites instead of duplicating
        jedis.hset(value.f0, value.f1, value.f2);
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (jedis != null) {
            jedis.close();
        }
    }

}
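
The pom also pulls in flink-connector-redis, whose bundled RedisSink manages the Jedis connection pool for you. As an alternative to the hand-rolled sink above, a sketch of the same hash write with that connector; the host, password, and database values mirror flink-config.properties, and the API shown is the bahir connector 1.1.5, so verify it against your version before relying on it:

package org.myorg.quickstart.kafka;

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

// Sketch: the same HSET write expressed with the bundled connector's mapper interface
public class WordCountRedisMapper implements RedisMapper<Tuple3<String, String, String>> {

    @Override
    public RedisCommandDescription getCommandDescription() {
        // HSET with a fixed hash name; the per-record f0 ("word_count") is ignored here
        return new RedisCommandDescription(RedisCommand.HSET, "word_count");
    }

    @Override
    public String getKeyFromData(Tuple3<String, String, String> data) {
        return data.f1; // the word, used as the hash field
    }

    @Override
    public String getValueFromData(Tuple3<String, String, String> data) {
        return data.f2; // the count
    }
}

// Usage in the job, replacing addSink(new MyRedisSink()):
// FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder()
//         .setHost("localhost").setPort(6379).setPassword("123456").setDatabase(0).build();
// sum.map(...).addSink(new RedisSink<>(conf, new WordCountRedisMapper()));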

