Flink Examples: Sink

Table of Contents

  • Flink Examples: Sink
    • Version Info
    • Maven Dependencies
    • Custom SourceFunction
    • Print Sink Example
    • Kafka Sink Example
    • Elasticsearch Sink Example
    • Redis Sink Example
    • Custom Sink Example (JDBC)

Flink Examples: Sink

Version Info

Product  Version
Flink    1.7.2
Java     1.8.0_231
Scala    2.11.12

Maven Dependencies

  • pom.xml dependency section (the ${flink.version} property is assumed to be declared in <properties>; see the snippet after this block)
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    
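  • The ${flink.version} property is not part of the snippet above; a minimal assumption is that it is declared in the pom's <properties> block to match the version table:
    <properties>
        <flink.version>1.7.2</flink.version>
    </properties>
    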

Custom SourceFunction

  • A custom SourceFunction that the following examples use as their test data source
    public class CustomSourceFunction extends RichSourceFunction<Tuple2<String, Long>> {
    
        private volatile boolean flag = true; // volatile, because cancel() is called from a different thread
    
        @Override
        public void run(SourceContext<Tuple2<String, Long>> ctx) throws Exception {
            List<String> data = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
            Random random = new Random();
            while (flag) {
                Thread.sleep(100);
                // Pick a random key and pair it with the current timestamp
                String key = data.get(random.nextInt(data.size()));
                long value = System.currentTimeMillis();
                ctx.collect(Tuple2.of(key, value));
            }
        }
    
        @Override
        public void cancel() {
            flag = false;
        }
    
    }
    

Print Sink Example

  • Code: PrintSinkDemo
    public class PrintSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            
            // Custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);
            
    //        customDS.print("stream");
    
            // As the source code shows, print() internally uses a PrintSinkFunction
            PrintSinkFunction<Tuple2<String, Long>> sinkFunction = new PrintSinkFunction<>("stream", false);
            customDS.addSink(sinkFunction);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
    }
    

Kafka Sink Example

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    
  • Code: KafkaSinkDemo
    public class KafkaSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // Custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // Add the source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);
            // Transform each tuple into a String
            DataStream<String> resultDS = customDS.map(new MapFunction<Tuple2<String, Long>, String>() {
                @Override
                public String map(Tuple2<String, Long> value) throws Exception {
                    return value.f0 + "|" + value.f1;
                }
            });
    
            // Create the FlinkKafkaProducer
            FlinkKafkaProducer011<String> kafkaProducer011 = generateKafkaProducer();
    
            // Write to Kafka
            resultDS.addSink(kafkaProducer011);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        /**
         * Builds the FlinkKafkaProducer
         */
        private static FlinkKafkaProducer011<String> generateKafkaProducer() {
            // Create the FlinkKafkaProducer
            FlinkKafkaProducer011<String> kafkaProducer011 = new FlinkKafkaProducer011<>(
                    "192.168.0.101:9092", "topic_01", new SimpleStringSchema()
            );
            // Custom serialization example (a sketch wiring this into the producer follows the class)
            /*
            new KeyedSerializationSchema() {
                @Override
                public byte[] serializeKey(String element) {
                    // May return null, or encode part of the String as the message key
                    return null;
                }
    
                @Override
                public byte[] serializeValue(String element) {
                    // Encode the String as byte[]
                    return element.getBytes(StandardCharsets.UTF_8);
                }
    
                @Override
                public String getTargetTopic(String element) {
                    // As the source code shows, this takes the highest priority; the topic id passed to FlinkKafkaProducer011 is only the default
                    // Different records can therefore be routed to different topics here
                    return null;
                }
            };
            */
    
            return kafkaProducer011;
        }
    
    }
    
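  • A minimal sketch (not in the original post) showing how the commented-out KeyedSerializationSchema could be passed to the producer; the broker address, topic, and key-extraction logic are illustrative assumptions:
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "192.168.0.101:9092");
    FlinkKafkaProducer011<String> keyedProducer = new FlinkKafkaProducer011<>(
            "topic_01", // default topic; getTargetTopic() may override it per record
            new KeyedSerializationSchema<String>() {
                @Override
                public byte[] serializeKey(String element) {
                    // Assumption: use the part before '|' (the key from CustomSourceFunction) as the Kafka message key
                    return element.split("\\|")[0].getBytes(StandardCharsets.UTF_8);
                }
    
                @Override
                public byte[] serializeValue(String element) {
                    return element.getBytes(StandardCharsets.UTF_8);
                }
    
                @Override
                public String getTargetTopic(String element) {
                    // null falls back to the default topic above
                    return null;
                }
            },
            props
    );
    // resultDS.addSink(keyedProducer);
    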
  • Two-phase commit for exactly-once delivery (checkpointing must also be enabled; see the note after this snippet)
    // About transaction timeouts:
    // Kafka's maximum transaction timeout defaults to 15 min (transaction.max.timeout.ms = 900000)
    // Flink's producer transaction timeout defaults to 1 h (transaction.timeout.ms = 3600000)
    // If the gap is too large, Kafka may have already aborted a transaction that Flink still considers open
    // Therefore set Flink's transaction timeout to be less than or equal to Kafka's maximum
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "192.168.0.101:9092");
    props.setProperty("transaction.timeout.ms", "900000");
    FlinkKafkaProducer011<String> kafkaProducer011 = new FlinkKafkaProducer011<>(
            "topic_01",
            new SimpleStringSchema(),
            props,
            // Two-phase commit for exactly-once; the default semantic is AT_LEAST_ONCE
            FlinkKafkaProducer011.Semantic.EXACTLY_ONCE
    );
    
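  • Note: the EXACTLY_ONCE semantic relies on Flink checkpoints to commit the Kafka transactions, so checkpointing must be enabled on the environment. A minimal sketch (the 5-second interval is an arbitrary example value):
    env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
    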

Elasticsearch Sink Example

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    
  • Code: ESSinkDemo (a bulk-flush tuning sketch follows the class)
    public class ESSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // Custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // Add the source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);
            
            // Create the ElasticsearchSink
            ElasticsearchSink<Tuple2<String, Long>> esSink = generateESSink();
    
            // Write to Elasticsearch
            customDS.addSink(esSink);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        /**
         * Builds the ElasticsearchSink
         */
        private static ElasticsearchSink<Tuple2<String, Long>> generateESSink() {
            // Configure the HttpHosts
            List<HttpHost> httpHosts = Collections.singletonList(
                    new HttpHost("192.168.0.120", 9200)
            );
            ElasticsearchSinkFunction<Tuple2<String, Long>> sinkFunction = new ElasticsearchSinkFunction<Tuple2<String, Long>>() {
                @Override
                public void process(Tuple2<String, Long> tuple2, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                    // Build the document
                    HashMap<String, String> map = new HashMap<>();
                    map.put("content", tuple2.f0);
                    map.put("eventTime", tuple2.f1.toString());
                    map.put("processTime", String.valueOf(System.currentTimeMillis()));
                    // Build the IndexRequest
                    IndexRequest request = Requests.indexRequest()
                            .index("my_index")
                            .type("my_data")
                            .source(map);
                    
                    // Hand the request to the bulk indexer
                    requestIndexer.add(request);
                }
            };
    
            ElasticsearchSink<Tuple2<String, Long>> esSink = new ElasticsearchSink.Builder<>(
                    httpHosts, sinkFunction
            ).build();
    
            return esSink;
        }
    
    }
    
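  • By default the Elasticsearch sink buffers requests and sends them in bulk; for a quick local test the buffering can be tightened. A minimal sketch (not in the original post), assuming the standard Builder setters of flink-connector-elasticsearch6:
    ElasticsearchSink.Builder<Tuple2<String, Long>> builder =
            new ElasticsearchSink.Builder<>(httpHosts, sinkFunction);
    // Flush after every single request (handy for demos; keep the default batching in production)
    builder.setBulkFlushMaxActions(1);
    ElasticsearchSink<Tuple2<String, Long>> esSink = builder.build();
    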

Redis Sink Example

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>org.apache.bahir</groupId>
        <artifactId>flink-connector-redis_2.11</artifactId>
        <version>1.0</version>
    </dependency>
    
  • Code: RedisSinkDemo (a SET-based variant is sketched after the class)
    public class RedisSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // Custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // Add the source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(sourceFunction);
    
            // Create the RedisSink
            RedisSink<Tuple2<String, Long>> redisSink = generateRedisSink();
    
            // Write to Redis
            customDS.addSink(redisSink);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        /**
         * Builds the RedisSink
         */
        private static RedisSink<Tuple2<String, Long>> generateRedisSink() {
            // Redis connection pool configuration
            FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder()
                    .setMaxTotal(8) // maximum number of pooled connections
                    .setMaxIdle(4) // maximum number of idle connections
                    .setMinIdle(2)
                    .setHost("192.168.0.110")
                    .setPort(6379)
                    .build();
            // Mapper
            RedisMapper<Tuple2<String, Long>> redisMapper = new RedisMapper<Tuple2<String, Long>>() {
                @Override
                public RedisCommandDescription getCommandDescription() {
                    // The Redis command used to store the data
                    return new RedisCommandDescription(
                            RedisCommand.HSET, // use the HSET command
                            "my_hash" // the hash (additional) key
                    );
                }
    
                @Override
                public String getKeyFromData(Tuple2<String, Long> tuple2) {
                    return tuple2.f0;
                }
    
                @Override
                public String getValueFromData(Tuple2<String, Long> tuple2) {
                    return tuple2.f1.toString();
                }
            };
    
            return new RedisSink<>(config, redisMapper);
        }
    
    }
    
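  • The same RedisMapper pattern also works for non-hash commands; with a plain SET the additional key is not needed. A minimal sketch (not in the original post) of an alternative getCommandDescription():
    @Override
    public RedisCommandDescription getCommandDescription() {
        // Plain SET: getKeyFromData() becomes the Redis key, getValueFromData() the value
        return new RedisCommandDescription(RedisCommand.SET);
    }
    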

Custom Sink Example (JDBC)

  • Maven dependency (pom.xml)
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.38</version>
    </dependency>
    
  • Code: JDBCUtils
    public class JDBCUtils {
    
        static {
            try {
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
        }
    
        public static Connection getConnection() {
            String url = "jdbc:mysql://192.168.0.130:3306/test_db";
            String user = "user_name";
            String password = "123456";
    
            Connection conn = null;
            try {
                conn = DriverManager.getConnection(url, user, password);
            } catch (SQLException e) {
                e.printStackTrace();
            }
    
            if (conn == null) {
                throw new RuntimeException("Failed to create JDBC connection!");
            }
    
            return conn;
        }
    
    }
    
  • Code: JDBCSinkFunction
    public class JDBCSinkFunction extends RichSinkFunction<Tuple2<String, Long>> {
    
        private Connection conn = null;
        private PreparedStatement pst;
    
        @Override
        public void open(Configuration parameters) throws Exception {
            conn = JDBCUtils.getConnection();
            pst = conn.prepareStatement(
                    "INSERT INTO tb_name (content, event_time, update_time) VALUES (?, ?, ?)"
            );
        }
    
        @Override
        public void close() throws Exception {
            // Release the statement and the connection
            pst.close();
            conn.close();
        }
    
        @Override
        public void invoke(Tuple2<String, Long> value, Context context) throws Exception {
            // Execute the insert
            pst.setString(1, value.f0);
            pst.setLong(2, value.f1);
            pst.setLong(3, System.currentTimeMillis());
            pst.execute();
        }
    
        // Batch insert example
        /*
        private int count = 0;
    
        @Override
        public void invoke(Tuple2 value, Context context) throws Exception {
            // Add the row to the current batch
            pst.setString(1, value.f0);
            pst.setLong(2, value.f1);
            pst.setLong(3, System.currentTimeMillis());
            pst.addBatch();
    
            count++;
    
            // Flush the batch every 1000 records
            if (count == 1000){
                pst.executeBatch();
                conn.commit(); // commit manually
                pst.clearBatch();
                count = 0;
            }
            // Remember to disable auto-commit in open() [conn.setAutoCommit(false);] - see the sketch after this class
        }
        */
    
    }
    
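  • For the batch variant above, open() also needs to disable auto-commit, as the comment notes. A minimal sketch (not in the original post) of the adjusted open():
    @Override
    public void open(Configuration parameters) throws Exception {
        conn = JDBCUtils.getConnection();
        // Disable auto-commit so that executeBatch() + commit() controls the transaction boundaries
        conn.setAutoCommit(false);
        pst = conn.prepareStatement(
                "INSERT INTO tb_name (content, event_time, update_time) VALUES (?, ?, ?)"
        );
    }
    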
  • Code: CustomSinkDemo
    public class CustomSinkDemo {
    
        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            // Custom SourceFunction
            CustomSourceFunction sourceFunction = new CustomSourceFunction();
            // Add the source
            DataStreamSource<Tuple2<String, Long>> dataDS = env.addSource(sourceFunction);
    
            // Create the JDBCSinkFunction
            JDBCSinkFunction jdbcSink = new JDBCSinkFunction();
    
            // Write to MySQL via the custom sink
            dataDS.addSink(jdbcSink);
    
            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
    }
    
