Flink (5) Flink Operators: Common Sink Operators

  • Common Sink Operators
    • 1. KafkaSink
    • 2. JdbcSink
      • 1. JdbcSink without Exactly-Once guarantees (code example)
      • 2. JdbcSink with Exactly-Once guarantees (code example)
    • 3. RedisSink

Common Sink Operators

Every sink operator is, at bottom, a wrapper around the client library of the corresponding data store:
the Kafka sink wraps KafkaProducer,
the JDBC sink wraps JDBC,
the Redis sink wraps Jedis.
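
All of the examples below share an EventLog POJO and a custom MySourceFunction that the post itself does not show. Here is a minimal sketch of what they might look like, with the field set inferred from the getters the sinks call; both classes are assumptions, not the post's actual code:

import org.apache.flink.streaming.api.functions.source.SourceFunction;

// Hypothetical POJO; the fields are inferred from the getters used in the examples.
public class EventLog {
    private long guid;
    private String sessionId;
    private String eventId;
    private long timeStamp;

    public long getGuid() { return guid; }
    public void setGuid(long guid) { this.guid = guid; }
    public String getSessionId() { return sessionId; }
    public void setSessionId(String sessionId) { this.sessionId = sessionId; }
    public String getEventId() { return eventId; }
    public void setEventId(String eventId) { this.eventId = eventId; }
    public long getTimeStamp() { return timeStamp; }
    public void setTimeStamp(long timeStamp) { this.timeStamp = timeStamp; }
}

// Hypothetical source that emits one EventLog every 200 ms.
class MySourceFunction implements SourceFunction<EventLog> {
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<EventLog> ctx) throws Exception {
        long guid = 0;
        while (running) {
            EventLog log = new EventLog();
            log.setGuid(guid++);
            log.setSessionId(java.util.UUID.randomUUID().toString());
            log.setEventId("e_" + (guid % 10));
            log.setTimeStamp(System.currentTimeMillis());
            ctx.collect(log);
            Thread.sleep(200);
        }
    }

    @Override
    public void cancel() { running = false; }
}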

1.KafkaSink

KafkaSink integrates with Flink's checkpoint mechanism to support end-to-end exactly-once semantics
(under the hood this relies on the Kafka producer's transaction mechanism).

  • Flink integrates with multiple Kafka versions, e.g. 0.11.x, 1.x, 2.x;
    since Flink 1.9, the Kafka 2.2 client is used.
    Core classes of the legacy connector (a sketch using them follows the example below):

    • KafkaStringSerializationSchema – serialization schema
    • FlinkKafkaProducer – the producer (i.e. the sink)
  • Code example using the new KafkaSink operator:

import com.alibaba.fastjson.JSON;
import com.yang.flink.source.MySourceFunction;
import com.yang.flink.vo.EventLog;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KafkaSinkOperatorDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // enable checkpointing
        env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointStorage("file:///e:/ckpt");

        DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction()); // custom source; a Kafka source could be used here instead
        KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
                .setBootstrapServers("hadoop102:9092,hadoop103:9092")
                .setRecordSerializer(KafkaRecordSerializationSchema.<String>builder()
                        .setTopic("event_log")
                        .setValueSerializationSchema(new SimpleStringSchema())
                        .build()
                )
                .setDeliverGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
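                // with EXACTLY_ONCE, the producer's transaction timeout must stay within the
                // broker's transaction.max.timeout.ms cap (15 minutes by default); it can be
                // lowered via .setProperty("transaction.timeout.ms", "600000")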
                .setTransactionalIdPrefix("yang")
                .build();

        streamSource.map(JSON::toJSONString).disableChaining() // disableChaining() keeps the map out of any operator chain, so it runs as its own task and can be scheduled independently
                .sinkTo(kafkaSink);
        env.execute();
    }
}
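
For comparison, here is a minimal sketch of the legacy-connector path built around the FlinkKafkaProducer class listed above. This is an illustration under assumptions, not code from the post: the serialization schema is hand-rolled rather than the KafkaStringSerializationSchema class, and the topic and bootstrap servers are reused from the example.

import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.nio.charset.StandardCharsets;
import java.util.Properties;

public class LegacyKafkaSinkSketch {
    public static FlinkKafkaProducer<String> buildProducer() {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "hadoop102:9092,hadoop103:9092");
        // keep the transaction timeout within the broker's 15-minute default cap
        props.setProperty("transaction.timeout.ms", "600000");

        return new FlinkKafkaProducer<>(
                "event_log",
                new KafkaSerializationSchema<String>() {
                    @Override
                    public ProducerRecord<byte[], byte[]> serialize(String element, Long timestamp) {
                        return new ProducerRecord<>("event_log", element.getBytes(StandardCharsets.UTF_8));
                    }
                },
                props,
                FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
    }
}

Note that a legacy producer is attached with streamSource.addSink(producer) rather than sinkTo(...).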

2.JdbcSink

1. JdbcSink without Exactly-Once guarantees (code example)

import com.yang.flink.source.MySourceFunction;
import com.yang.flink.vo.EventLog;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.jdbc.*;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.sql.PreparedStatement;
import java.sql.SQLException;

public class JDBCSinkOperatorDemo {
    public static void main(String[] args) throws Exception {

        Configuration configuration = new Configuration();
        configuration.setInteger("rest.port", 8822);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
        // enable checkpointing
        env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointStorage("file:///e:/ckpt");
        // build the data stream
        DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());

        /**
         * 1. A sink that does NOT guarantee exactly-once (EOS) semantics
         */
        SinkFunction<EventLog> jdbcSink = JdbcSink.sink(
                "insert into t_eventlog values (?,?,?,?) on duplicate key update guid=?,sessionId=?,eventId=?,ts=?",
                new JdbcStatementBuilder<EventLog>() {
                    @Override
                    public void accept(PreparedStatement preparedStatement, EventLog eventLog) throws SQLException {
                        preparedStatement.setLong(1, eventLog.getGuid());
                        preparedStatement.setString(2, eventLog.getSessionId());
                        preparedStatement.setString(3, eventLog.getEventId());
                        preparedStatement.setLong(4, eventLog.getTimeStamp());

                        // the "on duplicate key update" clause repeats all four columns,
                        // so placeholders 5-8 must be bound as well
                        preparedStatement.setLong(5, eventLog.getGuid());
                        preparedStatement.setString(6, eventLog.getSessionId());
                        preparedStatement.setString(7, eventLog.getEventId());
                        preparedStatement.setLong(8, eventLog.getTimeStamp());
                    }
                },
                JdbcExecutionOptions.builder()
                        .withMaxRetries(3)
                        .withBatchSize(1)
                        .build(),
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withUrl("jdbc:mysql://hadoop102:3306/abc?serverTimezone=Asia/Shanghai&useUnicode=true&characterEncoding=UTF-8")
                        .withUsername("root")
                        .withPassword("000000")
                        .build()
        );
        // write the data out
        streamSource.addSink(jdbcSink);
        env.execute();
    }
}
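
Note that withBatchSize(1) above flushes every single record, which is easy to reason about but slow. A hedged alternative (the values are illustrative, not tuned) buffers writes and bounds flush latency:

        JdbcExecutionOptions.builder()
                .withBatchSize(100)        // flush once 100 rows are buffered...
                .withBatchIntervalMs(200)  // ...or after 200 ms, whichever comes first
                .withMaxRetries(3)
                .build();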

2. JdbcSink with Exactly-Once guarantees (code example)

import com.mysql.cj.jdbc.MysqlXADataSource;
import com.yang.flink.source.MySourceFunction;
import com.yang.flink.vo.EventLog;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.jdbc.*;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.util.function.SerializableSupplier;

import javax.sql.XADataSource;
import java.sql.PreparedStatement;
import java.sql.SQLException;

public class JDBCSinkOperatorDemo {
    public static void main(String[] args) throws Exception {

        Configuration configuration = new Configuration();
        configuration.setInteger("rest.port", 8822);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // enable checkpointing
        env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointStorage("file:///e:/ckpt");

        // build the data stream
        DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());

        /**
         * 2. A sink that CAN provide EOS guarantees:
         * construct a JdbcSink operator that supports exactly-once,
         * built on the target database's XA (distributed) transaction support
         */
        SinkFunction<EventLog> exactlyOnceSink = JdbcSink.exactlyOnceSink(
                "insert into t_eventlog values (?,?,?,?) on duplicate key update guid=?,sessionId=?,eventId=?,ts=?",
                new JdbcStatementBuilder<EventLog>() {
                    @Override
                    public void accept(PreparedStatement preparedStatement, EventLog eventLog) throws SQLException {
                        preparedStatement.setLong(1, eventLog.getGuid());
                        preparedStatement.setString(2, eventLog.getSessionId());
                        preparedStatement.setString(3, eventLog.getEventId());
                        preparedStatement.setLong(4, eventLog.getTimeStamp());

                        // the "on duplicate key update" clause repeats all four columns,
                        // so placeholders 5-8 must be bound as well
                        preparedStatement.setLong(5, eventLog.getGuid());
                        preparedStatement.setString(6, eventLog.getSessionId());
                        preparedStatement.setString(7, eventLog.getEventId());
                        preparedStatement.setLong(8, eventLog.getTimeStamp());
                    }
                },
                JdbcExecutionOptions.builder()
                        .withMaxRetries(3)
                        .withBatchSize(1)
                        .build(),
                JdbcExactlyOnceOptions.builder()
                        // MySQL does not allow multiple transactions in parallel on one
                        // connection, so this option must be set to true
                        .withTransactionPerConnection(true)
                        .build(),
                new SerializableSupplier<XADataSource>() {
                    @Override
                    public XADataSource get() {
                        // An XADataSource is a JDBC data source with distributed-transaction
                        // (XA) support; how it is constructed differs per database
                        MysqlXADataSource xaDataSource = new MysqlXADataSource();
                        xaDataSource.setUrl("jdbc:mysql://hadoop102:3306/abc?serverTimezone=Asia/Shanghai&useUnicode=true&characterEncoding=UTF-8");
                        xaDataSource.setUser("root");
                        xaDataSource.setPassword("000000");
                        return xaDataSource;
                    }
                }
        );
        // write the data out
        streamSource.addSink(exactlyOnceSink);
        env.execute();
    }
}
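
As the comment above notes, the XADataSource construction is database-specific. For illustration, here is a hedged sketch of an equivalent supplier for PostgreSQL (assuming the postgresql JDBC driver is on the classpath; host, database, and credentials are placeholders):

import org.apache.flink.util.function.SerializableSupplier;
import org.postgresql.xa.PGXADataSource;

import javax.sql.XADataSource;

public class PgXaSupplierSketch {
    public static SerializableSupplier<XADataSource> pgXaSupplier() {
        return () -> {
            // PGXADataSource is PostgreSQL's XA-capable data source
            PGXADataSource ds = new PGXADataSource();
            ds.setServerName("hadoop102");
            ds.setPortNumber(5432);
            ds.setDatabaseName("abc");
            ds.setUser("root");
            ds.setPassword("000000");
            return ds;
        };
    }
}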

3.RedisSink

  • There is currently no official Flink sink operator for Redis, so:

    • 1. import the Apache-hosted project at https://github.com/apache/bahir-flink into your own code base,
    • 2. delete the other modules (including their entries in the parent pom),
    • 3. then build the flink-connector-redis module; once it is packaged locally, reference that local artifact directly.
  • Code example:

import com.alibaba.fastjson.JSON;
import com.yang.flink.source.MySourceFunction;
import com.yang.flink.vo.EventLog;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;

import java.util.Optional;

/**
 * Write a data stream to Redis using the RedisSink operator
 */
public class RedisSinkOperatorDemo {
    public static void main(String[] args) throws Exception {

        Configuration configuration = new Configuration();
        configuration.setInteger("rest.port", 8822);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);


        // enable checkpointing
        env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointStorage("file:///e:/ckpt");

        // build the data stream
        DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());


        // write the EventLog records to Redis; the mapper passed to the sink decides which Redis structure stores them
        FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder().setHost("hadoop102").build();

        RedisSink<EventLog> redisSink = new RedisSink<>(config, new StringInsertMapper());

        streamSource.addSink(redisSink);


        env.execute();
    }


    /**
     * 1. Write records to Redis as STRING entries
     */
    static class StringInsertMapper implements RedisMapper<EventLog> {

        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.SET,"eventlogs");
        }

        /**
         *  For a Redis structure without an inner key, this method returns the outer (top-level) key;
         *  for a structure with an inner key (e.g. a hash written via HSET), it returns the inner field,
         *  and the value passed to the RedisCommandDescription above becomes the outer key
         * @param data
         * @return
         */
        @Override
        public String getKeyFromData(EventLog data) {
            return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp();   // the top-level key of the string entry
        }

        @Override
        public String getValueFromData(EventLog data) {
            return JSON.toJSONString(data);   // the value of the string entry
        }
    }


    /**
     * 2. Write records to Redis as HASH entries
     */
    static class HsetInsertMapper implements RedisMapper<EventLog>{
        // the additional key (the outer key of the hash) can be chosen per record
        @Override
        public Optional<String> getAdditionalKey(EventLog data) {
            return RedisMapper.super.getAdditionalKey(data);
        }

        // a per-record TTL (time to live) can also be set
        @Override
        public Optional<Integer> getAdditionalTTL(EventLog data) {
            return RedisMapper.super.getAdditionalTTL(data);
        }

        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.HSET,"event-logs");
        }

        /**
         *  For a Redis structure without an inner key, this method returns the outer (top-level) key;
         *  for a structure with an inner key (e.g. a hash written via HSET), it returns the inner field,
         *  and the value passed to the RedisCommandDescription above becomes the outer key
         * @param data
         * @return
         */
        @Override
        public String getKeyFromData(EventLog data) {
            return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp();  // the hash field (inner key)
        }

        @Override
        public String getValueFromData(EventLog data) {
            return data.getEventId();   // the hash value
        }


    }
}
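
HsetInsertMapper is defined above but never wired up in main; to write into a Redis hash instead of plain strings, swap the mapper when constructing the sink:

        // use the HASH mapper defined above instead of the STRING mapper
        RedisSink<EventLog> hashSink = new RedisSink<>(config, new HsetInsertMapper());
        streamSource.addSink(hashSink);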
