A Java demo: syncing MySQL data with Flink CDC

**Environment**

  1. Java 8
  2. MySQL
  3. Flink 1.13.5
  4. Flink CDC 2.2.1

Create two test tables in the database, student and student1. The job reads change events from student and writes rows into student1, so student1 needs the same columns (a matching DDL sketch follows the student definition below).

CREATE TABLE `student` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(25) COLLATE utf8mb4_bin NOT NULL,
  `age` int(4) NOT NULL,
  `ctime` datetime DEFAULT NULL,
  `mtime` datetime DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=341 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
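student1 is the sink table; for this demo it can simply mirror student, e.g.:

CREATE TABLE `student1` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(25) COLLATE utf8mb4_bin NOT NULL,
  `age` int(4) NOT NULL,
  `ctime` datetime DEFAULT NULL,
  `mtime` datetime DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;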

**pom.xml**



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.apache</groupId>
        <artifactId>apache</artifactId>
        <version>20</version>
    </parent>

    <groupId>com.token</groupId>
    <artifactId>token-flink</artifactId>
    <version>1.0.1-SNAPSHOT</version>
    <name>token-flink</name>
    <description>token-flink</description>

    <properties>
        <java.version>1.8</java.version>
        <slf4j.version>1.7.15</slf4j.version>
        <log4j.version>2.17.1</log4j.version>
        <flink-version>1.13.5</flink-version>
        <mysql-version>8.0.25</mysql-version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <!-- bridge for code that still uses the log4j 1.x API -->
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-1.2-api</artifactId>
            <version>${log4j.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <!-- Table API planner -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <!-- keep all Flink artifacts on the same Scala version (2.12) -->
            <artifactId>flink-runtime-web_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.9.0</version>
        </dependency>

        <!-- Flink CDC connector for MySQL -->
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>2.2.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>

        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql-version}</version>
        </dependency>
        <!-- JSON -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.14</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>

    <build>
        <finalName>token-flink</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

Note that the MySQL version needs to be above 8.0 and below 8.0.26.
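Flink CDC tails the MySQL binlog through an embedded Debezium engine, so the source instance must have a row-format binlog enabled before the job can capture anything. A quick sanity check (expected values in the comments):

SHOW VARIABLES LIKE 'log_bin';          -- expect ON
SHOW VARIABLES LIKE 'binlog_format';    -- expect ROW
SHOW VARIABLES LIKE 'binlog_row_image'; -- expect FULL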

Now the code, file by file. First, a custom deserialization schema that flattens each Debezium change record into a JSON string:

package com.token.tokenFlink.entity;

import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

public class CustomDeserialization implements DebeziumDeserializationSchema<String> {

    private static final Logger log =
            LoggerFactory.getLogger(CustomDeserialization.class);

    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector)
            throws Exception {

        JSONObject res = new JSONObject();

        // extract the database and table name from the record topic
        String topic = sourceRecord.topic();
        String[] fields = topic.split("\\.");
        String database = fields[1];
        String tableName = fields[2];

        Struct value = (Struct) sourceRecord.value();
        // extract the "before" row image
        Struct before = value.getStruct("before");
        JSONObject beforeJson = new JSONObject();
        if (before != null) {
            Schema beforeSchema = before.schema();
            List<Field> beforeFields = beforeSchema.fields();
            for (Field field : beforeFields) {
                Object beforeValue = before.get(field);
                beforeJson.put(field.name(), beforeValue);
            }
        }

        // extract the "after" row image
        Struct after = value.getStruct("after");
        JSONObject afterJson = new JSONObject();
        if (after != null) {
            Schema afterSchema = after.schema();
            List<Field> afterFields = afterSchema.fields();
            for (Field field : afterFields) {
                Object afterValue = after.get(field);
                afterJson.put(field.name(), afterValue);
            }
        }

        // get the operation type: READ / DELETE / UPDATE / CREATE
        Envelope.Operation operation = Envelope.operationFor(sourceRecord);
        String type = operation.toString().toLowerCase();
        if ("create".equals(type)) {
            type = "insert";
        }

        // assemble the fields into the output JSON object
        res.put("database", database);
        res.put("tableName", tableName);
        res.put("before", beforeJson);
        res.put("after", afterJson);
        res.put("type", type);
        log.info("输出到控制台{}",JSONObject.toJSON(res));
        //输出数据
        collector.collect(res.toString());
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
    }
}
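For reference, each change event comes out of this schema as one flat JSON string. An update on student would look roughly like this (values are illustrative; note that Debezium encodes MySQL datetime columns as epoch numbers rather than formatted strings):

{"database":"exchange","tableName":"student","type":"update","before":{"id":1,"name":"tom","age":18,"ctime":1665974400000,"mtime":1665974400000},"after":{"id":1,"name":"tom","age":19,"ctime":1665974400000,"mtime":1666060800000}}

Next, the Student entity that the stream is mapped into: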

package com.token.tokenFlink.entity;


import java.io.Serializable;
import java.time.LocalDateTime;

/**
 * Student entity.
 *
 * @author qfy
 * @since 2022-10-17
 */
public class Student implements Serializable {

    private static final long serialVersionUID = 1L;

    private Integer id;

    /** name */
    private String name;

    /** age */
    private Integer age;

    private LocalDateTime ctime;

    private LocalDateTime mtime;

    public Student() {
    }

    public Student(Integer id, String name, Integer age, LocalDateTime ctime, LocalDateTime mtime) {
        this.id = id;
        this.name = name;
        this.age = age;
        this.ctime = ctime;
        this.mtime = mtime;
    }

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public LocalDateTime getCtime() {
        return ctime;
    }

    public void setCtime(LocalDateTime ctime) {
        this.ctime = ctime;
    }

    public LocalDateTime getMtime() {
        return mtime;
    }

    public void setMtime(LocalDateTime mtime) {
        this.mtime = mtime;
    }
}
package com.token.tokenFlink.task;


public class TableData<T> {

    private String database;

    private String tableName;

    private String update;

    private T before;

    private T after;

    public TableData() {
    }

    public TableData(String database, String tableName, String update, T before, T after) {
        this.database = database;
        this.tableName = tableName;
        this.update = update;
        this.before = before;
        this.after = after;
    }

    public String getDatabase() {
        return database;
    }

    public void setDatabase(String database) {
        this.database = database;
    }

    public String getTableName() {
        return tableName;
    }

    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    public String getUpdate() {
        return update;
    }

    public void setUpdate(String update) {
        this.update = update;
    }

    public T getBefore() {
        return before;
    }

    public void setBefore(T before) {
        this.before = before;
    }

    public T getAfter() {
        return after;
    }

    public void setAfter(T after) {
        this.after = after;
    }
}
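TableData above is not actually wired into the job below, but it matches the JSON envelope the deserializer emits, so you could parse records into typed objects with fastjson. A minimal sketch under that assumption (TableDataParser is a hypothetical helper; note the POJO stores the operation in update while the deserializer emits it as type, and the datetime fields may need custom handling depending on Debezium's encoding):

package com.token.tokenFlink.task;

import com.alibaba.fastjson.JSONObject;
import com.token.tokenFlink.entity.Student;

public class TableDataParser {

    // hypothetical helper: turn one deserializer record into a typed envelope
    public static TableData<Student> parse(String record) {
        JSONObject json = JSONObject.parseObject(record);
        TableData<Student> data = new TableData<>();
        data.setDatabase(json.getString("database"));
        data.setTableName(json.getString("tableName"));
        data.setUpdate(json.getString("type")); // "type" in the JSON, "update" on the POJO
        data.setBefore(json.getObject("before", Student.class));
        data.setAfter(json.getObject("after", Student.class));
        return data;
    }
}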

package com.token.tokenFlink.task;

import com.token.tokenFlink.entity.Student;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.util.List;

public class CustomSink extends RichSinkFunction<List<Student>> {

    protected static final Logger log = LoggerFactory.getLogger(CustomSink.class);

    public volatile PreparedStatement ps;
    public volatile Connection connection;

    @Override
    public void invoke(List<Student> value, Context context) throws Exception {

        if (value == null || value.isEmpty()) {
            return;
        }
        for (Student student : value) {
            ps.setString(1, student.getName());
            ps.setInt(2, student.getAge());
            ps.setTimestamp(3, Timestamp.valueOf(LocalDateTime.now()));
            ps.setTimestamp(4, Timestamp.valueOf(LocalDateTime.now()));
            ps.addBatch();
        }
        try {
            int[] count = ps.executeBatch(); // execute the accumulated batch in one round trip
            log.info("batch executed, {} statements", count.length);
        } catch (Exception e) {
            log.error("batch insert failed: {}", e.getMessage(), e);
        }
    }

    @Override
    public void  open(Configuration parameters) throws Exception {
        String url = "jdbc:mysql://xxxxxx:3306/exchange?characterEncoding=utf-8" +
                "&zeroDateTimeBehavior=convertToNull&autoReconnect=true" +
                "&rewriteBatchedStatements=true&useServerPrepStmts=false" +
                "&allowMultiQueries=true&useSSL=false&failOverReadOnly=false";
        String user = "xxxxx";
        String password = "xxxx";
        Class.forName("com.mysql.cj.jdbc.Driver");
        connection = DriverManager.getConnection(url, user, password);
        String sql = "INSERT INTO `exchange`.`student1` (`name`, `age`, `ctime`, `mtime`) VALUES (?, ?, ?, ?)";
        ps = connection.prepareStatement(sql);
    }

    @Override
    public void close() throws Exception {
        super.close();
        // release the statement first, then the connection
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
    }

}



package com.token.tokenFlink.task;

import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import com.token.tokenFlink.entity.CustomDeserialization;
import com.token.tokenFlink.entity.Student;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.source.MySqlSourceBuilder;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.stream.Collectors;

public class MySqlSourceExample2 {

    protected static final Logger LOG = LoggerFactory.getLogger(MySqlSourceExample2.class);

    public static void main(String[] args) throws Exception {


        MySqlSource<String> mySqlSource = new MySqlSourceBuilder<String>()
                .hostname("xxxx")
                .port(3306)
                .databaseList("exchange") // capture the exchange database
                .tableList("exchange.student") // capture only the student table
                .username("xxxx")
                .password("xxxx")
                /*.serverId("5401-5404")*/
                .deserializer(new CustomDeserialization())
                .startupOptions(StartupOptions.latest())
                .scanNewlyAddedTableEnabled(true)
                .includeSchemaChanges(true) // also emit DDL (schema change) events
                .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // enable checkpoint

        env.enableCheckpointing(3000, CheckpointingMode.EXACTLY_ONCE);

        // 60-second tumbling processing-time windows, offset by 5 seconds
        TumblingProcessingTimeWindows windows = TumblingProcessingTimeWindows.of(Time.seconds(60), Time.seconds(5));
        //SlidingProcessingTimeWindows windows = SlidingProcessingTimeWindows.of(Time.seconds(60L), Time.seconds(60L));
        SingleOutputStreamOperator<Student> map = env.fromSource(
                        mySqlSource,
                        WatermarkStrategy.noWatermarks(),
                        "MySqlParallelSource")
                // keep only update and insert events
                .filter(s -> {
                    JSONObject jsonObject = JSONObject.parseObject(s);
                    String type = jsonObject.getString("type");
                    return "update".equals(type) || "insert".equals(type);
                })
                .map(new MapFunction<String, Student>() {
                    @Override
                    public Student map(String s) throws Exception {
                        JSONObject jsonObject = JSONObject.parseObject(s);
                        // as written, updates are read from the "before" image and inserts from the "after" image
                        Object payload = null;
                        if ("update".equals(jsonObject.getString("type"))) {
                            payload = jsonObject.get("before");
                        }
                        if ("insert".equals(jsonObject.getString("type"))) {
                            payload = jsonObject.get("after");
                        }
                        return JSONObject.parseObject(payload.toString(), Student.class);
                    }
                });
        // window the mapped records, de-duplicate them, then hand each batch to the sink
        map.windowAll(windows)
                .apply(new AllWindowFunction<Student, List<Student>, TimeWindow>() {
                    @Override
                    public void apply(TimeWindow timeWindow, Iterable<Student> iterable, Collector<List<Student>> collector) throws Exception {
                        List<Student> students = Lists.newArrayList(iterable);
                        students = students.stream()
                                // de-duplicate by name: when two records share a name,
                                // keep o1 and fold o2 into it by adding the ages
                                .collect(Collectors.toMap(Student::getName, a -> a, (o1, o2) -> {
                                    o1.setAge(o1.getAge() + o2.getAge());
                                    return o1;
                                })).values().stream().collect(Collectors.toList());
                        collector.collect(students);
                    }
                })
                .addSink(new CustomSink());

        env.execute("my test flink");
    }

}
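To verify the pipeline end to end (values are illustrative), insert a row into the captured table while the job is running; once the current window fires, the row should show up in student1:

INSERT INTO exchange.student (name, age, ctime, mtime) VALUES ('tom', 18, NOW(), NOW());
-- then check the sink table:
SELECT * FROM exchange.student1;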

Package the jar and submit it to the Flink cluster.

When submitting, you need to specify the entry class and set the parallelism, for example:
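A build-and-submit sketch (paths and parallelism are illustrative; the jar name follows from the finalName and assembly settings in the POM above):

mvn clean package
flink run -c com.token.tokenFlink.task.MySqlSourceExample2 -p 1 target/token-flink-jar-with-dependencies.jar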
