Flink CDC 之实战DebeziumDeserializationSchema 自定义数据转换

Flink 的CDC 自定义实现数据转换

public class MysqlCdc {
     
    public static void main(String[] args) throws Exception {
     
        SourceFunction<JSONObject> sourceFunction = MySQLSource.<JSONObject>builder()
                .hostname("xxx.xxx.xx.xx")
                // 监控库存数据库下的所有表
                .databaseList("xx")
                // 可以监听一张表 也可以全库的所有表
//                .tableList("c_rr_goalpricerate")
                .username("xx")
                .password("xx")
                .port(33061)
                // 将SourceRecord转换为String类型 官方标准定义
//                .deserializer(new StringDebeziumDeserializationSchema())
                .deserializer(new CustomDebeziumDeserializationSchema())
                .build();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.addSource(sourceFunction).print()
                //使用并行度1的接收器保持消息顺序
                .setParallelism(1);
        env.execute("flink-mysql-cdc");
    }
}
package com.quant.flowcalculation.flinkapi.cdc.customdebezium;

import com.alibaba.fastjson.JSONObject;
import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;


/**
 * @author czly
 * @date: 2021/8/18 10:31
 * @description: 自定义的DebeziumDeserializationSchema, mysql-binglog转化为期待的数据类型
 */
public class CustomDebeziumDeserializationSchema implements DebeziumDeserializationSchema<JSONObject> {
     

    private static final Logger LOGGER = LoggerFactory.getLogger(CustomDebeziumDeserializationSchema.class);

    private static final long serialVersionUID = 7906905121308228264L;

    public CustomDebeziumDeserializationSchema() {
     
    }

    /**
     * 新增:SourceRecord{sourcePartition={server=mysql_binlog_source}, sourceOffset={file=mysql-bin.000220, pos=16692, row=1, snapshot=true}} ConnectRecord{topic='mysql_binlog_source.test_hqh.mysql_cdc_person', kafkaPartition=null, key=Struct{id=2}, keySchema=Schema{mysql_binlog_source.test_hqh.mysql_cdc_person.Key:STRUCT}, value=Struct{after=Struct{id=2,name=JIM,sex=male},source=Struct{version=1.2.1.Final,connector=mysql,name=mysql_binlog_source,ts_ms=0,snapshot=true,db=test_hqh,table=mysql_cdc_person,server_id=0,file=mysql-bin.000220,pos=16692,row=0},op=c,ts_ms=1603357255749}, valueSchema=Schema{mysql_binlog_source.test_hqh.mysql_cdc_person.Envelope:STRUCT}, timestamp=null, headers=ConnectHeaders(headers=)}
     * 更新:SourceRecord{sourcePartition={server=mysql_binlog_source}, sourceOffset={ts_sec=1603357705, file=mysql-bin.000220, pos=22964, row=1, server_id=1, event=2}} ConnectRecord{topic='mysql_binlog_source.test_hqh.mysql_cdc_person', kafkaPartition=null, key=Struct{id=8}, keySchema=Schema{mysql_binlog_source.test_hqh.mysql_cdc_person.Key:STRUCT}, value=Struct{before=Struct{id=8,name=TOM,sex=male},after=Struct{id=8,name=Lucy,sex=female},source=Struct{version=1.2.1.Final,connector=mysql,name=mysql_binlog_source,ts_ms=1603357705000,db=test_hqh,table=mysql_cdc_person,server_id=1,file=mysql-bin.000220,pos=23109,row=0,thread=41},op=u,ts_ms=1603357705084}, valueSchema=Schema{mysql_binlog_source.test_hqh.mysql_cdc_person.Envelope:STRUCT}, timestamp=null, headers=ConnectHeaders(headers=)}
     * 删除:SourceRecord{sourcePartition={server=mysql_binlog_source}, sourceOffset={ts_sec=1603357268, file=mysql-bin.000220, pos=18510, row=1, server_id=1, event=2}} ConnectRecord{topic='mysql_binlog_source.test_hqh.mysql_cdc_person', kafkaPartition=null, key=Struct{id=7}, keySchema=Schema{mysql_binlog_source.test_hqh.mysql_cdc_person.Key:STRUCT}, value=Struct{before=Struct{id=7,name=TOM,sex=male},source=Struct{version=1.2.1.Final,connector=mysql,name=mysql_binlog_source,ts_ms=1603357268000,db=test_hqh,table=mysql_cdc_person,server_id=1,file=mysql-bin.000220,pos=18655,row=0,thread=41},op=d,ts_ms=1603357268728}, valueSchema=Schema{mysql_binlog_source.test_hqh.mysql_cdc_person.Envelope:STRUCT}, timestamp=null, headers=ConnectHeaders(headers=)}
     *
     * @param sourceRecord sourceRecord
     * @param collector    out
     */
    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<JSONObject> collector) {
     
        JSONObject resJson = new JSONObject();
        try {
     
            Struct valueStruct = (Struct) sourceRecord.value();
            Struct afterStruct = valueStruct.getStruct("after");
            Struct beforeStruct = valueStruct.getStruct("before");
            // 注意:若valueStruct中只有after,则表明插入;若只有before,说明删除;若既有before,也有after,则代表更新
            if (afterStruct != null && beforeStruct != null) {
     
                // 修改
                System.out.println("Updating >>>>>>>");
                LOGGER.info("Updated, ignored ...");
            } else if (afterStruct != null) {
     
                // 插入
                System.out.println("Inserting >>>>>>>");
                List<Field> fields = afterStruct.schema().fields();
                String name;
                Object value;
                for (Field field : fields) {
     
                    name = field.name();
                    value = afterStruct.get(name);
                    resJson.put(name, value);
                }
            } else if (beforeStruct != null) {
     
                // 删除
                System.out.println("Deleting >>>>>>>");
                LOGGER.info("Deleted, ignored ...");
            } else {
     
                System.out.println("No this operation ...");
                LOGGER.warn("No this operation ...");
            }
        } catch (Exception e) {
     
            System.out.println("Deserialize throws exception:");
            LOGGER.error("Deserialize throws exception:", e);
        }
        collector.collect(resJson);
    }

    @Override
    public TypeInformation<JSONObject> getProducedType() {
     
        return BasicTypeInfo.of(JSONObject.class);
    }
}

参考:https://ververica.github.io/flink-cdc-connectors/release-2.0/content/about.html

你可能感兴趣的:(flink,flink,cdc,mysql,数据同步)