Flink CDC 2.2: dynamically load tables, read MySQL data, and write to MQ

1. Build a Flink CDC demo with the Flink Stream API.

2. Using MySQL as the example, capture MySQL binlog data and send it to MQ.

3. Generate checkpoint data, and restore state from it when the program restarts.

4. Demonstrate the new dynamic table-loading feature in version 2.2.

Create a new Maven project and add the dependencies to the pom:


<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-connector-mysql-cdc</artifactId>
    <version>2.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.12</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner-blink_2.12</artifactId>
    <version>1.13.6</version>
    <type>test-jar</type>
</dependency>

<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-amqp</artifactId>
    <version>2.0.9.RELEASE</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.75</version>
</dependency>

Create the MQ sink class (a RichSinkFunction that publishes to RabbitMQ):

import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.io.IOException;
import java.util.concurrent.TimeoutException;

/**
 * @description:
 * @author: wwp
 * @date: 2022-06-23 14:48
 */
public class HandlerSink extends RichSinkFunction<String> {

    private transient Channel channel;
    private transient Connection connection;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // create connection factory
        ConnectionFactory connectionFactory = new ConnectionFactory();
        connectionFactory.setHost("localhost");
        connectionFactory.setPort(5672);
        connectionFactory.setUsername("guest");
        connectionFactory.setPassword("guest");
        connectionFactory.setConnectionTimeout(0);
        connection = connectionFactory.newConnection();
        channel = connection.createChannel();
        // declare a durable, non-exclusive, non-auto-delete queue for the CDC messages
        channel.queueDeclare("direct.cdc.data.queue", true, false, false, null);

    }

    @Override
    public void close() throws IOException, TimeoutException {
        // release resources
        channel.close();
        connection.close();
    }

    @Override
    public void invoke(String value, Context context) throws Exception {
        if(StringUtils.isNotBlank(value)){
            channel.basicPublish("direct.cdc.data.sync", "flink-cdc-sync-data", null, value.getBytes());
        }
    }

}
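
Note that invoke() publishes to the exchange direct.cdc.data.sync with routing key flink-cdc-sync-data, while open() only declares the queue; if that exchange and its binding do not already exist on the broker, published messages are silently dropped. A minimal sketch of the missing declarations, which could be added at the end of open() (exchange, queue, and routing-key names taken from the code above):

// declare a durable direct exchange and bind the queue to it, so that the
// routing key used in invoke() actually routes messages into the queue
channel.exchangeDeclare("direct.cdc.data.sync", "direct", true);
channel.queueBind("direct.cdc.data.queue", "direct.cdc.data.sync", "flink-cdc-sync-data");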

The CDC deserializer parses each binlog record into JSON:

import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

import java.util.List;

/**
 * @description:
 * @author: wwp
 * @date: 2022-06-23 15:48
 */
public class MyJsonDebeziumDeserializationSchema implements DebeziumDeserializationSchema<String> {

    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector)
            throws Exception {
        Struct value = (Struct) sourceRecord.value();
        Struct source = value.getStruct("source");

        // get db
        String db = source.getString("db");
        String table = source.getString("table");

        // get type, create and read convert to insert
        String type = Envelope.operationFor(sourceRecord).toString().toLowerCase();
        if ("create".equals(type) || "read".equals(type)) {
            type = "insert";
        }
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("database", db);
        jsonObject.put("table", table);
        jsonObject.put("type", type);
        // get data
        Struct before = value.getStruct("before");
        Struct after = value.getStruct("after");
        JSONObject dataJson = new JSONObject();
        List<Field> fields;
        // delete get before else get after
        if("delete".equals(type)){
            fields = before.schema().fields();
        }else{
            fields = after.schema().fields();
        }
        for (Field field : fields) {
            String field_name = field.name();
            Object fieldValue;
            if("delete".equals(type)){
                fieldValue  = before.get(field);
            }else{
                fieldValue  = after.get(field);
            }

            dataJson.put(field_name, fieldValue);
        }
        
        jsonObject.put("data", JSONObject.toJSONString(dataJson));
        collector.collect(JSONObject.toJSONString(jsonObject));
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}
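
For reference, an update on kd_shop_1.kd_goods would produce a message shaped like this (the column names and values are made up for illustration; note that "data" is itself a JSON string, matching the String field on CdcDataSyncDO below):

{
    "database": "kd_shop_1",
    "table": "kd_goods",
    "type": "update",
    "data": "{\"id\":1001,\"goods_name\":\"demo\"}"
}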

The main method configures the database connection and table list directly:

import com.hui.www.service.flinkcdc.HandlerSink;
import com.hui.www.service.flinkcdc.MyJsonDebeziumDeserializationSchema;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * @description:
 * @author: wwp
 * @date: 2022-06-23 15:48
 */
public class WebApplication {
    public static void main(String[] args) {
        try {
            // config env and db
            MySqlSource<String> cdcMysqlSource = MySqlSource.<String>builder()
                    .hostname("127.0.0.1")
                    .port(3306)
                    // required for the 2.2 dynamic table-loading feature
                    .scanNewlyAddedTableEnabled(true)
                    .username("user")
                    .password("user123")
                    .serverTimeZone("Asia/Shanghai")
                    .databaseList("kd_shop_1", "kd_main")
                    .tableList("kd_shop_1.kd_goods", "kd_shop_1.kd_goods_pool",
                            "kd_main.kd_shop")
                    .deserializer(new MyJsonDebeziumDeserializationSchema())
                    .build();

            Configuration configuration = new Configuration();
            // read checkpoint record
            // Comment out the next line for the first run. When adding tables later,
            // enable it and replace '27b27e36750ff997a7bd3b9933c5f3c9/chk-12404' with the
            // matching folder under the checkpoint storage path, so that existing tables
            // resume incrementally while newly added tables get a full snapshot.
            configuration.setString("execution.savepoint.path", "file:///tmp/flink-ck/27b27e36750ff997a7bd3b9933c5f3c9/chk-12404");
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(configuration);
            // enable checkpoint
            env.enableCheckpointing(3000);
            // set local storage path
            env.getCheckpointConfig().setCheckpointStorage("file:///tmp/flink-ck");
           	// output mq
            env.fromSource(cdcMysqlSource, WatermarkStrategy.noWatermarks(), "MySQL Source")
                    .addSink(new HandlerSink());
            env.execute("flinkCdc");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
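
To exercise the dynamic table-loading feature end to end: run once with execution.savepoint.path commented out, let a few checkpoints complete, stop the job, append the new table to tableList, point execution.savepoint.path at the latest chk-* directory, and restart. A sketch of the changed lines for the second run (kd_main.kd_shop_account is a hypothetical new table, and the checkpoint path must match your own run):

// second run: old tables resume from the checkpoint offsets; the newly
// appended table gets a full snapshot because scanNewlyAddedTableEnabled(true)
MySqlSource<String> cdcMysqlSource = MySqlSource.<String>builder()
        .hostname("127.0.0.1")
        .port(3306)
        .scanNewlyAddedTableEnabled(true)
        .username("user")
        .password("user123")
        .serverTimeZone("Asia/Shanghai")
        .databaseList("kd_shop_1", "kd_main")
        .tableList("kd_shop_1.kd_goods", "kd_shop_1.kd_goods_pool",
                "kd_main.kd_shop", "kd_main.kd_shop_account") // new table appended
        .deserializer(new MyJsonDebeziumDeserializationSchema())
        .build();

Configuration configuration = new Configuration();
// point at the latest completed checkpoint directory from the previous run
configuration.setString("execution.savepoint.path",
        "file:///tmp/flink-ck/<job-id>/chk-<latest>");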

The receiving DTO:

@Data
public class CdcDataSyncDO {
    private String database;
    private String type;
    private String table;
    private String data;
}

The MQ consumer:

@Slf4j
@Component
public class CdcDataSyncListener implements BaseRabbitHandler {

    @Autowired
    private SyncDataHandlerProcessService syncDataHandlerProcessService;

    @RabbitHandler
    @RabbitListener(queues = {RabbitConfig.QUEUE_CDC_DATA_SYNC}, containerFactory = "multiListenerContainer")
    public void handler(@Payload byte[] param, Message message, Channel channel) {
        String paramStr = new String(param, StandardCharsets.UTF_8);
        log.info("cdc data sync started - {}", Util.toJson(paramStr));
        CdcDataSyncDO cdcDataSyncDO = JSON.parseObject(paramStr, CdcDataSyncDO.class);
        // resolve the handler for this table (strategy pattern)
        SyncDataHandlerService handler = syncDataHandlerProcessService.getHandler(cdcDataSyncDO.getTable());
        if (handler != null) {
            handler.syncData(cdcDataSyncDO);
        }
        log.info("cdc data sync finished - {}", Util.toJson(paramStr));
    }

    @Override
    public void executeException(Object[] datas, Throwable throwable) {
        log.error("cdc同步数据失败,失败原因 : " + throwable.getMessage());
    }
}

The strategy class dispatches by table:

@Service
@Data
public class SyncDataHandlerProcessService {
    @Autowired
    GoodsPublishSyncDataImpl goodsPublishSyncData;

    @Autowired
    GoodsPoolSyncDataImpl goodsPoolSyncData;
    @Autowired
    CollectErrorMessageSyncDataImpl collectErrorMessageSyncData;

    /**
     * Return the sync handler for the given table.
     * @param tableName
     * @return
     */
    public SyncDataHandlerService getHandler(String tableName){
        if(tableName.equals(CdcSyncDataConstant.GOODS_PUBLISH_ALL)){
            return goodsPublishSyncData;
        }else if(tableName.equals(CdcSyncDataConstant.GOODS_PUBLISH_POOL)){
            return goodsPoolSyncData;
        }else if (tableName.equals(CdcSyncDataConstant.COLLECT_ERROR_MESSAGE)){
            return collectErrorMessageSyncData;
        } else if (CdcSyncDataConstant.SHOP.equals(tableName)) {
            // not implemented yet
        } else if (CdcSyncDataConstant.SHOP_ACCOUNT.equals(tableName)) {
            // not implemented yet
        } else if (CdcSyncDataConstant.SHOP_AUTH_INFO.equals(tableName)) {
            // not implemented yet
        }

        return null;
    }


    /**
     * Refresh CDC data over a time range.
     * @param param
     */
    public void refreshData(CdcRefreshPO param){
        SyncDataHandlerService handler = getHandler(param.getTableName());
        handler.refreshData(param.getStartTime(),param.getEndTime());
    }
}

The handler interface dispatches on the SQL operation type (the refreshData method called by SyncDataHandlerProcessService above is presumably declared here too, but is omitted in the original):

public interface SyncDataHandlerService {
    /**
     * Sync one record; concrete implementations write it to ES (or elsewhere).
     *
     * @param param
     */
    default void syncData(CdcDataSyncDO param) {
        if ("delete".equals(param.getType())) {
            deleteSync(param);
        } else if ("insert".equals(param.getType())) {
            insertSync(param);
        } else if ("update".equals(param.getType())) {
            updateSync(param);
        }
        // other operation types are ignored
    }


    /**
     * Sync a delete.
     *
     * @param param
     * @return
     */
    String deleteSync(CdcDataSyncDO param);

    /**
     * Sync an update.
     *
     * @param param
     * @return
     */
    String updateSync(CdcDataSyncDO param);

    /**
     * Sync an insert.
     *
     * @param param
     * @return
     */
    String insertSync(CdcDataSyncDO param);
}
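
The concrete handlers (GoodsPublishSyncDataImpl and the others wired into SyncDataHandlerProcessService) are not shown in the original. A hypothetical skeleton might look like this, assuming fastjson for parsing and stubbing out the actual ES writes; "id" stands in for whatever the table's primary-key column is:

@Service
public class GoodsPublishSyncDataImpl implements SyncDataHandlerService {

    @Override
    public String insertSync(CdcDataSyncDO param) {
        // "data" arrives as a JSON string (see the deserializer above)
        JSONObject row = JSON.parseObject(param.getData());
        // index the row into ES, keyed by primary key (stubbed)
        return row.getString("id");
    }

    @Override
    public String updateSync(CdcDataSyncDO param) {
        JSONObject row = JSON.parseObject(param.getData());
        // upsert the ES document (stubbed)
        return row.getString("id");
    }

    @Override
    public String deleteSync(CdcDataSyncDO param) {
        JSONObject row = JSON.parseObject(param.getData());
        // delete the ES document by primary key (stubbed)
        return row.getString("id");
    }
}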

Summary:

1. Flink CDC reads the MySQL binlog, parses and wraps each record, and sends it to MQ.

2. The MQ consumer picks a strategy by table name, applies the CRUD change, and writes the result to ES or another store.
