1. Develop a Flink CDC demo with the Flink Stream API
2. Using MySQL as the example, capture MySQL binlog data and send it to MQ
3. Generate checkpoint data so that, on restart, the program resumes from its saved execution state
4. Demonstrate the dynamic table loading feature new in version 2.2
Create a new Maven project and add the following dependencies to the pom:
<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-connector-mysql-cdc</artifactId>
    <version>2.2.1</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.12</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner-blink_2.12</artifactId>
    <version>1.13.6</version>
    <type>test-jar</type>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-amqp</artifactId>
    <version>2.0.9.RELEASE</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.75</version>
</dependency>
Create the MQ sink class:
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeoutException;
/**
* @description:
* @author: wwp
* @date: 2022-06-23 14:48
*/
public class HandlerSink extends RichSinkFunction<String> {
    private transient Channel channel;
    private transient Connection connection;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // create the connection factory
        ConnectionFactory connectionFactory = new ConnectionFactory();
        connectionFactory.setHost("localhost");
        connectionFactory.setPort(5672);
        connectionFactory.setUsername("guest");
        connectionFactory.setPassword("guest");
        connectionFactory.setConnectionTimeout(0);
        connection = connectionFactory.newConnection();
        channel = connection.createChannel();
        // declare the queue and the direct exchange, and bind them with the routing key
        // used in invoke(); without this binding, messages published to the exchange
        // would never reach the queue
        channel.queueDeclare("direct.cdc.data.queue", true, false, false, null);
        channel.exchangeDeclare("direct.cdc.data.sync", "direct", true);
        channel.queueBind("direct.cdc.data.queue", "direct.cdc.data.sync", "flink-cdc-sync-data");
    }

    @Override
    public void close() throws IOException, TimeoutException {
        // release resources
        if (channel != null) {
            channel.close();
        }
        if (connection != null) {
            connection.close();
        }
    }

    @Override
    public void invoke(String value, Context context) throws Exception {
        if (StringUtils.isNotBlank(value)) {
            channel.basicPublish("direct.cdc.data.sync", "flink-cdc-sync-data", null,
                    value.getBytes(StandardCharsets.UTF_8));
        }
    }
}
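Note that basicPublish here is fire-and-forget, and with checkpointing enabled the pipeline is at-least-once, so messages may be re-published after a restore. If you want the sink to block until the broker acknowledges each message, the RabbitMQ client supports publisher confirms; a minimal sketch (the 5-second timeout is an assumed value):

// in open(), after connection.createChannel():
channel.confirmSelect();                 // put the channel into publisher-confirm mode

// in invoke(), after channel.basicPublish(...):
channel.waitForConfirmsOrDie(5_000L);    // block until the broker acks, or fail after 5s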
The CDC deserializer that parses the binlog records:
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;
import java.util.List;
/**
* @description:
* @author: wwp
* @date: 2022-06-23 15:48
*/
public class MyJsonDebeziumDeserializationSchema implements DebeziumDeserializationSchema<String> {
    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector)
            throws Exception {
        Struct value = (Struct) sourceRecord.value();
        Struct source = value.getStruct("source");
        // get the database and table names
        String db = source.getString("db");
        String table = source.getString("table");
        // get the operation type; create and read are normalized to insert
        String type = Envelope.operationFor(sourceRecord).toString().toLowerCase();
        if ("create".equals(type) || "read".equals(type)) {
            type = "insert";
        }
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("database", db);
        jsonObject.put("table", table);
        jsonObject.put("type", type);
        // get the row data: for deletes read the before image, otherwise the after image
        Struct before = value.getStruct("before");
        Struct after = value.getStruct("after");
        JSONObject dataJson = new JSONObject();
        List<Field> fields;
        if ("delete".equals(type)) {
            fields = before.schema().fields();
        } else {
            fields = after.schema().fields();
        }
        for (Field field : fields) {
            String fieldName = field.name();
            Object fieldValue;
            if ("delete".equals(type)) {
                fieldValue = before.get(field);
            } else {
                fieldValue = after.get(field);
            }
            dataJson.put(fieldName, fieldValue);
        }
        jsonObject.put("data", JSONObject.toJSONString(dataJson));
        collector.collect(JSONObject.toJSONString(jsonObject));
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}
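For reference, a message emitted by this deserializer for an insert into kd_shop_1.kd_goods looks roughly like the following; note that the data field is itself a JSON string, because it is serialized separately before being put into the outer object (column names and values here are illustrative):

{"database":"kd_shop_1","table":"kd_goods","type":"insert","data":"{\"id\":1,\"goods_name\":\"demo\"}"}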
The main method configures the database and tables directly:
import com.hui.www.service.flinkcdc.HandlerSink;
import com.hui.www.service.flinkcdc.MyJsonDebeziumDeserializationSchema;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
* @description:
* @author: wwp
* @date: 2022-06-23 15:48
*/
public class WebApplication {
    public static void main(String[] args) {
        try {
            // configure the environment and the source database
            MySqlSource<String> cdcMysqlSource = MySqlSource.<String>builder()
                    .hostname("127.0.0.1")
                    .port(3306)
                    .scanNewlyAddedTableEnabled(true)
                    .username("user")
                    .password("user123")
                    .serverTimeZone("Asia/Shanghai")
                    .databaseList("kd_shop_1", "kd_main")
                    .tableList("kd_shop_1.kd_goods", "kd_shop_1.kd_goods_pool",
                            "kd_main.kd_shop")
                    .deserializer(new MyJsonDebeziumDeserializationSchema())
                    .build();
            Configuration configuration = new Configuration();
            // read the checkpoint record:
            // comment this line out on the first run; when adding tables later, enable it and
            // replace '27b27e36750ff997a7bd3b9933c5f3c9/chk-12404' with the matching folder under
            // the storage path, so existing tables resume incrementally and new tables get a full read
            configuration.setString("execution.savepoint.path", "file:///tmp/flink-ck/27b27e36750ff997a7bd3b9933c5f3c9/chk-12404");
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(configuration);
            // enable checkpointing every 3 seconds
            env.enableCheckpointing(3000);
            // set the local checkpoint storage path
            env.getCheckpointConfig().setCheckpointStorage("file:///tmp/flink-ck");
            // sink to MQ
            env.fromSource(cdcMysqlSource, WatermarkStrategy.noWatermarks(), "MySQL Source")
                    .addSink(new HandlerSink());
            env.execute("flinkCdc");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
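To exercise the 2.2 dynamic table feature from step 4: stop the job, append the new table to tableList (keeping scanNewlyAddedTableEnabled(true)), point execution.savepoint.path at the latest chk-* folder, and restart. Roughly, with kd_main.kd_shop_account as a placeholder table name:

// add the new table alongside the existing ones
.tableList("kd_shop_1.kd_goods", "kd_shop_1.kd_goods_pool",
        "kd_main.kd_shop", "kd_main.kd_shop_account")
// restore from the most recent checkpoint (replace job id and chk number with the real folder)
configuration.setString("execution.savepoint.path",
        "file:///tmp/flink-ck/<job-id>/chk-<n>");

On restart, the existing tables continue from the binlog position recorded in the checkpoint, while the newly added table first gets a full snapshot read.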
The receiving entity:
@Data
public class CdcDataSyncDO {
    private String database;
    private String type;
    private String table;
    private String data;
}
The MQ consumer class:
@Slf4j
@Component
public class CdcDataSyncListener implements BaseRabbitHandler {
    @Autowired
    private SyncDataHandlerProcessService syncDataHandlerProcessService;

    @RabbitHandler
    @RabbitListener(queues = {RabbitConfig.QUEUE_CDC_DATA_SYNC}, containerFactory = "multiListenerContainer")
    public void handler(@Payload byte[] param, Message message, Channel channel) {
        String paramStr = new String(param, StandardCharsets.UTF_8);
        log.info("cdc data sync started - {}", Util.toJson(paramStr));
        CdcDataSyncDO cdcDataSyncDO = JSON.parseObject(paramStr, CdcDataSyncDO.class);
        // strategy pattern: look up the handler implementation for this table
        SyncDataHandlerService handler = syncDataHandlerProcessService.getHandler(cdcDataSyncDO.getTable());
        // getHandler may return null for tables without a registered handler
        if (handler != null) {
            handler.syncData(cdcDataSyncDO);
        }
        log.info("cdc data sync finished - {}", Util.toJson(paramStr));
    }

    @Override
    public void executeException(Object[] datas, Throwable throwable) {
        log.error("cdc data sync failed, cause: " + throwable.getMessage());
    }
}
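The listener references RabbitConfig.QUEUE_CDC_DATA_SYNC, which is not shown in this post. A minimal sketch of that configuration, assuming the queue, exchange, and routing-key names used by the Flink sink above (the multiListenerContainer factory it also references is omitted here):

import org.springframework.amqp.core.Binding;
import org.springframework.amqp.core.BindingBuilder;
import org.springframework.amqp.core.DirectExchange;
import org.springframework.amqp.core.Queue;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class RabbitConfig {
    public static final String QUEUE_CDC_DATA_SYNC = "direct.cdc.data.queue";
    public static final String EXCHANGE_CDC_DATA_SYNC = "direct.cdc.data.sync";
    public static final String ROUTING_KEY_CDC_DATA_SYNC = "flink-cdc-sync-data";

    @Bean
    public Queue cdcDataQueue() {
        // durable queue, matching the queueDeclare in HandlerSink
        return new Queue(QUEUE_CDC_DATA_SYNC, true);
    }

    @Bean
    public DirectExchange cdcDataExchange() {
        return new DirectExchange(EXCHANGE_CDC_DATA_SYNC, true, false);
    }

    @Bean
    public Binding cdcDataBinding() {
        return BindingBuilder.bind(cdcDataQueue()).to(cdcDataExchange()).with(ROUTING_KEY_CDC_DATA_SYNC);
    }
}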
The strategy dispatcher:
@Service
@Data
public class SyncDataHandlerProcessService {
    @Autowired
    GoodsPublishSyncDataImpl goodsPublishSyncData;
    @Autowired
    GoodsPoolSyncDataImpl goodsPoolSyncData;
    @Autowired
    CollectErrorMessageSyncDataImpl collectErrorMessageSyncData;

    /**
     * Get the sync handler for the given table
     * @param tableName
     * @return the handler, or null if the table has no registered handler
     */
    public SyncDataHandlerService getHandler(String tableName) {
        if (tableName.equals(CdcSyncDataConstant.GOODS_PUBLISH_ALL)) {
            return goodsPublishSyncData;
        } else if (tableName.equals(CdcSyncDataConstant.GOODS_PUBLISH_POOL)) {
            return goodsPoolSyncData;
        } else if (tableName.equals(CdcSyncDataConstant.COLLECT_ERROR_MESSAGE)) {
            return collectErrorMessageSyncData;
        } else if (CdcSyncDataConstant.SHOP.equals(tableName)) {
            // handler not implemented yet
        } else if (CdcSyncDataConstant.SHOP_ACCOUNT.equals(tableName)) {
            // handler not implemented yet
        } else if (CdcSyncDataConstant.SHOP_AUTH_INFO.equals(tableName)) {
            // handler not implemented yet
        }
        return null;
    }

    /**
     * Refresh CDC data for a table over a time window
     * @param param
     */
    public void refreshData(CdcRefreshPO param) {
        SyncDataHandlerService handler = getHandler(param.getTableName());
        if (handler != null) {
            handler.refreshData(param.getStartTime(), param.getEndTime());
        }
    }
}
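As more tables are added, the if/else chain in getHandler grows; a map keyed by table name, populated once after injection, keeps the dispatch flat. A sketch of that variant inside SyncDataHandlerProcessService (assumes java.util.HashMap and javax.annotation.PostConstruct):

private final Map<String, SyncDataHandlerService> handlerMap = new HashMap<>();

@PostConstruct
public void initHandlerMap() {
    handlerMap.put(CdcSyncDataConstant.GOODS_PUBLISH_ALL, goodsPublishSyncData);
    handlerMap.put(CdcSyncDataConstant.GOODS_PUBLISH_POOL, goodsPoolSyncData);
    handlerMap.put(CdcSyncDataConstant.COLLECT_ERROR_MESSAGE, collectErrorMessageSyncData);
}

public SyncDataHandlerService getHandler(String tableName) {
    // returns null for tables without a registered handler, same as before
    return handlerMap.get(tableName);
}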
The interface dispatches on the SQL operation type:
public interface SyncDataHandlerService {
    /**
     * Sync the data; concrete subclasses implement the writes (to ES)
     *
     * @param param
     */
    default void syncData(CdcDataSyncDO param) {
        if ("delete".equals(param.getType())) {
            deleteSync(param);
        } else if ("insert".equals(param.getType())) {
            insertSync(param);
        } else if ("update".equals(param.getType())) {
            updateSync(param);
        }
    }

    /**
     * Delete sync
     *
     * @param param
     * @return
     */
    String deleteSync(CdcDataSyncDO param);

    /**
     * Update sync
     *
     * @param param
     * @return
     */
    String updateSync(CdcDataSyncDO param);

    /**
     * Insert sync
     *
     * @param param
     * @return
     */
    String insertSync(CdcDataSyncDO param);

    /**
     * Refresh a time window (called by SyncDataHandlerProcessService#refreshData;
     * the parameter types here are assumed, since CdcRefreshPO is not shown)
     *
     * @param startTime
     * @param endTime
     */
    void refreshData(String startTime, String endTime);
}
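A hypothetical handler implementation then only needs to parse the data payload and do its own writes; the actual GoodsPublishSyncDataImpl is not shown in this post, so the ES calls below are stubbed:

import com.alibaba.fastjson.JSONObject;
import org.springframework.stereotype.Service;

@Service
public class GoodsPublishSyncDataImpl implements SyncDataHandlerService {

    @Override
    public String insertSync(CdcDataSyncDO param) {
        // "data" is itself a JSON string, so it needs a second parse
        JSONObject row = JSONObject.parseObject(param.getData());
        // map the row to an ES document and index it (ES client code omitted)
        return row.getString("id");
    }

    @Override
    public String updateSync(CdcDataSyncDO param) {
        // upsert by primary key; in this sketch identical to insert
        return insertSync(param);
    }

    @Override
    public String deleteSync(CdcDataSyncDO param) {
        JSONObject row = JSONObject.parseObject(param.getData());
        // delete the ES document by id (ES client code omitted)
        return row.getString("id");
    }

    @Override
    public void refreshData(String startTime, String endTime) {
        // re-read rows changed between startTime and endTime from MySQL and re-index them
    }
}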
Summary:
1. CDC reads the MySQL binlog, parses and wraps the records, and sends them to MQ;
2. The MQ consumer picks a strategy per table, reads the row data, performs the CRUD, and writes the result to ES or elsewhere.