Flink CDC读取Mongodb数据可参考:https://blog.csdn.net/penngo/article/details/124913985
Flink Mysql CDC的核心原理都是通过监控mysql的binlog的日志变化,从而进行日志解析,得到变化的数据。
Flink CDC官网:https://github.com/ververica/flink-cdc-connectors
MySql CDC:https://github.com/ververica/flink-cdc-connectors/blob/master/docs/content/connectors/mysql-cdc.md
在my.cnf中的增加如下配置
server-id = 1
log_bin = mysql-bin
binlog_format = ROW
binlog_row_image = FULL
expire_logs_days = 30
重启mysql,通过下边sql检查log_bin=ON来确认binlog是否成功开启:
SHOW VARIABLES LIKE '%bin%';
创建一个具有读取binlog权限的MySQL用户:
mysql> CREATE USER 'flinkuser'@'localhost' IDENTIFIED BY 'flinkpwd';
设置用户权限:
mysql> GRANT SELECT, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'flinkuser'@'localhost' IDENTIFIED BY 'flinkpwd';
注意:授权对象的host需与CREATE USER时一致('flinkuser'@'localhost'),否则授权会落到另一个账号('flinkuser'@'%')上;另外MySQL 8.0已不支持GRANT ... IDENTIFIED BY语法,8.0下应先CREATE USER再单独执行GRANT(去掉IDENTIFIED BY子句)。
注意:当scan.incremental.snapshot.enabled被启用(默认启用)时,RELOAD权限不再需要。
刷新用户权限:
mysql> FLUSH PRIVILEGES;
更新多配置可参考https://debezium.io/documentation/reference/1.5/connectors/mysql.html#mysql-creating-user
如果在集群环境,需要启用GTID模式,MySQL-cdc连接器通过使用GTID信息来提高MySQL集群的高可用性。
gtid_mode = on
enforce_gtid_consistency = on
如果CDC监控的MySQL服务器地址包含slave实例,需要设置log-slave-updates = 1,使从服务器也可以将从主服务器同步的数据写入到它的binlog中。
gtid_mode = on
enforce_gtid_consistency = on
log-slave-updates = 1
CREATE DATABASE `flinktest`;
USE `flinktest`;
CREATE TABLE `products` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL,
`description` varchar(512) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=9 DEFAULT CHARSET=utf8mb4;
insert into `products`(`id`,`name`,`description`) values
(1,'aaa','aaaa'),
(2,'ccc','ccc'),
(3,'dd','ddd'),
(4,'eeee','eee'),
(5,'ffff','ffff'),
(6,'hhhh','hhhh'),
(7,'iiii','iiii'),
(8,'jjjj','jjjj');
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.penngo.flinkcdc</groupId>
    <artifactId>FlickCDC</artifactId>
    <packaging>jar</packaging>
    <version>1.0-SNAPSHOT</version>
    <name>FlickCDC</name>
    <url>https://21doc.net/</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <flink-version>1.13.3</flink-version>
        <flink-cdc-version>2.1.1</flink-cdc-version>
        <slf4j.version>1.7.25</slf4j.version>
        <log4j.version>2.16.0</log4j.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-base</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>${flink-cdc-version}</version>
        </dependency>
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mongodb-cdc</artifactId>
            <version>${flink-cdc-version}</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <repositories>
        <repository>
            <id>alimaven</id>
            <name>Maven Aliyun Mirror</name>
            <url>https://maven.aliyun.com/repository/central</url>
        </repository>
    </repositories>
</project>
MysqlExample.java
package com.penngo.flinkcdc;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import java.util.logging.Level;
import java.util.logging.Logger;
public class MysqlExample {
public static void main(String[] args) throws Exception {
MySqlSource mySqlSource = MySqlSource.builder()
.hostname("localhost")
.port(3306)
.databaseList("flinktest")
.tableList("flinktest.products")
.username("flinkuser")
.password("flinkpwd")
.startupOptions(StartupOptions.initial())
.deserializer(new JsonDebeziumDeserializationSchema())
.build();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// enable checkpoint
env.enableCheckpointing(3000);
DataStreamSource dataStreamSource = env.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "MySQL Source");
SingleOutputStreamOperator
附件源码