The components are described in the following table:

Component | Version | Servers | Installation guide | Role |
---|---|---|---|---|
canal | 1.1.5 | canal1, canal2, canal3 | canal HA installation + automatic MySQL multi-node failover | |
zookeeper | 3.6.3 | canal1, canal2, canal3 | ZooKeeper fully distributed installation | |
mysql | 8.0.25 | canal1, canal2 | MySQL master-master architecture setup | source database |
mysql | 8.0.25 | canal3 | Installing MySQL 8.0.25 on CentOS 7 | remote configuration database for ClientAdapter |
postgresql | 13 | canal3 | Installing and configuring PostgreSQL 13 on CentOS 7 | target database |
In PostgreSQL, create the database db1 and the table db1.public.tb1_1.
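A minimal sketch of that setup, assuming a two-column schema (idA comes from the mapping config below and scoreA from the troubleshooting notes at the end; quoting preserves the mixed-case identifiers in PostgreSQL):

```bash
# Run against the PostgreSQL instance on canal3; the column list is illustrative
psql -h canal3 -U postgres <<'SQL'
CREATE DATABASE db1;
\c db1
CREATE TABLE public.tb1_1 (
    "idA"    integer PRIMARY KEY,
    "scoreA" integer
);
SQL
```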
In conf/canal.properties on each canal server, keep the server mode set to TCP:

canal.serverMode = tcp
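After changing canal.properties, restart each canal server so the mode takes effect (the deployer path below is an assumption; adjust it to the layout from the HA install article):

```bash
# Run on canal1, canal2 and canal3
/root/canal-1.1.5/bin/restart.sh
```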
Configure example_rdb_db1/instance.properties so that only the db1 database is synced; the contents are as follows:
#################################################
## mysql serverId , v1.0.26+ will autoGen
# slaveId is 1262 on canal2 and 1263 on canal3
canal.instance.mysql.slaveId=1261
# enable gtid use true/false
canal.instance.gtidon=false
# position info
canal.instance.master.address=canal2:3306
canal.instance.master.journal.name=mysql-bin.000026
canal.instance.master.position=4
canal.instance.master.timestamp=
canal.instance.master.gtid=
# rds oss binlog
canal.instance.rds.accesskey=
canal.instance.rds.secretkey=
canal.instance.rds.instanceId=
# table meta tsdb info
canal.instance.tsdb.enable=true
#canal.instance.tsdb.url=jdbc:mysql://127.0.0.1:3306/canal_tsdb
#canal.instance.tsdb.dbUsername=canal
#canal.instance.tsdb.dbPassword=canal
canal.instance.standby.address = canal1:3306
#canal.instance.standby.journal.name =
#canal.instance.standby.position =
#canal.instance.standby.timestamp =
#canal.instance.standby.gtid=
# username/password
canal.instance.dbUsername=canal
canal.instance.dbPassword=Canal_123
canal.instance.connectionCharset = UTF-8
# enable druid Decrypt database password
canal.instance.enableDruid=false
#canal.instance.pwdPublicKey=MFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALK4BUxdDltRRE5/zXpVEVPUgunvscYFtEip3pmLlhrWpacX7y7GCMo2/JM6LeHmiiNdH1FWgGCpUfircSwlWKUCAwEAAQ==
# table regex
canal.instance.filter.regex=db1\\.tb\\d_\\d
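# matches tables such as db1.tb1_1 and db1.tb1_2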
# table black regex
canal.instance.filter.black.regex=mysql\\.slave_.*
# table field filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.field=test1.t_product:id/subject/keywords,test2.t_company:id/name/contact/ch
# table field black filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.black.field=test1.t_product:subject/product_image,test2.t_company:id/name/contact/ch
# mq config
canal.mq.topic=example
# dynamic topic route by schema or table regex
#canal.mq.dynamicTopic=mytest1.user,mytest2\\..*,.*\\..*
canal.mq.partition=0
# hash partition config
#canal.mq.partitionsNum=3
#canal.mq.partitionHash=test.table:id^name,.*\\..*
#canal.mq.dynamicTopicPartitionNum=test.*:4,mycanal:6
#################################################
# heartbeat detection must be enabled
canal.instance.detecting.enable = true
# SQL used for the heartbeat probe
canal.instance.detecting.sql = select 1
# heartbeat interval in seconds
canal.instance.detecting.interval.time = 3
# failure-count threshold; once exceeded, the MySQL connection is switched over, e.g. to the standby host, where binlog consumption continues
canal.instance.detecting.retry.threshold = 3
# whether to fail over between master and standby once the failure threshold is exceeded
canal.instance.detecting.heartbeatHaEnable = true
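To see which canal server currently holds this instance, you can query ZooKeeper (a sketch assuming canal's default znode layout; the zkCli.sh path is an assumption based on the ZooKeeper install above):

```bash
# The ephemeral "running" node names the canal server that owns the instance
/opt/zookeeper/bin/zkCli.sh -server canal1:2181 \
  get /otter/canal/destinations/example_rdb_db1/running
```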
The canal.adapter-1.1.5.tar.gz downloaded from GitHub has a PostgreSQL compatibility problem, so build the adapter from source instead (see here for the build process), then upload the tarball and extract it:
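For orientation, the build looks roughly like this (a hedged sketch; the linked article is authoritative, and the tarball's output path may differ between canal versions):

```bash
# Requires JDK 8 and Maven
git clone -b canal-1.1.5 https://github.com/alibaba/canal.git
cd canal
mvn clean package -DskipTests
# the adapter package is typically produced under client-adapter/launcher/target/
ls client-adapter/launcher/target/canal.adapter-*.tar.gz
```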
[root@canal1 ~]#
[root@canal1 ~]# mkdir canal.adapter-1.1.5
[root@canal1 ~]#
[root@canal1 ~]# tar -zxvf canal.adapter-1.1.5.tar.gz -C canal.adapter-1.1.5
[root@canal1 ~]#
conf/application.yml is configured as follows:
server:
  port: 8081
logging:
  level:
    com.alibaba.otter.canal.client.adapter.rdb: DEBUG
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8
    default-property-inclusion: non_null

canal.conf:
  mode: tcp #tcp kafka rocketMQ rabbitMQ
  flatMessage: true
  zookeeperHosts: canal1:2181,canal2:2181,canal3:2181
  batchSize: 500
  syncBatchSize: 1000
  retries: -1 # -1 means retry indefinitely
  timeout:
  accessKey:
  secretKey:
  consumerProperties:
    # canal tcp consumer
    # canal.tcp.server.host: 127.0.0.1:11111
    canal.tcp.zookeeper.hosts: canal1:2181,canal2:2181,canal3:2181
    canal.tcp.batch.size: 500
    canal.tcp.username:
    canal.tcp.password:
    # kafka consumer
    kafka.bootstrap.servers: 127.0.0.1:9092
    kafka.enable.auto.commit: false
    kafka.auto.commit.interval.ms: 1000
    kafka.auto.offset.reset: latest
    kafka.request.timeout.ms: 40000
    kafka.session.timeout.ms: 30000
    kafka.isolation.level: read_committed
    kafka.max.poll.records: 1000
    # rocketMQ consumer
    rocketmq.namespace:
    rocketmq.namesrv.addr: 127.0.0.1:9876
    rocketmq.batch.size: 1000
    rocketmq.enable.message.trace: false
    rocketmq.customized.trace.topic:
    rocketmq.access.channel:
    rocketmq.subscribe.filter:
    # rabbitMQ consumer
    rabbitmq.host:
    rabbitmq.virtual.host:
    rabbitmq.username:
    rabbitmq.password:
    rabbitmq.resource.ownerId:

  srcDataSources:
    defaultDS:
      url: jdbc:mysql://canal1:3306/default_test?useUnicode=true # the default_test database holds no data
      username: root
      password: Root_123
  canalAdapters:
  - instance: example_rdb_db1 # canal instance name or mq topic name
    groups: # one instance can be consumed by several groups in parallel
    - groupId: g1
      outerAdapters:
      - name: logger
#      - name: rdb
#        key: mysql1
#        properties:
#          jdbc.driverClassName: com.mysql.jdbc.Driver
#          jdbc.url: jdbc:mysql://127.0.0.1:3306/mytest2?useUnicode=true
#          jdbc.username: root
#          jdbc.password: 121212
#      - name: rdb
#        key: oracle1
#        properties:
#          jdbc.driverClassName: oracle.jdbc.OracleDriver
#          jdbc.url: jdbc:oracle:thin:@localhost:49161:XE
#          jdbc.username: mytest
#          jdbc.password: m121212
      - name: rdb
        key: postgres1 # unique key, referenced by outerAdapterKey in the table-mapping configs
        properties:
          jdbc.driverClassName: org.postgresql.Driver
          jdbc.url: jdbc:postgresql://canal3:5432/db1
          jdbc.username: postgres
          jdbc.password: postgres123
          threads: 2
          commitSize: 3000
#      - name: hbase
#        properties:
#          hbase.zookeeper.quorum: 127.0.0.1
#          hbase.zookeeper.property.clientPort: 2181
#          zookeeper.znode.parent: /hbase
#      - name: es
#        hosts: 127.0.0.1:9300 # 127.0.0.1:9200 for rest mode
#        properties:
#          mode: transport # or rest
#          # security.auth: test:123456 # only used for rest mode
#          cluster.name: elasticsearch
#      - name: kudu
#        key: kudu
#        properties:
#          kudu.master.address: 127.0.0.1 # ',' split multi address
The adapter automatically loads every table-mapping file ending in .yml under conf/rdb. The first mapping file here enables mirror-schema mode, with the shipped table-mapping template left commented out at its top:
#dataSourceKey: defaultDS
#destination: example
#groupId: g1
#outerAdapterKey: mysql1
#concurrent: true
#dbMapping:
#  database: mytest
#  table: user
#  targetTable: mytest2.user
#  targetPk:
#    id: id
#  mapAll: true
#  targetColumns:
#    id:
#    name:
#    role_id:
#    c_time:
#    test1:
#  etlCondition: "where c_time>={}"
#  commitBatch: 3000 # batch commit size

## Mirror schema synchronize config
dataSourceKey: defaultDS
destination: example_rdb_db1
groupId: g1
outerAdapterKey: postgres1 # matches the outerAdapters key in application.yml
concurrent: true # tables synced in parallel must have primary keys that never change and are not foreign keys of other synced tables
dbMapping:
  mirrorDb: true
  database: db1 # source and target database name
The second file, tb1_1.yml (referenced by the curl commands below), maps the table explicitly and leaves its mirror-schema section commented out:
dataSourceKey: defaultDS
destination: example_rdb_db1
groupId: g1
outerAdapterKey: postgres1
concurrent: true
dbMapping:
  database: db1
  table: tb1_1
  targetTable: tb1_1
  targetPk:
    idA: idA
  mapAll: true
  targetColumns:
    id:
    name:
    role_id:
    c_time:
    test1:
  etlCondition: "where c_time>={}"
  commitBatch: 3000 # batch commit size

## Mirror schema synchronize config
#dataSourceKey: defaultDS
#destination: example_rdb_db1
#groupId: g1
#outerAdapterKey: postgres1 # matches the outerAdapters key in application.yml
#concurrent: true # tables synced in parallel must have primary keys that never change and are not foreign keys of other synced tables
#dbMapping:
#  mirrorDb: true
#  database: db1 # source and target database name
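Before relying on incremental sync, it is worth starting the adapter and tailing its log to confirm both mapping files load (the log path assumes the default layout of the 1.1.5 package):

```bash
/root/canal.adapter-1.1.5/bin/startup.sh
# DEBUG logging for the rdb adapter was enabled in application.yml above
tail -f /root/canal.adapter-1.1.5/logs/adapter/adapter.log
```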
Use restart.sh after later config changes, and stop.sh to shut the adapter down:
[root@canal1 ~]#
[root@canal1 ~]# /root/canal.adapter-1.1.5/bin/restart.sh
[root@canal1 ~]#
[root@canal1 ~]# /root/canal.adapter-1.1.5/bin/stop.sh
[root@canal1 ~]#
[root@canal1 ~]#
The adapter exposes a management REST API on port 8081. List the subscribed canal instances and their status:
[root@canal1 ~]# curl http://canal1:8081/destinations
[{"destination":"example_rdb_db1","status":"on"}]
[root@canal1 ~]#
Query the sync switch of a single instance (the "stauts" key is canal's own typo in the response):
[root@canal1 ~]# curl http://canal1:8081/syncSwitch/example_rdb_db1
{"stauts":"on"}
[root@canal1 ~]#
Trigger a full ETL import through a mapping file; the resultMessage reads "imported 3 rows into RDB":
[root@canal1 ~]# curl http://canal1:8081/etl/rdb/postgres1/tb1_1.yml -X POST -d ""
{"succeeded":true,"resultMessage":"导入RDB 数据:3 条"}
[root@canal1 ~]#
Switch an instance's incremental sync off; the message reads "instance example_rdb_db1: sync switched off successfully":
[root@canal1 ~]# curl http://canal1:8081/syncSwitch/example_rdb_db1/off -X PUT
{"code":20000,"message":"实例: example_rdb_db1 关闭同步成功"}
[root@canal1 ~]#
Query the row count of a mapped target table:
[root@canal1 ~]# curl http://canal1:8081/count/rdb/postgres1/tb1_1.yml
{"count":3,"targetTable":"tb1_1"}
[root@canal1 ~]#
After running the adapter's bin/stop.sh, the example_rdb_db1/1001/cursor node in ZooKeeper is deleted. It does not come back after a restart either; only once another row is inserted into the source MySQL database does the example_rdb_db1/1001/cursor node hold data again.
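This behavior can be watched directly in ZooKeeper (a sketch assuming canal's default znode layout, where 1001 is the adapter's client id; adjust the zkCli.sh path to your install):

```bash
# Shows the consumed binlog position; the node disappears after stop.sh
/opt/zookeeper/bin/zkCli.sh -server canal1:2181 \
  get /otter/canal/destinations/example_rdb_db1/1001/cursor
```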
Initially the MySQL table had a scoreA column; the table was then dropped and recreated with ageA instead, and later dropped again and changed back to scoreA. The corresponding PostgreSQL column is likewise scoreA, yet incremental sync fails with the following error:
c.a.otter.canal.adapter.launcher.loader.AdapterProcessor - java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.RuntimeException: java.lang.RuntimeException: Target column: ageA not matched
java.lang.RuntimeException: java.lang.RuntimeException: java.util.concurrent.ExecutionException: java.lang.RuntimeException: java.lang.RuntimeException: Target column: ageA not matched
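A plausible cause is stale table metadata: with canal.instance.tsdb.enable=true, the canal server keeps table-meta history in an embedded H2 database, which can outlive DROP/CREATE cycles. A hedged remedy sketch (the deployer path is an assumption; stop the canal server on that node first):

```bash
# Remove the instance's table-meta history so it is rebuilt from a fresh snapshot
rm -f /root/canal-1.1.5/conf/example_rdb_db1/h2.mv.db \
      /root/canal-1.1.5/conf/example_rdb_db1/h2.trace.db
```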
Running the curl full-ETL command by hand, the MySQL tables tb1_1 and tb1_2 hold 5 and 1 rows respectively, yet the reported synced counts are 3 and 3.
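When counts look wrong, cross-check both ends directly rather than trusting the /count endpoint (credentials taken from the configs above):

```bash
mysql -h canal1 -uroot -pRoot_123 -N -e 'SELECT COUNT(*) FROM db1.tb1_1'
PGPASSWORD=postgres123 psql -h canal3 -U postgres -d db1 -t -c 'SELECT COUNT(*) FROM public.tb1_1'
```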
Moving the conf/application.yml configuration into MySQL (remote configuration) was also never finished.
In the end I gave up on the canal ClientAdapter approach and implemented the synchronization in my own code, so this article is for reference only.