Environment Preparation
JDK
Canal is written in Java, so first make sure a JDK is installed.
Enable Binlog Writing
[mysqld]
log-bin=mysql-bin # enable binlog
binlog-format=ROW # use ROW mode
server_id=1 # required for MySQL replication; must not collide with canal's slaveId
binlog-format supports three strategies:
STATEMENT: statement-based logging.
ROW: row-based logging. This is the default.
MIXED: mixed-format logging.
For details, see: https://dev.mysql.com/doc/refman/8.0/en/binary-log.html
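After restarting MySQL, it is worth confirming that the settings took effect. Below is a minimal JDBC sketch; the connection URL and credentials are placeholders, and MySQL Connector/J is assumed to be on the classpath:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class BinlogCheck {
    public static void main(String[] args) throws Exception {
        // Placeholder host and credentials -- replace with your own.
        String url = "jdbc:mysql://127.0.0.1:3306/?useSSL=false";
        try (Connection conn = DriverManager.getConnection(url, "root", "root");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "SHOW VARIABLES WHERE Variable_name IN ('log_bin', 'binlog_format', 'server_id')")) {
            while (rs.next()) {
                // Expect log_bin = ON and binlog_format = ROW for canal to work.
                System.out.println(rs.getString(1) + " = " + rs.getString(2));
            }
        }
    }
}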
Grant the account canal uses the privileges needed to act as a MySQL slave; if the account already exists, run the GRANT directly:
CREATE USER canal IDENTIFIED BY 'canal'; -- the password is up to you
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%';
FLUSH PRIVILEGES;
Download Canal
wget https://github.com/alibaba/canal/releases/download/canal-1.1.4/canal.deployer-1.1.4.tar.gz
Extract
The tarball unpacks into four top-level directories, which is messy, so create a directory to hold them:
mkdir /tmp/canal
tar -zxvf canal.deployer-1.1.4.tar.gz -C /tmp/canal/
After extraction, enter /tmp/canal; you should see four directories: bin, conf, lib, and logs.
Modify the Configuration
vi conf/example/instance.properties
canal.instance.mysql.slaveId = 1234
#position info; change these to your own database settings
canal.instance.master.address = 127.0.0.1:3306
canal.instance.master.journal.name =
canal.instance.master.position =
canal.instance.master.timestamp =
#canal.instance.standby.address =
#canal.instance.standby.journal.name =
#canal.instance.standby.position =
#canal.instance.standby.timestamp =
#username/password; change these to your own database credentials
canal.instance.dbUsername = canal
canal.instance.dbPassword = canal
canal.instance.defaultDatabaseName =
canal.instance.connectionCharset = UTF-8
#table regex: the filter rule; .*\\..* matches every table, test\\..* every table in the test schema
canal.instance.filter.regex=.*\\..*
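The filter value is a plain regular expression matched against schema.table names (see the canal wiki for the full syntax). A quick standalone illustration using java.util.regex, not canal internals:

import java.util.regex.Pattern;

public class FilterRegexDemo {
    public static void main(String[] args) {
        // .*\\..* in the properties file loads as the regex .*\..* : any schema, any table.
        Pattern all = Pattern.compile(".*\\..*");
        Pattern testSchema = Pattern.compile("test\\..*");
        System.out.println(all.matcher("test.user").matches());          // true
        System.out.println(testSchema.matcher("test.user").matches());   // true
        System.out.println(testSchema.matcher("orders.item").matches()); // false
    }
}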
Start
sh bin/startup.sh
At this point the canal server is up. Check logs/canal/canal.log for the startup log and logs/example/example.log for the data-subscription log.
MQ
Canal can also deliver messages directly to an MQ; this requires configuration changes. Kafka is used as the example below.
Configuration file: canal.properties
canal.serverMode = kafka
canal.mq.servers = 127.0.0.1:6667
canal.mq.retries = 0
canal.mq.batchSize = 16384
canal.mq.maxRequestSize = 1048576
canal.mq.lingerMs = 1
canal.mq.bufferMemory = 33554432
canal.mq.canalBatchSize = 50
canal.mq.canalGetTimeout = 100
canal.mq.flatMessage = false
canal.mq.compressionType = none
canal.mq.acks = all
canal.mq.transaction = false
Configuration file: instance.properties
canal.instance.master.address=192.168.1.20:3306
canal.instance.dbUsername = canal
canal.instance.dbPassword = canal
#canal.mq.dynamicTopic=mytest,.*,mytest.user,mytest\\..*,.*\\..*
canal.mq.partition=0
#canal.mq.partitionsNum=3
#canal.mq.partitionHash=mytest.person:id,mytest.role:id
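A quick way to verify the pipeline end to end is to read the topic with a plain Kafka consumer. A minimal sketch, assuming canal.mq.flatMessage is flipped to true (so each record value is a JSON string), the topic example, and a broker at 127.0.0.1:9092; the group id is made up:

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class CanalTopicPeek {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "127.0.0.1:9092"); // assumption: local broker
        props.put("group.id", "canal-peek");              // hypothetical group id
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("example")); // topic from canal.mq.topic
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                for (ConsumerRecord<String, String> record : records) {
                    // With flatMessage=true each record value is a JSON FlatMessage.
                    System.out.printf("partition=%d offset=%d value=%s%n",
                            record.partition(), record.offset(), record.value());
                }
            }
        }
    }
}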
This completes the MQ configuration. One thing to watch when delivering data to an MQ is message ordering.
Binlog writes are ordered, but how is that order preserved on the way from the binlog into the MQ? Taking Kafka as the example, there are the following topic layouts:
Single topic, single partition: guarantees the same order as the binlog. The downside is that the fully serialized pipeline limits throughput; the official TPS benchmark is 2k~3k.
Single topic, multiple partitions: cannot guarantee binlog write order across partitions. Routing rules can partially compensate, giving table-level or business-level ordering, and TPS improves noticeably over a single partition.
Multiple topics, single or multiple partitions: if you route by table, ordering is guaranteed per table (at the cost of hot tables concentrating writes on one partition). If you route by pk hash, ordering is only guaranteed across the binlog events of a single pk. The pk-hash approach is a business trade-off: it performs best, but if the business updates pks, or depends on ordering across several pks, processing can go wrong. In particular, when a pk changes, the before and after values land in different partitions, so consumers may see them out of order; a sketch of this hazard follows below.
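The pk-change pitfall is easiest to see from how hash routing works in general. The sketch below is not canal's actual routing code, just an illustration of the principle: the partition is derived from the pk value, so an UPDATE that changes the pk can route the before and after images to different partitions:

public class PkHashDemo {
    // Illustrative only: map a primary-key value onto one of N partitions.
    static int partitionFor(String pkValue, int partitionsNum) {
        return Math.abs(pkValue.hashCode() % partitionsNum);
    }

    public static void main(String[] args) {
        int partitions = 4; // matches canal.mq.partitionsNum=4 in the cluster config below
        // An UPDATE that changes the pk from 100 to 207:
        System.out.println("pk=100 -> partition " + partitionFor("100", partitions));
        System.out.println("pk=207 -> partition " + partitionFor("207", partitions));
        // When the two values hash to different partitions, consumers may see
        // the "after" row before the "before" row: the ordering hazard above.
    }
}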
For the full configuration reference, see: https://github.com/alibaba/canal/wiki/Canal-Kafka-RocketMQ-QuickStart
Cluster Configuration
Standalone mode cannot meet disaster-recovery and availability requirements, so the cluster-mode configuration follows, using three nodes as an example.
Modify canal.properties on each node in turn:
# ports
canal.port = 11111
canal.metrics.pull.port = 11112
# ZooKeeper addresses; separate multiple hosts with commas
canal.zkServers =10.90.80.88:2182,10.90.81.121:2182,10.90.81.135:2182
canal.serverMode = kafka
canal.mq.servers = 10.90.80.88:9092,10.90.81.121:9092,10.90.81.135:9092
Modify example/instance.properties on each node in turn:
canal.instance.mysql.slaveId=101
canal.instance.gtidon=false
#canal.instance.master.address=w-kh-public-sit-mysql.service.testdb:3306
canal.instance.master.position=
canal.instance.master.timestamp=
canal.instance.master.gtid=
canal.instance.rds.accesskey=
canal.instance.rds.secretkey=
canal.instance.rds.instanceId=
canal.instance.dbUsername=canal
canal.instance.dbPassword=canal
#canal.instance.filter.regex=.*\\..*
canal.instance.filter.regex=test\\..*
canal.instance.filter.black.regex=
#canal.instance.filter.field=test1.t_product:id/subject/keywords,test2.t_company:id/name/contact/ch
#canal.instance.filter.black.field=test1.t_product:subject/product_image,test2.t_company:id/name/contact/ch
canal.mq.topic=example
#canal.mq.dynamicTopic=mytest1.user,mytest2\\..*,.*\\..*
#canal.mq.partition=0
canal.mq.partitionsNum=4
canal.mq.partitionHash=test\\..*:id
Start each node in turn: sh bin/startup.sh
Check logs/canal/canal.log to confirm the node started successfully.
Check logs/example/example.log to confirm the connection to the MySQL master.
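With the cluster up, clients should locate the active canal server through ZooKeeper rather than a fixed host, so that failover is transparent. A minimal sketch using the zk addresses from the configuration above; CanalConnectors.newClusterConnector is part of the canal client library:

import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;

public class ClusterConnect {
    public static void main(String[] args) {
        // Discover the running canal server via ZooKeeper instead of a fixed address.
        CanalConnector connector = CanalConnectors.newClusterConnector(
                "10.90.80.88:2182,10.90.81.121:2182,10.90.81.135:2182", // canal.zkServers
                "example", "", "");
        connector.connect();
        connector.subscribe(".*\\..*");
        // ... consume with getWithoutAck/ack as in the full client example below ...
        connector.disconnect();
    }
}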
Canal Client (Java)
Example:
package com.example.canal_test.config;

import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.protocol.CanalEntry;
import com.alibaba.otter.canal.protocol.Message;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import java.net.InetSocketAddress;
import java.util.List;

@Component
public class CanalClient {

    @PostConstruct
    public void init() {
        // Create the connection (11111 is canal.port; 11112 is only the metrics port).
        CanalConnector connector = CanalConnectors.newSingleConnector(
                new InetSocketAddress("10.90.81.121", 11111), "example", "", "");
        int batchSize = 1000;
        try {
            connector.connect();
            connector.subscribe(".*\\..*");
            connector.rollback();
            while (true) {
                // Fetch up to batchSize entries without auto-acknowledging.
                Message message = connector.getWithoutAck(batchSize);
                long batchId = message.getId();
                int size = message.getEntries().size();
                if (batchId == -1 || size == 0) {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                    }
                } else {
                    printEntry(message.getEntries());
                }
                connector.ack(batchId); // confirm the batch
            }
        } finally {
            connector.disconnect();
        }
    }

    private static void printEntry(List<CanalEntry.Entry> entrys) {
        for (CanalEntry.Entry entry : entrys) {
            // Skip transaction begin/end markers; only row data is interesting here.
            if (entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONBEGIN
                    || entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONEND) {
                continue;
            }
            CanalEntry.RowChange rowChange;
            try {
                rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
            } catch (Exception e) {
                throw new RuntimeException(
                        "ERROR ## parser of eromanga-event has an error , data:" + entry.toString(), e);
            }
            CanalEntry.EventType eventType = rowChange.getEventType();
            System.out.println(String.format("================> binlog[%s:%s] , name[%s,%s] , eventType : %s",
                    entry.getHeader().getLogfileName(), entry.getHeader().getLogfileOffset(),
                    entry.getHeader().getSchemaName(), entry.getHeader().getTableName(),
                    eventType));
            for (CanalEntry.RowData rowData : rowChange.getRowDatasList()) {
                if (eventType == CanalEntry.EventType.DELETE) {
                    printColumn(rowData.getBeforeColumnsList());
                } else if (eventType == CanalEntry.EventType.INSERT) {
                    printColumn(rowData.getAfterColumnsList());
                } else if (eventType == CanalEntry.EventType.UPDATE) {
                    printColumn(rowData.getAfterColumnsList());
                } else {
                    System.out.println("-------> before");
                    printColumn(rowData.getBeforeColumnsList());
                    System.out.println("-------> after");
                    printColumn(rowData.getAfterColumnsList());
                }
            }
        }
    }

    private static void printColumn(List<CanalEntry.Column> columns) {
        for (CanalEntry.Column column : columns) {
            System.out.println(column.getName() + " : " + column.getValue() + " update=" + column.getUpdated());
        }
    }
}
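Note that running the infinite fetch loop directly inside a @PostConstruct method blocks the Spring startup thread; in a real application you would typically hand the loop off to a dedicated thread or executor, and register a shutdown hook that calls disconnect().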