canal docker搭建总结

关于canal docker 容器搭建方式

一 配置mysql 8.0

1.1 docker-compose mysql 配置

我的mysql 8.0 docker-compose

version: '3.3'
services:
  mysql:
    image: mysql/mysql-server:${MYSQL_VERSION}
    container_name: mysql
    ports:
      - "${MYSQL_HOST_PORT}:3306"
    volumes:
      - ${MYSQL_CONF_FILE}:/etc/mysql/conf.d/mysql.cnf:ro
      - ${DATA_DIR}/mysql:/var/lib/mysql/:rw
      - ${MYSQL_LOG_DIR}:/var/log/mysql/:rw

    restart: always
    networks:
      - default
    environment:
      MYSQL_ROOT_PASSWORD: "${MYSQL_ROOT_PASSWORD}"
      MYSQL_ROOT_HOST: "${MYSQL_ROOT_HOST}"
      TZ: "$TZ"

1.2 my.cnf 配置

[client]
port                    = 3306
default-character-set   = utf8mb4


[mysqld]
user                    = mysql
port                    = 3306
sql_mode                = NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES

default-storage-engine  = InnoDB
default-authentication-plugin   = mysql_native_password
character-set-server    = utf8mb4
collation-server        = utf8mb4_unicode_ci
init_connect            = 'SET NAMES utf8mb4'

log-bin = mysql-bin
binlog-format = row
server-id = 1
binlog-do-db = crawler

slow_query_log
#long_query_time         = 3
slow-query-log-file     = /var/log/mysql/mysql.slow.log
log-error               = /var/log/mysql/mysql.error.log

default-time-zone       = '+8:00'

[mysql]
default-character-set   = utf8mb4

需要注意的是要配置这4行

log-bin = mysql-bin # bin log 位置可以自定义绝对路径

binlog-format = row # 设置成按行
server-id = 1 # 主库的server-id
binlog-do-db = crawler # 要监听的库

1.3 重启mysql 查看同步状态

show variables like 'log_bin%';  出现on
show variables like 'binlog_format'; 出现 ROW
CREATE USER canal IDENTIFIED BY 'canal'; 创建从库
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%'; 授予权限
FLUSH PRIVILEGES; 
show grants for 'canal';


# 查看同步位置
SHOW MASTER STATUS;

二 配置canal

2.1 canal docker-compose 配置

version: '3'
services:
  canal-server:
    image: canal/canal-server:latest
    container_name: canal-server
    ports:
      - "11111:11111"
    volumes:
      - ./canal-logs:/home/admin/canal-server/logs # 挂载日志,主要是example 里面 的启动日志了
      - ./canal-conf:/home/admin/canal-server/conf
    environment:
      - canal.instance.master.address=192.168.217.87:3326
      - canal.instance.dbUsername=canal # 从库账号
      - canal.instance.dbPassword=canal # 从库密码

我的建议是先不要挂载,先启动容器,再把conf 配置文件copy 到宿主机

例如启动容器执行 docker cp canal-server:/home/admin/canal-server/conf ./canal-conf

2.2 主要配置文件说明

├── canal.properties   # 入口配置文件。默认是 TCP 传输,这一项我没有改;我只改成了只监听 DML,其他事件都过滤。如果有多个库,还需要改 canal.destinations = example,xxxx
├── canal_local.properties
├── example
│   ├── h2.mv.db # 生成的数据
│   ├── instance.properties  # 主要配置文件,可以在环境变量里面,也可以手动设置
				# 主要改了,账号密码设置在docker-compose 里面了也可以通过这个修改
				canal.instance.mysql.slaveId=2
				canal.instance.filter.regex=crawler\\.t_crawler_article  # 这个是主要监听 crawler 库 和 t_crawler_article 表(点号需要用 \\ 转义)
				# canal.instance.filter.black.regex=mysql\\.slave_.*  这个是黑名单,注释了,因为配置了上面这行
# canal.instance.filter.black.regex=mysql\\.slave_.*
│   └── meta.dat # 生成的初始信息 
├── logback.xml   # 主要改动:日志保留天数由 60 改为 30,并且去掉 .gz 后缀(不压缩)

2.3 启动docker-compose up , tail -f example.log

如果出现下面success 的信息说明配置成功了

2023-07-12 09:58:18.221 [main] INFO  c.a.otter.canal.instance.core.AbstractCanalInstance - subscribe filter change to crawler\.t_crawler_article
2023-07-12 09:58:18.230 [main] WARN  c.a.o.canal.parse.inbound.mysql.dbsync.LogEventConvert - --> init table filter : ^crawler\.t_crawler_article$
2023-07-12 09:58:18.233 [main] INFO  c.a.otter.canal.instance.core.AbstractCanalInstance - start successful....
2023-07-12 09:58:18.631 [destination = example , address = /192.168.217.87:3326 , EventParser] WARN  c.a.o.c.p.inbound.mysql.rds.RdsBinlogEventParserProxy - prepare to find start position just last position
 {"identity":{"slaveId":-1,"sourceAddress":{"address":"192.168.217.87","port":3326}},"postion":{"gtid":"","included":false,"journalName":"binlog.000026","position":47167,"serverId":1,"timestamp":1689127031000}}
2023-07-12 09:58:20.897 [destination = example , address = /192.168.217.87:3326 , EventParser] WARN  c.a.o.c.p.inbound.mysql.rds.RdsBinlogEventParserProxy - ---> find start position successfully, EntryPosition[included=false,journalName=binlog.000026,position=47167,serverId=1,gtid=,timestamp=1689127031000] cost : 3286ms , the next step is binlog dump

三 编写canal客户端

3.1 写客户端是为了更方便灵活订阅规则

我这边用的golang, 其他语言的同学也可以找一找,java 客户端肯定是支持的

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"fmt"
	"log"
	"os"
	"time"

	"github.com/golang/protobuf/proto"
	"github.com/withlin/canal-go/client"
	pbe "github.com/withlin/canal-go/protocol/entry"
)

func main() {
	// Connection settings: replace the host below with the address of your
	// canal server, and "example" with the destination name configured on the
	// server (e.g. -e canal.destinations=example).
	connector := client.NewSimpleCanalConnector("192.168.217.87", 11111, "", "", "example", 60000, 60*60*1000)
	if err := connector.Connect(); err != nil {
		// log.Fatalln logs the error and then exits with status 1.
		log.Fatalln(err)
	}

	// Subscription filter, see https://github.com/alibaba/canal/wiki/AdminGuide
	//
	// The filter selects the MySQL tables to parse and is written as Perl
	// regular expressions. Multiple expressions are separated by commas (,)
	// and the escape character must be written as a double backslash (\\).
	//
	// Common examples:
	//
	//  1.  all tables:                                    .*   or  .*\\..*
	//  2.  all tables in schema "canal":                  canal\\..*
	//  3.  tables in "canal" whose name starts with canal: canal\\.canal.*
	//  4.  one table in schema "canal":                   canal\\.test1
	//  5.  several rules combined (comma separated):      canal\\..*,mysql.test1,mysql.test2

	//err = connector.Subscribe(".*\\..*")
	if err := connector.Subscribe("crawler.t_crawler_article"); err != nil {
		log.Fatalln(err)
	}

	// Poll the server forever, printing every non-empty batch.
	for {
		message, err := connector.Get(100, nil, nil)
		if err != nil {
			log.Fatalln(err)
		}

		if message.Id != -1 && len(message.Entries) > 0 {
			printEntry(message.Entries)
			continue
		}

		// Nothing to consume right now: back off briefly before polling again.
		time.Sleep(300 * time.Millisecond)
		fmt.Println("===没有数据了===")
	}
}

// printEntry prints the row changes carried by the given canal entries.
//
// Entries that mark a transaction begin or end are skipped. For every other
// entry the store value is decoded into a RowChange (a decode failure ends
// the process through checkError), a header line built from the entry header
// is printed, and then the columns of each row are printed: the before
// columns for DELETE, the after columns for INSERT, and both sets for any
// other event type.
func printEntry(entrys []pbe.Entry) {
	for i := range entrys {
		// Iterate by index and take the address so that each Entry message
		// is not copied on every iteration.
		entry := &entrys[i]

		switch entry.GetEntryType() {
		case pbe.EntryType_TRANSACTIONBEGIN, pbe.EntryType_TRANSACTIONEND:
			continue
		}

		// new never yields nil, so the decoded value needs no nil check.
		rowChange := new(pbe.RowChange)
		checkError(proto.Unmarshal(entry.GetStoreValue(), rowChange))

		eventType := rowChange.GetEventType()
		header := entry.GetHeader()
		fmt.Println(fmt.Sprintf("================> binlog[%s : %d],name[%s,%s], eventType: %s", header.GetLogfileName(), header.GetLogfileOffset(), header.GetSchemaName(), header.GetTableName(), header.GetEventType()))

		for _, rowData := range rowChange.GetRowDatas() {
			switch eventType {
			case pbe.EventType_DELETE:
				printColumn(rowData.GetBeforeColumns())
			case pbe.EventType_INSERT:
				printColumn(rowData.GetAfterColumns())
			default:
				fmt.Println("-------> before")
				printColumn(rowData.GetBeforeColumns())
				fmt.Println("-------> after")
				printColumn(rowData.GetAfterColumns())
			}
		}
	}
}

// printColumn writes one line per column to standard output, showing the
// column's name, its value and its "updated" flag.
func printColumn(columns []*pbe.Column) {
	for idx := 0; idx < len(columns); idx++ {
		column := columns[idx]
		line := fmt.Sprintf("%s : %s  update= %t", column.GetName(), column.GetValue(), column.GetUpdated())
		fmt.Println(line)
	}
}

// checkError is a no-op when err is nil. Otherwise it writes the error text
// to standard error and terminates the process with exit status 1.
func checkError(err error) {
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "Fatal error: %s", err.Error())
	os.Exit(1)
}

3.2 测试,在mysql中修改表,看控制台输出mysql 日志

Get local news delivered to your inbox!  update= false
ref_id : 720c8090279dd552e56c637e16f9926a  update= false
tags : alert  update= false
local_url :   update= false
s3_url : xxxsssssss  update= false
milli_at : 1689064272526  update= false
category : headlines  update= false
article_time : 1688987700  update= false
is_status : 2  update= false

四 遇到的坑

4.1 当重启数据库的时候,监听不到

显示连接失败,这个时候我用mysql 手动登录下 canal 账号就好了

手动登录的命令如下:
mysql -h 192.168.217.87  -u canal -p -P 3326

4.2 几年前我提的问题,几年后有大神给的解决办法,mysql 8.0同步到es的问题

https://github.com/alibaba/canal/issues/2802#issuecomment-1534500708

你可能感兴趣的:(docker,adb,容器,canal)