需求:传统数据库搜索速度非常慢,需要依赖搜索引擎实现快速搜索。(使用canal实现无侵入数据异构)
canal server:用于伪装mysql slave获取mysql binlog。
canal adapter:提供多种中间件的导入,如kafka、rocketmq、hbase、elasticsearch,可直接配置使用。
注:canal adapter 1.1.4 的 es 适配器只支持 es6.x 版本。
elasticsearch(es):弹性搜索引擎底层使用lucene实现,提供集群分片扩展。
kibana:es可视化工具,用于可视化es数据,提供各种工具如:开发工具、索引管理等。
1、创建异构表
-- Source table for the canal -> Elasticsearch sync walkthrough.
-- Foreign-key checks are switched off only while the table is dropped and
-- recreated, and are switched back on at the end of this script.
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for t_user
-- ----------------------------
DROP TABLE IF EXISTS `t_user`;
CREATE TABLE `t_user` (
    `id` bigint(20) NOT NULL AUTO_INCREMENT,
    `name` varchar(255) NOT NULL COMMENT '用户姓名',
    `gender` tinyint(4) DEFAULT NULL COMMENT '性别1:男2:女',
    `phone` varchar(20) NOT NULL COMMENT '手机号码',
    `email` varchar(50) DEFAULT NULL COMMENT '邮箱',
    `status` tinyint(4) NOT NULL DEFAULT '1' COMMENT '状态1:启用2:禁用',
    `birthday` date DEFAULT NULL COMMENT '出生日期',
    `id_card` varchar(20) DEFAULT NULL COMMENT '证件号码',
    `head_portrait` varchar(255) DEFAULT NULL COMMENT '头像',
    `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    -- Last-modified time: ON UPDATE keeps it current whenever the row changes,
    -- instead of leaving it frozen at the insert time.
    `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后修改时间',
    PRIMARY KEY (`id`),
    UNIQUE KEY `uk_user_phone` (`phone`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=12 DEFAULT CHARSET=utf8mb4 COMMENT='用户表';

-- Re-enable foreign-key checks for the rest of the session.
SET FOREIGN_KEY_CHECKS = 1;
2、检查mysql binlog存储方式
# MySQL server options that turn on the row-format binary log.
[mysqld]
log-bin=mysql-bin #enables the binary log; adding this line is enough
binlog-format=ROW #use ROW format
server_id=1 #required for MySQL replication; must not be the same as canal's slaveId
创建canal用户用于拉数据
-- Dedicated account that canal uses to pull data.
-- An account created without a host part is 'canal'@'%'; it is spelled out
-- here so it visibly matches the GRANT below.
CREATE USER 'canal'@'%' IDENTIFIED BY 'canal';

-- Read access plus the two replication privileges, on every schema and table.
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT
    ON *.*
    TO 'canal'@'%';
-- GRANT ALL PRIVILEGES ON *.* TO 'canal'@'%' ;

-- Reload the grant tables.
FLUSH PRIVILEGES;
3、添加canal server实例配置文件instance.properties。
################################################
## canal server instance settings (instance.properties)
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0
# enable gtid use true/false
canal.instance.gtidon=false
# position info
# master.address is a placeholder and must be replaced with the source MySQL address;
# journal name / position / timestamp / gtid are left empty here
canal.instance.master.address=配置数据库地址
canal.instance.master.journal.name=
canal.instance.master.position=
canal.instance.master.timestamp=
canal.instance.master.gtid=
# rds oss binlog (left empty here)
canal.instance.rds.accesskey=
canal.instance.rds.secretkey=
canal.instance.rds.instanceId=
# table meta tsdb info
canal.instance.tsdb.enable=true
#canal.instance.tsdb.url=jdbc:mysql://127.0.0.1:3306/canal_tsdb
#canal.instance.tsdb.dbUsername=canal
#canal.instance.tsdb.dbPassword=canal
#canal.instance.standby.address =
#canal.instance.standby.journal.name =
#canal.instance.standby.position =
#canal.instance.standby.timestamp =
#canal.instance.standby.gtid=
# username/password
# both values are placeholders; presumably the canal account created earlier in this document goes here - confirm
canal.instance.dbUsername=配置数据库账号
canal.instance.dbPassword=配置数据库密码
canal.instance.connectionCharset = UTF-8
# enable druid Decrypt database password
canal.instance.enableDruid=false
#canal.instance.pwdPublicKey=MFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALK4BUxdDltRRE5/zXpVEVPUgunvscYFtEip3pmLlhrWpacX7y7GCMo2/JM6LeHmiiNdH1FWgGCpUfircSwlWKUCAwEAAQ==
# table regex; this pattern matches every schema.table name
canal.instance.filter.regex=.*\\..*
# table black regex (left empty here)
canal.instance.filter.black.regex=
# table field filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.field=test1.t_product:id/subject/keywords,test2.t_company:id/name/contact/ch
# table field black filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.black.field=test1.t_product:subject/product_image,test2.t_company:id/name/contact/ch
# mq config
# NOTE(review): the adapter config in this document uses mode tcp, so these mq settings are presumably unused - confirm
canal.mq.topic=example
# dynamic topic route by schema or table regex
#canal.mq.dynamicTopic=mytest1.user,mytest2\\..*,.*\\..*
canal.mq.partition=0
# hash partition config
#canal.mq.partitionsNum=3
#canal.mq.partitionHash=test.table:id^name,.*\\..*
#################################################
4、修改canal adapter配置文件application.yml
# canal adapter configuration (application.yml).
# Nesting is significant in YAML: every key below is indented under its parent
# so that srcDataSources, canalAdapters, groups and outerAdapters bind correctly.
server:
  port: 8081
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8
    default-property-inclusion: non_null

canal.conf:
  mode: tcp # kafka rocketMQ
  canalServerHost: 127.0.0.1:11111
#  zookeeperHosts: slave1:2181
#  mqServers: 127.0.0.1:9092 #or rocketmq
#  flatMessage: true
  batchSize: 500
  syncBatchSize: 1000
  retries: 0
  timeout:
  accessKey:
  secretKey:
  # Source database definitions; the three values below are placeholders.
  srcDataSources:
    defaultDS:
      url: jdbc连接地址
      username: 账号
      password: 密码
  canalAdapters:
  - instance: example # canal instance Name or mq topic name
    groups:
    - groupId: g1
      outerAdapters:
      - name: logger
#      - name: rdb
#        key: mysql1
#        properties:
#          jdbc.driverClassName: com.mysql.jdbc.Driver
#          jdbc.url: jdbc:mysql://127.0.0.1:3306/mytest2?useUnicode=true
#          jdbc.username: root
#          jdbc.password: 121212
#      - name: rdb
#        key: oracle1
#        properties:
#          jdbc.driverClassName: oracle.jdbc.OracleDriver
#          jdbc.url: jdbc:oracle:thin:@localhost:49161:XE
#          jdbc.username: mytest
#          jdbc.password: m121212
#      - name: rdb
#        key: postgres1
#        properties:
#          jdbc.driverClassName: org.postgresql.Driver
#          jdbc.url: jdbc:postgresql://localhost:5432/postgres
#          jdbc.username: postgres
#          jdbc.password: 121212
#          threads: 1
#          commitSize: 3000
#      - name: hbase
#        properties:
#          hbase.zookeeper.quorum: 127.0.0.1
#          hbase.zookeeper.property.clientPort: 2181
#          zookeeper.znode.parent: /hbase
      - name: es
        hosts: 127.0.0.1:9200 # 127.0.0.1:9200 for rest mode, 127.0.0.1:9300 for transport mode
        properties:
          mode: rest # or transport
          # security.auth: test:123456 # only used for rest mode
          cluster.name: elasticsearch
5、在conf/es文件夹下添加索引映射的yml文件(例如 user.yml)
# Index mapping for the ES adapter.
# dataSourceKey, destination and groupId reuse the names defaultDS, example
# and g1 from the adapter's application.yml shown in this document.
dataSourceKey: defaultDS
destination: example
groupId: g1
# The keys below must be nested under esMapping.
esMapping:
  _index: user
  _type: _doc
  # Column of the query result used as the document _id.
  _id: id
  sql: "select u.id,u.name,u.phone,u.gender,u.create_time from t_user u"
  commitBatch: 3000
6、es添加索引
# Delete the index
DELETE /user

# Create the index together with its mapping.
# create_time declares an explicit date format: without it the field only
# accepts the default date formats, so values such as "2019-10-18 10:50:00"
# would be rejected, and a format cannot be changed on the field afterwards.
PUT /user?include_type_name=true
{
  "mappings": {
    "_doc": {
      "properties": {
        "id": { "type": "long" },
        "name": { "type": "text" },
        "phone": { "type": "text" },
        "gender": { "type": "long" },
        "create_time": {
          "type": "date",
          "format": "yyyy-MM-dd HH:mm:ss"
        }
      }
    }
  }
}
# Put the field mapping for type _doc on the user index
PUT /user/_mapping/_doc?include_type_name=true
{
  "properties": {
    "id": {
      "type": "long"
    },
    "name": {
      "type": "text"
    },
    "phone": {
      "type": "text"
    },
    "gender": {
      "type": "long"
    },
    "create_time": {
      "type": "date",
      "format": "yyyy-MM-dd HH:mm:ss"
    }
  }
}
# Show the mapping of the index
GET /user/_mapping

# Return the documents stored in the index
POST /user/_search
{
  "query": {
    "match_all": {}
  }
}
# Index one sample document
POST /user/_doc
{
  "id": 1,
  "name": "xxx",
  "gender": 1,
  "phone": "17310128850",
  "create_time": "2019-10-18 10:50:00"
}
7、使用变更sql测试
-- Change statements that produce binlog events for the sync test.
INSERT INTO t_user (name, phone, gender, create_time)
VALUES ('xxx1', '111', 1, NOW());

-- Address the row inserted above through its unique phone number. The table
-- in this document is created with AUTO_INCREMENT=12, so a freshly created
-- table has no row with id = 1 and an update on that id would change nothing.
UPDATE t_user
SET name = 'ccc'
WHERE phone = '111';
8、修改kibana配置文件,修改es地址和语言,查看es数据。
# Elasticsearch address used by Kibana; the value below is a placeholder to replace.
elasticsearch.hosts: ["es地址"]
# Kibana UI language (zh-CN).
i18n.locale: "zh-CN"