使用canal同步mysql数据到es

需求:传统数据库搜索速度非常慢,需要依赖搜索引擎实现快速搜索。(使用canal实现无侵入数据异构)

canal server:用于伪装mysql slave获取mysql binlog。
canal adapter:提供多种中间件的导入,如kafka、rocketmq、hbase、elasticsearch,可直接配置使用。
                          注:canal adapter 1.1.4只支持es6.x版本。
elasticsearch(es):弹性搜索引擎底层使用lucene实现,提供集群分片扩展。
kibana:es可视化工具,用于可视化es数据,提供各种工具如:开发工具、索引管理等。

1、创建异构表


-- Foreign-key checks are switched off only while the table is dropped and
-- recreated; they are switched back on at the end of this script.
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for t_user
-- ----------------------------
DROP TABLE IF EXISTS `t_user`;
CREATE TABLE `t_user` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `name` varchar(255) NOT NULL COMMENT '用户姓名',
  `gender` tinyint(4) DEFAULT NULL COMMENT '性别1:男2:女',
  `phone` varchar(20) NOT NULL COMMENT '手机号码',
  `email` varchar(50) DEFAULT NULL COMMENT '邮箱',
  `status` tinyint(4) NOT NULL DEFAULT '1' COMMENT '状态1:启用2:禁用',
  `birthday` date DEFAULT NULL COMMENT '出生日期',
  `id_card` varchar(20) DEFAULT NULL COMMENT '证件号码',
  `head_portrait` varchar(255) DEFAULT NULL COMMENT '头像',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  -- ON UPDATE keeps the "last modified" column current on every row change;
  -- without it the value would stay at the insert time forever.
  `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后修改时间',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uk_user_phone` (`phone`) USING BTREE
-- No AUTO_INCREMENT=12 seed (a dump artifact), so ids on a fresh table start at 1.
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='用户表';

-- Restore foreign-key checking for the rest of the session.
SET FOREIGN_KEY_CHECKS = 1;

2、检查mysql binlog存储方式

[mysqld]
# Turn on the binary log; adding this one line is enough.
log-bin=mysql-bin
# Use the ROW binlog format.
binlog-format=ROW
# Must be defined for MySQL replication; it must not be the same as canal's slaveId.
server_id=1

    创建canal用户用于拉数据

-- Account used by canal to pull data. The host part is spelled out:
-- 'canal'@'%' is the account a host-less CREATE USER canal creates.
CREATE USER 'canal'@'%' IDENTIFIED BY 'canal';
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT
    ON *.*
    TO 'canal'@'%';
-- Broader alternative, intentionally left disabled:
-- GRANT ALL PRIVILEGES ON *.* TO 'canal'@'%' ;
FLUSH PRIVILEGES;

3、添加canal server实例配置文件instance.properties。

################################################
# canal server instance configuration (instance.properties).
# Values written in Chinese below are placeholders and must be replaced
# with real values before the instance is started.
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0

# enable gtid use true/false
canal.instance.gtidon=false

# position info
# Placeholder ("configure the database address"): replace with the address of
# the source MySQL server.
canal.instance.master.address=配置数据库地址
# The four position keys below are left empty in this setup.
canal.instance.master.journal.name=
canal.instance.master.position=
canal.instance.master.timestamp=
canal.instance.master.gtid=

# rds oss binlog
# Left empty in this setup.
canal.instance.rds.accesskey=
canal.instance.rds.secretkey=
canal.instance.rds.instanceId=

# table meta tsdb info
canal.instance.tsdb.enable=true
#canal.instance.tsdb.url=jdbc:mysql://127.0.0.1:3306/canal_tsdb
#canal.instance.tsdb.dbUsername=canal
#canal.instance.tsdb.dbPassword=canal

# Standby-server settings, all disabled in this setup.
#canal.instance.standby.address =
#canal.instance.standby.journal.name =
#canal.instance.standby.position =
#canal.instance.standby.timestamp =
#canal.instance.standby.gtid=

# username/password
# Placeholders ("configure the database account" / "configure the database
# password"): replace with the credentials canal uses to connect to MySQL.
canal.instance.dbUsername=配置数据库账号
canal.instance.dbPassword=配置数据库密码
canal.instance.connectionCharset = UTF-8
# enable druid Decrypt database password
canal.instance.enableDruid=false
#canal.instance.pwdPublicKey=MFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALK4BUxdDltRRE5/zXpVEVPUgunvscYFtEip3pmLlhrWpacX7y7GCMo2/JM6LeHmiiNdH1FWgGCpUfircSwlWKUCAwEAAQ==

# table regex
# The pattern below matches every <schema>.<table> name.
canal.instance.filter.regex=.*\\..*
# table black regex
canal.instance.filter.black.regex=
# table field filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.field=test1.t_product:id/subject/keywords,test2.t_company:id/name/contact/ch
# table field black filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.black.field=test1.t_product:subject/product_image,test2.t_company:id/name/contact/ch

# mq config
# NOTE(review): the mq.* keys presumably only take effect when the server
# publishes to a message queue - confirm against the canal documentation.
canal.mq.topic=example
# dynamic topic route by schema or table regex
#canal.mq.dynamicTopic=mytest1.user,mytest2\\..*,.*\\..*
canal.mq.partition=0
# hash partition config
#canal.mq.partitionsNum=3
#canal.mq.partitionHash=test.table:id^name,.*\\..*
#################################################

4、修改canal adapter配置文件application.yml

# canal adapter configuration (application.yml).
# Values written in Chinese below are placeholders and must be replaced
# with real values before the adapter is started.
server:
  port: 8081
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8
    default-property-inclusion: non_null

canal.conf:
  mode: tcp # kafka rocketMQ
  # Address of the canal server this adapter connects to in tcp mode.
  canalServerHost: 127.0.0.1:11111
#  zookeeperHosts: slave1:2181
#  mqServers: 127.0.0.1:9092 #or rocketmq
#  flatMessage: true
  batchSize: 500
  syncBatchSize: 1000
  retries: 0
  timeout:
  accessKey:
  secretKey:
  # Source databases, keyed by name. The es mapping file refers to an entry
  # here through its dataSourceKey (defaultDS in this setup).
  srcDataSources:
    defaultDS:
      # Placeholders ("jdbc connection address" / "account" / "password").
      url: jdbc连接地址
      username: 账号
      password: 密码
  canalAdapters:
  - instance: example # canal instance Name or mq topic name
    groups:
    # Same value as groupId in the es mapping file.
    - groupId: g1
      outerAdapters:
      - name: logger
      # The rdb and hbase adapters below are disabled in this setup.
#      - name: rdb
#        key: mysql1
#        properties:
#          jdbc.driverClassName: com.mysql.jdbc.Driver
#          jdbc.url: jdbc:mysql://127.0.0.1:3306/mytest2?useUnicode=true
#          jdbc.username: root
#          jdbc.password: 121212
#      - name: rdb
#        key: oracle1
#        properties:
#          jdbc.driverClassName: oracle.jdbc.OracleDriver
#          jdbc.url: jdbc:oracle:thin:@localhost:49161:XE
#          jdbc.username: mytest
#          jdbc.password: m121212
#      - name: rdb
#        key: postgres1
#        properties:
#          jdbc.driverClassName: org.postgresql.Driver
#          jdbc.url: jdbc:postgresql://localhost:5432/postgres
#          jdbc.username: postgres
#          jdbc.password: 121212
#          threads: 1
#          commitSize: 3000
#      - name: hbase
#        properties:
#          hbase.zookeeper.quorum: 127.0.0.1
#          hbase.zookeeper.property.clientPort: 2181
#          zookeeper.znode.parent: /hbase
      # Elasticsearch adapter, used here in rest mode against port 9200.
      - name: es
        hosts: 127.0.0.1:9200 #127.0.0.1:9300 # 127.0.0.1:9200 for rest mode
        properties:
          mode: rest #transport # or rest
          # security.auth: test:123456 #  only used for rest mode
          cluster.name: elasticsearch

5、添加config\es文件夹下的yml文件

# Mapping file for the es adapter (placed under the config\es folder).
# Name of the source datasource; defaultDS is the entry declared under
# srcDataSources in application.yml.
dataSourceKey: defaultDS
# Same value as `instance` under canalAdapters in application.yml.
destination: example
# Same value as `groupId` under canalAdapters in application.yml.
groupId: g1
esMapping:
  # Target index and mapping type.
  _index: user
  _type: _doc
  # Column of the sql below that is used as the document _id.
  _id: id
  # Query that selects the t_user columns to sync.
  sql: "select u.id,u.name,u.phone,u.gender,u.create_time from t_user u"
  # NOTE(review): presumably the batch size used when committing to es - confirm.
  commitBatch: 3000

6、es添加索引

# Delete the index
DELETE /user

# Create the index together with its mapping.
# create_time accepts "yyyy-MM-dd HH:mm:ss" (the form used by the sample
# document below) and still accepts the default date formats.
PUT /user?include_type_name=true
{
  "mappings":{
    "_doc":{
      "properties":{
        "id":{"type":"long"},
        "name":{"type":"text"},
        "phone":{"type":"text"},
        "gender":{"type":"long"},
        "create_time":{
          "type":"date",
          "format":"yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
        }
      }
    }
  }
}

# Add the mapping to an index that already exists.
# The create_time format is identical to the one in the create request above:
# a different format on an existing date field is rejected as a mapping conflict.
PUT /user/_mapping/_doc?include_type_name=true
{
  "properties":{
    "id":{"type":"long"},
    "name":{"type":"text"},
    "phone":{"type":"text"},
    "gender":{"type":"long"},
    "create_time":{
      "type":"date",
      "format":"yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
    }
  }
}

# Show the mapping of the index
GET /user/_mapping

# Query the data in the index
POST /user/_search
{
  "query": { "match_all": {}}
}

# Add a document.
# It is stored under _id 1, the same value as its id field, because the
# es mapping file uses the id column as the document _id; an auto-generated
# _id would leave this sample as a separate, duplicate document.
PUT /user/_doc/1
{
  "id":1,
  "name":"xxx",
  "gender":1,
  "phone":"17310128850",
  "create_time":"2019-10-18 10:50:00"
}

7、使用变更sql测试

-- Insert a row to produce a binlog INSERT event.
INSERT INTO t_user (name, phone, gender, create_time)
VALUES ('xxx1', '111', 1, NOW());

-- Update the row inserted above to produce a binlog UPDATE event.
-- The row is addressed through the unique phone column because its
-- auto-generated id depends on the table's AUTO_INCREMENT state.
UPDATE t_user
SET name = 'ccc'
WHERE phone = '111';

8、修改kibana配置文件,修改es地址和语言,查看es数据。

# Elasticsearch address; "es地址" ("es address") is a placeholder to replace.
elasticsearch.hosts: ["es地址"]
# Language setting, here Simplified Chinese.
i18n.locale: "zh-CN"

 

你可能感兴趣的:(架构,lucene)