使用docker 配置mysql group replication实验环境

1.  使用单主机模式 进行测试.

用到的配置和脚本如下:

node1 - my.cnf

[mysqld]
user=mysql
server_id=1
gtid_mode=ON
enforce_gtid_consistency=ON
master_info_repository=TABLE
relay_log_info_repository=TABLE
binlog_checksum=NONE
log_slave_updates=ON
log_bin=binlog
binlog_format=ROW

transaction_write_set_extraction=XXHASH64
loose-group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
loose-group_replication_start_on_boot=off
loose-group_replication_local_address= "node1:33060"
loose-group_replication_group_seeds= "node1:33060,node2:33060,node3:33060"
loose-group_replication_bootstrap_group=off
loose-group_replication_recovery_user=repl
loose-group-replication-single-primary-mode='ON'
loose-group-replication-enforce-update-everywhere-checks='OFF'

relay-log=replay-bin

node2-my.cnf

[mysqld]
user=mysql
server_id=2
gtid_mode=ON
enforce_gtid_consistency=ON
master_info_repository=TABLE
relay_log_info_repository=TABLE
binlog_checksum=NONE
log_slave_updates=ON
log_bin=binlog
binlog_format=ROW

transaction_write_set_extraction=XXHASH64
loose-group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
loose-group_replication_start_on_boot=off
loose-group_replication_local_address= "node2:33060"
loose-group_replication_group_seeds= "node1:33060,node2:33060,node3:33060"
loose-group_replication_bootstrap_group=off
loose-group_replication_recovery_user=repl
loose-group-replication-single-primary-mode='ON'
loose-group-replication-enforce-update-everywhere-checks='OFF'

relay-log=replay-bin

 node3-my.cnf

[mysqld]
user=mysql
server_id=3
gtid_mode=ON
enforce_gtid_consistency=ON
master_info_repository=TABLE
relay_log_info_repository=TABLE
binlog_checksum=NONE
log_slave_updates=ON
log_bin=binlog
binlog_format=ROW

transaction_write_set_extraction=XXHASH64
loose-group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
loose-group_replication_start_on_boot=off
loose-group_replication_local_address= "node3:33060"
loose-group_replication_group_seeds= "node1:33060,node2:33060,node3:33060"
loose-group_replication_bootstrap_group=off
loose-group-replication-single-primary-mode=on
loose-group-replication-enforce-update-everywhere-checks=off

relay-log=replay-bin

[mysqld_safe]

 docker启动脚本:run.sh

#!/bin/bash
#
# run.sh - (re)create one MySQL 5.7 container for the group replication lab.
#
# Usage:   ./run.sh <node-suffix>
# Example: ./run.sh 1        # works on directory/container "node1"
#
# For the given suffix N the script
#   1. makes sure <base_dir>/nodeN/conf and <base_dir>/nodeN/data exist,
#   2. writes a minimal conf/my.cnf when none is present (an existing file
#      is left untouched),
#   3. force-removes any existing container named nodeN,
#   4. starts a new detached container nodeN attached to the docker network
#      "groupnet", bind-mounting conf/my.cnf to /etc/my.cnf and data/ to
#      /var/lib/mysql.
#
# Exit status: non-zero on a usage error, a missing base_dir, a failed
# directory/file creation, or when "docker run" fails.

set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

node_suffix=${1:-}

# Abort on a missing suffix: otherwise everything below would operate on a
# directory and container called just "node". 3306 stays rejected because the
# optional host port is derived as "3306<suffix>", which would not be a valid
# port number for that value.
if [ -z "$node_suffix" ] || [ "$node_suffix" = '3306' ]; then
  die 'usage: ./run.sh <node-suffix>   (example: ./run.sh 1); the suffix must not be empty and must not be 3306'
fi

# You can point this at another test directory.
base_dir=/home/mysql_test/mysql_5.7/

if [ ! -d "$base_dir" ]; then
  die "$base_dir does not exist, please update base_dir in this script!"
fi

node_name="node${node_suffix}"
node_root="${base_dir%/}/${node_name}"
conf_dir="${node_root}/conf"
data_dir="${node_root}/data"
image=mysql/mysql-server:5.7

# mkdir -p creates the node directory and both sub-directories in one go and
# is a no-op for the ones that already exist.
mkdir -p -- "$conf_dir" "$data_dir" \
  || die "cannot create $conf_dir and $data_dir"

# Only seed a default config; never overwrite one the user has prepared.
if [ ! -f "${conf_dir}/my.cnf" ]; then
  printf '[mysqld]\nuser=mysql\n[mysqld_safe]\n' > "${conf_dir}/my.cnf" \
    || die "cannot write ${conf_dir}/my.cnf"
fi

# Drop a previous container with the same name so "docker run" can reuse it.
# A missing container is expected on the first run, hence the ignored status.
docker rm -f "$node_name" > /dev/null 2>&1 || true

# To publish MySQL on the host as well, add:  -p "3306${node_suffix}:3306"
docker run --name="$node_name" --net=groupnet \
  --mount "type=bind,src=${conf_dir}/my.cnf,dst=/etc/my.cnf" \
  --mount "type=bind,src=${data_dir},dst=/var/lib/mysql" \
  -d "$image"

 测试步骤:

1.  docker安装参照:centos6 安装docker17.06

     docker pull mysql/mysql-server:5.7 

2. 创建虚拟网络:docker network create groupnet (

    删除使用docker network rm groupnet

    禁用某个container net:   docker network disconnect groupnet node3

  )

3.  cd /home/mysql_test/mysql_5.7 没有的话需要创建

     mkdir  -p  node1/conf/  && touch node1/conf/my.cnf &&  {将node1对应的配置写入node1/conf/my.cnf}

     mkdir  -p  node2/conf/  && touch node2/conf/my.cnf &&  {将node2对应的配置写入node2/conf/my.cnf}

     mkdir  -p  node3/conf/  && touch node3/conf/my.cnf &&  {将node3对应的配置写入node3/conf/my.cnf}

     ./run.sh 1

     ./run.sh 2

     ./run.sh 3

4.  将node1作为主节点:

    INSTALL PLUGIN group_replication SONAME 'group_replication.so';

    SET SQL_LOG_BIN=0;

    #必须设置SQL_LOG_BIN为0,否则会被其他主机执行

    GRANT REPLICATION SLAVE ON *.* TO repl@'%'  IDENTIFIED BY '123456';

    SET SQL_LOG_BIN=1;

    SET GLOBAL group_replication_bootstrap_group=ON;

    START GROUP_REPLICATION;

    SET GLOBAL group_replication_bootstrap_group=OFF;

5. node2, node3执行从配置:

    INSTALL PLUGIN group_replication SONAME 'group_replication.so';

    SET SQL_LOG_BIN=0;

    #必须设置SQL_LOG_BIN为0,否则会被其他主机执行

    GRANT REPLICATION SLAVE ON *.* TO repl@'%'  IDENTIFIED BY '123456';

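    SET SQL_LOG_BIN=1;

    #为分布式恢复通道指定复制账号(与上面GRANT创建的账号、密码一致),否则新节点加入时无法从组内成员拉取缺失的事务

    CHANGE MASTER TO MASTER_USER='repl', MASTER_PASSWORD='123456' FOR CHANNEL 'group_replication_recovery';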

    START GROUP_REPLICATION;

6.   简单测试数据库创建和表创建:    

mysql> CREATE DATABASE test;
mysql> USE test;
mysql> CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 TEXT NOT NULL);
mysql> INSERT INTO t1 VALUES (1, 'Luis');

7. MGR状态监控: 

 

mysql> select * from performance_schema.replication_group_member_stats \G
*************************** 1. row ***************************
                      CHANNEL_NAME: group_replication_applier
                           VIEW_ID: 15426117843744977:39
                         MEMBER_ID: 7390fffa-e96e-11e8-a664-0242ac110001
       COUNT_TRANSACTIONS_IN_QUEUE: 0
        COUNT_TRANSACTIONS_CHECKED: 9
          COUNT_CONFLICTS_DETECTED: 0
COUNT_TRANSACTIONS_ROWS_VALIDATING: 0
TRANSACTIONS_COMMITTED_ALL_MEMBERS: 7390fffa-e96e-11e8-a664-0242ac110001:1-3,
aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:1-48
    LAST_CONFLICT_FREE_TRANSACTION: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:46
1 row in set (0.00 sec)
mysql> SELECT * FROM performance_schema.replication_group_members \G
*************************** 1. row ***************************
CHANNEL_NAME: group_replication_applier
   MEMBER_ID: 7390fffa-e96e-11e8-a664-0242ac110001
 MEMBER_HOST: 72057b10e109
 MEMBER_PORT: 3306
MEMBER_STATE: ONLINE
*************************** 2. row ***************************
CHANNEL_NAME: group_replication_applier
   MEMBER_ID: 9d32e2f1-e96e-11e8-a6a0-0242ac110002
 MEMBER_HOST: f0f94ac3a074
 MEMBER_PORT: 3306
MEMBER_STATE: ONLINE
*************************** 3. row ***************************
CHANNEL_NAME: group_replication_applier
   MEMBER_ID: 9f9b2e75-e96e-11e8-a7f4-0242ac110003
 MEMBER_HOST: 0da2cce13664
 MEMBER_PORT: 3306
MEMBER_STATE: ONLINE
3 rows in set (0.00 sec)

mysql> SELECT * FROM performance_schema.replication_connection_status \G
*************************** 1. row ***************************
             CHANNEL_NAME: group_replication_applier
               GROUP_NAME: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
              SOURCE_UUID: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
                THREAD_ID: NULL
            SERVICE_STATE: ON
COUNT_RECEIVED_HEARTBEATS: 0
 LAST_HEARTBEAT_TIMESTAMP: 0000-00-00 00:00:00
 RECEIVED_TRANSACTION_SET: 7390fffa-e96e-11e8-a664-0242ac110001:1-3,
aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa:1-48
        LAST_ERROR_NUMBER: 0
       LAST_ERROR_MESSAGE:
     LAST_ERROR_TIMESTAMP: 0000-00-00 00:00:00
mysql> SELECT * FROM performance_schema.replication_applier_status \G
*************************** 1. row ***************************
              CHANNEL_NAME: group_replication_applier
             SERVICE_STATE: ON
           REMAINING_DELAY: NULL
COUNT_TRANSACTIONS_RETRIES: 0
1 row in set (0.00 sec)

 

    #查看已经执行过的事务。   

    select @@global.gtid_executed;

 

遇见的问题:

1 .   [GCS] There is no local IP address matching the one configured for the local node

需要配置虚拟网络,并且所有组中的节点需要使用这个网络。

docker network create groupnet

 --net=groupnet

2.    如下错误:

2018-11-19T07:20:01.743346Z 0 [ERROR] Plugin group_replication reported: 'The member contains transactions not present in the group. The member will now exit the group.'
2018-11-19T07:20:01.743353Z 0 [Note] Plugin group_replication reported: 'To force this member into the group you can use the group_replication_allow_local_disjoint_gtids_join option'

 

2018-11-19T08:20:20.702632Z 197 [Note] Slave I/O thread for channel 'group_replication_recovery': connected to master 'repl@72057b10e109:3306',replication started in log 'FIRST' at position 4
2018-11-19T08:20:20.735420Z 198 [Note] Slave SQL thread for channel 'group_replication_recovery' initialized, starting replication in log 'FIRST' at position 0, relay log './replay-bin-group_replication_recovery.000001' position: 4
2018-11-19T08:20:21.047090Z 198 [ERROR] Slave SQL for channel 'group_replication_recovery': Error 'Can't drop database 'test'; database doesn't exist' on query. Default database: 'test'. Query: 'drop database test', Error_code: 1008
2018-11-19T08:20:21.047151Z 198 [Warning] Slave: Can't drop database 'test'; database doesn't exist Error_code: 1008
2018-11-19T08:20:21.047198Z 198 [ERROR] Error running query, slave SQL thread aborted. Fix the problem, and restart the slave SQL thread with "SLAVE START". We stopped at log 'binlog.000008' position 594.
2018-11-19T08:20:21.047227Z 195 [Note] Plugin group_replication reported: 'Terminating existing group replication donor connection and purging the corresponding logs.'
2018-11-19T08:20:21.167739Z 197 [Note] Slave I/O thread exiting for channel 'group_replication_recovery', read up to log 'binlog.000009', position 4

 

解决方法:

set global group_replication_allow_local_disjoint_gtids_join=on;

START GROUP_REPLICATION;

set global group_replication_allow_local_disjoint_gtids_join=off;

如果上述执行完还不正常,需要查看冲突的点。比如上述错误很明显是主库的binlog中有删除数据库test的操作,但是当前节点(从库)上并没有这个数据库,回放时就会报错。只要在当前节点上手动创建test库(不写入binlog)即可。步骤如下:

STOP GROUP_REPLICATION;

set global read_only=false;

SET SQL_LOG_BIN=0;

create database test;

SET SQL_LOG_BIN=1;

START GROUP_REPLICATION;

set global group_replication_allow_local_disjoint_gtids_join=off;

3. [ERROR] Plugin group_replication reported: 'Member was expelled from the group due to network failures, changing member status to ERROR.'

需要在网络错误的节点执行:STOP GROUP_REPLICATION; START GROUP_REPLICATION;

4. 2018-11-22T09:53:00.999736Z 3656 [ERROR] Slave SQL for channel 'group_replication_applier': Error 'FUNCTION GTID_COUNT already exists' on query. Default database: 'sys'. Query: 'CREATE DEFINER=`root`@`localhost` FUNCTION `GTID_COUNT`(gtid_set TEXT(10000)) RETURNS int(11)
    DETERMINISTIC
BEGIN
  DECLARE result BIGINT DEFAULT 0;
  DECLARE colon_pos INT;
  DECLARE next_dash_pos INT;
  DECLARE next_colon_pos INT;
  DECLARE next_comma_pos INT;
  SET gtid_set = GTID_NORMALIZE(gtid_set);
  SET colon_pos = LOCATE2(':', gtid_set, 1);
  WHILE colon_pos != LENGTH(gtid_set) + 1 DO
     SET next_dash_pos = LOCATE2('-', gtid_set, colon_pos + 1);
     SET next_colon_pos = LOCATE2(':', gtid_set, colon_pos + 1);
     SET next_comma_pos = LOCATE2(',', gtid_set, colon_pos + 1);
     IF next_dash_pos < next_colon_pos AND next_dash_pos < next_comma_pos THEN
       SET result = result +
         SUBSTR(gtid_set, next_dash_pos + 1,
                LEAST(next_colon_pos, next_comma_pos) - (next_dash_pos + 1)) -
         SUBSTR(gtid_set, colon_pos + 1, next_dash_pos - (co
2018-11-22T09:53:00.999766Z 3656 [Warning] Slave: FUNCTION GTID_COUNT already exists Error_code: 1304
2018-11-22T09:53:00.999776Z 3656 [ERROR] Plugin group_replication reported: 'The applier thread execution was aborted. Unable to process more transactions, this member will now leave the group.'
2018-11-22T09:53:00.999823Z 3656 [ERROR] Error running query, slave SQL thread aborted. Fix the problem, and restart the slave SQL thread with "SLAVE START". We stopped at log 'FIRST' position 0.
2018-11-22T09:53:00.999858Z 3653 [ERROR] Plugin group_replication reported: 'Fatal error during execution on the Applier process of Group Replication. The server will now leave the group.'
2018-11-22T09:53:00.999897Z 3653 [ERROR] Plugin group_replication reported: '[GCS] The member is already leaving or joining a group.'
2018-11-22T09:53:00.999912Z 3653 [ERROR] Plugin group_replication reported: 'Unable to confirm whether the server has left the group or not. Check performance_schema.replication_group_members to check group membership information.'
2018-11-22T09:53:00.999998Z 3653 [Note] Plugin group_replication reported: 'Going to wait for view modification'
2018-11-22T09:53:01.002868Z 0 [Note] Plugin group_replication reported: 'XCom protocol version: 3'
2018-11-22T09:53:01.002898Z 0 [Note] Plugin group_replication reported: 'XCom initialized and ready to accept incoming connections on port 33100'
2018-11-22T09:53:03.245164Z 0 [ERROR] Plugin group_replication reported: 'There was a previous plugin error while the member joined the group. The member will now exit the group.'

mysql> set global sql_slave_skip_counter=1;
ERROR 1858 (HY000): sql_slave_skip_counter can not be set when the server is running with @@GLOBAL.GTID_MODE = ON. Instead, for each transaction that you want to skip, generate an empty transaction with the same GTID as the transaction

出现如上错误时,需要执行:

SELECT * FROM performance_schema.replication_group_members \G

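获取组内成员列表,并确认当前的写节点(主节点)是哪台主机。注意:MySQL 5.7 中该表没有角色列,可先执行 SHOW STATUS LIKE 'group_replication_primary_member'; 得到主节点的server_uuid,再与上表中的MEMBER_ID对应。假设该主机是localhost:3320,则需要访问3320的binlog,查看错误sql发生的gtid。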

假如发生错误时的 gtid为:1bb1b861-f776-11e6-be42-782bcb377193:14,

则只要按照如下操作即可:

#查看当前执行过的gtid

SELECT @@global.gtid_executed;

SET GTID_NEXT='1bb1b861-f776-11e6-be42-782bcb377193:14';

BEGIN;

COMMIT;

SET GTID_NEXT='AUTOMATIC';

#验证当前已经执行过的gtid

SELECT @@global.gtid_executed;

这之后再开启组复制: START GROUP_REPLICATION; 查看是否有错误发生。如果还有类似记录已存在的错误,再继续按照上述方法进行跳过事务即可。

5. 2018-11-26T08:43:19.536306Z 0 [ERROR] Plugin group_replication reported: 'This member has more executed transactions than those present in the group. Local transactions: 2928f39d-f14f-11e8-acb4-005056a1794a:1-11 > Group transactions: 46c82b76-f14c-11e8-8bef-005056a17264:1-2,
f65d4372-f145-11e8-94e9-005056a17264:1-46,
fb3cb695-f152-11e8-83b1-005056a17264:1-15'
2018-11-26T08:43:19.536358Z 0 [ERROR] Plugin group_replication reported: 'The member contains transactions not present in the group. The member will now exit the group.'

在报错的(待加入组的)节点上执行reset master即可。注意:该命令会清空这个节点本地的binlog和已执行的GTID记录,只适用于本地多出的事务可以丢弃的情况(例如测试环境)。

 

参考:

1. mgr相关参数:https://mysqlhighavailability.com/getting-started-with-mysql-group-replication/
2. oracle官方文档:https://dev.mysql.com/doc/refman/5.7/en/group-replication-adding-instances.html

3. docker测试mgr: https://mysqlhighavailability.com/setting-up-mysql-group-replication-with-mysql-docker-images/

4. 添加实例入组时遇到的问题: https://ronniethedba.wordpress.com/2017/04/22/this-member-has-more-executed-transactions-than-those-present-in-the-group/

5. MGR常见问题: http://drmingdrmer.github.io/tech/mysql/2018/08/04/mysql-group-replication.html

6. MGR测试用例: 

https://www.cnblogs.com/paul8339/p/9667701.html

https://blog.csdn.net/Mlztesoft/article/details/79927425

7.mgr原理和配置: https://blog.csdn.net/poxiaonie/article/details/73505948 

 

 

你可能感兴趣的:(数据库,mysql,复制方案)