A Docker-based MySQL MGR (Group Replication) high-availability cluster, structured as follows:
Software versions
- MySQL 5.7.30
- ProxySQL 2.0.12
Prerequisites
- Three CentOS (or Ubuntu) servers
- The latest docker-ce installed on every server
- docker-compose installed on every server
- Firewalls disabled (or at least ports 3306 and 33061 opened)
- The hostnames of all three servers added to /etc/hosts on each machine (see the example below)
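For example, assuming the three nodes use the IP addresses from the configuration below and the hostnames node1/node2/node3 (the hostnames are placeholders; use your own), /etc/hosts on every server would contain:
192.168.3.2 node1
192.168.3.3 node2
192.168.3.4 node3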
Start MySQL
The docker-compose.yml file is as follows:
version: '3.5'
services:
  node1:
    image: mysql:5.7.30
    restart: always
    environment:
      - TZ=Asia/Shanghai
      - MYSQL_ROOT_PASSWORD=123456
    volumes:
      - ./data:/var/lib/mysql
    command: ["mysqld",
              "--server-id=1",
              "--gtid-mode=ON",
              "--enforce-gtid-consistency=ON",
              "--master-info-repository=TABLE",
              "--relay-log-info-repository=TABLE",
              "--binlog-checksum=NONE",
              "--log-slave-updates=ON",
              "--log-bin=mysql-bin-1.log",
              "--plugin-load=group_replication.so",
              "--binlog_format=ROW",
              "--transaction-write-set-extraction=XXHASH64",
              "--relay-log-recovery=ON",
              "--loose-group-replication-start-on-boot=OFF",
              "--loose-group-replication-group-name=aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee",
              "--loose-group-replication-local-address=192.168.3.2:33061",
              "--loose-group-replication-group-seeds=192.168.3.2:33061,192.168.3.3:33061,192.168.3.4:33061",
              "--loose-group-replication-single-primary-mode=ON",
              "--loose-group-replication-enforce-update-everywhere-checks=OFF",
              "--relay-log=db1-relay-bin",
              "--max_allowed_packet=256M",
              "--max_connections=400"]
    healthcheck:
      test: "mysqladmin ping -u root -p123456"
      interval: 2s
      retries: 20
    network_mode: "host"
The following settings must be different on every server in the cluster (see the per-node sketch after this list):
- server-id
- loose-group-replication-local-address
- relay-log
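For instance, node2's command list would differ from node1's only in these three flags (a sketch assuming node2 runs on 192.168.3.3; the rest of its docker-compose.yml matches node1's):
              "--server-id=2",
              "--loose-group-replication-local-address=192.168.3.3:33061",
              "--relay-log=db2-relay-bin",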
Start the MySQL service on each of the three nodes.
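On each server, from the directory containing its docker-compose.yml:
docker-compose up -d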
Build the cluster
Run the following SQL on the primary node:
SET SQL_LOG_BIN=0;
CREATE USER rpl_user@'%' IDENTIFIED BY 'password';
GRANT REPLICATION SLAVE ON *.* TO rpl_user@'%';
FLUSH PRIVILEGES;
SET SQL_LOG_BIN=1;
CHANGE MASTER TO MASTER_USER='rpl_user', MASTER_PASSWORD='password' FOR CHANNEL 'group_replication_recovery';
SET GLOBAL group_replication_bootstrap_group=ON;
START GROUP_REPLICATION;
SET GLOBAL group_replication_bootstrap_group=OFF;
SELECT * FROM performance_schema.replication_group_members;
Run the following SQL on the two secondary nodes:
SET SQL_LOG_BIN=0;
DROP USER IF EXISTS rpl_user;
CREATE USER rpl_user@'%' IDENTIFIED BY 'password';
GRANT REPLICATION SLAVE ON *.* TO rpl_user@'%';
SET SQL_LOG_BIN=1;
CHANGE MASTER TO MASTER_USER='rpl_user', MASTER_PASSWORD='password' FOR CHANNEL 'group_replication_recovery';
RESET MASTER;
START GROUP_REPLICATION;
SQL to query the cluster status:
SELECT * FROM performance_schema.replication_group_members;
SQL to query which node is the primary:
SHOW STATUS LIKE 'group_replication_primary_member';
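The statement above returns only the primary's member UUID. A small convenience query to resolve it to a host and port (an add-on for illustration, not part of the original setup; it relies on MySQL 5.7's performance_schema.global_status):
SELECT MEMBER_HOST, MEMBER_PORT
FROM performance_schema.replication_group_members
WHERE MEMBER_ID = (SELECT VARIABLE_VALUE
                   FROM performance_schema.global_status
                   WHERE VARIABLE_NAME = 'group_replication_primary_member');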
Configure ProxySQL
On the MySQL cluster's primary node, create the user ProxySQL will use to monitor the cluster's state:
create user 'monitor'@'%' identified by 'monitor';
grant select on sys.* to 'monitor'@'%';
ProxySQL connects to the MySQL cluster as this user to read each node's status.
On the primary node, create the user through which ProxySQL will proxy database access:
create user 'proxysql'@'%' identified by 'proxysql';
grant all on *.* to 'proxysql'@'%';
Run the following SQL on the cluster's writer node to create the view (and its helper functions) that ProxySQL queries to learn each node's state:
USE sys;
DELIMITER $$
CREATE FUNCTION IFZERO(a INT, b INT)
RETURNS INT
DETERMINISTIC
RETURN IF(a = 0, b, a)$$
CREATE FUNCTION LOCATE2(needle TEXT(10000), haystack TEXT(10000), offset INT)
RETURNS INT
DETERMINISTIC
RETURN IFZERO(LOCATE(needle, haystack, offset), LENGTH(haystack) + 1)$$
CREATE FUNCTION GTID_NORMALIZE(g TEXT(10000))
RETURNS TEXT(10000)
DETERMINISTIC
RETURN GTID_SUBTRACT(g, '')$$
CREATE FUNCTION GTID_COUNT(gtid_set TEXT(10000))
RETURNS INT
DETERMINISTIC
BEGIN
DECLARE result BIGINT DEFAULT 0;
DECLARE colon_pos INT;
DECLARE next_dash_pos INT;
DECLARE next_colon_pos INT;
DECLARE next_comma_pos INT;
SET gtid_set = GTID_NORMALIZE(gtid_set);
SET colon_pos = LOCATE2(':', gtid_set, 1);
WHILE colon_pos != LENGTH(gtid_set) + 1 DO
SET next_dash_pos = LOCATE2('-', gtid_set, colon_pos + 1);
SET next_colon_pos = LOCATE2(':', gtid_set, colon_pos + 1);
SET next_comma_pos = LOCATE2(',', gtid_set, colon_pos + 1);
IF next_dash_pos < next_colon_pos AND next_dash_pos < next_comma_pos THEN
SET result = result +
SUBSTR(gtid_set, next_dash_pos + 1,
LEAST(next_colon_pos, next_comma_pos) - (next_dash_pos + 1)) -
SUBSTR(gtid_set, colon_pos + 1, next_dash_pos - (colon_pos + 1)) + 1;
ELSE
SET result = result + 1;
END IF;
SET colon_pos = next_colon_pos;
END WHILE;
RETURN result;
END$$
CREATE FUNCTION gr_applier_queue_length()
RETURNS INT
DETERMINISTIC
BEGIN
RETURN (SELECT sys.gtid_count( GTID_SUBTRACT( (SELECT
Received_transaction_set FROM performance_schema.replication_connection_status
WHERE Channel_name = 'group_replication_applier' ), (SELECT
@@global.GTID_EXECUTED) )));
END$$
CREATE FUNCTION gr_member_in_primary_partition()
RETURNS VARCHAR(3)
DETERMINISTIC
BEGIN
RETURN (SELECT IF( MEMBER_STATE='ONLINE' AND ((SELECT COUNT(*) FROM
performance_schema.replication_group_members WHERE MEMBER_STATE != 'ONLINE') >=
((SELECT COUNT(*) FROM performance_schema.replication_group_members)/2) = 0),
'YES', 'NO' ) FROM performance_schema.replication_group_members JOIN
performance_schema.replication_group_member_stats USING(member_id));
END$$
CREATE VIEW gr_member_routing_candidate_status AS SELECT
sys.gr_member_in_primary_partition() as viable_candidate,
IF( (SELECT (SELECT GROUP_CONCAT(variable_value) FROM
performance_schema.global_variables WHERE variable_name IN ('read_only',
'super_read_only')) != 'OFF,OFF'), 'YES', 'NO') as read_only,
sys.gr_applier_queue_length() as transactions_behind,
Count_Transactions_in_queue as 'transactions_to_cert'
FROM performance_schema.replication_group_member_stats$$
DELIMITER ;
Check that the view was created successfully:
mysql> SELECT * FROM sys.gr_member_routing_candidate_status;
+------------------+-----------+---------------------+----------------------+
| viable_candidate | read_only | transactions_behind | transactions_to_cert |
+------------------+-----------+---------------------+----------------------+
| YES              | NO        |                   0 |                    0 |
+------------------+-----------+---------------------+----------------------+
1 row in set (0.00 sec)
Start the ProxySQL service
The proxysql.cnf configuration file:
datadir="/var/lib/proxysql"

admin_variables=
{
    admin_credentials="admin:admin"
    mysql_ifaces="0.0.0.0:6032"
    refresh_interval=2000
    web_enabled=true
    web_port=6080
    stats_credentials="stats:admin"
}

mysql_variables=
{
    threads=4
    max_connections=1200
    default_query_delay=0
    default_query_timeout=36000000
    have_compress=true
    interfaces="0.0.0.0:6033"
    default_schema="information_schema"
    stacksize=1048576
    server_version="5.7.30"
    commands_stats=true
    sessions_sort=true
    monitor_username="monitor"
    monitor_password="monitor"
    connect_retries_on_failure=10
}

mysql_group_replication_hostgroups=
(
    {
        writer_hostgroup=10
        backup_writer_hostgroup=20
        reader_hostgroup=30
        offline_hostgroup=40
        active=1
        max_writers=1
        writer_is_also_reader=0
        max_transactions_behind=5000
        comment="proxy GR"
    }
)

mysql_servers=
(
    { address="192.168.3.2", port=3306, hostgroup=10, max_connections=400 },
    { address="192.168.3.3", port=3306, hostgroup=30, max_connections=400 },
    { address="192.168.3.4", port=3306, hostgroup=30, max_connections=400 }
)

mysql_query_rules=
(
    {
        rule_id=10
        active=1
        match_pattern="^SELECT seq(.*)"
        destination_hostgroup=10
        apply=1
    },
    {
        rule_id=50
        active=1
        match_pattern="^SELECT.*flyway_schema_history.*"
        destination_hostgroup=10
        apply=1
    },
    {
        rule_id=100
        active=1
        match_pattern="^SELECT .* FOR UPDATE"
        destination_hostgroup=10
        apply=1
    },
    {
        rule_id=150
        active=1
        match_pattern="^SELECT RELEASE_LOCK(.*)"
        destination_hostgroup=10
        multiplex=0
        apply=1
    },
    {
        rule_id=180
        active=1
        match_pattern="^SELECT GET_LOCK(.*)"
        destination_hostgroup=10
        multiplex=0
        apply=1
    },
    {
        rule_id=200
        active=1
        match_pattern="^SELECT .*"
        destination_hostgroup=30
        apply=1
    },
    {
        rule_id=300
        active=1
        match_pattern=".*"
        destination_hostgroup=10
        multiplex=0
        apply=1
    }
)

mysql_users=
(
    { username="proxysql", password="proxysql", default_hostgroup=10, transaction_persistent=1, active=1 }
)
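Once the ProxySQL container is running (next step), you can verify from the admin interface on port 6032 that the monitor has placed each backend into the expected hostgroup. A quick check, assuming the admin credentials configured above:
mysql -uadmin -padmin -h127.0.0.1 -P6032 -e "SELECT hostgroup_id, hostname, port, status FROM runtime_mysql_servers;"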
The docker-compose.yml for ProxySQL:
version: "3"
services:
proxysql:
image: proxysql:2.0.12
restart: always
volumes:
- "./proxysql.cnf:/etc/proxysql.cnf"
network_mode: "host"
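After starting the container with docker-compose up -d, applications connect through port 6033. A minimal end-to-end check using the proxied user defined above (per the query rules, a plain SELECT should be routed to a reader node in hostgroup 30):
mysql -uproxysql -pproxysql -h127.0.0.1 -P6033 -e "SELECT @@hostname;"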
Notes
- Changes to the MySQL configuration are not propagated to the rest of the cluster automatically; every node must be changed individually. Make configuration changes while first building the cluster if possible: changing them at runtime can desynchronize the data and bring the cluster down.
- Once the cluster is up, never operate on it through a secondary node; route all operations through the primary. Writing through a secondary easily desynchronizes the data and crashes the cluster (see the check after this list).
- Data synchronization between the nodes is incremental only; replication cannot perform a full copy of existing data.
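To confirm that a secondary will indeed reject writes, check its read-only flags; in single-primary mode, group replication enables super_read_only on the secondaries:
SELECT @@global.read_only, @@global.super_read_only;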
Common problems
1. start group_replication fails on a secondary node
A. Check the /etc/hosts file and make sure all nodes are listed in it.
B. Run reset master; on the secondary node.
C. Check the settings in docker-compose.yml.
2. A secondary node cannot sync
Run the following when initializing the secondary node:
CHANGE MASTER TO MASTER_USER='rpl_user', MASTER_PASSWORD='password' FOR CHANNEL 'group_replication_recovery';
START GROUP_REPLICATION;
3. Adding a new node
A. Take a full backup from an existing node:
mysqldump -h127.0.0.1 -uroot -p123456 --set-gtid-purged=off --lock-all-tables --all-databases --triggers --routines --events >dump.sql
B. Import the backup into the new node
Run reset master; once on the new node before importing the data, otherwise the import may fail:
mysql -uroot -p123456 -h127.0.0.1 < dump.sql
C. Add the new node to the group-replication-group-seeds setting on every node (see the sketch below) and restart the databases.
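For example, if the new node is 192.168.3.5 (a placeholder address), the seeds flag on every node becomes:
              "--loose-group-replication-group-seeds=192.168.3.2:33061,192.168.3.3:33061,192.168.3.4:33061,192.168.3.5:33061",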
D. On the new node, run the following to join the cluster:
CHANGE MASTER TO MASTER_USER='rpl_user', MASTER_PASSWORD='password' FOR CHANNEL 'group_replication_recovery';
START GROUP_REPLICATION;
4. After the whole cluster has been shut down, first run the following on the primary node:
STOP GROUP_REPLICATION;
SET GLOBAL group_replication_bootstrap_group=ON;
START GROUP_REPLICATION;
SET GLOBAL group_replication_bootstrap_group=OFF;
SELECT * FROM performance_schema.replication_group_members;
Then run on the secondary nodes:
START GROUP_REPLICATION;
5. Never run reset master on a node that already belongs to the cluster: it clears and regenerates the binlog index file, which breaks cluster synchronization.