架构:
192.168.1.111 master read & write
192.168.1.112 slave(candidate) read only
192.168.1.113 slave read only
192.168.1.114 mha-monitor
sestatus -v
setenforce 0
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/sysconfig/selinux
systemctl stop firewalld.service
systemctl disable firewalld.service
安装mysql省略,其中配置文件 my.cnf 设置如下:
[client]
port = 3306
socket = /tmp/mysql.sock
[mysqld]
user=mysql
port = 3306
server_id = 112
socket=/tmp/mysql.sock
basedir =/usr/local/mysql
datadir =/usr/local/mysql/data
log-error =/var/log/mysql/mysqld.log
pid-file =/var/run/mysqld/mysqld.pid
socket =/tmp/mysql.sock
autocommit = 1
character_set_server=utf8
default-storage-engine=INNODB
transaction_isolation = READ-COMMITTED
event_scheduler = 1
lower_case_table_names=1
explicit_defaults_for_timestamp = 1
skip-external-locking
default-time-zone = '+8:00'
max_allowed_packet = 1G
innodb-buffer-pool-size = 1G
log-bin=/usr/local/mysql/binlog/mysql-bin
binlog_checksum = NONE
binlog_format = row
log_slave_updates = ON
expire_logs_days = 15
#全局事务
gtid_mode=ON
enforce_gtid_consistency=ON
master_info_repository=TABLE
relay_log_info_repository=TABLE
relay_log_recovery=ON
relay_log_purge=OFF
#组提交&并行复制
sync_binlog=1
innodb_flush_log_at_trx_commit=1
#slave_parallel_type=LOGICAL_CLOCK
#slave_parallel_workers=2
#binlog_group_commit_sync_delay=1000000
#binlog_group_commit_sync_no_delay_count=10
#group_replication_compression_threshold=100
#半同步(稍后在 111、112 实例添加)
plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
rpl-semi-sync-master-enabled = 1
rpl-semi-sync-slave-enabled = 1
sql_mode=STRICT_TRANS_TABLES
[mysqld_safe]
log-error=/var/log/mysql/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
master(192.168.1.111)创建复制用户(其他slave设置同步时会自动同步mysql库)
create user repl@'192.168.1.%' identified by 'repl';
grant replication slave,replication client on *.* to repl@'192.168.1.%';
flush privileges;
2个实例 master(192.168.1.111) 及备用slave(192.168.1.112)安装半同步插件
INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';
SELECT PLUGIN_NAME, PLUGIN_STATUS FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME LIKE '%semi%';
首次设置主从同步,slave 192.168.1.112 和 slave 192.168.1.113 实例执行(用户repl会同步过来)
change master to
master_host='192.168.1.111',
master_port=3306,
master_user='repl',
master_password='repl',
master_auto_position=1;
start slave;
查看同步状态; 查看半同步的主备状态
show slave status;
show status like 'Rpl_semi_sync_%_status';
配置各节点间 SSH 免密登录(MHA 依赖节点间 SSH 互信),参考: Linux(CentOS)服务器间共享文件夹及文件传输
#首先:111、112、113、114 都执行
ssh-keygen -t rsa
#然后:111、112、113、114 都逐行执行
ssh-copy-id -i /root/.ssh/id_rsa.pub [email protected]
ssh-copy-id -i /root/.ssh/id_rsa.pub [email protected]
ssh-copy-id -i /root/.ssh/id_rsa.pub [email protected]
ssh-copy-id -i /root/.ssh/id_rsa.pub [email protected]
参考:Ansible 安装及多服务部署示例
192.168.1.114 中安装 ansible
# 192.168.1.114 中安装 ansible
yum install -y ansible
# vim /etc/ansible/hosts
[mysql-mha-node]
192.168.1.111
192.168.1.112
192.168.1.113
192.168.1.114
# vim /etc/ansible/ansible.cfg
[defaults]
sudo_user=root
remote_port=22
remote_user=root
module_name=command
host_key_checking=False
command_warnings = False
inventory=/etc/ansible/hosts
roles_path=/etc/ansible/roles
log_path=/var/log/ansible.log
private_key_file=/root/.ssh/id_rsa
192.168.1.114 中执行:各节点hosts绑定 ip 和 服务器名称
ansible mysql-mha-node -m shell -a "echo -e '
192.168.1.111 server111
192.168.1.112 server112
192.168.1.113 server113
192.168.1.114 server114' >> /etc/hosts"
每个节点都下载安装 mha4mysql-node
yum install -y perl-DBD-MySQL perl-ExtUtils-MakeMaker perl-CPAN
yum install -y https://github.com/yoshinorim/mha4mysql-node/releases/download/v0.58/mha4mysql-node-0.58-0.el7.centos.noarch.rpm
192.168.1.114 节点执行安装 mha4mysql-manager
yum install -y https://github.com/yoshinorim/mha4mysql-manager/releases/download/v0.58/mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
192.168.1.114 manager 节点配置文件
# mkdir -p /var/log/mha/app1
# vim /etc/masterha.cnf
[server default]
remote_workdir=/var/log/mha
manager_workdir=/var/log/mha
manager_log=/var/log/mha/app1/manager.log
master_binlog_dir=/usr/local/mysql/binlog
user=root
password=mysql
repl_user=repl
repl_password=repl
ssh_user=root
ping_interval=2
#以下示例脚本可从 manager 源码包(tar.gz)解压找到(mha4mysql-manager-0.58/samples/scripts)
#report_script= /opt/masterha/scripts/send_report
#shutdown_script= /opt/masterha/scripts/power_manager
#master_ip_failover_script= /opt/masterha/scripts/master_ip_failover
#master_ip_online_change_script= /opt/masterha/scripts/master_ip_online_change
[server1]
port=3306
hostname=192.168.1.111
candidate_master=1
check_repl_delay=0
[server2]
port=3306
hostname=192.168.1.112
candidate_master=1
check_repl_delay=0
[server3]
port=3306
hostname=192.168.1.113
相关命令(192.168.1.114 manager 节点执行)
检查ssh: masterha_check_ssh --conf=/etc/masterha.cnf
#检查ssh
[root@server114 ~]# masterha_check_ssh --conf=/etc/masterha.cnf
Sat Apr 20 14:28:31 2019 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 20 14:28:31 2019 - [info] Reading application default configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:31 2019 - [info] Reading server configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:31 2019 - [info] Starting SSH connection tests..
Sat Apr 20 14:28:34 2019 - [debug]
Sat Apr 20 14:28:32 2019 - [debug] Connecting via SSH from [email protected](192.168.1.112:22) to [email protected](192.168.1.111:22)..
Sat Apr 20 14:28:33 2019 - [debug] ok.
Sat Apr 20 14:28:33 2019 - [debug] Connecting via SSH from [email protected](192.168.1.112:22) to [email protected](192.168.1.113:22)..
Sat Apr 20 14:28:34 2019 - [debug] ok.
Sat Apr 20 14:28:34 2019 - [debug]
Sat Apr 20 14:28:31 2019 - [debug] Connecting via SSH from [email protected](192.168.1.111:22) to [email protected](192.168.1.112:22)..
Sat Apr 20 14:28:32 2019 - [debug] ok.
Sat Apr 20 14:28:32 2019 - [debug] Connecting via SSH from [email protected](192.168.1.111:22) to [email protected](192.168.1.113:22)..
Sat Apr 20 14:28:33 2019 - [debug] ok.
Sat Apr 20 14:28:35 2019 - [debug]
Sat Apr 20 14:28:32 2019 - [debug] Connecting via SSH from [email protected](192.168.1.113:22) to [email protected](192.168.1.111:22)..
Sat Apr 20 14:28:33 2019 - [debug] ok.
Sat Apr 20 14:28:33 2019 - [debug] Connecting via SSH from [email protected](192.168.1.113:22) to [email protected](192.168.1.112:22)..
Sat Apr 20 14:28:34 2019 - [debug] ok.
Sat Apr 20 14:28:35 2019 - [info] All SSH connection tests passed successfully.
检查主从复制:masterha_check_repl --conf=/etc/masterha.cnf
#检查主从复制
[root@server114 ~]# masterha_check_repl --conf=/etc/masterha.cnf
Sat Apr 20 14:28:38 2019 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 20 14:28:38 2019 - [info] Reading application default configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:38 2019 - [info] Reading server configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:38 2019 - [info] MHA::MasterMonitor version 0.58.
Sat Apr 20 14:28:40 2019 - [info] GTID failover mode = 1
Sat Apr 20 14:28:40 2019 - [info] Dead Servers:
Sat Apr 20 14:28:40 2019 - [info] Alive Servers:
Sat Apr 20 14:28:40 2019 - [info] 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info] 192.168.1.112(192.168.1.112:3306)
Sat Apr 20 14:28:40 2019 - [info] 192.168.1.113(192.168.1.113:3306)
Sat Apr 20 14:28:40 2019 - [info] Alive Slaves:
Sat Apr 20 14:28:40 2019 - [info] 192.168.1.112(192.168.1.112:3306) Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:28:40 2019 - [info] GTID ON
Sat Apr 20 14:28:40 2019 - [info] Replicating from 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Apr 20 14:28:40 2019 - [info] 192.168.1.113(192.168.1.113:3306) Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:28:40 2019 - [info] GTID ON
Sat Apr 20 14:28:40 2019 - [info] Replicating from 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info] Current Alive Master: 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info] Checking slave configurations..
Sat Apr 20 14:28:40 2019 - [info] Checking replication filtering settings..
Sat Apr 20 14:28:40 2019 - [info] binlog_do_db= , binlog_ignore_db=
Sat Apr 20 14:28:40 2019 - [info] Replication filtering check ok.
Sat Apr 20 14:28:40 2019 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Sat Apr 20 14:28:40 2019 - [info] Checking SSH publickey authentication settings on the current master..
Sat Apr 20 14:28:40 2019 - [info] HealthCheck: SSH to 192.168.1.111 is reachable.
Sat Apr 20 14:28:40 2019 - [info]
192.168.1.111(192.168.1.111:3306) (current master)
+--192.168.1.112(192.168.1.112:3306)
+--192.168.1.113(192.168.1.113:3306)
Sat Apr 20 14:28:40 2019 - [info] Checking replication health on 192.168.1.112..
Sat Apr 20 14:28:40 2019 - [info] ok.
Sat Apr 20 14:28:40 2019 - [info] Checking replication health on 192.168.1.113..
Sat Apr 20 14:28:40 2019 - [info] ok.
Sat Apr 20 14:28:40 2019 - [warning] master_ip_failover_script is not defined.
Sat Apr 20 14:28:40 2019 - [warning] shutdown_script is not defined.
Sat Apr 20 14:28:40 2019 - [info] Got exit code 0 (Not master dead).
MySQL Replication Health is OK.
当前MHA运行状态: masterha_check_status --conf=/etc/masterha.cnf
[root@server114 ~]# masterha_check_status --conf=/etc/masterha.cnf
masterha is stopped(2:NOT_RUNNING).
其他命令:
# MHA master 监控(监控逻辑与 masterha_manager 相同,但只监控 master 状态,不执行故障切换)
masterha_master_monitor --conf=/etc/masterha.cnf
#启动 MHA
nohup masterha_manager --conf=/etc/masterha.cnf > /dev/null 2>&1 &
nohup masterha_manager --conf=/etc/masterha.cnf &> /var/log/mha/app1/manager.log &
#关闭 MHA
masterha_stop --conf=/etc/masterha.cnf
#添加或删除配置的server信息
masterha_conf_host
#关闭 MHA
masterha_stop --conf=/etc/masterha.cnf
# 在线手动切换
masterha_master_switch --conf=/etc/masterha.cnf \
--master_state=alive \
--new_master_host=192.168.1.112 \
--new_master_port=3306 \
--orig_master_is_new_slave \
--running_updates_limit=10000
Sat Apr 20 14:29:22 2019 - [info] MHA::MasterRotate version 0.58.
Sat Apr 20 14:29:22 2019 - [info] Starting online master switch..
Sat Apr 20 14:29:22 2019 - [info]
Sat Apr 20 14:29:22 2019 - [info] * Phase 1: Configuration Check Phase..
Sat Apr 20 14:29:22 2019 - [info]
Sat Apr 20 14:29:22 2019 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 20 14:29:22 2019 - [info] Reading application default configuration from /etc/masterha.cnf..
Sat Apr 20 14:29:22 2019 - [info] Reading server configuration from /etc/masterha.cnf..
Sat Apr 20 14:29:23 2019 - [info] GTID failover mode = 1
Sat Apr 20 14:29:23 2019 - [info] Current Alive Master: 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:29:23 2019 - [info] Alive Slaves:
Sat Apr 20 14:29:23 2019 - [info] 192.168.1.112(192.168.1.112:3306) Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:29:23 2019 - [info] GTID ON
Sat Apr 20 14:29:23 2019 - [info] Replicating from 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:29:23 2019 - [info] Primary candidate for the new Master (candidate_master is set)
Sat Apr 20 14:29:23 2019 - [info] 192.168.1.113(192.168.1.113:3306) Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:29:23 2019 - [info] GTID ON
Sat Apr 20 14:29:23 2019 - [info] Replicating from 192.168.1.111(192.168.1.111:3306)
It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.1.111(192.168.1.111:3306)? (YES/no): 【 yes 】
Sat Apr 20 14:29:26 2019 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Sat Apr 20 14:29:26 2019 - [info] ok.
Sat Apr 20 14:29:26 2019 - [info] Checking MHA is not monitoring or doing failover..
Sat Apr 20 14:29:26 2019 - [info] Checking replication health on 192.168.1.112..
Sat Apr 20 14:29:26 2019 - [info] ok.
Sat Apr 20 14:29:26 2019 - [info] Checking replication health on 192.168.1.113..
Sat Apr 20 14:29:26 2019 - [info] ok.
Sat Apr 20 14:29:26 2019 - [info] 192.168.1.112 can be new master.
Sat Apr 20 14:29:26 2019 - [info]
From:
192.168.1.111(192.168.1.111:3306) (current master)
+--192.168.1.112(192.168.1.112:3306)
+--192.168.1.113(192.168.1.113:3306)
To:
192.168.1.112(192.168.1.112:3306) (new master)
+--192.168.1.113(192.168.1.113:3306)
+--192.168.1.111(192.168.1.111:3306)
Starting master switch from 192.168.1.111(192.168.1.111:3306) to 192.168.1.112(192.168.1.112:3306)? (yes/NO): 【 yes 】
Sat Apr 20 14:29:31 2019 - [info] Checking whether 192.168.1.112(192.168.1.112:3306) is ok for the new master..
Sat Apr 20 14:29:31 2019 - [info] ok.
Sat Apr 20 14:29:31 2019 - [info] 192.168.1.111(192.168.1.111:3306): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
Sat Apr 20 14:29:31 2019 - [info] 192.168.1.111(192.168.1.111:3306): Resetting slave pointing to the dummy host.
Sat Apr 20 14:29:32 2019 - [info] ** Phase 1: Configuration Check Phase completed.
Sat Apr 20 14:29:32 2019 - [info]
Sat Apr 20 14:29:32 2019 - [info] * Phase 2: Rejecting updates Phase..
Sat Apr 20 14:29:32 2019 - [info]
master_ip_online_change_script is not defined. If you do not disable writes on the current master manually, applications keep writing on the current master. Is it ok to proceed? (yes/NO):【 yes 】
Sat Apr 20 14:29:45 2019 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Sat Apr 20 14:29:45 2019 - [info] Executing FLUSH TABLES WITH READ LOCK..
Sat Apr 20 14:29:45 2019 - [info] ok.
Sat Apr 20 14:29:45 2019 - [info] Orig master binlog:pos is mysql-bin.000015:441.
Sat Apr 20 14:29:45 2019 - [info] Waiting to execute all relay logs on 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:45 2019 - [info] master_pos_wait(mysql-bin.000015:441) completed on 192.168.1.112(192.168.1.112:3306). Executed 0 events.
Sat Apr 20 14:29:45 2019 - [info] done.
Sat Apr 20 14:29:45 2019 - [info] Getting new master's binlog name and position..
Sat Apr 20 14:29:45 2019 - [info] mysql-bin.000008:468
Sat Apr 20 14:29:45 2019 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.1.112', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';
Sat Apr 20 14:29:45 2019 - [info] Setting read_only=0 on 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:45 2019 - [info] ok.
Sat Apr 20 14:29:45 2019 - [info]
Sat Apr 20 14:29:45 2019 - [info] * Switching slaves in parallel..
Sat Apr 20 14:29:45 2019 - [info]
Sat Apr 20 14:29:45 2019 - [info] -- Slave switch on host 192.168.1.113(192.168.1.113:3306) started, pid: 4604
Sat Apr 20 14:29:45 2019 - [info]
Sat Apr 20 14:29:46 2019 - [info] Log messages from 192.168.1.113 ...
Sat Apr 20 14:29:46 2019 - [info]
Sat Apr 20 14:29:45 2019 - [info] Waiting to execute all relay logs on 192.168.1.113(192.168.1.113:3306)..
Sat Apr 20 14:29:45 2019 - [info] master_pos_wait(mysql-bin.000015:441) completed on 192.168.1.113(192.168.1.113:3306). Executed 0 events.
Sat Apr 20 14:29:45 2019 - [info] done.
Sat Apr 20 14:29:45 2019 - [info] Resetting slave 192.168.1.113(192.168.1.113:3306) and starting replication from the new master 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:46 2019 - [info] Executed CHANGE MASTER.
Sat Apr 20 14:29:46 2019 - [info] Slave started.
Sat Apr 20 14:29:46 2019 - [info] End of log messages from 192.168.1.113 ...
Sat Apr 20 14:29:46 2019 - [info]
Sat Apr 20 14:29:46 2019 - [info] -- Slave switch on host 192.168.1.113(192.168.1.113:3306) succeeded.
Sat Apr 20 14:29:46 2019 - [info] Unlocking all tables on the orig master:
Sat Apr 20 14:29:46 2019 - [info] Executing UNLOCK TABLES..
Sat Apr 20 14:29:46 2019 - [info] ok.
Sat Apr 20 14:29:46 2019 - [info] Starting orig master as a new slave..
Sat Apr 20 14:29:46 2019 - [info] Resetting slave 192.168.1.111(192.168.1.111:3306) and starting replication from the new master 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:47 2019 - [info] Executed CHANGE MASTER.
Sat Apr 20 14:29:47 2019 - [info] Slave started.
Sat Apr 20 14:29:47 2019 - [info] All new slave servers switched successfully.
Sat Apr 20 14:29:47 2019 - [info]
Sat Apr 20 14:29:47 2019 - [info] * Phase 5: New master cleanup phase..
Sat Apr 20 14:29:47 2019 - [info]
Sat Apr 20 14:29:48 2019 - [info] 192.168.1.112: Resetting slave info succeeded.
Sat Apr 20 14:29:48 2019 - [info] Switching master to 192.168.1.112(192.168.1.112:3306) completed successfully.
切换后查看各个 slave,其 Master_Host 都已变更为 192.168.1.112,此时 master 为 192.168.1.112。在数据库层面,主备进行了角色切换,同时复制仍保持正常。
SHOW SLAVE STATUS;
SHOW VARIABLES LIKE 'read_only';