MySQL 5.7.23 + MHA 0.58 部署示例

架构:

MySQL 5.7.23 + MHA 0.58 部署示例_第1张图片

 

192.168.1.111    master                   read & write
192.168.1.112    slave(candidate)    read only
192.168.1.113    slave                     read only
192.168.1.114    mha-monitor

 

mysql 一主多从配置/半同步配置

sestatus -v
setenforce 0
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
systemctl stop firewalld.service 
systemctl disable firewalld.service 

安装mysql省略,其中配置文件 my.cnf 设置如下(以 192.168.1.112 实例为例;server_id 在每个实例上必须唯一,请按实例分别修改):

[client]
port = 3306
socket = /tmp/mysql.sock

[mysqld]
user=mysql
port = 3306
server_id = 112
socket=/tmp/mysql.sock
basedir =/usr/local/mysql
datadir =/usr/local/mysql/data

log-error =/var/log/mysql/mysqld.log
pid-file =/var/run/mysqld/mysqld.pid
socket =/tmp/mysql.sock  

autocommit = 1
character_set_server=utf8
default-storage-engine=INNODB
transaction_isolation = READ-COMMITTED
event_scheduler = 1
lower_case_table_names=1
explicit_defaults_for_timestamp = 1
skip-external-locking
default-time-zone = '+8:00'
max_allowed_packet = 1G
innodb-buffer-pool-size = 1G

log-bin=/usr/local/mysql/binlog/mysql-bin
binlog_checksum = NONE
binlog_format = row
log_slave_updates = ON
expire_logs_days = 15

#全局事务
gtid_mode=ON
enforce_gtid_consistency=ON
master_info_repository=TABLE
relay_log_info_repository=TABLE
relay_log_recovery=ON
relay_log_purge=OFF

#组提交&并行复制
sync_binlog=1
innodb_flush_log_at_trx_commit=1
#slave_parallel_type=LOGICAL_CLOCK 
#slave_parallel_workers=2
#binlog_group_commit_sync_delay=1000000
#binlog_group_commit_sync_no_delay_count=10
#group_replication_compression_threshold=100

#半同步(稍后在 111、112 实例添加)
plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
rpl-semi-sync-master-enabled = 1
rpl-semi-sync-slave-enabled = 1

sql_mode=STRICT_TRANS_TABLES

[mysqld_safe]
log-error=/var/log/mysql/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid

master(192.168.1.111)创建复制用户(其他slave设置同步时会自动同步mysql库)

create user repl@'192.168.1.%' identified by 'repl';
grant replication slave,replication client on *.* to repl@'192.168.1.%';
flush privileges;

2个实例 master(192.168.1.111) 及备用slave(192.168.1.112)安装半同步插件

INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';

SELECT PLUGIN_NAME, PLUGIN_STATUS FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME LIKE '%semi%';

首次设置主从同步,slave 192.168.1.112 和 slave 192.168.1.113 实例执行(用户repl会同步过来)

change master to 
master_host='192.168.1.111', 
master_port=3306, 
master_user='repl', 
master_password='repl', 
master_auto_position=1; 

start slave;

查看同步状态; 查看半同步的主备状态

show slave status;
show status like 'Rpl_semi_sync_%_status';

 

ssh/scp 免密码访问配置

参考: Linux(Centos)服务器间共享文件夹及文件传输

#首先:111、112、113、114 都执行
ssh-keygen -t rsa

#然后:111、112、113、114 都逐行执行
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.111
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.112
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.113
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.114

Ansible 配置(非必需),只是方便管理大量服务器

参考:Ansible 安装及多服务部署示例

192.168.1.114 中安装 ansible

# 192.168.1.114 中安装 ansible
yum install -y ansible


# vim /etc/ansible/hosts
[mysql-mha-node]
192.168.1.111
192.168.1.112
192.168.1.113
192.168.1.114


# vim /etc/ansible/ansible.cfg
[defaults]
sudo_user=root
remote_port=22
remote_user=root
module_name=command
host_key_checking=False
command_warnings = False
inventory=/etc/ansible/hosts
roles_path=/etc/ansible/roles
log_path=/var/log/ansible.log
private_key_file=/root/.ssh/id_rsa

192.168.1.114 中执行:各节点hosts绑定 ip 和 服务器名称

ansible mysql-mha-node -m shell -a "echo -e '
192.168.1.111 server111
192.168.1.112 server112
192.168.1.113 server113
192.168.1.114 server114' >> /etc/hosts"

MHA 配置

每个节点都下载安装 mha4mysql-node

yum install -y perl-DBD-MySQL perl-ExtUtils-MakeMaker perl-CPAN
yum install -y https://github.com/yoshinorim/mha4mysql-node/releases/download/v0.58/mha4mysql-node-0.58-0.el7.centos.noarch.rpm

192.168.1.114 节点执行安装 mha4mysql-manager

yum install -y https://github.com/yoshinorim/mha4mysql-manager/releases/download/v0.58/mha4mysql-manager-0.58-0.el7.centos.noarch.rpm

192.168.1.114 manager 节点配置文件

# mkdir -p /var/log/mha/app1
# vim /etc/masterha.cnf

[server default]
remote_workdir=/var/log/mha
manager_workdir=/var/log/mha  
manager_log=/var/log/mha/app1/manager.log
master_binlog_dir=/usr/local/mysql/binlog
user=root
password=mysql
repl_user=repl
repl_password=repl
ssh_user=root
ping_interval=2
#可从manager二进制包解压找到(mha4mysql-manager-0.58/samples/scripts)
#report_script= /opt/masterha/scripts/send_report
#shutdown_script= /opt/masterha/scripts/power_manager
#master_ip_failover_script= /opt/masterha/scripts/master_ip_failover
#master_ip_online_change_script= /opt/masterha/scripts/master_ip_online_change

[server1]
port=3306
hostname=192.168.1.111
candidate_master=1
check_repl_delay=0

[server2]
port=3306
hostname=192.168.1.112
candidate_master=1
check_repl_delay=0

[server3]
port=3306
hostname=192.168.1.113

 

相关命令(192.168.1.114 manager 节点执行)

检查ssh: masterha_check_ssh --conf=/etc/masterha.cnf

#检查ssh
[root@server114 ~]# masterha_check_ssh --conf=/etc/masterha.cnf

Sat Apr 20 14:28:31 2019 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 20 14:28:31 2019 - [info] Reading application default configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:31 2019 - [info] Reading server configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:31 2019 - [info] Starting SSH connection tests..
Sat Apr 20 14:28:34 2019 - [debug] 
Sat Apr 20 14:28:32 2019 - [debug]  Connecting via SSH from root@192.168.1.112(192.168.1.112:22) to root@192.168.1.111(192.168.1.111:22)..
Sat Apr 20 14:28:33 2019 - [debug]   ok.
Sat Apr 20 14:28:33 2019 - [debug]  Connecting via SSH from root@192.168.1.112(192.168.1.112:22) to root@192.168.1.113(192.168.1.113:22)..
Sat Apr 20 14:28:34 2019 - [debug]   ok.
Sat Apr 20 14:28:34 2019 - [debug] 
Sat Apr 20 14:28:31 2019 - [debug]  Connecting via SSH from root@192.168.1.111(192.168.1.111:22) to root@192.168.1.112(192.168.1.112:22)..
Sat Apr 20 14:28:32 2019 - [debug]   ok.
Sat Apr 20 14:28:32 2019 - [debug]  Connecting via SSH from root@192.168.1.111(192.168.1.111:22) to root@192.168.1.113(192.168.1.113:22)..
Sat Apr 20 14:28:33 2019 - [debug]   ok.
Sat Apr 20 14:28:35 2019 - [debug] 
Sat Apr 20 14:28:32 2019 - [debug]  Connecting via SSH from root@192.168.1.113(192.168.1.113:22) to root@192.168.1.111(192.168.1.111:22)..
Sat Apr 20 14:28:33 2019 - [debug]   ok.
Sat Apr 20 14:28:33 2019 - [debug]  Connecting via SSH from root@192.168.1.113(192.168.1.113:22) to root@192.168.1.112(192.168.1.112:22)..
Sat Apr 20 14:28:34 2019 - [debug]   ok.
Sat Apr 20 14:28:35 2019 - [info] All SSH connection tests passed successfully.

检查主从复制:masterha_check_repl --conf=/etc/masterha.cnf

#检查主从复制
[root@server114 ~]# masterha_check_repl --conf=/etc/masterha.cnf

Sat Apr 20 14:28:38 2019 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 20 14:28:38 2019 - [info] Reading application default configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:38 2019 - [info] Reading server configuration from /etc/masterha.cnf..
Sat Apr 20 14:28:38 2019 - [info] MHA::MasterMonitor version 0.58.
Sat Apr 20 14:28:40 2019 - [info] GTID failover mode = 1
Sat Apr 20 14:28:40 2019 - [info] Dead Servers:
Sat Apr 20 14:28:40 2019 - [info] Alive Servers:
Sat Apr 20 14:28:40 2019 - [info]   192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info]   192.168.1.112(192.168.1.112:3306)
Sat Apr 20 14:28:40 2019 - [info]   192.168.1.113(192.168.1.113:3306)
Sat Apr 20 14:28:40 2019 - [info] Alive Slaves:
Sat Apr 20 14:28:40 2019 - [info]   192.168.1.112(192.168.1.112:3306)  Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:28:40 2019 - [info]     GTID ON
Sat Apr 20 14:28:40 2019 - [info]     Replicating from 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info]     Primary candidate for the new Master (candidate_master is set)
Sat Apr 20 14:28:40 2019 - [info]   192.168.1.113(192.168.1.113:3306)  Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:28:40 2019 - [info]     GTID ON
Sat Apr 20 14:28:40 2019 - [info]     Replicating from 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info] Current Alive Master: 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:28:40 2019 - [info] Checking slave configurations..
Sat Apr 20 14:28:40 2019 - [info] Checking replication filtering settings..
Sat Apr 20 14:28:40 2019 - [info]  binlog_do_db= , binlog_ignore_db= 
Sat Apr 20 14:28:40 2019 - [info]  Replication filtering check ok.
Sat Apr 20 14:28:40 2019 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Sat Apr 20 14:28:40 2019 - [info] Checking SSH publickey authentication settings on the current master..
Sat Apr 20 14:28:40 2019 - [info] HealthCheck: SSH to 192.168.1.111 is reachable.
Sat Apr 20 14:28:40 2019 - [info] 
192.168.1.111(192.168.1.111:3306) (current master)
 +--192.168.1.112(192.168.1.112:3306)
 +--192.168.1.113(192.168.1.113:3306)

Sat Apr 20 14:28:40 2019 - [info] Checking replication health on 192.168.1.112..
Sat Apr 20 14:28:40 2019 - [info]  ok.
Sat Apr 20 14:28:40 2019 - [info] Checking replication health on 192.168.1.113..
Sat Apr 20 14:28:40 2019 - [info]  ok.
Sat Apr 20 14:28:40 2019 - [warning] master_ip_failover_script is not defined.
Sat Apr 20 14:28:40 2019 - [warning] shutdown_script is not defined.
Sat Apr 20 14:28:40 2019 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.

当前MHA运行状态: masterha_check_status --conf=/etc/masterha.cnf

[root@server114 ~]# masterha_check_status --conf=/etc/masterha.cnf
masterha is stopped(2:NOT_RUNNING).

其他命令:

# MHA master 监控(与 masterha_manager 一样)
masterha_master_monitor --conf=/etc/masterha.cnf

#启动 MHA(以下两条命令二选一:第一条丢弃输出,第二条将输出写入日志文件;不要同时执行)
nohup masterha_manager --conf=/etc/masterha.cnf > /dev/null 2>&1 &
nohup masterha_manager --conf=/etc/masterha.cnf &> /var/log/mha/app1/manager.log &

#关闭 MHA 
masterha_stop --conf=/etc/masterha.cnf

#添加或删除配置的server信息
masterha_conf_host

 

测试:在线手动切换

#关闭 MHA 
masterha_stop --conf=/etc/masterha.cnf

# 在线手动切换
masterha_master_switch --conf=/etc/masterha.cnf \
--master_state=alive \
--new_master_host=192.168.1.112 \
--new_master_port=3306 \
--orig_master_is_new_slave \
--running_updates_limit=10000
Sat Apr 20 14:29:22 2019 - [info] MHA::MasterRotate version 0.58.
Sat Apr 20 14:29:22 2019 - [info] Starting online master switch..
Sat Apr 20 14:29:22 2019 - [info] 
Sat Apr 20 14:29:22 2019 - [info] * Phase 1: Configuration Check Phase..
Sat Apr 20 14:29:22 2019 - [info] 
Sat Apr 20 14:29:22 2019 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Sat Apr 20 14:29:22 2019 - [info] Reading application default configuration from /etc/masterha.cnf..
Sat Apr 20 14:29:22 2019 - [info] Reading server configuration from /etc/masterha.cnf..
Sat Apr 20 14:29:23 2019 - [info] GTID failover mode = 1
Sat Apr 20 14:29:23 2019 - [info] Current Alive Master: 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:29:23 2019 - [info] Alive Slaves:
Sat Apr 20 14:29:23 2019 - [info]   192.168.1.112(192.168.1.112:3306)  Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:29:23 2019 - [info]     GTID ON
Sat Apr 20 14:29:23 2019 - [info]     Replicating from 192.168.1.111(192.168.1.111:3306)
Sat Apr 20 14:29:23 2019 - [info]     Primary candidate for the new Master (candidate_master is set)
Sat Apr 20 14:29:23 2019 - [info]   192.168.1.113(192.168.1.113:3306)  Version=5.7.23-log (oldest major version between slaves) log-bin:enabled
Sat Apr 20 14:29:23 2019 - [info]     GTID ON
Sat Apr 20 14:29:23 2019 - [info]     Replicating from 192.168.1.111(192.168.1.111:3306)

It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.1.111(192.168.1.111:3306)? (YES/no): 【 yes 】
Sat Apr 20 14:29:26 2019 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Sat Apr 20 14:29:26 2019 - [info]  ok.
Sat Apr 20 14:29:26 2019 - [info] Checking MHA is not monitoring or doing failover..
Sat Apr 20 14:29:26 2019 - [info] Checking replication health on 192.168.1.112..
Sat Apr 20 14:29:26 2019 - [info]  ok.
Sat Apr 20 14:29:26 2019 - [info] Checking replication health on 192.168.1.113..
Sat Apr 20 14:29:26 2019 - [info]  ok.
Sat Apr 20 14:29:26 2019 - [info] 192.168.1.112 can be new master.
Sat Apr 20 14:29:26 2019 - [info] 
From:
192.168.1.111(192.168.1.111:3306) (current master)
 +--192.168.1.112(192.168.1.112:3306)
 +--192.168.1.113(192.168.1.113:3306)

To:
192.168.1.112(192.168.1.112:3306) (new master)
 +--192.168.1.113(192.168.1.113:3306)
 +--192.168.1.111(192.168.1.111:3306)

Starting master switch from 192.168.1.111(192.168.1.111:3306) to 192.168.1.112(192.168.1.112:3306)? (yes/NO): 【 yes 】
Sat Apr 20 14:29:31 2019 - [info] Checking whether 192.168.1.112(192.168.1.112:3306) is ok for the new master..
Sat Apr 20 14:29:31 2019 - [info]  ok.
Sat Apr 20 14:29:31 2019 - [info] 192.168.1.111(192.168.1.111:3306): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
Sat Apr 20 14:29:31 2019 - [info] 192.168.1.111(192.168.1.111:3306): Resetting slave pointing to the dummy host.
Sat Apr 20 14:29:32 2019 - [info] ** Phase 1: Configuration Check Phase completed.
Sat Apr 20 14:29:32 2019 - [info] 
Sat Apr 20 14:29:32 2019 - [info] * Phase 2: Rejecting updates Phase..
Sat Apr 20 14:29:32 2019 - [info] 
master_ip_online_change_script is not defined. If you do not disable writes on the current master manually, applications keep writing on the current master. Is it ok to proceed? (yes/NO):【 yes 】
Sat Apr 20 14:29:45 2019 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Sat Apr 20 14:29:45 2019 - [info] Executing FLUSH TABLES WITH READ LOCK..
Sat Apr 20 14:29:45 2019 - [info]  ok.
Sat Apr 20 14:29:45 2019 - [info] Orig master binlog:pos is mysql-bin.000015:441.
Sat Apr 20 14:29:45 2019 - [info]  Waiting to execute all relay logs on 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:45 2019 - [info]  master_pos_wait(mysql-bin.000015:441) completed on 192.168.1.112(192.168.1.112:3306). Executed 0 events.
Sat Apr 20 14:29:45 2019 - [info]   done.
Sat Apr 20 14:29:45 2019 - [info] Getting new master's binlog name and position..
Sat Apr 20 14:29:45 2019 - [info]  mysql-bin.000008:468
Sat Apr 20 14:29:45 2019 - [info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.1.112', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';
Sat Apr 20 14:29:45 2019 - [info] Setting read_only=0 on 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:45 2019 - [info]  ok.
Sat Apr 20 14:29:45 2019 - [info] 
Sat Apr 20 14:29:45 2019 - [info] * Switching slaves in parallel..
Sat Apr 20 14:29:45 2019 - [info] 
Sat Apr 20 14:29:45 2019 - [info] -- Slave switch on host 192.168.1.113(192.168.1.113:3306) started, pid: 4604
Sat Apr 20 14:29:45 2019 - [info] 
Sat Apr 20 14:29:46 2019 - [info] Log messages from 192.168.1.113 ...
Sat Apr 20 14:29:46 2019 - [info] 
Sat Apr 20 14:29:45 2019 - [info]  Waiting to execute all relay logs on 192.168.1.113(192.168.1.113:3306)..
Sat Apr 20 14:29:45 2019 - [info]  master_pos_wait(mysql-bin.000015:441) completed on 192.168.1.113(192.168.1.113:3306). Executed 0 events.
Sat Apr 20 14:29:45 2019 - [info]   done.
Sat Apr 20 14:29:45 2019 - [info]  Resetting slave 192.168.1.113(192.168.1.113:3306) and starting replication from the new master 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:46 2019 - [info]  Executed CHANGE MASTER.
Sat Apr 20 14:29:46 2019 - [info]  Slave started.
Sat Apr 20 14:29:46 2019 - [info] End of log messages from 192.168.1.113 ...
Sat Apr 20 14:29:46 2019 - [info] 
Sat Apr 20 14:29:46 2019 - [info] -- Slave switch on host 192.168.1.113(192.168.1.113:3306) succeeded.
Sat Apr 20 14:29:46 2019 - [info] Unlocking all tables on the orig master:
Sat Apr 20 14:29:46 2019 - [info] Executing UNLOCK TABLES..
Sat Apr 20 14:29:46 2019 - [info]  ok.
Sat Apr 20 14:29:46 2019 - [info] Starting orig master as a new slave..
Sat Apr 20 14:29:46 2019 - [info]  Resetting slave 192.168.1.111(192.168.1.111:3306) and starting replication from the new master 192.168.1.112(192.168.1.112:3306)..
Sat Apr 20 14:29:47 2019 - [info]  Executed CHANGE MASTER.
Sat Apr 20 14:29:47 2019 - [info]  Slave started.
Sat Apr 20 14:29:47 2019 - [info] All new slave servers switched successfully.
Sat Apr 20 14:29:47 2019 - [info] 
Sat Apr 20 14:29:47 2019 - [info] * Phase 5: New master cleanup phase..
Sat Apr 20 14:29:47 2019 - [info] 
Sat Apr 20 14:29:48 2019 - [info]  192.168.1.112: Resetting slave info succeeded.
Sat Apr 20 14:29:48 2019 - [info] Switching master to 192.168.1.112(192.168.1.112:3306) completed successfully.

切换后查看各个 slave,其 Master_Host 都已变更为 192.168.1.112,此时 master 为 192.168.1.112。在数据库层面,主备进行了角色切换,同时复制仍保持正常。

SHOW SLAVE STATUS;
SHOW VARIABLES LIKE 'read_only';

MySQL 5.7.23 + MHA 0.58 部署示例_第2张图片

 

你可能感兴趣的:(MYSQL,MYSQL,高可用性)