简介

MHA(Master High Availability)在MySQL高可用方面是一个相对成熟的解决方案,它可以实现MySQL在高可用性环境下的故障切换和主从转换。当主结点的MySQL服务器产生故障时,MHA能自动完成数据库的故障切换操作,而且在进行故障切换的过程中,MHA能在最大程度上保证数据的一致性
MHA组成部分
MHA由MHA Manager和MHA Node组成,MHA Node运行在后台MySQL服务器上,MHA Manager会定时监控master结点的存活状态,当master出现故障时,它可以实现将slave自动提升为新的master的功能,并将其他所有的slave的master指向新产生的master
MHA工作过程
MHA试图从宕机的主服务器上保存二进制日志,最大程度的保证数据的不丢失,但这并不总是可行的。当主服务器硬件故障或无法通过ssh访问的时候,MHA无法进行二进制日志的保存,仅能进行故障转移,因此有可能会造成最新数据的丢失。若使用半同步复制,可以使丢失数据的风险大大降低,当只有一个slave收到最新的二进制日志时,MHA可以将最新的二进制日志应用于其他所有的slave服务器上,因此可以保证结点的数据一致性

前期准备

准备三台Centos7虚拟机,关闭防火墙和selinux,配置IP地址和hostname,同步系统时间,配置IP地址和Hostname映射

hostname ip
node1 192.168.29.143
node2 192.168.29.142
node3 192.168.29.144

其中node1结点为管理结点,其余node结点为从节点
本次部署需要用到VIP

VIP
192.168.29.121

下载压缩包

mha4mysql-manager-0.58.tar.gz
mha4mysql-node-0.58.tar.gz

配置SSH免密登录

[root@node1 ~]# ssh-keygen
[root@node2 ~]# ssh-keygen
[root@node3 ~]# ssh-keygen
[root@node1 ~]# ssh-copy-id [email protected]
[root@node1 ~]# ssh-copy-id [email protected]
[root@node1 ~]# ssh-copy-id [email protected]
[root@node2 ~]# ssh-copy-id [email protected]
[root@node2 ~]# ssh-copy-id [email protected]
[root@node2 ~]# ssh-copy-id [email protected]
[root@node3 ~]# ssh-copy-id [email protected]
[root@node3 ~]# ssh-copy-id [email protected]
[root@node3 ~]# ssh-copy-id [email protected]
#验证
[root@node1 ~]# ssh root@ip
[root@node2 ~]# ssh root@ip
[root@node3 ~]# ssh root@ip

搭建主从配置

角色 IP
master 192.168.29.143
slave 192.168.29.142
slave 192.168.29.144

步骤参考
https://blog.51cto.com/14832653/2500735
搭建完成后通过创建数据库和添加数据测试主从可用性

#在node1结点执行
[root@node1 ~]# mysql -u root -p
mysql> create database mydb charset utf8;
mysql> use mydb
mysql> create table test(id int primary key);
mysql> insert into test values(1);
mysql> insert into test values(2);
mysql> insert into test values(3;
mysql> insert into test values(4);
mysql> insert into test values(5);

#node2结点验证
[root@node2 ~]# mysql -u root -p -e "select * from mydb.test;"
Enter password: 
+----+
| id |
+----+
|  1 |
|  2 |
|  3 |
|  4 |
|  5 |
+----+

#node3结点验证
[root@node3 ~]# mysql -u root -p -e "select * from mydb.test;"
Enter password: 
+----+
| id |
+----+
|  1 |
|  2 |
|  3 |
|  4 |
|  5 |
+----+

开启半同步复制

mysql> INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
mysql> SET GLOBAL rpl_semi_sync_master_enabled=ON;

三个结点均安装依赖包

[root@node1 ~]# yum install  perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-CPAN perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes perl-DBI mysql-libs perl-Email-Date-Format perl-File-Remove perl-Mail-Sender perl-Mail-Sendmail perl-MIME-Lite perl-MIME-Types perl-Module-Install perl-Module-ScanDeps perl-YAML -y
[root@node2 ~]# yum install  perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-CPAN perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes perl-DBI mysql-libs perl-Email-Date-Format perl-File-Remove perl-Mail-Sender perl-Mail-Sendmail perl-MIME-Lite perl-MIME-Types perl-Module-Install perl-Module-ScanDeps perl-YAML -y
[root@node3 ~]# yum install  perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker perl-CPAN perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes perl-DBI mysql-libs perl-Email-Date-Format perl-File-Remove perl-Mail-Sender perl-Mail-Sendmail perl-MIME-Lite perl-MIME-Types perl-Module-Install perl-Module-ScanDeps perl-YAML -y

部署MHA集群架构

全部结点部署mha4mysql-node
上传文件并解压

[root@node1 ~]# tar -zxvf mha4mysql-node-0.58.tar.gz
[root@node1 ~]# cd mha4mysql-node-0.58/
[root@node1 mha4mysql-node-0.58]# perl Makefile.PL
[root@node1 mha4mysql-node-0.58]# make && make install
#执行完成后会得到四个工具
[root@node1 ~]# ls -al /usr/local/bin/
-r-xr-xr-x   1 root root 17639 4月  30 15:05 apply_diff_relay_logs
-r-xr-xr-x   1 root root  4807 4月  30 15:05 filter_mysqlbinlog
-r-xr-xr-x   1 root root  8337 4月  30 15:05 purge_relay_logs
-r-xr-xr-x   1 root root  7525 4月  30 15:05 save_binary_logs
#在node2和node3结点执行相同的操作

node1结点部署mha4mysql-manager
上传文件并解压

[root@node1 ~]# tar -zxvf mha4mysql-manager-0.58.tar.gz
[root@node1 ~]# cd mha4mysql-manager-0.58/
[root@node1 mha4mysql-manager-0.58]# perl Makefile.PL 
#执行完成后会得到manager工具
[root@node1 ~]# ls -al /usr/local/bin/
-r-xr-xr-x   1 root root  1995 4月  30 15:20 masterha_check_repl
-r-xr-xr-x   1 root root  1779 4月  30 15:20 masterha_check_ssh
-r-xr-xr-x   1 root root  1865 4月  30 15:20 masterha_check_status
-r-xr-xr-x   1 root root  3201 4月  30 15:20 masterha_conf_host
-r-xr-xr-x   1 root root  2517 4月  30 15:20 masterha_manager
-r-xr-xr-x   1 root root  2165 4月  30 15:20 masterha_master_monitor
-r-xr-xr-x   1 root root  2373 4月  30 15:20 masterha_master_switch
-r-xr-xr-x   1 root root  5172 4月  30 15:20 masterha_secondary_check
-r-xr-xr-x   1 root root  1739 4月  30 15:20 masterha_stop

node1结点配置MHA

编写配置文件

#创建目录
[root@node1 ~] mkdir /usr/local/mha

#编写配置文件
[root@node1 mha]# vi /usr/local/mha/mha.cnf 
[server default]
#设置日志位置
manager_log=/usr/local/mha/manager.log
#设置工作目录
manager_workdir=/usr/local/mha
#设置ip自动飘移脚本路径
master_ip_failover_script="/usr/local/mha/scripts/master_ip_failover"
#设置ip手动飘移脚本路径
master_ip_online_change_script="/usr/local/mha/scripts/master_ip_online_change"
#设置监控mysql的用户
user=root
#设置监控用户对应密码
password=your_password
#设置ping包发送时间间隔
ping_interval=1
#设置远程mysql切换时binlog保存路径
remote_workdir=/usr/local/mha
#设置主从复制用户
repl_user=repl
#主从复制用户密码
repl_password=your_password
#发生漂移时报警脚本路径
report_script=""
#检查MySQL主服务器的可用性
secondary_check_script=/usr/local/bin/masterha_secondary_check -s 192.168.29.143 -s 192.168.29.142 -s 192.168.29.144
#发生故障后关闭故障主机的脚本路径
shutdown_script=""
#ssh的用户
ssh_user=root

[server1]
hostname=192.168.29.143
port=3306

[server2]
hostname=192.168.29.142
port=3306

[server3]
hostname=192.168.29.144
port=3306
#设置候选master
candidate_master=1
#设置当slave的relay_logs落后master达到100M时不会选择此slave作为新的master
check_repl_delay=0

编写飘移脚本

#创建脚本目录
[root@node1 ~]# mkdir /usr/loca/mha/scripts

#编写自动ip飘移脚本master_ip_failover
[root@node1 ~]#vi /usr/local/mha/scripts/master_ip_failover 
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';

use Getopt::Long;

my (
    $command,          $ssh_user,        $orig_master_host, $orig_master_ip,
    $orig_master_port, $new_master_host, $new_master_ip,    $new_master_port
);

my $vip = '192.168.29.121/24';  # Virtual IP
my $key = "1";
my $int = "ens33";
my $ssh_start_vip = "/sbin/ifconfig $int:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig $int:$key down";
my $arp_effect = "/sbin/arping -Uq -s192.168.29.121 -I $int 192.168.29.254 -c 3";    # Virtual IP and gatway

GetOptions(
    'command=s'          => \$command,
    'ssh_user=s'         => \$ssh_user,
    'orig_master_host=s' => \$orig_master_host,
    'orig_master_ip=s'   => \$orig_master_ip,
    'orig_master_port=i' => \$orig_master_port,
    'new_master_host=s'  => \$new_master_host,
    'new_master_ip=s'    => \$new_master_ip,
    'new_master_port=i'  => \$new_master_port,
);

exit &main();

sub main {

    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

    if ( $command eq "stop" || $command eq "stopssh" ) {

        # $orig_master_host, $orig_master_ip, $orig_master_port are passed.
        # If you manage master ip address at global catalog database,
        # invalidate orig_master_ip here.
        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            &stop_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn "Got Error: $@\n";
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "start" ) {

        # all arguments are passed.
        # If you manage master ip address at global catalog database,
        # activate new_master_ip here.
        # You can also grant write access (create user, set read_only=0, etc) here.
        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            &start_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn $@;
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        #`ssh $ssh_user\@cluster1 \" $ssh_start_vip \"`;
        exit 0;
    }
    else {
        &usage();
        exit 1;
    }
}

sub start_vip() {
    `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
sub stop_vip() {
     return 0  unless  ($ssh_user);
    `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}

sub usage {
    print
    "Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
} 

#编写手动ip飘移脚本master_ip_online_change
[root@node1 ~]#vi /usr/local/mha/scripts/master_ip_online_change
#!/bin/bash
source /root/.bash_profile

vip=`echo '192.168.29.121/24'`  #设置VIP
key=`echo '1'`

command=`echo "$1" | awk -F = '{print $2}'`
orig_master_host=`echo "$2" | awk -F = '{print $2}'`
new_master_host=`echo "$7" | awk -F = '{print $2}'`
orig_master_ssh_user=`echo "${12}" | awk -F = '{print $2}'`
new_master_ssh_user=`echo "${13}" | awk -F = '{print $2}'`

#要求服务的网卡识别名一样(本机为ens33)
stop_vip=`echo "ssh root@$orig_master_host /usr/sbin/ifconfig ens33:$key down"`
start_vip=`echo "ssh root@$new_master_host /usr/sbin/ifconfig ens33:$key $vip"`

if [ $command = 'stop' ]
  then
    echo -e "\n\n\n****************************\n"
    echo -e "Disabled thi VIP - $vip on old master: $orig_master_host \n"
    $stop_vip
    if [ $? -eq 0 ]
      then
    echo "Disabled the VIP successfully"
      else
    echo "Disabled the VIP failed"
    fi
    echo -e "***************************\n\n\n"
  fi

if [ $command = 'start' -o $command = 'status' ]
  then
    echo -e "\n\n\n*************************\n"
    echo -e "Enabling the VIP - $vip on new master: $new_master_host \n"
    $start_vip
    if [ $? -eq 0 ]
      then
    echo "Enabled the VIP successfully"
      else
    echo "Enabled the VIP failed"
    fi
    echo -e "***************************\n\n\n"
fi

#为脚本添加执行权限
[root@node1 scripts]# chmod +x master_ip_failover 
[root@node1 scripts]# chmod +x master_ip_online_change 

验证MHA状态

#验证SSH状态
[root@node1 ~]# masterha_check_ssh --conf=/usr/local/mha/mha.cnf
[info] All SSH connection tests passed successfully.

#验证数据库集群状态
[root@node1 ~]# masterha_check_repl --conf=/usr/local/mha/mha.cnf
MySQL Replication Health is OK.

启动MHA集群

node1结点添加虚拟IP

[root@node1 ~]#  ifconfig ens33:1 192.168.29.121/24

启动MHA进程

[root@node1 ~]# masterha_manager --conf=/usr/local/mha/mha.cnf > /tmp/mha_manager.log < /dev/null 2>&1 &

检查MHA集群运行状态

[root@node1 ~]# masterha_check_status --conf=/usr/local/mha/mha.cnf
mha (pid:2702) is running(0:PING_OK), master:192.168.29.143
#启动成功

高可用测试

宿主机通过VIP访问数据库

>mysql -uroot -p -h192.168.29.121
>mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| mydb               |
| mysql              |
| performance_schema |
| sys                |
+--------------------+
mysql> use mydb;
Database changed
mysql> select * from test;
+----+
| id |
+----+
|  1 |
|  2 |
|  3 |
|  4 |
|  5 |
+----+
5 rows in set (0.00 sec)

模拟node1(master)结点宕机

[root@node1 ~]# systemctl stop mysqld.service

#node3结点查看vip飘移情况
[root@node3 ~]# ip a 
2: ens33:  mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    inet 192.168.29.121/24 brd 192.168.29.255 scope global secondary ens33:1
#可见VIP已经飘移到node3结点中,同时node2的master变成了node3

#宿主机再次通过VIP访问数据库
>mysql -u root -h 192.168.29.121 -p  -e "select * from mydb.test;"
+----+
| id |
+----+
|  1 |
|  2 |
|  3 |
|  4 |
|  5 |
+----+
#高可用性可实现

node1(master)结点恢复正常

[root@node1 ~]# systemctl start mysqld.service

#需要把node1结点机器设置为node3的slave节点
[root@node3 ~]# mysql -u root -p -e "show master status\G;"
*************************** 1. row ***************************
             File: binlog.000025
         Position: 2225
     Binlog_Do_DB: mydb
 Binlog_Ignore_DB: mysql
Executed_Gtid_Set: 

#node1结点配置
mysql> stop slave;
mysql> change master to
    -> master_host='192.168.29.144',
    -> master_user='repl',
    -> master_password='your_password',
    -> master_log_file='binlog.000025',
    -> master_log_pos=2225;
mysql> start slave;
mysql> show slave status\G;
Slave_IO_Running: Yes
Slave_SQL_Running: Yes

在线切换master

[root@node1 ~]# masterha_master_switch --conf=/usr/local/mha/mha.cnf --master_state=alive --new_master_host=192.168.29.143 --orig_master_is_new_slave --running_updates_limit=10000 --interactive=0

[info] Switching master to 192.168.29.143(192.168.29.143:3306) completed successfully.

#node1结点查看VIP
[root@node1 ~]# ip a 
ens33:  mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    inet 192.168.29.121/24 brd 192.168.29.255 scope global secondary ens33:1

#node2和node3的master也变回了node1
#宿主机再次通过VIP访问数据库
>mysql -u root -h 192.168.29.121 -p  -e "select * from mydb.test;"
+----+
| id |
+----+
|  1 |
|  2 |
|  3 |
|  4 |
|  5 |
+----+

注意:
1)当node1结点宕机后,node2和node3结点会继续工作,此时若产生数据写入或删除则先不要执行在线切换master,要确保node1中的数据与node2、node3完全一致才可执行在线切换master,否则可能会导致最新的数据丢失
2)在线切换master完成后需要把workdir中的保存文件删除,以免影响MHA使用

saved_master_binlog_from_192.168.29.143_3306_20200608100840.binlog
mha.failover.complete 

3)在线切换完成后,需要重新启动MHA进程

[root@node1 ~]# masterha_manager --conf=/usr/local/mha/mha.cnf > /tmp/mha_manager.log < /dev/null 2>&1 &
#检查状态
[root@node1 ~]# masterha_check_status --conf=/usr/local/mha/mha.cnf