说明:
centos 6.5 mysql 5.5.37 mha4mysql-manager-0.55 mha4mysql-node-0.54
manager 192.168.1.1 mha-manager管理端
db1 192.168.1.2 现有主库
db2 192.168.1.3 即将做双主的数据库
db3 192.168.1.4 现有从库
配置mysql主从
db1主库:
server-id = 1
log-bin=mysql-bin
binlog_format=mixed
db2主库:
server-id = 2
log-bin=mysql-bin
binlog_format=mixed
relay_log_purge=0
db3从库:
server_id = 3
log_bin = mysql-bin
relay_log = mysql-relay-bin
max_relay_log_size = 2G
log_slave_updates = 1
read_only = 1
relay_log_purge=0
db1主 (现有的主库)
grant replication slave on *.* to mharep@'192.168.1.%' identified by 'passwd'; flush privileges; stop slave; #设置复制权限帐号 GRANT ALL PRIVILEGES ON *.* TO 'mha_manager'@'192.168.1.%' IDENTIFIED BY '123456'; reset master; show master status\G
db2备 (即将做双主的这台机)
grant replication slave on *.* to mharep@'192.168.1.%' identified by 'passwd'; GRANT ALL PRIVILEGES ON *.* TO 'mha_manager'@'192.168.1.%' IDENTIFIED BY '123456'; flush privileges; reset master; slave stop; change master to MASTER_HOST='192.168.1.2', MASTER_PORT=3306,MASTER_USER='mharep', MASTER_PASSWORD='passwd',master_log_file='mysql-bin.000001', master_log_pos=107; slave start; show slave status\G;
db3从 (现有的从库)
reset master; slave stop; GRANT ALL PRIVILEGES ON *.* TO 'mha_manager'@'192.168.1.%' IDENTIFIED BY '123456'; flush privileges; change master to MASTER_HOST='192.168.1.2', MASTER_PORT=3306,MASTER_USER='mharep', MASTER_PASSWORD='passwd',master_log_file='mysql-bin.000001', master_log_pos=107; slave start; show slave status\G;
配置双机互信
manager
ssh-keygen -t rsa for i in db1 db2 db3;do ssh-copy-id -i ~/.ssh/id_rsa.pub root@$i;done
db1主
ssh-keygen -t rsa for i in manager db2 db3;do ssh-copy-id -i ~/.ssh/id_rsa.pub root@$i;done
db2备 和 db3从 配置略
/usr/sbin/ntpdate 132.163.4.101 请把时间同步
安装mysql-MHA
https://downloads.mariadb.com/files/MHA
wget https://downloads.mariadb.com/files/MHA/mha4mysql-manager-0.55-0.el6.noarch.rpm wget https://downloads.mariadb.com/files/MHA/mha4mysql-node-0.54-0.el6.noarch.rpm
db1主 db2备 db3从
yum install perl-DBD-MySQL
yum localinstall mha4mysql-node-0.54-0.el6.noarch.rpm
manager
yum install perl cpan yum install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager
#这两个yum没有安装上 perl-Log-Dispatch perl-Parallel-ForkManager
wget http://rpmfind.net/linux/dag/redhat/el6/en/x86_64/dag/RPMS/perl-Log-Dispatch-2.26-1.el6.rf.noarch.rpm wget ftp://rpmfind.net/linux/dag/redhat/el6/en/x86_64/dag/RPMS/perl-Parallel-ForkManager-0.7.5-2.2.el6.rf.noarch.rpm wget ftp://rpmfind.net/linux/dag/redhat/el6/en/x86_64/dag/RPMS/perl-Mail-Sender-0.8.16-1.el6.rf.noarch.rpm wget ftp://rpmfind.net/linux/dag/redhat/el6/en/x86_64/dag/RPMS/perl-Mail-Sendmail-0.79-1.2.el6.rf.noarch.rpm
rpm -ivh perl-Mail-Sender-0.8.16-1.el6.rf.noarch.rpm rpm -ivh perl-Mail-Sendmail-0.79-1.2.el6.rf.noarch.rpm yum localinstall perl-Log-Dispatch-2.26-1.el6.rf.noarch.rpm yum localinstall perl-Parallel-ForkManager-0.7.5-2.2.el6.rf.noarch.rpm yum localinstall mha4mysql-node-0.54-0.el6.noarch.rpm yum localinstall mha4mysql-manager-0.55-0.el6.noarch.rpm
配置mysql-MHA
[root@manager ~]# masterha_<Tab> masterha_check_repl masterha_conf_host masterha_master_switch masterha_check_ssh masterha_manager masterha_secondary_check masterha_check_status masterha_master_monitor masterha_stop tar -xzf mha4mysql-manager-0.55.tar.gz # ls mha4mysql-manager-0.55/samples/conf #大家了解mha自带的配置文件,可忽略 masterha_default.cnf app1.cnf mkdir -p /usr/local/mha/scripts cp mha4mysql-manager-0.55/samples/scripts/* /usr/local/mha/scripts
[root@manager ~]# vim /usr/local/mha/mha.conf #根据自己的mysql配置改动 [server default] user=mha_manager #mha管理的用户名 password=123456 manager_workdir=/usr/local/mha manager_log=/usr/local/mha/manager.log remote_workdir=/usr/local/mha ssh_user=root #上面双机互信的用户 repl_user=mharep #mysql复制(同步)的用户名 repl_password=passwd ping_interval=1 #下面是mysql检测设置 secondary_check_script= masterha_secondary_check -s 192.168.1.2 -s 192.168.1.3 -s 192.168.1.4 #master_ip_failover_script=/usr/local/mha/scripts/master_ip_failover #shutdown_script= /usr/local/mha/scripts/power_manager report_script= /usr/local/mha/scripts/send_report master_ip_online_change_script= /usr/local/mha/scripts/master_ip_online_change [server1] hostname=192.168.1.2 #写ip或者主机名都可以 ssh_port=22 master_binlog_dir=/var/mysql/log candidate_master=1 [server2] hostname=192.168.1.3 ssh_port=22 master_binlog_dir=/var/mysql/log candidate_master=1 [server3] hostname=192.168.1.4 ssh_port=22 master_binlog_dir=/var/mysql/log no_master=1
#验证ssh通讯
masterha_check_ssh --conf=/usr/local/mha/mha.conf
#验证mysql复制
wget ftp://rpmfind.net/linux/dag/redhat/el6/en/x86_64/extras/RPMS/perl-Net-Telnet-3.03-2.el6.rfx.noarch.rpm yum localinstall perl-Net-Telnet-3.03-2.el6.rfx.noarch.rpm
#mysql必须都启动
[root@manager ~]# masterha_check_repl --conf=/usr/local/mha/mha.conf Thu Jan 8 18:05:40 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Thu Jan 8 18:05:40 2015 - [info] Reading application default configurations from /usr/local/mha/mha.conf.. Thu Jan 8 18:05:40 2015 - [info] Reading server configurations from /usr/local/mha/mha.conf.. Thu Jan 8 18:05:40 2015 - [info] MHA::MasterMonitor version 0.55. Thu Jan 8 18:05:40 2015 - [info] Dead Servers: Thu Jan 8 18:05:40 2015 - [info] Alive Servers: Thu Jan 8 18:05:40 2015 - [info] 192.168.1.2(192.168.1.2:3306) Thu Jan 8 18:05:40 2015 - [info] 192.168.1.3(192.168.1.3:3306) Thu Jan 8 18:05:40 2015 - [info] 192.168.1.4(192.168.1.4:3306) Thu Jan 8 18:05:40 2015 - [info] Alive Slaves: Thu Jan 8 18:05:40 2015 - [info] 192.168.1.3(192.168.1.3:3306) Version=5.5.37-log (oldest major version between slaves) log-bin:enabled Thu Jan 8 18:05:40 2015 - [info] Replicating from 192.168.1.2(192.168.1.2:3306) Thu Jan 8 18:05:40 2015 - [info] Primary candidate for the new Master (candidate_master is set) Thu Jan 8 18:05:40 2015 - [info] 192.168.1.4(192.168.1.4:3306) Version=5.5.37-log (oldest major version between slaves) log-bin:enabled Thu Jan 8 18:05:40 2015 - [info] Replicating from 192.168.1.2(192.168.1.2:3306) Thu Jan 8 18:05:40 2015 - [info] Not candidate for the new Master (no_master is set) Thu Jan 8 18:05:40 2015 - [info] Current Alive Master: 192.168.1.2(192.168.1.2:3306) Thu Jan 8 18:05:40 2015 - [info] Checking slave configurations.. Thu Jan 8 18:05:40 2015 - [info] Checking replication filtering settings.. Thu Jan 8 18:05:40 2015 - [info] binlog_do_db= , binlog_ignore_db= Thu Jan 8 18:05:40 2015 - [info] Replication filtering check ok. Thu Jan 8 18:05:40 2015 - [info] Starting SSH connection tests.. Thu Jan 8 18:05:41 2015 - [info] All SSH connection tests passed successfully. Thu Jan 8 18:05:41 2015 - [info] Checking MHA Node version.. Thu Jan 8 18:05:42 2015 - [info] Version check ok. Thu Jan 8 18:05:42 2015 - [info] Checking SSH publickey authentication settings on the current master.. Thu Jan 8 18:05:42 2015 - [info] HealthCheck: SSH to 192.168.1.2 is reachable. Thu Jan 8 18:05:42 2015 - [info] Master MHA Node version is 0.54. Thu Jan 8 18:05:42 2015 - [info] Checking recovery script configurations on the current master.. Thu Jan 8 18:05:42 2015 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/var/mysql/log --output_file=/usr/local/mha/save_binary_logs_test --manager_version=0.55 --start_file=mysql-bin.000002 Thu Jan 8 18:05:42 2015 - [info] Connecting to [email protected](192.168.1.2).. Creating /usr/local/mha if not exists.. ok. Checking output directory is accessible or not.. ok. Binlog found at /var/mysql/log, up to mysql-bin.000002 Thu Jan 8 18:05:42 2015 - [info] Master setting check done. Thu Jan 8 18:05:42 2015 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers.. Thu Jan 8 18:05:42 2015 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha_manager' --slave_host=192.168.1.3 --slave_ip=192.168.1.3 --slave_port=3306 --workdir=/usr/local/mha --target_version=5.5.37-log --manager_version=0.55 --relay_log_info=/var/mysql/data/relay-log.info --relay_dir=/var/mysql/data/ --slave_pass=xxx Thu Jan 8 18:05:42 2015 - [info] Connecting to [email protected](192.168.1.3:22).. Checking slave recovery environment settings.. Opening /var/mysql/data/relay-log.info ... ok. Relay log found at /var/mysql/data, up to mysql-relay-bin.000007 Temporary relay log file is /var/mysql/data/mysql-relay-bin.000007 Testing mysql connection and privileges.. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Thu Jan 8 18:05:42 2015 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha_manager' --slave_host=192.168.1.4 --slave_ip=192.168.1.4 --slave_port=3306 --workdir=/usr/local/mha --target_version=5.5.37-log --manager_version=0.55 --relay_log_info=/var/mysql/data/relay-log.info --relay_dir=/var/mysql/data/ --slave_pass=xxx Thu Jan 8 18:05:42 2015 - [info] Connecting to [email protected](192.168.1.4:22).. Checking slave recovery environment settings.. Opening /var/mysql/data/relay-log.info ... ok. Relay log found at /var/mysql/data, up to mysql-relay-bin.000006 Temporary relay log file is /var/mysql/data/mysql-relay-bin.000006 Testing mysql connection and privileges.. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Thu Jan 8 18:05:43 2015 - [info] Slaves settings check done. Thu Jan 8 18:05:43 2015 - [info] 192.168.1.2 (current master) +--192.168.1.3 +--192.168.1.4 Thu Jan 8 18:05:43 2015 - [info] Checking replication health on 192.168.1.3.. Thu Jan 8 18:05:43 2015 - [info] ok. Thu Jan 8 18:05:43 2015 - [info] Checking replication health on 192.168.1.4.. Thu Jan 8 18:05:43 2015 - [info] ok. Thu Jan 8 18:05:43 2015 - [info] Checking master_ip_failover_script status: Thu Jan 8 18:05:43 2015 - [info] /usr/local/mha/scripts/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.1.2 --orig_master_ip=192.168.1.2 --orig_master_port=3306 IN SCRIPT TEST====/sbin/ifconfig eth0:1 down==/sbin/ifconfig eth0:1 192.168.1.100;/sbin/arping -I eth0 -c 3 -s 192.168.1.100 192.168.1.1 >/dev/null 2>&1=== Checking the Status of the script.. OK Thu Jan 8 18:05:46 2015 - [info] OK. Thu Jan 8 18:05:46 2015 - [warning] shutdown_script is not defined. Thu Jan 8 18:05:46 2015 - [info] Got exit code 0 (Not master dead). MySQL Replication Health is OK.
启动MHA
mv /usr/local/mha/scripts/master_ip_failover /usr/local/mha/scripts/master_ip_failover.def
vim /usr/local/mha/scripts/master_ip_failover
#!/usr/bin/env perl use strict; use warnings FATAL => 'all'; use Getopt::Long; my ( $command, $ssh_user, $orig_master_host, $orig_master_ip, $orig_master_port, $new_master_host, $new_master_ip, $new_master_port ); my $vip = '192.168.1.100'; # Virtual IP my $gateway = '192.168.1.1'; #Gateway IP my $interface = 'eth0'; my $key = "1"; my $ssh_start_vip = "/sbin/ifconfig $interface:$key $vip;/sbin/arping -I $interface -c 3 -s $vip $gateway >/dev/null 2>&1"; my $ssh_stop_vip = "/sbin/ifconfig $interface:$key down"; GetOptions( 'command=s' => \$command, 'ssh_user=s' => \$ssh_user, 'orig_master_host=s' => \$orig_master_host, 'orig_master_ip=s' => \$orig_master_ip, 'orig_master_port=i' => \$orig_master_port, 'new_master_host=s' => \$new_master_host, 'new_master_ip=s' => \$new_master_ip, 'new_master_port=i' => \$new_master_port, ); exit &main(); sub main { print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n"; if ( $command eq "stop" || $command eq "stopssh" ) { # $orig_master_host, $orig_master_ip, $orig_master_port are passed. # If you manage master ip address at global catalog database, # invalidate orig_master_ip here. my $exit_code = 1; eval { print "Disabling the VIP on old master: $orig_master_host \n"; &stop_vip(); $exit_code = 0; }; if ($@) { warn "Got Error: $@\n"; exit $exit_code; } exit $exit_code; } elsif ( $command eq "start" ) { # all arguments are passed. # If you manage master ip address at global catalog database, # activate new_master_ip here. # You can also grant write access (create user, set read_only=0, etc) here. my $exit_code = 10; eval { print "Enabling the VIP - $vip on the new master - $new_master_host \n"; &start_vip(); $exit_code = 0; }; if ($@) { warn $@; exit $exit_code; } exit $exit_code; } elsif ( $command eq "status" ) { print "Checking the Status of the script.. OK \n"; `ssh $ssh_user\@$orig_master_host \" $ssh_start_vip \"`; exit 0; } else { &usage(); exit 1; } } # A simple system call that enable the VIP on the new master sub start_vip() { `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`; } # A simple system call that disable the VIP on the old_master sub stop_vip() { `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`; } sub usage { print "Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n"; }
# sed -i 's/#master_ip_failover_script/master_ip_failover_script/' /usr/local/mha/mha.conf
配置文件的master_ip_failover_script前面注释#取消
# chmod 755 /usr/local/mha/scripts/master_ip_failover
# masterha_check_repl --conf=/usr/local/mha/mha.conf
启动mha
[root@manager ~]# masterha_manager --conf=/usr/local/mha/mha.conf &
或者
nohup masterha_manager --conf=/usr/local/mha/mha.conf > /tmp/mha_manager.log < /dev/null 2>&1 &
查看MHA
[root@manager ~]# masterha_check_status --conf=/usr/local/mha/mha.conf
mha (pid:2686) is running(0:PING_OK), master:192.168.1.2
到mysql主库上看看ip信息,自己的ip,VIP
停止MHA
[root@manager ~]# masterha_stop --conf=/usr/local/mha/mha.conf
验证MHA:
[root@db1 ~]# ip a #现在有VIP
[root@db1 ~]# service mysqld stop #mysql停止,在这时候VIP会做转移
Shutting down MySQL... SUCCESS!
[root@db1 ~]# ip a #已经没有VIP
IP已经转移到db2,并且在db3上 mysql> show slave status\G 查看 Master_Server_Id: 2
查看mysql_MHA日志
/usr/local/mha/manager.log
#检查到db1数据库出错
Fri Jan 9 10:09:39 2015 - [info] Executing seconary network check script: masterha_secondary_check -s 192.168.1.2 -s 192.168.1.3 -s 192.168.1.4 --user=root --master_host=192.168.1.2 --master_ip=192.168.1.2 --master_port=3306
Fri Jan 9 10:09:39 2015 - [info] Executing SSH check script: save_binary_logs --command=test --start_pos=4 --binlog_dir=/var/mysql/log --output_file=/usr/local/mha/save_binary_logs_test --manager_version=0.55 --binlog_prefix=mysql-bin
Fri Jan 9 10:09:39 2015 - [info] HealthCheck: SSH to 192.168.1.2 is reachable.
Monitoring server 192.168.1.2 is reachable, Master is not reachable from 192.168.1.2. OK.
Monitoring server 192.168.1.3 is reachable, Master is not reachable from 192.168.1.3. OK.
Monitoring server 192.168.1.4 is reachable, Master is not reachable from 192.168.1.4. OK.
Fri Jan 9 10:09:39 2015 - [info] Master is not reachable from all other monitoring servers. Failover should start.
Fri Jan 9 10:09:40 2015 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Fri Jan 9 10:09:40 2015 - [warning] Connection failed 1 time(s)..
#转移
Fri Jan 9 10:09:42 2015 - [info] Dead Servers:
Fri Jan 9 10:09:42 2015 - [info] 192.168.1.2(192.168.1.2:3306)
Fri Jan 9 10:09:42 2015 - [info] Alive Servers:
Fri Jan 9 10:09:42 2015 - [info] 192.168.1.3(192.168.1.3:3306)
Fri Jan 9 10:09:42 2015 - [info] 192.168.1.4(192.168.1.4:3306)
Fri Jan 9 10:09:42 2015 - [info] Alive Slaves:
Fri Jan 9 10:09:42 2015 - [info] 192.168.1.3(192.168.1.3:3306) Version=5.5.37-log (oldest major version between slaves) log-bin:enabled
Fri Jan 9 10:09:42 2015 - [info] Replicating from 192.168.1.2(192.168.1.2:3306)
Fri Jan 9 10:09:42 2015 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Jan 9 10:09:42 2015 - [info] 192.168.1.4(192.168.1.4:3306) Version=5.5.37-log (oldest major version between slaves) log-bin:enabled
Fri Jan 9 10:09:42 2015 - [warning] relay_log_purge=0 is not set on slave 192.168.1.3(192.168.1.3:3306).
Fri Jan 9 10:09:42 2015 - [warning] relay_log_purge=0 is not set on slave 192.168.1.4(192.168.1.4:3306).
Disabling the VIP on old master: 192.168.1.2
Fri Jan 9 10:09:42 2015 - [info] done.
Fri Jan 9 10:09:42 2015 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master.
Fri Jan 9 10:09:42 2015 - [info] The latest binary log file/position on all slaves is mysql-bin.000003:107
Fri Jan 9 10:09:42 2015 - [info] Latest slaves (Slaves that received relay log files to the latest):
Fri Jan 9 10:09:42 2015 - [info] 192.168.1.3(192.168.1.3:3306) Version=5.5.37-log (oldest major version between slaves) log-bin:enabled
Fri Jan 9 10:09:42 2015 - [info] Replicating from 192.168.1.2(192.168.1.2:3306)
Fri Jan 9 10:09:42 2015 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Jan 9 10:09:42 2015 - [info] 192.168.1.4(192.168.1.4:3306) Version=5.5.37-log (oldest major version between slaves) log-bin:enabled
#转移后
Fri Jan 9 10:09:43 2015 - [info] Starting master failover..
Fri Jan 9 10:09:43 2015 - [info]
From:
192.168.1.2 (current master)
+--192.168.1.3
+--192.168.1.4
To:
192.168.1.3 (new master)
+--192.168.1.4
----- Failover Report -----
mha: MySQL Master failover 192.168.1.2 to 192.168.1.3 succeeded
Master 192.168.1.2 is down!
看这个日志就跟看电影一样,高潮部分,O(∩_∩)O哈哈~
在 db1 此目录 /usr/local/mha/ 会有binlog文件
saved_master_binlog_from_192.168.1.2_3306_20150109100942.binlog
当db1 mysql down后,manager会停止运行,
[root@manager ~]# masterha_check_status --conf=/usr/local/mha/mha.conf
mha is stopped(2:NOT_RUNNING).
[1]+ Done nohup masterha_manager --conf=/usr/local/mha/mha.conf > /tmp/mha_manager.log < /dev/null 2>&1
#恢复操作
1,db1启动mysql
[root@db1 ~]# service mysqld start
Starting MySQL.. SUCCESS!
2,mysqlbinlog 查看binlog文件,然后把db1主做为备
# mysqlbinlog saved_master_binlog_from_192.168.1.2_3306_20150109100942.binlog > mysqlbinlog.sql #转换binlog为sql语句
3,less mysqlbinlog.sql #找出同步的pos做从,或者重新做mysql从
change master to MASTER_HOST='192.168.1.3', MASTER_PORT=3306,MASTER_USER='mharep', MASTER_PASSWORD='passwd',master_log_file='mysql-bin.000001', master_log_pos=107;
slave start;
show slave status\G;
4,启动manager的管理
# nohup masterha_manager --conf=/usr/local/mha/mha.conf > /tmp/mha_manager.log < /dev/null 2>&1 &
# masterha_check_status --conf=/usr/local/mha/mha.conf
如果有这个提示,那么删除此文件/usr/local/mha/mha.failover.complete
[error][/usr/share/perl5/vendor_perl/MHA/MasterFailover.pm, ln298] Last failover was done at 2015/01/09 10:00:47. Current time is too early to do failover again. If you want to do failover, manually remove /usr/local/mha/mha.failover.complete and run this script again.
5,MmysqlMHA的VIP手动切换
masterha_check_repl --conf=/usr/local/mha/mha.conf
nohup masterha_manager --conf=/usr/local/mha/mha.conf > /tmp/mha_manager.log < /dev/null 2>&1 &
masterha_check_status --conf=/usr/local/mha/mha.conf
masterha_stop --conf=/usr/local/mha/mha.conf
masterha_master_switch --conf=/usr/local/mha/mha.conf --master_state=alive --new_master_host=192.168.(new master IP address) --new_master_port=3306 --orig_master_is_new_slave --running_updates_limit=10000
上面用到的配置下载地址:http://down.51cto.com/data/2065725