maxscale61.blufly.com MaxScale-master maxscale62.blufly.com MaxScale-backup MSVIP: db51.blufly.com Master db52.blufly.com Slave1 db53.blufly.com Slave2 DBVIP:
ntpdate /root/init_system_centos7.sh
cat >> /etc/hosts << EOF maxscale61.blufly.com maxscale62.blufly.com db51.blufly.com db52.blufly.com db53.blufly.com EOF
###------------------ 一、mysql GTID主从复制 ------------------###
cd /opt wget http://mirrors.neusoft.edu.cn/mariadb//mariadb-10.3.6/source/mariadb-10.3.6.tar.gz wget https://github.com/jemalloc/jemalloc/releases/download/5.1.0/jemalloc-5.1.0.tar.bz2
yum install -y cmake ncurses-devel openssl-devel openssl zlib-devel
rpm -qa|grep mariadb rpm -e --nodeps mariadb-libs find / -name "mariadb" -exec rm -rf {} \; rm -rf /etc/my.cnf /etc/my.cnf.d/
# Build and install jemalloc 5.1.0, then build MariaDB 10.3.6 from source
# linked against it. Run as root; both tarballs are expected in /opt.

# --- jemalloc ---
cd /opt
tar -jxvf jemalloc-5.1.0.tar.bz2
cd jemalloc-5.1.0
./configure --prefix=/usr/local/jemalloc --libdir=/usr/local/lib
# Install only if the build succeeded.
make && make install
# Let the dynamic linker find the library installed under /usr/local/lib.
echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf
ldconfig

# --- directories and service account for MariaDB ---
cd /opt
mkdir -p /usr/local/mysql
mkdir -p /data/mysql/binlog
groupadd -r mysql
useradd -g mysql -r -s /sbin/nologin -d /data/mysql/ mysql
# user:group is the supported separator; the user.group form is deprecated.
chown -R mysql:mysql /data/mysql
chown -R mysql:mysql /usr/local/mysql
mkdir -p /var/log/mariadb
mkdir -p /var/run/mariadb
chown -R mysql /var/run/mariadb
chown -R mysql /var/log/mariadb

# --- MariaDB ---
tar -zxvf mariadb-10.3.6.tar.gz
cd mariadb-10.3.6
# Important: if an earlier configure/compile attempt failed, CMakeCache.txt
# must be deleted before cmake is run again.
rm -f CMakeCache.txt
# Option names below were corrected from the original notes
# (ARCHIVE_STPRAGE, WIYH_READLINE, WIYH_SSL, VITH_ZLIB, LOBWRAP), which did
# not spell the options they were meant to set.
cmake . -DCMAKE_INSTALL_PREFIX=/usr/local/mysql \
  -DMYSQL_DATADIR=/data/mysql/ \
  -DSYSCONFDIR=/etc \
  -DWITHOUT_TOKUDB=1 \
  -DWITH_INNOBASE_STORAGE_ENGINE=1 \
  -DWITH_ARCHIVE_STORAGE_ENGINE=1 \
  -DWITH_BLACKHOLE_STORAGE_ENGINE=1 \
  -DMYSQL_TCP_PORT=9106 \
  -DWITH_READLINE=1 \
  -DWITH_SSL=system \
  -DWITH_ZLIB=system \
  -DWITH_LIBWRAP=0 \
  -DMYSQL_UNIX_ADDR=/tmp/mysql.sock \
  -DDEFAULT_CHARSET=utf8 \
  -DDEFAULT_COLLATION=utf8_general_ci \
  -DWITH_EXTRA_CHARSETS=all \
  -DCMAKE_EXE_LINKER_FLAGS='-ljemalloc' \
  -DWITH_SAFEMALLOC=OFF
make && make install
cd /usr/local/mysql/ ./scripts/mysql_install_db --user=mysql --datadir=/data/mysql/
cp support-files/mysql.server /etc/rc.d/init.d/mysqld chmod +x /etc/init.d/mysqld
##------ 设置my.cnf(master)-------##
[client] port = 9106 socket=/tmp/mysql.sock [mysql] #no-auto-rehash auto-rehash [mysqld] port = 9106 datadir=/data/mysql socket=/tmp/mysql.sock # Disabling symbolic-links is recommended to prevent assorted security risks symbolic-links=0 #*** network *** back_log = 512 max_connections = 3000 max_connect_errors = 6000 table_open_cache = 1024 max_allowed_packet = 32M # *** global cache *** read_buffer_size = 8M read_rnd_buffer_size = 64M sort_buffer_size = 16M join_buffer_size = 16M # *** thread *** thread_cache_size = 300 thread_stack = 512K # *** query cache *** query_cache_size = 128M query_cache_limit = 4M # *** index *** ft_min_word_len = 8 default-storage-engine = INNODB transaction_isolation = REPEATABLE-READ # *** tmp table *** tmp_table_size = 256M max_heap_table_size = 256M # *** slow query log *** slow_query_log long_query_time = 10 log-queries-not-using-indexes #*** MyISAM Specific options key_buffer_size = 256M read_buffer_size = 1M read_rnd_buffer_size = 16M bulk_insert_buffer_size = 256M myisam_sort_buffer_size = 256M myisam_max_sort_file_size = 10G myisam_repair_threads = 1 myisam_recover_options=force,backup # *** INNODB Specific options *** #innodb_additional_mem_pool_size = 64M innodb_buffer_pool_size = 512M innodb_data_file_path = ibdata1:10M:autoextend #innodb_data_home_dir =innodb_write_io_threads = 8 innodb_read_io_threads = 8 #innodb_force_recovery=1 innodb_thread_concurrency = 16 innodb_flush_log_at_trx_commit = 2 #innodb_fast_shutdown innodb_log_buffer_size = 16M innodb_log_file_size = 256M innodb_log_files_in_group = 3 #innodb_log_group_home_dir innodb_max_dirty_pages_pct = 90 #innodb_flush_method=O_DSYNC innodb_lock_wait_timeout = 120 # 禁止MySQL对外部连接进行DNS解析 skip-name-resolve lower_case_table_names = 1 #设置服务器ID server-id= 51 # 设置同步数据库 #binlog-do-db = blufly #不对mysql库进行日志记录操作 binlog-ignore-db=test,information_schema,performance_schema #不对test进行复制操作 replicate-ignore-db=test,information_schema,performance_schema # 打开日志 
#binlog日志格式,mysql默认采用statement,建议使用mixed binlog_format=mixed #binlog日志文件 log-bin=/data/mysql/binlog/mysql-bin.log log-bin-index=/data/mysql/binlog/mysql-bin.index #binlog过期清理时间 expire_logs_days=30 #binlog每个日志文件大小 max-binlog-size=100M #binlog缓存大小 binlog_cache_size=4M #最大binlog缓存大小 max_binlog_cache_size=10M binlog_stmt_cache_size=2M log-slave-updates=true master-info-repository=TABLE relay-log-info-repository=TABLE sync-master-info=1 slave-parallel-threads=2 binlog-checksum=CRC32 master-verify-checksum=1 slave-sql-verify-checksum=1 binlog-rows-query-log_events=1 [mysqldump] quick max_allowed_packet = 32M [myisamchk] key_buffer_size = 2048M sort_buffer_size = 2048M read_buffer = 32M write_buffer = 32M [mysqlhotcopy] interactive-timeout [mysqld_safe] open-files-limit = 10240 log-error=/var/log/mariadb/mariadb.log pid-file=/var/run/mariadb/mariadb.pid
##-------- 设置my.cnf(slave52)------##
##-------- 设置my.cnf(slave53)------##
[client] port = 9106 socket=/tmp/mysql.sock [mysql] #no-auto-rehash auto-rehash [mysqld] port = 9106 datadir=/data/mysql socket=/tmp/mysql.sock # Disabling symbolic-links is recommended to prevent assorted security risks symbolic-links=0 #*** network *** back_log = 512 max_connections = 3000 max_connect_errors = 6000 table_open_cache = 1024 max_allowed_packet = 32M # *** global cache *** read_buffer_size = 8M read_rnd_buffer_size = 64M sort_buffer_size = 16M join_buffer_size = 16M # *** thread *** thread_cache_size = 300 thread_stack = 512K # *** query cache *** query_cache_size = 128M query_cache_limit = 4M # *** index *** ft_min_word_len = 8 default-storage-engine = INNODB transaction_isolation = REPEATABLE-READ # *** tmp table *** tmp_table_size = 256M max_heap_table_size = 256M # *** slow query log *** slow_query_log long_query_time = 10 log-queries-not-using-indexes #*** MyISAM Specific options key_buffer_size = 256M read_buffer_size = 1M read_rnd_buffer_size = 16M bulk_insert_buffer_size = 256M myisam_sort_buffer_size = 256M myisam_max_sort_file_size = 10G myisam_repair_threads = 1 myisam_recover_options=force,backup # *** INNODB Specific options *** #innodb_additional_mem_pool_size = 64M innodb_buffer_pool_size = 512M innodb_data_file_path = ibdata1:10M:autoextend #innodb_data_home_dir =innodb_write_io_threads = 8 innodb_read_io_threads = 8 #innodb_force_recovery=1 innodb_thread_concurrency = 16 innodb_flush_log_at_trx_commit = 2 #innodb_fast_shutdown innodb_log_buffer_size = 16M innodb_log_file_size = 256M innodb_log_files_in_group = 3 #innodb_log_group_home_dir innodb_max_dirty_pages_pct = 90 #innodb_flush_method=O_DSYNC innodb_lock_wait_timeout = 120 # 禁止MySQL对外部连接进行DNS解析 skip-name-resolve lower_case_table_names = 1 #设置服务器ID server-id= 52 # 设置同步数据库 #binlog-do-db = blufly #不对test进行复制操作 replicate-ignore-db=test,information_schema,performance_schema # 打开日志 binlog-format=mixed relay-log=/data/mysql/binlog/mysql-relay-bin.log 
relay-log-index=/data/mysql/binlog/mysql-relay-bin.index max_relay_log_size=100M expire_logs_days=30 log-slave-updates=true master-info-repository=TABLE relay-log-info-repository=TABLE sync-master-info=1 slave-parallel-threads=2 binlog-checksum=CRC32 master-verify-checksum=1 slave-sql-verify-checksum=1 binlog-rows-query-log_events=1 [mysqldump] quick max_allowed_packet = 32M [myisamchk] key_buffer_size = 2048M sort_buffer_size = 2048M read_buffer = 32M write_buffer = 32M [mysqlhotcopy] interactive-timeout [mysqld_safe] open-files-limit = 10240 log-error=/var/log/mariadb/mariadb.log pid-file=/var/run/mariadb/mariadb.pid
/etc/rc.d/init.d/mysqld start /sbin/chkconfig --add mysqld /sbin/chkconfig --level 2345 mysqld on ln -s /usr/local/mysql/bin/mysql /sbin/mysql ln -s /usr/local/mysql/bin/mysqladmin /sbin/mysqladmin ln -s /usr/local/mysql/bin/mysqlbinlog /sbin/mysqlbinlog
echo 'export PATH=$PATH:/usr/local/mysql/bin' > /etc/profile.d/mysql.sh chmod +x /etc/profile.d/mysql.sh source /etc/profile.d/mysql.sh
mysqladmin -u root password '753951' mysql -uroot -p753951 -P9106 #授权一个网段 mysql> grant all on *.* to 'blufly'@'192.168.5.%' identified by '852741'; mysql> flush privileges; mysql> exit;
/etc/rc.d/init.d/mysqld restart
yum install lsof -y lsof -n | grep jemalloc
mysql -uroot -p grant replication slave,replication client on *.* to "repl"@'192.168.5.%' identified by 'qazqwe1688'; flush privileges; exit;
mysqldump -uroot -p753951 --all-databases --lock-all-tables --flush-logs --master-data=2 > /opt/mysql20180905.sql scp -P 65535 /opt/mysql20180905.sql scp -P 65535 /opt/mysql20180905.sql
mysql -uroot -p753951 < /opt/mysql20180905.sql
mysql -uroot -p flush privileges;
mysql -uroot -p MariaDB [(none)]> show master status; +------------------+----------+--------------+--------------------------------------------------+ | File | Position | Binlog_Do_DB | Binlog_Ignore_DB | +------------------+----------+--------------+--------------------------------------------------+ | mysql-bin.000010 | 358 | | mysql,test,information_schema,performance_schema | +------------------+----------+--------------+--------------------------------------------------+ 1 row in set (0.000 sec)
MariaDB [(none)]> SELECT BINLOG_GTID_POS("mysql-bin.000003", 727); +------------------------------------------+ | BINLOG_GTID_POS("mysql-bin.000003", 727) | +------------------------------------------+ | 0-51-430 | +------------------------------------------+ 1 row in set (0.007 sec)
#SLAVE可以通过设置 @@gtid_slave_pos 的值来设定复制的起始位置,用 CHANGE MASTER 把这个值传给主库(db52、db53)
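mysql -uroot -p SET GLOBAL gtid_slave_pos = "0-51-430"; CHANGE MASTER TO master_host='db51.blufly.com', master_port=9106, master_user='repl', master_password='qazqwe1688', master_use_gtid=slave_pos;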
MariaDB [mysql]> start slave; MariaDB [mysql]> show slave status\G *************************** 1. row *************************** Slave_IO_State: Waiting for master to send event Master_Host: Master_User: repl Master_Port: 9106 Connect_Retry: 60 Master_Log_File: mysql-bin.000003 Read_Master_Log_Pos: 40778 Relay_Log_File: mysql-relay-bin.000002 Relay_Log_Pos: 40778 Relay_Master_Log_File: mysql-bin.000003 Slave_IO_Running: Yes Slave_SQL_Running: Yes Replicate_Do_DB: Replicate_Ignore_DB: test,mysql,information_schema,performance_schema Replicate_Do_Table: Replicate_Ignore_Table: Replicate_Wild_Do_Table: Replicate_Wild_Ignore_Table: Last_Errno: 0 Last_Error: Skip_Counter: 0 Exec_Master_Log_Pos: 40778 Relay_Log_Space: 41087 Until_Condition: None Until_Log_File: Until_Log_Pos: 0 Master_SSL_Allowed: No Master_SSL_CA_File: Master_SSL_CA_Path: Master_SSL_Cert: Master_SSL_Cipher: Master_SSL_Key: Seconds_Behind_Master: 0 Master_SSL_Verify_Server_Cert: No Last_IO_Errno: 0 Last_IO_Error: Last_SQL_Errno: 0 Last_SQL_Error: Replicate_Ignore_Server_Ids: Master_Server_Id: 51 Master_SSL_Crl: Master_SSL_Crlpath: Using_Gtid: Slave_Pos Gtid_IO_Pos: 0-51-464 Replicate_Do_Domain_Ids: Replicate_Ignore_Domain_Ids: Parallel_Mode: conservative SQL_Delay: 0 SQL_Remaining_Delay: NULL Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it 1 row in set (0.000 sec)
mysql -uroot -p753951 -e "set global read_only=1"
###--------------------- 二、MySQL高可用之MHA ----------------------###
#Checking if super_read_only is defined and turned on..DBD::mysql::st execute failed: Unknown system variable 'super_read_only'
#show variables like 'super_read_only';
#在所有节点都要安装MHA node所需的perl模块(DBD:mysql)(db51、db52、db53)
rpm -vih http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/e/epel-release-7-11.noarch.rpm yum -y install perl-DBD-MySQL perl-devel perl-CPAN
#在所有的节点安装mha node
cd /opt tar -zxvf mha4mysql-node-0.56.tar.gz cd mha4mysql-node-0.56 perl Makefile.PL make && make install
[root@db51 ~]# ssh-keygen -t rsa [root@db51 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected] [root@db51 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected] [root@db51 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected]
[root@db52 ~]# ssh-keygen -t rsa [root@db52 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected] [root@db52 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected] [root@db52 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected]
[root@db53 ~]# ssh-keygen -t rsa [root@db53 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected] [root@db53 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected] [root@db53 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 [email protected]
ssh -p 65535 [email protected]
#把DB53作为MHA Manager
[root@db53 ~]# cd /opt #首先安装MHA Manger依赖的perl模块 [root@db53 ~]# yum install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes -y [root@db53 ~]# tar -zxvf mha4mysql-manager-0.56.tar.gz [root@db53 ~]# cd mha4mysql-manager-0.56 [root@db53 ~]# perl Makefile.PL [root@db53 ~]# make && make install
[root@db53 ~]# mkdir -p /etc/masterha [root@db53 ~]# mkdir -p /var/log/masterha [root@db53 ~]# cp samples/conf/app1.cnf /etc/masterha/
[root@db53 ~]# cat /etc/masterha/app1.cnf [server default] manager_workdir=/var/log/masterha/app1 manager_log=/var/log/masterha/app1/manager.log master_binlog_dir=/data/mysql/binlog #master_ip_failover_script=/usr/local/bin/master_ip_failover #master_ip_online_change_script=/usr/local/bin/master_ip_online_change password=852741 user=blufly ping_interval=1 remote_workdir=/data/mysql/binlog repl_password=qazqwe1688 repl_user=repl report_script=/usr/local/bin/send_report secondary_check_script= /usr/local/bin/masterha_secondary_check -s -s shutdown_script="" ssh_user=root ssh_port=65535 [server1] hostname= port=9106 [server2] hostname= port=9106 candidate_master=1 check_repl_delay=0 [server3] hostname= port=9106
[root@db53 ~]# cat /usr/local/bin/send_report
#!/usr/bin/perl

#  Copyright (C) 2011 DeNA Co.,Ltd.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#  Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

## Note: This is a sample script and is not complete. Modify the script based on your environment.

use strict;
use warnings FATAL => 'all';
use Mail::Sender;
use Getopt::Long;

# Values filled in from the command line. new_master_host and new_slave_hosts
# are set only when recovering the master succeeded.
my ( $dead_master_host, $new_master_host, $new_slave_hosts, $subject, $body );

# SMTP account used to send the report.
my $smtp      = 'smtp.163.com';
my $mail_from = '[email protected]';
my $mail_user = '[email protected]';
my $mail_pass = '1234567';
my $mail_to   = ['[email protected]'];

GetOptions(
  'orig_master_host=s' => \$dead_master_host,
  'new_master_host=s'  => \$new_master_host,
  'new_slave_hosts=s'  => \$new_slave_hosts,
  'subject=s'          => \$subject,
  'body=s'             => \$body,
);

mailToContacts( $smtp, $mail_from, $mail_user, $mail_pass, $mail_to, $subject, $body );

# Sends one plain-text mail through the given SMTP server using LOGIN auth.
# The SMTP conversation is written to /tmp/monitormail.log (truncated on
# every run). A send failure is printed, not raised; always returns 1.
sub mailToContacts {
  my ( $server, $from, $login, $secret, $recipients, $title, $text ) = @_;

  open my $debug_fh, '>', '/tmp/monitormail.log'
    or die "Can't open the debug file:$!\n";

  my %settings = (
    ctype       => 'text/plain; charset=utf-8',
    encoding    => 'utf-8',
    smtp        => $server,
    from        => $from,
    auth        => 'LOGIN',
    TLS_allowed => '0',
    authid      => $login,
    authpwd     => $secret,
    to          => $recipients,
    subject     => $title,
    debug       => $debug_fh,
  );
  my $sender = Mail::Sender->new( \%settings );

  $sender->MailMsg( { msg => $text, debug => $debug_fh } )
    or print $Mail::Sender::Error;
  return 1;
}

# Do whatever you want here
exit 0;
#在每个slave节点上设置relay log的清除方式
[root@db52 ~]# mysql -uroot -p753951 -e "set global relay_log_purge=0"
[root@db53 ~]# mysql -uroot -p753951 -e "set global relay_log_purge=0"
[root@db52 ~]# cat /root/purge_relay_log.sh
#!/bin/bash
# Purge already-applied relay logs on this slave with MHA's purge_relay_logs
# tool and append the tool's output to a log file. Intended to be run
# periodically (e.g. from cron).

user=blufly
passwd=852741
port=9106
log_dir='/data/masterha/log'
work_dir='/data'
purge='/usr/local/bin/purge_relay_logs'

# Create the log directory on first run.
if [ ! -d "$log_dir" ]; then
  mkdir -p "$log_dir"
fi

# Append stdout and stderr so earlier runs stay in the log.
"$purge" --user="$user" --password="$passwd" --disable_relay_log_purge \
  --port="$port" --workdir="$work_dir" >> "$log_dir/purge_relay_logs.log" 2>&1
echo "0 3 * * * /bin/bash /root/purge_relay_log.sh > /dev/null 2>&1" >> /etc/crontab
[root@db52 ~]# purge_relay_logs --user=root --password=753951 --port=9106 -disable_relay_log_purge --workdir=/data/ 2018-09-06 15:50:26: purge_relay_logs script started. Found relay_log.info: /data/mysql/relay-log.info Opening /data/mysql/binlog/mysql-relay-bin.000003 .. Opening /data/mysql/binlog/mysql-relay-bin.000004 .. Executing SET GLOBAL relay_log_purge=1; FLUSH LOGS; sleeping a few seconds so that SQL thread can delete older relay log files (if it keeps up); SET GLOBAL relay_log_purge=0; .. ok. 2018-09-06 15:50:29: All relay log purging operations succeeded.
[root@db53 ~]# masterha_check_ssh --conf=/etc/masterha/app1.cnf Thu Sep 6 16:13:59 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Thu Sep 6 16:13:59 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf.. Thu Sep 6 16:13:59 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf.. Thu Sep 6 16:13:59 2018 - [info] Starting SSH connection tests.. Thu Sep 6 16:14:01 2018 - [debug] Thu Sep 6 16:13:59 2018 - [debug] Connecting via SSH from [email protected]( to [email protected]( Thu Sep 6 16:14:00 2018 - [debug] ok. Thu Sep 6 16:14:00 2018 - [debug] Connecting via SSH from [email protected]( to [email protected]( Thu Sep 6 16:14:01 2018 - [debug] ok. Thu Sep 6 16:14:01 2018 - [debug] Thu Sep 6 16:13:59 2018 - [debug] Connecting via SSH from [email protected]( to [email protected]( Thu Sep 6 16:14:00 2018 - [debug] ok. Thu Sep 6 16:14:00 2018 - [debug] Connecting via SSH from [email protected]( to [email protected]( Thu Sep 6 16:14:01 2018 - [debug] ok. Thu Sep 6 16:14:02 2018 - [debug] Thu Sep 6 16:14:00 2018 - [debug] Connecting via SSH from [email protected]( to [email protected]( Thu Sep 6 16:14:01 2018 - [debug] ok. Thu Sep 6 16:14:01 2018 - [debug] Connecting via SSH from [email protected]( to [email protected]( Thu Sep 6 16:14:02 2018 - [debug] ok. Thu Sep 6 16:14:02 2018 - [info] All SSH connection tests passed successfully.
masterha_check_repl --conf=/etc/masterha/app1.cnf [root@db53 /]# masterha_check_repl --conf=/etc/masterha/app1.cnf Thu Sep 6 19:46:46 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Thu Sep 6 19:46:46 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf.. Thu Sep 6 19:46:46 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf.. Thu Sep 6 19:46:46 2018 - [info] MHA::MasterMonitor version 0.56. Thu Sep 6 19:46:47 2018 - [info] GTID failover mode = 0 Thu Sep 6 19:46:47 2018 - [info] Dead Servers: Thu Sep 6 19:46:47 2018 - [info] Alive Servers: Thu Sep 6 19:46:47 2018 - [info] Thu Sep 6 19:46:47 2018 - [info] Thu Sep 6 19:46:47 2018 - [info] Thu Sep 6 19:46:47 2018 - [info] Alive Slaves: Thu Sep 6 19:46:47 2018 - [info] Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled Thu Sep 6 19:46:47 2018 - [info] Replicating from Thu Sep 6 19:46:47 2018 - [info] Primary candidate for the new Master (candidate_master is set) Thu Sep 6 19:46:47 2018 - [info] Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled Thu Sep 6 19:46:47 2018 - [info] Replicating from Thu Sep 6 19:46:47 2018 - [info] Current Alive Master: Thu Sep 6 19:46:47 2018 - [info] Checking slave configurations.. Thu Sep 6 19:46:47 2018 - [info] Checking replication filtering settings.. Thu Sep 6 19:46:47 2018 - [info] binlog_do_db= , binlog_ignore_db= information_schema,mysql,performance_schema,test Thu Sep 6 19:46:47 2018 - [info] Replication filtering check ok. Thu Sep 6 19:46:47 2018 - [info] GTID (with auto-pos) is not supported Thu Sep 6 19:46:47 2018 - [info] Starting SSH connection tests.. Thu Sep 6 19:46:51 2018 - [info] All SSH connection tests passed successfully. Thu Sep 6 19:46:51 2018 - [info] Checking MHA Node version.. Thu Sep 6 19:46:51 2018 - [info] Version check ok. 
Thu Sep 6 19:46:51 2018 - [info] Checking SSH publickey authentication settings on the current master.. Thu Sep 6 19:46:52 2018 - [info] HealthCheck: SSH to is reachable. Thu Sep 6 19:46:52 2018 - [info] Master MHA Node version is 0.56. Thu Sep 6 19:46:52 2018 - [info] Checking recovery script configurations on Thu Sep 6 19:46:52 2018 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/binlog --output_file=/data/mysql/binlog/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000004 Thu Sep 6 19:46:52 2018 - [info] Connecting to [email protected]( Creating /data/mysql/binlog if not exists.. ok. Checking output directory is accessible or not.. ok. Binlog found at /data/mysql/binlog, up to mysql-bin.000004 Thu Sep 6 19:46:53 2018 - [info] Binlog setting check done. Thu Sep 6 19:46:53 2018 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers.. Thu Sep 6 19:46:53 2018 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host= --slave_ip= --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info --relay_dir=/data/mysql/ --slave_pass=xxx Thu Sep 6 19:46:53 2018 - [info] Connecting to [email protected]( Checking slave recovery environment settings.. Opening /data/mysql/relay-log.info ... ok. Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003 Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003 Testing mysql connection and privileges.. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. 
Thu Sep 6 19:46:53 2018 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host= --slave_ip= --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info --relay_dir=/data/mysql/ --slave_pass=xxx Thu Sep 6 19:46:53 2018 - [info] Connecting to [email protected]( Checking slave recovery environment settings.. Opening /data/mysql/relay-log.info ... ok. Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003 Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003 Testing mysql connection and privileges.. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Thu Sep 6 19:46:54 2018 - [info] Slaves settings check done. Thu Sep 6 19:46:54 2018 - [info] (current master) +-- +-- Thu Sep 6 19:46:54 2018 - [info] Checking replication health on Thu Sep 6 19:46:54 2018 - [info] ok. Thu Sep 6 19:46:54 2018 - [info] Checking replication health on Thu Sep 6 19:46:54 2018 - [info] ok. Thu Sep 6 19:46:54 2018 - [warning] master_ip_failover_script is not defined. Thu Sep 6 19:46:54 2018 - [warning] shutdown_script is not defined. Thu Sep 6 19:46:54 2018 - [info] Got exit code 0 (Not master dead). MySQL Replication Health is OK.
#开启MHA Manager监控(db53)
nohup masterha_manager --conf=/etc/masterha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &
mysql -uroot -p753951 -e "set global read_only=1" mysql -uroot -p753951 -e "set global relay_log_purge=0"
#----------- 利用keepalived做VIP切换 -------------#
wget http://www.keepalived.org/software/keepalived-2.0.1.tar.gz yum -y install libnl libnl-devel libnfnetlink-devel tar -zxvf keepalived-2.0.1.tar.gz cd keepalived-2.0.1 ./configure make;make install
cp /usr/local/etc/sysconfig/keepalived /etc/sysconfig/ mkdir /etc/keepalived cp /usr/local/etc/keepalived/keepalived.conf /etc/keepalived/ cp /usr/local/sbin/keepalived /usr/sbin/ cp /opt/keepalived-2.0.1/keepalived/etc/init.d/keepalived /etc/rc.d/init.d/ chmod +x /etc/rc.d/init.d/keepalived
[root@db51 ~]# mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf-bak [root@db51 ~]# vi /etc/keepalived/keepalived.conf ! Configuration File for keepalived global_defs { notification_email { [email protected] } notification_email_from [email protected] smtp_server smtp_connect_timeout 30 router_id MySQL-HA enable_script_security } vrrp_script chk_mysql_port { #检测mysql服务是否在运行。有很多方式,比如进程,用脚本检测等等 script "/root/chk_mysql.sh" #这里通过脚本监测 interval 2 #脚本执行间隔,每2s检测一次 weight -5 #脚本结果导致的优先级变更,检测失败(脚本返回非0)则优先级 -5 fall 2 #检测连续2次失败才算确定是真失败。会用weight减少优先级(1-255之间) rise 1 #检测1次成功就算成功。但不修改优先级 user root } vrrp_instance MysqlHA_1 { state BACKUP interface eno16777984 virtual_router_id 55 priority 150 advert_int 1 nopreempt unicast_src_ip ##本机ip unicast_peer { ##对端ip } authentication { auth_type PASS auth_pass haha268 } virtual_ipaddress { } track_script { chk_mysql_port } }
[root@db52 ~]# mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf-bak [root@db52 ~]# vi /etc/keepalived/keepalived.conf ! Configuration File for keepalived global_defs { notification_email { [email protected] } notification_email_from [email protected] smtp_server smtp_connect_timeout 30 router_id MySQL-HA enable_script_security } vrrp_script chk_mysql_port { #检测mysql服务是否在运行。有很多方式,比如进程,用脚本检测等等 script "/root/chk_mysql.sh" #这里通过脚本监测 interval 2 #脚本执行间隔,每2s检测一次 weight -5 #脚本结果导致的优先级变更,检测失败(脚本返回非0)则优先级 -5 fall 2 #检测连续2次失败才算确定是真失败。会用weight减少优先级(1-255之间) rise 1 #检测1次成功就算成功。但不修改优先级 user root } vrrp_instance MysqlHA_1 { state BACKUP interface eno16777984 virtual_router_id 55 priority 120 advert_int 1 nopreempt unicast_src_ip ##本机ip unicast_peer { ##对端ip } authentication { auth_type PASS auth_pass haha268 } virtual_ipaddress { } track_script { chk_mysql_port } }
[root@db51 ~]# cat /root/chk_mysql.sh
#!/bin/bash
# Health check for the local MySQL/MariaDB instance: if nothing is listening
# on TCP port 9106, stop keepalived on this host.

port=9106

# Count listening TCP sockets whose local address ends in ":<port>".
# Matching the address column exactly avoids false positives from other
# ports (e.g. 19106) or from unrelated numbers that happen to contain the
# digits, which a plain `grep 9106` over `netstat -na` output would accept.
counter=$(netstat -lnt | awk -v port="$port" '$4 ~ (":" port "$") { n++ } END { print n + 0 }')

if [ "${counter}" -eq 0 ]; then
    /etc/init.d/keepalived stop
fi
[root@db51 ~]# chmod +x /root/chk_mysql.sh
[root@db51 ~]# /etc/init.d/keepalived start [root@db51 ~]# chkconfig keepalived on [root@db51 ~]# systemctl list-unit-files|grep keepalived keepalived.service enabled
[root@db52 ~]# /etc/init.d/keepalived start [root@db52 ~]# chkconfig keepalived on [root@db52 ~]# systemctl list-unit-files|grep keepalived keepalived.service enabled
[root@db51 ~]# ip addr|grep inet scope global eno16777984
[root@db53 ~]# cat /usr/local/bin/master_ip_failover
#!/usr/bin/env perl

# MHA master_ip_failover hook: moves the VIP by stopping keepalived on the
# old master (--command=stop|stopssh) and starting it on the new master
# (--command=start), both over ssh.

use strict;
use warnings FATAL => 'all';

use Getopt::Long;

# $ssh_port must be declared here: it is referenced in GetOptions below and
# the script does not compile under "use strict" otherwise.
my (
  $command,          $ssh_user,       $ssh_port,
  $orig_master_host, $orig_master_ip, $orig_master_port,
  $new_master_host,  $new_master_ip,  $new_master_port
);

my $vip           = '';
my $ssh_start_vip = "/etc/init.d/keepalived start";
my $ssh_stop_vip  = "/etc/init.d/keepalived stop";

GetOptions(
  'command=s'          => \$command,
  'ssh_user=s'         => \$ssh_user,
  'ssh_port=s'         => \$ssh_port,
  'orig_master_host=s' => \$orig_master_host,
  'orig_master_ip=s'   => \$orig_master_ip,
  'orig_master_port=i' => \$orig_master_port,
  'new_master_host=s'  => \$new_master_host,
  'new_master_ip=s'    => \$new_master_ip,
  'new_master_port=i'  => \$new_master_port,
);

exit &main();

# Dispatches on --command. Exit codes: 0 on success, 1 when stopping the VIP
# failed or the command is missing/unknown, 10 when starting the VIP failed.
sub main {

  print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

  # Without --command the comparisons below would die on an undefined value
  # (warnings are fatal); show the usage text instead.
  unless ( defined $command ) {
    &usage();
    exit 1;
  }

  if ( $command eq "stop" || $command eq "stopssh" ) {

    my $exit_code = 1;
    eval {
      print "Disabling the VIP on old master: $orig_master_host \n";
      &stop_vip();
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $command eq "start" ) {

    my $exit_code = 10;
    eval {
      print "Enabling the VIP - $vip on the new master - $new_master_host \n";
      &start_vip();
      $exit_code = 0;
    };
    if ($@) {
      warn $@;
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $command eq "status" ) {
    print "Checking the Status of the script.. OK \n";
    #`ssh $ssh_user\@cluster1 \" $ssh_start_vip \"`;
    exit 0;
  }
  else {
    &usage();
    exit 1;
  }
}

# Returns the "ssh [-p port] user@host" prefix for a remote command.
# The port is added only when --ssh_port was passed.
# NOTE(review): confirm that your MHA version passes --ssh_port to this
# script; if it does not, ssh falls back to its own default/configured port.
sub ssh_prefix {
  my ( $user, $host ) = @_;
  my $port_opt = defined($ssh_port) ? "-p $ssh_port " : "";
  return "ssh $port_opt$user\@$host";
}

# A simple system call that enable the VIP on the new master
sub start_vip() {
  my $ssh = ssh_prefix( $ssh_user, $new_master_host );
  `$ssh \" $ssh_start_vip \"`;
}

# A simple system call that disable the VIP on the old_master
sub stop_vip() {
  return 0 unless ($ssh_user);
  my $ssh = ssh_prefix( $ssh_user, $orig_master_host );
  `$ssh \" $ssh_stop_vip \"`;
}

sub usage {
  print
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}

# Modify the master_ip_online_change script
[root@db53 ~]# cat /usr/local/bin/master_ip_online_change
#!/usr/bin/env perl

# MHA master_ip_online_change hook for a planned master switch:
#   --command=stop   makes both masters read-only, waits for / kills client
#                    threads on the old master and stops keepalived there;
#   --command=start  makes the new master writable and starts keepalived on it.

use strict;
use warnings FATAL => 'all';

use Getopt::Long;
use MHA::DBHelper;
use MHA::NodeUtil;
use Time::HiRes qw( sleep gettimeofday tv_interval );
use Data::Dumper;

my $_tstart;
my $_running_interval = 0.1;

# $ssh_user and $ssh_port must be declared here: they are referenced in
# GetOptions and in start_vip/stop_vip, and the script does not compile
# under "use strict" otherwise.
my (
  $command,              $orig_master_is_new_slave,
  $orig_master_host,     $orig_master_ip,
  $orig_master_port,     $orig_master_user,
  $orig_master_password, $orig_master_ssh_user,
  $new_master_host,      $new_master_ip,
  $new_master_port,      $new_master_user,
  $new_master_password,  $new_master_ssh_user,
  $ssh_user,             $ssh_port,
);

my $vip           = '';
my $ssh_start_vip = "/etc/init.d/keepalived start";
my $ssh_stop_vip  = "/etc/init.d/keepalived stop";

# orig_master_port, orig_master_ssh_user and new_master_ssh_user were
# declared and listed in the usage text but never parsed; they are parsed
# now so the values documented in usage() actually reach the script.
GetOptions(
  'command=s'                => \$command,
  'ssh_user=s'               => \$ssh_user,
  'ssh_port=s'               => \$ssh_port,
  'orig_master_is_new_slave' => \$orig_master_is_new_slave,
  'orig_master_host=s'       => \$orig_master_host,
  'orig_master_ip=s'         => \$orig_master_ip,
  'orig_master_port=i'       => \$orig_master_port,
  'orig_master_user=s'       => \$orig_master_user,
  'orig_master_password=s'   => \$orig_master_password,
  'orig_master_ssh_user=s'   => \$orig_master_ssh_user,
  'new_master_host=s'        => \$new_master_host,
  'new_master_ip=s'          => \$new_master_ip,
  'new_master_port=i'        => \$new_master_port,
  'new_master_user=s'        => \$new_master_user,
  'new_master_password=s'    => \$new_master_password,
  'new_master_ssh_user=s'    => \$new_master_ssh_user,
);

exit &main();

# Current local time with microseconds appended, used as a log prefix.
sub current_time_us {
  my ( $sec, $microsec ) = gettimeofday();
  my $curdate = localtime($sec);
  return $curdate . " " . sprintf( "%06d", $microsec );
}

# Sleeps for whatever remains of $_running_interval since $_tstart.
sub sleep_until {
  my $elapsed = tv_interval($_tstart);
  if ( $_running_interval > $elapsed ) {
    sleep( $_running_interval - $elapsed );
  }
}

# Returns the SHOW PROCESSLIST rows (hashrefs) that still count as client
# activity. Arguments: DBI handle, our own connection id, optional minimum
# running time, optional filter level (>=1 also skips Sleep/Connect,
# >=2 also skips queries starting with select/show).
sub get_threads_util {
  my $dbh                    = shift;
  my $my_connection_id       = shift;
  my $running_time_threshold = shift;
  my $type                   = shift;
  $running_time_threshold = 0 unless ($running_time_threshold);
  $type                   = 0 unless ($type);
  my @threads;

  my $sth = $dbh->prepare("SHOW PROCESSLIST");
  $sth->execute();

  while ( my $ref = $sth->fetchrow_hashref() ) {
    my $id         = $ref->{Id};
    my $user       = $ref->{User};
    my $host       = $ref->{Host};
    my $command    = $ref->{Command};
    my $state      = $ref->{State};
    my $query_time = $ref->{Time};
    my $info       = $ref->{Info};
    $info =~ s/^\s*(.*?)\s*$/$1/ if defined($info);
    next if ( $my_connection_id == $id );
    next if ( defined($query_time) && $query_time < $running_time_threshold );
    next if ( defined($command)    && $command eq "Binlog Dump" );
    next if ( defined($user)       && $user eq "system user" );
    next
      if ( defined($command)
      && $command eq "Sleep"
      && defined($query_time)
      && $query_time >= 1 );

    if ( $type >= 1 ) {
      next if ( defined($command) && $command eq "Sleep" );
      next if ( defined($command) && $command eq "Connect" );
    }

    if ( $type >= 2 ) {
      next if ( defined($info) && $info =~ m/^select/i );
      next if ( defined($info) && $info =~ m/^show/i );
    }

    push @threads, $ref;
  }
  return @threads;
}

# Dispatches on --command. Exit codes: 0 on success, 1 when "stop" failed or
# the command is unknown, 10 when "start" failed.
sub main {

  # Without --command the comparisons below would die on an undefined value
  # (warnings are fatal); show the usage text instead.
  unless ( defined $command ) {
    &usage();
    exit 1;
  }

  if ( $command eq "stop" ) {
    ## Gracefully killing connections on the current master
    # 1. Set read_only= 1 on the new master
    # 2. DROP USER so that no app user can establish new connections
    # 3. Set read_only= 1 on the current master
    # 4. Kill current queries
    # * Any database access failure will result in script die.
    my $exit_code = 1;
    eval {
      ## Setting read_only=1 on the new master (to avoid accident)
      my $new_master_handler = MHA::DBHelper->new();

      # args: hostname, port, user, password, raise_error(die_on_error)_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        $new_master_user, $new_master_password, 1 );
      print current_time_us() . " Set read_only on the new master.. ";
      $new_master_handler->enable_read_only();
      if ( $new_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }
      $new_master_handler->disconnect();

      # Connecting to the orig master, die if any database error happens
      my $orig_master_handler = MHA::DBHelper->new();
      $orig_master_handler->connect( $orig_master_ip, $orig_master_port,
        $orig_master_user, $orig_master_password, 1 );

      ## Drop application user so that nobody can connect. Disabling per-session binlog beforehand
      #$orig_master_handler->disable_log_bin_local();
      #print current_time_us() . " Drpping app user on the orig master..\n";
      #FIXME_xxx_drop_app_user($orig_master_handler);

      ## Waiting for N * 100 milliseconds so that current connections can exit
      my $time_until_read_only = 15;
      $_tstart = [gettimeofday];
      my @threads = get_threads_util( $orig_master_handler->{dbh},
        $orig_master_handler->{connection_id} );
      while ( $time_until_read_only > 0 && $#threads >= 0 ) {
        if ( $time_until_read_only % 5 == 0 ) {
          printf
"%s Waiting all running %d threads are disconnected.. (max %d milliseconds)\n",
            current_time_us(), $#threads + 1, $time_until_read_only * 100;
          if ( $#threads < 5 ) {
            print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"
              foreach (@threads);
          }
        }
        sleep_until();
        $_tstart = [gettimeofday];
        $time_until_read_only--;
        @threads = get_threads_util( $orig_master_handler->{dbh},
          $orig_master_handler->{connection_id} );
      }

      ## Setting read_only=1 on the current master so that nobody(except SUPER) can write
      print current_time_us() . " Set read_only=1 on the orig master.. ";
      $orig_master_handler->enable_read_only();
      if ( $orig_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }

      ## Waiting for M * 100 milliseconds so that current update queries can complete
      my $time_until_kill_threads = 5;
      @threads = get_threads_util( $orig_master_handler->{dbh},
        $orig_master_handler->{connection_id} );
      while ( $time_until_kill_threads > 0 && $#threads >= 0 ) {
        if ( $time_until_kill_threads % 5 == 0 ) {
          printf
"%s Waiting all running %d queries are disconnected.. (max %d milliseconds)\n",
            current_time_us(), $#threads + 1, $time_until_kill_threads * 100;
          if ( $#threads < 5 ) {
            print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"
              foreach (@threads);
          }
        }
        sleep_until();
        $_tstart = [gettimeofday];
        $time_until_kill_threads--;
        @threads = get_threads_util( $orig_master_handler->{dbh},
          $orig_master_handler->{connection_id} );
      }

      print "Disabling the VIP on old master: $orig_master_host \n";
      &stop_vip();

      ## Terminating all threads
      print current_time_us() . " Killing all application threads..\n";
      $orig_master_handler->kill_threads(@threads) if ( $#threads >= 0 );
      print current_time_us() . " done.\n";
      #$orig_master_handler->enable_log_bin_local();
      $orig_master_handler->disconnect();

      ## After finishing the script, MHA executes FLUSH TABLES WITH READ LOCK
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $command eq "start" ) {
    ## Activating master ip on the new master
    # 1. Create app user with write privileges
    # 2. Moving backup script if needed
    # 3. Register new master's ip to the catalog database

    # We don't return error even though activating updatable accounts/ip failed so that we don't interrupt slaves' recovery.
    # If exit code is 0 or 10, MHA does not abort
    my $exit_code = 10;
    eval {
      my $new_master_handler = MHA::DBHelper->new();

      # args: hostname, port, user, password, raise_error_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        $new_master_user, $new_master_password, 1 );

      ## Set read_only=0 on the new master
      #$new_master_handler->disable_log_bin_local();
      print current_time_us() . " Set read_only=0 on the new master.\n";
      $new_master_handler->disable_read_only();

      ## Creating an app user on the new master
      #print current_time_us() . " Creating app user on the new master..\n";
      #FIXME_xxx_create_app_user($new_master_handler);
      #$new_master_handler->enable_log_bin_local();
      $new_master_handler->disconnect();

      ## Update master ip on the catalog database, etc
      print "Enabling the VIP - $vip on the new master - $new_master_host \n";
      &start_vip();
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $command eq "status" ) {

    # do nothing
    exit 0;
  }
  else {
    &usage();
    exit 1;
  }
}

# Returns the "ssh [-p port] user@host" prefix for a remote command.
# The port is added only when --ssh_port was passed.
# NOTE(review): confirm that your MHA version passes --ssh_port to this
# script; if it does not, ssh falls back to its own default/configured port.
sub ssh_prefix {
  my ( $user, $host ) = @_;
  my $port_opt = defined($ssh_port) ? "-p $ssh_port " : "";
  return "ssh $port_opt$user\@$host";
}

# A simple system call that enable the VIP on the new master.
# Uses --new_master_ssh_user when given, otherwise --ssh_user.
sub start_vip() {
  my $user = defined($new_master_ssh_user) ? $new_master_ssh_user : $ssh_user;
  my $ssh  = ssh_prefix( $user, $new_master_host );
  `$ssh \" $ssh_start_vip \"`;
}

# A simple system call that disable the VIP on the old_master.
# Uses --orig_master_ssh_user when given, otherwise --ssh_user; does nothing
# when neither was passed.
sub stop_vip() {
  my $user = defined($orig_master_ssh_user) ? $orig_master_ssh_user : $ssh_user;
  return 0 unless ($user);
  my $ssh = ssh_prefix( $user, $orig_master_host );
  `$ssh \" $ssh_stop_vip \"`;
}

sub usage {
  print
"Usage: master_ip_online_change --command=start|stop|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --orig_master_user=user --orig_master_password=password --orig_master_ssh_user=sshuser --new_master_host=host --new_master_ip=ip --new_master_port=port --new_master_user=user --new_master_password=password --new_master_ssh_user=sshuser \n";
  die;
}
[root@db53 ~]# sed -i 's/\r$//' /usr/local/bin/master_ip_online_change
[root@db53 ~]# sed -i 's/\r$//' /usr/local/bin/master_ip_failover
#在app1.cnf中去掉master_ip_failover_script、master_ip_online_change_script的注释
[root@db53 ~]# grep 'master_ip_failover_script' /etc/masterha/app1.cnf
master_ip_failover_script=/usr/local/bin/master_ip_failover
[root@db53 ~]# masterha_check_repl --conf=/etc/masterha/app1.cnf Fri Sep 7 09:53:22 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Fri Sep 7 09:53:22 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf.. Fri Sep 7 09:53:22 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf.. Fri Sep 7 09:53:22 2018 - [info] MHA::MasterMonitor version 0.56. Fri Sep 7 09:53:23 2018 - [info] GTID failover mode = 0 Fri Sep 7 09:53:23 2018 - [info] Dead Servers: Fri Sep 7 09:53:23 2018 - [info] Alive Servers: Fri Sep 7 09:53:23 2018 - [info] Fri Sep 7 09:53:23 2018 - [info] Fri Sep 7 09:53:23 2018 - [info] Fri Sep 7 09:53:23 2018 - [info] Alive Slaves: Fri Sep 7 09:53:23 2018 - [info] Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled Fri Sep 7 09:53:23 2018 - [info] Replicating from Fri Sep 7 09:53:23 2018 - [info] Primary candidate for the new Master (candidate_master is set) Fri Sep 7 09:53:23 2018 - [info] Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled Fri Sep 7 09:53:23 2018 - [info] Replicating from Fri Sep 7 09:53:23 2018 - [info] Current Alive Master: Fri Sep 7 09:53:23 2018 - [info] Checking slave configurations.. Fri Sep 7 09:53:23 2018 - [info] Checking replication filtering settings.. Fri Sep 7 09:53:23 2018 - [info] binlog_do_db= , binlog_ignore_db= information_schema,mysql,performance_schema,test Fri Sep 7 09:53:23 2018 - [info] Replication filtering check ok. Fri Sep 7 09:53:23 2018 - [info] GTID (with auto-pos) is not supported Fri Sep 7 09:53:23 2018 - [info] Starting SSH connection tests.. Fri Sep 7 09:53:27 2018 - [info] All SSH connection tests passed successfully. Fri Sep 7 09:53:27 2018 - [info] Checking MHA Node version.. Fri Sep 7 09:53:28 2018 - [info] Version check ok. Fri Sep 7 09:53:28 2018 - [info] Checking SSH publickey authentication settings on the current master.. 
Fri Sep 7 09:53:28 2018 - [info] HealthCheck: SSH to is reachable. Fri Sep 7 09:53:29 2018 - [info] Master MHA Node version is 0.56. Fri Sep 7 09:53:29 2018 - [info] Checking recovery script configurations on Fri Sep 7 09:53:29 2018 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/binlog --output_file=/data/mysql/binlog/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000004 Fri Sep 7 09:53:29 2018 - [info] Connecting to [email protected]( Creating /data/mysql/binlog if not exists.. ok. Checking output directory is accessible or not.. ok. Binlog found at /data/mysql/binlog, up to mysql-bin.000004 Fri Sep 7 09:53:29 2018 - [info] Binlog setting check done. Fri Sep 7 09:53:29 2018 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers.. Fri Sep 7 09:53:29 2018 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host= --slave_ip= --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info --relay_dir=/data/mysql/ --slave_pass=xxx Fri Sep 7 09:53:29 2018 - [info] Connecting to [email protected]( Checking slave recovery environment settings.. Opening /data/mysql/relay-log.info ... ok. Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003 Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003 Testing mysql connection and privileges.. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. 
Fri Sep 7 09:53:30 2018 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='blufly' --slave_host= --slave_ip= --slave_port=9106 --workdir=/data/mysql/binlog --target_version=10.3.6-MariaDB-log --manager_version=0.56 --relay_log_info=/data/mysql/relay-log.info --relay_dir=/data/mysql/ --slave_pass=xxx Fri Sep 7 09:53:30 2018 - [info] Connecting to [email protected]( Checking slave recovery environment settings.. Opening /data/mysql/relay-log.info ... ok. Relay log found at /data/mysql/binlog, up to mysql-relay-bin.000003 Temporary relay log file is /data/mysql/binlog/mysql-relay-bin.000003 Testing mysql connection and privileges.. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Fri Sep 7 09:53:30 2018 - [info] Slaves settings check done. Fri Sep 7 09:53:30 2018 - [info] (current master) +-- +-- Fri Sep 7 09:53:30 2018 - [info] Checking replication health on Fri Sep 7 09:53:30 2018 - [info] ok. Fri Sep 7 09:53:30 2018 - [info] Checking replication health on Fri Sep 7 09:53:30 2018 - [info] ok. Fri Sep 7 09:53:30 2018 - [info] Checking master_ip_failover_script status: Fri Sep 7 09:53:30 2018 - [info] /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host= --orig_master_ip= --orig_master_port=9106 --orig_master_ssh_port=65535 Unknown option: orig_master_ssh_port IN SCRIPT TEST====/etc/init.d/keepalived stop==/etc/init.d/keepalived start=== Checking the Status of the script.. OK Fri Sep 7 09:53:30 2018 - [info] OK. Fri Sep 7 09:53:30 2018 - [warning] shutdown_script is not defined. Fri Sep 7 09:53:30 2018 - [info] Got exit code 0 (Not master dead). MySQL Replication Health is OK.
#MHA manager启动脚本
[root@db53 ~]# vi /etc/init.d/mha_manager
#!/bin/sh
# Description: Startup mha_manager
# chkconfig: 2345 55 25
#
# SysV init wrapper around the MHA manager tools for /etc/masterha/app1.cnf.
# Usage: /etc/init.d/mha_manager start|stop|status

# Launch masterha_manager in the background, detached from the terminal.
# stdin comes from /dev/null; stdout and stderr go to the manager log,
# which is truncated on every start.
start() {
    nohup /usr/local/bin/masterha_manager --conf=/etc/masterha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &
}

# Ask the running manager to stop.
stop() {
    /usr/local/bin/masterha_stop --conf=/etc/masterha/app1.cnf
}

# Print the manager state as reported by masterha_check_status.
status() {
    /usr/local/bin/masterha_check_status --conf=/etc/masterha/app1.cnf
}

case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    status)
        status
        ;;
    *)
        # Unknown or missing action: show every supported action and fail.
        echo "Usage: $0 start|stop|status" >&2
        exit 2
        ;;
esac
[root@db53 ~]# chmod +x /etc/init.d/mha_manager
#启动MHA manager(db53)
[root@db53 ~]# sed -i 's/\r$//' /etc/init.d/mha_manager [root@db53 ~]# /etc/init.d/mha_manager start [root@db53 ~]# chkconfig mha_manager on
#通过MHA Manager监控,查看集群里面现在谁是master
[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf app1 (pid:18766) is running(0:PING_OK), master:
#--------- 自动Failover(MHA Manager必须运行)-------#
[root@db51 ~]# yum install sysbench -y
[root@db51 ~]# mysql -uroot -p753951 -e "create database mhatest" [root@db51 ~]# sysbench /usr/share/sysbench/oltp_write_only.lua --mysql-host= --mysql-port=9106 --mysql-user=root --mysql-password=753951 --mysql-socket=/tmp/mysql.sock --mysql-db=mhatest --db-driver=mysql --tables=10 --table_size=100000 --report-interval=10 --threads=30 --time=120 prepare
#停掉slave IO线程,模拟主从延时(db52)
[root@db52 ~]# mysql -uroot -p MariaDB [(none)]> stop slave io_thread; Query OK, 0 rows affected (0.08 sec) MariaDB [(none)]> show slave status\G *************************** 1. row *************************** Slave_IO_State: Master_Host: Master_User: repl Master_Port: 9106 Connect_Retry: 60 Master_Log_File: mysql-bin.000005 Read_Master_Log_Pos: 89202349 Relay_Log_File: mysql-relay-bin.000006 Relay_Log_Pos: 89202648 Relay_Master_Log_File: mysql-bin.000005 Slave_IO_Running: No Slave_SQL_Running: Yes Replicate_Do_DB: Replicate_Ignore_DB: test,mysql,information_schema,performance_schema Replicate_Do_Table: Replicate_Ignore_Table: Replicate_Wild_Do_Table: Replicate_Wild_Ignore_Table: Last_Errno: 0 Last_Error: Skip_Counter: 0 Exec_Master_Log_Pos: 89202349 Relay_Log_Space: 194088288 Until_Condition: None Until_Log_File: Until_Log_Pos: 0 Master_SSL_Allowed: No Master_SSL_CA_File: Master_SSL_CA_Path: Master_SSL_Cert: Master_SSL_Cipher: Master_SSL_Key: Seconds_Behind_Master: NULL Master_SSL_Verify_Server_Cert: No Last_IO_Errno: 0 Last_IO_Error: Last_SQL_Errno: 0 Last_SQL_Error: Replicate_Ignore_Server_Ids: Master_Server_Id: 51 Master_SSL_Crl: Master_SSL_Crlpath: Using_Gtid: Slave_Pos Gtid_IO_Pos: 0-51-922 Replicate_Do_Domain_Ids: Replicate_Ignore_Domain_Ids: Parallel_Mode: conservative SQL_Delay: 0 SQL_Remaining_Delay: NULL Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it 1 row in set (0.000 sec)
[root@db51 ~]# sysbench /usr/share/sysbench/oltp_write_only.lua --mysql-host= --mysql-port=9106 --mysql-user=root --mysql-password=753951 --mysql-socket=/tmp/mysql.sock --mysql-db=mhatest --db-driver=mysql --tables=10 --table_size=100000 --report-interval=10 --threads=30 --time=180 run
[root@db52 ~]# mysql -uroot -p MariaDB [(none)]> start slave io_thread; Query OK, 0 rows affected (0.00 sec) MariaDB [(none)]> show slave status\G *************************** 1. row *************************** Slave_IO_State: Waiting for master to send event Master_Host: Master_User: repl Master_Port: 9106 Connect_Retry: 60 Master_Log_File: mysql-bin.000009 Read_Master_Log_Pos: 99099077 Relay_Log_File: mysql-relay-bin.000009 Relay_Log_Pos: 34972386 Relay_Master_Log_File: mysql-bin.000006 Slave_IO_Running: Yes Slave_SQL_Running: Yes Replicate_Do_DB: Replicate_Ignore_DB: test,mysql,information_schema,performance_schema Replicate_Do_Table: Replicate_Ignore_Table: Replicate_Wild_Do_Table: Replicate_Wild_Ignore_Table: Last_Errno: 0 Last_Error: Skip_Counter: 0 Exec_Master_Log_Pos: 34972087 Relay_Log_Space: 623421793 Until_Condition: None Until_Log_File: Until_Log_Pos: 0 Master_SSL_Allowed: No Master_SSL_CA_File: Master_SSL_CA_Path: Master_SSL_Cert: Master_SSL_Cipher: Master_SSL_Key: Seconds_Behind_Master: 245 Master_SSL_Verify_Server_Cert: No Last_IO_Errno: 0 Last_IO_Error: Last_SQL_Errno: 0 Last_SQL_Error: Replicate_Ignore_Server_Ids: Master_Server_Id: 51 Master_SSL_Crl: Master_SSL_Crlpath: Using_Gtid: Slave_Pos Gtid_IO_Pos: 0-51-520164 Replicate_Do_Domain_Ids: Replicate_Ignore_Domain_Ids: Parallel_Mode: conservative SQL_Delay: 0 SQL_Remaining_Delay: NULL Slave_SQL_Running_State: Waiting for room in worker thread event queue 1 row in set (0.000 sec) MariaDB [mhatest]> show tables; +-------------------+ | Tables_in_mhatest | +-------------------+ | sbtest1 | | sbtest10 | | sbtest2 | | sbtest3 | | sbtest4 | | sbtest5 | | sbtest6 | | sbtest7 | | sbtest8 | | sbtest9 | +-------------------+ 10 rows in set (0.000 sec) MariaDB [mhatest]> select count(*) from sbtest1; +----------+ | count(*) | +----------+ | 100000 | +----------+ 1 row in set (0.027 sec)
[root@db51 ~]# pkill -9 mysqld
cat /var/log/masterha/app1/manager.log ----- Failover Report ----- app1: MySQL Master failover to succeeded Master is down! Check MHA Manager logs at db53.blufly.com:/var/log/masterha/app1/manager.log for details. Started automated(non-interactive) failover. Invalidated master IP address on The latest slave has all relay logs for recovery. Selected as a new master. OK: Applying all logs succeeded. OK: Activated master IP address. This host has the latest relay log events. Generating relay diff files from the latest slave succeeded. OK: Applying all logs succeeded. Slave started, replicating from Resetting slave info succeeded. Master failover to completed successfully.
#--------- 修复宕机的Master作为Slave ---------------------#
[root@db53 app1]# grep -i "All other slaves should start" manager.log Mon Apr 21 22:28:33 2014 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='', MASTER_PORT=9106, MASTER_LOG_FILE='mysql-bin.000008', MASTER_LOG_POS=342, MASTER_USER='repl', MASTER_PASSWORD='qazqwe1688';
#获取上述信息以后,就可以直接在修复后的master上执行change master to相关操作,重新作为从库了。
[root@db51 ~]# /etc/init.d/mysqld start
[root@db51 ~]# /etc/init.d/keepalived start
[root@db51 ~]# mysql -uroot -p753951
MariaDB [(none)]> CHANGE MASTER TO MASTER_HOST='', MASTER_PORT=9106, MASTER_LOG_FILE='mysql-bin.000008', MASTER_LOG_POS=342, MASTER_USER='repl', MASTER_PASSWORD='qazqwe1688';
MariaDB [(none)]> start slave;
MariaDB [(none)]> show slave status\G
[root@db51 ~]# mysql -uroot -p753951 -e "set global read_only=1" [root@db51 ~]# mysql -uroot -p753951 -e "set global relay_log_purge=0"
#通过MHA Manager监控,查看集群里面现在谁是master
[root@db53 ~]# /etc/init.d/mha_manager start [root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf app1 (pid:21725) is running(0:PING_OK), master:
#--------- 手动Failover(MHA Manager必须没有运行)-------#
#关闭MHA Manager(db53)
[root@db53 ~]# /etc/init.d/mha_manager stop
[root@db53 ~]# masterha_master_switch --master_state=dead --conf=/etc/masterha/app1.cnf --dead_master_host= --dead_master_port=9106 --new_master_host= --new_master_port=9106 --ignore_last_failover
#-------------------- 在线进行切换 ----------------------#
#通过MHA Manager监控,查看集群里面现在谁是master
[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf app1 (pid:26244) is running(0:PING_OK), master:
[root@db53 ~]# /etc/init.d/mha_manager stop
#查看manager status
[root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf app1 is stopped(2:NOT_RUNNING).
[root@db53 ~]# masterha_master_switch --conf=/etc/masterha/app1.cnf --master_state=alive --new_master_host= --new_master_port=9106 --orig_master_is_new_slave --running_updates_limit=10000 Fri Sep 21 12:44:52 2018 - [info] MHA::MasterRotate version 0.56. Fri Sep 21 12:44:52 2018 - [info] Starting online master switch.. Fri Sep 21 12:44:52 2018 - [info] Fri Sep 21 12:44:52 2018 - [info] * Phase 1: Configuration Check Phase.. Fri Sep 21 12:44:52 2018 - [info] Fri Sep 21 12:44:52 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Fri Sep 21 12:44:52 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf.. Fri Sep 21 12:44:52 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf.. Fri Sep 21 12:44:53 2018 - [info] GTID failover mode = 0 Fri Sep 21 12:44:53 2018 - [info] Current Alive Master: Fri Sep 21 12:44:53 2018 - [info] Alive Slaves: Fri Sep 21 12:44:53 2018 - [info] Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled Fri Sep 21 12:44:53 2018 - [info] Replicating from Fri Sep 21 12:44:53 2018 - [info] Version=10.3.6-MariaDB-log (oldest major version between slaves) log-bin:enabled Fri Sep 21 12:44:53 2018 - [info] Replicating from It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on (YES/no): yes Fri Sep 21 12:44:55 2018 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time.. Fri Sep 21 12:44:55 2018 - [info] ok. Fri Sep 21 12:44:55 2018 - [info] Checking MHA is not monitoring or doing failover.. Fri Sep 21 12:44:55 2018 - [info] Checking replication health on Fri Sep 21 12:44:55 2018 - [info] ok. Fri Sep 21 12:44:55 2018 - [info] Checking replication health on Fri Sep 21 12:44:55 2018 - [info] ok. Fri Sep 21 12:44:55 2018 - [info] can be new master. 
Fri Sep 21 12:44:55 2018 - [info] From: (current master) +-- +-- To: (new master) +-- +-- Starting master switch from to (yes/NO): yes Fri Sep 21 12:44:59 2018 - [info] Checking whether is ok for the new master.. Fri Sep 21 12:44:59 2018 - [info] ok. Fri Sep 21 12:44:59 2018 - [info] SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host. Fri Sep 21 12:44:59 2018 - [info] Resetting slave pointing to the dummy host. Fri Sep 21 12:44:59 2018 - [info] ** Phase 1: Configuration Check Phase completed. Fri Sep 21 12:44:59 2018 - [info] Fri Sep 21 12:44:59 2018 - [info] * Phase 2: Rejecting updates Phase.. Fri Sep 21 12:44:59 2018 - [info] Fri Sep 21 12:44:59 2018 - [info] Executing master ip online change script to disable write on the current master: Fri Sep 21 12:44:59 2018 - [info] /usr/local/bin/master_ip_online_change --command=stop --orig_master_host= --orig_master_ip= --orig_master_port=9106 --orig_master_user='blufly' --orig_master_password='852741' --new_master_host= --new_master_ip= --new_master_port=9106 --new_master_user='blufly' --new_master_password='852741' --orig_master_ssh_user=root --new_master_ssh_user=root --orig_master_ssh_port=65535 --new_master_ssh_port=65535 --orig_master_is_new_slave Unknown option: orig_master_ssh_port Unknown option: new_master_ssh_port Fri Sep 21 12:44:59 2018 849902 Set read_only on the new master.. ok. Fri Sep 21 12:44:59 2018 854742 Set read_only=1 on the orig master.. ok. Disabling the VIP on old master: ssh: connect to host port 22: Connection refused Fri Sep 21 12:44:59 2018 865695 Killing all application threads.. Fri Sep 21 12:44:59 2018 865768 done. Fri Sep 21 12:44:59 2018 - [info] ok. Fri Sep 21 12:44:59 2018 - [info] Locking all tables on the orig master to reject updates from everybody (including root): Fri Sep 21 12:44:59 2018 - [info] Executing FLUSH TABLES WITH READ LOCK.. Fri Sep 21 12:44:59 2018 - [info] ok. 
Fri Sep 21 12:44:59 2018 - [info] Orig master binlog:pos is mysql-bin.000008:629044. Fri Sep 21 12:44:59 2018 - [info] Waiting to execute all relay logs on Fri Sep 21 12:44:59 2018 - [info] master_pos_wait(mysql-bin.000008:629044) completed on Executed 0 events. Fri Sep 21 12:44:59 2018 - [info] done. Fri Sep 21 12:44:59 2018 - [info] Getting new master's binlog name and position.. Fri Sep 21 12:44:59 2018 - [info] mysql-bin.000015:545235 Fri Sep 21 12:44:59 2018 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='', MASTER_PORT=9106, MASTER_LOG_FILE='mysql-bin.000015', MASTER_LOG_POS=545235, MASTER_USER='repl', MASTER_PASSWORD='xxx'; Fri Sep 21 12:44:59 2018 - [info] Executing master ip online change script to allow write on the new master: Fri Sep 21 12:44:59 2018 - [info] /usr/local/bin/master_ip_online_change --command=start --orig_master_host= --orig_master_ip= --orig_master_port=9106 --orig_master_user='blufly' --orig_master_password='852741' --new_master_host= --new_master_ip= --new_master_port=9106 --new_master_user='blufly' --new_master_password='852741' --orig_master_ssh_user=root --new_master_ssh_user=root --orig_master_ssh_port=65535 --new_master_ssh_port=65535 --orig_master_is_new_slave Unknown option: orig_master_ssh_port Unknown option: new_master_ssh_port Fri Sep 21 12:45:00 2018 107287 Set read_only=0 on the new master. Enabling the VIP - on the new master - ssh: connect to host port 22: Connection refused Fri Sep 21 12:45:00 2018 - [info] ok. Fri Sep 21 12:45:00 2018 - [info] Fri Sep 21 12:45:00 2018 - [info] * Switching slaves in parallel.. Fri Sep 21 12:45:00 2018 - [info] Fri Sep 21 12:45:00 2018 - [info] -- Slave switch on host started, pid: 19526 Fri Sep 21 12:45:00 2018 - [info] Fri Sep 21 12:45:01 2018 - [info] Log messages from ... 
Fri Sep 21 12:45:01 2018 - [info] Fri Sep 21 12:45:00 2018 - [info] Waiting to execute all relay logs on Fri Sep 21 12:45:00 2018 - [info] master_pos_wait(mysql-bin.000008:629044) completed on Executed 0 events. Fri Sep 21 12:45:00 2018 - [info] done. Fri Sep 21 12:45:00 2018 - [info] Resetting slave and starting replication from the new master Fri Sep 21 12:45:00 2018 - [info] Executed CHANGE MASTER. Fri Sep 21 12:45:00 2018 - [info] Slave started. Fri Sep 21 12:45:01 2018 - [info] End of log messages from ... Fri Sep 21 12:45:01 2018 - [info] Fri Sep 21 12:45:01 2018 - [info] -- Slave switch on host succeeded. Fri Sep 21 12:45:01 2018 - [info] Unlocking all tables on the orig master: Fri Sep 21 12:45:01 2018 - [info] Executing UNLOCK TABLES.. Fri Sep 21 12:45:01 2018 - [info] ok. Fri Sep 21 12:45:01 2018 - [info] Starting orig master as a new slave.. Fri Sep 21 12:45:01 2018 - [info] Resetting slave and starting replication from the new master Fri Sep 21 12:45:01 2018 - [info] Executed CHANGE MASTER. Fri Sep 21 12:45:01 2018 - [info] Slave started. Fri Sep 21 12:45:01 2018 - [info] All new slave servers switched successfully. Fri Sep 21 12:45:01 2018 - [info] Fri Sep 21 12:45:01 2018 - [info] * Phase 5: New master cleanup phase.. Fri Sep 21 12:45:01 2018 - [info] Fri Sep 21 12:45:01 2018 - [info] Resetting slave info succeeded. Fri Sep 21 12:45:01 2018 - [info] Switching master to completed successfully.
#--orig_master_is_new_slave 切换时加上此参数是将原 master 变为新 master 的 slave 节点;如果不加此参数,原来的 master 不会作为 slave 加入新的复制拓扑
#--running_updates_limit=10000,在线切换时,如果当前 master 上有执行时间超过该值的写操作,或者任一 slave 的延迟超过该值,MHA 会中止切换;加上此参数表示在此时间范围内(单位为秒,默认为 1 秒)都可以切换,但是切换的时间长短是由 recover 时 relay 日志的大小决定
#通过MHA Manager监控,查看集群里面现在谁是master
[root@db53 ~]# /etc/init.d/mha_manager start [root@db53 ~]# masterha_check_status --conf=/etc/masterha/app1.cnf app1 (pid:19644) is running(0:PING_OK), master:
[root@db51 ~]# ip addr|grep inet scope global eno16777984
[root@maxscale61 ~]# maxadmin -S /tmp/maxadmin.sock MaxScale> list servers Servers. -------------------+-----------------+-------+-------------+-------------------- Server | Address | Port | Connections | Status -------------------+-----------------+-------+-------------+-------------------- server1 | | 9106 | 0 | Master, Running server2 | | 9106 | 0 | Slave, Running server3 | | 9106 | 0 | Slave, Running -------------------+-----------------+-------+-------------+--------------------
至此mysql一主两从的主从同步、MHA高可用已实现,下一步就是实现Maxscale读写分离和Maxscale HA,相关脚本已放到https://github.com/ivehu/mha