附件下载地址:https://down.51cto.com/data/2464809
1、orchestrator 配置文件PostFailoverProcesses中调用配置脚本
{ "Debug": true, "EnableSyslog": false, "ListenAddress": ":3000", "MySQLTopologyUser": "orchestrator", "MySQLTopologyPassword": "123456", "MySQLTopologyCredentialsConfigFile": "", "MySQLTopologySSLPrivateKeyFile": "", "MySQLTopologySSLCertFile": "", "MySQLTopologySSLCAFile": "", "MySQLTopologySSLSkipVerify": true, "MySQLTopologyUseMutualTLS": false, "BackendDB": "sqlite", "SQLite3DataFile": "/usr/local/orchestrator/orchestrator.sqlite3", "MySQLConnectTimeoutSeconds": 1, "DefaultInstancePort": 3306, "DiscoverByShowSlaveHosts": true, "InstancePollSeconds": 5, "DiscoveryIgnoreReplicaHostnameFilters": [ "a_host_i_want_to_ignore[.]example[.]com", ".*[.]ignore_all_hosts_from_this_domain[.]example[.]com" ], "UnseenInstanceForgetHours": 240, "SnapshotTopologiesIntervalHours": 0, "InstanceBulkOperationsWaitTimeoutSeconds": 10, "HostnameResolveMethod": "default", "MySQLHostnameResolveMethod": "@@hostname", "SkipBinlogServerUnresolveCheck": true, "ExpiryHostnameResolvesMinutes": 60, "RejectHostnameResolvePattern": "", "ReasonableReplicationLagSeconds": 10, "ProblemIgnoreHostnameFilters": [], "VerifyReplicationFilters": false, "ReasonableMaintenanceReplicationLagSeconds": 20, "CandidateInstanceExpireMinutes": 60, "AuditLogFile": "", "AuditToSyslog": false, "RemoveTextFromHostnameDisplay": ".mydomain.com:3306", "ReadOnly": false, "AuthenticationMethod": "", "HTTPAuthUser": "", "HTTPAuthPassword": "", "AuthUserHeader": "", "PowerAuthUsers": [ "*" ], "ClusterNameToAlias": { "127.0.0.1": "test suite" }, "SlaveLagQuery": "", "DetectClusterAliasQuery": "SELECT SUBSTRING_INDEX(@@hostname, '.', 1)", "DetectClusterDomainQuery": "", "DetectInstanceAliasQuery": "", "DetectPromotionRuleQuery": "", "DataCenterPattern": "[.]([^.]+)[.][^.]+[.]mydomain[.]com", "PhysicalEnvironmentPattern": "[.]([^.]+[.][^.]+)[.]mydomain[.]com", "PromotionIgnoreHostnameFilters": [], "DetectSemiSyncEnforcedQuery": "", "ServeAgentsHttp": false, "AgentsServerPort": ":3001", "AgentsUseSSL": false, "AgentsUseMutualTLS": false, "AgentSSLSkipVerify": false, "AgentSSLPrivateKeyFile": "", "AgentSSLCertFile": "", "AgentSSLCAFile": "", "AgentSSLValidOUs": [], "UseSSL": false, "UseMutualTLS": false, "SSLSkipVerify": false, "SSLPrivateKeyFile": "", "SSLCertFile": "", "SSLCAFile": "", "SSLValidOUs": [], "URLPrefix": "", "StatusEndpoint": "/api/status", "StatusSimpleHealth": true, "StatusOUVerify": false, "AgentPollMinutes": 60, "UnseenAgentForgetHours": 6, "StaleSeedFailMinutes": 60, "SeedAcceptableBytesDiff": 8192, "PseudoGTIDPattern": "", "PseudoGTIDPatternIsFixedSubstring": false, "PseudoGTIDMonotonicHint": "asc:", "DetectPseudoGTIDQuery": "", "BinlogEventsChunkSize": 10000, "SkipBinlogEventsContaining": [], "ReduceReplicationAnalysisCount": true, "FailureDetectionPeriodBlockMinutes": 1, "RecoveryPeriodBlockSeconds": 0, "RecoveryIgnoreHostnameFilters": [], "RecoverMasterClusterFilters": [ "*" ], "RecoverIntermediateMasterClusterFilters": [ "*" ], "OnFailureDetectionProcesses": [ "echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log" ], "PreFailoverProcesses": [ "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/recovery.log" ], "PostFailoverProcesses": [ "echo '(for all types) Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log", "/usr/local/bin/orch_hook.sh {failureType} {failureClusterAlias} {failedHost} {successorHost} >> /tmp/orch.log" ], "PostUnsuccessfulFailoverProcesses": [], "PostMasterFailoverProcesses": [ "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Promoted: {successorHost}:{successorPort}' >> /tmp/recovery.log" ], "PostIntermediateMasterFailoverProcesses": [ "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log" ], "CoMasterRecoveryMustPromoteOtherCoMaster": true, "DetachLostSlavesAfterMasterFailover": true, "ApplyMySQLPromotionAfterMasterFailover": true, "PreventCrossDataCenterMasterFailover": false, "PreventCro***egionMasterFailover": false, "MasterFailoverDetachSlaveMasterHost": false, "MasterFailoverLostInstancesDowntimeMinutes": 0, "PostponeSlaveRecoveryOnLagMinutes": 0, "OSCIgnoreHostnameFilters": [], "GraphiteAddr": "", "GraphitePath": "", "GraphiteConvertHostnameDotsToUnderscores": true }
2、orch_hook.sh 注意修改vip和网卡信息,MYSQL_PWD并不需要配置正确的密码,因为我使用的是5.7版本GTID复制,这个是为pseudo_gtid使用的
#!/bin/bash isitdead=$1 cluster=$2 oldmaster=$3 newmaster=$4 mysqluser="orchestrator" export MYSQL_PWD="xxxpassxxx" logfile="/var/log/orch_hook.log" # list of clusternames clusternames=(rep blea lajos) # clustername=( interface IP user Inter_IP) rep=( enp0s8 "192.168.56.121" root "192.168.56.125") if [[ $isitdead == "DeadMaster" ]]; then array=( enp0s8 "192.168.56.121" root "192.168.56.125") interface=${array[0]} IP=${array[1]} user=${array[2]} if [ ! -z ${IP} ] ; then echo $(date) echo "Revocering from: $isitdead" echo "New master is: $newmaster" echo "/usr/local/bin/orch_vip.sh -d 1 -n $newmaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster" | tee $logfile /usr/local/bin/orch_vip.sh -d 1 -n $newmaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster mysql -h$newmaster -u$mysqluser < /usr/local/bin/orch_event.sql else echo "Cluster does not exist!" | tee $logfile fi elif [[ $isitdead == "DeadIntermediateMasterWithSingleSlaveFailingToConnect" ]]; then array=( enp0s8 "192.168.56.121" root "192.168.56.125") interface=${array[0]} IP=${array[3]} user=${array[2]} slavehost=`echo $5 | cut -d":" -f1` echo $(date) echo "Revocering from: $isitdead" echo "New intermediate master is: $slavehost" echo "/usr/local/bin/orch_vip.sh -d 1 -n $slavehost -i ${interface} -I ${IP} -u ${user} -o $oldmaster" | tee $logfile /usr/local/bin/orch_vip.sh -d 1 -n $slavehost -i ${interface} -I ${IP} -u ${user} -o $oldmaster elif [[ $isitdead == "DeadIntermediateMaster" ]]; then array=( enp0s8 "192.168.56.121" root "192.168.56.125") interface=${array[0]} IP=${array[3]} user=${array[2]} slavehost=`echo $5 | sed -E "s/:[0-9]+//g" | sed -E "s/,/ /g"` showslave=`mysql -h$newmaster -u$mysqluser -sN -e "SHOW SLAVE HOSTS;" | awk '{print $2}'` newintermediatemaster=`echo $slavehost $showslave | tr ' ' '\n' | sort | uniq -d` echo $(date) echo "Revocering from: $isitdead" echo "New intermediate master is: $newintermediatemaster" echo "/usr/local/bin/orch_vip.sh -d 1 -n $newintermediatemaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster" | tee $logfile /usr/local/bin/orch_vip.sh -d 1 -n $newintermediatemaster -i ${interface} -I ${IP} -u ${user} -o $oldmaster fi
3、orch_vip.sh 脚本
#!/bin/bash emailaddress="[email protected]" sendmail=0 function usage { cat << EOF usage: $0 [-h] [-d master is dead] [-o old master ] [-s ssh options] [-n new master] [-i interface] [-I] [-u SSH user] OPTIONS: -h Show this message -o string Old master hostname or IP address -d int If master is dead should be 1 otherweise it is 0 -s string SSH options -n string New master hostname or IP address -i string Interface exmple eth0:1 -I string Virtual IP -u string SSH user EOF } while getopts ho:d:s:n:i:I:u: flag; do case $flag in o) orig_master="$OPTARG"; ;; d) isitdead="${OPTARG}"; ;; s) ssh_options="${OPTARG}"; ;; n) new_master="$OPTARG"; ;; i) interface="$OPTARG"; ;; I) vip="$OPTARG"; ;; u) ssh_user="$OPTARG"; ;; h) usage; exit 0; ;; *) usage; exit 1; ;; esac done if [ $OPTIND -eq 1 ]; then echo "No options were passed"; usage; fi shift $(( OPTIND - 1 )); # discover commands from our path ssh=$(which ssh) arping=$(which arping) ip2util=$(which ip) # command for adding our vip cmd_vip_add="sudo -n $ip2util address add ${vip} dev ${interface}" # command for deleting our vip cmd_vip_del="sudo -n $ip2util address del ${vip}/32 dev ${interface}" # command for discovering if our vip is enabled cmd_vip_chk="sudo -n $ip2util address show dev ${interface} to ${vip%/*}/32" # command for sending gratuitous arp to announce ip move cmd_arp_fix="sudo -n $arping -c 1 -I ${interface} ${vip%/*} " # command for sending gratuitous arp to announce ip move on current server cmd_local_arp_fix="sudo -n $arping -c 1 -I ${interface} ${vip%/*} " vip_stop() { rc=0 # ensure the vip is removed $ssh ${ssh_options} -tt ${ssh_user}@${orig_master} \ "[ -n \"\$(${cmd_vip_chk})\" ] && ${cmd_vip_del} && sudo ${ip2util} route flush cache || [ -z \"\$(${cmd_vip_chk})\" ]" rc=$? return $rc } vip_start() { rc=0 # ensure the vip is added # this command should exit with failure if we are unable to add the vip # if the vip already exists always exit 0 (whether or not we added it) $ssh ${ssh_options} -tt ${ssh_user}@${new_master} \ "[ -z \"\$(${cmd_vip_chk})\" ] && ${cmd_vip_add} && ${cmd_arp_fix} || [ -n \"\$(${cmd_vip_chk})\" ]" rc=$? $cmd_local_arp_fix return $rc } vip_status() { $arping -c 1 -I ${interface} ${vip%/*} if ping -c 1 -W 1 "$vip"; then return 0 else return 1 fi } if [[ $isitdead == 0 ]]; then echo "Online failover" if vip_stop; then if vip_start; then echo "$vip is moved to $new_master." if [ $sendmail -eq 1 ]; then mail -s "$vip is moved to $new_master." "$emailaddress" < /dev/null &> /dev/null ; fi else echo "Can't add $vip on $new_master!" if [ $sendmail -eq 1 ]; then mail -s "Can't add $vip on $new_master!" "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi else echo $rc echo "Can't remove the $vip from orig_master!" if [ $sendmail -eq 1 ]; then mail -s "Can't remove the $vip from orig_master!" "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi elif [[ $isitdead == 1 ]]; then echo "Master is dead, failover" # make sure the vip is not available if vip_status; then if vip_stop; then if [ $sendmail -eq 1 ]; then mail -s "$vip is removed from orig_master." "$emailaddress" < /dev/null &> /dev/null ; fi else if [ $sendmail -eq 1 ]; then mail -s "Couldn't remove $vip from orig_master." "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi fi if vip_start; then echo "$vip is moved to $new_master." if [ $sendmail -eq 1 ]; then mail -s "$vip is moved to $new_master." "$emailaddress" < /dev/null &> /dev/null ; fi else echo "Can't add $vip on $new_master!" if [ $sendmail -eq 1 ]; then mail -s "Can't add $vip on $new_master!" "$emailaddress" < /dev/null &> /dev/null ; fi exit 1 fi else echo "Wrong argument, the master is dead or live?" fi
4、orch_event.sql 我并没有使用
create database if not exists meta;
use meta;
create table if not exists pseudo_gtid_status (
anchor int unsigned not null,
originating_mysql_host varchar(128) charset ascii not null,
originating_mysql_port int unsigned not null,
originating_server_id int unsigned not null,
time_generated timestamp not null default current_timestamp,
pseudo_gtid_uri varchar(255) charset ascii not null,
pseudo_gtid_hint varchar(255) charset ascii not null,
PRIMARY KEY (anchor)
);
drop event if exists create_pseudo_gtid_event;
delimiter $$
create event if not exists
create_pseudo_gtid_event
on schedule every 5 second starts current_timestamp
on completion preserve
enable
do
begin
DECLARE lock_result INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
set @connection_id := connection_id();
set @now := now();
set @rand := floor(rand()*(1 << 32));
set @pseudo_gtid_hint := concat_ws(':', lpad(hex(unix_timestamp(@now)), 8, '0'), lpad(hex(@connection_id), 16, '0'), lpad(hex(@rand), 8, '0'));
set @_create_statement := concat('drop ', 'view if exists `meta`.`_pseudo_gtid_', 'hint__asc:', @pseudo_gtid_hint, '`');
PREPARE st FROM @_create_statement;
EXECUTE st;
DEALLOCATE PREPARE st;
/*!50600
SET innodb_lock_wait_timeout = 1;
*/
SET lock_result = GET_LOCK('pseudo_gtid_status', 0);
IF lock_result = 1 THEN
set @serverid := @@server_id;
set @hostname := @@hostname;
set @port := @@port;
set @pseudo_gtid := concat('pseudo-gtid://', @hostname, ':', @port, '/', @serverid, '/', date(@now), '/', time(@now), '/', @rand);
insert into pseudo_gtid_status (
anchor,
originating_mysql_host,
originating_mysql_port,
originating_server_id,
time_generated,
pseudo_gtid_uri,
pseudo_gtid_hint
)
values (1, @hostname, @port, @serverid, @now, @pseudo_gtid, @pseudo_gtid_hint)
on duplicate key update
originating_mysql_host = values(originating_mysql_host),
originating_mysql_port = values(originating_mysql_port),
originating_server_id = values(originating_server_id),
time_generated = values(time_generated),
pseudo_gtid_uri = values(pseudo_gtid_uri),
pseudo_gtid_hint = values(pseudo_gtid_hint)
;
SET lock_result = RELEASE_LOCK('pseudo_gtid_status');
END IF;
end
$$
delimiter ;
set global event_scheduler := 1;
5、集群信息
[root@es3 bin]# cat /etc/hosts 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4 ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 192.168.56.14 es1 192.168.56.15 es2 192.168.56.16 es3 [root@es3 bin]# cat /etc/my.cnf |grep -v ^# [mysqld] datadir=/var/lib/mysql socket=/var/lib/mysql/mysql.sock symbolic-links=0 log-error=/var/log/mysqld.log pid-file=/var/run/mysqld/mysqld.pid gtid-mode = ON enforce-gtid-consistency = ON log-slave-updates = ON log-bin=mysqlbin server-id=3 report_host=es3 [root@es3 bin]#
附记:
orchestrator 本身能完成主从切换满足条件
Requirements
To run any kind of failovers, your topologies must support either:
Oracle GTID (with
MASTER_AUTO_POSITION=1
)MariaDB GTID
Pseudo GTID
Binlog Servers
See MySQL Configuration for more details.
Automated recovery is opt in. Please consider recovery configuration.