监控MySQL主从同步是否异常,如果异常,则发送短信或者邮件给管理员。
1)开发一个守护进程脚本,每30秒检测一次。
2)如果同步出现如下错误号(1158,1159,1008,1007,1062),则跳过该错误。
3)如果IO和SQL线程出现异常,则发送邮件通知管理员。

使用数组技术实现上述脚本(获取主从判断及错误号部分)

主从监控脚本如下:

#!/bin/bash
# Replication error numbers that fun_Status compares My_CODE against; a match
# triggers the stop / skip-one-event / start sequence.
# NOTE(review): 2003 is not among the error numbers listed in the task
# description at the top of this file - confirm it is meant to be skipped.
CODE=(1158 1159 1008 1007 1062 2003)
#######################################
# Collect one snapshot of host and replication state for the current cycle.
#
# `show slave status` is queried exactly once: the output is written to the
# status log and the thread flags / error number are parsed from that file,
# so the values used for decisions always match what gets mailed.
#
# Globals written (read by fun_Status / fun_Check, so deliberately not local):
#   IP, Time, DIR, Status_Log, Check_log, Erro_log, Total, Mail_Rec,
#   My_SQL, My_IO, My_CODE
# Returns:
#   exit status of the mysql query; 1 if the log directory cannot be created.
#######################################
fun_Base() {
  local query_rc

  # 1.1 IP address: second field of the second line of `ifconfig eth0`.
  IP=$(ifconfig eth0 | awk 'NR==2{print $2}')

  # 1.3 Timestamp for this cycle; it is embedded in every path below.
  Time=$(date +%F-%H:%M:%S)

  # 1.4 Log directory and files (one directory per cycle).
  DIR=/tmp/slave_${Time}
  Status_Log=$DIR/slave_status_${Time}.log
  Check_log=$DIR/slave_check_${Time}.log
  Erro_log=$DIR/slave_err_${Time}.log

  # 1.5 Mail subject and recipient.
  Total="$IP slave status $Time"
  # NOTE(review): this literal looks like a redacted placeholder rather than
  # a deliverable address - replace it with the real recipient.
  Mail_Rec="[email protected]"

  # Reset first so a failed cycle never reuses the previous cycle's values.
  My_SQL=''
  My_IO=''
  My_CODE=''

  # 2. Save the slave status to the log file.
  mkdir -p -- "$DIR" || return 1
  mysql -e "show slave status\G" >"$Status_Log"
  query_rc=$?

  # 1.2 Slave SQL / IO thread state and error number, taken from the snapshot.
  # NOTE(review): Last_IO_Errno only carries I/O-thread errors; SQL-thread
  # errors such as 1062 are reported in Last_SQL_Errno - confirm which field
  # the skip logic should look at.
  My_SQL=$(awk '/SQL_Running:/ {print $NF}' "$Status_Log")
  My_IO=$(awk '/IO_Running:/ {print $NF}' "$Status_Log")
  My_CODE=$(awk '/Last_IO_Errno:/ {print $NF}' "$Status_Log")

  return "$query_rc"
}

#3. Check the slave error number against the skip list
#######################################
# If My_CODE equals one of the numbers in CODE, skip the offending event:
# stop slave -> SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1 -> start slave.
# Each statement runs only if the previous one succeeded. When all three
# succeed, a confirmation is written to Erro_log and both the status log and
# the confirmation are mailed.
#
# Globals read:    CODE, My_CODE, Total, Mail_Rec, Status_Log, Erro_log
# Globals written: RETVAL (status of the stop/skip/start sequence, 0 if
#                  nothing had to be skipped)
# Returns:         RETVAL
#######################################
fun_Status() {
  local skippable
  RETVAL=0

  # A missing or non-numeric error number (e.g. the status query failed)
  # cannot match anything; bail out instead of feeding it to `-eq`.
  [[ "$My_CODE" =~ ^[0-9]+$ ]] || return 0

  for skippable in "${CODE[@]}"; do
    [ "$My_CODE" -eq "$skippable" ] || continue

    # $? after an && chain is the status of the last statement that actually
    # ran, i.e. of the first failure, or 0 when all three succeeded.
    mysql -e "stop slave;" \
      && mysql -e "SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;" \
      && mysql -e "start slave;"
    RETVAL=$?

    if [ "$RETVAL" -eq 0 ]; then
      echo "slave errno code is successful." >"$Erro_log"
      # Mail_Rec is left unquoted on purpose so a space-separated recipient
      # list still expands to several addresses.
      mail -s "$Total" $Mail_Rec <"$Status_Log" \
        && mail -s "$Total" $Mail_Rec <"$Erro_log"
    fi
    break   # at most one entry can equal My_CODE
  done

  return "$RETVAL"
}

#4. Check whether the IO and SQL threads are healthy
# Reads My_SQL / My_IO, prints a one-line verdict, stores it in Check_log
# (overwriting on success, appending on failure) and mails the status log
# followed by the check log.
# NOTE(review): mail is sent on healthy cycles as well, not only on failure -
# confirm that this is intended.
fun_Check() {
  local verdict
  if [ "$My_SQL" != "Yes" ] || [ "$My_IO" != "Yes" ]; then
    # At least one replication thread is not reporting "Yes".
    verdict="slave status is failed."
    printf '%s\n' "$verdict"
    printf '%s\n' "$verdict" >>$Check_log
  else
    verdict="slave status is successful."
    printf '%s\n' "$verdict"
    printf '%s\n' "$verdict" >$Check_log
  fi

  # Both outcomes send the same two messages.
  mail -s "$Total" $Mail_Rec <$Status_Log
  mail -s "$Total" $Mail_Rec <$Check_log
}

#5. Main loop
#######################################
# Daemon entry point: runs fun_Base, fun_Status and fun_Check in that order,
# then sleeps, forever.
#
# Environment:
#   CHECK_INTERVAL - seconds to sleep between cycles (default 30). The task
#                    description for this script asks for a check every 30
#                    seconds; the loop previously slept a hard-coded 20.
# Returns: never returns under normal operation.
#######################################
main() {
  local interval=${CHECK_INTERVAL:-30}

  while true; do
    fun_Base
    fun_Status
    fun_Check
    sleep "$interval"
  done
}

# Start the monitoring loop (main ignores its arguments).
main "$@"