1. 获取ip、cpu、内存信息、loadAverage
ip使用ifconfig获得
# IPv4 address of eth0. ifconfig prints it either as "inet addr:1.2.3.4"
# or as "inet 1.2.3.4" depending on the net-tools version, so an optional
# "addr:" prefix is stripped from the second field. "inet6" lines do not
# match because the pattern requires a space right after "inet".
localip=$(ifconfig eth0 | awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }')
空闲cpu由mpstat获得
# Idle CPU percentage of the "all" row. The position of the %idle column
# is not fixed (it shifts with the time format and with the set of columns
# mpstat prints), so it is looked up in the header line instead of being
# hard-coded as a field number.
cpuidle=$(LC_ALL=C mpstat | awk '
  { for (i = 1; i <= NF; i++) if ($i == "%idle") idle = i }
  idle && /all/ { print $idle; exit }')
内存使用情况由free获得
# Total memory, free memory and the free/total ratio, all taken from ONE
# run of free so the three values describe the same instant. The ratio is
# reported as 0 when the total is 0 instead of dividing by zero.
mem_stats=$(LC_ALL=C free | awk '/^Mem/ { print $2, $4, ($2 > 0 ? $4 / $2 : 0) }')
read -r totalmem freemem pcent <<EOF
$mem_stats
EOF
loadAverage由uptime获得
# 1-, 5- and 15-minute load averages. They are always the last three
# fields of the uptime line, whereas their absolute field numbers change
# with the uptime layout ("up 5 min" vs "up 10 days, 3:22"), so they are
# addressed from the end. The trailing commas of the first two are removed.
load_stats=$(LC_ALL=C uptime | awk 'NF >= 3 {
  sub(/,$/, "", $(NF - 2)); sub(/,$/, "", $(NF - 1))
  print $(NF - 2), $(NF - 1), $NF }')
read -r loadavg1 loadavg5 loadavg15 <<EOF
$load_stats
EOF
- ./etc/profile
- AlertNginxCpu=50
- AlertNginxMem=50
- AlertPcent=0.5
- AlertCpu=50
- AlertLoad=1
- msg=
- Subject='服务器超载警报'
- mailto(){
- /application/search/sendmail.py"$Subject""$msg"
- msg=
- Subject='服务器超载警报'
- }
- date=$(date)
- localip=$(/sbin/ifconfigeth0|grep'inetaddr'|awk'{print$2}'|cut-f2-d:)
- nginxcpu=$(psaxu|grepnginx|grepsearch|grepworker|awk'{print$3}')
- nginxmempcent=$(psaxu|grepnginx|grepsearch|grepworker|awk'{print$4}')
- nginxmem=$(psaxu|grepnginx|grepsearch|grepworker|awk'{print$6}')
- cpuidle=$(mpstat|grepall|awk'{print$11}')
- freemem=$(free|grepMem|awk'{print$4}')
- totalmem=$(free|grepMem|awk'{print$2}')
- freemempcent=$(free|grepMem|awk'{print$4/$2}')
- freeswap=$(free|grepSwap|awk'{print$4}')
- totalswap=$(free|grepSwap|awk'{print$2}')
- freeswappcent=$(free|grepSwap|awk'{print$4/$2}')
- loadavg1=$(uptime|awk'{print$10}'|cut-f1-d,)
- loadavg5=$(uptime|awk'{print$11}'|cut-f1-d,)
- loadavg15=$(uptime|awk'{print$12}')
- echo$date
- echo$localip
- echo'空闲cpu:'$cpuidle
- echo'空闲内存:'$freemem
- echo'总内存:'$totalmem
- echo'空闲内存比:'$freemempcent
- echo'空闲交换内存:'$freeswap
- echo'总交换内存:'$totalswap
- echo'空闲交换内存比:'$freeswappcent
- echo$loadavg1
- echo$loadavg5
- echo$loadavg15
- alldata=''$date'|'$localip'|nginx:'$nginxcpu','$nginxmempcent'('$nginxmem')|cpu_idle:'$cpuidle'|Mem:'$freemem'/'$totalmem'='$freemempcent'|Swap:'$freeswap'/'$totalswap'='$freeswappcent'|loadavg:'$loadavg1','$loadavg5','$loadavg15''
- echo$alldata>>monitor.log
- if[$(echo"$nginxcpu>=$AlertNginxCpu"|bc)=1];then
- msg=''$alldata'|nginxcpu'$nginxcpu':高于'$AlertNginxCpu''
- Subject=''$Subject':'$localip':nginxcpu'$nginxcpu'高于'$AlertNginxCpu''
- echo$msg>>monitor.log
- mailto
- fi
- if[$(echo"$nginxmempcent>=$AlertNginxMem"|bc)=1];then
- msg=''$alldata'|nginxmem'$nginxmempcent':高于'$AlertNginxMem''
- Subject=''$Subject':'$localip':nginxmem'$nginxmempcent'高于'$AlertNginxMem''
- echo$msg>>monitor.log
- mailto
- fi
- if[$(echo"$freeswappcent<=$AlertPcent"|bc)=1];then
- msg=''$alldata'|剩余虚拟内存占比'$freeswappcent':低于'$AlertPcent''
- Subject=''$Subject':'$localip':剩余虚拟内存占比'$freeswappcent'低于'$AlertPcent''
- echo$msg>>monitor.log
- mailto
- fi
- if[$(echo"$cpuidle<=$AlertCpu"|bc)=1];then
- msg=''$alldata'|cpu空闲时间'$cpuidle':低于'$AlertCpu''
- Subject=''$Subject':'$localip':cpu空闲时间'$cpuidle'低于'$AlertCpu''
- echo$msg>>monitor.log
- mailto
- fi
- if[$(echo"$loadavg1>=$AlertLoad"|bc)=1];then
- msg=''$alldata'|load'$loadavg1':高于'$AlertLoad''
- Subject=''$Subject':'$localip':load'$loadavg1'高于'$AlertLoad''
- echo$msg>>monitor.log
- mailto
- fi
- if[-n$msg];then
- echo'系统运行正常'
- fi
2. 空闲cpu小于cpu报警阈值或空闲内存比例低于内存报警阈值或loadAverage超过阈值时发送邮件报警
#!/usr/bin/env python
"""sendmail.py - mail one alert.

Usage: sendmail.py SUBJECT BODY
"""
import smtplib
import sys
from email.mime.text import MIMEText

# Recipients and SMTP account used for sending. The recipient list and the
# password are blank placeholders here and must be filled in before use.
mailto_list = [""]
mail_host = "smtp.126.com"
mail_user = "monitor_algo"
mail_pass = ""
mail_postfix = "126.com"


def send_mail(to_list, sub, context):
    """Send one plain-text mail through mail_host.

    to_list: iterable of recipient addresses; empty entries are ignored.
    sub:     subject line.
    context: message body.

    Returns True when the message was handed to the SMTP server, False
    when there is no usable recipient or any step of the SMTP dialogue
    raised. The error text is printed in the failure case.
    """
    me = mail_user + "<" + mail_user + "@" + mail_postfix + ">"
    recipients = [addr for addr in to_list if addr]
    if not recipients:
        print("no recipient configured")
        return False

    # Explicit UTF-8 so non-ASCII alert text is encoded predictably.
    msg = MIMEText(context, "plain", "utf-8")
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in a To header are comma separated.
    msg['To'] = ", ".join(recipients)

    send_smtp = None
    try:
        send_smtp = smtplib.SMTP()
        send_smtp.connect(mail_host)
        send_smtp.login(mail_user, mail_pass)
        send_smtp.sendmail(me, recipients, msg.as_string())
        return True
    except Exception as e:
        print(str(e))
        return False
    finally:
        # Release the connection on the failure path as well.
        if send_smtp is not None:
            send_smtp.close()


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stderr.write("usage: sendmail.py SUBJECT BODY\n")
        sys.exit(2)
    # Exit status reflects the result so a caller can detect a failed send.
    sys.exit(0 if send_mail(mailto_list, sys.argv[1], sys.argv[2]) else 1)
3. 加入crontab定时任务
注意:crontab 中的环境变量与用户登录环境的环境变量不一样,因为 cron 不会从缺省的用户 profile 文件中读入环境变量参数;最简单的解决方法是在任务命令中先用 source 命令(.)加载 /etc/profile。
# Every minute: load /etc/profile first (cron does not read the user's
# profile), then run the monitor script with sh.
*/1 * * * * . /etc/profile; sh /application/search/monitor.sh