#!/bin/bash
# Monitoring script: system load, CPU, memory, disk, HTTP services and TCP services.
# Alerts are delivered by e-mail and by SMS.
# bash (not plain sh) is required: the configuration below uses arrays.

# ---- Basic configuration ----
# Address of this host as shown in alert subjects and bodies
IP=192.168.2.248
export LANG=en_US.UTF-8
# Addresses that receive alert e-mails
# NOTE(review): the entry below looks like e-mail obfuscation residue, and the
# shell splits it into two words; replace it with the real recipient address(es).
email_list=(
  [email protected]
)
# Mobile numbers that receive alert SMS messages
sms_list=(
  13800138000
)
# HTTP endpoints to monitor, written as host:port/path
http_list=(
  127.0.0.1:80/secure/Dashboard.jspa
  # 127.0.0.1:80/
  127.0.0.1:80/index.jsp
)
# TCP endpoints to monitor, written as host:port
tcp_list=(
  127.0.0.1:22
)
# ---- End of basic configuration ----

# Timestamp appended to alert subjects and bodies. Plain `date +FORMAT` is used
# because `date -d STRING` is a GNU extension that is not portable to BSD date.
date=$(date +"%Y-%m-%d_%H:%M:%S")
# Send an e-mail to every address in email_list.
# Usage:     send_email subject tempfile
# Globals:   email_list (read), date (read; appended to the subject)
# Arguments: $1 - subject prefix
#            $2 - path of the file whose content becomes the message body
# If $2 does not exist, a "File not found" notice is mailed instead.
send_email()
{
  local subject=$1 body_file=$2 recipient
  for recipient in "${email_list[@]}"; do
    # Explicit if/else instead of `A && B || C`: with the chained form a failing
    # `mail` for an existing file also sent the "File not found" notice.
    if [ -e "$body_file" ]; then
      mail -s "$subject $date" "$recipient" < "$body_file"
    else
      echo "TempFile not found:$body_file" | mail -s "File not found $date" "$recipient"
    fi
  done
}
#Test send email
#send_email "Test email" /tmp/a
# Send an SMS through the Fetion command-line client to every number in sms_list.
# Usage:     send_sms tempFile
# Globals:   sms_list (read)
#            FETION_HOME (read, optional) - install directory, default /usr/local/fx
#            FETION_BIN  (read, optional) - client executable, default $FETION_HOME/fetion
# Arguments: $1 - path of a UTF-8 file holding the message text
# Nothing is sent when sms_list is empty. If $1 does not exist, an error text
# naming the missing file is sent instead.
send_sms()
{
  local msg_file=$1
  local fx_home=${FETION_HOME:-/usr/local/fx}
  local fx_bin=${FETION_BIN:-${fx_home}/fetion}
  local tomobile
  local -a fx_args

  # Join the numbers with commas, the list format passed to --to.
  tomobile=$(IFS=,; printf '%s' "${sms_list[*]}")
  [ -n "$tomobile" ] || return 0

  fx_args=(--hide "--config=${fx_home}/fx.conf" --index=1 "--to=$tomobile" --msg-type=1)

  # Explicit if/else instead of `A && B || C`: with the chained form a failed
  # send for an existing file also sent the "Error no this file" message.
  if [ -e "$msg_file" ]; then
    LD_LIBRARY_PATH="${fx_home}/lib" "$fx_bin" "${fx_args[@]}" \
      --file-utf8="$msg_file" >/dev/null 2>&1
  else
    LD_LIBRARY_PATH="${fx_home}/lib" "$fx_bin" "${fx_args[@]}" \
      --msg-utf8="Error no this file $msg_file" >/dev/null 2>&1
  fi
}
#Test send_sms
#send_sms /tmp/a.log
# Check the system load average and alert by e-mail and SMS when it is too high.
# Usage:     monitor_load [threshold]
# Globals:   IP, date (read)
# Arguments: $1 - alert threshold for the 15-minute load average (default 6.0)
# Returns:   0 when the check ran, 1 when the load could not be read or no
#            temporary file could be created
monitor_load() {
  local threshold=${1:-6.0}
  local subject loads load1 load5 load15 report
  subject="Alert $IP $(hostname) Load Average"

  # Sample uptime once so the three figures belong to the same snapshot.
  # The pattern accepts both "load average:" (Linux) and "load averages:" (BSD);
  # commas are turned into blanks so both list formats split the same way.
  loads=$(LC_ALL=C uptime |
    awk -F 'load averages?: *' '{ gsub(/,/, " ", $2); print $2 }')
  read -r load1 load5 load15 <<<"$loads"
  if [ -z "$load15" ]; then
    echo "monitor_load: cannot parse load averages from uptime" >&2
    return 1
  fi

  # Floating-point comparison done in awk; nothing to do below the threshold.
  awk -v cur="$load15" -v max="$threshold" 'BEGIN { exit !(cur + 0 > max + 0) }' ||
    return 0

  # A template works with both GNU and BSD mktemp, so no per-OS branch is needed.
  report=$(mktemp "${TMPDIR:-/tmp}/monitor_load.XXXXXX") || return 1
  {
    echo "Alert From $IP $(hostname) "
    echo "Load average Crossed allowed limit $threshold."
    echo "Hostname: $(hostname)"
    echo "Local Date & Time : $date"
    # uptime reports the 1, 5 and 15 minute averages, in that order.
    echo "1 min load : $load1"
    echo "5 min load : $load5"
    echo "15 min load : $load15"
  } > "$report"
  send_email "$subject" "$report"
  send_sms "$report"
  rm -f "$report"
}
#Test monitor_load
#monitor_load 3.0
# Check disk usage and alert by e-mail and SMS when any filesystem is too full.
# Usage:     monitor_disk [max_percent]
# Globals:   IP (read)
# Arguments: $1 - alert when the fullest filesystem exceeds this use% (default 80)
# Returns:   0 when the check ran, 1 when no temporary file could be created
monitor_disk(){
  local limit=${1:-80}
  local subject usage report
  subject="Alert $IP $(hostname) Disk use surpasses the limitation value "

  # `df -P` prints one line per filesystem (no wrapping of long device names).
  # Only the header line is skipped; the previous `sed 1,2d` also discarded the
  # first filesystem, so it was never checked. The use% column is found by its
  # "NN%" shape and the highest value wins.
  usage=$(df -P | awk '
    NR > 1 {
      for (i = 2; i <= NF; i++)
        if ($i ~ /^[0-9]+%$/) {
          pct = $i + 0
          if (pct > max) max = pct
          break
        }
    }
    END { print max + 0 }')

  awk -v cur="$usage" -v max="$limit" 'BEGIN { exit !(cur + 0 > max + 0) }' ||
    return 0

  # A template works with both GNU and BSD mktemp, so no per-OS branch is needed.
  report=$(mktemp "${TMPDIR:-/tmp}/monitor_disk.XXXXXX") || return 1
  {
    echo "Alert From $IP $(hostname) "
    echo "DISK Crossed allowed limit $limit%."
    echo " "
    df -h
  } > "$report"
  send_email "$subject" "$report"
  send_sms "$report"
  rm -f "$report"
}
#Test monitor_disk
#monitor_disk 40
# Check user-mode CPU usage and alert by e-mail and SMS when it is too high.
# Usage:     monitor_cpu [max_percent]
# Globals:   IP (read)
# Arguments: $1 - alert when the vmstat "us" value exceeds this percentage (default 70)
# Returns:   0 when the check ran, 1 when vmstat could not be parsed or no
#            temporary file could be created
monitor_cpu(){
  local limit=${1:-70}
  local subject cpu_user report
  subject="Alert $IP $(hostname) CPU use surpasses the limitation value "

  # The first vmstat report is an average since boot, so take two reports one
  # second apart and keep the last one. The "us" column is located through the
  # header line instead of a fixed position, which differs between vmstat flavours.
  cpu_user=$(vmstat 1 2 | awk '
    !col {
      for (i = 1; i <= NF; i++)
        if ($i == "us") { col = i; break }
      next
    }
    $col ~ /^[0-9]+$/ { val = $col }
    END { print val }')
  if [ -z "$cpu_user" ]; then
    echo "monitor_cpu: cannot parse CPU usage from vmstat" >&2
    return 1
  fi

  awk -v cur="$cpu_user" -v max="$limit" 'BEGIN { exit !(cur + 0 > max + 0) }' ||
    return 0

  # A template works with both GNU and BSD mktemp, so no per-OS branch is needed.
  report=$(mktemp "${TMPDIR:-/tmp}/monitor_cpu.XXXXXX") || return 1
  {
    echo "Alert From $IP $(hostname) "
    echo "CPU Crossed allowed limit $limit%."
    echo " "
    # List only processes that are using CPU; the former `$3 >= 0` filter was
    # always true and dumped the whole process table into the alert.
    ps aux | awk 'NR > 1 && $3 > 0'
  } > "$report"
  send_email "$subject" "$report"
  send_sms "$report"
  rm -f "$report"
}
#Test monitor_cpu
#monitor_cpu 30
# Check free memory and alert by e-mail and SMS when it is too low.
# Usage:     monitor_mem [min_free_mb]
# Globals:   IP (read)
# Arguments: $1 - alert when the "free" column of `free -m` for the memory line
#                 is at or below this many MB (default 200)
# Returns:   0 when the check ran, 1 when `free` could not be parsed or no
#            temporary file could be created
monitor_mem(){
  local min_free=${1:-200}
  local subject mem_free report
  subject="Alert $IP $(hostname) MEM check info "

  mem_free=$(free -m | awk '/^M/ { print $4; exit }')
  case $mem_free in
    '' | *[!0-9]*)
      # Without this guard an unreadable value silently disabled the alert.
      echo "monitor_mem: cannot parse free memory from 'free -m'" >&2
      return 1
      ;;
  esac

  awk -v cur="$mem_free" -v min="$min_free" 'BEGIN { exit !(cur + 0 <= min + 0) }' ||
    return 0

  # A template works with both GNU and BSD mktemp, so no per-OS branch is needed.
  report=$(mktemp "${TMPDIR:-/tmp}/monitor_mem.XXXXXX") || return 1
  {
    echo "Alert From $IP $(hostname) "
    echo "Memory remaining space is less than $min_free M."
    echo "Memory surplus space : $mem_free M."
    echo " "
    # One row each for the Mem and Swap lines. The total is checked before
    # dividing: a host without swap reports a total of 0, which used to abort
    # awk with a division-by-zero error.
    free -m | awk '
      BEGIN { print "Item\tTotal\tUSED\tFREE\tFree%" }
      /^[MS]/ {
        pct = ($2 > 0) ? $4 / $2 * 100 : 0
        print $1 "\t" $2 "\t" $3 "\t" $4 "\t" pct
      }'
  } > "$report"
  send_email "$subject" "$report"
  send_sms "$report"
  rm -f "$report"
}
#Test monitor_mem
#monitor_mem 30
# Probe every endpoint in http_list and alert by e-mail and SMS when one is down.
# Usage:     monitor_http
# Globals:   http_list (read; entries look like host:port/path), IP (read)
# Each endpoint gets a HEAD request with a 10 s limit and, if that fails, a
# second one with a 30 s limit. Only when both fail is an alert sent; it says
# whether the host still answers ping and includes the response headers, if any.
# Returns:   1 when no temporary file could be created
monitor_http(){
  local subject entry hostport server_ip server_port server_url report
  subject="Alert $IP $(hostname) Http check Info "

  for entry in "${http_list[@]}"; do
    # Split host:port/path with parameter expansion. Entries without a port or
    # without a path yield an empty field instead of leaking into the others.
    hostport=${entry%%/*}
    server_ip=${hostport%%:*}
    server_port=
    case $hostport in
      *:*) server_port=${hostport#*:}; server_port=${server_port%%:*} ;;
    esac
    server_url=
    case $entry in
      */*) server_url=/${entry#*/} ;;
    esac

    # Healthy endpoints produce no alert, so there is nothing to write for them.
    if curl -m 10 -I -f "http://$entry" > /dev/null 2>&1 ||
       curl -m 30 -I -f "http://$entry" > /dev/null 2>&1; then
      continue
    fi

    # A template works with both GNU and BSD mktemp, so no per-OS branch is needed.
    report=$(mktemp "${TMPDIR:-/tmp}/monitor_http.XXXXXX") || return 1
    {
      echo "Alert From $IP $(hostname) "
      if ping -c 1 "$server_ip" > /dev/null 2>&1; then
        echo "Server:${server_ip}, Port:${server_port}, URL:${server_url} is ERROR! Ping is OK!"
      else
        echo "Server:${server_ip}, Port:${server_port}, URL:${server_url} is ERROR! Ping is ERROR!"
      fi
      # Without -f curl prints the headers even for an HTTP error status.
      curl -m 10 -I "http://$entry" 2>/dev/null
    } > "$report"
    send_email "$subject" "$report"
    send_sms "$report"
    rm -f "$report"
  done
}
#Test Http
#monitor_http
# Probe every endpoint in tcp_list and alert by e-mail and SMS when one is down.
# Usage:     monitor_tcp
# Globals:   tcp_list (read; entries look like host:port), IP (read)
# Each endpoint gets an `nc -z` connect test with a 3 s timeout and, if that
# fails, a second one with a 10 s timeout. Only when both fail is an alert sent;
# it says whether the host still answers ping.
# Returns:   1 when no temporary file could be created
monitor_tcp(){
  local subject entry server_ip server_port report
  subject="Alert $IP $(hostname) TCP check Info "

  for entry in "${tcp_list[@]}"; do
    server_ip=${entry%%:*}
    server_port=
    case $entry in
      *:*) server_port=${entry#*:}; server_port=${server_port%%:*} ;;
    esac

    # Reachable endpoints produce no alert, so there is nothing to write for them.
    if nc -vv -z -w 3 "$server_ip" "$server_port" > /dev/null 2>&1 ||
       nc -vv -z -w 10 "$server_ip" "$server_port" > /dev/null 2>&1; then
      continue
    fi

    # A template works with both GNU and BSD mktemp, so no per-OS branch is needed.
    report=$(mktemp "${TMPDIR:-/tmp}/monitor_tcp.XXXXXX") || return 1
    {
      echo "Alert From $IP $(hostname) "
      # A blank now separates the port from "is"; the text used to read "Port:22is ERROR".
      if ping -c 1 "$server_ip" > /dev/null 2>&1; then
        echo "Server:${server_ip}, Port:${server_port} is ERROR,Ping is Ok!"
      else
        echo "Server:${server_ip}, Port:${server_port} is ERROR,Ping is ERROR !"
      fi
    } > "$report"
    send_email "$subject" "$report"
    send_sms "$report"
    rm -f "$report"
  done
}
#Test monitor_tcp
#monitor_tcp
# Entry point: run every check once with the thresholds chosen for this host.
main() {
  monitor_load 3.0   # 15-minute load average above 3.0
  monitor_disk 40    # fullest filesystem above 40 %
  monitor_cpu 30     # CPU value above 30 %
  monitor_mem 20     # free memory at or below 20 MB
  monitor_http       # every endpoint in http_list
  monitor_tcp        # every endpoint in tcp_list
}
main