Shell 系统信息脚本,资源瓶颈告警脚本

Linux 下分析系统资源性能瓶颈的常用方法:

1、查看CPU利用率与负载(top、vmstat、sar)

2、查看磁盘、Inode利用率与I/O负载(df、iostat、iotop、sar、dstat)

3、查看内存利用率(free、vmstat)

4、查看TCP连接状态(netstat、 ss)

5、查看CPU与内存占用最高的10个进程(top、ps)

6、查看网络流量(ifconfig、iftop、iptraf)

实例1:系统信息脚本
#!/bin/bash
#show system information

# Detect the running distribution and pick the matching package manager.
# Globals:
#   REDHAT - written: first word of /etc/redhat-release when that file exists
#   DEBIAN - written: first word of /etc/issue otherwise
#   P_M    - written: "yum" or "apt-get"
# Outputs: an error message on stderr when the distribution is not recognised
# Exits:   status 1 when the distribution is not recognised
os_check() {
        REDHAT=
        DEBIAN=
        if [[ -e /etc/redhat-release ]]; then
                # Only the first word of the first line is needed.
                read -r REDHAT _ < /etc/redhat-release
        elif [[ -r /etc/issue ]]; then
                read -r DEBIAN _ < /etc/issue
        fi

        if [[ "$REDHAT" == "CentOS" || "$REDHAT" == "Red" ]]; then
                P_M=yum
        elif [[ "$DEBIAN" == "Ubuntu" || "$DEBIAN" == "ubuntu" ||
                "$DEBIAN" == "Debian" || "$DEBIAN" == "debian" ]]; then
                P_M=apt-get
        else
                echo "Operating system does not support." >&2
                exit 1
        fi
}
# Print three one-second CPU samples: utilisation, user, system and I/O wait.
_cc_cpu_load() {
        local i util user sys iowait
        echo "-----------------------------------"
        for ((i = 1; i <= 3; i++)); do
                echo -e "\033[32m ${i} \033[0m"
                # "vmstat 1 2": the first report is the average since boot, so
                # only the last line (measured over the past second) is used.
                # The one-second interval also paces the loop.
                read -r user sys util iowait < <(
                        vmstat 1 2 |
                                awk '{ us = $13; sy = $14; id = $15; wa = $16 }
                                     END { print us, sy, 100 - id, wa }'
                )
                echo "util:${util}%"
                echo "User use:${user}%"
                echo "System use:${sys}%"
                echo "I/O wait:${iowait}%"
        done
        echo "-----------------------------------"
}

# Print three one-second disk samples: %util, read KB/s and write KB/s per
# device, plus the CPU I/O wait reported by iostat.
_cc_disk_load() {
        local i
        echo "------------------------------------"
        for ((i = 1; i <= 3; i++)); do
                echo -e "\033[32m ${i} \033[0m"
                # Columns are located by header name because their positions
                # differ between sysstat versions. Each new "Device" header
                # resets the table, so END sees only the last (one-second)
                # report and not the since-boot one.
                LC_ALL=C iostat -x -k 1 2 | awk '
                        $1 == "avg-cpu:" {
                                for (i = 2; i <= NF; i++)
                                        if ($i == "%iowait") wa_col = i - 1
                                want_cpu = 1
                                next
                        }
                        want_cpu { iowait = $wa_col; want_cpu = 0; next }
                        /^Device/ {
                                n = 0
                                for (i = 1; i <= NF; i++) col[$i] = i
                                in_dev = 1
                                next
                        }
                        in_dev && NF == 0 { in_dev = 0; next }
                        in_dev {
                                n++
                                dev[n] = $1
                                util[n] = $(col["%util"])
                                rd[n] = $(col["rkB/s"])
                                wr[n] = $(col["wkB/s"])
                        }
                        END {
                                print "util:"
                                for (k = 1; k <= n; k++) print dev[k] ": " util[k] "%"
                                print "Read:"
                                for (k = 1; k <= n; k++) print dev[k] ": " rd[k] "KB"
                                print "Write:"
                                for (k = 1; k <= n; k++) print dev[k] ": " wr[k] "KB"
                                print "I/O wait:" iowait "%"
                        }'
        done
        echo "------------------------------------"
}

# Print the size of every disk and list /dev filesystems whose usage exceeds
# the threshold (10%, as in the original script).
_cc_disk_use() {
        local threshold=10 disk_total alerts now
        disk_total=$(fdisk -l |
                awk '/^Disk.*bytes/ && /\/dev/ { printf "%s %s%s\n", $2, $3, $4 }')
        now=$(date +%F-%H:%M)
        # One df pass yields usage and mount point together, so two
        # filesystems with the same percentage cannot be confused.
        alerts=$(df -hP | awk -v limit="$threshold" -v now="$now" '
                /^\/dev/ && $0 !~ /\/dev\/sr0/ && int($5) > limit {
                        print now ":" $6 " = " int($5) "%"
                }')
        echo "-----------------------------------"
        echo -e "Disk total:\n$disk_total"
        if [[ -n "$alerts" ]]; then
                echo "------------------------------"
                printf '%s\n' "$alerts"
                echo "------------------------------"
        else
                echo "---------------------------------"
        fi
}

# Print inode usage of every /dev filesystem and list those above 90%.
_cc_disk_inode() {
        local threshold=90 df_out inode_total alerts now
        df_out=$(df -iP)
        inode_total=$(awk '/^\/dev/ { print $1 ": " int($5) "%" }' <<<"$df_out")
        now=$(date +%F-%H:%M)
        alerts=$(awk -v limit="$threshold" -v now="$now" '
                /^\/dev/ && int($5) > limit {
                        print now ":" $6 " = " int($5) "%"
                }' <<<"$df_out")
        echo -e "Disk inode: \n$inode_total"
        if [[ -n "$alerts" ]]; then
                echo "------------------------------"
                printf '%s\n' "$alerts"
                echo "------------------------------"
        else
                echo "------------------------------"
                echo "Inode use rate no than 90% of the partition."
                echo "------------------------------"
        fi
}

# Print total, used, free and buff/cache memory in MB.
_cc_mem_use() {
        # LC_ALL=C keeps the "Mem:" row label untranslated.
        LC_ALL=C free -m | awk '/^Mem:/ {
                printf "Total: %.1fM\nUse: %.1fM\nFree: %.1fM\nCache: %.1fM\n", $2, $3, $4, $6
        }'
        echo "----------------------------"
}

# Print the number of TCP sockets in each connection state.
_cc_tcp_status() {
        local count
        echo "---------------------------"
        count=$(ss -ant |
                awk 'NR > 1 { state[$1]++ } END { for (s in state) print s, state[s] }')
        echo -e "TCP connection status:\n$count"
        echo "---------------------------"
}

# Print, three times one second apart, the ten processes with the highest
# value in one "ps aux" column, skipping values of 0.1 or less.
# Arguments:
#   $1 - column number of "ps aux" to rank by (3 = %CPU, 4 = %MEM)
#   $2 - label printed in front of the value ("CPU" or "Memory")
_cc_top10() {
        local column=$1 label=$2 i top
        echo "----------------------------"
        for ((i = 1; i <= 3; i++)); do
                # Sort the raw ps rows first, then format: the formatted line
                # no longer has the numeric column as a separate field.
                top=$(ps aux |
                        LC_ALL=C sort -k"${column},${column}" -nr |
                        awk -v c="$column" -v label="$label" '
                                $c + 0 > 0.1 && shown < 10 {
                                        cmd = $11
                                        for (f = 12; f <= NF; f++) cmd = cmd " " $f
                                        printf "PID:%s %s:%s --> %s\n", $2, label, $c, cmd
                                        shown++
                                }')
                if [[ -n "$top" ]]; then
                        echo -e "\033[32m ${i} \033[0m"
                        printf '%s\n' "$top"
                else
                        echo "No process using the ${label}."
                        break
                fi
                sleep 1
                echo "----------------------------"
        done
}

# Ask for a network interface and print its inbound/outbound rate three times.
# Returns 1 when stdin reaches end-of-file before a valid name is entered.
_cc_traffic() {
        local ens stats i old_in old_out new_in new_out
        echo "----------------------------"
        while true; do
                read -r -p "Please enter the network card name(ens[0-9] or em[0-9]):" ens || return 1
                stats="/sys/class/net/${ens}/statistics"
                # Reject empty names and path separators before touching sysfs.
                if [[ -n "$ens" && "$ens" != */* && -r "${stats}/rx_bytes" ]]; then
                        break
                fi
                echo "Input format error or Don't have the card name,please input agin"
        done
        echo "----------------------------"
        echo -e "In -------- Out"
        for ((i = 1; i <= 3; i++)); do
                # The kernel byte counters are read directly; they do not depend
                # on the ifconfig output layout.
                old_in=$(<"${stats}/rx_bytes")
                old_out=$(<"${stats}/tx_bytes")
                sleep 1
                new_in=$(<"${stats}/rx_bytes")
                new_out=$(<"${stats}/tx_bytes")
                # bytes / 1024 / 128 == bits / 1024 / 1024, i.e. Mbit per second.
                awk -v din="$((new_in - old_in))" -v dout="$((new_out - old_out))" \
                        'BEGIN { printf "%.1fMbit/s %.1fMbit/s\n", din / 1024 / 128, dout / 1024 / 128 }'
                sleep 1
        done
        echo "--------------------------"
}

# Run the report selected in the global $input (set by the caller's "select").
# Globals:  input (read)
# Outputs:  the chosen report on stdout; "error" for an unknown selection
# Exits:    status 0 when $input is "quit"
# Note: no "break" is issued here; leaving the caller's menu loop is the
# caller's job, because "break" at function level is outside any loop.
case_check() {
        case "$input" in
                cpu_load)   _cc_cpu_load ;;
                disk_load)  _cc_disk_load ;;
                disk_use)   _cc_disk_use ;;
                disk_inode) _cc_disk_inode ;;
                mem_use)    _cc_mem_use ;;
                tcp_status) _cc_tcp_status ;;
                cpu_top10)  _cc_top10 3 CPU ;;
                mem_top10)  _cc_top10 4 Memory ;;
                traffic)    _cc_traffic ;;
                quit)
                        exit 0
                ;;
                *)
                        echo "error"
                ;;
        esac
}

# Install procps when the vmstat command is missing.
if ! command -v vmstat &>/dev/null; then
        echo "vmstat command not found,now the install."
        sleep 1
        os_check
        "$P_M" install procps -y || echo "failed to install procps" >&2
        echo "-----------------------------------------"
fi
# Install sysstat when the iostat command is missing.
if ! command -v iostat &>/dev/null; then
        echo "iostat command not found,now the install."
        sleep 1
        os_check
        "$P_M" install sysstat -y || echo "failed to install sysstat" >&2
        echo "-----------------------------------------"
fi
# Menu loop: show the menu, run the chosen report, then show the menu again.
while true
do
        # Stays 0 when "select" ends without running its body, which happens
        # on end-of-file; without this check the outer loop would spin forever.
        menu_choice_made=0
        select input in cpu_load disk_load disk_use disk_inode mem_use tcp_status cpu_top10 mem_top10 traffic quit
        do
                menu_choice_made=1
                case_check
                # Leave "select" so the next pass of the outer loop reprints the menu.
                break
        done
        if (( ! menu_choice_made )); then
                echo
                exit 0
        fi
done
实例2:如果磁盘根分区使用量超过90%就报警

查看磁盘根分区使用量的命令:df -Th | grep "/$" | awk '{print $6}'(示例输出:13%)

#!/bin/bash
# Mail root when usage of the root filesystem reaches a threshold.
# Environment:
#   THRESHOLD - optional alert percentage; defaults to 90, the limit named in
#               the description of this example.
threshold=${THRESHOLD:-90}

# "df -P /" prints exactly one data line for the root filesystem; the
# capacity is the next-to-last column and the "%" sign is stripped from it.
disk=$(df -P / | awk 'NR == 2 { sub(/%/, "", $(NF - 1)); print $(NF - 1) }')

if [[ ! "$disk" =~ ^[0-9]+$ ]]; then
        echo "cannot determine root filesystem usage" >&2
elif (( disk >= threshold )); then
        # The message reports the measured value, not a fixed number.
        echo "$(date +%F.%H:%M) 当前根分区使用量已到达${disk}%" | mail -s "disk war ..." root
fi
实例3:如果内存使用率超过60%就报警
#!/bin/bash
# Mail root when memory usage reaches a threshold.
# Environment:
#   MEM_THRESHOLD - optional alert percentage; defaults to 60, the limit named
#                   in the description of this example.

# Print the integer percentage that $1 (used) is of $2 (total).
# Returns 1, printing nothing, unless both are non-negative integers and
# total is greater than zero.
mem_usage_percent() {
        local used=$1 total=$2
        [[ "$used" =~ ^[0-9]+$ && "$total" =~ ^[0-9]+$ ]] || return 1
        (( total > 0 )) || return 1
        echo $(( used * 100 / total ))
}

mem_threshold=${MEM_THRESHOLD:-60}
# LC_ALL=C keeps the "Mem:" row label untranslated; one call to free supplies
# both numbers so they come from the same snapshot.
read -r mem_total mem_user < <(LC_ALL=C free -m | awk '/^Mem:/ { print $2, $3 }')

if mem_percent=$(mem_usage_percent "$mem_user" "$mem_total"); then
        if (( mem_percent >= mem_threshold )); then
                # The message is piped straight to mail, so no temp file is needed.
                echo "$(date +%F.%H:%M) memory的百分比为${mem_percent}%" | mail -s "mem is war..." root
        fi
else
        echo "cannot determine memory usage from free" >&2
fi

你可能感兴趣的:(shell,shell)