2011-06-28 14:42 nagios CPU温度检测插件(转)

转自:http://hi.baidu.com/icanwen/item/e6ed673c3f79e7302f20c403

最近公司机房的空调经常自动关闭,导致机房温度过高、引起设备故障,因此想到用 Nagios 检测 CPU 温度:当温度超过设定的上限时,自动发送短信(SMS)到手机。已在 Dell R200 上测试通过。本文参照了网上很多高手的资料,在此谢过!

Nagios 主程序的服务定制设置如下:

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       fileserver
        service_description             cpu temperature
        check_command                   check_nrpe!check_cpu_temp
        }

被监控主机服务定制如下:

vi /usr/local/nagios/etc/nrpe.cfg

#解释:用 sensors 检测 CPU 的温度,当 CPU 温度超过 35 度时报警,超过 40 度时处于紧急状态

command[check_cpu_temp]=/usr/local/nagios/libexec/check_cputemp.sh -m sensors -w 35 -c 40

不可以上传附件 插件代码直接贴到这里了:)

#!/bin/sh
######################check_cputemp#######################
#version: 1.0
#Author : xiaoyong wen

#date : May 2011

#Licence GPLv2
#INSTALLATION
#this script requires lm_sensors to be installed
#the output of sensors must look like the format below
#########################################
#coretemp-isa-0000   #
#Adapter: ISA adapter   #
#Core 0:      +27°C  (high =   +85°C) #                  
     #
#coretemp-isa-0001   #
#Adapter: ISA adapter   #
#Core 1:      +25°C  (high =   +85°C) #
#########################################
#you can use NRPE to define service in nagios
#check_nrpe!check_cputemp.sh
######my taobao shop http://ujjj.taobao.com######just a AD :)####

# Plugin return statements
# Exit codes the plugin hands back to its caller, one per reported state.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
# NOTE(review): STATE_UNKNOWN is not referenced anywhere in the visible script;
# the error paths use a literal `exit 3` instead.
STATE_UNKNOWN=3

# Print a one-line hint on stdout telling the user to run the script with -h.
# printf is used instead of the global $Echo ("echo -e"): the script runs under
# #!/bin/sh, where shells such as dash print the "-e" literally, and "echo -e"
# would also interpret any backslash contained in $0.
print_help_msg(){
 printf '%s\n' "Usage: $0 -h to get help."
}

# Print the full usage text (synopsis, constraints and an example) on stdout,
# one message per line.
# A single printf replaces the repeated $Echo ("echo -e") calls: "echo -e" is
# not portable under #!/bin/sh (dash prints the "-e" literally) and would
# interpret backslashes contained in $0. The message text itself is unchanged.
print_full_help_msg(){
 printf '%s\n' \
  "Usage:" \
  "$0 [ -v ] -m sensors -w cpuT -c cpuT" \
  "Sepicify the method to use the temperature data sensors." \
  "And the corresponding Critical value must greater than Warning value." \
  "Example:" \
  "${0} -m sensors -w 50 -c 55"
}

# Print "Error." followed by the full usage text (via print_full_help_msg).
# printf replaces $Echo ("echo -e"), which is not portable under #!/bin/sh:
# dash prints the "-e" literally.
print_err_msg(){
 printf '%s\n' "Error."
 print_full_help_msg
}


# Append all arguments, joined into a single line, to the per-process debug log
# /var/log/check_sys_temperature.log.<PID> -- but only when the global Debug is
# "true" (set by the -v option). Otherwise do nothing and return 0.
# printf '%s\n' is used instead of $Echo ("echo -e") so the logged text is
# written verbatim: "echo -e" would expand backslash sequences in the data and,
# under a /bin/sh such as dash, would write a literal "-e" into the log.
to_debug(){
 if [ "$Debug" = "true" ]; then
  printf '%s\n' "$*" >> /var/log/check_sys_temperature.log.$$ 2>&1
 fi
}

# Drop LANG before any external tool is called.
# NOTE(review): presumably so that the sensors output parsed later does not
# depend on the caller's locale -- confirm.
unset LANG

# Command the rest of the script uses to print its messages.
Echo="echo -e"

# No argument at all: point the user at -h and leave with exit code 3.
if [ $# -lt 1 ]; then
 print_help_msg
 exit 3
fi

# Options:
#   -v        enable the debug log (see to_debug)
#   -h        print the full help and exit 3
#   -m NAME   measurement method; only "sensors" is accepted
#   -w N      warning threshold
#   -c N      critical threshold
# The leading ':' in the option string puts getopts in silent mode, so an
# unknown option ('?') and a missing option argument (':') both end up in the
# catch-all arm below.
while getopts :vhm:w:c: OPTION
do
 case $OPTION in
  v)
   Debug=true
   ;;
  m)
   method=$OPTARG
   ;;
  w)
   WARNING=$OPTARG
   ;;
  c)
   CRITICAL=$OPTARG
   ;;
  h)
   print_full_help_msg
   exit 3
   ;;
  *)
   $Echo "Error: Illegal Option."
   print_help_msg
   exit 3
   ;;
 esac
done

# "sensors" is the only supported method; anything else is a usage error.
if [ "$method" != "sensors" ]; then
 $Echo "Error. Must to sepcify the method to use sensors."
 print_full_help_msg
 exit 3
fi
use_sensors="true"
to_debug use_sensors

# Both thresholds are mandatory and must be plain non-negative integers.
# The comparison at the end of the script uses `[ ... -gt ... ]`, which fails
# on an empty or non-numeric operand; that failure evaluates as "false", so
# without this check a forgotten -w/-c would silently be reported as OK.
for threshold in "$WARNING" "$CRITICAL"
do
 case $threshold in
  ''|*[!0-9]*)
   $Echo "Error. -w and -c must both be given as non-negative integers."
   print_full_help_msg
   exit 3
   ;;
 esac
done

to_debug All Values  are \" Warning: "$WARNING" and Critical: "$CRITICAL" \".
#########lm_sensors##################
# Read the CPU core temperatures through lm_sensors and store their integer
# average (whole degrees) in TEMP. Any failure ends the plugin with exit code 3.
if [ "$use_sensors" = "true" ]; then

 # command -v is the POSIX way to locate a program; unlike `which` it is a
 # shell builtin and prints nothing when the program is missing, hence the
 # explicit message below.
 sensorsCheckOut=$(command -v sensors 2>/dev/null)
 if [ -z "$sensorsCheckOut" ]; then
  echo "sensors: command not found in PATH"
  echo Maybe you need to check your sensors.
  exit 3
 fi
 to_debug Use $sensorsCheckOut to check system temperature

 # Run sensors once, so every core is read from the same sample, and pick the
 # readings by their "Core N:" label instead of by fixed line numbers. For the
 # two-core layout documented in the file header this gives the same result as
 # averaging lines 3 and 7. The third field looks like "+27°C"; int() keeps the
 # leading whole-degree part. Nothing is printed when no core line was found,
 # which leaves TEMP empty for the check below.
 TEMP=$(sensors | awk '
  /^Core [0-9]+:/ {
   reading = $3
   sub(/^\+/, "", reading)
   if (reading ~ /^[0-9]/) {
    total += int(reading)
    cores++
   }
  }
  END {
   if (cores > 0)
    print int(total / cores)
  }')
 if [ -z "$TEMP" ] ; then
  $Echo "No Data been get here. Please confirm your ARGS and re-check it with Verbose mode, then to check the log."
  exit 3
 fi
 to_debug temperature data is $TEMP

else
 $Echo "Error. Must to sepcify the method to use sensors"
 print_full_help_msg
 exit 3
fi

######### Compare the measured temperature with the user-supplied thresholds ############
CPU_TEMP=$TEMP

# Assume OK, then escalate. A level only fires when the temperature is strictly
# above its threshold and that threshold is not "0"; critical takes precedence
# over warning.
STATE="$STATE_OK"
STATE_MESSAGE="OK"
if [ "$CPU_TEMP" -gt "$CRITICAL" ] && [ "$CRITICAL" != "0" ]; then
 STATE="$STATE_CRITICAL"
 STATE_MESSAGE="CRITICAL"
elif [ "$CPU_TEMP" -gt "$WARNING" ] && [ "$WARNING" != "0" ]; then
 STATE="$STATE_WARNING"
 STATE_MESSAGE="WARNING"
fi
to_debug $STATE , Message is $STATE_MESSAGE

# Single status line on stdout, then hand the state back as the exit code.
echo "The TEMPERATURE ${STATE_MESSAGE} - The CPU's Temperature is ${CPU_TEMP} degree"

exit $STATE

你可能感兴趣的:(linux,nrpe)