http://ju.outofmemory.cn/entry/51075


机房没有温度报警装置,我用此方法实现对机房温度的掌控,如果只有一台报警,则可认为单机故障,如果几台同时报警,则可认为机房空调出现了问题。


具体实现方法如下:


环境:被监控机:CentOS 6.4


1、安装硬件传感器监控软件 sensors

#yum install lm_sensors*


2、运行sensors-detect进行传感器检测


#sensors-detect ##一路回车即可,此步我在虚拟机下报错,但在物理机上没有问题


3、运行sensors看是否能读取数据,输出类似下面这样则表示正常:


[root@rd02 ~]# sensors


coretemp-isa-0000


Adapter: ISA adapter


Core 0: +32.0°C (high = +76.0°C, crit = +100.0°C)


Core 1: +32.0°C (high = +76.0°C, crit = +100.0°C)


4、#vi /usr/local/nagios/libexec/check_cputemp ##粘贴如下#号之间的内容


##########################################################


#!/bin/sh


#########check_cputemp###########


#date : May 2011


#Licence GPLv2


#INSTALLATION


#this script requires lm_sensors to be installed


#the output of sensors needs to look like the format below


#########################################


#coretemp-isa-0000#


#Adapter: ISA adapter#


#Core 0: +27°C (high = +85°C)#


#


#coretemp-isa-0001#


#Adapter: ISA adapter#


#Core 1: +25°C (high = +85°C) #


#########################################


#you can use NRPE to define service in nagios


#check_nrpe!check_cputemp.sh


# Plugin return codes: the exit status of the plugin reports the service state.
# They are readonly so nothing later in the script can overwrite them by accident.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3


# Print a one-line hint that tells the user how to get the full help text.
# Arguments: none
# Outputs:   the hint on stdout
print_help_msg() {
  # printf with plain ASCII quotes: the typographic quotes of the original were
  # literal characters to the shell, and printf does not depend on $Echo being set.
  printf '%s\n' "Usage: $0 -h to get help."
}


# Print the complete usage text: synopsis, a short explanation and an example.
# Arguments: none
# Outputs:   six lines on stdout
print_full_help_msg() {
  # One printf call emits every line; plain ASCII quotes replace the typographic
  # ones that the shell treated as ordinary characters.
  printf '%s\n' \
    "Usage:" \
    "$0 [ -v ] -m sensors -w cpuT -c cpuT" \
    "Specify the method to use the temperature data sensors." \
    "And the corresponding Critical value must be greater than Warning value." \
    "Example:" \
    "${0} -m sensors -w 40 -c 50"
}


# Print a generic error marker followed by the complete usage text.
# Arguments: none
# Outputs:   "Error." and then the output of print_full_help_msg, on stdout
print_err_msg() {
  # ASCII quotes and printf, so the line works even when $Echo is not set.
  printf '%s\n' "Error."
  print_full_help_msg
}


# Append a debug message to the log file when verbose mode is active.
# Globals:   Debug (read) - messages are written only when it equals "true"
#            CHECK_CPUTEMP_LOG (read, optional) - log file to append to;
#            defaults to /var/log/check_sys_temperature.log.<pid of this shell>
# Arguments: the words of the message; they are joined with single spaces
# Outputs:   one line appended to the log file, nothing on stdout
to_debug() {
  if [ "$Debug" = "true" ]; then
    # "$*" keeps the whole message as one string; printf '%s\n' writes it
    # verbatim (no escape processing) and does not need $Echo to be defined.
    printf '%s\n' "$*" \
      >> "${CHECK_CPUTEMP_LOG:-/var/log/check_sys_temperature.log.$$}" 2>&1
  fi
}


# ---- Command-line handling -------------------------------------------------
# Sets: Echo, and from the options Debug (-v), method (-m), WARNING (-w),
# CRITICAL (-c); sets use_sensors="true" when the method is "sensors".
# Every usage problem ends the script with exit status 3.

# NOTE(review): presumably LANG is dropped so that the locale cannot change the
# output of the tools called later - confirm.
unset LANG

# Command used by this script to print messages. The original line used
# typographic quotes, which made the shell try to run a command named "-e"
# instead of assigning the variable.
Echo="echo -e"

# Without any argument there is nothing to do: show the short hint and stop.
if [ $# -lt 1 ]; then
  print_help_msg
  exit 3
fi

# The leading ':' in the option string selects silent error reporting:
# getopts then returns '?' for an unknown option and ':' for a missing argument.
while getopts :vhm:w:c: OPTION; do
  case "$OPTION" in
    v)
      Debug=true
      ;;
    m)
      method=$OPTARG
      ;;
    w)
      WARNING=$OPTARG
      ;;
    c)
      CRITICAL=$OPTARG
      ;;
    h)
      print_full_help_msg
      exit 3
      ;;
    : | \?)
      # An unescaped '?' is a glob that matches any single character; both
      # error indicators are listed explicitly instead.
      $Echo "Error: Illegal Option."
      print_help_msg
      exit 3
      ;;
  esac
done

# "sensors" is the only supported method; anything else is a usage error.
if [ "$method" != "sensors" ]; then
  $Echo "Error. Must specify the method to use sensors."
  print_full_help_msg
  exit 3
fi

use_sensors="true"
to_debug use_sensors
to_debug "All Values are \" Warning: $WARNING and Critical: $CRITICAL \"."


#########lm_sensors##################
# Read the CPU temperature with the lm_sensors "sensors" command.
# Reads:  use_sensors
# Sets:   sensorsCheckOut (path of the sensors binary) and TEMP (integer
#         average, in degrees Celsius, of every "Core N:" line of the output)
# Exits with status 3 when the method is not selected, the command is missing
# or no core temperature could be read.

if [ "$use_sensors" != "true" ]; then
  echo "Error. Must specify the method to use sensors"
  print_full_help_msg
  exit 3
fi

# command -v is the portable way to locate a program; it prints nothing when
# the program is missing, so the first output line is written here.
if ! sensorsCheckOut=$(command -v sensors 2>&1); then
  echo "sensors: command not found"
  echo "Maybe you need to check your sensors."
  exit 3
fi
to_debug "Use $sensorsCheckOut to check system temperature"

# Lines are selected by their "Core N:" label instead of by position, so extra
# header lines or more than two cores do not break the parsing. The third field
# looks like "+32.0°C"; everything except digits and the dot is stripped, which
# also keeps readings of 100 degrees and above intact. The average is truncated
# to an integer because the threshold comparison works on integers.
TEMP=$(sensors 2>/dev/null | awk '
  /^Core [0-9]+:/ {
    reading = $3
    gsub(/[^0-9.]/, "", reading)
    if (reading != "") {
      total += reading
      cores++
    }
  }
  END {
    if (cores > 0) {
      printf "%d", total / cores
    }
  }')

# This check runs before TEMP is used anywhere, so an empty reading is reported
# instead of causing an arithmetic error.
if [ -z "$TEMP" ]; then
  echo "No Data been get here. Please confirm your ARGS and re-check it with Verbose mode, then to check the log."
  exit 3
fi

to_debug "temperature data is $TEMP"


######### Comparison with the warning and critical thresholds given by the user ############
# Reads:  TEMP, WARNING, CRITICAL, STATE_OK, STATE_WARNING, STATE_CRITICAL,
#         STATE_UNKNOWN
# Sets:   CPU_TEMP, STATE, STATE_MESSAGE
# Prints one status line and exits with STATE. A threshold of 0 disables the
# corresponding check.

CPU_TEMP=$TEMP

# [ -gt ] fails with an error on empty or non-numeric operands, and the
# original then fell through to the OK branch. A monitoring check must not
# report OK in that case, so anything that is not a plain non-negative integer
# ends the run with the UNKNOWN status.
for checked_value in "$CPU_TEMP" "$WARNING" "$CRITICAL"; do
  case "$checked_value" in
    '' | *[!0-9]*)
      echo "UNKNOWN - temperature '$CPU_TEMP', warning '$WARNING' and critical '$CRITICAL' must all be non-negative integers."
      exit "$STATE_UNKNOWN"
      ;;
  esac
done

# The critical threshold is tested first so that it wins over the warning one.
if [ "$CRITICAL" != "0" ] && [ "$CPU_TEMP" -gt "$CRITICAL" ]; then
  STATE="$STATE_CRITICAL"
  STATE_MESSAGE="CRITICAL"
elif [ "$WARNING" != "0" ] && [ "$CPU_TEMP" -gt "$WARNING" ]; then
  STATE="$STATE_WARNING"
  STATE_MESSAGE="WARNING"
else
  STATE="$STATE_OK"
  STATE_MESSAGE="OK"
fi
to_debug "$STATE , Message is $STATE_MESSAGE"

# ASCII quotes: with the typographic quotes of the original, STATE contained
# quote characters and "exit" received a non-numeric argument.
echo "The TEMPERATURE $STATE_MESSAGE - The CPU's Temperature is $CPU_TEMP ℃ !"

exit "$STATE"


#######################################################