【转载】http://blog.chinaunix.net/uid-26978448-id-3408364.html
#
cat check_snmp_storage.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# -*- coding: cp950 -*-
'''
Create date: 2012-10-30
Last update: 2012-10-30
Version: 1.0
Description: Monitor Disk usage
Author: Victor
QQ:1409175531
'''
- import sys
- import netsnmp
- from decimal import *
def help():
    """Print the command-line usage summary to standard output.

    The name shadows the ``help`` builtin; it is kept because the script
    body calls the function by this name.  The text ``sys.argv[0]`` is
    printed literally as a placeholder for the script name.
    """
    # A parenthesised single-argument print produces the same output under
    # the Python 2 print statement and the Python 3 print function.
    print('Usage:\n'
          'sys.argv[0] <Community> <Host> <Device> '
          '<Warning_threshold> <Critical_threshold>')
# Exit codes understood by Nagios: 0 = OK, 1 = Warning, 2 = Critical,
# 3 = Unknown.
_OK, _WARNING, _CRITICAL, _UNKNOWN = 0, 1, 2, 3
_BYTES_PER_MB = 1024 * 1024
_STATUS_LINE = '%s - "%s" 使用了:%s%% | 总容量:%s MB , 已使用:%s MB (w:%s%%, c:%s%%)'


def _format_percent(value):
    """Render a Decimal without exponent notation or trailing zeros."""
    return format(value.normalize(), 'f')


def parse_thresholds(warning_text, critical_text):
    """Convert the two threshold arguments to Decimal fractions.

    Args:
        warning_text: warning threshold as typed on the command line,
            e.g. ``'0.8'`` for 80%.
        critical_text: critical threshold as typed on the command line.

    Returns:
        A ``(warning, critical)`` tuple of Decimal values.

    Raises:
        ValueError: if either value is not a finite number, or if the
            warning threshold is greater than the critical threshold.
    """
    try:
        warning = Decimal(warning_text)
        critical = Decimal(critical_text)
    except InvalidOperation:
        raise ValueError('Thresholds must be decimal numbers such as 0.8 and 0.9')
    if not (warning.is_finite() and critical.is_finite()):
        raise ValueError('Thresholds must be decimal numbers such as 0.8 and 0.9')
    # Compare numerically: comparing the raw strings rejects valid pairs
    # such as '0.5' / '.9' because '0' sorts after '.'.
    if warning > critical:
        raise ValueError('Critical_threshold must be more than Warning_threshold')
    return warning, critical


def evaluate_usage(device, total_mb, used_mb, warning, critical):
    """Classify storage usage against the warning/critical fractions.

    Args:
        device: storage description shown in the status line.
        total_mb: total capacity in MB.
        used_mb: used capacity in MB.
        warning: warning threshold as a fraction (0.8 means 80%).
        critical: critical threshold as a fraction.

    Returns:
        An ``(exit_code, message)`` tuple; exit_code is 0, 1, 2 or 3.
    """
    total_mb = Decimal(total_mb)
    used_mb = Decimal(used_mb)
    if total_mb <= 0:
        # A percentage cannot be computed for an empty or bogus size.
        return _UNKNOWN, 'Unknown'

    # Work at full precision; values are only truncated for display, so a
    # reading of 84.6% is not rounded up into an 85% warning.
    used_percent = used_mb * 100 / total_mb
    warning_percent = Decimal(warning) * 100
    critical_percent = Decimal(critical) * 100

    if used_percent < warning_percent:
        label, code = 'OK', _OK
    elif used_percent < critical_percent:
        label, code = 'Warning', _WARNING
    else:
        # Includes usage exactly equal to the critical threshold.
        label, code = 'Critical', _CRITICAL

    message = _STATUS_LINE % (
        label,
        device,
        int(used_percent),
        int(total_mb),
        int(used_mb),
        _format_percent(warning_percent),
        _format_percent(critical_percent),
    )
    return code, message


def _walk_column(session, oid_name):
    """Walk a single OID with the given session and return the result."""
    return session.walk(netsnmp.VarList(netsnmp.Varbind(oid_name)))


def main(argv):
    """Run the check for ``argv`` and return the exit code.

    ``argv`` is laid out like ``sys.argv``:
    script, community, host, device, warning threshold, critical threshold.
    """
    if len(argv) < 6:
        help()
        return _UNKNOWN

    community, host, device = argv[1:4]
    try:
        warning, critical = parse_thresholds(argv[4], argv[5])
    except ValueError as err:
        print(err)
        return _UNKNOWN

    session = netsnmp.Session(Version=2, Community=community, DestHost=host)
    descriptions = _walk_column(session, 'hrStorageDescr')
    sizes = _walk_column(session, 'hrStorageSize')
    used_counts = _walk_column(session, 'hrStorageUsed')
    unit_sizes = _walk_column(session, 'hrStorageAllocationUnits')

    # Index the rows by description; the lookup fails when the walk
    # returned nothing or the device name does not match.
    rows = dict(zip(descriptions, zip(sizes, used_counts, unit_sizes)))
    try:
        size, used_count, unit_size = rows[device]
    except KeyError:
        print('Timeout or please check syntax/community/Host or other params.')
        return _UNKNOWN

    try:
        allocation = Decimal(unit_size)
        total_mb = Decimal(size) * allocation / _BYTES_PER_MB
        used_mb = Decimal(used_count) * allocation / _BYTES_PER_MB
    except (InvalidOperation, TypeError, ValueError):
        # A missing or non-numeric value cannot be evaluated.
        print('Unknown')
        return _UNKNOWN

    code, message = evaluate_usage(device, total_mb, used_mb, warning, critical)
    print(message)
    return code


if __name__ == '__main__':
    sys.exit(main(sys.argv))
将脚本check_snmp_storage.py放到/usr/local/nagios/libexec目录下,给执行权限。直接执行脚本可以看到脚本的用法:
#
python check_snmp_storage.py
Usage:
sys.argv[0] <Community> <Host> <Device> <Warning_threshold> <Critical_threshold>
<Device> 设备名,如“Physical memory”、“Swap space”、 “/usr/local”等,如果不确定设备名,可先使用snmpdf查看,如:
<Device> 需要与上图Description列的名字吻合,大小写必须一致,否则脚本通过snmp去取数据时会找不到设备。
<Warning_threshold> 和 <Critical_threshold> 是报警阈值,用小数表示,如0.8表示80%,0.9表示90%。
举例:
commands.cfg配置如下:
define command{
command_name check_snmp_storage
command_line $USER1$/check_snmp_storage.py $ARG1$ $HOSTADDRESS$ $ARG2$ $ARG3$ $ARG4$ $ARG5$
}
services.cfg配置如下:
define service{
use service02
host_name test
service_description Mem
notifications_enabled 0
check_command check_snmp_storage!public!'Physical memory'!0.85!0.9
}
以上完成后重启nagios服务,效果图如下: