nagios监控内存

nagios 监控内存

nagios 版本:nagios-3.2.0
监控端:
1、定义插件
cd /usr/local/nagios/libexec
vim check_mem
把本文末尾的 check_mem 脚本内容添加到文件中,这样监控内存的程序就做好了。
赋予权限:
chmod +x /usr/local/nagios/libexec/check_mem
检查命令:
./check_mem -h
[root@wqk1 libexec]# ./check_mem -h
Wrong Syntax: check_mem -h
Usage: check_mem [-w|--warning]<percent free> [-c|--critical]<percent free>
由帮助可以看出是以剩余内存为判断标准
./check_mem -w 90 -c 50
WARNING - 1658 MB (82%) Free Memory
[root@wqk1 libexec]# ./check_mem -w 95 -c 90
CRITICAL - 1658 MB (82%) Free Memory
2、定义命令
vim /usr/local/nagios/etc/objects/commands.cfg
   定义nrpe命令
define command{
       command_name    check_nrpe
       command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 30
}
   定义内存命令
define command{
command_name check_mem
command_line $USER1$/check_mem -w $ARG1$ -c $ARG2$
}
3、定义被监控主机
vim /usr/local/nagios/etc/objects/localhost.cfg
define host {
       host_name       wqk_centos-107
       alias           centos-107
       address         192.168.10.107
       check_command   check-host-alive
       notification_options    d,u,r
       check_interval  1
       max_check_attempts      2
       contact_groups  admins
       notification_interval   10
       notification_period     24x7
}
4、定义服务
vim /usr/local/nagios/etc/objects/localhost.cfg
define service {
       host_name       wqk_centos-107
       service_description     check_mem
       check_period    24x7
       normal_check_interval   2
       retry_check_interval    1
       max_check_attempts      5
       notification_period     24x7
       notification_options    w,u,c,r
       check_command     check_nrpe!check_mem ; 如果没有定义check_nrpe,可以写成check_mem!90!50(这种写法是直接在监控端本机执行check_mem插件);而且如果监控端和被监控端的command字段不修改,只修改这里的数值就能影响到nagios监控结果。注意:nagios对象配置文件中行内注释要用分号开头,不能用 //。
}
5、检查排错
cd /usr/local/nagios/libexec
./check_nrpe -H 192.168.10.107 -c check_mem   # 需在被监控机定义好check_mem命令之后再执行此检查
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
6、重启服务
service nagios restart
==================================================
被监控端:
1、定义插件
cd /usr/local/nagios/libexec
vim check_mem
把本文末尾的 check_mem 脚本内容添加到文件中,这样监控内存的程序就做好了。
赋予权限:
chmod +x /usr/local/nagios/libexec/check_mem
检查命令:
./check_mem -h
./check_mem -w 90 -c 50
2、定义命令
编辑nrpe配置文件:
vim /usr/local/nagios/etc/nrpe.cfg

添加:

# 依次为:回环地址、本机ip、监控机ip(指令名是 allowed_hosts,地址之间用逗号分隔、不要加空格)
allowed_hosts=127.0.0.1,192.168.0.1,192.168.0.100

# 设置90是为了看到效果,实际生产中要调低
command[check_mem]=/usr/local/nagios/libexec/check_mem -w 90 -c 50
3、重启服务
ps aux | grep nrpe
kill掉nrpe 进程
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d  # 启动nrpe服务
4、远程测试命令
在监控端测试
cd /usr/local/nagios/libexec
./check_nrpe -H 192.168.10.107 -c check_mem
****************************************************************
check_mem 脚本:
   
   
   
   
  # check_mem plugin source: save everything from the '#!/bin/bash' line down as /usr/local/nagios/libexec/check_mem
#!/bin/bash
#
# check_mem - Nagios plugin that reports free memory as a percentage of total.
#
# History:
#   L.Gill 02/05/06  V.1.0
#   LGill  17/05/06  "$percent" lt 1% fix
#
# Usage:
#   check_mem [-w|--warning] <percent free> [-c|--critical] <percent free>
#
# Both thresholds are percentages of FREE memory, so the WARNING value must
# be greater than the CRITICAL value (for example: -w 20 -c 10).
#
# Output (stdout, one line):
#   "<STATE> - <free MB> MB (<percent>%) Free Memory"
#
# Exit status:
#   0 OK, 1 WARNING, 2 CRITICAL,
#   3 UNKNOWN (bad arguments, or the output of `free -m` could not be read).

set -u

readonly PROGNAME="${0##*/}"
# Captured before argument parsing shifts the positional parameters away.
readonly ORIG_ARGS="$*"
readonly USAGE="${PROGNAME} [-w|--warning]<percent free> [-c|--critical]<percent free>"
readonly THRESHOLD_USAGE="WARNING threshold must be greater than CRITICAL: ${PROGNAME} ${ORIG_ARGS}"

readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

#######################################
# Print an explanatory line followed by the usage banner, then exit UNKNOWN.
# Exiting 0 here would make a misconfigured check look like OK to Nagios.
# Arguments: $1 - explanatory line printed above the usage banner
# Outputs:   the message and usage text on stdout
#######################################
usage_error() {
  printf '\n%s\n\nUsage: %s\n\n' "$1" "$USAGE"
  exit "$STATE_UNKNOWN"
}

#######################################
# Succeed when $1 is a non-empty string of decimal digits.
#######################################
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

#######################################
# Print the plugin status line.
# Arguments: $1 - state label, $2 - free MB, $3 - free percent
#######################################
report() {
  printf '%s - %s MB (%s%%) Free Memory\n' "$1" "$2" "$3"
}

#######################################
# Parse the thresholds, read memory figures from `free -m`, print the status
# line and exit with the matching state code.
# Arguments: the script's command-line arguments
#######################################
main() {
  local warning="" critical=""
  local mem total free_mb percent

  # "-w N -c N" is the minimum valid command line; anything shorter
  # (including -h) prints the usage text.
  if (( $# < 4 )); then
    usage_error "Wrong Syntax: ${PROGNAME} ${ORIG_ARGS}"
  fi

  while (( $# > 0 )); do
    case "$1" in
      -w|--warning)
        (( $# >= 2 )) || usage_error "Wrong Syntax: ${PROGNAME} ${ORIG_ARGS}"
        warning=$2
        shift
        ;;
      -c|--critical)
        (( $# >= 2 )) || usage_error "Wrong Syntax: ${PROGNAME} ${ORIG_ARGS}"
        critical=$2
        shift
        ;;
      *)
        # Unrecognised words are skipped silently, as the script always did.
        ;;
    esac
    shift
  done

  if ! is_uint "$warning" || ! is_uint "$critical"; then
    usage_error "Wrong Syntax: ${PROGNAME} ${ORIG_ARGS}"
  fi
  # Force base 10 so a value such as "08" is not parsed as invalid octal.
  warning=$(( 10#$warning ))
  critical=$(( 10#$critical ))

  if (( warning <= critical )); then
    usage_error "$THRESHOLD_USAGE"
  fi

  # Line 1 of `free -m` is the column header, line 2 is the "Mem:" row.
  # The header has no label above the row-name column, so header field i
  # describes row field i+1.  When an "available" column exists it is used
  # directly; otherwise fall back to free + cached ($4 + $7), which is what
  # the older column layout needs.
  mem=$(LC_ALL=C free -m 2>/dev/null | awk '
    NR == 1 { for (i = 1; i <= NF; i++) if ($i == "available") avail = i + 1 }
    NR == 2 { print $2, (avail ? $avail : $4 + $7) }
  ')
  read -r total free_mb <<<"$mem"

  if ! is_uint "$total" || ! is_uint "$free_mb" || (( total == 0 )); then
    echo "UNKNOWN - unable to read memory usage from 'free -m'"
    exit "$STATE_UNKNOWN"
  fi

  # Integer percentage of free memory, truncated toward zero.
  percent=$(( free_mb * 100 / total ))

  if (( percent <= critical )); then
    report "CRITICAL" "$free_mb" "$percent"
    exit "$STATE_CRITICAL"
  fi
  if (( percent <= warning )); then
    report "WARNING" "$free_mb" "$percent"
    exit "$STATE_WARNING"
  fi
  report "OK" "$free_mb" "$percent"
  exit "$STATE_OK"
}

main "$@"

你可能感兴趣的:(内存,nagios)