Nagios监控linux服务器
一、客户端
安装所需的软件(nagios-plugins、nrpe)
#wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.16.tar.gz
#wget http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.13.tar.gz

1、安装nagios-plugins
#增加一个用户
useradd nagios -s /sbin/nologin -M

解压并安装
#tar zvxf nagios-plugins-1.4.16.tar.gz && cd nagios-plugins-1.4.16
#./configure --with-nagios-user=nagios --with-nagios-group=nagios && make && make install
#chown -R nagios:nagios /usr/local/nagios

2、安装nrpe
#tar -zxvf nrpe-2.13.tar.gz && cd nrpe-2.13
#./configure --enable-ssl --with-ssl-lib (前提是已经安装了openssl与openssl-devel)
#make all && make install-plugin && make install-daemon && make install-daemon-config
3、配置nrpe

配置nrpe信息
#vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下一行
allowed_hosts=172.16.17.223,127.0.0.1   #注意修改为nagios服务器的IP:172.16.17.223

#配置监控对象
说明:由监控原理可知,检查是在被监控端执行的,结果再传给监控服务器汇总;因此设置监控的详细参数主要是修改被监控端的nrpe.cfg文件,在其中可以看到各个监控对象的定义
#vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下几行
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 10% -c 5%
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 500 -c 550
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
command[check_ping]=/usr/local/nagios/libexec/check_ping -H 192.168.1.1 -w 100.0,20% -c 500.0,60%
后两行是自添加的

4、启动nrpe,并测试
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local
#netstat -atulnp | grep nrpe
tcp        0      0 0.0.0.0:5666                0.0.0.0:*                   LISTEN      5201/nrpe           
#/usr/local/nagios/libexec/check_nrpe -H localhost
NRPE v2.13

二、服务器端
1、安装所需的软件(nagios、nagios-plugins、nrpe)下载后源码包安装,不赘述
wget http://prdownloads.sourceforge.net/sourceforge/nagios/nagios-3.4.1.tar.gz
wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.16.tar.gz
wget http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.13.tar.gz
安装完成后配置一下报警的邮箱
# grep email /usr/local/nagios/etc/objects/contacts.cfg
        email                           admin@example.com        ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
2、配置
#在commands.cfg中定义nrpe这个外部构件
#vim /usr/local/nagios/etc/nagios.cfg,打开下面这一行
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
#vim /usr/local/nagios/etc/objects/commands.cfg,增加如下内容
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
3、#配置要监控的linux主机
#cd /usr/local/nagios/etc/objects
#mv localhost.cfg rhel5.cfg
更改如下
注意下面定义的check名字要与linux客户端/usr/local/nagios/etc/nrpe.cfg文件中的名字一致
#cat  rhel5.cfg
# Define a host for the local machine
define host{
        use                     linux-server
        host_name               rhel5
        alias                   rhel5
        address                 172.16.17.220
        }

# SERVICE DEFINITIONS

# Define a service to "ping" the local machine

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       rhel5
        service_description             PING
        check_command                   check_nrpe!check_ping
        }


# Define a service to check disk space on rhel5 through NRPE.
# Thresholds are set by the check_disk command in the client's nrpe.cfg:
# warning if < 10% free, critical if < 5% free.

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       rhel5
        service_description             Root Partition
        check_command                   check_nrpe!check_disk
        }



# Define a service to check the number of currently logged in
# users on rhel5 through NRPE.  Thresholds are set by check_users in the
# client's nrpe.cfg: warning if > 5 users, critical if > 10 users.

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       rhel5
        service_description             Current Users
        check_command                   check_nrpe!check_users
        }


# Define a service to check the number of currently running procs
# on rhel5 through NRPE.  Thresholds are set by check_total_procs in the
# client's nrpe.cfg: warning if > 500 processes, critical if > 550 processes.

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       rhel5
        service_description             Total Processes
        check_command                   check_nrpe!check_total_procs
        }



# Define a service to check the load on the local machine.

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       rhel5
        service_description             Current Load
        check_command                   check_nrpe!check_load
        }



# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 20% is free

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       rhel5
        service_description             Swap Usage
        check_command                   check_nrpe!check_swap
        }

4、启动nrpe,并测试
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local
#netstat -tulpn | grep nrpe
tcp        0      0 0.0.0.0:5666                0.0.0.0:*                   LISTEN      14371/nrpe          

#/usr/local/nagios/libexec/check_nrpe -H 172.16.17.220  #linux客户端的IP地址:172.16.17.220
NRPE v2.13

#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
Total Warnings 和 Total Errors 均为 0 才说明配置语法正确;如果有错误,输出中会提示出错位置,按提示修改后重新检查
Total Warnings: 0
Total Errors:   0

Things look okay - No serious problems were detected during the pre-flight check

5、重启服务
# service nagios restart
Running configuration check...done.
Stopping nagios: done.
Starting nagios: done.
网页访问
http://ip/nagios



邮件如下