1.Nagios监控linux服务器平台及所用组件:
    监控 服务器 :CentOS 5.3x64(192.168.1.202)+nagios-3.2.1+ nagios-plugins-1.4.15+ nrpe_2.8.1
    被监控端 : CentOS 5.3x64(192.168.1.203) + nagios-plugins-1.4.15+ nrpe_2.8.1

注意:两端nrpe的版本要一致,不然无法相连接

 

NRPE总共由两部分组成:

    check_nrpe插件,运行在监控主机上。

    NRPE daemon,运行在远程的linux主机上(通常就是被监控机)


2.配置监控端
安装nagios
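useradd nagios
passwd nagios
groupadd nagcmd                  #创建外部命令组,下面configure的--with-command-group=nagcmd会用到
usermod -a -G nagcmd nagios
usermod -a -G nagcmd apache      #让Web界面(apache用户)可以提交外部命令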
cd nagios-3.2.1
./configure --prefix=/usr/local/nagios --with-command-group=nagcmd --with-gd-lib=/usr/lib --with-gd-inc=/usr/include
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf        #可以自动配置httpd.conf
安装nagios-plugins
cd nagios-plugins-1.4.15
./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-redhat-pthread-workaround
make
make install
安装NRPE

#yum install openssl-devel (安装 openssl包)

cd nagios-nrpe_2.8.1
./configure   #默认自动添加了openssl
#因为传送过程要加密,如果后面make报错,加如下参数
rpm -qa| grep ssl
openssl-devel-0.9.7a-43.17.el4_6.1
rpm -ql openssl-devel-0.9.7a-43.17.el4_6.1 | more
./configure --enable-ssl --with-ssl-lib=/lib/    #当然前提要有openssl
make all
make install-plugin
commands.cfg定义外部构件nrpe
vi /usr/local/nagios/etc/objects/commands.cfg
#添加
#check nrpe
define command{
        command_name check_nrpe
        command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
        }
配置要监控的linux主机
vi /usr/local/nagios/etc/nagios.cfg
#中间添加
cfg_file=/usr/local/nagios/etc/objects/mylinux.cfg
新建mylinux.cfg 设置监控内容
vi /usr/local/nagios/etc/objects/mylinux.cfg
define host{
           use             linux-server
          host_name     mylinux
          alias            mylinux
          address        192.168.1.203    ; 客户端IP,即被监控端的IP
        }
define service{
        use                      generic-service
        host_name             mylinux
        service_description     check-swap
        check_command        check_nrpe!check_swap
               }
define service{
        use                      generic-service
        host_name             mylinux
        service_description     check-load
       check_command         check_nrpe!check_load
               }
define service{
        use                      generic-service
        host_name             mylinux
       service_description     check-disk
       check_command        check_nrpe!check_hda1
              }
define service{
        use                      generic-service
        host_name             mylinux
       service_description     check-users
       check_command        check_nrpe!check_users
               }
define service{
        use                    generic-service
        host_name           mylinux
        service_description  total_procs
        check_command     check_nrpe!check_total_procs
}
#这5个是默认的监控服务,如需自定义服务可在这里添加
自定义服务可参考:http://saplingidea.javaeye.com/blog/514013
如有必要则需更改check阈值,在被监控端的/usr/local/nagios/etc/nrpe.cfg,下面会提到

其它设置
chkconfig --add nagios    #配置机器启动时自动启动Nagios
chkconfig nagios on
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查Nagios配置文件
vi /etc/selinux/config      #关闭SELinux
SELINUX=disabled
service iptables stop  #关闭防火墙,或打开80,5666端口
service nagios start


3.配置被监控端

1.安装nagios-plugin

useradd nagios
passwd nagios
tar -zxvf nagios-plugins-1.4.15.tar.gz
cd nagios-plugins-1.4.15
./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-redhat-pthread-workaround
make
make install
改变主目录权限
chown -R  nagios.nagios /usr/local/nagios
[root@client nagios]# ll
drwxr-xr-x  2 nagios nagios 4096 Jun  1 00:07 libexec
drwxr-xr-x  3 nagios nagios 4096 Jun  1 00:07 share
安装客户端的nrpe

yum install openssl-devel    #安装 openssl包

tar -zxvf nagios-nrpe_2.8.1.orig.tar.gz
cd nagios-nrpe_2.8.1
./configure    #会自动加载SSL
#如果后面make报错,加如下参数
./configure --enable-ssl --with-ssl-lib=/usr/lib/    #当然前提要有openssl
make all
make install-plugin
make install-daemon
make install-daemon-config
配置nrpe信息
vi /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=127.0.0.1,192.168.1.202
启动nrpe
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#上面这一步在本机上可以运行,如通过ssh连接,则需用: /usr/local/nagios/bin/nrpe -d
vi /etc/rc.d/rc.local
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
验证nrpe
netstat -an | grep 5666
tcp      0    0 0.0.0.0:5666             0.0.0.0:*         LISTEN
/usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.8.1
#服务端测试
/usr/local/nagios/libexec/check_nrpe -H 192.168.1.203
NRPE v2.8.1
#常见错误
1./usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
CHECK_NRPE: Error - Could not complete SSL handshake.
配置allowed_hosts=127.0.0.1,192.168.1.202然后kill进程再重启就OK
2./usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
Connection refused by host
Nrpe进程没有启动

配置监控对象(关键)
    由监控原理可知,被监控端执行监控,然后把结果传给监控服务器汇总,因此设置监控详细参数主要是设置被监控端的nrpe.cfg文件
    可以看到里面监控对象
vi /usr/local/nagios/etc/nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20 -c 10 -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
#这里就是上面提到的check阈值,如需更改,加减后面的数值即可