Nagios监控linux服务器
一、客户端
安装所需的软件(nagios-plugins、nrpe)
#wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.16.tar.gz
#wget http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.13.tar.gz
1、安装nagios-plugins
#增加一个用户
useradd nagios -s /sbin/nologin -M
解压并安装
#tar zvxf nagios-plugins-1.4.16.tar.gz && cd nagios-plugins-1.4.16
#./configure --with-nagios-user=nagios --with-nagios-group=nagios && make && make install
#chown -R nagios:nagios /usr/local/nagios
2、安装nrpe
#tar -zxvf nrpe-2.13.tar.gz && cd nrpe-2.13
#./configure --enable-ssl --with-ssl-lib (前提是已经安装了openssl与openssl-devel)
#make all && make install-plugin && make install-daemon && make install-daemon-config
3、配置nrpe
配置nrpe信息
#vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下一行
allowed_hosts=172.16.17.223,127.0.0.1 #注意修改为nagios服务器的IP:172.16.17.223
#配置监控对象
说明:由监控原理可知,检查是在被监控端执行的,然后将结果传给监控服务器汇总;因此设置监控的详细参数主要是修改被监控端的nrpe.cfg文件,在其中可以看到各个监控对象的定义
vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下几行
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 10% -c 5%
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 500 -c 550
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
command[check_ping]=/usr/local/nagios/libexec/check_ping -H 192.168.1.1 -w 100.0,20% -c 500.0,60%
后两行是自添加的
4、启动nrpe,并测试
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local
#netstat -atulnp | grep nrpe
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 5201/nrpe
#/usr/local/nagios/libexec/check_nrpe -H localhost
NRPE v2.13
二、服务器端
1、安装所需的软件(nagios、nagios-plugins、nrpe)下载后源码包安装,不赘述
wget http://prdownloads.sourceforge.net/sourceforge/nagios/nagios-3.4.1.tar.gz
wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.16.tar.gz
wget http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.13.tar.gz
安装完成后配置一下报警的邮箱
# grep email /usr/local/nagios/etc/objects/contacts.cfg
email admin@example.com ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
2、配置
#在commands.cfg中定义nrpe这个外部构件
#vim /usr/local/nagios/etc/nagios.cfg,打开下面这一行
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
#vim /usr/local/nagios/etc/objects/commands.cfg,增加如下几行
# check nrpe
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
3、#配置要监控的linux主机
#cd /usr/local/nagios/etc/objects
#mv localhost.cfg rhel5.cfg
更改如下
注意下面定义的check名字要与linux客户端/usr/local/nagios/etc/nrpe.cfg文件中的名字一致
#cat rhel5.cfg
# Define a host for the local machine
define host{
use linux-server
host_name rhel5
alias rhel5
address 172.16.17.220
}
# SERVICE DEFINITIONS
# Define a service to "ping" the local machine
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description PING
check_command check_nrpe!check_ping
}
# Define a service to check the disk space of the root partition
# on the monitored host. Warning if < 10% free, critical if
# < 5% free space on partition (thresholds are set in the client's nrpe.cfg).
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Root Partition
check_command check_nrpe!check_disk
}
# Define a service to check the number of currently logged in
# users on the monitored host. Warning if > 5 users, critical
# if > 10 users (thresholds are set in the client's nrpe.cfg).
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Current Users
check_command check_nrpe!check_users
}
# Define a service to check the number of currently running procs
# on the monitored host. Warning if > 500 processes, critical if
# > 550 processes (thresholds are set in the client's nrpe.cfg).
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Total Processes
check_command check_nrpe!check_total_procs
}
# Define a service to check the load on the local machine.
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Current Load
check_command check_nrpe!check_load
}
# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 20% is free
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Swap Usage
check_command check_nrpe!check_swap
}
4、启动nrpe,并测试
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local
#netstat -tulpn | grep nrpe
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 14371/nrpe
#/usr/local/nagios/libexec/check_nrpe -H 172.16.17.220 #linux客户端的IP地址:172.16.17.220
NRPE v2.13
#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
检查结果中没有错误才说明配置语法正确;如果有错误,输出会提示出错的位置,按提示改正即可
Total Warnings: 0
Total Errors: 0
Things look okay - No serious problems were detected during the pre-flight check
5、重启服务
# service nagios restart
Running configuration check...done.
Stopping nagios: done.
Starting nagios: done.
网页访问
http://ip/nagios
邮件如下
一、客户端
安装所需的软件(nagios-plugins、nrpe)
#wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.16.tar.gz
#wget http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.13.tar.gz
1、安装nagios-plugins
#增加一个用户
useradd nagios -s /sbin/nologin -M
解压并安装
#tar zvxf nagios-plugins-1.4.16.tar.gz && cd nagios-plugins-1.4.16
#./configure --with-nagios-user=nagios --with-nagios-group=nagios && make && make install
#chown -R nagios:nagios /usr/local/nagios
2、安装nrpe
#tar -zxvf nrpe-2.13.tar.gz && cd nrpe-2.13
#./configure --enable-ssl --with-ssl-lib (前提是已经安装了openssl与openssl-devel)
#make all && make install-plugin && make install-daemon && make install-daemon-config
3、配置nrpe
配置nrpe信息
#vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下一行
allowed_hosts=172.16.17.223,127.0.0.1 #注意修改为nagios服务器的IP:172.16.17.223
#配置监控对象
说明:由监控原理可知,检查是在被监控端执行的,然后将结果传给监控服务器汇总;因此设置监控的详细参数主要是修改被监控端的nrpe.cfg文件,在其中可以看到各个监控对象的定义
vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下几行
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 10% -c 5%
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 500 -c 550
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
command[check_ping]=/usr/local/nagios/libexec/check_ping -H 192.168.1.1 -w 100.0,20% -c 500.0,60%
后两行是自添加的
4、启动nrpe,并测试
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local
#netstat -atulnp | grep nrpe
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 5201/nrpe
#/usr/local/nagios/libexec/check_nrpe -H localhost
NRPE v2.13
二、服务器端
1、安装所需的软件(nagios、nagios-plugins、nrpe)下载后源码包安装,不赘述
wget http://prdownloads.sourceforge.net/sourceforge/nagios/nagios-3.4.1.tar.gz
wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.16.tar.gz
wget http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.13.tar.gz
安装完成后配置一下报警的邮箱
# grep email /usr/local/nagios/etc/objects/contacts.cfg
email admin@example.com ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
2、配置
#在commands.cfg中定义nrpe这个外部构件
#vim /usr/local/nagios/etc/nagios.cfg,打开下面这一行
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
#vim /usr/local/nagios/etc/objects/commands.cfg,增加如下几行
# check nrpe
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
3、#配置要监控的linux主机
#cd /usr/local/nagios/etc/objects
#mv localhost.cfg rhel5.cfg
更改如下
注意下面定义的check名字要与linux客户端/usr/local/nagios/etc/nrpe.cfg文件中的名字一致
#cat rhel5.cfg
# Define a host for the local machine
define host{
use linux-server
host_name rhel5
alias rhel5
address 172.16.17.220
}
# SERVICE DEFINITIONS
# Define a service to "ping" the local machine
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description PING
check_command check_nrpe!check_ping
}
# Define a service to check the disk space of the root partition
# on the monitored host. Warning if < 10% free, critical if
# < 5% free space on partition (thresholds are set in the client's nrpe.cfg).
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Root Partition
check_command check_nrpe!check_disk
}
# Define a service to check the number of currently logged in
# users on the monitored host. Warning if > 5 users, critical
# if > 10 users (thresholds are set in the client's nrpe.cfg).
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Current Users
check_command check_nrpe!check_users
}
# Define a service to check the number of currently running procs
# on the monitored host. Warning if > 500 processes, critical if
# > 550 processes (thresholds are set in the client's nrpe.cfg).
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Total Processes
check_command check_nrpe!check_total_procs
}
# Define a service to check the load on the local machine.
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Current Load
check_command check_nrpe!check_load
}
# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 20% is free
define service{
use generic-service ; Name of service template to use
host_name rhel5
service_description Swap Usage
check_command check_nrpe!check_swap
}
4、启动nrpe,并测试
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local
#netstat -tulpn | grep nrpe
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 14371/nrpe
#/usr/local/nagios/libexec/check_nrpe -H 172.16.17.220 #linux客户端的IP地址:172.16.17.220
NRPE v2.13
#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
检查结果中没有错误才说明配置语法正确;如果有错误,输出会提示出错的位置,按提示改正即可
Total Warnings: 0
Total Errors: 0
Things look okay - No serious problems were detected during the pre-flight check
5、重启服务
# service nagios restart
Running configuration check...done.
Stopping nagios: done.
Starting nagios: done.
网页访问
http://ip/nagios
邮件如下