系统:Red Hat Enterprise Linux Server release 5 (Tikanga)
监控机172.20.51.3 210.51.173.153
被监控机172.20.51.154
一、在主控机上做如下的操作:
date -s "17:22:22 20090101"
安装前准备:
1).安装GD库
#!/bin/bash
yum install gcc
yum install cpp
yum install gcc-c++
yum install ncurses
yum install ncurses-devel
yum install gd
yum install gd-devel php-gd
yum install zlib
yum install zlib-devel
yum install freetype-devel freetype-demos freetype-utils
yum install libpng-devel libpng10 libpng10-devel
yum install libjpeg-devel
yum install ImageMagick
yum install flex
yum install ImageMagick-devel
yum install glibc glibc-common
2).安装apache
tar xvfz httpd-2.2.9.tar.gz
ls
cd httpd-2.2.9
ls
./configure --prefix=/usr/local/apache2 --enable-so --enable-mods-shared=all
make
make install
3).下载Nagios3.0.5,Nagios-plugins1.4.11,nrpe2.12,nsclient++
wget http://osdn.dl.sourceforge.net/sourceforge/nagios/nagios-3.0.5.tar.gz
wget http://osdn.dl.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.11.tar.gz
wget http://osdn.dl.sourceforge.net/sourceforge/nagios/nrpe-2.12.tar.gz
注:Nagios3.0.5,Nagios-plugins1.4.11,nrpe2.12安装在监控服务器上。
在被监控服务器(Linux/unix)上安装Nagios-plugins和nrpe
nsclient++安装在Windows被监控端。
4).创建帐号及组
useradd -m nagios
passwd nagios
groupadd nagcmd
usermod -a -G nagcmd nagios
usermod -a -G nagcmd daemon
#daemon为运行apache的帐号。
5).安装nagios
tar xvfz nagios-3.0.5.tar.gz
ls
cd nagios-3.0.5
ls
./configure --with-command-group=nagcmd
make all
make install # 使用make install来安装主程序,CGI和HTML文件
make install-init # 使用make install-init在/etc/rc.d/init.d安装启动脚本
make install-config # 使用make install-config来安装示例配置文件,安装的路径是/usr/local/nagios/etc
make install-commandmode # 使用make install-commandmode来配置目录权限
/usr/local/apache2/bin/htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin #生成一个用户密码
6).配置apache
vim /usr/local/apache2/conf/httpd.conf
加入下面的内容
<IfModule alias_module>
#add by sunyankui for nagios
ScriptAlias /nagios/cgi-bin "/usr/local/nagios/sbin"
<Directory "/usr/local/nagios/sbin">
# SSLRequireSSL
Options ExecCGI
AllowOverride None
Order allow,deny
Allow from all
# Order deny,allow
# Deny from all
# Allow from 127.0.0.1
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>
Alias /nagios "/usr/local/nagios/share"
<Directory "/usr/local/nagios/share">
# SSLRequireSSL
Options None
AllowOverride None
Order allow,deny
Allow from all
# Order deny,allow
# Deny from all
# Allow from 127.0.0.1
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>
</IfModule>
7).重启apache
/usr/local/apache2/bin/apachectl -k restart
8).安装插件
tar xvfz nagios-plugins-1.4.11.tar.gz
cd nagios-plugins-1.4.11
./configure --prefix=/usr/local/nagios --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules
make
make install
9).加入启动
chkconfig --add nagios
service nagios start
10).做监控的相关配置
[root@monitor ~]# su - nagios
[nagios@monitor ~]$ cd /usr/local/nagios/etc/
在这个目录下自己创建servers目录。Nagios可以加载一个目录下面的所有配置文件(需要在nagios.cfg中配置)。
[nagios@monitor etc]$ mkdir servers
[nagios@monitor etc]$ cd servers/
新建两个文件
hostgroup.cfg #自己创建的主机群组配置文件
wiki-l-11.cfg #自己创建的监控远程Linux主机的配置文件
[nagios@monitor servers]$ vim hostgroup.cfg
#定义主机组(localhost.cfg中有类似的主机组设置,我已将其注释掉,否则可能会有冲突)
define hostgroup{
hostgroup_name linux-servers ; The name of the hostgroup
alias Linux Servers ; Long name of the group
members localhost,wiki ; Comma separated list of hosts that belong to this group
}
#define hostgroup{
# hostgroup_name windows-servers ; The name of the hostgroup
# alias Windows Servers ; Long name of the group
# members print ; Comma separated list of hosts that belong to this group
# }
[nagios@monitor servers]$ vim wiki-l-11.cfg
#定义主机
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name wiki
alias Docs
address 172.20.51.154 ;被监控机的ip
}
#定义Ping远程Linux主机
define service{
use generic-service ; Name of service template to use
host_name wiki
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
#检查远程Linux主机根分区使用情况,check_nrpe命令必须在/usr/local/nagios/etc/objects/commands.cfg中定义(默认未定义)
define service{
use generic-service ; Name of service template to use
host_name wiki
service_description Root Partition
check_command check_nrpe!check_disk_root
}
#检查远程Linux主机的登录人数
define service{
use generic-service ; Name of service template to use
host_name wiki
service_description Current Users
check_command check_nrpe!check_users
}
#检查远程Linux的主机的负载
define service{
use generic-service ; Name of service template to use
host_name wiki
service_description Current Load
check_command check_nrpe!check_load
}
#检查远程Linux主机swap分区使用情况
define service{
use generic-service ; Name of service template to use
host_name wiki
service_description Swap Usage
check_command check_nrpe!check_swap
}
#检查远程Linux主机的SSH服务
define service{
use generic-service ; Name of service template to use
host_name wiki
service_description SSH
check_command check_ssh
notifications_enabled 0
}
#检查远程Linux主机的HTTP服务
define service{
use generic-service ; Name of service template to use
host_name wiki
service_description HTTP
check_command check_http
notifications_enabled 0
}
11).在nagios.cfg配置文件中开启对/usr/local/nagios/etc/servers/中配置文件的引用
[nagios@monitor servers]$ vim /usr/local/nagios/etc/nagios.cfg
#edit by sunyankui for server
cfg_dir=/usr/local/nagios/etc/servers
#cfg_dir=/usr/local/nagios/etc/printers
#cfg_dir=/usr/local/nagios/etc/switches
#cfg_dir=/usr/local/nagios/etc/routers
12).检查Nagios配置文件
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
cd /usr/local/nagios/
13).SELinux及防火墙设置
1)).关闭SELinux
2)).如果开启防火墙,应该允许访问apache(一般为80端口)并允许nagios去抓取被监控机信息(一般nrpe为5666端口)。
14).安装nrpe插件,用来监控Linux机器
tar xvfz nrpe-2.12.tar.gz
ls
cd nrpe-2.12
ls
./configure
make all
make install-plugin
15).在/usr/local/nagios/etc/objects/commands.cfg中定义check_nrpe命令
vim /usr/local/nagios/etc/objects/commands.cfg
#add by sunyankui for nrpe
# 'check_nrpe ' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
二、在被监控服务器(Linux/unix)上安装Nagios-plugins和nrpe
1).安装相关库
yum -y install ntp vim-enhanced gcc gcc-c++ gcc-g77 flex bison autoconf automake bzip2-devel ncurses-devel openssl-devel libtool* zlib-devel libxml2-devel libjpeg-devel libpng-devel libtiff-devel fontconfig-devel freetype-devel libXpm-devel gettext-devel curl curl-devel pam-devel e2fsprogs-devel krb5-devel libidn libidn-devel
2).创建帐号
useradd nagios
passwd nagios
3).安装插件
tar xvfz nagios-plugins-1.4.11.tar.gz
ls
cd nagios-plugins-1.4.11
ls
./configure
make
make install
chown nagios.nagios /usr/local/nagios/
chown -R nagios.nagios /usr/local/nagios/libexec/
cd -
ls
tar xvfz nrpe-2.12.tar.gz
ls
cd nrpe-2.12
ls
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
4).修改nrpe配置文件,允许Nagios监控服务器(172.20.51.3)监控
vim /usr/local/nagios/etc/nrpe.cfg
#多台机器用逗号隔开
allowed_hosts=127.0.0.1,172.20.51.3
5).以独立守护进程启动nrpe,也可以使用xinetd启动nrpe,具体请查看nrpe官方文档。
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
6).开机自动启动nrpe
vi /etc/rc.d/rc.local
#加入下面行
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
7).检查nrpe是否安装正常
/usr/local/nagios/libexec/check_nrpe -H localhost
NRPE v2.12
#返回nrpe版本说明安装没问题。
8).在/usr/local/nagios/etc/nrpe.cfg中定义我们用到的监控本地资源的命令
vim /usr/local/nagios/etc/nrpe.cfg
# The following examples use hardcoded command arguments...
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
#add by sunyankui for nrpe
#监控交换分区的使用情况,空闲空间低于20%时为警告状态,低于10%时为严重状态
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
#监控根分区磁盘使用情况
command[check_disk_root]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /
小问题说明:
1.出现这个:HTTP WARNING: HTTP/1.1 403 Forbidden
你的网页目录下没有index.html,你新建个index.html就可以解决。
2.
11.Nagios的配置文件
cgi.cfg #控制cgi访问的配置文件
nagios.cfg #Nagios主配置文件
resource.cfg #resource.cfg定义了一些变量,以便被其他文件引用,如$USER1$
objects #objects是一个目录,用于定义Nagios对象
servers #servers是自己创建的一个目录,Nagios可以加载一个目录下面的所有配置文件(需要在nagios.cfg中配置)
./objects:
commands.cfg #命令定义配置文件,里面定义的命令可以被其他文件引用
contacts.cfg #联系人和联系人组配置文件
localhost.cfg #监控本地机器的配置文件
printer.cfg #监控打印机的一个示例配置文件(默认未启用)
switch.cfg #监控路由器的一个示例配置文件(默认未启用)
templates.cfg #模板配置文件,在此可以定义模板,在其他文件中引用
timeperiods.cfg #定义监控时间段的配置文件
windows.cfg #监控Windows的一个示例配置文件(默认未启用)
./servers:
hostgroup.cfg #自己创建的主机群组配置文件
wiki-l-11.cfg #自己创建的监控远程Linux主机的配置文件