上一节中完成了nagios的安装,这一节主要对nagios进行简单的配置。
一、NRPE的安装
nagios在监控Linux主机时需要用到NRPE这个软件,工作原理如下图所示:
tar xzf nrpe-2.12.tar.gz
cd nrpe-2.12
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
二、配置主机监控
一般主机监控配置都会存放在一个hosts.cfg文件中,这里先在/usr/local/nagios/etc/目录下建立hosts.cfg文件,之后写入如下内容:
define host{
host_name nagios-server
alias nagios server
address 192.168.191.128
check_command check-host-alive
max_check_attempts 5
check_period 24x7
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options d,u,r
}
define host{
host_name windows2003
alias windows2003
address 192.168.191.129
check_command check-host-alive
max_check_attempts 5
check_period 24x7
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options d,u,r
}
define host{
host_name linux
alias linux
address 192.168.191.130
check_command check-host-alive
max_check_attempts 5
check_period 24x7
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options d,u,r
}
define hostgroup{
hostgroup_name sa-servers
alias sa servers
members nagios-server,windows2003,linux
}
三、配置服务监控
和主机监控配置一样,服务配置也要存放在一个services.cfg文件中,同样在/usr/local/nagios/etc/目录下建立services.cfg文件,之后加入如下内容:
define service{
host_name nagios-server
service_description check-host-alive
check_command check-host-alive
max_check_attempts 5
normal_check_interval 5
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups admins
}
define service{
host_name nagios-server
service_description check_tcp 80
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_tcp!80
}
define service{
host_name nagios-server
service_description cpu load
check_command check_nrpe!check_load
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
define service{
host_name nagios-server
service_description total-procs
check_command check_nrpe!check_total_procs
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
define service{
host_name nagios-server
service_description check_df
check_command check_nrpe!check_df
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
define service{
host_name nagios-server
service_description check_ips
check_command check_nrpe!check_ips
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups admins
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
这里只给出nagios-server主机中服务的监控配置,其他两个被监控主机的配置基本相同,这里就不再给出。
四、配置nagios主配置文件nagios.cfg
这里只给出需要改的项目:
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios//etc/objects/templates.cfg
cfg_file=/usr/local/nagios/etc/hosts.cfg
cfg_file=/usr/local/nagios/etc/services.cfg
#cfg_file=/usr/local/nagios//etc/objects/localhost.cfg
五、配置联系人文件contacts.cfg
define contact{
contact_name nagiosadmin
alias Nagios Admin
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r
host_notification_options d,u,r
service_notification_commands notify-by-email,service-notify-by-fx
host_notification_commands host-notify-by-email,host-notify-by-fx
email [email protected]
pager 13800138000
}
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagiosadmin
}
六、NRPE配置
vi /usr/local/nagios/etc/nrpe.cfg
server_address=192.168.191.128
allowed_hosts=127.0.0.1,192.168.191.128
command[check_users]=/usr/local/nrpe/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nrpe/libexec/check_load -w 15,10,5 -c 30,25,20
#command[check_hda1]=/usr/local/nrpe/libexec/check_disk -w 20 -c 10 -p /dev/hda1 //注释掉
command[check_df]=/usr/local/nrpe/libexec/check_disk -w 20 -c 10 //添加这一行,监控整个磁盘利用率
command[check_zombie_procs]=/usr/local/nrpe/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nrpe/libexec/check_procs -w 150 -c 200
command[check_ips]=/usr/local/nrpe/libexec/ip_conn.sh 8000 10000 //监控ip连接数
启动nrpe服务
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
七、被监控Linux主机配置
1.安装相关软件
useradd nagios -s /sbin/nologin
tar xvf nagios-plugins-1.4.14.tar.gz
cd nagios-plugins-1.4.14
./configure --prefix=/usr/local/nagios
make
make install
chown -R nagios:nagios /usr/local/nagios
chown -R nagios:nagios /usr/local/nagios/libexec
tar xzf nrpe-2.12.tar.gz
cd nrpe-2.12
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
2. 修改nrpe.cfg配置文件
allowed_hosts=127.0.0.1,192.168.191.128 //允许监控服务器访问,中间用逗号隔开;
# 修改NRPE的监控命令,添加相应的命令;
# The following examples use hardcoded command arguments...
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
#command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
command[check_df]=/usr/local/nagios/libexec/check_disk -w 20 -c 10
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
command[check_tcp]=/usr/local/nagios/libexec/check_tcp -p 80
3. 启动nrpe服务
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
八、windows2003配置
在http://sourceforge.net/projects/nscplus/里下载nsclient++-0.3.8-win32.msi进行安装。
进入安装目录修改NSC.ini文件:在[modules]部分,把所有模块前面的注释符“;”去掉,只有CheckWMI.dll和RemoteConfiguration.dll这两个保持注释状态;在[Settings]部分找到“allowed_hosts”选项,将前面的注释符“;”去掉,并在后面加上监控服务器的IP;在[NSClient]部分找到“port=12489”这一行,并把前面的注释去掉,这是NSClient++的默认监听端口。
运行安装目录下的nsclient++.exe和nstray.exe文件。
在系统服务列表里找到NSClient++对应的服务,
把属性中登录选项的允许服务与桌面交互前的对勾打上,如下图所示:
之后要在监控主机的commands.cfg中加入如下内容:
#'check_nt' command definition
define command{
command_name check_nt
command_line $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
}
九、启动服务查看结果
这时就可以在监控主机上启动nagios和httpd服务查看结果了,如下图所示: