nagios安装
[root@client ~]# rpm -ivh http://www.aminglinux.com/bbs/data/p_w_upload/forum/month_1211/epel-release-6-7.noarch.rpm
[root@client ~]# yum install -y httpd nagios nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe
[root@client ~]# htpasswd -c /etc/nagios/passwd nagiosadmin
New password:
Re-type new password:
Adding password for user nagiosadmin
//设置密码
[root@client ~]# vim /etc/nagios/nagios.cfg //查看配置文件
[root@client ~]# nagios -v /etc/nagios/nagios.cfg
Total Warnings: 0
Total Errors: 0
//检查配置文件,无告警和错误
[root@client ~]# service httpd start
[root@client ~]# service nagios start
//启动服务
浏览器访问:192.168.137.21/nagios
输入用户名密码,访问
nagios安装客户端1
在客户端上:
[root@Client ~]# rpm -ivh http://www.aminglinux.com/bbs/data/p_w_upload/forum/month_1211/epel-release-6-7.noarch.rpm
[root@Client ~]# yum install -y nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe
[root@Client ~]# vim /etc/nagios/nrpe.cfg
allowed_hosts=192.168.137.21 //允许哪台设备连(服务端)
dont_blame_nrpe=1 //允许服务端通过check_nrpe向被调用的命令传递参数(有安全风险;本文的check_nrpe不传参数,保持默认值0也可以)
[root@Client ~]# /etc/init.d/nrpe start
监控端服务器添加被监控的主机(192.168.137.23)
[root@client ~]# cd /etc/nagios/conf.d/
[root@client conf.d]# vim 192.168.137.23.cfg
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name 192.168.137.23
alias 137.23
address 192.168.137.23
}
define service{
use generic-service
host_name 192.168.137.23
service_description check_ping
check_command check_ping!100.0,20%!200.0,50%
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.137.23
service_description check_ssh
check_command check_ssh
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.137.23
service_description check_http
check_command check_http
max_check_attempts 5
normal_check_interval 1
}
[root@client conf.d]# nagios -v /etc/nagios/nagios.cfg //检查配置有无错误
Total Warnings: 0
Total Errors: 0
[root@client conf.d]# service nagios restart
点击左侧host,出现192.168.137.23客户端(被监控设备)
点击services查看各种服务状态
nagios 监控客户端 2
以上监控无需安装其他软件,现借助nrpe监控具体的服务
服务端:
[root@client conf.d]# vim /etc/nagios/objects/commands.cfg //末尾加
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
[root@client conf.d]# vim /etc/nagios/conf.d/192.168.137.23.cfg
define service{
use generic-service
host_name 192.168.137.23
service_description check_load
check_command check_nrpe!check_load
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.137.23
service_description check_disk_hda1
check_command check_nrpe!check_hda1
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.137.23
service_description check_disk_hda3
check_command check_nrpe!check_hda3
max_check_attempts 5
normal_check_interval 1
}
//在文件末尾再添加以上三个服务
客户端查看配置文件
[root@Client ~]# vim /etc/nagios/nrpe.cfg
command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1
command[check_hda3]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda3
command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
//查看:服务端通过check_nrpe调用的check_load、check_hda1等命令都在此文件中定义。需把check_hda1、check_hda3中的/dev/hda1、/dev/hda3改成/dev/sda1、/dev/sda3,因为本机只有sda设备,没有hda设备。
[root@Client ~]# ls /usr/lib/nagios/plugins/check_load
/usr/lib/nagios/plugins/check_load
[root@Client ~]# /etc/init.d/nrpe restart
服务端:
[root@client conf.d]# /etc/init.d/nagios restart
网页访问;新添加的三个监控服务已出现
服务端查看nagios日志
[root@client conf.d]# ls /var/log/nagios/
[root@client conf.d]# tail /var/log/nagios/nagios.log
nagios 配置邮件告警
先定义人、组。告警人邮箱信息等
[root@client conf.d]# vim /etc/nagios/objects/contacts.cfg
define contact{ #定义一个人
contact_name 123
use generic-contact
alias cai
email cai@example.com //示例地址,请替换为该联系人真实的告警接收邮箱
}
define contact{
contact_name 456
use generic-contact
alias yao
email yao@example.com //示例地址,请替换为该联系人真实的告警接收邮箱
}
define contactgroup{ #定义一个组有哪些人
contactgroup_name common
alias common
members 123,456
}
//尾行添加
然后需在告警的服务器里加上contactgroup
服务器上:
[root@client conf.d]# vim /etc/nagios/conf.d/192.168.137.23.cfg
define service{
use generic-service
host_name 192.168.137.23
service_description check_load
check_command check_nrpe!check_load
max_check_attempts 5
normal_check_interval 1
contact_groups common
# notification_period 24x7
# notification_options c,r
}
//如check_load负载需加邮件告警,则在check_load服务中加contact_groups common(common为联系人组名,告警会发给组内所有成员)。notification_period 24x7表示告警时间段为全天候(24x7是Nagios自带的时间段名称,注意是字母x而不是*);notification_options c,r表示在服务变为严重(critical)状态以及恢复(recovery)时发送告警。