第一步:
安装软件:分为nagios监控端和nagios被监控端
监控端软件列表
Nagios安装包
Nagios插件安装包
Nagios中文支持包
Nrpe软件包
nagios通过nrpe来远端管理服务
1. nagios执行安装在它里面的check_nrpe 插件,并告诉check_nrpe 去检测哪些服务。
2. 通过ssl,check_nrpe 连接远端机子上的nrpe daemon
3. nrpe运行本地的各种插件去检测本地的服务和状态
4. nrpe把检测的结果传给主机端的check_nrpe, 主机端的check_nrpe再把结果送到nagios状态队列中。
5. nagios 依次读取队列中的信息,再把结果显示出来。
解决编译软件时需要的依赖
[root@www ~]# yum install -y gcc glibc glibc-common gd gd-devel xinetd openssl-devel
添加nagios用户 useradd nagios
添加目录 mkdir /usr/local/nagios
修改权限 chown -R nagios:nagios /usr/local/nagios
解压软件包 tar -xzvf nagios-3.2.0.tar.gz
开始安装
1. ./configure --prefix=/usr/local/nagios
2. make all
3. make install
4. make install-init
5. make install-commandmode
6. make install-config
7. chkconfig --add nagios
8. chkconfig --level 35 nagios on
9. tar -xzvf nagios-plugins-1.4.14.tar.gz
10. ./configure --prefix=/usr/local/nagios
11. make && make install
12. yum install httpd
13. yum install php
14. vim /etc/httpd/conf/httpd.conf
添加以下内容
修改用户
15. 添加密码
htpasswd -c /usr/local/nagios/etc/htpasswd oracle
16. /etc/init.d/httpd start
17. tar -xjvf nagios-cn-3.2.0.tar.bz2
18. ./configure
19. make && make install
监控端nagios安装完毕
重启httpd和启动nagios
可以看到以下画面
这里你要输入刚才设置的用户和密码
在没有修改配置的情况下,默认只能查看到本机
好的 下面来添加主机
会用到以下几个文件,其他文件就由各位下去自己慢慢研究了,我手懒
其中以下两个文件需要手动创建
定义被监控主机
[root@www objects]# cat hosts.cfg
define host{
use linux-server ; 使用templates.cfg中定义的模板
host_name Nagios-Linux ; 自己取的名字,并不一定是主机名
alias Nagios-Linux ; 别名
address 192.168.1.201 ; 被监控主机的地址
}
定义监控内容也就是监控命令
[root@www objects]# cat services.cfg
define service{
use local-service
host_name Nagios-Linux
service_description Current Load
check_command check_nrpe!check_load
}
define service{
use local-service
host_name Nagios-Linux
service_description Check Disk sda1
check_command check_nrpe!check_sda1
}
define service{
use local-service
host_name Nagios-Linux
service_description Total Processes
check_command check_nrpe!check_total_procs
}
define service{
use local-service
host_name Nagios-Linux
service_description Current Users
check_command check_nrpe!check_users
}
define service{
use local-service
host_name Nagios-Linux
service_description Check Zombie Procs
check_command check_nrpe!check_zombie_procs
}
define service{
use local-service
host_name Nagios-Linux
service_description Check Swap
check_command check_nrpe!check_swap
}
define service{
use local-service
host_name Nagios-Linux
service_description HTTP
check_command check_nrpe!check_http
}
define service{
use local-service
host_name Nagios-Linux
service_description MYSQL
check_command check_nrpe!check_mysql
}
define service{
use local-service
host_name Nagios-Linux
service_description MYLOG
check_command check_nrpe!check_mylog
}
被监控端主机安装nrpe、nagios-plugins软件包
与监控端安装类似,注意最好是以xinetd的方式来启动
安装nrpe
161 ./configure
162 make all
163 make install-plugin
164 make install-daemon-config
165 make install-xinetd
vim /etc/xinetd.d/nrpe
# default: on
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
# 允许连接的地址:本机和监控端服务器地址
only_from = 127.0.0.1 192.168.1.200
}
测试
/usr/local/nagios/libexec/check_nrpe -H localhost
[root@www libexec]# vim /usr/local/nagios/etc/nrpe.cfg
log_facility=daemon
pid_file=/var/run/nrpe.pid
server_port=5666
nrpe_user=nagios
nrpe_group=nagios
allowed_hosts=127.0.0.1
dont_blame_nrpe=0
debug=0
command_timeout=60
connection_timeout=300
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_sda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
command[check_http]=/usr/local/nagios/libexec/check_http -I 127.0.0.1
command[check_mysql]=/usr/local/nagios/libexec/check_mysql
command[check_mylog]=/usr/local/nagios/libexec/check_mylog
自己定义的插件
[root@www libexec]# cat check_mylog
#!/bin/bash
#
# check_mylog - Nagios/NRPE plugin that counts lines containing "fail"
# in today's log file, /tmp/<YYYY-MM-DD>.txt (date taken from `date +%F`).
#
# Exit status and output:
#   0  "MYLOG IS OK"        no line contains "fail"
#   1  "MYLOG IS WARNING"   1 to 10 matching lines
#   2  "MYLOG IS CRITICAL"  11 or more matching lines
#   3  "UNKNOWN"            the log file is missing/unreadable or grep failed
#
log_file="/tmp/$(date +%F).txt"

# If the log cannot be read the check cannot be evaluated; report UNKNOWN
# instead of a misleading OK.
if [[ ! -r "$log_file" ]]; then
  echo "UNKNOWN"
  exit 3
fi

# grep -c prints the number of matching lines. It exits 1 when that number
# is 0, which is a normal result here; only a status above 1 is a real error.
count=$(grep -c -- 'fail' "$log_file")
grep_status=$?
if (( grep_status > 1 )); then
  echo "UNKNOWN"
  exit 3
fi

if (( count == 0 )); then
  echo "MYLOG IS OK"
  exit 0
elif (( count <= 10 )); then
  echo "MYLOG IS WARNING"
  exit 1
else
  echo "MYLOG IS CRITICAL"
  exit 2
fi
0 表示OK
1 表示WARNING
2 表示 CRITICAL
3 表示UNKNOWN
在服务端的services.cfg中使用这两个自己定义的插件
define service{
use local-service
host_name Nagios-Linux
service_description MYSQL
check_command check_nrpe!check_mysql
}
define service{
use local-service
host_name Nagios-Linux
service_description MYLOG
check_command check_nrpe!check_mylog
}
最后要在commands.cfg中加入check_nrpe命令
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
重启服务端服务
就会出现我们定义的主机
例如我现在关闭mysql
这里就可以看到mysql的监控状态为紧急
现在恢复mysql
可以看到mysql监控状态为ok
下面我来实现故障告警功能
使用邮件告警
[root@www objects]# cat contacts.cfg
###############################################################################
# CONTACTS.CFG - SAMPLE CONTACT/CONTACTGROUP DEFINITIONS
#
# Last Modified: 05-31-2007
#
# NOTES: This config file provides you with some example contact and contact
# group definitions that you can reference in host and service
# definitions.
#
# You don't need to keep these definitions in a separate file from your
# other object definitions. This has been done just to make things
# easier to understand.
#
###############################################################################
###############################################################################
###############################################################################
#
# CONTACTS
#
###############################################################################
###############################################################################
# Just one contact defined by default - the Nagios admin (that's you)
# This contact definition inherits a lot of default values from the 'generic-contact'
# template which is defined elsewhere.
define contact{
contact_name nagiosadmin
use generic-contact
alias Nagios Admin
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r,f,s
host_notification_options d,u,r,f,s
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
email your-name@example.com ; 改为实际接收告警的邮箱地址
}
###############################################################################
###############################################################################
#
# CONTACT GROUPS
#
###############################################################################
###############################################################################
# We only have one contact in this simple configuration file, so there is
# no need to create more than one contact group.
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagiosadmin
}
[root@www objects]# tail -20 services.cfg
}
define service{
use local-service
host_name Nagios-Linux
service_description HTTP
check_command check_nrpe!check_http
}
define service{
use local-service
host_name Nagios-Linux
service_description MYSQL
check_command check_nrpe!check_mysql
contact_groups admins
}
define service{
use local-service
host_name Nagios-Linux
service_description MYLOG
check_command check_nrpe!check_mylog
}
[root@www objects]# cat commands.cfg
# 'notify-host-by-email' command definition
define command{
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
# 'notify-service-by-email' command definition
define command{
command_name notify-service-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}
默认是配置好了这里就不用配置
直接在contact和services中使用就好了
把mysql停了
验证成功,但是要注意需要把发件人的地址设置为白名单才可以收到
由于使用的是手机邮箱,这样就实现了邮件和短信同时提醒的功能;当然,只要邮箱支持新邮件到达时短信提醒,都能实现这个功能,不一定非要是手机邮箱。