系统环境:rhel6.4
selinux 和 iptables 均为 disabled(已关闭)
整合cacti和nagios是利用了cacti的一个插件nagios for cacti,它的原理是将nagios的数据通过ndo2db导入到mysql数据库(cacti的库中),然后cacti读取数据库信息将nagios的结果展示出来。接下来就介绍整个安装及配置过程,并且实现飞信报警通知的功能:
bin
|
Nagios执行程序所在目录,这个目录只有两个文件nagios、nagiostats
|
etc
|
Nagios配置文件位置,初始安装完后,只有几个*.cfg-sample文件和一个objects的目录,该目录中放的是一些模版,在做监控的时候主要就在这些模版中进行修改
|
sbin
|
Nagios Cgi文件所在目录,也就是执行外部命令所需文件所在的目录
|
share
|
Nagios网页文件所在的目录
|
var
|
Nagios日志文件、pid 等文件所在的目录
|
一、nagios监控本地主机
注释掉localhost.cfg,新增加hosts.cfg,services.cfg
[root@server34 etc]#vim /usr/local/nagios/etc/nagios.cfg
# You can specify individual object config files as shown below:
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/objects/templates.cfg
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
# Definitions for monitoring the local (Linux) host
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
localhost.cfg中主机与服务是在一块的,这样不好管理,以下将主机与服务分开
更改主机:
[root@server34 objects]# cp localhost.cfg hosts.cfg
[root@server34 objects]# vim hosts.cfg
define host{
use linux-server
host_name server34.example.com
alias Manager
address 192.168.0.34
icon_image server.gif
statusmap_image server.gd2
2d_coords 500,200
3d_coords 500,200,100
}
###############################################################################
###############################################################################
#
# HOST GROUP DEFINITION
#
###############################################################################
###############################################################################
# Define an optional hostgroup for Linux machines
define hostgroup{
hostgroup_name linux-servers ; The name of the hostgroup
alias Linux Servers ; Long name of the group
members * ; Comma separated list of hosts that belong to this group
}
[root@server34 objects]# cp localhost.cfg services.cfg
[root@server34 objects]# cat hosts.cfg
define host{
use linux-server
host_name server34.example.com
alias Manager
address 192.168.0.34
icon_image server.gif
statusmap_image server.gd2
2d_coords 500,200
3d_coords 500,200,100
}
###############################################################################
###############################################################################
#
# HOST GROUP DEFINITION
#
###############################################################################
###############################################################################
# Define an optional hostgroup for Linux machines
define hostgroup{
hostgroup_name linux-servers ; The name of the hostgroup
alias Linux Servers ; Long name of the group
members * ; Comma separated list of hosts that belong to this group
}
[root@server34 objects]# cat services.cfg
define servicegroup{
servicegroup_name 系统负荷检查
alias 负荷检查
members server34.example.com,进程总数,server34.example.com,登录用户数,server34.example.com,根分区,server34.example.com,交换空间利用率
}
define servicegroup{
servicegroup_name 全部联通性检查
alias 联通性检查
members server34.example.com,PING
}
###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################
# Define a service to "ping" the local machine
define service{
use local-service ; Name of service template to use
host_name *
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition
# on the local machine. Warning if < 20% free, critical if
# < 10% free space on partition.
define service{
use local-service ; Name of service template to use
host_name server34.example.com
service_description 根分区
check_command check_local_disk!20%!10%!/
}
# Define a service to check the number of currently logged in
# users on the local machine. Warning if > 20 users, critical
# if > 50 users.
define service{
use local-service ; Name of service template to use
host_name server34.example.com
service_description 登录用户数
check_command check_local_users!20!50
}
# Define a service to check the number of currently running procs
# on the local machine. Warning if > 250 processes, critical if
# > 400 users.
define service{
use local-service ; Name of service template to use
host_name server34.example.com
service_description 进程总数
check_command check_local_procs!250!400!RSZDT
}
# Define a service to check the load on the local machine.
define service{
use local-service ; Name of service template to use
host_name server34.example.com
service_description 系统负荷
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 20% is free
define service{
use local-service ; Name of service template to use
host_name server34.example.com
service_description 交换空间利用率
check_command check_local_swap!20!10
}
# Define a service to check SSH on the local machine.
# Disable notifications for this service by default, as not all users may have SSH enabled.
define service{
use local-service ; Name of service template to use
host_name server34.example.com
service_description SSH
check_command check_tcp!22!1.0!10.0
notifications_enabled 0
}
# Define a service to check HTTP on the local machine.
# Disable notifications for this service by default, as not all users may have HTTP enabled.
define service{
use local-service ; Name of service template to use
host_name server34.example.com
service_description HTTP
check_command check_http
notifications_enabled 0
}
检测nagios的配置文件
[root@server34 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
[root@server34 objects]# /etc/init.d/nagios reload
http://192.168.0.34/nagios
点击服务:
看到以下信息
二、nagios监控远程主机
1、监控远程主机的网络服务
监控主机:192.168.0.34
远程主机(被监控主机):192.168.0.17
rhel6.4,selinux 和 iptables 均为 disabled(已关闭)
一、监控远程主机的网络服务(如监控mysql服务等):
在此以监控远程主机的mysql为例来说明nagios是怎样监控远程主机的网络服务的。
安装mysql
[root@server17 ~]# yum install mysql mysql-server -y
启动mysql
[root@server17 ~]# /etc/init.d/mysqld start
创建nagios数据库,并给nagios用户授予在监控主机上登录nagios库的权限。
mysql> create database nagios;
mysql> grant select on nagios.* to nagios@'192.168.0.34' identified by 'westos';
检测是否能采集到信息:
[root@server34 libexec]# ./check_mysql -H 192.168.0.17 -u nagios -p westos -d nagios
Uptime: 1648 Threads: 1 Questions: 15 Slow queries: 0 Opens: 15 Flush tables: 1 Open tables: 8 Queries per second avg: 0.9|Connections=8c;;; Open_files=16;;; Open_tables=8;;; Qcache_free_memory=0;;; Qcache_hits=0c;;; Qcache_inserts=0c;;; Qcache_lowmem_prunes=0c;;; Qcache_not_cached=0c;;; Qcache_queries_in_cache=0;;; Queries=15c;;; Questions=15c;;; Table_locks_waited=0c;;; Threads_connected=1;;; Threads_running=1;;; Uptime=1648c;;;
[root@server34 libexec]# pwd
/usr/local/nagios/libexec
添加检测命令:
[root@server34 objects]# vim commands.cfg
# 'check_mysql' command definition
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u $ARG1$ -p $ARG2$ -d $ARG3$
}
添加主机:
[root@server34 objects]# vim hosts.cfg
define host{
use linux-server
host_name server17.example.com
alias mysql
address 192.168.0.17
icon_image server.gif
statusmap_image server.gd2
2d_coords 600,300
3d_coords 600,300,100
}
添加服务:
[root@server34 objects]# vim services.cfg ###添加服务组
define servicegroup{
servicegroup_name 全部联通性检查
alias 联通性检查
members server34.example.com,PING,server17.example.com,PING
}
################################check_mysql#################################
define service{ ###添加服务
use local-service ; Name of service template to use
host_name server17.example.com
service_description check_mysql
check_command check_mysql!nagios!westos!nagios
}
验证配置文件更改是否正确:
[root@server34 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
重新加载nagios:
[root@server34 objects]# /etc/init.d/nagios reload
检测:http://192.168.0.34/nagios
点击主机->出现server17.example.com
点击服务->出现check_mysql服务
2、通过nrpe监控远程主机的基本信息(如磁盘的使用情况等)。
在被监控主机上操作:
下载nagios插件包、nrpe包
nagios-plugins-1.5.tar.gz, nrpe-2.15.tar.gz
[root@server17 ~]# tar zxf nagios-plugins-1.5.tar.gz
创建nagios用户
[root@server17 ~]# useradd nagios
编译安装nagios-plugin
[root@server17 ~]# cd nagios-plugins-1.5
[root@server17 nagios-plugins-1.5]# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
[root@server17 nagios-plugins-1.5]# make && make install
编译安装nrpe;nrpe以守护进程方式运行,这里交由xinetd托管,因此需要先安装并配置xinetd
[root@server17 nrpe-2.15]# yum install xinetd -y
[root@server17 nrpe-2.15]# vim /etc/xinetd.d/nrpe
only_from = 192.168.0.34
[root@server17 nrpe-2.15]# vim /etc/services
nrpe 5666/tcp
[root@server17 nrpe-2.15]# ./configure
[root@server17 nrpe-2.15]# make all
[root@server17 nrpe-2.15]# make install-plugin
[root@server17 nrpe-2.15]# make install-daemon
[root@server17 nrpe-2.15]# make install-daemon-config
[root@server17 nrpe-2.15]# make install-xinetd
[root@server17 nrpe-2.15]# /etc/init.d/xinetd start
配置nrpe
[root@server17 etc]# vim nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
用守护进程xinetd启动nrpe
[root@server17 ~]# /etc/init.d/xinetd start
##########################################
在监控主机上
下载nrpe包:
nrpe-2.15.tar.gz
在监控主机上操作:
[root@server34 nrpe-2.15]# ./configure
[root@server34 nrpe-2.15]# make all
[root@server34 nrpe-2.15]# make install-plugin
[root@server34 nrpe-2.15]# make install-daemon
[root@server34 nrpe-2.15]# make install-daemon-config
检测:
[root@server34 libexec]# ./check_nrpe -H 192.168.0.17
NRPE v2.15
检测磁盘使用情况:
[root@server34 libexec]# ./check_nrpe -H 192.168.0.17 -c check_disk
DISK OK - free space: / 14459 MB (85% inode=91%);| /=2540MB;14327;16118;0;17909
添加check_nrpe检测命令
[root@server34 objects]# vim commands.cfg
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
[root@server34 objects]# vim services.cfg
define servicegroup{
servicegroup_name 系统负荷检查
alias 负荷检查
members server34.example.com,进程总数,server34.example.com,登录用户数,server34.example.com,根分区,server34.example.com,交换空间利用率
}
define servicegroup{
servicegroup_name 全部联通性检查
alias 联通性检查
members server34.example.com,PING,server17.example.com,PING
}
配置检测:
[root@server34 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
检测:
http://192.168.0.34/nagios
点击服务,查看结果。
整合nagios与微信、飞信
整合nagios与微信、飞信,使得nagios的监控报警具有了锦上添花的效果。这样,管理员不用经常性的查看系统运行是否正常,磁盘使用情况等等,有什么异常情况,微信、飞信会第一时间的通知你,你要做的就是手机保持畅通就ok了。既然这么好,那大家就跟随我一块愉快的感受下吧!
配置微信,飞信:
首先打开相关服务:
[root@server34 objects]# /etc/init.d/nagios start
[root@server34 objects]# /etc/init.d/httpd start
[root@server34 objects]# /etc/init.d/mysqld start
检测nagios@localhost是否能收发邮件
[root@server34 objects]# mail nagios@localhost
切换到nagios用户,查看到以下邮件时则成功
[root@server34 objects]# su - nagios
-bash-4.1$ mail
Heirloom Mail version 12.4 7/29/08. Type ? for help.
"/var/spool/mail/nagios": 1 message
> 1 root Sun Apr 27 09:54 20/642 "dsdssds"
&
一般自定义域名会被QQ邮箱当作垃圾邮件处理,在QQ邮箱中设置白名单
-bash-4.1$ ping mail.qq.com //检测是否可以上网
PING mail.qq.com (14.17.32.178) 56(84) bytes of data.
64 bytes from 14.17.32.178: icmp_seq=1 ttl=53 time=54.4 ms
64 bytes from 14.17.32.178: icmp_seq=2 ttl=53 time=1088 ms
添加邮箱:
[root@server34 objects]# vim contacts.cfg
email [email protected]
[root@server34 objects]# /etc/init.d/nagios reload
下载fetion,和飞信的库文件linuxso_20101113.tar.gz
[root@server34 ~]# tar zxf linuxso_20101113.tar.gz -C /lib
[root@server34 ~]# cp fetion /usr/local/nagios/libexec/
给以下文件加可执行权限
[root@server34 libexec]# chmod +x libACE*
[root@server34 libexec]# chmod +x libcrypto.so.4 libssl.so.4
[root@server34 libexec]# chown nagios.nagios fetion
[root@server34 libexec]# su - nagios
根据提示的错误依次安装以下软件包:
-bash-4.1$ /usr/local/nagios/libexec/fetion
-bash: /usr/local/nagios/libexec/fetion: /lib/ld-linux.so.2: bad ELF interpreter: No such file or directory
[root@server34 ~]# yum install /lib/ld-linux.so.2 libstdc++.so.6 libgssapi_krb5.so.2 libz.so.1 -y
第一次发送时会要求输入校验码,以后发送时则不需要
-bash-4.1$ /usr/local/nagios/libexec/fetion --mobile=18709242146 --pwd=316103ok --to=18740419949,18709242146 --msg-utf8="hahahahhhhhhhhhhhaaaa"
为了查看校验码的内容,将校验码拷贝到物理主机的桌面上
[root@server34 libexec]# scp 18709242146.jpg 192.168.0.39:/root/Desktop
整合飞信与nagios
因为发送的对象和内容一直在变,所以用脚本实现:
-bash-4.1$ vim fetion.sh
/usr/local/nagios/libexec/fetion --mobile=18709242146 --pwd=316103ok --to="$1" --msg-utf8="$2" &>/dev/null
给fetion.sh加可执行权限:
-bash-4.1$ chmod +x fetion.sh
测试:
-bash-4.1$ /usr/local/nagios/libexec/fetion.sh 18709242146 "westos linux"
-bash-4.1$ vim contacts.cfg
define contact{
contact_name nagiosadmin ; Short name of user
use generic-contact ; Inherit default values from generic-contact template (defined above)
alias Nagios Admin ; Full name of user
email [email protected] ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
pager 18709242146
}
-bash-4.1$ vim commands.cfg
# 'notify-host-by-fetion' command definition
define command{
command_name notify-host-by-fetion
command_line $USER1$/fetion.sh $CONTACTPAGER$ " $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$"
}
# 'notify-service-by-FETION' command definition
define command{
command_name notify-service-by-fetion
command_line $USER1$/fetion.sh $CONTACTPAGER$ " $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$"
}
-bash-4.1$ vim templates.cfg
service_notification_commands notify-service-by-email,notify-service-by-fetion ; send service notifications via email
host_notification_commands notify-host-by-email,notify-host-by-fetion ; send host notifications via email
检测配置文件
[root@server34 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
测试:
[root@server17 ~]# /etc/init.d/mysqld stop
Stopping mysqld: [ OK ]
会收到邮件提示
【注意】