用到的软件包:
nagios-cn-3.2.0.tar.bz2
nagios-plugins-1.4.14.tar.gz
nrpe-2.12.tar.gz
rrdtool-1.0.50.tar.gz
pnp-0.4.14.tar.gz
1、Nagios监控端安装
安装apache、php和相关库
yum -y install gd gd-devel
yum -y install httpd php php-gd
建立运行用户
useradd nagios
groupadd nagcmd
usermod -a -G nagcmd nagios
usermod -a -G nagcmd apache
Nagios主程序安装
./configure --with-command-group=nagcmd
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf
创建一个nagiosadmin(系统默认管理员用户,用其他用户名时需要自己更改cgi.cfg配置)的用户用于Nagios的WEB接口登录
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
/etc/init.d/httpd restart
安装Nagios插件
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make
make install
安装nrpe(监控linux专用)
./configure
make all
make install-plugin
配置监控端(仔细查看etc下的配置文件和官方配置文件说明)
vi /usr/local/nagios/etc/objects/commands.cfg
在最后面增加如下内容
########################################################################
#
# 2009.10.17 add by sapling
# NRPE COMMAND
#
########################################################################
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
监控linux服务器示例
define service{
use local-service ; Name of service template to use
host_name 10.3.37.110
service_description CHECK-USERS
check_command check_nrpe!check_users ; !后为要执行的命令
}
mkdir /usr/local/nagios/etc/objects/host
chown nagios:nagios /usr/local/nagios/etc/objects/host
vi /usr/local/nagios/etc/nagios.cfg
注释默认监控,加入个存放监控主机配置的目录
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
cfg_dir=/usr/local/nagios/etc/objects/host
vi /usr/local/nagios/etc/objects/host/localhost.cfg
文件示例:
###############################################################################
# LOCALHOST.CFG - SAMPLE OBJECT CONFIG FILE FOR MONITORING THIS MACHINE
#
# Last Modified: 05-31-2007
#
# NOTE: This config file is intended to serve as an *extremely* simple
# example of how you can create configuration entries to monitor
# the local (Linux) machine.
#
###############################################################################
###############################################################################
###############################################################################
#
# HOST DEFINITION
#
###############################################################################
###############################################################################
# Define a host for the local machine
define host{
use linux-server
host_name 127.0.0.1
alias localhost
address 127.0.0.1
}
###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################
# Define a service to "ping" the local machine
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition
# on the local machine. Warning if < 20% free, critical if
# < 10% free space on partition.
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description DISK
check_command check_local_disk!20%!10%!/
}
# Define a service to check the number of currently logged in
# users on the local machine. Warning if > 20 users, critical
# if > 50 users.
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description USERS
check_command check_local_users!20!50
}
# Define a service to check the number of currently running procs
# on the local machine. Warning if > 250 processes, critical if
# > 400 processes.
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description PROCES
check_command check_local_procs!250!400!RSZDT
}
# Define a service to check the load on the local machine.
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description LOAD
check_command check_local_load!10.0,8.0,4.0!30.0,20.0,10.0
}
# Define a service to check the swap usage the local machine.
# Critical if less than 10% of swap is free, warning if less than 30% is free
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description SWAP
check_command check_local_swap!30!10
}
# Define a service to check SSH on the local machine.
# Notifications are enabled for this service (notifications_enabled 1); set it to 0 if SSH is not running on this host.
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description SSH
check_command check_tcp!22!1.0!10.0
notifications_enabled 1
}
# Define a service to check HTTP on the local machine.
# Notifications are enabled for this service (notifications_enabled 1); set it to 0 if HTTP is not running on this host.
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description HTTP
check_command check_http
notifications_enabled 1
}
define service{
use local-service ; Name of service template to use
host_name 127.0.0.1
service_description FTP
check_command check_ftp
notifications_enabled 1
process_perf_data 0
}
vi /usr/local/nagios/etc/objects/host/10.3.37.110.cfg
文件示例:
###############################################################################
# 10.3.37.110.CFG - SAMPLE OBJECT CONFIG FILE FOR MONITORING A REMOTE LINUX HOST
#
# Last Modified: 05-31-2007
#
# NOTE: This config file is intended to serve as an *extremely* simple
# example of how you can create configuration entries to monitor
# a remote (Linux) machine through NRPE.
#
###############################################################################
###############################################################################
###############################################################################
#
# HOST DEFINITION
#
###############################################################################
###############################################################################
# Define a host for the remote machine
define host{
use linux-server
host_name 10.3.37.110
alias 10.3.37.110
address 10.3.37.110
}
###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################
define service{
use local-service ; Name of service template to use
host_name 10.3.37.110
service_description CHECK-DISK
check_command check_nrpe!check_sda1
}
define service{
use local-service ; Name of service template to use
host_name 10.3.37.110
service_description CHECK-USERS
check_command check_nrpe!check_users
}
define service{
use local-service ; Name of service template to use
host_name 10.3.37.110
service_description CHECK-LOAD
check_command check_nrpe!check_load
}
define service{
use local-service ; Name of service template to use
host_name 10.3.37.110
service_description CHECK-ZOMBIE-PROCS
check_command check_nrpe!check_zombie_procs
}
define service{
use local-service ; Name of service template to use
host_name 10.3.37.110
service_description CHECK-TOTAL-PROCS
check_command check_nrpe!check_total_procs
}
vi /usr/local/nagios/etc/objects/host/group.cfg
文件示例:
###############################################################################
###############################################################################
#
# HOST GROUP DEFINITION
#
###############################################################################
###############################################################################
# Define an optional hostgroup for Linux machines
define hostgroup{
hostgroup_name linux-servers ; The name of the hostgroup
alias Linux Servers ; Long name of the group
members * ; Comma separated list of hosts that belong to this group
}
令SELinux处于容许模式(出现无权限问题的话就执行)
setenforce 0
检查配置与启动
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios start
访问监控web
http://localhost/nagios/
2、nagios被监控端安装
没安装xinetd的要安装
yum -y install xinetd
安装Nagios插件
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make
make install
安装nrpe
./configure
make all
make install-daemon
make install-daemon-config
make install-xinetd
配置nrpe启动
vi /etc/xinetd.d/nrpe
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
only_from = 127.0.0.1 10.3.37.110
#only_from: IP(s) of the monitoring server allowed to connect; separate multiple IPs with spaces
}
vi /etc/services
加入以下:
nrpe 5666/tcp # nrpe
重启 xinetd 服务
/etc/init.d/xinetd restart
检查nrpe是否正常工作
在监控端执行以下命令,返回版本则成功。
/usr/local/nagios/libexec/check_nrpe -H 被监控端ip
NRPE v2.12
配置监控命令
vi /usr/local/nagios/etc/nrpe.cfg
# The following examples use hardcoded command arguments...
###############
command[check_users]=/usr/local/nagios/libexec/check_users -w 10 -c 20
command[check_load]=/usr/local/nagios/libexec/check_load -w 16,10,8 -c 30,25,20
command[check_sda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1
command[check_sda2]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda2
command[check_sda5]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda5
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 300 -c 360
command[check_http]=/usr/local/nagios/libexec/check_http -H 10.3.37.110 -u /nagios.php
command[check_ftp]=/usr/local/nagios/libexec/check_ftp -H 10.3.37.110 -p 21
command[check_ssh]=/usr/local/nagios/libexec/check_ssh 10.3.37.110
command[check_alive]=/usr/local/nagios/libexec/check_ping -H 10.3.37.110 -w 100,20% -c 500,60% -p 4
command[check_105mysql]=/usr/local/nagios/libexec/check_mysql -H 10.3.37.110 -P 3306 -u nagios -p ***
##############
检查监控命令是否生效
在监控端执行以下命令,返回结果则成功。
/usr/local/nagios/libexec/check_nrpe -H 被监控端ip -c check_load
OK - load average: 0.00, 0.00, 0.00|load1=0.000;16.000;30.000;0; load5=0.000;10.000;25.000;0; load15=0.000;8.000;20.000;0;
3、Nagios 的性能分析图
监控服务变化曲线的工具 ---- PNP
安装rrdtools(绘图工具)可能需要的库
yum install cairo pango libart_lgpl libart_lgpl-devel zlib zlib-devel freetype freetype-devel
安装rrdtools
./configure
make
make install
编辑Nagios 的主配置文件 nagios.cfg
vi /usr/local/nagios/etc/nagios.cfg
修改如下:
process_performance_data=1
host_perfdata_command=process-host-perfdata
service_perfdata_command=process-service-perfdata
如果想要对某个监控对象做数据图表,则需在所对应的host或者service 定义中包含如下的定义:
process_perf_data 1
编辑commands.cfg,将“process-service-perfdata”命令对应的执行命令行(command_line)替换为PNP的process_perfdata.pl脚本:
define command{
command_name process-service-perfdata
# command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out
command_line /usr/local/nagios/libexec/process_perfdata.pl
# command_line /usr/bin/perl /usr/local/nagios/sbin/insert.cgi
}
安装PNP
./configure --with-rrdtool=/usr/local/rrdtool-1.0.50/bin/rrdtool
make all
make install
检查配置文件并重启
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios restart
访问web
http://localhost/nagios/pnp/index.php
4、整合飞信机器人发送短信报警
飞信机器人下载地址:
http://www.it-adv.net/
加入飞信运行所需libACE库文件
tar zxvf fetion20091117-linux.tar.gz -C /usr/local/
mv /usr/local/fx /usr/local/fetion
安装飞信机器人
chmod -R 755 /usr/local/fetion
chown -R nagios:nagios /usr/local/fetion
加入飞信.so文件到系统链接库
vi /etc/ld.so.conf.d/fetion.conf
加入一行:
/usr/local/fetion/
更新:ldconfig
发送短信测试
/usr/local/fetion/fetion --hide --mobile=137*** --pwd=*** --to=136*** --msg-utf8="test"
编辑发送飞信命令commands.cfg
vi /usr/local/nagios/etc/objects/commands.cfg
# 'host-notify-by-fei' command definition
define command {
command_name host-notify-by-fei
command_line /usr/local/fetion/fetion --hide --mobile=136******** --pwd=*** --to=$CONTACTPAGER$ --msg-utf8="Host $HOSTSTATE$ alert for $HOSTNAME$! on '$LONGDATETIME$'" $CONTACTPAGER$
}
# 'service-notify-by-fei' command definition
define command {
command_name service-notify-by-fei
command_line /usr/local/fetion/fetion --hide --mobile=136******** --pwd=*** --to=$CONTACTPAGER$ --msg-utf8="$HOSTADDRESS$ $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ on $LONGDATETIME$" $CONTACTPAGER$
}
编辑联系人配置文件contacts.cfg
vi /usr/local/nagios/etc/objects/contacts.cfg
加入*-notify-by-fei两行和pager
define contact{
contact_name nagiosadmin ; Short name of user
use generic-contact ; Inherit default values from generic-contact template (defined above)
alias Nagios Admin ; Full name of user
service_notification_commands service-notify-by-fei
host_notification_commands host-notify-by-fei
email [email protected] ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
pager 136********
}
检查配置文件并重启
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios restart