linux nagios pnp安装配置+短信报警

用到的软件包:
nagios-cn-3.2.0.tar.bz2
nagios-plugins-1.4.14.tar.gz
nrpe-2.12.tar.gz
rrdtool-1.0.50.tar.gz
pnp-0.4.14.tar.gz

1、Nagios监控端安装

安装apache、php和相关库
yum -y install gd gd-devel
yum -y install httpd php php-gd


建立运行用户
useradd nagios
groupadd nagcmd
usermod -a -G nagcmd nagios
usermod -a -G nagcmd apache


Nagios主程序安装
./configure --with-command-group=nagcmd
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf


创建一个nagiosadmin(系统默认管理员用户,用其他用户名时需要自己更改cgi.cfg配置)的用户用于Nagios的WEB接口登录
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
/etc/init.d/httpd restart


安装Nagios插件
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make
make install


安装nrpe(监控linux专用)
./configure
make all
make install-plugin


配置监控端(仔细查看etc下的配置文件和官方配置文件说明)
vi /usr/local/nagios/etc/objects/commands.cfg
在最后面增加如下内容
###################################################################
#####
#
# 2009.10.17 add by sapling
# NRPE COMMAND
#
###################################################################
#####
# 'check_nrpe ' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}

监控linux服务器示例
define service{
        use                      local-service         ; Name of service template to use
        host_name                    10.3.37.110
        service_description             CHECK-USERS
        check_command               check_nrpe!check_users   ; !后为要执行的命令
        }


mkdir /usr/local/nagios/etc/objects/host
chown nagios.nagios /usr/local/nagios/etc/objects/host
vi /usr/local/nagios/etc/nagios.cfg
注释默认监控,加入个存放监控主机配置的目录
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
cfg_dir=/usr/local/nagios/etc/objects/host


vi /usr/local/nagios/etc/objects/host/localhost.cfg
文件示例:

###############################################################################
# LOCALHOST.CFG - SAMPLE OBJECT CONFIG FILE FOR MONITORING THIS MACHINE
#
# Last Modified: 05-31-2007
#
# NOTE: This config file is intended to serve as an *extremely* simple 
#       example of how you can create configuration entries to monitor
#       the local (Linux) machine.
#
###############################################################################




###############################################################################
###############################################################################
#
# HOST DEFINITION
#
###############################################################################
###############################################################################

# Define a host for the local machine

define host{
        use                     linux-server
        host_name               127.0.0.1 
        alias                   localhost
        address                 127.0.0.1
        }

###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################


# Define a service to "ping" the local machine

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1 
        service_description             PING
        check_command                   check_ping!100.0,20%!500.0,60%
        }


# Define a service to check the disk space of the root partition
# on the local machine.  Warning if < 20% free, critical if
# < 10% free space on partition.

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1 
        service_description             DISK 
        check_command                   check_local_disk!20%!10%! /
        }



# Define a service to check the number of currently logged in
# users on the local machine.  Warning if > 20 users, critical
# if > 50 users.

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1 
        service_description             USERS 
        check_command                   check_local_users!20!50
        }


# Define a service to check the number of currently running procs
# on the local machine.  Warning if > 250 processes, critical if
# > 400 processes.

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1 
        service_description             PROCES 
        check_command                   check_local_procs!250!400!RSZDT
        }



# Define a service to check the load on the local machine. 

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1 
        service_description             LOAD 
        check_command                   check_local_load!10.0,8.0,4.0!30.0,20.0,10.0
        }



# Define a service to check the swap usage the local machine. 
# Critical if free swap drops below the critical threshold (10), warning if it drops below the warning threshold (30)

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1 
        service_description             SWAP 
        check_command                   check_local_swap!30!10
        }



# Define a service to check SSH on the local machine.
# Notifications are enabled for this service (notifications_enabled 1); set it to 0 if SSH is not running on this host.

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1
        service_description             SSH
        check_command                   check_tcp!22!1.0!10.0
        notifications_enabled           1
        }



# Define a service to check HTTP on the local machine.
# Notifications are enabled for this service (notifications_enabled 1); set it to 0 if HTTP is not running on this host.

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1 
        service_description             HTTP
        check_command                   check_http
        notifications_enabled           1
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       127.0.0.1
        service_description             FTP
        check_command                   check_ftp
        notifications_enabled           1
        process_perf_data               0
        }


vi /usr/local/nagios/etc/objects/host/10.3.37.110.cfg
文件示例:

###############################################################################
# 10.3.37.110.CFG - SAMPLE OBJECT CONFIG FILE FOR MONITORING A REMOTE LINUX HOST
#
# Last Modified: 05-31-2007
#
# NOTE: This config file is intended to serve as an *extremely* simple 
#       example of how you can create configuration entries to monitor
#       a remote Linux machine (10.3.37.110) through NRPE.
#
###############################################################################




###############################################################################
###############################################################################
#
# HOST DEFINITION
#
###############################################################################
###############################################################################

# Define a host for the remote machine

define host{
        use                     linux-server
        host_name               10.3.37.110
        alias                   10.3.37.110
        address                 10.3.37.110
        }

###############################################################################
###############################################################################
#
# SERVICE DEFINITIONS
#
###############################################################################
###############################################################################

define service{
        use                             local-service         ; Name of service template to use
        host_name                       10.3.37.110
        service_description             CHECK-DISK
        check_command                   check_nrpe!check_sda7
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       10.3.37.110
        service_description             CHECK-USERS
        check_command                   check_nrpe!check_users
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       10.3.37.110
        service_description             CHECK-LOAD
        check_command                   check_nrpe!check_load
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       10.3.37.110
        service_description             CHECK-ZOMBIE-PROCS
        check_command                   check_nrpe!check_zombie_procs
        }

define service{
        use                             local-service         ; Name of service template to use
        host_name                       10.3.37.110
        service_description             CHECK-TOTAL-PROCS
        check_command                   check_nrpe!check_total_procs
        }


vi /usr/local/nagios/etc/objects/host/group.cfg
文件示例:

###############################################################################
###############################################################################
#
# HOST GROUP DEFINITION
#
###############################################################################
###############################################################################

# Define an optional hostgroup for Linux machines

define hostgroup{
        hostgroup_name  linux-servers ; The name of the hostgroup
        alias           Linux Servers ; Long name of the group
        members         *     ; Comma separated list of hosts that belong to this group
        }


令SELinux处于宽容模式(permissive,出现无权限问题的话就执行)
setenforce 0


检查配置与启动
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios start


访问监控web
http://localhost/nagios/


2、nagios被监控端安装

没安装xinetd的要安装
yum -y install xinetd


安装Nagios插件
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make
make install


安装nrpe
./configure
make all
make install-daemon
make install-daemon-config
make install-xinetd


配置nrpe启动
vi /etc/xinetd.d/nrpe
service nrpe
{
        flags           = REUSE
        socket_type     = stream    
        port            = 5666    
        wait            = no
        user            = nagios
        group           = nagios
        server          = /usr/local/nagios/bin/nrpe
        server_args     = -c /usr/local/nagios/etc/nrpe.cfg --inetd
        log_on_failure  += USERID
        disable         = no
        only_from       = 127.0.0.1 10.3.37.110
# only_from: 允许访问的监控端(Nagios 服务器)IP,多个 IP 之间用空格隔开
}


vi /etc/services

加入以下:
nrpe            5666/tcp                        # nrpe


重启 xinetd 服务
/etc/init.d/xinetd restart


检查nrpe是否正常工作
在监控端执行以下命令,返回版本则成功。
/usr/local/nagios/libexec/check_nrpe -H 被监控端ip
NRPE v2.12


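配置监控命令(监控端 check_nrpe! 后面的命令名必须在此文件中有对应的 command[...] 定义;例如前面示例用到的 check_sda7,需要按实际分区自行添加一行 command[check_sda7]=...)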
vi /usr/local/nagios/etc/nrpe.cfg


# The following examples use hardcoded command arguments...
###############
command[check_users]=/usr/local/nagios/libexec/check_users -w 10 -c 20
command[check_load]=/usr/local/nagios/libexec/check_load -w 16,10,8 -c 30,25,20
command[check_sda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1
command[check_sda2]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda2
command[check_sda5]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda5
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 300 -c 360 
command[check_http]=/usr/local/nagios/libexec/check_http -H 10.3.37.110 -u /nagios.php
command[check_ftp]=/usr/local/nagios/libexec/check_ftp -H 10.3.37.110 -p 21
command[check_ssh]=/usr/local/nagios/libexec/check_ssh 10.3.37.110
command[check_alive]=/usr/local/nagios/libexec/check_ping -H 10.3.37.110 -w 100,20% -c 500,60% -p 4
command[check_105mysql]=/usr/local/nagios/libexec/check_mysql -H 10.3.37.110 -P 3306 -u nagios -p ***
##############


检查监控命令是否生效
在监控端执行以下命令,返回结果则成功。
/usr/local/nagios/libexec/check_nrpe -H 被监控端ip -c check_load

OK - load average: 0.00, 0.00, 0.00|load1=0.000;15.000;30.000;0; load5=0.000;10.000;25.000;0; load15=0.000;5.000;20.000;0;


3、Nagios 的性能分析图

监控服务变化曲线的工具 ---- PNP

安装rrdtool(绘图工具)可能需要的库
yum install cairo pango libart_lgpl libart_lgpl-devel zlib zlib-devel freetype freetype-devel


安装rrdtool
./configure
make
make install


编辑Nagios 的主配置文件 nagios.cfg
vi /usr/local/nagios/etc/nagios.cfg
修改如下:
process_performance_data=1
host_perfdata_command=process-host-perfdata
service_perfdata_command=process-service-perfdata


如果想要对某个监控对象做数据图表,则需在所对应的host或者service 定义中包含如下的定义:
process_perf_data 1

编辑commands.cfg,将“process-service-perfdata”命令的 command_line 替换为 PNP 的 process_perfdata.pl 脚本:
define command{
        command_name    process-service-perfdata
#       command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out
        
        command_line /usr/local/nagios/libexec/process_perfdata.pl

#        command_line    /usr/bin/perl /usr/local/nagios/sbin/insert.cgi 
        }


安装PNP
./configure --with-rrdtool=/usr/local/rrdtool-1.0.50/bin/rrdtool
make all
make install


检查配置文件并重启
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios restart


访问web
http://localhost/nagios/pnp/index.php


4、整合飞信机器人发送短信报警

飞信机器人下载地址: http://www.it-adv.net/

加入飞信运行所需libACE库文件
tar zxvf fetion20091117-linux.tar.gz -C /usr/local/
mv /usr/local/fx /usr/local/fetion


安装飞信机器人
chmod -R 755 /usr/local/fetion
chown -R nagios:nagios /usr/local/fetion


加入飞信.so文件到系统链接库
vi /etc/ld.so.conf.d/fetion.conf
加入一行:
/usr/local/fetion/
更新:ldconfig


发送短信测试
/usr/local/fetion/fetion --hide --mobile=137*** --pwd=*** --to=136*** --msg-utf8="test"


编辑发送飞信命令commands.cfg
vi /usr/local/nagios/etc/objects/commands.cfg

# 'host-notify-by-fei' command definition
define command {
             command_name            host-notify-by-fei
             command_line            /usr/local/fetion/fetion --hide --mobile=136******** --pwd=*** --to=$CONTACTPAGER$ --msg-utf8="Host $HOSTSTATE$ alert for $HOSTNAME$! on '$LONGDATETIME$'" $CONTACTPAGER$
             }

# 'service-notify-by-fei' command definition
define command {
             command_name         service-notify-by-fei
             command_line         /usr/local/fetion/fetion --hide --mobile=136******** --pwd=*** --to=$CONTACTPAGER$ --msg-utf8="$HOSTADDRESS$ $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ on $LONGDATETIME$" $CONTACTPAGER$
             }


编辑联系人配置文件contacts.cfg
vi /usr/local/nagios/etc/objects/contacts.cfg
加入*-notify-by-fei两行和pager
define contact{
        contact_name                    nagiosadmin             ; Short name of user
        use                             generic-contact         ; Inherit default values from generic-contact template (defined above)
        alias                           Nagios Admin            ; Full name of user
        service_notification_commands   service-notify-by-fei
        host_notification_commands      host-notify-by-fei
        email                           nagios@localhost        ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
        pager                           136********
        }


检查配置文件并重启
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
/etc/init.d/nagios restart

你可能感兴趣的:(java,linux,配置管理,ssh,nagios)