Nagios监控工具

不会显示具体的值,仅显示主机或服务的状态

强大的报警功能

高度插件化


Nagios监控主机/服务显示的4种状态

OK

WARNING

CRITICAL

UNKNOWN

Nagios对象:

主机,主机组

服务/资源,服务组

联系人,联系人组

时间段

命令(模板--->应用到某个被监控对象,以实现监控)


Nagios实现监控的方式:

check_by_ssh

check_by_nrpe:主要用于监控Linux或Unix主机(Windows主机也可通过NSClient++提供的check_nrpe功能接入)

check_by_nsclient++:用于监控Windows主机

check_by_snmp

check_by_nsca:以被动方式实现监控,由被监控对象主动向nagios发送状态信息

check_by_xyz:自定义监控方式/脚本

安装nagios


1、安装依赖的软件

# yum install -y httpd php php-mysql mysql-devel mysql 


2、创建nagios运行需要的用户和组

# groupadd nagcmd

# useradd -G nagcmd nagios


将运行apache进程的用户加入到nagcmd组中

# usermod -a -G nagcmd apache

3、安装nagios

# tar zxf nagios-3.3.1.tar.gz 

# cd nagios

# ./configure --with-command-group=nagcmd --enable-event-broker --sysconfdir=/etc/nagios

# make all

# make install

# make install-init

# make install-commandmode

# make install-config

#  make install-webconf

创建一个登录nagios web页面的用户,这个用户账号在以后通过web登录nagios时使用

# htpasswd -c /etc/nagios/htpasswd.users nagiosadmin

New password: 

Re-type new password: 

Adding password for user nagiosadmin



启动httpd服务

# service httpd start



4、安装nagios-plugins插件


# tar zxf nagios-plugins-1.5.tar.gz 

# cd nagios-plugins-1.5

# ./configure --with-nagios-user=nagios --with-nagios-group=nagios

# make && make install


5、启动nagios服务

# chkconfig --add nagios

# service nagios start 

6、访问nagios

http://10.1.1.2/nagios

配置nagios

nagios配置文件保存在/etc/nagios目录下

nagios插件目录

/usr/local/nagios/libexec

nagios监控windows主机

SNMP

NSClient++

nagios通过check_nt插件与windows NSClient++通信

NSClient++可提供check_nt,check_nrpe及NSCA的能力

NSClient++监听在12489/tcp端口与check_nt插件通信

配置check_nt与NSClient++监控windows主机


1、定义命令command

# vim /etc/nagios/objects/commands.cfg 


define command{

        command_name    check_nt

        command_line    $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$

        }


2、定义主机host及服务service

# vim /etc/nagios/objects/windows.cfg 


define host{

        use             windows-server  ; Inherit default values from a template

        host_name       winhost; The name we're giving to this host

        alias           My Windows Server       ; A longer name associated with the host

        address         10.1.1.254      ; IP address of the host

        }


define service{

        use                     generic-service

        host_name               winhost

        service_description     Uptime

        check_command           check_nt!UPTIME

        }

   

3、启用主机配置文件

# vim /etc/nagios/nagios.cfg 

cfg_file=/etc/nagios/objects/windows.cfg


4、检测配置文件语法,并重启nagios服务

# /usr/local/nagios/bin/nagios -v /etc/nagios/nagios.cfg 

# service nagios restart

浏览器访问http://10.1.1.2/nagios查看监控状态数据

配置check_nrpe监控Linux及windows主机


监控端通过check_nrpe与被监控端nrpe进程通信

nrpe监听在5666/tcp端口 

监控端、被监控端都需要安装nrpe addon,被监控端需要启动nrpe服务

一、监控Linux 主机10.1.1.1

1、在被监控主机上安装nrpe并配置


1)创建nagios用户

# useradd -s /sbin/nologin nagios


2)nrpe依赖于nagios-plugins,安装nagios-plugins 

# tar zxf nagios-plugins-1.4.14.tar.gz 

# cd nagios-plugins-1.4.14

# ./configure --with-nagios-user=nagios --with-nagios-group=nagios

# make all

# make install



3)安装nrpe

# tar zxf nrpe-2.15.tar.gz  

# cd nrpe-2.15

# ./configure --with-nrpe-user=nagios --with-nrpe-group=nagios --with-nagios-user=nagios --with-nagios-group=nagios --enable-command-args --enable-ssl

# make all

# make install-plugin

# make install-daemon

# make install-daemon-config



4)配置nrpe

# vim /usr/local/nagios/etc/nrpe.cfg 


log_facility=daemon

pid_file=/var/run/nrpe.pid

server_port=5666

server_address=10.1.1.1

nrpe_user=nagios

nrpe_group=nagios

allowed_hosts=10.1.1.2

command_timeout=60


定义监控本地资源的命令

command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10

command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20

command[check_sda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1

command[check_sda2]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda2

command[check_sda3]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda3

command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z

command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200 



5)编写启动nrpe服务脚本

# vim /etc/rc.d/init.d/nrpe

#!/bin/bash
#
# chkconfig: 2345 88 12
# description: NRPE DAEMON
#
# SysV init script for the NRPE daemon.
# Usage: service nrpe { start | stop | restart }
# Exit status: 0 after a recognised action, 2 for an unknown or missing action.

# Paths of the nrpe binary and its configuration file
NRPE=/usr/local/nagios/bin/nrpe
NRPECONF=/usr/local/nagios/etc/nrpe.cfg

case "$1" in
  start)
    echo -n "Starting NRPE daemon...."
    # -c selects the configuration file, -d runs nrpe as a standalone daemon
    "$NRPE" -c "$NRPECONF" -d
    echo "Done..."
    ;;
  stop)
    echo -n "Stopping NRPE daemon...."
    # Only signal nrpe processes owned by the nagios user
    pkill -u nagios nrpe
    echo "Done...."
    ;;
  restart)
    # Re-invoke this script so stop/start share the code paths above
    "$0" stop
    sleep 2
    "$0" start
    ;;
  *)
    echo "Usage: $0 { start | stop | restart }"
    # Invalid or missing action: report failure instead of falling through to exit 0
    exit 2
    ;;
esac

exit 0


# chmod +x /etc/rc.d/init.d/nrpe

# chkconfig --add nrpe

# service nrpe start

# netstat -tnlp

Active Internet connections (only servers)

Proto Recv-Q Send-Q Local Address               Foreign Address             State       PID/Program name   

tcp        0      0 10.1.1.1:5666               0.0.0.0:*                   LISTEN      6826/nrpe           



2、在监控端安装nrpe


1)安装nrpe 

# tar zxf nrpe-2.15.tar.gz 

# cd nrpe-2.15

# ./configure --with-nrpe-user=nagios --with-nrpe-group=nagios --with-nagios-user=nagios --with-nagios-group=nagios --enable-command-args --enable-ssl

# make all

# make install-plugin


安装完成后,在/usr/local/nagios/libexec目录下会产生check_nrpe插件

进入/usr/local/nagios/libexec目录后,通过以下命令可测试插件是否工作正常(能返回被监控端的NRPE版本号即表示通信正常)

# ./check_nrpe -H 10.1.1.1

NRPE v2.15




3、配置监控Linux主机


1)定义命令command

# vim /etc/nagios/objects/commands.cfg 

define command{

        command_name    check_nrpe

        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$

}


2)定义主机host及服务service

# cp /etc/nagios/objects/windows.cfg /etc/nagios/objects/linux.cfg

# vim /etc/nagios/objects/linux.cfg


define host{

use linux-server ; Inherit default values from a template

host_name linuxhost; The name we're giving to this host

alias My Linux Server ; A longer name associated with the host

address 10.1.1.1 ; IP address of the host

}


define service{

use generic-service

host_name linuxhost

service_description Users

check_command check_nrpe!check_users

}


define service{

use generic-service

host_name linuxhost

service_description Load

check_command check_nrpe!check_load

}


define service{

use generic-service

host_name linuxhost

service_description SDA1

check_command check_nrpe!check_sda1

}


define service{

use generic-service

host_name linuxhost

service_description SDA2

check_command check_nrpe!check_sda2

}


define service{

use generic-service

host_name linuxhost

service_description SDA3

check_command check_nrpe!check_sda3

}



define service{

use generic-service

host_name linuxhost

service_description Zombie procs

check_command check_nrpe!check_zombie_procs

}


define service{

use generic-service

host_name linuxhost

service_description total procs

check_command check_nrpe!check_total_procs

}



check_nrpe!check_total_procs

说明:“!”是命令名与参数之间的分隔符,“!”后面为被监控端nrpe.cfg中通过command[...]定义好的命令名称

3)启用主机配置文件

# vim /etc/nagios/nagios.cfg 

cfg_file=/etc/nagios/objects/linux.cfg

4)检测配置文件语法,并重启nagios服务

# /usr/local/nagios/bin/nagios -v /etc/nagios/nagios.cfg

# service nagios restart


在10.1.1.1上分别安装web及mysql服务,配置监控端10.1.1.2监控这两个服务


1、在10.1.1.1上安装web及mysql

# yum install -y httpd mysql-server 


需要在mysql服务器上创建允许监控端(10.1.1.2)远程登录的用户,该用户名和密码将作为check_mysql的参数使用(下文示例为admin/redhat)


2、编辑commands.cfg分别定义监控web及mysql服务的命令

# vim /etc/nagios/objects/commands.cfg 


define command{

        command_name    check_http

        command_line    $USER1$/check_http -I $HOSTADDRESS$ $ARG1$

        }

define command{

        command_name    check_mysql

        command_line    $USER1$/check_mysql -H $HOSTADDRESS$ -u $ARG1$ -p $ARG2$

        }

3、编辑linux.cfg配置文件,定义web及mysql服务

# vim /etc/nagios/objects/linux.cfg 


define service{

        use                     generic-service

        host_name               linuxhost

        service_description     Web Server

        check_command           check_http

        }


define service{

        use                     generic-service

        host_name               linuxhost

        service_description     Mysql Server

        check_command           check_mysql!admin!redhat

        }

4、检测配置文件语法,并重启nagios服务

# /usr/local/nagios/bin/nagios -v /etc/nagios/nagios.cfg 

# service nagios restart