nagios安装

 

[root@client ~]# rpm -ivh http://www.aminglinux.com/bbs/data/p_w_upload/forum/month_1211/epel-release-6-7.noarch.rpm

 

[root@client ~]# yum install -y httpd nagios nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe

 

[root@client ~]# htpasswd -c /etc/nagios/passwd nagiosadmin

New password:

Re-type new password:

Adding password for user nagiosadmin

//设置密码

[root@client ~]# vim /etc/nagios/nagios.cfg    //查看配置文件

[root@client ~]# nagios -v /etc/nagios/nagios.cfg

Total Warnings: 0

Total Errors:   0

 

  //检查配置文件,无告警和错误

 

[root@client ~]# service httpd start

[root@client ~]# service nagios start

//启动服务

 

 

浏览器访问:192.168.137.21/nagios

输入用户名密码,访问

 

nagios安装客户端1

在客户端上:

[root@Client ~]# rpm -ivh http://www.aminglinux.com/bbs/data/p_w_upload/forum/month_1211/epel-release-6-7.noarch.rpm

[root@Client ~]# yum install -y nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe

[root@Client ~]# vim /etc/nagios/nrpe.cfg

allowed_hosts=192.168.137.21               //允许哪台设备连(服务端)

dont_blame_nrpe=1

 

[root@Client ~]# /etc/init.d/nrpe start

 

监控端服务器添加被监控的主机(192.168.137.23)

[root@client ~]# cd /etc/nagios/conf.d/

[root@client conf.d]# vim 192.168.137.23.cfg

define host{

        use                     linux-server            ; Name of host template to use

                                                        ; This host definition will inherit all variables that are defined

                                                        ; in (or inherited by) the linux-server host template definition.

        host_name               192.168.137.23

        alias                   137.23

        address                 192.168.137.23

        }

 

define service{

        use                     generic-service

        host_name               192.168.137.23

        service_description     check_ping

        check_command           check_ping!100.0,20%!200.0,50%

        max_check_attempts 5

        normal_check_interval 1

}

 

define service{

        use                     generic-service

        host_name               192.168.137.23

        service_description     check_ssh

        check_command           check_ssh

        max_check_attempts      5

        normal_check_interval 1

}

 

define service{

        use                     generic-service

        host_name               192.168.137.23

        service_description     check_http

        check_command           check_http

        max_check_attempts      5

        normal_check_interval 1

}

 

 

[root@client conf.d]# nagios -v /etc/nagios/nagios.cfg        //检查配置有无错误

Total Warnings: 0

Total Errors:   0

 

[root@client conf.d]# service nagios restart

点击左侧host,出现192.168.137.23客户端(被监控设备)

点击services查看各种服务状态

 

 

 

nagios 监控客户端 2

以上监控无需安装其他软件,现借助nrpe监控具体的服务

 

服务端:

 

[root@client conf.d]# vim /etc/nagios/objects/commands.cfg             //末尾加

define command{

         command_name    check_nrpe

         command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$

         }

 

 

[root@client conf.d]# vim /etc/nagios/conf.d/192.168.137.23.cfg

define service{

        use     generic-service

        host_name       192.168.137.23

        service_description     check_load

        check_command           check_nrpe!check_load

        max_check_attempts 5

        normal_check_interval 1

}

 

define service{

        use     generic-service

        host_name       192.168.137.23

        service_description     check_disk_hda1

        check_command           check_nrpe!check_hda1

        max_check_attempts 5

        normal_check_interval 1

}

 

define service{

        use     generic-service

        host_name       192.168.137.23

        service_description     check_disk_hda3

        check_command           check_nrpe!check_hda3

        max_check_attempts 5

        normal_check_interval 1

}

 

//在文件末尾再添加以上三个服务

 

 

客户端查看配置文件

[root@Client ~]# vim /etc/nagios/nrpe.cfg

command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10

command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20

command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1

command[check_hda3]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda3

command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z

command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200

 

//查看:服务端通过 check_nrpe 调用的命令(如 check_load、check_hda1、check_hda3)都在此文件中定义。需将 hda1 修改为 sda1,因为本设备上只有 sda1,没有 hda1。

 

[root@Client ~]# ls /usr/lib/nagios/plugins/check_load

/usr/lib/nagios/plugins/check_load

[root@Client ~]# /etc/init.d/nrpe restart

 

 

 

服务端:

[root@client conf.d]# /etc/init.d/nagios restart

 

网页访问;新添加的三个监控服务已出现

 

服务端查看nagios日志

[root@client conf.d]# ls /var/log/nagios/

[root@client conf.d]# tail /var/log/nagios/nagios.log

 

 

 

nagios 配置邮件告警

先定义联系人和联系人组,以及告警接收人的邮箱等信息

[root@client conf.d]# vim /etc/nagios/objects/contacts.cfg

define contact{                                                 #定义一个人

        contact_name                    123

        use                             generic-contact

        alias                           cai

        email                           user1@example.com       ; 示例地址,请替换为实际接收告警的邮箱

        }

 

define contact{

        contact_name                    456

        use                             generic-contact

        alias                           yao

        email                           user2@example.com       ; 示例地址,请替换为实际接收告警的邮箱

        }

 

define contactgroup{                                      #定义一个组有哪些人

        contactgroup_name               common

        alias                           common

        members                         123,456

        }

 

//尾行添加

 

 

然后需在告警的服务器里加上contactgroup

服务器上:

[root@client conf.d]# vim /etc/nagios/conf.d/192.168.137.23.cfg

define service{

        use     generic-service

        host_name       192.168.137.23

        service_description     check_load

        check_command           check_nrpe!check_load

        max_check_attempts 5

        normal_check_interval 1

        contact_groups     common

       # notification_period  24x7

       # notification_options c,r

}

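//check_load(负载)需要邮件告警时,在 check_load 服务中加上 contact_groups common。common 为上面定义的联系人组,告警会发送给组内所有成员。
//notification_period 为告警时间段,全天候告警应填写 timeperiods.cfg 中定义的时间段名 24x7(不是 24*7)。
//notification_options c,r 表示出现严重问题(critical)时以及恢复(recovery)时发送告警。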