nagios server端 --(original)

 

 

#---config nagios server---#

 

添加nagios SysV init 启动脚本

chkconfig --add nagios

chkconfig --level 3 nagios on

 

测试nagios配置文件:

/fgn/theron/nagios/bin/nagios -v /fgn/theron/nagios/etc/nagios.cfg

 

配置nagios目录的属主和属组

chown -R nagios:nagcmd /fgn/theron/nagios/

 

启动nagios

service nagios start

 

#---cfg files---#

objects(对象)是所有可监控和通知的要素。

下边包含的配置文件主要包括

hosts.cfg 定义被监控主机

hostgroups.cfg 定义被监控主机组

 

services.cfg 定义服务

servicegroups.cfg 定义服务组

 

contacts.cfg 定义联系人

contactgroups.cfg 定义联系人组

 

timeperiods.cfg 定义时间期限-如24x7全天候的监测

commands.cfg 定义命令

services.cfg 定义被监控进程

 

servicedependency 定义服务依赖

serviceescalation 定义服务通知升级

hostdependency 定义主机依赖

hostescalation 定义主机通知升级

 

cp -af /fgn/theron/nagios/etc/   /fgn/theron/nagios/etc.bak/

touch /fgn/theron/nagios/etc/objects/contactgroups.cfg

touch /fgn/theron/nagios/etc/objects/services.cfg

touch /fgn/theron/nagios/etc/objects/servicegroups.cfg

touch /fgn/theron/nagios/etc/objects/hosts.cfg

touch /fgn/theron/nagios/etc/objects/hostgroups.cfg

vim /fgn/theron/nagios/etc/nagios.cfg

修改cfg file为

cfg_file=/fgn/theron/nagios/etc/objects/contacts.cfg

cfg_file=/fgn/theron/nagios/etc/objects/contactgroups.cfg

cfg_file=/fgn/theron/nagios/etc/objects/services.cfg

cfg_file=/fgn/theron/nagios/etc/objects/servicegroups.cfg

cfg_file=/fgn/theron/nagios/etc/objects/commands.cfg

cfg_file=/fgn/theron/nagios/etc/objects/timeperiods.cfg

cfg_file=/fgn/theron/nagios/etc/objects/templates.cfg

cfg_file=/fgn/theron/nagios/etc/objects/hosts.cfg

cfg_file=/fgn/theron/nagios/etc/objects/hostgroups.cfg

cfg_file=/fgn/theron/nagios/etc/objects/localhost.cfg

 

 

新增hosts

cat << EOF >> /fgn/theron/nagios/etc/objects/hosts.cfg

define host{

host_name 192.168.1.205

alias 192.168.1.205

address 192.168.1.205

max_check_attempts 5

#check_interval 1

#retry_interval 1

check_period 24x7

contact_groups sa_groups

notification_interval 30

#first_notification_delay #

notification_period 24x7

notification_options d,u,r

}

EOF

 

cat << EOF >> /fgn/theron/nagios/etc/objects/hosts.cfg

define host{

host_name 192.168.1.4

alias 192.168.1.4

address 192.168.1.4

max_check_attempts 5

#check_interval 1

#retry_interval 1

check_period 24x7

contact_groups sa_groups

notification_interval 30

#first_notification_delay #

notification_period 24x7

notification_options d,u,r

}

EOF

 

 

新增hostgroups

cat << EOF >> /fgn/theron/nagios/etc/objects/hostgroups.cfg

define hostgroup{

hostgroup_name all_hosts

alias all_hosts

members 192.168.1.4,192.168.1.205

#notes note_string

#notes_url url

#action_url url

}

define hostgroup{

hostgroup_name http_hosts

alias http_hosts

members 192.168.1.4

#notes note_string

#notes_url url

#action_url url

}

EOF

 

 

 

新增contacts

cat << EOF >> /fgn/theron/nagios/etc/objects/contacts.cfg

define contact{

contact_name cheng

alias sa_cheng

host_notifications_enabled 1 [0/1]

service_notifications_enabled 1 [0/1]

host_notification_period 24x7

service_notification_period 24x7

host_notification_options d,u,r

service_notification_options w,u,c,r

host_notification_commands notify-service-by-email,notify-service-by-sms

service_notification_commands notify-host-by-email,notify-host-by-sms

email cheng@example.com

pager 13712345678

can_submit_commands 1 [0/1]

#retain_status_information [0/1]

#retain_nonstatus_information [0/1]

}

EOF

 

 

新增contactgroups

cat << EOF >> /fgn/theron/nagios/etc/objects/contactgroups.cfg

define contactgroup{

contactgroup_name sa_groups

alias sa_groups

members cheng

#contactgroup_members contactgroups

}

EOF

 

#下边检查调用的命令(check_command),在命令配置文件中定义或在nrpe配置文件中要有定义

#最大重试次数(max_check_attempts),一般设置为3-4次比较好,这样不会因为太敏感而发生误报,一丢包就发短信太崩溃了吧

#检查间隔(check_interval)和重试检查间隔(retry_interval)的单位是分钟,不同的检查项目酌情修改

#通知间隔(notification_interval)指探测到故障以后,每隔多少分钟发送一次报警信息。

#状态级别:

#d=send notifications on a DOWN state宕

#w=send notifications on a WARNING state警告状态

#c=send notifications on a CRITICAL state严重状态、临界状态

#u=send notifications on an UNREACHABLE or UNKNOWN state找不到、不可达

#r=send notifications on recoveries (OK state)OK状态

#f=send notifications when the host or service starts and stops flapping

#s=send notifications when scheduled downtime starts and ends

 

新增services

cat << EOF >> /fgn/theron/nagios/etc/objects/services.cfg

#monitor hosts

define service{

host_name 192.168.1.4

service_description check_ftp

check_command check_ftp

max_check_attempts 3

check_interval 10

retry_interval 5

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

 

#monitor all_hosts

define service{

hostgroup_name all_hosts

service_description check_host-alive

check_command check_ping

max_check_attempts 5

check_interval 3

retry_interval 1

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

define service{

hostgroup_name all_hosts

service_description check_df

check_command check_nrpe!check_df

max_check_attempts 4

check_interval 1440

retry_interval 5

check_period 24x7

notification_interval 1440

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

define service{

hostgroup_name all_hosts

service_description check_load

check_command check_nrpe!check_load

max_check_attempts 5

check_interval 5

retry_interval 5

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

define service{

hostgroup_name all_hosts

service_description check_zombie_procs

check_command check_nrpe!check_zombie_procs

max_check_attempts 5

check_interval 5

retry_interval 5

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

define service{

hostgroup_name all_hosts

service_description check_total_procs

check_command check_nrpe!check_total_procs

max_check_attempts 5

check_interval 5

retry_interval 5

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

define service{

hostgroup_name all_hosts

service_description check_ssh

check_command check_ssh

max_check_attempts 3

check_interval 60

retry_interval 5

check_period 24x7

notification_interval 60

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

 

#monitor http_hosts

define service{

hostgroup_name http_hosts

service_description check_http

check_command check_http

max_check_attempts 4

check_interval 3

retry_interval 1

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

EOF

 

 

 

新增servicegroups

cat << EOF >> /fgn/theron/nagios/etc/objects/servicegroups.cfg

define servicegroup{

        servicegroup_name       Ping

        alias                   Ping

        members                 test1,check_http,test2,check_http,test3,check_http

}

EOF

 

 

 

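新增 commands(注意:heredoc 的定界符要写成 'EOF',否则 shell 会把 $USER1$、$HOSTADDRESS$、$ARG1$ 当作变量展开,写入文件的 command_line 就不正确了)

cat << 'EOF' >> /fgn/theron/nagios/etc/objects/commands.cfg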

# 'check_nrpe' command definition

define command{

command_name check_nrpe

command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$

}

EOF

 

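定义完nrpe command之后,下面输出中关于 check_nrpe "not defined anywhere" 的报错就会消失(以下是定义之前的检查输出;其中 notify-*-by-sms 的报错需要另外定义短信通知命令,或者从联系人定义中去掉才能消除)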

[root@www nagios]# ./bin/nagios -v ./etc/nagios.cfg

 

Nagios Core 3.2.1

Copyright (c) 2009-2010 Nagios Core Development Team and Community Contributors

Copyright (c) 1999-2009 Ethan Galstad

Last Modified: 03-09-2010

License: GPL

 

Website: http://www.nagios.org

Reading configuration data...

   Read main config file okay...

Processing object config file '/fgn/theron/nagios/etc/objects/contacts.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/contactgroups.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/services.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/servicegroups.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/commands.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/timeperiods.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/templates.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/hosts.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/hostgroups.cfg'...

Processing object config file '/fgn/theron/nagios/etc/objects/localhost.cfg'...

   Read object config files okay...

 

Running pre-flight check on configuration data...

 

Checking services...

Error: Service check command 'check_nrpe' specified in service 'check_df' for host '192.168.1.205' not defined anywhere!

Error: Service check command 'check_nrpe' specified in service 'check_load' for host '192.168.1.205' not defined anywhere!

Error: Service check command 'check_nrpe' specified in service 'check_total_procs' for host '192.168.1.205' not defined anywhere!

Error: Service check command 'check_nrpe' specified in service 'check_zombie_procs' for host '192.168.1.205' not defined anywhere!

Error: Service check command 'check_nrpe' specified in service 'check_df' for host '192.168.1.4' not defined anywhere!

Error: Service check command 'check_nrpe' specified in service 'check_load' for host '192.168.1.4' not defined anywhere!

Error: Service check command 'check_nrpe' specified in service 'check_total_procs' for host '192.168.1.4' not defined anywhere!

Error: Service check command 'check_nrpe' specified in service 'check_zombie_procs' for host '192.168.1.4' not defined anywhere!

Checked 22 services.

Checking hosts...

Checked 3 hosts.

Checking host groups...

Checked 3 host groups.

Checking service groups...

Checked 0 service groups.

Checking contacts...

Error: Service notification command 'notify-host-by-sms' specified for contact 'cheng' is not defined anywhere!

Error: Host notification command 'notify-service-by-sms' specified for contact 'cheng' is not defined anywhere!

Checked 2 contacts.

Checking contact groups...

Checked 2 contact groups.

Checking service escalations...

Checked 0 service escalations.

Checking service dependencies...

Checked 0 service dependencies.

Checking host escalations...

Checked 0 host escalations.

Checking host dependencies...

Checked 0 host dependencies.

Checking commands...

Checked 24 commands.

Checking time periods...

Checked 5 time periods.

Checking for circular paths between hosts...

Checking for circular host and service dependencies...

Checking global event handlers...

Checking obsessive compulsive processor commands...

Checking misc settings...

 

Total Warnings: 0

Total Errors:   10

 

***> One or more problems was encountered while running the pre-flight check...

 

     Check your configuration file(s) to ensure that they contain valid

     directives and data defintions.  If you are upgrading from a previous

     version of Nagios, you should be aware that some variables/definitions

     may have been removed or modified in this version.  Make sure to read

     the HTML documentation regarding the config files, as well as the

     'Whats New' section to find out what has changed.

 

#---第一次新增主机---#

vim hosts.cfg

define host{

use   host-test-template

host_name test1

alias  test1

address 192.168.1.205

process_perf_data       1

}

相应新增要使用:

主机模板host-test-template

vim templates.cfg

define host{

name  host-test-template

register 0

contact_groups sa_groups

max_check_attempts 5

notification_interval 0

notification_period 24x7

notification_options d,u,r

check_command check-host-alive

}

新增联系人组sa_groups

vim contactgroups.cfg

define contactgroup{

contactgroup_name sa_groups

alias sa_groups

members cheng

}

新增联系人组成员:cheng

vim contacts.cfg

define contact{

contact_name cheng

alias sa_cheng

host_notifications_enabled 1 [0/1]

service_notifications_enabled 1 [0/1]

host_notification_period 24x7

service_notification_period 24x7

host_notification_options d,u,r

service_notification_options w,u,c,r

host_notification_commands notify-host-by-email

#,notify-host-by-sms

service_notification_commands notify-service-by-email

#,notify-service-by-sms

email cheng@example.com

pager 13712345678

can_submit_commands 1 [0/1]

#retain_status_information [0/1]

#retain_nonstatus_information [0/1]

}

新增命令:check-host-alive,notify-host-by-email,notify-service-by-email(默认就有,此步骤可省略)

vim commands.cfg

define command{

        command_name    check-host-alive

        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5

        }

define command{

        command_name    notify-host-by-email

        command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$

        }

define command{

        command_name    notify-service-by-email

        command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$

        }

给所有nrpe客户端使用:

define command{

         command_name check_nrpe

         command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 20

         }

#---以后新增主机---#

只需新增主机信息

vim hosts.cfg

define host{

use   host-test-template

host_name test1

alias  test1

address 192.168.1.205

process_perf_data       1

}

#---新增host group---#

define hostgroup{

hostgroup_name test_a_group

alias all_hosts

members test1,test2

#notes note_string

#notes_url url

#action_url url

}

相应新增service

vim services.cfg:

define service{

hostgroup_name test_a_group

service_description check-host-alive

check_command check_ping

max_check_attempts 5

check_interval 3

retry_interval 1

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

#contacts contacts(*)

contact_groups sa_groups

}

 

#---新增servicegroup---#

define servicegroup{

        servicegroup_name       Ping

        alias                   Ping

        members                 test1,check_http,test2,check_http,test3,check_http

}

 

#---新增脚本工具check_ips---#

相应新增service

define service{

hostgroup_name test_a_group,test_b_group,test_c_group

service_description check_ip_link

check_command check_nrpe!check_ips

max_check_attempts 5

check_interval 3

retry_interval 1

check_period 24x7

notification_interval 30

notification_period 24x7

notification_options w,u,c

contact_groups sa_groups

}

相应配置nrpe客户端:

command[check_ips]=/fgn/theron/nagios/libexec/ip_conn.sh

 

 

 

你可能感兴趣的:(nagios server端 --(original))