#---config nagios server---#
将nagios的SysV init脚本注册为系统服务,并设置在运行级别3下开机自启
chkconfig --add nagios
chkconfig --level 3 nagios on
测试nagios配置文件:
/fgn/theron/nagios/bin/nagios -v /fgn/theron/nagios/etc/nagios.cfg
配置nagios目录属组
chown -R nagios:nagcmd /fgn/theron/nagios/
启动nagios
service nagios start
#---cfg files---#
objects(对象)是所有可监控和通知的要素。
下边包含的配置文件主要包括
hosts.cfg 定义被监控主机
hostgroups.cfg 定义被监控主机组
services.cfg 定义服务
servicegroups.cfg 定义服务组
contacts.cfg 定义联系人
contactgroups.cfg 定义联系人组
timeperiods.cfg 定义时间期限-如24x7全天候的监测
commands.cfg 定义命令
services.cfg 定义被监控进程
servicedependency 定义服务依赖
serviceescalation 定义服务告警升级(escalation)
hostdependency 定义主机依赖
hostescalation 定义主机告警升级(escalation)
cp -af /fgn/theron/nagios/etc/ /fgn/theron/nagios/etc.bak/
touch /fgn/theron/nagios/etc/objects/contactgroups.cfg
touch /fgn/theron/nagios/etc/objects/services.cfg
touch /fgn/theron/nagios/etc/objects/servicegroups.cfg
touch /fgn/theron/nagios/etc/objects/hosts.cfg
touch /fgn/theron/nagios/etc/objects/hostgroups.cfg
vim /fgn/theron/nagios/etc/nagios.cfg
修改cfg file为
cfg_file=/fgn/theron/nagios/etc/objects/contacts.cfg
cfg_file=/fgn/theron/nagios/etc/objects/contactgroups.cfg
cfg_file=/fgn/theron/nagios/etc/objects/services.cfg
cfg_file=/fgn/theron/nagios/etc/objects/servicegroups.cfg
cfg_file=/fgn/theron/nagios/etc/objects/commands.cfg
cfg_file=/fgn/theron/nagios/etc/objects/timeperiods.cfg
cfg_file=/fgn/theron/nagios/etc/objects/templates.cfg
cfg_file=/fgn/theron/nagios/etc/objects/hosts.cfg
cfg_file=/fgn/theron/nagios/etc/objects/hostgroups.cfg
cfg_file=/fgn/theron/nagios/etc/objects/localhost.cfg
新增hosts
cat << EOF >> /fgn/theron/nagios/etc/objects/hosts.cfg
define host{
host_name 192.168.1.205
alias 192.168.1.205
address 192.168.1.205
max_check_attempts 5
#check_interval 1
#retry_interval 1
check_period 24x7
contact_groups sa_groups
notification_interval 30
#first_notification_delay #
notification_period 24x7
notification_options d,u,r
}
EOF
cat << EOF >> /fgn/theron/nagios/etc/objects/hosts.cfg
define host{
host_name 192.168.1.4
alias 192.168.1.4
address 192.168.1.4
max_check_attempts 5
#check_interval 1
#retry_interval 1
check_period 24x7
contact_groups sa_groups
notification_interval 30
#first_notification_delay #
notification_period 24x7
notification_options d,u,r
}
EOF
新增hostgroups
cat << EOF >> /fgn/theron/nagios/etc/objects/hostgroups.cfg
define hostgroup{
hostgroup_name all_hosts
alias all_hosts
members 192.168.1.4,192.168.1.205
#notes note_string
#notes_url url
#action_url url
}
define hostgroup{
hostgroup_name http_hosts
alias http_hosts
members 192.168.1.4
#notes note_string
#notes_url url
#action_url url
}
EOF
新增contacts
cat << EOF >> /fgn/theron/nagios/etc/objects/contacts.cfg
define contact{
contact_name cheng
alias sa_cheng
host_notifications_enabled 1 [0/1]
service_notifications_enabled 1 [0/1]
host_notification_period 24x7
service_notification_period 24x7
host_notification_options d,u,r
service_notification_options w,u,c,r
host_notification_commands notify-host-by-email,notify-host-by-sms
service_notification_commands notify-service-by-email,notify-service-by-sms
email [email protected]
pager 13712345678
can_submit_commands 1 [0/1]
#retain_status_information [0/1]
#retain_nonstatus_information [0/1]
}
EOF
新增contactgroups
cat << EOF >> /fgn/theron/nagios/etc/objects/contactgroups.cfg
define contactgroup{
contactgroup_name sa_groups
alias sa_groups
members cheng
#contactgroup_members contactgroups
}
EOF
#下边检查调用的命令(check_command),在命令配置文件中定义或在nrpe配置文件中要有定义
#最大重试次数(max_check_attempts),一般设置为3-4次比较好,这样不会因为太敏感而发生误报,一丢包就发短信太崩溃了吧
#检查间隔(check_interval)和重试检查间隔(retry_interval)的单位是分钟,不同的检查项目酌情修改
#通知间隔(notification_interval)指探测到故障以后,每隔多少分钟发送一次报警信息。
#状态级别:
#d=send notifications on a DOWN state宕
#w=send notifications on a WARNING state警告状态
#c=send notifications on a CRITICAL state严重状态、临界状态
#u=send notifications on an UNREACHABLE or UNKNOWN state找不到、不可达
#r=send notifications on recoveries (OK state)OK状态
#f=send notifications when the host or service starts and stops flapping
#s=send notifications when scheduled downtime starts and ends
新增services
cat << EOF >> /fgn/theron/nagios/etc/objects/services.cfg
#monitor hosts
define service{
host_name 192.168.1.4
service_description check_ftp
check_command check_ftp
max_check_attempts 3
check_interval 10
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
#monitor all_hosts
define service{
hostgroup_name all_hosts
service_description check_host-alive
check_command check_ping!100.0,20%!500.0,60%
max_check_attempts 5
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_df
check_command check_nrpe!check_df
max_check_attempts 4
check_interval 1440
retry_interval 5
check_period 24x7
notification_interval 1440
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_load
check_command check_nrpe!check_load
max_check_attempts 5
check_interval 5
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_zombie_procs
check_command check_nrpe!check_zombie_procs
max_check_attempts 5
check_interval 5
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_total_procs
check_command check_nrpe!check_total_procs
max_check_attempts 5
check_interval 5
retry_interval 5
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
define service{
hostgroup_name all_hosts
service_description check_ssh
check_command check_ssh
max_check_attempts 3
check_interval 60
retry_interval 5
check_period 24x7
notification_interval 60
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
#monitor http_hosts
define service{
hostgroup_name http_hosts
service_description check_http
check_command check_http
max_check_attempts 4
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
EOF
新增servicegroups
cat << EOF >> /fgn/theron/nagios/etc/objects/servicegroups.cfg
define servicegroup{
servicegroup_name Ping
alias Ping
members test1,check_http,test2,check_http,test3,check_http
}
EOF
新增 commands
cat << 'EOF' >> /fgn/theron/nagios/etc/objects/commands.cfg
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
EOF
定义完check_nrpe命令后,下面输出中的check_nrpe报错会消失(以下为定义之前的验证输出);notify-*-by-sms相关报错则需要定义对应命令或从联系人配置中去掉后才会消失
[root@www nagios]# ./bin/nagios -v ./etc/nagios.cfg
Nagios Core 3.2.1
Copyright (c) 2009-2010 Nagios Core Development Team and Community Contributors
Copyright (c) 1999-2009 Ethan Galstad
Last Modified: 03-09-2010
License: GPL
Website: http://www.nagios.org
Reading configuration data...
Read main config file okay...
Processing object config file '/fgn/theron/nagios/etc/objects/contacts.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/contactgroups.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/services.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/servicegroups.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/commands.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/timeperiods.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/templates.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/hosts.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/hostgroups.cfg'...
Processing object config file '/fgn/theron/nagios/etc/objects/localhost.cfg'...
Read object config files okay...
Running pre-flight check on configuration data...
Checking services...
Error: Service check command 'check_nrpe' specified in service 'check_df' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_load' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_total_procs' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_zombie_procs' for host '192.168.1.205' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_df' for host '192.168.1.4' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_load' for host '192.168.1.4' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_total_procs' for host '192.168.1.4' not defined anywhere!
Error: Service check command 'check_nrpe' specified in service 'check_zombie_procs' for host '192.168.1.4' not defined anywhere!
Checked 22 services.
Checking hosts...
Checked 3 hosts.
Checking host groups...
Checked 3 host groups.
Checking service groups...
Checked 0 service groups.
Checking contacts...
Error: Service notification command 'notify-host-by-sms' specified for contact 'cheng' is not defined anywhere!
Error: Host notification command 'notify-service-by-sms' specified for contact 'cheng' is not defined anywhere!
Checked 2 contacts.
Checking contact groups...
Checked 2 contact groups.
Checking service escalations...
Checked 0 service escalations.
Checking service dependencies...
Checked 0 service dependencies.
Checking host escalations...
Checked 0 host escalations.
Checking host dependencies...
Checked 0 host dependencies.
Checking commands...
Checked 24 commands.
Checking time periods...
Checked 5 time periods.
Checking for circular paths between hosts...
Checking for circular host and service dependencies...
Checking global event handlers...
Checking obsessive compulsive processor commands...
Checking misc settings...
Total Warnings: 0
Total Errors: 10
***> One or more problems was encountered while running the pre-flight check...
Check your configuration file(s) to ensure that they contain valid
directives and data defintions. If you are upgrading from a previous
version of Nagios, you should be aware that some variables/definitions
may have been removed or modified in this version. Make sure to read
the HTML documentation regarding the config files, as well as the
'Whats New' section to find out what has changed.
#---第一次新增主机---#
vim hosts.cfg
define host{
use host-test-template
host_name test1
alias test1
address 192.168.1.205
process_perf_data 1
}
相应新增要使用:
主机模板host-test-template
vim templates.cfg
define host{
name host-test-template
contact_groups sa_groups
max_check_attempts 5
notification_interval 0
notification_period 24x7
notification_options d,u,r
check_command check-host-alive
register 0
}
新增联系组sa_groups
vim contactgroups.cfg
define contactgroup{
contactgroup_name sa_groups
alias sa_groups
members cheng
}
新增联系组成员:cheng
vim contacts.cfg
define contact{
contact_name cheng
alias sa_cheng
host_notifications_enabled 1 [0/1]
service_notifications_enabled 1 [0/1]
host_notification_period 24x7
service_notification_period 24x7
host_notification_options d,u,r
service_notification_options w,u,c,r
host_notification_commands notify-host-by-email
#,notify-host-by-sms
service_notification_commands notify-service-by-email
#,notify-service-by-sms
email [email protected]
pager 13712345678
can_submit_commands 1 [0/1]
#retain_status_information [0/1]
#retain_nonstatus_information [0/1]
}
新增命令:check-host-alive,notify-host-by-email,notify-service-by-email(默认就有,此步骤可省略)
vim commands.cfg
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}
define command{
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
define command{
command_name notify-service-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" | /bin/mail -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}
给所有nrpe客户端使用:
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 20
}
#---以后新增主机---#
只需新增主机信息
vim hosts.cfg
define host{
use host-test-template
host_name test1
alias test1
address 192.168.1.205
process_perf_data 1
}
#---新增host group---#
define hostgroup{
hostgroup_name test_a_group
alias all_hosts
members test1,test2
#notes note_string
#notes_url url
#action_url url
}
相应新增service
vim services.cfg:
define service{
hostgroup_name test_a_group
service_description check-host-alive
check_command check_ping!100.0,20%!500.0,60%
max_check_attempts 5
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
#contacts contacts(*)
contact_groups sa_groups
}
#---新增servicegroup---#
define servicegroup{
servicegroup_name Ping
alias Ping
members test1,check_http,test2,check_http,test3,check_http
}
#---新增脚本工具check_ips---#
相应新增service
define service{
hostgroup_name test_a_group,test_b_group,test_c_group
service_description check_ip_link
check_command check_nrpe!check_ips
max_check_attempts 5
check_interval 3
retry_interval 1
check_period 24x7
notification_interval 30
notification_period 24x7
notification_options w,u,c
contact_groups sa_groups
}
相应配置nrpe客户端:
command[check_ips]=/fgn/theron/nagios/libexec/ip_conn.sh