1、基础环境安装(系统最小安装)
yum install gcc glibc glibc-common gd gd-devel httpd
2、nagios和 nagios-plus的安装
/usr/sbin/useradd nagios /usr/sbin/groupadd nagcmd /usr/sbin/usermod -G nagcmd nagios /usr/sbin/usermod -G nagcmd apache tar -jxvf nagios-cn- 3 . 2 . 3 .tar.bz2 cd nagios-cn- 3 . 2 . 3 ./configure --with-group=nagios --with-user=nagios --with-command-group=nagcmd --with-gd-lib=/usr/lib --with -gd- inc =/usr/include make all make install make install-init make install-commandmode make install-config make install-webconf htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin ##创建web登录用户名和密码 chown -R nagios.nagios /usr/local/nagios/etc/htpasswd.users tar -zxvf nagios-plugins- 1 . 4 . 16 .tar.gz cd nagios-plus- 1 . 4 . 16 ./configure --prefix=/usr/local/nagios --with-nagios-user=nagios --with-nagios-gourp=nagios make&&make install
PS:当安装nagios-plugins-1.4.16时候报错如下
ps/check_http.Tpo -c -o check_http.o check_http.c check_http. c: In function ‘process_arguments’: check_http. c: 312 : error: ‘ssl_version’ undeclared (first use in this function) check_http. c: 312 : error: (Each undeclared identifier is reported only once check_http. c: 312 : error: for each function it appears in .) make[ 2 ]: *** [check_http.o] Error 1 make[ 2 ]: Leaving directory `/data/nagios-plugins- 1 . 4 . 16 /plugins ' make[1]: *** [all-recursive] Error 1 make[1]: Leaving directory `/data/nagios-plugins-1.4.16 ' make: *** [all] Error 2
解决办法:
1 、yum -y install openssl openssl-devel 2 、重新configure
3、配置文件修改
[root@db3 etc]# pwd /usr/local/nagios/etc [root@db3 etc]# tree . ├── cgi.cfg ##需要修改 ├── htpasswd.users ├── nagios.cfg ##需要修改 ├── nagiosgraph.cfg ├── objects │ ├── commands.cfg ##需要修改 │ ├── contacts.cfg ##需要修改 │ ├── localhost.cfg │ ├── printer.cfg │ ├── switch.cfg │ ├── templates.cfg ##需要修改 │ ├── timeperiods.cfg │ └── windows.cfg └── resource.cfg
[root@db3 etc]# cat cgi.cfg |grep -v "#" | awk "NF" main_config_file=/usr/local/nagios/etc/nagios.cfg physical_html_path=/usr/local/nagios/share url_html_path=/nagios show_context_help=0 use_pending_states=1 use_authentication=1 use_ssl_authentication=0 authorized_for_system_information=nagiosadmin,command authorized_for_configuration_information=nagiosadmin,command authorized_for_system_commands=nagiosadmin authorized_for_all_services=nagiosadmin authorized_for_all_hosts=nagiosadmin authorized_for_all_service_commands=nagiosadmin authorized_for_all_host_commands=nagiosadmin statusmap_background_image=zytianbk.png default_statusmap_layout=0 default_statuswrl_layout=4 ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$ refresh_rate=90 escape_html_tags=1 action_url_target=_blank notes_url_target=_blank lock_author_names=1 ttf_file=/usr/local/nagios/sbin/simhei.ttf statusmap_mod=2
nagios.cfg内容如下:
[root@yw_monitor_db etc]# cat nagios.cfg |grep -v "#" | awk "NF" log_file=/usr/local/nagios/var/nagios.log cfg_file=/usr/local/nagios/etc/objects/commands.cfg cfg_file=/usr/local/nagios/etc/objects/contacts.cfg cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg cfg_file=/usr/local/nagios/etc/objects/templates.cfg cfg_dir=/usr/local/nagios/etc/objects/cfg object_cache_file=/usr/local/nagios/var/objects.cache precached_object_file=/usr/local/nagios/var/objects.precache resource_file=/usr/local/nagios/etc/resource.cfg status_file=/usr/local/nagios/var/status.dat status_update_interval=10 nagios_user=nagios nagios_group=nagios check_external_commands=1 command_check_interval=-1 command_file=/usr/local/nagios/var/rw/nagios.cmd external_command_buffer_slots=4096 lock_file=/usr/local/nagios/var/nagios.lock temp_file=/usr/local/nagios/var/nagios.tmp temp_path=/tmp event_broker_options=-1 log_rotation_method=d log_archive_path=/usr/local/nagios/var/archives use_syslog=1 log_notifications=1 log_service_retries=1 log_host_retries=1 log_event_handlers=1 log_initial_states=0 log_external_commands=1 log_passive_checks=1 service_inter_check_delay_method=s max_service_check_spread=30 service_interleave_factor=s host_inter_check_delay_method=s max_host_check_spread=30 max_concurrent_checks=0 check_result_reaper_frequency=10 max_check_result_reaper_time=30 check_result_path=/usr/local/nagios/var/spool/checkresults max_check_result_file_age=3600 cached_host_check_horizon=15 cached_service_check_horizon=15 enable_predictive_host_dependency_checks=1 enable_predictive_service_dependency_checks=1 soft_state_dependencies=0 auto_reschedule_checks=0 auto_rescheduling_interval=30 auto_rescheduling_window=180 sleep_time=0.25 service_check_timeout=60 host_check_timeout=30 event_handler_timeout=30 notification_timeout=30 ocsp_timeout=5 perfdata_timeout=5 retain_state_information=1 state_retention_file=/usr/local/nagios/var/retention.dat retention_update_interval=60 use_retained_program_state=1 use_retained_scheduling_info=1 retained_host_attribute_mask=0 retained_service_attribute_mask=0 retained_process_host_attribute_mask=0 retained_process_service_attribute_mask=0 retained_contact_host_attribute_mask=0 retained_contact_service_attribute_mask=0 interval_length=60 check_for_updates=1 bare_update_check=0 use_aggressive_host_checking=0 execute_service_checks=1 accept_passive_service_checks=1 execute_host_checks=1 accept_passive_host_checks=1 enable_notifications=1 enable_event_handlers=1 process_performance_data=0 obsess_over_services=0 obsess_over_hosts=0 translate_passive_host_checks=0 passive_host_checks_are_soft=0 check_for_orphaned_services=1 check_for_orphaned_hosts=1 check_service_freshness=1 service_freshness_check_interval=60 check_host_freshness=0 host_freshness_check_interval=60 additional_freshness_latency=15 enable_flap_detection=1 low_service_flap_threshold=5.0 high_service_flap_threshold=20.0 low_host_flap_threshold=5.0 high_host_flap_threshold=20.0 date_format=us p1_file=/usr/local/nagios/bin/p1.pl enable_embedded_perl=1 use_embedded_perl_implicitly=1 illegal_object_name_chars=`~!$%^&*|'"<>?,()= illegal_macro_output_chars=`~$&|'"<> use_regexp_matching=0 use_true_regexp_matching=0 admin_email=nagios@localhost admin_pager=pagenagios@localhost daemon_dumps_core=0 use_large_installation_tweaks=0 enable_environment_macros=1 debug_level=0 debug_verbosity=1 debug_file=/usr/local/nagios/var/nagios.debug max_debug_file_size=1000000
commands.cfg的内容如下:
[root@yw_monitor_db objects]# cat commands.cfg |grep -v "#" | awk "NF" define command{ command_name notify-host-by-email command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$ } define command{ command_name notify-service-by-email command_line /usr/bin/printf "Host: $HOSTALIAS$\nService: $SERVICEDESC$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nDate/Time: $LONGDATETIME$\nInfo:$SERVICEOUTPUT$\n持续时间:$SERVICEDURATION$" | mail -s " $HOSTALIAS$ $SERVICESTATE$" $CONTACTEMAIL$ } define command{ command_name notify-service-by-email-163 command_line /usr/bin/printf "Host: $HOSTALIAS$\nService: $SERVICEDESC$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nDate/Time: $LONGDATETIME$\nInfo:$SERVICEOUTPUT$\n持续时间:$SERVICEDURATION$" | mail -s " $HOSTALIAS$ $SERVICESTATE$" $CONTACTEMAIL$ } define command{ command_name notify-service-by-email-164 command_line /usr/local/nagios/bin/send_message.sh $CONTACTEMAIL$ $_SERVICETYPE$ $HOSTALIAS$-$SERVICEDESC$ info:$SERVICEOUTPUT$ Duration:$SERVICEDURATION$ } define command{ command_name check_mysql command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u nagios -d nagios -p 123456 } define command{ command_name check_mysql_slave command_line $USER1$/check_mysql -H $HOSTADDRESS$ -P13267 -S -uroot -p123456 -s /data/mysql/nagios/logs/mysql.sock } define command{ command_name check-host-alive command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5 } define command{ command_name check_local_disk command_line $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$ } define command{ command_name check_local_load command_line $USER1$/check_load -w $ARG1$ -c $ARG2$ } define command{ command_name check_local_procs command_line $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$ } define command{ command_name check_local_users command_line $USER1$/check_users -w $ARG1$ -c $ARG2$ } define command{ command_name check_local_swap command_line $USER1$/check_swap -w $ARG1$ -c $ARG2$ } define command{ command_name check_local_mrtgtraf command_line $USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ } define command{ command_name check_ftp command_line $USER1$/check_ftp -H $HOSTADDRESS$ $ARG1$ } define command{ command_name check_hpjd command_line $USER1$/check_hpjd -H $HOSTADDRESS$ $ARG1$ } define command{ command_name check_snmp command_line $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$ } define command{ command_name check_http command_line $USER1$/check_http -H $HOSTADDRESS$ $ARG1$ } define command{ command_name check_ssh command_line $USER1$/check_ssh $ARG1$ $HOSTADDRESS$ } define command{ command_name check_dhcp command_line $USER1$/check_dhcp $ARG1$ } define command{ command_name check_ping command_line $USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5 } define command{ command_name check_pop command_line $USER1$/check_pop -H $HOSTADDRESS$ $ARG1$ } define command{ command_name check_imap command_line $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$ } define command{ command_name check_smtp command_line $USER1$/check_smtp -H $HOSTADDRESS$ $ARG1$ } define command{ command_name check_tcp command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$ } define command{ command_name check_udp command_line $USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$ } define command{ command_name check_nt command_line $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$ } define command{ command_name check_dig command_line $USER1$/check_dig -H $HOSTADDRESS$ -T $ARG4$ -l $ARG1$ -w $ARG2$ -c $ARG3$ } define command{ command_name process-host-perfdata command_line /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out } define command{ command_name process-service-perfdata command_line /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out } define command{ command_name check_nrpe command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ }
contacts.cfg的内容如下:
[root@yw_monitor_db objects]# cat contacts.cfg |grep -v "#" | awk "NF" define contact{ use generic-contact-163 contact_name zhaohaijun-mail alias zhaohaijun-mail email [email protected] } define contact{ use generic-contact-164 contact_name navyaijm-call alias navyaijm-call email 18630152179 } define contactgroup{ contactgroup_name ops_admin alias ops_admin members navyaijm-call,navyaijm-mail }
templates.cfg的内容如下:
[root@yw_monitor_db objects]# cat templates.cfg |grep -v "#" | awk "NF" define contact{ name generic-contact ; The name of this contact template service_notification_period 24x7 ; service notifications can be sent anytime host_notification_period 24x7 ; host notifications can be sent anytime service_notification_options w,u,c,r,f,s ; send notifications for all service states, flapping events, and scheduled downtime events host_notification_options d,u,r,f,s ; send notifications for all host states, flapping events, and scheduled downtime events service_notification_commands notify-service-by-email ; send service notifications via email host_notification_commands notify-host-by-email ; send host notifications via email register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE! } define contact{ name generic-contact-163 ; The name of this contact template service_notification_period 24x7 ; service notifications can be sent anytime host_notification_period 24x7 ; host notifications can be sent anytime service_notification_options w,u,c,r,f,s ; send notifications for all service states, flapping events, and scheduled downtime events host_notification_options d,u,r,f,s ; send notifications for all host states, flapping events, and scheduled downtime events service_notification_commands notify-service-by-email-163 ; send service notifications via email host_notification_commands notify-host-by-email ; send host notifications via email register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE! } define contact{ name generic-contact-164 ; The name of this contact template service_notification_period 24x7 ; service notifications can be sent anytime host_notification_period 24x7 ; host notifications can be sent anytime service_notification_options w,u,c,r,f,s ; send notifications for all service states, flapping events, and scheduled downtime events host_notification_options d,u,r,f,s ; send notifications for all host states, flapping events, and scheduled downtime events service_notification_commands notify-service-by-email-164 ; send service notifications via email host_notification_commands notify-host-by-email ; send host notifications via email register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE! } define host{ name generic-host ; The name of this host template notifications_enabled 1 ; Host notifications are enabled event_handler_enabled 1 ; Host event handler is enabled flap_detection_enabled 1 ; Flap detection is enabled failure_prediction_enabled 1 ; Failure prediction is enabled process_perf_data 1 ; Process performance data retain_status_information 1 ; Retain status information across program restarts retain_nonstatus_information 1 ; Retain non-status information across program restarts notification_period 24x7 ; Send host notifications at any time register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE! } define host{ name linux-server ; The name of this host template use generic-host ; This template inherits other values from the generic-host template check_period 24x7 ; By default, Linux hosts are checked round the clock check_interval 5 ; Actively check the host every 5 minutes retry_interval 1 ; Schedule host check retries at 1 minute intervals active_checks_enabled 0 max_check_attempts 10 ; Check each Linux host 10 times (max) check_command check-host-alive ; Default command to check Linux hosts notification_period workhours ; Linux ops_admin hate to be woken up, so we only notify during the day ; Note that the notification_period variable is being overridden from ; the value that is inherited from the generic-host template! notification_interval 120 ; Resend notifications every 2 hours notification_options d,u,r ; Only send notifications for specific host states contact_groups ops_admin ; Notifications get sent to the ops_admin by default register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE! } define host{ name windows-server ; The name of this host template use generic-host ; Inherit default values from the generic-host template check_period 24x7 ; By default, Windows servers are monitored round the clock check_interval 5 ; Actively check the server every 5 minutes retry_interval 1 ; Schedule host check retries at 1 minute intervals max_check_attempts 10 ; Check each server 10 times (max) check_command check-host-alive ; Default command to check if servers are "alive" notification_period 24x7 ; Send notification out at any time - day or night notification_interval 30 ; Resend notifications every 30 minutes notification_options d,r ; Only send notifications for specific host states contact_groups ops_admin ; Notifications get sent to the ops_admin by default hostgroups windows-servers ; Host groups that Windows servers should be a member of register 0 ; DONT REGISTER THIS - ITS JUST A TEMPLATE } define host{ name generic-printer ; The name of this host template use generic-host ; Inherit default values from the generic-host template check_period 24x7 ; By default, printers are monitored round the clock check_interval 5 ; Actively check the printer every 5 minutes retry_interval 1 ; Schedule host check retries at 1 minute intervals max_check_attempts 10 ; Check each printer 10 times (max) check_command check-host-alive ; Default command to check if printers are "alive" notification_period workhours ; Printers are only used during the workday notification_interval 30 ; Resend notifications every 30 minutes notification_options d,r ; Only send notifications for specific host states contact_groups ops_admin ; Notifications get sent to the ops_admin by default register 0 ; DONT REGISTER THIS - ITS JUST A TEMPLATE } define host{ name generic-switch ; The name of this host template use generic-host ; Inherit default values from the generic-host template check_period 24x7 ; By default, switches are monitored round the clock check_interval 5 ; Switches are checked every 5 minutes retry_interval 1 ; Schedule host check retries at 1 minute intervals max_check_attempts 10 ; Check each switch 10 times (max) check_command check-host-alive ; Default command to check if routers are "alive" notification_period 24x7 ; Send notifications at any time notification_interval 30 ; Resend notifications every 30 minutes notification_options d,r ; Only send notifications for specific host states contact_groups ops_admin ; Notifications get sent to the ops_admin by default register 0 ; DONT REGISTER THIS - ITS JUST A TEMPLATE } define service{ name generic-service ; The 'name' of this service template active_checks_enabled 1 ; Active service checks are enabled passive_checks_enabled 1 ; Passive service checks are enabled/accepted parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems) obsess_over_service 1 ; We should obsess over this service (if necessary) check_freshness 0 ; Default is to NOT check service 'freshness' notifications_enabled 1 ; Service notifications are enabled event_handler_enabled 1 ; Service event handler is enabled flap_detection_enabled 1 ; Flap detection is enabled failure_prediction_enabled 1 ; Failure prediction is enabled process_perf_data 1 ; Process performance data retain_status_information 1 ; Retain status information across program restarts retain_nonstatus_information 1 ; Retain non-status information across program restarts is_volatile 0 ; The service is not volatile check_period 24x7 ; The service can be checked at any time of the day max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state normal_check_interval 2 ; Check the service every 10 minutes under normal conditions retry_check_interval 1 ; Re-check the service every two minutes until a hard state can be determined contact_groups ops_admin ; Notifications get sent out to everyone in the 'ops_admin' group notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events notification_interval 5 ; Re-notify about service problems every hour notification_period 24x7 ; Notifications can be sent out at any time register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE! } define service{ name local-service ; The name of this service template use generic-service ; Inherit default values from the generic-service definition max_check_attempts 4 ; Re-check the service up to 4 times in order to determine its final (hard) state normal_check_interval 5 ; Check the service every 5 minutes under normal conditions retry_check_interval 1 ; Re-check the service every minute until a hard state can be determined register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE! }