检测命令篇:
文中内容包括:叙述 Nagios 从发现主机、到 Web 界面显示出状态、再到邮件报警的整个过程,方便以后进行排错;还有如何编写特定应用的特定检测程序。
煮酒品茶:文章需要改进的地方有:如何设置触发报警的条件、警告等。
更新:
1、补充说明一些宏定义的资料。
$ARGn$:The nth argument passed to the command (notification, event handler, service check, etc.). Nagios supports up to 32 argument macros ($ARG1$ through $ARG32$). $USERn$:The nth user-definable macro. User macros can be defined in one or more resource files. Nagios supports up to 32 user macros ($USER1$ through $USER32$). |
2、解决问题,报警四种状态。
# Host liveness check: runs the check_ping plugin from the plugin directory
# ($USER1$) against the host's address ($HOSTADDRESS$).
#   -w 3000.0,80%   warning threshold pair, in the plugin's <wrta>,<wpl>% form
#   -c 5000.0,100%  critical threshold pair, in the plugin's <crta>,<cpl>% form
#   -p 5            number of ICMP ECHO packets to send
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}
|
[root@weihack libexec]# ./check_ping -h
Use ping to check connection statistics for a remote host.
Usage:check_ping -H <host_address> -w <wrta>,<wpl>% -c <crta>,<cpl>%
[-p packets] [-t timeout] [-4|-6]
Options:
-h, --help
Print detailed help screen
-V, --version
Print version information
-4, --use-ipv4
Use IPv4 connection
-6, --use-ipv6
Use IPv6 connection
-H, --hostname=HOST
host to ping
-w, --warning=THRESHOLD
warning threshold pair
-c, --critical=THRESHOLD
critical threshold pair
-p, --packets=INTEGER
number of ICMP ECHO packets to send (Default: 5)
-L, --link
show HTML in the plugin output (obsoleted by urlize)
-t, --timeout=INTEGER
Seconds before connection times out (default: 10)
|
[root@weihack objects]# cat commands.cfg |grep command_name
command_name notify-host-by-email
command_name notify-service-by-email
command_name check-host-alive
command_name check_local_disk
command_name check_local_load
command_name check_local_procs
command_name check_local_users
command_name check_local_swap
command_name check_local_mrtgtraf
command_name check_ftp
command_name check_hpjd
command_name check_snmp
command_name check_http
command_name check_ssh
command_name check_dhcp
command_name check_ping
command_name check_pop
command_name check_imap
command_name check_smtp
command_name check_tcp
command_name check_udp
command_name check_nt
command_name process-host-perfdata
command_name process-service-perfdata
|
# Host notification by e-mail: printf renders the message body (notification
# type, host name, host state, address, the Info line from $HOSTOUTPUT$, and
# the timestamp) and pipes it to /bin/mail, which sends it to $CONTACTEMAIL$
# with a subject of the form "** <type> Host Alert: <host> is <state> **".
# The whole pipeline must stay on a single command_line.
define command{
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
|
/usr/bin/printf "%b" "***** Nagios *****\n\n
Notification Type: $NOTIFICATIONTYPE$\n
Host: $HOSTNAME$\nState: $HOSTSTATE$\n
Address: $HOSTADDRESS$\n
Info: $HOSTOUTPUT$\n\n
Date/Time: $LONGDATETIME$\n
" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
|
主 题: ** RECOVERY Host Alert: rsync-89 is UP ** [新窗口打开]
时 间:
2013-03-13 22:57 (星期三)
发件人:
nagios<[email protected]> [添加联系人] [邮件往来] [拒收]
***** Nagios *****
Notification Type: RECOVERY
Host: rsync-89
State: UP
Address: 192.168.100.89
Info: PING OK - Packet loss = 0%, RTA = 0.32 ms
Date/Time: Wed Mar 13 22:57:44 CST 2013
|
[root@weihack libexec]# ./check_tcp -h
Usage:check_tcp -H host -p port [-w <warning time>] [-c <critical time>] [-s <send string>]
[-e <expect string>] [-q <quit string>][-m <maximum bytes>] [-d <delay>]
[-t <timeout seconds>] [-r <refuse state>] [-M <mismatch state>] [-v] [-4|-6] [-j]
[-D <days to cert expiry>] [-S <use SSL>] [-E]
Options:
-h, --help
Print detailed help screen
-V, --version
Print version information
-H, --hostname=ADDRESS
Host name, IP Address, or unix socket (must be an absolute path)
-p, --port=INTEGER
Port number (default: none)
-4, --use-ipv4
Use IPv4 connection
-6, --use-ipv6
Use IPv6 connection
-E, --escape
Can use \n, \r, \t or \ in send or quit string. Must come before send or quit option
Default: nothing added to send, \r\n added to end of quit
-s, --send=STRING
String to send to the server
-e, --expect=STRING
String to expect in server response (may be repeated)
-A, --all
All expect strings need to occur in server response. Default is any
-q, --quit=STRING
String to send server to initiate a clean close of the connection
-r, --refuse=ok|warn|crit
Accept TCP refusals with states ok, warn, crit (default: crit)
-M, --mismatch=ok|warn|crit
Accept expected string mismatches with states ok, warn, crit (default: warn)
-j, --jail
Hide output from TCP socket
-m, --maxbytes=INTEGER
Close connection once more than this number of bytes are received
-d, --delay=INTEGER
Seconds to wait between sending string and polling for response
-w, --warning=DOUBLE
Response time to result in warning status (seconds)
-c, --critical=DOUBLE
Response time to result in critical status (seconds)
-t, --timeout=INTEGER
Seconds before connection times out (default: 10)
-v, --verbose
Show details for command-line debugging (Nagios may truncate output)
|
# Generic TCP check: calls the check_tcp plugin for the host's address.
#   $ARG1$  port number, passed to -p
#   $ARG2$  appended as-is after the port, leaving room for further
#           check_tcp options
# The arguments come from the service's check_command after "!"
# (for example, check_tcp!80 gives $ARG1$=80 and an empty $ARG2$).
define command{
command_name check_tcp
command_line $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
}
|
# Nagios supports up to 32 $USERx$ macros ($USER1$ through $USER32$)
# Sets $USER1$ to be the path to the plugins
# (command_line entries then invoke plugins as $USER1$/<plugin>,
# e.g. $USER1$/check_ping)
$USER1$=/usr/local/nagios/libexec
|
[root@weihack objects]# vim services.cfg
# Service check for host rsync-89: runs the check_tcp command with the single
# argument 80 (check_tcp!80, i.e. $ARG1$=80, used as the port to probe).
# Checks and notifications both use the 24x7 period; notifications go to
# contact group ktm.
# NOTE(review): the interval values are in Nagios time units (minutes when
# interval_length is left at its default) - confirm against nagios.cfg.
# NOTE(review): notification_options w,u,c,r presumably selects
# warning/unknown/critical/recovery - confirm against the object definition docs.
define service {
host_name rsync-89
service_description check_tcp 80
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups ktm
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_tcp!80
}
|
[root@weihack libexec]# cat /test/passwd |wc -l
25
|
#check_user_nagios cwtea
#blog: cwtea.blog.51cto.com
# First draft of the check: counts the lines of /test/passwda and prints an
# OK line when the count is non-zero, otherwise a CRITICAL line.
# No explicit exit status is set here, so the script finishes with the status
# of the final echo.
cu=$(cat /test/passwda | wc -l)

# Start from the failure text and replace it once at least one line was found.
msg="User CRITICAL,User is none"
if [ "$cu" -ne 0 ]; then
  msg="User OK - User is running (UserNumber: ${cu})"
fi
echo "$msg"
|
[root@weihack libexec]# ./check_user
User CRITICAL,User is none |
User OK - User is running (UserNumber: 25)
|
# Command definition for the custom check_user plugin script: runs
# $USER1$/check_user with no arguments.
define command{
command_name check_user
command_line $USER1$/check_user
}
|
# Service check for host rsync-89 that runs the check_user command (no
# arguments). Checks and notifications both use the 24x7 period;
# notifications go to contact group ktm.
# NOTE(review): the interval values are in Nagios time units (minutes when
# interval_length is left at its default) - confirm against nagios.cfg.
# NOTE(review): notification_options w,u,c,r presumably selects
# warning/unknown/critical/recovery - confirm against the object definition docs.
define service {
host_name rsync-89
service_description check_user
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups ktm
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
check_command check_user
}
|
#!/bin/bash
#check_user_nagios cwtea
#blog: cwtea.blog.51cto.com
#
# Nagios plugin: reports OK when the user file contains at least one line and
# CRITICAL otherwise.
#
# Usage:  check_user [FILE]      (FILE defaults to /test/passwd)
# Output: a single status line on stdout
# Exit:   0 = OK, 2 = CRITICAL
user_file=${1:-/test/passwd}

# Read through a redirection so wc prints only the number. A missing or
# unreadable file is counted as zero entries, which keeps the CRITICAL result
# without leaking an error message to stderr.
if [ -f "$user_file" ] && [ -r "$user_file" ]; then
  cu=$(wc -l < "$user_file")
else
  cu=0
fi

if [ "$cu" -ne 0 ]; then
  echo "User OK - User is running (UserNumber: ${cu})"
  #Is OK.
  exit 0
else
  echo "User CRITICAL,User is none"
  exit 2
fi
|
主 题: ** PROBLEM Service Alert: rsync-89/check_user is CRITICAL ** [新窗口打开]
时 间:
2013-03-14 00:32 (星期四)
发件人:
nagios<[email protected]> [添加联系人] [邮件往来] [拒收]
收件人:
我<[email protected]>
***** Nagios *****
Notification Type: PROBLEM
Service: check_user
Host: rsync-89
Address: 192.168.100.89
State: CRITICAL
Date/Time: Thu Mar 14 00:32:39 CST 2013
Additional Info:
User CRITICAL,User is none
|