[root@server1 nagios]# 1 安装yum install gd-devel -y
[root@server1 nagios]# 2 部署lamp环境yum install httpd mysql mysql-server php php-mysql gcc gcc-c++ -y
[root@server1 nagios]# 3 安装主程序nagios
[root@server1 nagios]# tar fvxz nagios.tar.gz
[root@server1 nagios]# ./configure --prefix=/usr/local/nagios
[root@server1 nagios]# useradd nagios
[root@server1 nagios]# make all
[root@server1 nagios]# make install
make install
- This installs the main program, CGIs, and HTML files
make install-init
- This installs the init script in /etc/rc.d/init.d
make install-commandmode
- This installs and configures permissions on the
directory for holding the external command file
make install-config
- This installs *SAMPLE* config files in /usr/local/nagios/etc
You'll have to modify these sample files before you can
use Nagios. Read the HTML documentation for more info
on doing this. Pay particular attention to the docs on
object configuration files, as they determine what/how
things get monitored!
make install-webconf
- This installs the Apache config file for the Nagios
web interface
vim /etc/httpd/conf/httpd.conf
User nagios
Group nagios
重新启动apache
service httpd restart
service nagios start
生成用户
[root@server1 nagios-3.2.0]# htpasswd -c /usr/local/nagios/etc/htpasswd.users nagios
New password:
Re-type new password:
Adding password for user nagios
给nagios用户开权限,让他能够查看信息!
[root@server1 nagios-3.2.0]# vim /usr/local/nagios/etc/cgi.cfg
在所有的nagiosadmin后面添加nagios
本机为什么是down的状态???
监控分析控制台 ---------------主程序
插件
--------------被监控主机
nagios报错 无权查看任何主机的信息 解决方法
解决办法:
vi /usr/local/nagios/etc/cgi.cfg
将use_authentication的值改为0(注意:这会关闭CGI的全部认证,只适合测试环境;正式环境应保持为1,并把登录用户加入cgi.cfg中的authorized_for_*列表).
use_authentication=0
然后重启nagios服务
service nagios restart
[root@server1 libexec]# pwd
/usr/local/nagios/libexec
[root@server1 libexec]# ls
[root@server1 libexec]#
插件目录下什么都没有啊!
安装插件
[root@server1 nagios-plugins-1.4.13]# ./configure --prefix=/usr/local/nagios/
可选的选项--with-gnutls --with-openssl --enable-extra-opts --enable-perl-modules
make
make install
怎样监控的更多!
[root@server1 etc]# pwd
/usr/local/nagios/etc
[root@server1 etc]# vim nagios.cfg
编辑主配置文件
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/objects/templates.cfg
通过上面的语句来调用那些配置文件
[root@server1 objects]# pwd
/usr/local/nagios/etc/objects
时间timeperiods.cfg
define timeperiod{
timeperiod_name 24x7
alias 24 Hours A Day, 7 Days A Week
sunday 00:00-24:00
monday 00:00-24:00
tuesday 00:00-24:00
wednesday 00:00-24:00
thursday 00:00-24:00
friday 00:00-24:00
saturday 00:00-24:00
}
插件commands.cfg
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}
监控谁localhost.cfg
define host {
host_name fudong
alias test
address 192.168.18.50
check_command check-host-alive
notification_options d,u,r
check_interval 1
max_check_attempts 2
contact_groups admins
notification_interval 10
notification_period 24x7
}
联系人contacts.cfg
define contact {
contact_name kyo
alias kyo
host_notification_period 24x7
host_notification_options d,u,r
service_notification_period 24x7
service_notification_options w,u,c,r
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
email [email protected]
#通过飞信机器人发信报警!
}
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagiosadmin,kyo
}
检查错误
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
定义服务
define service {
host_name fudong
service_description apache
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 5
notification_period 24x7
notification_options w,u,c,r
check_command check_http
}
关于插件的返回状态
[root@server1 objects]# echo $?
2
[root@server1 objects]# /usr/local/nagios/libexec/check_http -I 192.168.18.50
HTTP OK HTTP/1.1 200 OK - 43306 bytes in 0.026 seconds |time=0.026288s;;;0.000000 size=43306B;;;0
[root@server1 objects]# echo $?
0
[root@server1 objects]# /usr/local/nagios/libexec/check_http -I 192.168.18.50 -u /a.html -s hello
HTTP WARNING: HTTP/1.1 404 Not Found
[root@server1 objects]# echo $?
1
0 成功 1 警告 2 严重错误 3 未知
自己编写插件!!!!!!!!!!!!!!!!
#!/bin/bash
# Nagios plugin: probe a web server with a single HTTP HEAD request.
#
# Usage: <plugin> [URL]
#   URL  address to probe; defaults to http://192.168.18.155
#
# Exit status follows the Nagios plugin convention:
#   0  OK       - the server answered with status 200
#   1  WARNING  - the server answered, but not with 200
#   2  CRITICAL - no HTTP answer at all (connection failed or timed out)
url=${1:-http://192.168.18.155}

# Issue exactly one request so both verdicts come from the same response.
# --max-time keeps an unresponsive server from stalling the check, and
# -w '%{http_code}' yields the numeric status regardless of HTTP version.
if status=$(curl -s -I -o /dev/null --max-time 10 -w '%{http_code}' "$url" 2> /dev/null); then
  if [[ "$status" == "200" ]]; then
    echo "OK!"
    exit 0
  else
    echo "warning!"
    exit 1
  fi
else
  echo "down!"
  exit 2
fi
自定义命令
define command {
command_name check_url
command_line $USER1$/check_http -I $HOSTADDRESS$ -u $ARG1$ -s $ARG2$
}
使用新定义的命令
define service {
host_name fudong
service_description apache
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 5
notification_period 24x7
notification_options w,u,c,r
# check_command check_http
check_command check_url!/index.html!hello
}
########################################################################
check_mysql
vim /usr/local/nagios/libexec/check_mysql
#!/bin/bash
# Nagios plugin: report whether a MySQL server accepts a login and answers
# a trivial query.
#
# Usage: check_mysql <host-or-ip>
#
# Exit status follows the Nagios plugin convention:
#   0  OK       - login and query succeeded
#   2  CRITICAL - login or query failed
#   3  UNKNOWN  - the plugin was called without a host
#
# NOTE(review): the account (test / 123) is hard-coded and the password is
# passed on the command line; consider a MySQL client option file instead.
IP=${1:-}

# Without a host, -h has no value of its own and would consume the next
# option instead, so refuse to run rather than report a misleading state.
if [[ -z "$IP" ]]; then
  echo "mysql UNKNOWN: usage: ${0##*/} <host>"
  exit 3
fi

# --connect-timeout bounds the check when the host silently drops packets.
if mysql -u test -h "$IP" -p123 --connect-timeout=10 -e 'show databases;' &> /dev/null; then
  echo "mysql OK!"
  exit 0
else
  echo "mysql err!"
  exit 2
fi
vim /usr/local/nagios/etc/objects/commands.cfg
define command{
command_name check_mysql
command_line $USER1$/check_mysql $ARG1$
}
vim /usr/local/nagios/etc/objects/localhost.cfg
define service {
host_name mail.vfast.com
service_description mysql
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 2
notification_period 24x7
notification_options w,u,c,r
check_command check_mysql!192.168.18.69
}
service nagios restart
###################################################################################
yum install expect -y
define command{
command_name notify-host-by-sms
command_line /usr/local/nagios/libexec/nagios-mail "$(/usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n")" smtp.163.com Y29vbHdhbmdjaG9uZ0AxNjMuY29t UVE4MTBXQU5HODIwMCFA [email protected] $CONTACTEMAIL$ "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **"
}
如果遇到host条目一会有,一会消失的问题,可以killall nagios 再重新启动nagios!
监控远程主机的系统信息
被监控主机
安装nrpe的server端
tar fvxz nrpe*.tar.gz
./configure --prefix=/usr/local/nagios
useradd nagios
make
make install-daemon
make install-daemon-config
make install-xinetd
安装插件2
make install (这步不是必须的!)
把插件拷贝给监控主机nagios
scp /usr/local/nagios/libexec/check_nrpe root@监控主机的ip:/usr/local/nagios/libexec
在被监控主机开启nrpe服务
vim /etc/xinetd.d/nrpe
# default: on
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
# 监控主机的ip,保证他可以连接进来!(xinetd的注释必须单独成行,不能写在行尾)
only_from = 192.168.18.254
}
vim /etc/services
nrpe 5666/tcp
service xinetd restart
[root@server1 objects]# /usr/local/nagios/libexec/check_nrpe -H 192.168.18.188
NRPE v2.12
#注意关闭防火墙!
在被监控主机安装插件
vim nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_u]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
通过以上字段来定义命令,以及接收命令后执行的插件
如果想不明白
定义服务,来检测一下
define host {
host_name zcg
alias nrpe-server
address 192.168.18.188
check_command check-host-alive
notification_options d,u,r
check_interval 1
max_check_attempts 2
contact_groups admins
notification_interval 10
notification_period 24x7
}
define service {
host_name zcg
service_description nrpe
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 5
notification_period 24x7
notification_options w,u,c,r
check_command check_nrpe!check_users
#这里使用的check_nrpe需要在commands.cfg里面定义
}
别忘了,先定义好zcg这台主机!!
定义命令
define command {
command_name check_nrpe
command_line /usr/local/nagios/libexec/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
重启nagios服务!