1、安装插件
# tar xvf nagios-plugins-1.4.13.tar.gz
# cd nagios-plugins-1.4.13
# ./configure
# make && make install
2、安装客户端
# tar xvf nrpe-2.12.tar.gz
# cd nrpe-2.12
# ./configure
# make all
# make install-plugin
cd ./src/ && make install-plugin
make[1]: Entering directory `/usr/local/src/nrpe-2.12/src'
/usr/bin/install -c -m 775 -o nagios -g nagios -d /usr/local/nagios/libexec
/usr/bin/install -c -m 775 -o nagios -g nagios check_nrpe /usr/local/nagios/libexec
make[1]: Leaving directory `/usr/local/src/nrpe-2.12/src'
# make install-daemon
cd ./src/ && make install-daemon
make[1]: Entering directory `/usr/local/src/nrpe-2.12/src'
/usr/bin/install -c -m 775 -o nagios -g nagios -d /usr/local/nagios/bin
/usr/bin/install -c -m 775 -o nagios -g nagios nrpe /usr/local/nagios/bin
make[1]: Leaving directory `/usr/local/src/nrpe-2.12/src'
# make install-daemon-config
/usr/bin/install -c -m 775 -o nagios -g nagios -d /usr/local/nagios/etc
/usr/bin/install -c -m 644 -o nagios -g nagios sample-config/nrpe.cfg /usr/local/nagios/etc
3、修改nrpe的配置文件
# vim /usr/local/nagios/etc/nrpe.cfg
# --- Daemon settings ---------------------------------------------------------
log_facility=daemon
pid_file=/var/run/nrpe/nrpe.pid
server_port=5666
# NOTE(review): the install steps in this guide create files owned by
# nagios:nagios; confirm that an "nrpe" user/group exists on this host and is
# allowed to execute the plugins under /usr/local/nagios/libexec.
nrpe_user=nrpe
nrpe_group=nrpe
# Add the Nagios server address(es) here so that the server is allowed to
# connect. Keep this note on its own comment line: anything written after the
# value on the same line would be read as part of the host list.
allowed_hosts=127.0.0.1,192.168.1.248,117.119.33.17
dont_blame_nrpe=0
allow_bash_command_substitution=0
debug=0
command_timeout=60
connection_timeout=300
include_dir=/etc/nrpe.d/

# --- Command definitions -----------------------------------------------------
# Format: command[<name>]=<command line>
# <name> is what the caller requests, e.g. `check_nrpe -H <host> -c check_load`.
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 15% -c 10% -A
# Custom script (listed later in this guide).
command[check_cpu_load]=/usr/local/nagios/libexec/check_cpu_load.sh
command[check_swap]=/usr/local/nagios/libexec/check_swap -a -w 30% -c 20%
command[check_raiddisk]=/usr/local/nagios/libexec/check_megaraid_sas -m 20 -p 20 -o 20 2>/dev/null
command[check_raidbattery]=/usr/local/nagios/libexec/check_raidbattery.sh
command[check_remote_ntp]=/usr/local/nagios/libexec/check_ntp -H ntp1.oupeng.com -w 1 -c 3
# Custom script (listed later in this guide).
command[check_mysql3307_rep]=/usr/local/nagios/libexec/check_mysql3307_rep
上面的命令定义中,check_cpu_load.sh 和 check_mysql3307_rep 这两个监控脚本是自己写的:
检测cpu负载的脚本
# cat /usr/local/nagios/libexec/check_cpu_load.sh
#!/bin/bash
#
# Nagios/NRPE plugin: compare the 1-, 5- and 15-minute load averages with the
# number of CPUs on this host.
#
# Output is a single status line; the exit status follows the Nagios plugin
# convention:
#   2 CRITICAL - some load average is >= the number of CPUs
#   1 WARNING  - some load average is >= half the number of CPUs
#   0 OK       - every load average is below half the number of CPUs
#   3 UNKNOWN  - the CPU count or the load averages could not be read
#
# All three averages are tested for CRITICAL before any is tested for WARNING,
# so a short-term WARNING can never hide a longer-term CRITICAL.

# Succeeds (status 0) when load average $1 is >= threshold $2.
# awk is used because the values are floating point.
load_at_least() {
  awk -v load="$1" -v limit="$2" 'BEGIN { exit !(load + 0 >= limit + 0) }'
}

# Count logical CPUs. The pattern is anchored so that only the per-CPU
# "processor : N" lines are counted, not other lines mentioning the word.
cpu=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
if ! [[ "$cpu" =~ ^[1-9][0-9]*$ ]]; then
  echo "UNKNOWN: - CANNOT DETERMINE CPU COUNT FROM /proc/cpuinfo"
  exit 3
fi

# Read the averages straight from the kernel instead of parsing `uptime`,
# whose output format (decimal separator, field layout) depends on the locale.
if [[ ! -r /proc/loadavg ]] || ! read -r load1 load5 load15 _ < /proc/loadavg; then
  echo "UNKNOWN: - CANNOT READ LOAD AVERAGES FROM /proc/loadavg"
  exit 3
fi

load=("$load1" "$load5" "$load15")
minutes=(1 5 15)

for value in "${load[@]}"; do
  if ! [[ "$value" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    echo "UNKNOWN: - UNEXPECTED LOAD AVERAGE VALUE '$value'"
    exit 3
  fi
done

# Half the CPU count as a real number. Integer division would yield 0 on a
# single-CPU host, which made every load value trigger a WARNING.
half=$(awk -v n="$cpu" 'BEGIN { print n / 2 }')

for i in 0 1 2; do
  if load_at_least "${load[i]}" "$cpu"; then
    echo "CRITICAL: - CURRENT ${minutes[i]} minute CPU LOAD IS ${load[i]} MORE THAN CPUS IS $cpu"
    exit 2
  fi
done

for i in 0 1 2; do
  if load_at_least "${load[i]}" "$half"; then
    echo "WARNING: - CURRENT ${minutes[i]} minute CPU LOAD IS ${load[i]} MORE THAN HALF OF CPUS IS $half"
    exit 1
  fi
done

echo "OK: - CURRENT CPU LOAD IS ${load[0]}, CPUS IS $cpu"
exit 0
检测mysql主从状态的脚本
# cat /usr/local/nagios/libexec/check_mysql3307_rep
#!/bin/bash
#
# Nagios/NRPE plugin: check replication health of the MySQL instance reachable
# through $SOCKET.
#
# Runs SHOW SLAVE STATUS and inspects Slave_IO_Running, Slave_SQL_Running and
# Seconds_Behind_Master. Prints one status line; exit status:
#   0 OK       - both threads report "Yes" and Seconds_Behind_Master is 0
#   1 WARNING  - no slave status is returned, or Seconds_Behind_Master is > 0
#   2 CRITICAL - the query failed, a thread is not "Yes", or
#                Seconds_Behind_Master is not a number (e.g. NULL)
#
# NOTE(review): the root password is hard-coded and passed on the command
# line, where it is visible in `ps`. Consider a dedicated account and a
# --defaults-extra-file readable only by the NRPE user.

SOCKET='/nh/mysql3307/mysql-3307.sock'

# Capture the output first so a failed connection can be told apart from
# "no replication configured". stderr is discarded with /dev/null rather than
# closed with 2>&-, because writing to a closed descriptor is an error.
if ! status=$(mysql -uroot -pdellXdell -S "$SOCKET" -e 'show slave status\G' 2>/dev/null); then
  echo "CRITICAL - Can't query MySQL through $SOCKET"
  exit 2
fi

# Keep only the three fields of interest, with all spaces removed, in the
# form "Name:Value" (one per line).
fields=$(printf '%s\n' "$status" \
  | awk '{ gsub(/ /, "") } /^(Slave_IO_Running|Slave_SQL_Running|Seconds_Behind_Master):/')

if [[ -z "$fields" ]]; then
  echo "WARNING - Don't have slave"
  exit 1
fi

while IFS=: read -r name value; do
  case "$name" in
    Slave*)
      if [[ "$value" != 'Yes' ]]; then
        echo "CRITICAL - $name:$value"
        exit 2
      fi
      ;;
    Second*)
      # A non-numeric lag (NULL) must not be evaluated arithmetically: bash
      # would treat the word as an unset variable, i.e. 0, and report OK.
      if ! [[ "$value" =~ ^[0-9]+$ ]]; then
        echo "CRITICAL - $name:$value"
        exit 2
      elif (( 10#$value != 0 )); then
        echo "WARNING - $name:$value"
        exit 1
      fi
      ;;
  esac
done <<< "$fields"

echo 'OK - MYSQL REPLICATION OK'
exit 0
启动服务
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
# ps -ef | grep nrpe
本地测试
# /usr/local/nagios/libexec/check_nrpe -H localhost -c check_load
OK - load average: 0.10, 0.10, 0.08|load1=0.100;15.000;30.000;0; load5=0.100;10.000;25.000;0; load15=0.080;5.000;20.000;0;
服务端测试
为root用户设置一个密码
# mysql -uroot -S mysql-3307.sock
mysql> UPDATE mysql.user SET password=password('dellXdell') WHERE user='root';
mysql> FLUSH PRIVILEGES;
在mysql上创建一个专门用于检测的用户,给予只读权限
mysql> grant select on *.* to 'nagios'@'192.168.%' identified by 'nagiostest';
# /usr/local/nagios/libexec/check_mysql -H 192.168.1.101 -u nagios -p nagiostest -d mysql -P 3307
Uptime: 110289 Threads: 3 Questions: 684 Slow queries: 4 Opens: 18 Flush tables: 2 Open tables: 1 Queries per second avg: 0.6
4、服务端配置
# cd /usr/local/nagios/etc/
# vim nagios.cfg
cfg_dir=/usr/local/nagios/etc/objects
创建一个配置文件
# cd objects/
# vim ansible_auto/public/uy-s-43.cfg
# Host object for uy-s-43 (address 192.168.1.101), member of the
# product-servers host group; notifications go to contact group group-sa.
# Settings not listed here are taken from the "Linuxserver" template named by
# `use`, which is defined outside this file.
define host{
use Linuxserver
host_name uy-s-43
contact_groups group-sa
alias uy-s-43
address 192.168.1.101
hostgroups product-servers
}
# MySQL availability check for the 3307 instance on uy-s-43, based on the
# "oupeng" service template (defined outside this file).
# The `!`-separated values after check_mysql are the command's arguments.
# NOTE(review): they presumably map to user, password, database and port, as
# in the manual check_mysql run shown earlier in this guide; confirm against
# the check_mysql command definition. The password is stored in clear text.
define service{
use oupeng
host_name uy-s-43
contact_groups group-sa
service_description mysql_public_3307
check_command check_mysql!nagios!nagiostest!mysql!3307
}
# MySQL replication check for uy-s-43, executed through NRPE: the argument
# after check_nrpe names the command[check_mysql3307_rep] entry in the
# client's nrpe.cfg.
# NOTE(review): assumes the server-side check_nrpe command passes its first
# argument to `check_nrpe -c`; confirm against the command definition.
define service{
use oupeng
host_name uy-s-43
contact_groups group-sa
service_description check_mysql3307_rep
check_command check_nrpe!check_mysql3307_rep
}
重载配置
# /etc/init.d/nagios reload