宁波网络公司-浙江海商网 -Nagios监控部署 实验环境
被监控 客户端(linux客户端--192.168.152.129)
CentOS5.4 + nagios-plugins-1.4.14 + nrpe-2.12
被监控 客户端(linux客户端--192.168.152.132)
CentOS5.4 + nagios-plugins-1.4.14 + nrpe-2.12
一、准备软件
cd /data/software wget http://prdownloads.sourceforge.net/sourceforge/nagios/nagios-3.2.0.tar.gz wget http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.14.tar.gz wget http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.12.tar.gz wget http://apache.etoak.com/httpd/httpd-2.2.14.tar.gz
tar xvf httpd-2.2.14.tar.gz cd httpd-2.2.14 ./configure --prefix=/usr/local/apache2 make make install /usr/local/apache2/bin/apachectl start // 由于是没有改动的配置文件,可以直接启动 netstat -an |grep 80 // 检查 80 端口是否已经开启了
useradd nagios -s /sbin/nologin // 有的文章说要启用账号,其实不用也可以,因为这个账号不需要登录 tar xvf nagios-3.2.0.tar.gz cd nagios-3.2.0 ./configure --prefix=/usr/local/nagios --with-nagios-user=nagios --with-nagios-group=nagios make all make install make install-init // 在 /etc/rc.d/init.d 安装启动脚本 make install-config // 安装示例配置文件 , 安装的路径是 /usr/local/nagios/etc make install-commandmode // 配置目录权限
tar xvf nagios-plugins-1.4.14.tar.gz cd nagios-plugins-1.4.14 ./configure --prefix=/usr/local/nagios // 注意了,是放在 /usr/local/nagios 里,别搞错了 make make install chown -R nagios.nagios /usr/local/nagios
vi /usr/local/apache2/conf/httpd.conf User nagios // 把 apache 运行用户改成 nagios Group nagios // 把 apache 运行组改成 nagios # 把下面的内容增加到文件的最后: Scriptalias /nagios/cgi-bin /usr/local/nagios/sbin <directory "/usr/local/nagios/sbin"> Authtype basic Options execcgi Allowoverride none Order allow,deny Allow from all Authname "nagios access" Authuserfile /usr/local/nagios/etc/htpasswd Require valid-user </directory> Alias /nagios /usr/local/nagios/share <directory "/usr/local/nagios/share"> Authtype basic Options none Allowoverride none Order allow,deny Allow from all Authname "nagios access" Authuserfile /usr/local/nagios/etc/htpasswd Require valid-user </directory>
vi /usr/local/nagios/etc/cgi.cfg use_authentication=1 // 打开验证 default_user_name=test authorized_for_system_information=nagiosadmin,test authorized_for_configuration_information=nagiosadmin,test authorized_for_system_commands=nagiosadmin,test authorized_for_all_services=nagiosadmin,test authorized_for_all_hosts=nagiosadmin,test authorized_for_all_service_commands=nagiosadmin,test authorized_for_all_host_commands=nagiosadmin,test // 这里添加的用户 ”test” 可以通过浏览器对 nagios 服务的关闭、重启等操作 ,在这里为了安全也可以把 nagiosadmin 这一个用户给删掉,如果有多个用户用逗号隔开,如: nagiosadmin,test
为test账号添加密码 /usr/local/apache2/bin/htpasswd -c /usr/local/nagios/etc/htpasswd test New password: 输入你的密码 Re-type new password: 再次确认
vi /usr/local/nagios/etc/nagios.cfg cfg_file=/usr/local/nagios/etc/objects/commands.cfg #cfg_file=/usr/local/nagios/etc/objects/contacts.cfg //这一行注释掉,为了方便管理,我们重新写一个联系人的配置文件 cfg_file=/usr/local/nagios/etc/contacts.cfg //指定联系人配置文件路径 cfg_file=/usr/local/nagios/etc/contactgroups.cfg //指定联系人组配置文件路径 #cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg //注释掉,用自己写的监视时段配置文件 cfg_file=/usr/local/nagios/etc/timeperiods.cfg //指定监视时段配置文件路径 cfg_file=/usr/local/nagios/etc/objects/templates.cfg //指定模板配置文件路径 cfg_file=/usr/local/nagios/etc/services.cfg //服务配置文件路径 #cfg_file=/usr/local/nagios/etc/objects/localhost.cfg //注释掉, cfg_file=/usr/local/nagios/etc/hosts.cfg //主机配置文件路径 cfg_file=/usr/local/nagios/etc/hostgroups.cfg //主机组配置文件路径 check_external_commands=1 //在web界面下重启nagios,停止主机/服务检查操作,默认关闭; command_check_interval=10s //定义这个命令检查时间间隔,默认是1秒;
vi /usr/local/nagios/etc/timeperiods.cfg define timeperiod{ timeperiod_name 24x7 alias 24 hours a day,7days a week sunday 00:00-24:00 monday 00:00-24:00 tuesday 00:00-24:00 wednesday 00:00-24:00 thursday 00:00-24:00 friday 00:00-24:00 saturday 00:00-24:00 }
vi /usr/local/nagios/etc/contacts.cfg define contact { contact_name yaozhan189 alias system administrator service_notification_period 24x7 host_notification_period 24x7 service_notification_options w,u,c,r host_notification_options d,u,r service_notification_commands notify-service-by-email host_notification_commands notify-host-by-email email [email protected] # pager 13800138000 }
vi /usr/local/nagios/etc/contactgroups.cfg define contactgroup{ contactgroup_name sagroup alias system administrator group members yaozhan189 }
vi /usr/local/nagios/etc/hosts.cfg define host{ host_name linux129 alias linux-129 address 192.168.152.129 contact_groups sagroup check_command check-host-alive max_check_attempts 5 notification_interval 10 notification_period 24x7 notification_options d,u,r } define host{ host_name linux132 alias linux-132 address 192.168.152.132 contact_groups sagroup check_command check-host-alive max_check_attempts 5 notification_interval 10 notification_period 24x7 notification_options d,u,r }
vi /usr/local/nagios/etc/hostgroups.cfg define hostgroup{ hostgroup_name sa-servers alias sa servers members linux129,linux132 }
vi /usr/local/nagios/etc/services.cfg #监控主机是否存活 define service{ #host_name nagios-server hostgroup_name sa-servers service_description check-host-alive check_command check-host-alive max_check_attempts 5 normal_check_interval 5 retry_check_interval 2 check_period 24x7 notification_interval 10 notification_period 24x7 notification_options w,u,c,r contact_groups sagroup } #监控主机的web服务 define service{ #host_name nagios-server hostgroup_name sa-servers service_description check_tcp 80 check_period 24x7 max_check_attempts 4 normal_check_interval 3 retry_check_interval 2 contact_groups sagroup notification_interval 10 notification_period 24x7 notification_options w,u,c,r check_command check_tcp!80 } #监控主机的cpu负载情况 define service{ #host_name nagios-server hostgroup_name sa-servers service_description cpu load check_command check_nrpe!check_load check_period 24x7 max_check_attempts 4 normal_check_interval 3 retry_check_interval 2 contact_groups sagroup notification_interval 10 notification_period 24x7 notification_options w,u,c,r } #监控主机的进程数 define service{ #host_name nagios-server hostgroup_name sa-servers service_description total-procs check_command check_nrpe!check_total_procs check_period 24x7 max_check_attempts 4 normal_check_interval 3 retry_check_interval 2 contact_groups sagroup notification_interval 10 notification_period 24x7 notification_options w,u,c,r }
yum install openssl
tar xvf nrpe-2.12.tar.gz cd nrpe-2.12 ./configure --prefix=/usr/local/nrpe make make install
cp /usr/local/nrpe/libexec/check_nrpe /usr/local/nagios/libexec cp /usr/local/nagios/libexec/check_disk /usr/local/nrpe/libexec cp /usr/local/nagios/libexec/check_load /usr/local/nrpe/libexec cp /usr/local/nagios/libexec/check_ping /usr/local/nrpe/libexec cp /usr/local/nagios/libexec/check_procs /usr/local/nrpe/libexec cp /usr/local/nagios/libexec/check_users /usr/local/nrpe/libexec
vi /usr/local/nrpe/etc/nrpe.cfg server_address=192.168.152.133 // 以单独的守护进程运行 allowed_hosts=127.0.0.1,192.168.152.133 // 设置允许 nagios 监控服务器可以访问 command[check_users]=/usr/local/nrpe/libexec/check_users -w 5 -c 10 command[check_load]=/usr/local/nrpe/libexec/check_load -w 15,10,5 -c 30,25,20 #command[check_hda1]=/usr/local/nrpe/libexec/check_disk -w 20 -c 10 -p /dev/hda1 // 注释掉 command[check_df]=/usr/local/nrpe/libexec/check_disk -w 20 -c 10 // 添加这一行,监控整个磁盘利用率 command[check_zombie_procs]=/usr/local/nrpe/libexec/check_procs -w 5 -c 10 -s Z command[check_total_procs]=/usr/local/nrpe/libexec/check_procs -w 150 -c 200 command[check_ips]=/usr/local/nrpe/libexec/ip_conn.sh 8000 10000 // 监控 ip 连接数
vi /usr/local/nrpe/libexec/ip_conn.sh
#!/bin/sh
# ip_conn.sh - NRPE plugin reporting the number of established TCP connections.
#
# Usage: ip_conn.sh WARN CRIT
#   WARN  connection count at or above which WARNING is reported
#   CRIT  connection count at or above which CRITICAL is reported
#
# Exit status follows the Nagios plugin convention:
#   0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN (bad usage / missing tool).

# Both thresholds are mandatory; report UNKNOWN instead of letting the
# numeric tests below fail with "[: argument expected" style errors.
if [ "$#" -ne 2 ]; then
  echo "usage: $0 warn_count crit_count"
  exit 3
fi

warn=$1
crit=$2

# Reject anything that is not a plain non-negative integer so the
# comparisons below cannot error out.
for threshold in "$warn" "$crit"; do
  case $threshold in
    '' | *[!0-9]*)
      echo "unknown - thresholds must be non-negative integers"
      exit 3
      ;;
  esac
done

# Without netstat the count would silently be 0 and be reported as OK.
if ! command -v netstat >/dev/null 2>&1; then
  echo "unknown - netstat not found"
  exit 3
fi

# netstat prints connection states in upper case (ESTABLISHED), so match
# that spelling exactly; "n + 0" makes awk print 0 when nothing matches.
ip_conns=$(netstat -an | awk '/^tcp/ && /ESTABLISHED/ { n++ } END { print n + 0 }')

# Test the most severe level first and use inclusive comparisons so a count
# exactly equal to a threshold is classified instead of falling through.
if [ "$ip_conns" -ge "$crit" ]; then
  echo "critical -connectcounts is $ip_conns"
  exit 2
elif [ "$ip_conns" -ge "$warn" ]; then
  echo "warning -connectcounts is $ip_conns"
  exit 1
else
  echo "ok -connectcounts is $ip_conns"
  exit 0
fi
######################################################################## # 'check_nrpe ' command definition define command{ command_name check_nrpe command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ }
# useradd nagios -s /sbin/nologin
tar xvf nagios-plugins-1.4.14.tar.gz cd nagios-plugins-1.4.14 ./configure --prefix=/usr/local/nagios make make install
chown -R nagios.nagios /usr/local/nagios chown -R nagios.nagios /usr/local/nagios/libexec
yum install openssl
tar xvf nrpe-2.12.tar.gz cd nrpe-2.12 ./configure --prefix=/usr/local/nagios --enable-ssl --with-ssl-lib //也把它放到nagios这一个目录 make all make install-plugin //安装check_nrpe这个插件 make install-daemon //安装daemon make install-daemon-config //安装配置文件
allowed_hosts=127.0.0.1,192.168.152.133 //允许监控服务器访问,中间用逗号隔开; # 修改NRPE的监控命令,添加相应的命令; # The following examples use hardcoded command arguments... command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10 command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20 #command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1 command[check_df]=/usr/local/nagios/libexec/check_disk -w 20 -c 10 command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200 command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10% command[check_tcp]=/usr/local/nagios/libexec/check_tcp -p 80
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
/usr/local/nagios/libexec/check_nrpe -H localhost /usr/local/nagios/libexec/check_nrpe -H localhost -c check_users /usr/local/nagios/libexec/check_nrpe -H localhost -c check_load /usr/local/nagios/libexec/check_nrpe -H localhost -c check_df /usr/local/nagios/libexec/check_nrpe -H localhost -c check_zombie_procs /usr/local/nagios/libexec/check_nrpe -H localhost -c check_total_procs /usr/local/nagios/libexec/check_nrpe -H localhost -c check_swap /usr/local/nagios/libexec/check_nrpe -H localhost -c check_tcp
/usr/local/nrpe/bin/nrpe -c /usr/local/nrpe/etc/nrpe.cfg -d
[root@rhel nrpe]# tail /var/log/messages oct 15 18:01:16 rhel nrpe[11791]: starting up daemon oct 15 18:01:16 rhel nrpe[11791]: listening for connections on port 5666 oct 15 18:01:16 rhel nrpe[11791]: allowing connections from: 127.0.0.1,192.168.152.133
[root@rhel nrpe]# netstat -an |grep 5666 tcp 0 0 0.0.0.0:5666 0.0.0.0:* listen
[root@rhel nrpe]# ps aux |grep nrpe |grep -v grep nagios 11791 0.0 0.1 4868 928 ? ss 18:01 0:00 nrpe -c /usr/local/nrpe/etc/nrpe.cfg –d
[root@rhel nrpe]# /usr/local/nrpe/libexec/check_nrpe -H 192.168.152.133 NRPE v2.12
[root@rhel nrpe]# /usr/local/nrpe/libexec/check_nrpe -H 192.168.152.133 -c check_df disk ok - free space: / 5245 mb (60% inode=95%); /home 13329 mb (80% inode=99%); /var 843 mb (9% inode=99%); /boot 82 mb (88% inode=99%); /dev/shm 235 mb (100% inode=99%);| /=3495mb;9197;9207;0;9217 /home=3215mb;17426;17436;0;17446 /var=7897mb;9197;9207;0;9217 /boot=10mb;78;88;0;98 /dev/shm=0mb;215;225;0;235 [root@rhel nrpe]# /usr/local/nrpe/libexec/check_nrpe -H 192.168.152.133 -c check_load ok - load average: 0.00, 0.00, 0.00|load1=0.000;15.000;30.000;0; load5=0.000;10.000;25.000;0; load15=0.000;5.000;20.000;0; [root@rhel nrpe]# /usr/local/nrpe/libexec/check_nrpe -H 192.168.152.133 -c check_ips ok -connectcounts is 4
[root@rhel nrpe]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg nagios core 3.2.0 reading configuration data... read main config file okay... ……………省……………… read object config files okay... checking misc settings... total warnings: 0 total errors: 0 things look okay - no serious problems were detected during the pre-flight check
/usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg