apt-get install nagios3
#Nagios web administration password
#123123
默认安装apache2 监听80端口
访问 http://10.10.100.54/nagios3/
默认管理员是nagiosadmin 密码自己设置的
密码文件为 /etc/nagios3/htpasswd.users(注意:htpasswd 的 -c 参数会重新创建该文件并覆盖其中已有的用户,只是追加用户时应去掉 -c)
htpasswd -c /etc/nagios3/htpasswd.users admin
New password:
123123
Re-type new password:
123123
vim /etc/nagios3/cgi.cfg
# Turn on CGI authentication and grant every permission below to the "admin" user.
use_authentication=1
# Host and service views, plus the commands that act on them
authorized_for_all_hosts=admin
authorized_for_all_host_commands=admin
authorized_for_all_services=admin
authorized_for_all_service_commands=admin
# System information, configuration view and system-level commands
authorized_for_system_information=admin
authorized_for_configuration_information=admin
authorized_for_system_commands=admin
vim /etc/nagios3/nagios.cfg
#允许在页面上手动执行任务(如 **System>Scheduling Queue**);注释单独成行,避免被当成取值的一部分
check_external_commands=1
#有可能报错
Error: Could not stat() command file '/var/lib/nagios3/rw/nagios.cmd'!
# Stop Nagios before changing ownership and mode of its state directories.
sudo /etc/init.d/nagios3 stop
# Register (and apply via --update) owner nagios, group www-data, mode 2710
# on the directory that holds the external command file nagios.cmd.
sudo dpkg-statoverride --update --add nagios www-data 2710 /var/lib/nagios3/rw
# Same for the parent directory: owner nagios, group nagios, mode 751.
sudo dpkg-statoverride --update --add nagios nagios 751 /var/lib/nagios3
# Start Nagios again.
sudo /etc/init.d/nagios3 start
重启服务后,在页面的 Current Status > Hosts 中可以看到默认已经监控本机
vim /etc/nagios3/conf.d/hosts.cfg #默认没这文件
# Host object for the monitored machine "lvs"; defaults come from the generic-host template.
# Inside an object definition an inline comment must start with ';'
# ('#' is only a comment marker at the start of a line).
define host {
    use             generic-host
    host_name       lvs             ; host name
    alias           lvs             ; host alias (original note: defaults to host_name when omitted)
    address         10.10.100.100   ; IP address of the host to monitor
    check_interval  1               ; check interval: 1 time unit (1 minute per the original note)
}
vim /etc/nagios3/conf.d/hostgroups_nagios2.cfg
# Some generic hostgroup definitions
# A simple wildcard hostgroup
define hostgroup {
    hostgroup_name  all
    alias           All Servers
    members         *               ; wildcard: the group that contains every host
}
# Hostgroup for the Debian GNU/Linux servers; only localhost is a member here.
define hostgroup {
    members         localhost
    alias           Debian GNU/Linux Servers
    hostgroup_name  debian-servers
}
# A list of your web servers
define hostgroup {
    hostgroup_name  http-servers
    alias           HTTP servers
    members         localhost,lvs   ; hosts in this group, comma-separated; more can be appended
}
# Hostgroup for the servers reachable over SSH; only localhost is a member here.
define hostgroup {
    members         localhost
    alias           SSH servers
    hostgroup_name  ssh-servers
}
# Hostgroup added for FTP monitoring; the host lvs is its only member.
define hostgroup {
    members         lvs
    alias           FTP Servers
    hostgroup_name  ftp-servers
}
vim /etc/nagios3/conf.d/services_nagios2.cfg
# HTTP check applied to every member of the http-servers hostgroup.
define service {
    use                     generic-service
    service_description     HTTP
    hostgroup_name          http-servers
    check_command           check_http
    notification_interval   0               ; set > 0 if you want to be renotified
}
# SSH check applied to every member of the ssh-servers hostgroup.
define service {
    use                     generic-service
    service_description     SSH
    hostgroup_name          ssh-servers
    check_command           check_ssh
    notification_interval   0               ; set > 0 if you want to be renotified
}
# check that ftp services are running
define service {
    hostgroup_name          ftp-servers     ; this hostgroup must be defined in hostgroups_nagios2.cfg
    service_description     FTP
    check_command           check_ftp       ; FTP check; plugins live in /usr/lib/nagios/plugins/ by default
    use                     generic-service
    notification_interval   1               ; re-notify every 1 time unit while the problem lasts; 0 would notify only once
}
如果要监控远程主机的 CPU、硬盘空间、内存等资源,
可以使用 Nagios 提供的外挂插件 NRPE。
它让 nagios server 定时到 nagios client 上抓取被监控项目的结果,再判断状态是否正常,功能和 zabbix agent 类似。
#以下操作在被监控的机器上执行
apt-get install nagios-nrpe-server
vim /etc/nagios/nrpe.cfg
......
#Nagios服务器端的地址(注释单独成行,避免被当成取值的一部分)
allowed_hosts=10.10.100.54
......
# Monitored items exposed to the Nagios server through NRPE.
# Process checks: total process count (warning 150, critical 200) and
# zombie processes, state Z (warning 5, critical 10).
command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
# Disk check on the partition given with -p:
# warning when free space drops below 40%, critical below 10%.
command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 40% -c 10% -p /dev/sda1
# Load average check, thresholds given as 1-minute,5-minute,15-minute values:
# warning above 15,10,5 and critical above 30,25,20.
command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
# Logged-in users: warning above 1, critical above 2.
command[check_users]=/usr/lib/nagios/plugins/check_users -w 1 -c 2
......
#重启服务
service nagios-nrpe-server restart
apt-get install nagios-nrpe-plugin
#在 Nagios 服务器上检查与被监控主机的 NRPE 通讯是否正常,正常时会输出对方的 NRPE 版本号
/usr/lib/nagios/plugins/check_nrpe -H 10.10.100.100
NRPE v2.15
vim /etc/nagios-plugins/config/check_nrpe.cfg
.......
#添加
# Disk check executed through NRPE, applied to every host in hostgroup "all".
define service {
    use                     generic-service
    hostgroup_name          all
    service_description     NRPE check_hda1     ; matches command[check_hda1] on the monitored host
    check_command           check_nrpe_1arg!check_hda1
    notification_interval   0
}
# Total-process check executed through NRPE, applied to every host in hostgroup "all".
define service {
    service_description     NRPE check_total_procs
    check_command           check_nrpe_1arg!check_total_procs
    hostgroup_name          all
    notification_interval   0
    use                     generic-service
}
# Logged-in-users check executed through NRPE, applied to every host in hostgroup "all".
define service {
    service_description     NRPE check_users
    check_command           check_nrpe_1arg!check_users
    hostgroup_name          all
    notification_interval   0
    use                     generic-service
}
重启服务后就可以看到效果
vim /etc/nagios3/conf.d/contacts_nagios2.cfg
# Contact that receives the alert e-mails.
define contact{
    contact_name                    hu              ; name used to refer to this contact
    # Time periods during which service / host notifications may be sent;
    # the period "24x7" is defined in timeperiods_nagios2.cfg.
    service_notification_period     24x7
    host_notification_period        24x7
    # Service states that trigger a notification:
    # w = warning, u = unknown, c = critical, r = recovery.
    service_notification_options    w,u,c,r
    # Host states that trigger a notification: d = down, r = recovery.
    host_notification_options       d,r
    # Commands used to deliver the notifications (e-mail here; SMS is another option);
    # both are defined in commands.cfg.
    service_notification_commands   notify-service-by-email
    host_notification_commands      notify-host-by-email
    email                           xxx@xxx.com     ; mailbox the notifications are sent to
}
# Contact group containing the contact "hu".
define contactgroup{
    contactgroup_name   hus
    members             hu      ; separate multiple contacts with commas
}
vim /etc/nagios-plugins/config/check_nrpe.cfg
# NRPE disk check with notifications routed to the "hus" contact group.
define service {
    use                     generic-service
    hostgroup_name          all
    service_description     NRPE check_hda1
    check_command           check_nrpe_1arg!check_hda1
    contact_groups          hus     ; members of contact group hus are notified on problems
}
vim /etc/nagios3/commands.cfg
# Test setup: send notifications through a QQ mailbox using sendEmail.
# Host notification: printf builds the message body and pipes it to sendEmail.
define command{
    command_name    notify-host-by-email    ; the name referenced by host_notification_commands in the contact definition
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" |sendEmail -f 9656951@qq.com -t $CONTACTEMAIL$ -s smtp.qq.com -u "** 主机: $HOSTALIAS$ is $HOSTSTATE$ **" -xu 9656951@qq.com -xp xxxxxx
}
# Service notification: printf builds the message body and pipes it to sendEmail.
# The closing brace must sit on its own line, otherwise it becomes part of command_line.
define command{
    command_name    notify-service-by-email ; the name referenced by service_notification_commands in the contact definition
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" |sendEmail -f 9656951@qq.com -t $CONTACTEMAIL$ -s smtp.qq.com -u "** 主机: $HOSTALIAS$ 服务: $SERVICEDESC$ is $SERVICESTATE$ **" -xu 9656951@qq.com -xp xxx
}