主服务器配置
//关闭防火墙和selinux
[root@master ~]# systemctl disable --now firewalld.service
Removed /etc/systemd/system/multi-user.target.wants/firewalld.service.
Removed /etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service.
[root@master ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
[root@master ~]# setenforce 0
//安装keepalived
[root@master ~]# yum -y install keepalived
[root@master ~]# rpm -ql keepalived
/etc/keepalived
/etc/keepalived/keepalived.conf //配置文件
/etc/sysconfig/keepalived
/usr/bin/genhash
/usr/lib/systemd/system/keepalived.service //服务控制文件
/usr/libexec/keepalived
/usr/sbin/keepalived
备服务器配置
//关闭防火墙和selinux
[root@slave ~]# systemctl disable --now firewalld.service
Removed /etc/systemd/system/multi-user.target.wants/firewalld.service.
Removed /etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service.
[root@slave ~]# sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
[root@slave ~]# setenforce 0
//安装keepalived
[root@slave ~]# yum -y install keepalived
主服务器
//安装httpd,创建测试页面
[root@master ~]# yum -y install httpd
[root@master ~]# echo 'master' > /var/www/html/index.html
[root@master ~]# systemctl start httpd
[root@master ~]# ss -anltu
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
tcp LISTEN 0 128 *:80 *:*
tcp LISTEN 0 128 [::]:22 [::]:*
备服务器
//安装httpd,创建测试页面
[root@slave ~]# yum -y install httpd
[root@slave ~]# echo 'slave' > /var/www/html/index.html
[root@slave ~]# systemctl start httpd
[root@slave ~]# ss -anltu
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
tcp LISTEN 0 128 *:80 *:*
tcp LISTEN 0 128 [::]:22 [::]:*
[root@master ~]# cd /etc/keepalived/
[root@master keepalived]# ls
keepalived.conf
[root@master keepalived]# cp keepalived.conf{,-bak} //备份原文件
[root@master keepalived]# ls
keepalived.conf keepalived.conf-bak
[root@master keepalived]# rm -rf keepalived.conf
[root@master keepalived]# vim keepalived.conf
[root@master keepalived]# cat keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb01
}
vrrp_instance VI_1 {
state MASTER
interface ens160 //网卡名
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass wys123 //密码
}
virtual_ipaddress {
192.168.237.250 //VIP
}
}
virtual_server 192.168.237.250 80 { //VIP加端口号
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.237.167 80 { //主服务器IP
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.237.170 80 { //备服务器IP
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
//启动keepalived
[root@master keepalived]# systemctl enable --now keepalived
Created symlink /etc/systemd/system/multi-user.target.wants/keepalived.service → /usr/lib/systemd/system/keepalived.service.
[root@slave ~]# cd /etc/keepalived/
[root@slave keepalived]# cp keepalived.conf{,-bak}
[root@slave keepalived]# cat keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb01
}
vrrp_instance VI_1 {
state BACKUP //修改为BACKUP,备份
interface ens160
virtual_router_id 51
priority 90 //优先级改成90(必须低于主服务器的100)
advert_int 1
authentication {
auth_type PASS
auth_pass wys123
}
virtual_ipaddress {
192.168.237.250
}
}
virtual_server 192.168.237.250 80 { //VIP地址 和服务端口号
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.237.167 80 { //主服务器
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.237.170 80 { //备服务器
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
[root@slave keepalived]# systemctl enable --now keepalived
Created symlink /etc/systemd/system/multi-user.target.wants/keepalived.service → /usr/lib/systemd/system/keepalived.service.
//主服务器
[root@master ~]# ip a s ens160
2: ens160: mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:50:0d:fa brd ff:ff:ff:ff:ff:ff
inet 192.168.237.167/24 brd 192.168.237.255 scope global dynamic noprefixroute ens160
valid_lft 1175sec preferred_lft 1175sec
inet 192.168.237.250/32 scope global ens160
valid_lft forever preferred_lft forever
inet6 fe80::cef:1b5b:1107:cf5d/64 scope link noprefixroute
valid_lft forever preferred_lft forever
//备服务器
[root@slave ~]# ip a s ens160
2: ens160: mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:cd:b3:bb brd ff:ff:ff:ff:ff:ff
inet 192.168.237.170/24 brd 192.168.237.255 scope global dynamic noprefixroute ens160
valid_lft 1151sec preferred_lft 1151sec
inet6 fe80::cef:1b5b:1107:cf5d/64 scope link dadfailed tentative noprefixroute
valid_lft forever preferred_lft forever
inet6 fe80::7220:8af:6655:3c80/64 scope link noprefixroute
valid_lft forever preferred_lft forever
//关闭主服务器上的keepalived
[root@master ~]# systemctl stop keepalived
[root@master ~]# ip a s ens160
2: ens160: mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:50:0d:fa brd ff:ff:ff:ff:ff:ff
inet 192.168.237.167/24 brd 192.168.237.255 scope global dynamic noprefixroute ens160
valid_lft 1064sec preferred_lft 1064sec
inet6 fe80::cef:1b5b:1107:cf5d/64 scope link noprefixroute
valid_lft forever preferred_lft forever
//去备服务器查看,发现VIP已经漂移到备服务器,说明此时由备服务器提供服务
[root@slave ~]# ip a s ens160
2: ens160: mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:cd:b3:bb brd ff:ff:ff:ff:ff:ff
inet 192.168.237.170/24 brd 192.168.237.255 scope global dynamic noprefixroute ens160
valid_lft 1055sec preferred_lft 1055sec
inet 192.168.237.250/32 scope global ens160
valid_lft forever preferred_lft forever
inet6 fe80::cef:1b5b:1107:cf5d/64 scope link dadfailed tentative noprefixroute
valid_lft forever preferred_lft forever
inet6 fe80::7220:8af:6655:3c80/64 scope link noprefixroute
valid_lft forever preferred_lft forever
主服务器编辑脚本
//创建第一个脚本
[root@master ~]# mkdir /scripts
[root@master ~]# cd /scripts/
[root@master scripts]# vim check_h.sh
[root@master scripts]# cat check_h.sh
#!/bin/bash
# Health check invoked by keepalived's vrrp_script: when no httpd process
# is running on this node, stop keepalived so that this node gives up the VIP.

# Print the number of running httpd processes.
# The first grep removes the grep commands themselves and this script's own
# entry from the ps listing. It must NOT filter on "httpd": doing so would
# discard every httpd line, the count would always be 0, and keepalived
# would be stopped on every run.
httpd_count() {
    ps -ef | grep -Ev "grep|$0" | grep -c '\bhttpd\b'
}

if [ "$(httpd_count)" -lt 1 ]; then
    systemctl stop keepalived
fi
[root@master scripts]# chmod +x check_h.sh //添加执行权限
[root@master scripts]# ll
总用量 4
-rwxr-xr-x. 1 root root 143 10月 22 10:16 check_h.sh
//创建第二个脚本
[root@master scripts]# vim notify.sh
[root@master scripts]# cat notify.sh
#!/bin/bash
# keepalived notify hook.
#   $1 - target role: "master" or "backup"
#   $2 - the VIP (stored but not otherwise used by this script)
# On "master", httpd is started if it is not running.
# On "backup", httpd is stopped if it is running.
# Any other first argument prints a usage line.
VIP=$2

# Print how many httpd processes appear in the ps listing, ignoring the
# grep commands themselves and this script's own entry.
running_httpd() {
    ps -ef | grep -Ev "grep|$0" | grep '\bhttpd\b' | wc -l
}

case "$1" in
    master)
        procs=$(running_httpd)
        if [ "$procs" -lt 1 ]; then
            systemctl start httpd
        fi
        ;;
    backup)
        procs=$(running_httpd)
        if [ "$procs" -gt 0 ]; then
            systemctl stop httpd
        fi
        ;;
    *)
        echo "Usage:$0 master|backup VIP"
        ;;
esac
[root@master scripts]# chmod +x notify.sh
[root@master scripts]# ll
总用量 8
-rwxr-xr-x. 1 root root 143 10月 22 10:16 check_h.sh
-rwxr-xr-x. 1 root root 434 10月 22 10:18 notify.sh
备服务器编辑脚本
[root@slave ~]# mkdir /scripts
[root@master scripts]# scp notify.sh [email protected]:/scripts //拷贝主服务器脚本文件到备服务器
[email protected]'s password:
notify.sh 100% 434 337.2KB/s 00:00
[root@slave ~]# cd /scripts/
[root@slave scripts]# ll
总用量 4
-rwxr-xr-x. 1 root root 434 10月 22 10:21 notify.sh
配置主服务器的keepalived
//前面手动验证了一次VIP是否会转移到备服务器,此时VIP在备服务器上,开启主服务器上keepalived让VIP重新回到主服务器
[root@master ~]# systemctl start keepalived.service
[root@master ~]# ip a s ens160
2: ens160: mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:50:0d:fa brd ff:ff:ff:ff:ff:ff
inet 192.168.237.167/24 brd 192.168.237.255 scope global dynamic noprefixroute ens160
valid_lft 1467sec preferred_lft 1467sec
inet 192.168.237.250/32 scope global ens160
valid_lft forever preferred_lft forever
inet6 fe80::cef:1b5b:1107:cf5d/64 scope link noprefixroute
valid_lft forever preferred_lft forever
//编辑主服务的keepalived配置文件
[root@master ~]# vim /etc/keepalived/keepalived.conf
[root@master ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb01
}
vrrp_script httpd_check { //添加以下4行
script "/scripts/check_h.sh"
interval 1
weight -20
}
vrrp_instance VI_1 {
state MASTER
interface ens160
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass wys123
}
virtual_ipaddress {
192.168.237.250
}
track_script { //添加以下两行内容,引用上面定义的脚本
httpd_check
} //添加以下两行内容
notify_master "/scripts/notify.sh master 192.168.237.250"
notify_backup "/scripts/notify.sh backup 192.168.237.250"
}
virtual_server 192.168.237.250 80 {
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.237.167 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.237.170 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
//重启服务
[root@master ~]# systemctl restart keepalived.service
配置备服务器的keepalived
[root@slave ~]# vim /etc/keepalived/keepalived.conf
[root@slave ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id lb01
}
vrrp_instance VI_1 {
state BACKUP
interface ens160
virtual_router_id 51
priority 90
advert_int 1
authentication {
auth_type PASS
auth_pass wys123
}
virtual_ipaddress {
192.168.237.250
} //添加以下两行内容
notify_master "/scripts/notify.sh master 192.168.237.250"
notify_backup "/scripts/notify.sh backup 192.168.237.250"
}
virtual_server 192.168.237.250 80 {
delay_loop 6
lb_algo rr
lb_kind DR
persistence_timeout 50
protocol TCP
real_server 192.168.237.167 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.237.170 80 {
weight 1
TCP_CHECK {
connect_port 80
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
[root@slave ~]# systemctl restart keepalived.service
//模拟关闭主上面的httpd,发现keepalived服务已经自动关闭了
[root@master ~]# systemctl stop httpd
[root@master ~]# ss -anltu
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
tcp LISTEN 0 128 [::]:22 [::]:*
[root@master ~]# ip a s ens160 //VIP已经转移
2: ens160: mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:50:0d:fa brd ff:ff:ff:ff:ff:ff
inet 192.168.237.167/24 brd 192.168.237.255 scope global dynamic noprefixroute ens160
valid_lft 1100sec preferred_lft 1100sec
inet6 fe80::cef:1b5b:1107:cf5d/64 scope link noprefixroute
valid_lft forever preferred_lft forever
//keepalived已经自动关闭
[root@master ~]# systemctl status keepalived.service
● keepalived.service - LVS and VRRP High Availability Monitor
Loaded: loaded (/usr/lib/systemd/system/keepalived.service; enabled; vendor preset: disabled)
Active: inactive (dead) since Fri 2021-10-22 10:47:56 CST; 1s ago
Process: 198077 ExecStart=/usr/sbin/keepalived $KEEPALIVED_OPTIONS (code=exited, status=0/SUCCESS)
Main PID: 198078 (code=exited, status=0/SUCCESS)
Tasks: 0 (limit: 4842)
Memory: 2.1M
CGroup: /system.slice/keepalived.service
//在备服务器上查看IP,发现VIP已经转移到备服务器
[root@slave ~]# ip a s ens160
2: ens160: mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:cd:b3:bb brd ff:ff:ff:ff:ff:ff
inet 192.168.237.170/24 brd 192.168.237.255 scope global dynamic noprefixroute ens160
valid_lft 980sec preferred_lft 980sec
inet 192.168.237.250/32 scope global ens160
valid_lft forever preferred_lft forever
inet6 fe80::cef:1b5b:1107:cf5d/64 scope link dadfailed tentative noprefixroute
valid_lft forever preferred_lft forever
inet6 fe80::7220:8af:6655:3c80/64 scope link noprefixroute
valid_lft forever preferred_lft forever
//备服务器httpd正在运行
[root@slave ~]# ss -anltu
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
tcp LISTEN 0 128 *:80 *:*
tcp LISTEN 0 128 [::]:22 [::]:*
在高可用(HA)系统中,当联系2个节点的“心跳线”断开时,本来为一整体、动作协调的HA系统,就分裂成为2个独立的个体。由于相互失去了联系,都以为是对方出了故障。两个节点上的HA软件像“裂脑人”一样,争抢“共享资源”、争起“应用服务”,就会发生严重后果——或者共享资源被瓜分、2边“服务”都起不来了;或者2边“服务”都起来了,但同时读写“共享存储”,导致数据损坏(常见如数据库轮询着的联机日志出错)。
对付HA系统“裂脑”的对策,目前达成共识的大概有以下几条:
一般来说,脑裂的发生,有以下几种原因:
注意:
Keepalived配置里,同一VRRP实例两端的virtual_router_id参数如果配置不一致,也会导致裂脑问题发生。
在实际生产环境中,我们可以从以下几个方面来防止裂脑问题的发生:
对脑裂的监控应在备用服务器上进行,通过添加zabbix自定义监控进行。
监控什么信息呢?监控备上有无VIP地址
备机上出现VIP有两种情况:
发生了脑裂
正常的主备切换
监控只是监控发生脑裂的可能性,不能保证一定是发生了脑裂,因为正常的主备切换VIP也是会到备上的。
用zabbix进行监控
监控脚本如下:
备服务器
[root@slave scripts]# pwd
/scripts
[root@slave scripts]# vim check_keepalived.sh
[root@slave scripts]# cat check_keepalived.sh
#!/bin/bash
# Split-brain probe for the backup node, meant to be called by the Zabbix
# agent as a UserParameter. Prints 1 when the VIP is configured on the
# monitored interface, 0 otherwise.

readonly IFACE=ens160
readonly VIP=192.168.237.250

# Succeed when the VIP appears in the interface's address list.
# -F matches the address literally (the dots are not regex wildcards) and
# -w requires a whole-word match, so a longer address that merely contains
# the VIP as a substring is not counted.
vip_present() {
    ip a show "$IFACE" | grep -qwF -- "$VIP"
}

if vip_present; then
    echo 1
else
    echo 0
fi
[root@slave scripts]# chmod +x check_keepalived.sh
修改zabbix配置文件
[root@slave ~]# vim /usr/local/etc/zabbix_agentd.conf
#最后一行添加
UserParameter=check_log,/scripts/check_keepalived.sh
[root@slave ~]# zabbix_agentd
zabbix web界面配置监控