高可用
红帽7自带pacemaker
红帽6是conga
在centos7环境使用pacemaker部署HA集群
172.16.0.51 node1
172.16.0.52 node2
172.16.0.60 storage
172.16.0.50 浮动IP
基本概念:
1. 单一故障点
当一个节点失效,就会让整套系统无法运作
2. 心跳
节点之间通过心跳线周期性地互相检测对方是否存活;当检测到有一台出故障不能正常工作时,自动切换到其他节点上
3. 仲裁
在节点出现故障时,判断谁接替工作,从而实现服务不间断的作用
4. 脑裂
节点之间心跳中断后,各节点都认为对方已失效,多个节点同时抢资源,导致服务瘫痪
5. 服务
哪个节点拥有资源,哪个节点就对外提供服务
6. 资源
浮动IP
集中存储
服务启动
7. 监视器
持续监视对方是否存活的一套硬件或者软件,不是心跳线,而是使用心跳线检测对方的硬件或软件
8. 失效迁移
当一个设备故障了,资源会转移到另一个设备
检查节点初始化工作。
/etc/hosts
[root@storage ~]# pvcreate /dev/sdb
[root@storage ~]# vgcreate vgs /dev/sdb
[root@storage ~]# lvcreate -L 10G -n iscsi1 vgs
[root@storage ~]# lftp 172.16.0.99
lftp 172.16.0.99:~> cd release/
lftp 172.16.0.99:/release> get epel-release-7-6.noarch.rpm
[root@storage ~]# rpm -ivh epel-release-7-6.noarch.rpm
[root@storage ~]# yum install -y scsi-target-utils
[root@storage ~]# vim /etc/tgt/targets.conf
<target iqn.2020-01.com.up.storage:iscsi1>
backing-store /dev/vgs/iscsi1
write-cache off
</target>
[root@storage ~]# systemctl start tgtd
[root@storage ~]# systemctl enable tgtd
[root@storage ~]# tgtadm --lld iscsi --mode target --op show
Target 1: iqn.2020-01.com.up.storage:iscsi1
(1)双节点 ssh互信
[root@node1 ~]# ssh-keygen
[root@node1 ~]# ssh-copy-id -i .ssh/id_rsa.pub node2
[root@node2 ~]# ssh-keygen
[root@node2 ~]# ssh-copy-id -i .ssh/id_rsa.pub node1
(2)开始部署集群
[root@node1 ~]# yum install -y pcs pacemaker corosync fence-agents-all
[root@node2 ~]# yum install -y pcs pacemaker corosync fence-agents-all
[root@node1 ~]# systemctl start pcsd
[root@node1 ~]# systemctl enable pcsd
[root@node2 ~]# systemctl start pcsd
[root@node2 ~]# systemctl enable pcsd
[root@node1 ~]# echo "ABC123qwe#" | passwd --stdin hacluster
[root@node2 ~]# echo "ABC123qwe#" | passwd --stdin hacluster
[root@node1 ~]# pcs cluster auth node1 node2
[root@node1 ~]# pcs cluster setup --start --name cluster0 node1 node2
[root@node1 ~]# pcs cluster start --all
[root@node1 ~]# pcs cluster enable --all
[root@node1 ~]# pcs cluster status
PCSD Status:
node1: Online
node2: Online
[root@node1 ~]# pcs status corosync
Membership information
----------------------
Nodeid Votes Name
1 1 node1 (local)
2 1 node2
(1)浮动IP
[root@node1 ~]# pcs resource create float_ip IPaddr2 ip=172.16.0.50 cidr_netmask=16 --group apachegroup
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
float_ip (ocf::heartbeat:IPaddr2): Stopped
(2)集中存储
[root@node1 ~]# yum install -y gfs2-utils lvm2-sysvinit lvm2-cluster iscsi-initiator-utils
[root@node2 ~]# yum install -y gfs2-utils lvm2-sysvinit lvm2-cluster iscsi-initiator-utils
[root@node1 ~]# iscsiadm -m discovery -t st -p 172.16.0.60
[root@node1 ~]# iscsiadm -m node -T iqn.2020-01.com.up.storage:iscsi1 -p 172.16.0.60 -l
[root@node2 ~]# iscsiadm -m discovery -t st -p 172.16.0.60
[root@node2 ~]# iscsiadm -m node -T iqn.2020-01.com.up.storage:iscsi1 -p 172.16.0.60 -l
[root@node1 ~]# ls /dev/sdb
/dev/sdb
[root@node2 ~]# ls /dev/sdb
/dev/sdb
[root@node1 ~]# lvmconf --enable-cluster
[root@node2 ~]# lvmconf --enable-cluster
[root@node1 ~]# systemctl start dlm
[root@node1 ~]# systemctl enable dlm
[root@node2 ~]# systemctl start dlm
[root@node2 ~]# systemctl enable dlm
[root@node1 ~]# /etc/init.d/clvmd start
[root@node1 ~]# chkconfig clvmd on
[root@node2 ~]# /etc/init.d/clvmd start
[root@node2 ~]# chkconfig clvmd on
[root@node1 ~]# pvcreate /dev/sdb
[root@node1 ~]# vgcreate cvg0 /dev/sdb
[root@node1 ~]# lvcreate -L 5G -n clv01 cvg0
[root@node2 ~]# pvs 2> /dev/null | tail -1
/dev/sdb cvg0 lvm2 a-- <10.00g <5.00g
[root@node2 ~]# vgs 2> /dev/null | tail -1
cvg0 1 1 0 wz--nc <10.00g <5.00g
[root@node2 ~]# lvs 2> /dev/null | tail -1
clv01 cvg0 -wi-a----- 5.00g
注意:-t 参数的格式为"集群名:文件系统名",其中集群名(cluster0)必须与 pcs cluster setup --name 指定的集群名一致;-j 2 表示日志数量,对应2个节点
[root@node1 ~]# mkfs.gfs2 -p lock_dlm -j 2 -t cluster0:clv01 /dev/cvg0/clv01
挂载
[root@node1 ~]# mount /dev/cvg0/clv01 /mnt/
[root@node1 ~]# echo "pacemaker_apache" > /mnt/index.html
[root@node1 ~]# umount /mnt/
[root@node1 ~]# pcs resource create gfs2 Filesystem device="/dev/cvg0/clv01" directory="/var/www/html" fstype="gfs2" --group apachegroup
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
float_ip (ocf::heartbeat:IPaddr2): Stopped
gfs2 (ocf::heartbeat:Filesystem): Stopped
(3)服务
注意:安装服务类程序时,建议先不要启动服务,最后通过集群服务一起启动。防止启动失败
[root@node1 ~]# yum install -y httpd wget
[root@node2 ~]# yum install -y httpd wget
[root@node1 ~]# vim /etc/httpd/conf/httpd.conf
<Location /server-status>
SetHandler server-status
Require all granted
</Location>
##打开状态查看
[root@node2 ~]# vim /etc/httpd/conf/httpd.conf
<Location /server-status>
SetHandler server-status
Require all granted
</Location>
[root@node1 ~]# systemctl start httpd
[root@node2 ~]# systemctl start httpd
[root@node1 ~]# curl 127.0.0.1/server-status
[root@node2 ~]# curl 127.0.0.1/server-status
用浏览器看,比较准确
[root@node1 ~]# systemctl stop httpd
[root@node2 ~]# systemctl stop httpd
##添加资源前,服务一定是关闭状态!!!
[root@node1 ~]# pcs resource create Apache apache configfile="/etc/httpd/conf/httpd.conf" statusurl="http://127.0.0.1/server-status" --group apachegroup
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
float_ip (ocf::heartbeat:IPaddr2): Stopped
gfs2 (ocf::heartbeat:Filesystem): Stopped
Apache (ocf::heartbeat:apache): Stopped
删除服务,重新添加
pcs resource delete Apache
[root@node1 ~]# pcs property set no-quorum-policy=ignore
##忽略仲裁(quorum)丢失(两个节点时必须设置为ignore,否则一个节点故障后票数不足,资源会被全部停止)
[root@node1 ~]# pcs property set stonith-enabled=false
##关闭fence设备
[root@node1 ~]# pcs resource defaults migration-threshold=1
##资源在某个节点上失败1次后,即进行失效迁移,转移到其他节点
[root@node1 ~]# pcs resource defaults resource-stickiness=100
##防止资源回迁
查看状态
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
float_ip (ocf::heartbeat:IPaddr2): Started node1
gfs2 (ocf::heartbeat:Filesystem): Started node1
Apache (ocf::heartbeat:apache): Started node1
[root@node1 ~]# ip a | grep 172
inet 172.16.0.51/16 brd 172.16.255.255 scope global ens32
inet 172.16.0.50/16 brd 172.16.255.255 scope global secondary ens32
[root@node1 ~]# df -h | tail -1
/dev/mapper/cvg0-clv01 5.0G 67M 5.0G 2% /var/www/html
[root@node1 ~]# netstat -antp | grep httpd
tcp6 0 0 :::80 :::* LISTEN 5817/httpd
关于配置文件
[root@node1 ~]# vim /etc/corosync/corosync.conf
心跳配置文件
[root@node1 ~]# cd /var/lib/pacemaker/cib/
[root@node1 /var/lib/pacemaker/cib]# vim cib.xml
资源配置文件
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
float_ip (ocf::heartbeat:IPaddr2): Started node1
gfs2 (ocf::heartbeat:Filesystem): Started node1
Apache (ocf::heartbeat:apache): Started node1
[root@node1 ~]# pcs cluster standby node1
将node1设置为备用节点
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
float_ip (ocf::heartbeat:IPaddr2): Started node2
gfs2 (ocf::heartbeat:Filesystem): Started node2
Apache (ocf::heartbeat:apache): Started node2
[root@node1 ~]# pcs cluster unstandby node1
资源没有被node1抢回
[root@node2 ~]# init 0
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
float_ip (ocf::heartbeat:IPaddr2): Started node1
gfs2 (ocf::heartbeat:Filesystem): Started node1
Apache (ocf::heartbeat:apache): Started node1
如果查询资源时,发现同一节点上有资源的状态为Stopped
检查是否在添加资源前没有关闭apache服务;关闭服务之后,删除对应资源,重新添加对应资源,然后再查询。