Note: the node IPs were changed frequently during setup, and careless cluster operations left node1 in a failed state.
# OS: CentOS 7.2; all packages are installed from the Yum repositories
node1: 192.168.8.111
node2: 192.168.8.112
vip:   192.168.8.200
nfs:   192.168.8.113
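# Name resolution on both nodes: a sketch of the assumed /etc/hosts (the node hostnames must not be appended to the 127.0.0.1 line, see the corosync-cfgtool note below)
~]# cat /etc/hosts
127.0.0.1       localhost localhost.localdomain
192.168.8.111   node1
192.168.8.112   node2
192.168.8.113   nfs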
# Key-based SSH trust between the nodes
~]# ssh-keygen
~]# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
~]# chmod go= .ssh/authorized_keys
~]# scp -p .ssh/id_rsa .ssh/authorized_keys node1:/root/.ssh/
# Install pcs
~]# ansible ha -m yum -a "name=pcs state=installed"
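# The ansible group "ha" used throughout refers to the two cluster nodes; a minimal sketch of the assumed inventory (/etc/ansible/hosts):
~]# cat /etc/ansible/hosts
[ha]
node1
node2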
# Sync time against the NTP server
~]# ntpdate 192.168.9.19;ssh node2 ntpdate 192.168.9.19
# Start pcsd on both nodes and enable it at boot
~]# ansible ha -m service -a "name=pcsd state=started enabled=yes"
# Set the hacluster user's password on both nodes
~]# ansible ha -m shell -a 'echo "xiong" | passwd --stdin hacluster'
# Authenticate the cluster nodes to pcsd
~]# pcs cluster auth node1 node2
Username: hacluster
Password:
node1: Authorized
node2: Authorized
# Add both nodes to the cluster: --name <cluster name> followed by the node list
~]# pcs cluster setup --name myha node1 node2
Shutting down pacemaker/corosync services...
Redirecting to /bin/systemctl stop pacemaker.service
Redirecting to /bin/systemctl stop corosync.service
Killing any remaining services...
Removing all cluster configuration files...
node1: Succeeded
node2: Succeeded
Synchronizing pcsd certificates on nodes node1, node2...
node1: Success
node2: Success
Restarting pcsd on the nodes in order to reload the certificates...
node1: Success
node2: Success
# After the nodes join the cluster, the corosync configuration file is generated
~]# ls /etc/corosync/
corosync.conf corosync.conf.example corosync.conf.example.udpu corosync.xml.example uidgid.d
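# Rough excerpt of the pcs-generated corosync.conf for reference (trimmed sketch; the actual file also contains secauth and logging settings):
totem {
    version: 2
    cluster_name: myha
    transport: udpu
}
nodelist {
    node {
        ring0_addr: node1
        nodeid: 1
    }
    node {
        ring0_addr: node2
        nodeid: 2
    }
}
quorum {
    provider: corosync_votequorum
    two_node: 1
}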
# Start the cluster services on all nodes
~]# pcs cluster start --all
node2: Starting Cluster...
node1: Starting Cluster...
# Follow the cluster log
~]# tail -f /var/log/cluster/corosync.log
# Check whether corosync communication is healthy
~]# corosync-cfgtool -s
Printing ring status.
Local node ID 1
RING ID 0
id= 127.0.0.1 # if the ring id is 127.0.0.1 the cluster is broken: edit /etc/hosts, remove the hostname from the 127.0.0.1 line and keep only the default entry
status= ring 0 active with no faults
# Current DC: the Designated Coordinator, i.e. the node elected to coordinate cluster decisions
# Yum repository for crmsh
[network_ha-clustering_Stable]
name=Stable High Availability/Clustering packages (CentOS_CentOS-7)
type=rpm-md
baseurl=http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/
gpgcheck=1
gpgkey=http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7//repodata/repomd.xml.key
enabled=1
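# Save the repository definition above on the node that will run crmsh (the filename below is an assumption):
~]# vim /etc/yum.repos.d/ha-clustering.repo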
# Install crmsh and pssh; only the node you will operate the cluster from needs them
~]# yum -y install crmsh pssh
# NFS setup
~]# ansible ha -m yum -a "name=nfs-utils state=installed"
~]# vim /etc/exports
/opt/pages 192.168.8.0/24(rw)
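# On the nfs host (192.168.8.113) the export directory still has to exist and be published; a minimal sketch:
~]# mkdir -p /opt/pages
~]# systemctl start nfs-server
~]# exportfs -arv
~]# showmount -e 192.168.8.113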
# Install nginx and verify that the NFS export can be mounted
~]# ansible ha -m yum -a "name=nginx state=installed"
~]# ansible ha -m shell -a "mount -t nfs 192.168.8.113:/opt/pages /usr/share/nginx/html/"
~]# ansible ha -m shell -a "systemctl start nginx"
~]# ansible ha -m shell -a "umount /usr/share/nginx/html"
~]# ansible ha -m shell -a "df -Th"
# In crmsh, run verify after each change and commit only once everything is finished (the verify/commit step is shown after the constraints below)
# Define the virtual IP resource
crm(live)configure# primitive vip ocf:heartbeat:IPaddr2 params ip="192.168.8.200"
# Define the nginx service resource
crm(live)configure# primitive vipservice systemd:nginx op monitor interval=30s timeout=20s
# Define the NFS filesystem resource
crm(live)configure# primitive vipnfs ocf:heartbeat:Filesystem params device="192.168.8.113:/opt/pages" directory="/usr/share/nginx/html/" fstype="nfs" op start timeout=60s op stop timeout=60s op monitor timeout=40 interval=20
# Define a colocation constraint. A ( B C ) expands to A-with-B and A-with-C: vipservice must run together with both vip and vipnfs
crm(live)configure# colocation vipservice_with_vip_and_vipnfs inf: vipservice ( vip vipnfs )
# Define ordering constraints: vip must be started before vipnfs
crm(live)configure# order vipnfs_after_vip Mandatory: vip vipnfs
# vipnfs must be started before vipservice
crm(live)configure# order vipservice_after_vipnfs Mandatory: vipnfs vipservice
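# As noted above, verify the pending changes and commit them once all resources and constraints are defined:
crm(live)configure# verify
crm(live)configure# commit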
# Check that the resources are running
crm(live)# status
Last updated: Thu May 18 16:00:41 2017    Last change: Thu May 18 16:00:36 2017 by root via cibadmin on node1
Stack: corosync
Current DC: node2 (version 1.1.13-10.el7-44eb2dd) - partition with quorum
2 nodes and 3 resources configured
Online: [ node1 node2 ]
Full list of resources:
vip         (ocf::heartbeat:IPaddr2):       Started node1
vipservice  (systemd:nginx):                Started node1
vipnfs      (ocf::heartbeat:Filesystem):    Started node1
# Exit crmsh
# Run ss -tnl | grep 80, df -Th and ip addr show in turn; once everything checks out, force node1 into standby and confirm the resources move to node2
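# A sketch of that failover test in crmsh:
crm(live)# node standby node1    # resources should migrate to node2
crm(live)# status
crm(live)# node online node1     # bring node1 back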
# Define a location constraint with a score of 100 so that the vip resource prefers node1
crm(live)configure# location node1_vip vip 100: node1
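# To review the full configuration, including the colocation/order/location constraints:
crm(live)configure# show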
# Common errors
# Node node1: UNCLEAN (offline): run corosync-cfgtool -s and check whether the ring id is 127.0.0.1; if it is, remove the hostname configured on the 127.0.0.1 line in /etc/hosts
[root@node2 ~]# pcs status
Cluster name: myha
WARNING: no stonith devices and stonith-enabled is not false
Last updated: Wed May 17 15:34:53 2017    Last change: Wed May 17 15:31:50 2017 by hacluster via crmd on node2
Stack: corosync
Current DC: node2 (version 1.1.13-10.el7-44eb2dd) - partition WITHOUT quorum
2 nodes and 0 resources configured
Node node1: UNCLEAN (offline)
Online: [ node2 ]
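# Fix sketch for the UNCLEAN state: on every node remove the hostnames from the 127.0.0.1 line in /etc/hosts, then restart the cluster
~]# pcs cluster stop --all
~]# pcs cluster start --all
~]# corosync-cfgtool -s    # the ring id should now show the node's real address, e.g. 192.168.8.111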
# WARNING: no stonith devices and stonith-enabled is not false
Fix: pcs property set stonith-enabled=false
This article is from the "xiong" blog; please keep this attribution when reposting: http://xiong51.blog.51cto.com/5239058/1927204