Red Hat Enterprise Linux 7 uses Pacemaker in place of rgmanager for cluster resource management and recovery from node failures. Advantages of Pacemaker include:
• Automatic synchronization and versioning of the resource configuration.
• Flexible resource and fencing models that map more closely to your environment.
• Fencing can be used to recover from resource-level failures.
• Time-based configuration options.
• The ability to run the same resource on multiple nodes, for example a web server or a clustered file system.
• The ability to run the same resource on multiple nodes in one of two different modes, for example a synchronization source and target.
• Pacemaker does not require a distributed lock manager.
• Configurable behavior when quorum is lost or multiple partitions form.
The Pacemaker configuration system, known as pcs, replaces ccs, ricci, and luci as the unified cluster configuration and management tool. Advantages of pcs include:
• A command-line tool.
• The ability to easily bootstrap a cluster, that is, to initialize the cluster and get it running.
• The ability to configure cluster options.
• The ability to add, remove, or modify resources and the relationships between them (see the short sketch after this list).
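As a rough illustration of what the pcs command line looks like, the commands below bootstrap a cluster, set a cluster-wide option, and tie two resources together with a colocation constraint. The cluster name demo_cluster and the WebSite/VirtualIP resource names here are only placeholders; the actual commands used in this article appear in the steps that follow.
[root@node1 ~]# pcs cluster setup --start --name demo_cluster node1 node2
[root@node1 ~]# pcs property set stonith-enabled=false
[root@node1 ~]# pcs constraint colocation add WebSite with VirtualIP INFINITY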
1. Cluster Configuration
This article uses two machines:
192.168.122.194
192.168.122.137
Disable the firewall and SELinux
[root@node1 ~]# systemctl disable firewalld
[root@node1 ~]# systemctl stop firewalld
[root@node1 ~]# vim /etc/selinux/config
SELINUX=disabled
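Editing /etc/selinux/config only takes effect after a reboot; if you want SELinux out of the way immediately, you can also switch to permissive mode for the current boot:
[root@node1 ~]# setenforce 0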
On all nodes
Set the hostname and synchronize the time
[root@node1 ~]# hostnamectl set-hostname node1
[root@node1 ~]# yum -y install ntp
[root@node1 ~]# ntpdate cn.pool.ntp.org
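ntpdate only performs a one-time adjustment. To keep the clocks synchronized continuously, one option is to enable the ntpd service that ships with the ntp package:
[root@node1 ~]# systemctl enable ntpd && systemctl start ntpd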
Set up passwordless SSH trust between the nodes
[root@node1 ~]# ssh-keygen -f /root/.ssh/id_rsa -N ""
[root@node1 ~]# mv /root/.ssh/{id_rsa.pub,authorized_keys}
[root@node1 ~]# scp -r /root/.ssh node2:/root/
[root@node1 ~]# ls /root/.ssh/
authorized_keys id_rsa known_hosts
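Before continuing, it is worth confirming that the trust works in both directions, for example:
[root@node1 ~]# ssh node2 hostname
[root@node2 ~]# ssh node1 hostname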
Name resolution
[root@node1 ~]# vim /etc/hosts
192.168.122.194 node1
192.168.122.137 node2
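A quick check that the names resolve (assuming the /etc/hosts entries above are present on every node):
[root@node1 ~]# getent hosts node2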
Install the packages
[root@node1 ~]# yum -y install pcs pacemaker corosync fence-agents-all
[root@node1 ~]# systemctl start pcsd.service && systemctl enable pcsd.service
[root@node1 ~]# passwd hacluster
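The hacluster password must be set on every node, since this is the account that pcs cluster auth uses below. If you prefer to set it non-interactively, something like the following works on RHEL 7 (the password "redhat" is only a placeholder):
[root@node1 ~]# echo "redhat" | passwd --stdin hacluster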
On any one node
[root@node1 ~]# pcs cluster auth node1 node2
Username: hacluster
Password:
node1: Authorized
node2: Authorized
Create the cluster
[root@node1 ~]# pcs cluster setup --start --name abel_cluster \
> node1 node2
Destroying cluster on nodes: node1, node2...
node2: Stopping Cluster (pacemaker)...
node1: Stopping Cluster (pacemaker)...
node1: Successfully destroyed cluster
node2: Successfully destroyed cluster
Sending 'pacemaker_remote authkey' to 'node1', 'node2'
node1: successful distribution of the file 'pacemaker_remote authkey'
node2: successful distribution of the file 'pacemaker_remote authkey'
Sending cluster config files to the nodes...
node1: Succeeded
node2: Succeeded
Starting cluster on nodes: node1, node2...
node2: Starting Cluster...
node1: Starting Cluster...
Synchronizing pcsd certificates on nodes node1, node2...
node1: Success
node2: Success
Restarting pcsd on the nodes in order to reload the certificates...
node1: Success
node2: Success
Start the cluster
[root@node1 ~]# pcs cluster start --all
Enable the cluster to start at boot
[root@node1 ~]# pcs cluster enable --all
node1: Cluster Enabled
node2: Cluster Enabled
[root@node1 ~]# pcs cluster status
Cluster Status:
Stack: corosync
Current DC: node1 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Wed Nov 8 11:41:06 2017
Last change: Wed Nov 8 11:40:05 2017 by hacluster via crmd on node1
2 nodes configured
0 resources configured
PCSD Status:
node1: Online
node2: Online
[root@node1 ~]# corosync-cfgtool -s
Printing ring status.
Local node ID 1
RING ID 0
id = 192.168.122.171
status = ring 0 active with no faults
[root@node1 ~]# corosync-cmapctl | grep members
runtime.totem.pg.mrp.srp.members.1.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1.ip (str) = r(0) ip(192.168.122.171)
runtime.totem.pg.mrp.srp.members.1.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1.status (str) = joined
runtime.totem.pg.mrp.srp.members.2.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.2.ip (str) = r(0) ip(192.168.122.11)
runtime.totem.pg.mrp.srp.members.2.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.2.status (str) = joined
[root@node1 ~]# pcs status corosync
Membership information
----------------------
Nodeid Votes Name
2 1 node2
1 1 node1 (local)
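In addition to these one-off checks, crm_mon (shipped with pacemaker) shows nodes and resources; with -1 it prints a single snapshot and exits:
[root@node1 ~]# crm_mon -1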
If you have no fence devices, it is recommended to disable STONITH
[root@node1 ~]# pcs property set stonith-enabled=false
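In a production cluster you would normally leave STONITH enabled and configure a real fence device instead. As a rough sketch only (the device name, IPMI address, and credentials below are placeholders, not values from this environment):
[root@node1 ~]# pcs stonith create node1_fence fence_ipmilan pcmk_host_list="node1" ipaddr="192.168.122.200" login="admin" passwd="password" op monitor interval=60s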
A cluster normally needs more than half of the votes to retain quorum. In a two-node cluster, losing one node means losing quorum, so tell Pacemaker to ignore quorum loss:
[root@node1 ~]# pcs property set no-quorum-policy=ignore
[root@node1 ~]# crm_verify -L -V
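You can confirm that both properties were applied by listing the cluster properties that have been set:
[root@node1 ~]# pcs property list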
Test
[root@node1 ~]# pcs resource create VirtualIP IPaddr2 ip=192.168.0.99 cidr_netmask=32 nic=eth0 op monitor interval=30s
[root@node1 ~]# pcs resource show
Resource Group: apachegroup
web_lvm (ocf::heartbeat:LVM): Stopped
VirtualIP (ocf::heartbeat:IPaddr2): Started node
[root@node1 ~]# ip a
1: lo: mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 52:54:a8:ab:a3:c8 brd ff:ff:ff:ff:ff:ff
inet 192.168.122.194/24 brd 192.168.122.255 scope global dynamic eth0
valid_lft 3111sec preferred_lft 3111sec
inet 192.168.0.99/32 scope global eth0
valid_lft forever preferred_lft forever
inet6 fe80::5054:a8ff:feab:a3c8/64 scope link
valid_lft forever preferred_lft forever
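To test failover, one simple approach is to put node1 into standby, confirm that VirtualIP moves to node2 (check with pcs resource show and ip a on node2), and then bring node1 back:
[root@node1 ~]# pcs cluster standby node1
[root@node1 ~]# pcs resource show
[root@node1 ~]# pcs cluster unstandby node1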