A highly available web (www) cluster built with corosync/openais + pacemaker
Topology diagram:
Part 1: Configure the network parameters on each cluster node
node1:
[root@node1 ~]# vim /etc/sysconfig/network
NETWORKING=yes
NETWORKING_IPV6=no
HOSTNAME=node1.a.com
[root@node1 ~]# vim /etc/hosts
192.168.2.10 node1.a.com node1
192.168.2.20 node2.a.com node2
[root@node1 ~]# hostname
node1.a.com
node2:
[root@node2 ~]# vim /etc/sysconfig/network
NETWORKING=yes
NETWORKING_IPV6=no
HOSTNAME=node2.a.com
[root@node2 ~]# vim /etc/hosts
192.168.2.10 node1.a.com node1
192.168.2.20 node2.a.com node2
[root@node2 ~]# hostname
node2.a.com
Part 2: Synchronize the time on all cluster nodes
node1:
[root@node1 ~]# hwclock -s
node2:
[root@node2 ~]# hwclock -s
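If the hardware clocks themselves cannot be trusted, the system time can instead be pulled from an NTP server and then written back to the hardware clock. A minimal sketch, assuming a reachable NTP server at 192.168.2.1 (hypothetical address), run on both nodes:
[root@node1 ~]# ntpdate 192.168.2.1    set the system clock from the NTP server (hypothetical server address)
[root@node1 ~]# hwclock -w             write the corrected time back to the hardware clock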
Part 3: Generate SSH keys on each node to allow passwordless communication
node1:
[root@node1 ~]# ssh-keygen -t rsa                      generate an RSA key pair
[root@node1 ~]# ssh-copy-id -i .ssh/id_rsa.pub node2   copy the public key to node2
node2:
[root@node2 ~]# ssh-keygen -t rsa                      generate an RSA key pair
[root@node2 ~]# ssh-copy-id -i .ssh/id_rsa.pub node1   copy the public key to node1
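To confirm that passwordless SSH works in both directions, a quick sanity check such as the following should return the peer's hostname without prompting for a password (not part of the original steps, just a verification sketch):
[root@node1 ~]# ssh node2 'hostname'
[root@node2 ~]# ssh node1 'hostname'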
Part 4: Configure the yum client on each node
[root@node1 ~]# vim /etc/yum.repos.d/server.repo
[rhel-server]
name=Red Hat Enterprise Linux server
baseurl=file:///mnt/cdrom/Server
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
[rhel-vt]
name=Red Hat Enterprise Linux vt
baseurl=file:///mnt/cdrom/VT
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
[rhel-cluster]            repository needed for the cluster packages
name=Red Hat Enterprise Linux cluster
baseurl=file:///mnt/cdrom/Cluster
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
[rhel-clusterstorage]     repository needed for the cluster file system packages
name=Red Hat Enterprise Linux clusterstorage
baseurl=file:///mnt/cdrom/ClusterStorage
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
[root@node2 ~]# vim /etc/yum.repos.d/server.repo
[rhel-server]
name=Red Hat Enterprise Linux server
baseurl=file:///mnt/cdrom/Server
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
[rhel-vt]
name=Red Hat Enterprise Linux vt
baseurl=file:///mnt/cdrom/VT
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
[rhel-cluster]            repository needed for the cluster packages
name=Red Hat Enterprise Linux cluster
baseurl=file:///mnt/cdrom/Cluster
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
[rhel-clusterstorage]     repository needed for the cluster file system packages
name=Red Hat Enterprise Linux clusterstorage
baseurl=file:///mnt/cdrom/ClusterStorage
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
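These repositories all point to file:///mnt/cdrom, so the RHEL installation DVD must be mounted there on both nodes before yum can use them. A minimal sketch, assuming the optical drive appears as /dev/cdrom:
[root@node1 ~]# mkdir -p /mnt/cdrom
[root@node1 ~]# mount /dev/cdrom /mnt/cdrom
[root@node2 ~]# mkdir -p /mnt/cdrom
[root@node2 ~]# mount /dev/cdrom /mnt/cdrom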
Part 5: Upload the downloaded RPM packages to both Linux systems
node1:
[root@node1 ~]# ll
-rw-r--r-- 1 root root 271360 May 8 13:07 cluster-glue-1.0.6-1.6.el5.i386.rpm       glue layer, used to add support for more nodes in the cluster
-rw-r--r-- 1 root root 133254 May 8 13:07 cluster-glue-libs-1.0.6-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 170052 May 8 13:07 corosync-1.2.7-1.1.el5.i386.rpm           the main corosync package
-rw-r--r-- 1 root root 158502 May 8 13:07 corosynclib-1.2.7-1.1.el5.i386.rpm        corosync libraries
-rw-r--r-- 1 root root 165591 May 8 13:07 heartbeat-3.0.3-2.3.el5.i386.rpm          heartbeat is used here only to provide resource agents
-rw-r--r-- 1 root root 289600 May 8 13:07 heartbeat-libs-3.0.3-2.3.el5.i386.rpm     heartbeat libraries
-rw-r--r-- 1 root root 60458 May 8 13:07 libesmtp-1.0.4-5.el5.i386.rpm
-rw-r--r-- 1 root root 126663 May 5 11:26 libmcrypt-2.5.7-5.el5.i386.rpm
-rw-r--r-- 1 root root 207085 May 8 13:07 openais-1.1.3-1.6.el5.i386.rpm            extends the functionality available to pacemaker
-rw-r--r-- 1 root root 94614 May 8 13:07 openaislib-1.1.3-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 796813 May 8 13:07 pacemaker-1.1.5-1.1.el5.i386.rpm          the main pacemaker package
-rw-r--r-- 1 root root 207925 May 8 13:07 pacemaker-cts-1.1.5-1.1.el5.i386.rpm
-rw-r--r-- 1 root root 332026 May 8 13:07 pacemaker-libs-1.1.5-1.1.el5.i386.rpm     pacemaker libraries
-rw-r--r-- 1 root root 32818 May 8 13:07 perl-TimeDate-1.16-5.el5.noarch.rpm
-rw-r--r-- 1 root root 388632 May 8 13:07 resource-agents-1.0.4-1.1.el5.i386.rpm
node2:
[root@node2 ~]# ll
-rw-r--r-- 1 root root 271360 May 8 13:07 cluster-glue-1.0.6-1.6.el5.i386.rpm       glue layer, used to add support for more nodes in the cluster
-rw-r--r-- 1 root root 133254 May 8 13:07 cluster-glue-libs-1.0.6-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 170052 May 8 13:07 corosync-1.2.7-1.1.el5.i386.rpm           the main corosync package
-rw-r--r-- 1 root root 158502 May 8 13:07 corosynclib-1.2.7-1.1.el5.i386.rpm        corosync libraries
-rw-r--r-- 1 root root 165591 May 8 13:07 heartbeat-3.0.3-2.3.el5.i386.rpm          heartbeat is used here only to provide resource agents
-rw-r--r-- 1 root root 289600 May 8 13:07 heartbeat-libs-3.0.3-2.3.el5.i386.rpm     heartbeat libraries
-rw-r--r-- 1 root root 60458 May 8 13:07 libesmtp-1.0.4-5.el5.i386.rpm
-rw-r--r-- 1 root root 126663 May 5 11:26 libmcrypt-2.5.7-5.el5.i386.rpm
-rw-r--r-- 1 root root 207085 May 8 13:07 openais-1.1.3-1.6.el5.i386.rpm            extends the functionality available to pacemaker
-rw-r--r-- 1 root root 94614 May 8 13:07 openaislib-1.1.3-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 796813 May 8 13:07 pacemaker-1.1.5-1.1.el5.i386.rpm          the main pacemaker package
-rw-r--r-- 1 root root 207925 May 8 13:07 pacemaker-cts-1.1.5-1.1.el5.i386.rpm
-rw-r--r-- 1 root root 332026 May 8 13:07 pacemaker-libs-1.1.5-1.1.el5.i386.rpm     pacemaker libraries
-rw-r--r-- 1 root root 32818 May 8 13:07 perl-TimeDate-1.16-5.el5.noarch.rpm
-rw-r--r-- 1 root root 388632 May 8 13:07 resource-agents-1.0.4-1.1.el5.i386.rpm
Part 6: Install all the RPM packages on each node
node1: [root@node1 ~]# yum localinstall *.rpm -y --nogpgcheck
node2: [root@node2 ~]# yum localinstall *.rpm -y --nogpgcheck
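A quick check that the key packages landed on both nodes; rpm -q accepts several package names at once (a verification sketch, not part of the original steps):
[root@node1 ~]# rpm -q corosync pacemaker heartbeat openais
[root@node1 ~]# ssh node2 'rpm -q corosync pacemaker heartbeat openais'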
Part 7: Configure each node
node1:
1: Change into the main configuration directory and create the config file
[root@node1 ~]# cd /etc/corosync/
[root@node1 corosync]# cp corosync.conf.example corosync.conf
[root@node1 corosync]# vim corosync.conf
# Please read the corosync.conf.5 manual page
compatibility: whitetank
totem {                              //settings for the protocol used to carry the heartbeat/membership traffic
        version: 2
        secauth: off
        threads: 0
        interface {
                ringnumber: 0
                bindnetaddr: 192.168.2.0      //this is the only line we need to change
                mcastaddr: 226.94.1.1
                mcastport: 5405
        }
}
logging {
        fileline: off
        to_stderr: no                //whether to send output to standard error
        to_logfile: yes              //log to a file
        to_syslog: yes               //log to syslog (turning one of the two off is recommended; logging to both hurts performance)
        logfile: /var/log/cluster/corosync.log    //the cluster directory must be created manually
        debug: off                   //turn on when troubleshooting
        timestamp: on                //whether to record timestamps in the log
        //****** the settings below belong to openais and do not need to be enabled ******//
        logger_subsys {
                subsys: AMF
                debug: off
        }
}
amf {
        mode: disabled
}
//****** additions: everything above is only the messaging layer; since we use pacemaker, it must be declared as a service ******//
service {
        ver: 0
        name: pacemaker
}
//****** openais itself is not used, but some of its sub-options are ******//
aisexec {
        user: root
        group: root
}
2: Create the cluster log directory
[root@node1 corosync]# mkdir /var/log/cluster
3: Generate an authkey so that hosts joining the cluster can be authenticated
[root@node1 corosync]# corosync-keygen
[root@node1 corosync]# ll
-rw-r--r-- 1 root root 5384 Jul 28 2010 amf.conf.example
-r-------- 1 root root 128 May 8 14:09 authkey
-rw-r--r-- 1 root root 538 May 8 14:08 corosync.conf
-rw-r--r-- 1 root root 436 Jul 28 2010 corosync.conf.example
drwxr-xr-x 2 root root 4096 Jul 28 2010 service.d
drwxr-xr-x 2 root root 4096 Jul 28 2010 uidgid.d
4: Copy the files from node1 to node2 (remember to use -p to preserve permissions and timestamps)
[root@node1 corosync]# scp -p authkey corosync.conf node2:/etc/corosync/
authkey 100% 128 0.1KB/s 00:00
corosync.conf 100% 513 0.5KB/s 00:00
[root@node1 corosync]# ssh node2 'mkdir /var/log/cluster'
5: Start the corosync service on node1
[root@node1 corosync]# service corosync start
6: Verify that the corosync engine started correctly
[root@node1 corosync]# grep -i -e "corosync cluster engine" -e "configuration file" /var/log/messages
Oct 18 23:24:02 node1 smartd[2832]: Opened configuration file /etc/smartd.conf
Oct 18 23:24:02 node1 smartd[2832]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
May 7 14:00:29 node1 smartd[2787]: Opened configuration file /etc/smartd.conf
May 7 14:00:29 node1 smartd[2787]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
May 7 16:24:36 node1 corosync[686]: [MAIN ] Corosync Cluster Engine ('1.2.7'): started and ready to provide service.
May 7 16:24:36 node1 corosync[686]: [MAIN ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
7: Check that the initial membership notifications went out
[root@node1 corosync]# grep -i totem /var/log/messages
May 7 16:24:36 node1 corosync[686]: [TOTEM ] Initializing transport (UDP/IP).
May 7 16:24:36 node1 corosync[686]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
May 7 16:24:36 node1 corosync[686]: [TOTEM ] The network interface is down.
May 7 16:24:37 node1 corosync[686]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
May 7 16:38:30 node1 corosync[754]: [TOTEM ] Initializing transport (UDP/IP).
May 7 16:38:30 node1 corosync[754]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
May 7 16:38:30 node1 corosync[754]: [TOTEM ] The network interface [192.168.2.10] is now up.
May 7 16:38:31 node1 corosync[754]: [TOTEM ] Process pause detected for 603 ms, flushing membership messages.
May 7 16:38:31 node1 corosync[754]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
8: Check whether any errors were produced during startup
[root@node1 corosync]# grep -i error: /var/log/messages | grep -v unpack_resources    (filters out the expected STONITH errors)
9: Check whether pacemaker has started
[root@node1 corosync]# grep -i pcmk_startup /var/log/messages
May 7 16:24:36 node1 corosync[686]: [pcmk ] info: pcmk_startup: CRM: Initialized
May 7 16:24:36 node1 corosync[686]: [pcmk ] Logging: Initialized pcmk_startup
May 7 16:24:36 node1 corosync[686]: [pcmk ] info: pcmk_startup: Maximum core file size is: 4294967295
May 7 16:24:36 node1 corosync[686]: [pcmk ] info: pcmk_startup: Service: 9
May 7 16:24:36 node1 corosync[686]: [pcmk ] info: pcmk_startup: Local hostname: node1.a.com
May 7 16:38:31 node1 corosync[754]: [pcmk ] info: pcmk_startup: CRM: Initialized
May 7 16:38:31 node1 corosync[754]: [pcmk ] Logging: Initialized pcmk_startup
May 7 16:38:31 node1 corosync[754]: [pcmk ] info: pcmk_startup: Maximum core file size is: 4294967295
May 7 16:38:31 node1 corosync[754]: [pcmk ] info: pcmk_startup: Service: 9
May 7 16:38:31 node1 corosync[754]: [pcmk ] info: pcmk_startup: Local hostname: node1.a.com
node2: repeat steps 5 through 9 above (they can also be driven remotely from node1, as in the sketch below)
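A minimal sketch of running those steps from node1 over SSH, assuming the init script lives at the usual /etc/init.d/corosync path:
[root@node1 corosync]# ssh node2 '/etc/init.d/corosync start'
[root@node1 corosync]# ssh node2 'grep -i -e "corosync cluster engine" -e "configuration file" /var/log/messages'
[root@node1 corosync]# ssh node2 'grep -i pcmk_startup /var/log/messages'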
Part 8: View the cluster status
1: Check the cluster status
[root@node2 corosync]# crm status
============
Last updated: Mon May 7 17:00:08 2012
Stack: openais
Current DC: node1.a.com - partition with quorum
Version: 1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f
2 Nodes configured, 2 expected votes
0 Resources configured.
============
Online: [ node1.a.com node2.a.com ]
2: Providing the highly available service
In corosync/pacemaker, services (resources) can be defined through two interfaces:
1. a graphical interface (hb_gui)
2. crm (a shell provided by pacemaker)
3: Examine the current configuration
Notes:
[root@node1 corosync]# ssh node2 'date'
Mon May 7 17:03:11 CST 2012
[root@node1 corosync]# crm configure show
node node1.a.com
node node2.a.com
property $id="cib-bootstrap-options" \
dc-version="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2"
This shows the information currently stored in the CIB.
To view the content in XML format:
[root@node1 corosync]# crm configure show xml
<?xml version="1.0" ?>
<cib admin_epoch="0" cib-last-written="Mon May 7 16:25:39 2012" crm_feature_set="3.0.5" dc-uuid="node1.a.com" epoch="5" have-quorum="1" num_updates="18" validate-with="pacemaker-1.2">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="openais"/>
<nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/>
</cluster_property_set>
</crm_config>
<nodes>
<node id="node2.a.com" type="normal" uname="node2.a.com"/>
<node id="node1.a.com" type="normal" uname="node1.a.com"/>
</nodes>
<resources/>
<constraints/>
</configuration>
</cib>
To check the configuration for validity errors:
[root@node1 corosync]# crm_verify -L
crm_verify[878]: 2012/05/07_17:29:33 ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
crm_verify[878]: 2012/05/07_17:29:33 ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
crm_verify[878]: 2012/05/07_17:29:33 ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Errors found during check: config not valid
-V may provide more details
STONITH errors are reported: in a high-availability setup, the cluster refuses to start any resources as long as STONITH is enabled but no STONITH resources are defined.
STONITH can simply be disabled:
[root@node1 corosync]# crm
crm(live)# configure
crm(live)configure# property stonith-enabled=false
crm(live)configure# commit
crm(live)configure# show
node node1.a.com
node node2.a.com
property $id="cib-bootstrap-options" \
dc-version="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false"
Check again:
[root@node1 corosync]# crm_verify -L
No more errors.
The system also provides a dedicated stonith command:
stonith -L    lists the available STONITH device types
crm can be used in interactive mode;
run help to see the available commands.
The configuration is stored in the CIB in XML format.
There are four cluster resource types:
primitive    a basic (local) resource, which can run on only one node at a time
group        puts several resources into one group so they can be managed together
clone        resources that must run on several nodes at the same time (e.g. ocfs2, stonith; no master/slave distinction)
master       resources with a master/slave relationship, e.g. drbd
crm(live)ra# classes
heartbeat
lsb
ocf / heartbeat pacemaker
stonith
Use list lsb to view the available LSB resource agent scripts:
crm(live)ra# list lsb
NetworkManager acpid anacron apmd
atd auditd autofs avahi-daemon
avahi-dnsconfd bluetooth capi conman
corosync cpuspeed crond cups
cups-config-daemon dnsmasq dund firstboot
functions gpm haldaemon halt
heartbeat hidd hplip ip6tables
ipmi iptables irda irqbalance
isdn kdump killall krb524
kudzu lm_sensors logd lvm2-monitor
mcstrans mdmonitor mdmpd messagebus
microcode_ctl multipathd netconsole netfs
netplugd network nfs nfslock
nscd ntpd openais openibd
pacemaker pand pcscd portmap
psacct rawdevices rdisc readahead_early
readahead_later restorecond rhnsd rpcgssd
rpcidmapd rpcsvcgssd saslauthd sendmail
setroubleshoot single smartd sshd
syslog vncserver wdaemon winbind
wpa_supplicant xfs xinetd ypbind
(these are the scripts under /etc/init.d)
View the heartbeat OCF agents:
crm(live)ra# list ocf heartbeat
Use info or meta to display detailed information about a resource agent:
meta ocf:heartbeat:IPaddr    the class, provider and type are separated by colons
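The same query can also be issued as a one-shot command from the regular shell, without entering the interactive crm session (a sketch; the exact output layout depends on the crm shell version):
[root@node1 corosync]# crm ra meta ocf:heartbeat:IPaddr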
Part 9: Configure the cluster resources
1: The IP address resource
crm(live)configure# primitive webip ocf:heartbeat:IPaddr params ip=192.168.2.100
2: View the configuration
crm(live)configure# show
node node1.a.com
node node2.a.com
primitive webIP ocf:heartbeat:IPaddr \
params ip="192.168.2.100"
property $id="cib-bootstrap-options" \
dc-version="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false"
3: Commit the change
crm(live)configure# commit
4: Check the status
crm(live)# status
============
Last updated: Mon May 7 19:39:37 2012
Stack: openais
Current DC: node1.a.com - partition with quorum
Version: 1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ node1.a.com node2.a.com ]
webIP (ocf::heartbeat:IPaddr): Started node1.a.com
5: Use ifconfig on node1 to verify that the address is up
[root@node1 corosync]# ifconfig
eth0 Link encap:Ethernet HWaddr 00:0C:29:A0:F4:39
inet addr:192.168.2.10 Bcast:192.168.2.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fea0:f439/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:40843 errors:1 dropped:0 overruns:0 frame:0
TX packets:99212 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:7604883 (7.2 MiB) TX bytes:15595318 (14.8 MiB)
Interrupt:67 Base address:0x2000
eth0:0 Link encap:Ethernet HWaddr 00:0C:29:A0:F4:39
inet addr:192.168.2.100 Bcast:192.168.2.255 Mask:255.255.255.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
Interrupt:67 Base address:0x2000
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:14454 errors:0 dropped:0 overruns:0 frame:0
TX packets:14454 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:1442638 (1.3 MiB) TX bytes:1442638 (1.3 MiB)
6: Define the web service resource. httpd must be installed on both nodes; after installation, the httpd LSB script can be inspected
[root@node1 ~]# yum install -y httpd
[root@node1 ~]# chkconfig httpd off
[root@node2 ~]# yum install -y httpd
[root@node2 ~]# chkconfig httpd off
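Because the cluster, not init, is supposed to control httpd, it helps to confirm that the service is disabled at boot and that the LSB script answers status queries correctly (a quick sanity check, not part of the original steps):
[root@node1 ~]# chkconfig --list httpd
[root@node1 ~]# service httpd status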
[root@node1 corosync]# crm ra list lsb
View the parameters of the httpd agent:
crm(live)ra# meta lsb:httpd
lsb:httpd
Apache is a World Wide Web server. It is used to serve HTML files and CGI.
Operations' defaults (advisory minimum):
start timeout=15
stop timeout=15
status timeout=15
restart timeout=15
force-reload timeout=15
monitor interval=15 timeout=15 start-delay=15
7: Define the httpd resource
crm(live)configure# primitive webserver lsb:httpd
crm(live)configure# show
node node1.a.com
node node2.a.com
primitive webIP ocf:heartbeat:IPaddr \
params ip="192.168.2.100"
primitive webserver lsb:httpd
property $id="cib-bootstrap-options" \
dc-version="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false"
crm(live)# status
============
Last updated: Mon May 7 20:01:12 2012
Stack: openais
Current DC: node1.a.com - partition with quorum
Version: 1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ node1.a.com node2.a.com ]
webIP (ocf::heartbeat:IPaddr): Started node1.a.com
webserver (lsb:httpd): Started node2.a.com
httpd has started, but on node2.
(As a cluster manages more and more resources, it spreads them across different nodes to balance the load.)
The IP address and the web server need to be constrained to the same node, so define them as a group.
Use help group at the configure level to see how groups are configured:
crm(live)configure# help group
The `group` command creates a group of resources.
Usage:
...............
group <name> <rsc> [<rsc>...]
[meta attr_list]
[params attr_list]
attr_list :: [$id=<id>] <attr>=<val> [<attr>=<val>...] | $id-ref=<id>
...............
Example:
...............
group internal_www disk0 fs0 internal_ip apache \
meta target_role=stopped
...............
8: Define the web group
crm(live)configure# group web webip webserver
crm(live)configure# show
node node1.a.com
node node2.a.com
primitive webIP ocf:heartbeat:IPaddr \
params ip="192.168.2.100"
primitive webserver lsb:httpd
group web webIP webserver
property $id="cib-bootstrap-options" \
dc-version="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
stonith-enabled="false"
Check the cluster status again:
crm(live)# status
============
Last updated: Mon May 7 20:09:06 2012
Stack: openais
Current DC: node1.a.com - partition with quorum
Version: 1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ node1.a.com node2.a.com ]
Resource Group: web
webIP (ocf::heartbeat:IPaddr): Started node1.a.com
webserver (lsb:httpd): Started node1.a.com
crm(live)#
(The IP address and httpd are now both running on node1.)
[root@node1 ~]# ifconfig
eth0 Link encap:Ethernet HWaddr 00:0C:29:B4:EF:22
inet addr:192.168.2.10 Bcast:192.168.2.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:feb4:ef22/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:50010 errors:0 dropped:0 overruns:0 frame:0
TX packets:97328 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:9075084 (8.6 MiB) TX bytes:15527965 (14.8 MiB)
Interrupt:67 Base address:0x2000
eth0:0 Link encap:Ethernet HWaddr 00:0C:29:B4:EF:22
inet addr:192.168.2.100 Bcast:192.168.2.255 Mask:255.255.255.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
Interrupt:67 Base address:0x2000
Part 10: Test the cluster configuration
1: Create a test page on node1 and on node2
[root@node1 ~]# echo "node1.a.com" >/var/www/html/index.html
[root@node2 ~]# echo "node2.a.com" >/var/www/html/index.html
Browse to http://<cluster IP> and the page served is the one from the first node, as verified below.
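A quick command-line check from any host on the 192.168.2.0/24 network (assuming curl is installed; wget -qO- would work as well). With the web group running on node1, it should return node1's test page:
[root@node1 ~]# curl http://192.168.2.100
node1.a.com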
2: Stop the corosync service on node1
[root@node1 corosync]# service corosync stop
Observe from node2:
[root@node2 corosync]# crm status
============
Last updated: Mon May 7 20:16:58 2012
Stack: openais
Current DC: node2.a.com - partition WITHOUT quorum
Version: 1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f
2 Nodes configured, 2 expected votes
1 Resources configured.
============
Online: [ node2.a.com ]    node2 is still online, but none of the resources can run because quorum was lost
OFFLINE: [ node1.a.com ]
3: Adjust the quorum policy
The possible no-quorum-policy values are:
ignore   (ignore the loss of quorum)
freeze   (resources that are already running keep running, but no new resources can be started)
stop     (the default)
suicide  (kill all resources)
[root@node1 corosync]# service corosync start
crm(live)configure# property no-quorum-policy=ignore
crm(live)configure# commit
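The same change can also be made as a one-shot command without entering the interactive shell, and then verified (a sketch using the crm shell's non-interactive form):
[root@node1 corosync]# crm configure property no-quorum-policy=ignore
[root@node1 corosync]# crm configure show | grep no-quorum-policy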
4: Stop the corosync service on node1 again and verify that the resources fail over to node2
[root@node1 corosync]# service corosync stop
Appendix:
[root@node1 ~]# cd /etc/corosync/
[root@node1 corosync]# crm
crm crm_diff crm_master crm_node crm_resource crm_simulate crm_uuid crmadmin
crm_attribute crm_failcount crm_mon crm_report crm_shadow crm_standby crm_verify
Commonly used cluster management commands:
1. crm_attribute    modifies the cluster's global properties,
   for example the stonith and quorum settings used earlier;
   in effect this modifies the CIB on the DC
2. crm_resource     manages resources
3. crm_node         manages nodes
   crm_node -e      shows the node's epoch (how many times the configuration has been changed)
[root@node2 corosync]# crm_node -q    shows the current node's quorum vote count
1
4. cibadmin         the cluster configuration tool
-u, --upgrade Upgrade the configuration to the latest syntax
-Q, --query Query the contents of the CIB
-E, --erase Erase the contents of the whole CIB
-B, --bump Increase the CIB's epoch value by 1
If a resource was defined incorrectly, this tool can be used to delete it:
-D, --delete Delete the first object matching the supplied criteria, Eg.
This can also be done from the crm command line:
crm(live)configure# delete
usage: delete <id> [<id>...]
You can also run edit in this mode to modify the configuration directly.
When finished, run commit to submit the changes. A sketch of removing a misdefined resource follows below.
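A minimal sketch of deleting a resource from the interactive shell, using the webserver primitive purely as an example target (a running resource is stopped first so the shell does not refuse to remove it):
crm(live)# resource
crm(live)resource# stop webserver
crm(live)resource# up
crm(live)# configure
crm(live)configure# delete webserver
crm(live)configure# commit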
5: Cluster resource types
crm(live)configure# help
- `primitive`  a basic (local) resource, which can run on only one node at a time
- `monitor`
- `group`  puts several resources into one group so they can be managed together
- `clone`  resources that must run on several nodes at the same time (e.g. ocfs2, stonith; no master/slave distinction)
- `ms`/`master` (master-slave)  resources with a master/slave relationship, e.g. drbd
6: Resource agent (RA) classes available to the cluster
crm(live)ra# classes
heartbeat
lsb
ocf / heartbeat pacemaker
stonith
crm(live)ra# list heartbeat
AudibleAlarm Delay Filesystem ICP IPaddr IPaddr2 IPsrcaddr IPv6addr LVM
LinuxSCSI MailTo OCF Raid1 SendArp ServeRAID WAS WinPopup Xinetd
apache db2 hto-mapfuncs ids portblock
crm(live)ra# list lsb
NetworkManager acpid anacron apmd atd auditd
autofs avahi-daemon avahi-dnsconfd bluetooth capi conman
corosync cpuspeed crond cups cups-config-daemon dc_client
dc_server dnsmasq dund firstboot functions gpm
haldaemon halt heartbeat hidd hplip httpd
ip6tables ipmi iptables irda irqbalance isdn
kdump killall krb524 kudzu lm_sensors logd
lvm2-monitor mcstrans mdmonitor mdmpd messagebus microcode_ctl
multipathd mysqld netconsole netfs netplugd network
nfs nfslock nscd ntpd openais openibd
pacemaker pand pcscd portmap psacct rawdevices
rdisc readahead_early readahead_later restorecond rhnsd rpcgssd
rpcidmapd rpcsvcgssd saslauthd sendmail setroubleshoot single
smartd snmpd snmptrapd sshd syslog tgtd
vncserver vsftpd wdaemon winbind wpa_supplicant xfs
xinetd ypbind yum-updatesd
crm(live)ra# list ocf
AoEtarget AudibleAlarm CTDB ClusterMon Delay Dummy
EvmsSCC Evmsd Filesystem HealthCPU HealthSMART ICP
IPaddr IPaddr2 IPsrcaddr IPv6addr LVM LinuxSCSI
MailTo ManageRAID ManageVE Pure-FTPd Raid1 Route
SAPDatabase SAPInstance SendArp ServeRAID SphinxSearchDaemon Squid
Stateful SysInfo SystemHealth VIPArip VirtualDomain WAS
WAS6 WinPopup Xen Xinetd anything apache
conntrackd controld db2 drbd eDir88 exportfs
fio iSCSILogicalUnit iSCSITarget ids iscsi jboss
ldirectord mysql mysql-proxy nfsserver nginx o2cb
oracle oralsnr pgsql ping pingd portblock
postfix proftpd rsyncd scsi2reservation sfex syslog-ng
tomcat vmware
crm(live)ra# list stonith
apcmaster apcmastersnmp apcsmart baytech bladehpi
cyclades external/drac5 external/dracmc-telnet external/hmchttp external/ibmrsa
external/ibmrsa-telnet external/ipmi external/ippower9258 external/kdumpcheck external/rackpdu
external/riloe external/sbd external/vmware external/xen0 external/xen0-ha
fence_legacy ibmhmc ipmilan meatware nw_rpc100s
rcd_serial rps10 suicide wti_mpc
You are welcome to join Zhengzhou Yangzai's network engineers discussion group -- 132444800 (please mention that you are a 51CTO blog reader).