See the previous post: http://blog.csdn.net/nnuljl/article/details/38266737
Install the heartbeat (cluster messaging) layer; corosync must be installed on both nodes:
apt-get install -y --force-yes corosync
root@controller1:~# vim /etc/default/corosync
# start corosync at boot [yes|no]
START=yes
Change the default no to yes; otherwise service corosync start has no effect.
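After saving the change, start the service on both nodes:
root@controller1:~# service corosync start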
root@controller1:~# vim /etc/corosync/corosync.conf
# Please read the openais.conf.5 manual page
totem {
    version: 2
    # How long before declaring a token lost (ms)
    token: 3000
    # How many token retransmits before forming a new configuration
    token_retransmits_before_loss_const: 10
    # How long to wait for join messages in the membership protocol (ms)
    join: 60
    # How long to wait for consensus to be achieved before starting a new round of membership configuration (ms)
    consensus: 3600
    # Turn off the virtual synchrony filter
    vsftype: none
    # Number of messages that may be sent by one processor on receipt of the token
    max_messages: 20
    # Limit generated nodeids to 31-bits (positive signed integers)
    clear_node_high_bit: yes
    # Disable encryption
    secauth: off
    # How many threads to use for encryption/decryption
    threads: 0
    # Optionally assign a fixed node id (integer)
    # nodeid: 1234
    # This specifies the mode of redundant ring, which may be none, active, or passive.
    rrp_mode: active
    interface {
        # The following values need to be set based on your environment
        ringnumber: 0
        bindnetaddr: 192.168.2.11
        mcastaddr: 226.94.1.4
        mcastport: 5405
    }
    interface {
        ringnumber: 1
        bindnetaddr: 192.168.99.2
        mcastaddr: 226.94.1.4
        mcastport: 5405
    }
}
amf {
    mode: disabled
}
quorum {
    # Quorum for the Pacemaker Cluster Resource Manager
    provider: corosync_votequorum
    expected_votes: 1
}
aisexec {
    user: root
    group: root
}
logging {
    fileline: off
    to_stderr: yes
    to_logfile: yes
    logfile: /var/log/corosync/corosync.log
    to_syslog: yes
    syslog_facility: daemon
    debug: off
    timestamp: on
    logger_subsys {
        subsys: AMF
        debug: off
        tags: enter|leave|trace1|trace2|trace3|trace4|trace6
    }
}
The part of the configuration that needs to be changed is the interface section inside the totem block:
rrp_mode: active
interface {
    # The following values need to be set based on your environment
    ringnumber: 0
    bindnetaddr: 192.168.2.11
    mcastaddr: 226.94.1.4
    mcastport: 5405
}
interface {
    ringnumber: 1
    bindnetaddr: 192.168.99.2
    mcastaddr: 226.94.1.4
    mcastport: 5405
}
Multiple heartbeat interfaces can be configured, as long as each one uses a different ringnumber. bindnetaddr is the address the ring binds to, and mcastaddr/mcastport give the multicast address and port.
Check the heartbeat cluster membership; below is the view from node 1:
root@controller1:~# corosync-cmapctl |grep members
runtime.totem.pg.mrp.srp.members.1084752395.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1084752395.ip (str) = r(0) ip(192.168.2.11) r(1) ip(192.168.99.2)
runtime.totem.pg.mrp.srp.members.1084752395.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1084752395.status (str) = joined
runtime.totem.pg.mrp.srp.members.1084752396.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1084752396.ip (str) = r(0) ip(192.168.2.12) r(1) ip(127.0.0.1)
runtime.totem.pg.mrp.srp.members.1084752396.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1084752396.status (str) = joined
And from node 2:
root@controller2:~# corosync-cmapctl |grep members
runtime.totem.pg.mrp.srp.members.1084752395.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1084752395.ip (str) = r(0) ip(192.168.2.11) r(1) ip(192.168.99.2)
runtime.totem.pg.mrp.srp.members.1084752395.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1084752395.status (str) = joined
runtime.totem.pg.mrp.srp.members.1084752396.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1084752396.ip (str) = r(0) ip(192.168.2.12) r(1) ip(127.0.0.1)
runtime.totem.pg.mrp.srp.members.1084752396.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1084752396.status (str) = joined
Notice that one address is 127.0.0.1: on node 2, the NIC that faced 192.168.99.2 was removed because of hardware constraints, so the local loopback address is shown for that ring instead.
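The state of each ring can also be checked directly with corosync's standard tooling (output omitted here):
root@controller1:~# corosync-cfgtool -s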
Next, install Pacemaker on both nodes:
apt-get install -y --force-yes pacemaker
Configure OCF logging. The OCF helper scripts live under /usr/lib/ocf/lib/heartbeat:
root@controller1:/usr/lib/ocf/lib/heartbeat# ls
apache-conf.sh ocf-binaries ocf-rarun ocf-shellfuncs sapdb-nosha.sh
http-mon.sh ocf-directories ocf-returncodes ora-common.sh sapdb.sh
The HA_* path defaults are defined in the ocf-directories file there:
# Binaries and binary options for use in Resource Agents
prefix=/usr
exec_prefix=/usr
: ${INITDIR:=/etc/init.d}
: ${HA_DIR:=/etc/ha.d}
: ${HA_RCDIR:=$HA_DIR/rc.d}
: ${HA_CONFDIR=$HA_DIR/conf}
: ${HA_CF:=$HA_DIR/ha.cf}
: ${HA_VARLIB:=/var/lib/heartbeat}
: ${HA_RSCTMP:=/var/run/resource-agents}
: ${HA_RSCTMP_OLD:=/var/run/heartbeat/rsctmp}
: ${HA_FIFO:=/var/lib/heartbeat/fifo}
: ${HA_BIN:=/usr/lib/heartbeat}
: ${HA_SBIN_DIR:=/usr/sbin}
: ${HA_DATEFMT:="%Y/%m/%d_%T "}
: ${HA_DEBUGLOG:=/dev/null}
: ${HA_RESOURCEDIR:=$HA_DIR/resource.d}
: ${HA_DOCDIR:=/usr/share/doc/heartbeat}
: ${__SCRIPT_NAME:=`basename $0`}
: ${HA_VARRUN:=/var/run/}
: ${HA_VARLOCK:=/var/lock/subsys/}
: ${HA_LOGFILE:=/var/log/ha/ha.log}
: ${HA_DEBUGLOG:=/var/log/ha/ha_debug.log}
Append the last two lines and create the ha directory under /var/log; a log will then be generated whenever OCF resources are used. Note that := in shell only assigns when the variable is still unset, so the earlier : ${HA_DEBUGLOG:=/dev/null} line must be removed or commented out for the new HA_DEBUGLOG value to take effect.
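Creating the log directory:
root@controller1:~# mkdir -p /var/log/ha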
Check the cluster state with crm status:
root@controller1:/usr/lib/ocf/lib/heartbeat# crm status
Last updated: Wed Jul 30 13:03:46 2014
Last change: Wed Jul 30 12:39:21 2014 via crm_attribute on controller2
Stack: corosync
Current DC: controller1 (1084752395) - partition with quorum
Version: 1.1.10-42f2063
2 Nodes configured
20 Resources configured
Node controller2 (1084752396): standby
Online: [ controller1 ]
root@controller1:~# crm ra
crm(live)ra# classes
lsb
ocf / heartbeat linbit openstack pacemaker rabbitmq redhat
service
stonith
upstart
These are the classes of Pacemaker resource agents; these scripts are what actually control the services. The ra level has its own help:
crm(live)ra# help
This level contains commands which show various information about
the installed resource agents. It is available both at the top
level and at the `configure` level.
Available commands:
classes list classes and providers
list list RA for a class (and provider)
meta show meta data for a RA
providers show providers for a RA and a class
help show help (help topics for list of topics)
end go back one level
quit exit the program
For example, crm(live)ra# meta IPaddr2 shows the metadata of IPaddr2, the agent used to bind a virtual IP.
Configure the service resources:
root@controller1:~# crm configure
crm(live)configure#
This is an interactive shell; use help to list the available commands:
Available commands:
node define a cluster node
primitive define a resource
monitor add monitor operation to a primitive
group define a group
clone define a clone
ms define a master-slave resource
rsc_template define a resource template
location a location preference
colocation colocate resources
order order resources
rsc_ticket resources ticket dependency
property set a cluster property
rsc_defaults set resource defaults
fencing_topology node fencing order
role define role access rights
user define user access rights
op_defaults set resource operations defaults
schema set or display current CIB RNG schema
show display CIB objects
edit edit CIB objects
filter filter CIB objects
delete delete CIB objects
default-timeouts set timeouts for operations to minimums from the meta-data
rename rename a CIB object
modgroup modify group
refresh refresh from CIB
erase erase the CIB
ptest show cluster actions if changes were committed
rsctest test resources as currently configured
cib CIB shadow management
cibstatus CIB status management and editing
template edit and import a configuration from a template
commit commit the changes to the CIB
verify verify the CIB with crm_verify
upgrade upgrade the CIB to version 1.0
save save the CIB to a file
load import the CIB from a file
graph generate a directed graph
xml raw xml
help show help (help topics for list of topics)
end go back one level
A few commands deserve explanation: primitive defines a single resource; group collects several resources into a group so they can be operated on together; location defines node affinity (stickiness), i.e. which node a resource prefers to run on; colocation forces several services to run on the same node, or forbids them from doing so; order defines the start order between resources.
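As an illustration, a location preference giving the group g_1_mysql (defined below) an affinity for controller1 might look like this; the constraint name loc_mysql_on_c1 and the score 100 are made up:
crm(live)configure# location loc_mysql_on_c1 g_1_mysql 100: controller1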
crm(live)configure# show
node $id="1084752395" controller1 \
    attributes standby="off"
node $id="1084752396" controller2 \
    attributes standby="on"
...
primitive p_api_ip ocf:heartbeat:IPaddr2 \
    params ip="192.168.99.3" cidr_netmask="24" \
    op monitor interval="30s"
primitive p_fs_mysql ocf:heartbeat:Filesystem \
    params device="/share-s/mysql" directory="/var/lib/mysql" fstype="ext4" options="relatime" \
    op start interval="0" timeout="120s" \
    op stop interval="0" timeout="180s" \
    op monitor interval="60s" timeout="120s" \
    meta target-role="Started"
primitive p_mysql ocf:heartbeat:mysql \
    params additional_parameters="--bind-address=0.0.0.0" config="/etc/mysql/my.cnf" pid="/var/run/mysqld/mysqld.pid" socket="/var/run/mysqld/mysqld.sock" log="/var/log/mysql/mysqld.log"
primitive p_drbd_mysql ocf:linbit:drbd \
    params drbd_resource="mysql" \
    op start interval="0" timeout="240s" \
    op stop interval="0" timeout="180s" \
    op promote interval="0" timeout="180s" \
    op demote interval="0" timeout="180s" \
    op monitor interval="30s" role="Slave" \
    op monitor interval="29s" role="Master"
group g_1_mysql p_ip_admin p_fs_mysql p_mysql
ms ms_drbd_mysql p_drbd_mysql \
    meta notify="true" clone-max="2"
colocation c_mysql_on_drbd inf: g_1_mysql ms_drbd_mysql:Master
order o_drbd_before_mysql inf: ms_drbd_mysql:promote g_1_mysql:start
Once the resources are defined, they can be verified and committed inside the interactive shell:
root@controller1:~# crm configure
crm(live)configure# verify
WARNING: p_mysql: default timeout 20s for start is smaller than the advised 120
WARNING: p_mysql: default timeout 20s for stop is smaller than the advised 120
crm(live)configure# commit
INFO: apparently there is nothing to commit
INFO: try changing something first
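The warnings mean that p_mysql has no explicit start/stop timeouts. A minimal way to silence them, assuming the advised 120-second values, is to edit the primitive and add op lines:
crm(live)configure# edit p_mysql
# append to the definition:
#   op start interval="0" timeout="120s" \
#   op stop interval="0" timeout="120s"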
After committing, the resources run under Pacemaker and can be inspected with crm status or crm_mon:
Last updated: Wed Jul 30 13:20:54 2014
Last change: Wed Jul 30 12:39:21 2014 via crm_attribute on controller2
Stack: corosync
Current DC: controller1 (1084752395) - partition with quorum
Version: 1.1.10-42f2063
2 Nodes configured
20 Resources configured
Node controller2 (1084752396): standby
Online: [ controller1 ]
Resource Group: g_1_mysql
p_ip_admin (ocf::heartbeat:IPaddr2): Started controller1
p_fs_mysql (ocf::heartbeat:Filesystem): Started controller1
p_mysql (ocf::heartbeat:mysql): Started controller1
You should be able to see the resources listed; DRBD would normally appear here too, but since my test environment has been switched to iSCSI shared storage, that output is gone.
When Pacemaker is restarted, resources are migrated between nodes. A node that is currently running resources can be put into standby with crm node standby and brought back with crm node online.
An individual resource can be restarted with crm resource restart, as sketched below.
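A quick sketch using the resources defined above (p_mysql comes from this configuration):
root@controller1:~# crm node standby              # move this node's resources to the peer
root@controller1:~# crm node online               # bring the node back into the cluster
root@controller1:~# crm resource restart p_mysql  # restart a single resource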
Pacemaker provides a fencing mechanism: configuring a STONITH resource is enough. Note that stonith-action must also be set, otherwise fencing shows no visible effect.
property $id="cib-bootstrap-options" \
    dc-version="1.1.10-42f2063" \
    cluster-infrastructure="corosync" \
    stonith-enabled="false" \
    no-quorum-policy="ignore" \
    stonith-action="reboot"
To enable fencing, set stonith-enabled to true and set stonith-action; the possible parameter values can be discovered with Tab completion in the interactive shell:
crm(live)configure# property
batch-limit= enable-startup-probes= pe-error-series-max= stonith-enabled=
cluster-delay= is-managed-default= pe-input-series-max= stonith-timeout=
cluster-recheck-interval= maintenance-mode= pe-warn-series-max= stop-all-resources=
crmd-transition-delay= migration-limit= placement-strategy= stop-orphan-actions=
dc-deadtime= no-quorum-policy= remove-after-stop= stop-orphan-resources=
default-action-timeout= node-health-green= shutdown-escalation= symmetric-cluster=
default-resource-stickiness= node-health-red= start-failure-is-fatal=
election-timeout= node-health-strategy= startup-fencing=
enable-acl= node-health-yellow= stonith-action=
crm(live)configure# property stonith-action=
stonith-action (enum, [reboot]): Action to send to STONITH device
Action to send to STONITH device Allowed values: reboot, poweroff, off
The STONITH agents available on the system can be listed as follows:
root@controller1:~# crm ra
crm(live)ra# classes
lsb
ocf / heartbeat linbit openstack pacemaker rabbitmq redhat
service
stonith
upstart
crm(live)ra# list stonith
apcmaster apcmastersnmp apcsmart baytech bladehpi
cyclades drac3 external/drac5 external/dracmc-telnet external/hetzner
external/hmchttp external/ibmrsa external/ibmrsa-telnet external/ipmi external/ippower9258
external/kdumpcheck external/libvirt external/nut external/rackpdu external/riloe
external/ssh external/vcenter external/vmware external/xen0 external/xen0-ha
fence_legacy fence_pcmk ibmhmc ipmilan meatware
null nw_rpc100s rcd_serial rps10 ssh
suicide wti_mpc wti_nps
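As a test-only sketch (external/ssh is not suitable for production fencing; the resource names st_ssh and cl_st_ssh are invented, while the host names come from this setup), a STONITH resource could be configured like this:
crm(live)configure# primitive st_ssh stonith:external/ssh \
    params hostlist="controller1 controller2"
crm(live)configure# clone cl_st_ssh st_ssh
crm(live)configure# property stonith-enabled="true" stonith-action="reboot"
crm(live)configure# commit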
If you reproduce this article, please credit the source: http://blog.csdn.net/nnuljl