MFS high availability: avoiding a single point of failure

1. Machines

W1  192.168.37.21/24(drbd+mfsmaster)

W2  192.168.37.22/24(drbd+mfsmaster)

VIP 192.168.37.200

W3  192.168.37.23/24(metalogger server)

W4  192.168.37.24/24(chunk server)

W5  192.168.37.25/24(chunk server)

W6  192.168.37.26/24(client)

## Every node needs these entries in /etc/hosts so the hosts can reach each other by hostname
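
A minimal /etc/hosts for every node, assuming the hostnames match the node names above:

192.168.37.21  w1
192.168.37.22  w2
192.168.37.23  w3
192.168.37.24  w4
192.168.37.25  w5
192.168.37.26  w6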



2. Install and configure DRBD

rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm
yum install -y kmod-drbd84 drbd84-utils

vim /etc/drbd.d/global_common.conf

# DRBD is the result of over a decade of development by LINBIT.
# In case you need professional services for DRBD or have
# feature requests visit http://www.linbit.com

global {
	usage-count no;
	udev-always-use-vnr; # treat implicit the same as explicit volumes

}

common {
	protocol C;
	handlers {
		

		pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
		pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
		local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
		
	}

	startup {
		
	}

	options {
		
	}

	disk {
		on-io-error detach;
		
	}

	net {
		
	}
	syncer {
		rate 1024M;
	}
}

vim /etc/drbd.d/mfs.res

resource mfs {
	protocol C;
	meta-disk internal;
	device /dev/drbd1;
	syncer {
		verify-alg sha1;
	}
	net {
		allow-two-primaries;
	}
	on w1 {
		disk /dev/sdb;
		address 192.168.37.21:7789;
	}
	on w2 {
		disk /dev/sdb;
		address 192.168.37.22:7789;
	}
}

drbdadm create-md mfs

## Initialize the DRBD metadata

modprobe drbd

## Load the kernel module manually

[root@w1 ~]# lsmod | grep drbd
drbd                  396875  0 
libcrc32c              12644  2 xfs,drbd

## Check that the kernel module is loaded


[root@w1 ~]# drbdadm up mfs
[root@w1 ~]# drbdadm -- --force primary mfs
[root@w1 ~]# drbd-overview 
NOTE: drbd-overview will be deprecated soon.
Please consider using drbdtop.
 1:mfs/0  WFConnection Primary/Unknown UpToDate/DUnknown
## Bring up the resource and force-promote this node to primary


Run the following on the peer node w2:

[root@w2 ~]# drbdadm create-md mfs
[root@w2 ~]# modprobe drbd
[root@w2 ~]# drbdadm up mfs
[root@w2 ~]# cat /proc/drbd
version: 8.4.10-1 (api:1/proto:86-101)
GIT-hash: a4d5de01fffd7e4cde48a080e2c686f9e8cebf4c build by mockbuild@, 2017-09-15 14:23:22

 1: cs:SyncTarget ro:Secondary/Primary ds:Inconsistent/UpToDate C r-----
    ns:0 nr:1808384 dw:1808384 dr:0 al:8 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:8677020
	[==>.................] sync'ed: 17.3% (8472/10236)M
	finish: 0:03:39 speed: 39,448 (39,312) want: 102,400 K/sec

## Check the synchronization progress


[root@w1 ~]# mkfs.xfs /dev/drbd1
meta-data=/dev/drbd1             isize=512    agcount=4, agsize=655338 blks
         =                       sectsz=512   attr=2, projid32bit=1
         =                       crc=1        finobt=0, sparse=0
data     =                       bsize=4096   blocks=2621351, imaxpct=25
         =                       sunit=0      swidth=0 blks
naming   =version 2              bsize=4096   ascii-ci=0 ftype=1
log      =internal log           bsize=4096   blocks=2560, version=2
         =                       sectsz=512   sunit=0 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0

## Create an XFS filesystem on the DRBD device


mkdir /usr/local/mfs
chown -R mfs:mfs /usr/local/mfs/   ## requires the mfs user (created in step 3); create it first if it does not exist yet
[root@w1 local]# mount /dev/drbd1 /usr/local/mfs/
[root@w1 local]# df -h
Filesystem           Size  Used Avail Use% Mounted on
/dev/drbd1            10G   33M   10G   1% /usr/local/mfs
## Create the mount point, set ownership, and mount the device





3. MooseFS deployment

Download and install the base software

yum install zlib-devel gcc -y  ## required on every machine ##
useradd mfs  ## the mfs user's UID and GID must be identical on every machine ##

cd /usr/local/src
wget https://github.com/moosefs/moosefs/archive/v3.0.96.tar.gz
scp src/v3.0.96.tar.gz w3:/usr/local/src
## scp v3.0.96.tar.gz to every host except w2 ##

tar xvf v3.0.96.tar.gz
cd moosefs-3.0.96/
## The extract and build commands are shown explicitly for each host in the sections below

## Master hosts w1 & w2 (the software lives on the shared DRBD storage, so installing on one node is enough)

[root@w1 moosefs-3.0.96]# ./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs --disable-mfschunkserver --disable-mfsmount
[root@w1 moosefs-3.0.96]# make && make install

[root@w1 mfs]# pwd
/usr/local/mfs/etc/mfs
[root@w1 mfs]# cp mfsexports.cfg.sample mfsexports.cfg
[root@w1 mfs]# cp mfsmaster.cfg.sample mfsmaster.cfg
[root@w1 mfs]# vim mfsmaster.cfg
## mfsmaster.cfg can be left at the stock defaults

[root@w1 mfs]# vim mfsexports.cfg
*                       /       rw,alldirs,mapall=mfs:mfs,password=000000
*                       .       rw
## Edit the export control file
[root@w1 mfs]# pwd
/usr/local/mfs/var/mfs
[root@w1 mfs]# cp metadata.mfs.empty metadata.mfs

## The metadata file has to be initialized by hand


[root@w1 system]# pwd
/usr/lib/systemd/system
[root@w1 system]# cat mfsmaster.service 
[Unit]
Description=mfs
After=network.target
   
[Service]
Type=forking
ExecStart=/usr/local/mfs/sbin/mfsmaster start
ExecStop=/usr/local/mfs/sbin/mfsmaster stop
PrivateTmp=true
   
[Install]
WantedBy=multi-user.target

## Write the startup unit; remember to give it execute permission

## scp this unit file to the other master node as well
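
For example (default systemd unit path assumed):

scp /usr/lib/systemd/system/mfsmaster.service w2:/usr/lib/systemd/system/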

systemctl enable drbd mfsmaster
systemctl stop drbd  mfsmaster

## Stop the services because CRM will take them over, but keep them enabled at boot so that CRM can find the mfsmaster service



Metalogger host (w3)

[root@w3 moosefs-3.0.96]# ./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs  --disable-mfschunkserver --disable-mfsmount
[root@w3 moosefs-3.0.96]# make && make install
[root@w3 ~]# cd /usr/local/mfs/etc/mfs/
[root@w3 mfs]# cp mfsmetalogger.cfg.sample mfsmetalogger.cfg
[root@w3 mfs]# cat mfsmetalogger.cfg | grep MASTER_HOST
MASTER_HOST = 192.168.37.200

## I point this straight at the VIP that the HA configuration will use later

[root@w3 ~]# /usr/local/mfs/sbin/mfsmetalogger start
[root@w3 ~]# netstat -ntlp | grep metalogger
[root@w3 ~]# netstat -ntlp | grep 9419

## Start the metalogger server. Because MASTER_HOST points at the master VIP, which is not configured yet, nothing shows up in netstat, so I leave the metalogger stopped for now. To test it right away, point MASTER_HOST at one of the masters' real IPs instead.
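
For example, to test against w1's real IP temporarily (a hypothetical tweak; revert it once the VIP exists):

sed -i 's/^MASTER_HOST.*/MASTER_HOST = 192.168.37.21/' /usr/local/mfs/etc/mfs/mfsmetalogger.cfg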






Chunk server hosts (w4, w5)

cd moosefs-3.0.96/
./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs  --disable-mfsmaster --disable-mfsmount
make && make install
cd /usr/local/mfs/etc/mfs
cp mfschunkserver.cfg.sample mfschunkserver.cfg
cat mfschunkserver.cfg | grep MASTER_HOST
MASTER_HOST = 192.168.37.200
cp mfshdd.cfg.sample mfshdd.cfg
cat mfshdd.cfg | grep html
/html

## The storage directory name is up to you

mkdir /html
chown -R mfs:mfs /html/
/usr/local/mfs/sbin/mfschunkserver start
netstat -lantp|grep 9420

## Again, the VIP is not configured yet, so the chunk server cannot register with the master for now




Client (w6)

[root@w6 ~]# yum install -y fuse fuse-devel
[root@w6 ~]# modprobe fuse
[root@w6 ~]# lsmod | grep fuse
fuse                   91874  1
[root@w6 moosefs-3.0.96]# ./configure --prefix=/usr/local/mfs --with-default-user=mfs --with-default-group=mfs --disable-mfsmaster --disable-mfschunkserver --enable-mfsmount
[root@w6 moosefs-3.0.96]# make && make install
[root@w6 ~]# mkdir /test
[root@w6 ~]# chown -R mfs:mfs /test
[root@w6 ~]# /usr/local/mfs/bin/mfsmount /test -H 192.168.37.200 -p

## Again, without the VIP this cannot be verified yet (df -h). If you forget the password, check mfsexports.cfg on the master.

## In other words, the mount command above will not work until the VIP is configured




4. Building the pcs cluster (w1, w2)

yum install -y pacemaker pcs psmisc policycoreutils-python
systemctl enable pcsd
systemctl start pcsd
echo 000000 | passwd --stdin hacluster

## Set the password for the hacluster user

 

The following only needs to be run on w1:

[root@w1 ~]# pcs cluster auth w1 w2
Username: hacluster
Password: 000000
w2: Authorized
w1: Authorized

## Authenticate the cluster nodes with pcs

[root@w1 ~]# pcs cluster setup --name mycluster w1 w2 --force

## Create the cluster from the two nodes

[root@w1 corosync]# pcs cluster start --all
w1: Starting Cluster...
w2: Starting Cluster...

## Start the cluster

ps -ef | grep corosync
ps -ef | grep pacemaker
corosync-cfgtool -s

## Check the cluster processes and ring status

corosync-cmapctl | grep members
runtime.totem.pg.mrp.srp.members.1.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1.ip (str) = r(0) ip(192.168.37.21) 
runtime.totem.pg.mrp.srp.members.1.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1.status (str) = joined
runtime.totem.pg.mrp.srp.members.2.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.2.ip (str) = r(0) ip(192.168.37.22) 
runtime.totem.pg.mrp.srp.members.2.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.2.status (str) = joined

## View the cluster membership information

pcs status
Cluster name: mycluster
WARNING: no stonith devices and stonith-enabled is not false
## a warning that no fencing (STONITH) device is configured
Stack: corosync   ## which layer passes cluster messages underneath
Current DC: w1 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
## DC is the Designated Coordinator, a cluster-wide arbiter elected by all nodes
Last updated: Sat Oct 28 23:04:17 2017
Last change: Sat Oct 28 22:45:43 2017 by hacluster via crmd on w1
2 nodes configured
0 resources configured
Online: [ w1 w2 ]
No resources   ## no resources defined yet, so this is empty
Daemon Status:   ## each daemon is running normally
  corosync: active/disabled
  pacemaker: active/disabled
  pcsd: active/enabled

crm_verify -L -V
   error: unpack_resources:	Resource start-up disabled since no STONITH resources have been defined
   error: unpack_resources:	Either configure some or disable STONITH with the stonith-enabled option
   error: unpack_resources:	NOTE: Clusters with shared data need STONITH to ensure data integrity
Errors found during check: config not valid

## Check the cluster for configuration errors; these are all about missing STONITH devices

[root@w1 corosync]# pcs property set stonith-enabled=false
crm_verify -L -V

## After disabling STONITH, crm_verify reports no errors on either node






5. crmsh (installing it on one node is enough)

## Before starting, make sure drbd and mfsmaster are stopped; to be safe, restart corosync and pacemaker as well, otherwise you may run into the situation below:

crm(live)# status
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 08:09:14 2017
Last change: Sun Oct 29 08:08:18 2017 by root via cibadmin on w1

2 nodes configured
2 resources configured

Online: [ w1 w2 ]

Full list of resources:

 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Stopped: [ w1 w2 ]

Failed Actions:
* mfs_drbd_monitor_0 on w2 'not configured' (6): call=12, status=complete, exitreason='meta parameter misconfigured, expected clone-max -le 2, but found unset.',
    last-rc-change='Sun Oct 29 08:07:31 2017', queued=0ms, exec=77ms
* mfs_drbd_monitor_0 on w1 'not configured' (6): call=12, status=complete, exitreason='meta parameter misconfigured, expected clone-max -le 2, but found unset.',
    last-rc-change='Sun Oct 29 08:07:31 2017', queued=1ms, exec=62ms


[root@w1 src]# cd crmsh-2.3.2
[root@w1 crmsh-2.3.2]# python setup.py install

## crmsh is not in the default yum repos, so I downloaded it myself

## It is available on GitHub: https://github.com/ClusterLabs/crmsh
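
The download step (done before the build above) might look like this, assuming the 2.3.2 release tag on GitHub:

cd /usr/local/src
wget https://github.com/ClusterLabs/crmsh/archive/2.3.2.tar.gz
tar xvf 2.3.2.tar.gz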

systemctl start corosync pacemaker
systemctl enable corosync pacemaker




6. DRBD + mount resource configuration

## Open the configuration tool

[root@w1 ~]# crm
## Start configuring CRM so it takes over the services
crm(live)configure# primitive mfs_drbd ocf:linbit:drbd params drbd_resource=mfs op monitor role=Master interval=10 timeout=20 op monitor role=Slave interval=20 timeout=20 op start timeout=240 op stop timeout=100 
## mfs_drbd is the Pacemaker resource name, drbd_resource names the DRBD resource, and the rest are monitor/start/stop options
crm(live)configure# verify
## Check the syntax
crm(live)configure# master ms_mfs_drbd mfs_drbd meta master-max=1 master-node-max=2 clone-max=2 clone-node-max=1 notify="True"
crm(live)configure# verify
crm(live)configure# commit
## Define the master/slave (promotable) resource
crm(live)configure# primitive mfsmount ocf:heartbeat:Filesystem params device=/dev/drbd1 directory=/usr/local/mfs fstype=xfs op start interval=0 timeout=60s op stop interval=0 timeout=60s
crm(live)configure# verify
crm(live)configure# colocation ms_mfs_drbd_mfsmount inf: mfsmount ms_mfs_drbd:Master
## Colocation constraint: bind these resources so they run on the same node
crm(live)configure# order ms_mfs_drbd_before_mfsmount Mandatory: ms_mfs_drbd:promote mfsmount:start
## Order constraint: the start order of the resources; note it is written in the opposite order to the colocation statement
crm(live)configure# verify
crm(live)configure# commit
## Define the shared filesystem resource so it is remounted automatically when the nodes fail over
## (a location constraint would specify which nodes a resource prefers; it is not used here)
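
A hypothetical location constraint (not committed in this setup) that would make mfsmount prefer node w1 with a score of 50 looks like this:

crm(live)configure# location mfsmount_prefers_w1 mfsmount 50: w1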

crm(live)# status
Stack: corosync
Current DC: w1 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 08:46:54 2017
Last change: Sun Oct 29 08:38:44 2017 by root via cibadmin on w1

2 nodes configured
3 resources configured

Online: [ w1 w2 ]

Full list of resources:

 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w1 ]
     Slaves: [ w2 ]
 mfsmount	(ocf::heartbeat:Filesystem):	Started w1
## That completes the DRBD and shared-mount configuration





7. MFS + VIP resource configuration

crm(live)configure# primitive mfs systemd:mfsmaster op monitor interval=30s timeout=100s op start interval=0 timeout=100s op stop interval=0 timeout=100s
## If you set the start/stop timeout below 100s, you will see warnings like these:
## WARNING: mfs: specified timeout 30s for start is smaller than the advised 100
## WARNING: mfs: specified timeout 30s for stop is smaller than the advised 100
## They can be safely ignored
crm(live)configure# verify
crm(live)configure# colocation mfs_with_mfsmount inf: mfsmount mfs
## Colocation (affinity) binding
crm(live)configure# order mfsmount_before_mfs Mandatory: mfsmount mfs
## Start mfsmount first, then mfs
crm(live)configure# commit
## Note: get the colocation and start order right. Put bluntly: if the mount ends up on w1 while mfsmaster starts on w2, yet everything is installed on the DRBD shared storage, do you really think nothing will go wrong?



crm(live)configure# primitive vip ocf:heartbeat:IPaddr params ip=192.168.37.200
crm(live)configure# colocation vip_with_mfs inf: mfs vip
crm(live)configure# order mfs_before_vip Mandatory: mfs vip
crm(live)configure# verify
crm(live)configure# commit
crm(live)# status
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 09:26:59 2017
Last change: Sun Oct 29 09:26:46 2017 by root via cibadmin on w1

2 nodes configured
5 resources configured

Online: [ w1 w2 ]

Full list of resources:

 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w1 ]
     Slaves: [ w2 ]
 mfsmount	(ocf::heartbeat:Filesystem):	Started w1
 mfs	(systemd:mfsmaster):	Started w1
 vip	(ocf::heartbeat:IPaddr):	Started w1

## All the services are now running on w1


[root@w1 ~]# ip addr | grep 192.168
    inet 192.168.37.21/24 brd 192.168.37.255 scope global ens33
    inet 192.168.37.200/24 brd 192.168.37.255 scope global secondary ens33

## The VIP is visible on w1; w2 does not have it



8. Verification

Start the metalogger server

[root@w3 ~]# cat /usr/lib/systemd/system/mfsmetalogger.service
[Unit]
Description=mfs
After=network.target
   
[Service]
Type=forking
ExecStart=/usr/local/mfs/sbin/mfsmetalogger start
ExecStop=/usr/local/mfs/sbin/mfsmetalogger stop
PrivateTmp=true
   
[Install]
WantedBy=multi-user.target
[root@w3 ~]# systemctl enable mfsmetalogger
[root@w3 ~]# systemctl start mfsmetalogger

Start the chunk servers (w4, w5)

cat /usr/lib/systemd/system/mfschunk.service 
[Unit]
Description=mfs
After=network.target
   
[Service]
Type=forking
ExecStart=/usr/local/mfs/sbin/mfschunkserver start
ExecStop=/usr/local/mfs/sbin/mfschunkserver stop
PrivateTmp=true
   
[Install]
WantedBy=multi-user.target

systemctl enable mfschunk
systemctl start mfschunk


Mount on the client

[root@w6 ~]# /usr/local/mfs/bin/mfsmount /test -H 192.168.37.200 -p
MFS Password:   ## the password is 000000
mfsmaster accepted connection with parameters: read-write,restricted_ip,map_all ; root mapped to mfs:mfs ; users mapped to mfs:mfs

[root@w6 ~]# df -h | grep test
192.168.37.200:9421   16G  3.6G   13G  23% /test
## Mounted successfully
[root@w6 test]# touch 1.txt
[root@w6 test]# echo "I am WSL" > 1.txt 
[root@w6 test]# cat 1.txt 
I am WSL
## Writes and reads work without any problem


cd /usr/local/mfs/var/mfs/

## The files under this path on the master and on the metalogger record the operations you performed
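
To see them, list that directory; the filenames below are the usual MooseFS 3.x set and may differ slightly (the metalogger's copies carry an _ml suffix):

ls /usr/local/mfs/var/mfs/
## typically metadata.mfs.back, changelog.0.mfs, changelog.1.mfs, stats.mfs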



9. Master failover test
[root@w1 ~]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 10:06:24 2017
Last change: Sun Oct 29 09:26:46 2017 by root via cibadmin on w1

2 nodes configured
5 resources configured

Online: [ w1 w2 ]

Full list of resources:

 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w1 ]
     Slaves: [ w2 ]
 mfsmount	(ocf::heartbeat:Filesystem):	Started w1
 mfs	(systemd:mfsmaster):	Started w1
 vip	(ocf::heartbeat:IPaddr):	Started w1

Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled

## This is the state before w1 is shut down
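
The failure below was produced by shutting w1 down; any equivalent method works, for example:

[root@w1 ~]# poweroff
## or, to stop only the cluster stack on w1 without rebooting:
[root@w1 ~]# pcs cluster stop w1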




[root@w2 ~]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: w2 (version 1.1.16-12.el7_4.4-94ff4df) - partition with quorum
Last updated: Sun Oct 29 10:08:26 2017
Last change: Sun Oct 29 09:26:46 2017 by root via cibadmin on w1

2 nodes configured
5 resources configured

Online: [ w2 ]
OFFLINE: [ w1 ]

Full list of resources:

 Master/Slave Set: ms_mfs_drbd [mfs_drbd]
     Masters: [ w2 ]
     Stopped: [ w1 ]
 mfsmount	(ocf::heartbeat:Filesystem):	Started w2
 mfs	(systemd:mfsmaster):	Started w2
 vip	(ocf::heartbeat:IPaddr):	Started w2

Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled

## Keep refreshing pcs status on w2 and you can watch the resources stop one by one, then start again on w2 in the order defined earlier



[root@w6 test]# echo 123 > test.txt
[root@w6 test]# cat test.txt 
123

## The client mount keeps working as before

[root@w2 mfs]# cat changelog.0.mfs 
2: 1509286097|CSDBOP(0,3232245016,9422,1)
3: 1509286097|CHUNKADD(1,1,1509890897)
4: 1509286100|CSDBOP(0,3232245017,9422,2)
5: 1509286101|SESADD(#9773763645954448363,1,16,0000,1000,1000,1000,1000,1,9,0,4294967295,3232245018,/test):1
6: 1509286124|ACQUIRE(1,1)
7: 1509286124|ACQUIRE(1,2)
8: 1509286202|CREATE(1,test.txt,1,438,18,1000,1000,0):2
9: 1509286203|WRITE(2,0,1,0):2
10: 1509286204|AMTIME(2,1509286202,1509286203,1509286203)
11: 1509286204|LENGTH(2,4,0)
12: 1509286204|UNLOCK(2)
13: 1509286208|ACCESS(1)
14: 1509286209|AMTIME(2,1509286208,1509286203,1509286203)


10. What if both masters fail

If your luck is truly terrible and both mfsmaster hosts go down, don't panic: the metalogger server is still there, and you can promote it to mfsmaster.

I won't walk through the full procedure here. Roughly: change MASTER_HOST on the chunk servers and on the metalogger itself to point at the metalogger's own address, start mfsmaster on the metalogger, and restart the chunk server services. That's about it; look up the details of promoting a metalogger to mfsmaster if you need them.
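
A rough, untested sketch of that promotion on w3, assuming the standard MooseFS 3.x metalogger file layout (w3 already has the mfsmaster binary, since it was built with the same configure flags as the masters):

cd /usr/local/mfs/var/mfs
## rename the metalogger's copies to the names mfsmaster expects
cp metadata_ml.mfs.back metadata.mfs.back
for f in changelog_ml.*.mfs; do cp "$f" "${f/_ml/}"; done
chown mfs:mfs metadata.mfs.back changelog.*.mfs
## rebuild the metadata from the changelogs and start the master
/usr/local/mfs/sbin/mfsmaster -a
## then point MASTER_HOST on the chunk servers (and clients) at w3 and restart their services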
