1.实验环境
实验使用三个linux设备
a.abc.com ip 192.168.10.99
b.abc.com ip 192.168.10.100
test 192.168.10.200
拓扑图:
检查内核
[root@a ~]# uname -r
2.6.18-164.el5 #此版本内核默认未添加drbd内核模块
安装软件
在 a.abc.com 与b.abc.com 上同时执行安装操作
[root@love ~]# ll
总计 3088
-rw------- 1 root root 1192 10-05 18:43 anaconda-ks.cfg
drwxr-xr-x 2 root root 4096 10-05 19:56 Desktop
-rw-r--r-- 1 root root 221868 10-05 21:57 drbd83-8.3.8-1.el5.centos.i386.rpm
-rw-r--r-- 1 root root 1637238 10-05 21:58 heartbeat-2.1.4-9.el5.i386.rpm
-rw-r--r-- 1 root root 293349 10-05 21:58 heartbeat-devel-2.1.4-9.el5.i386.rpm
-rw-r--r-- 1 root root 230890 10-05 21:58 heartbeat-gui-2.1.4-9.el5.i386.rpm
-rw-r--r-- 1 root root 111742 10-05 21:58 heartbeat-ldirectord-2.1.4-9.el5.i386.rpm
-rw-r--r-- 1 root root 92070 10-05 21:58 heartbeat-pils-2.1.4-10.el5.i386.rpm
-rw-r--r-- 1 root root 179199 10-05 21:58 heartbeat-stonith-2.1.4-10.el5.i386.rpm
-rw-r--r-- 1 root root 35236 10-05 18:42 install.log
-rw-r--r-- 1 root root 3995 10-05 18:40 install.log.syslog
-rw-r--r-- 1 root root 125974 10-05 21:57 kmod-drbd83-8.3.8-1.el5.centos.i686.rpm
-rw-r--r-- 1 root root 56817 10-05 21:58 libnet-1.1.4-3.el5.i386.rpm
-rw-r--r-- 1 root root 92071 10-05 21:58 perl-MailTools-1.77-1.el5.noarch.rpm
[root@love ~]# yum localinstall *.rpm --nogpgcheck -y
设置在ssh通道中的无验证传输
在 a.abc.com 上创建密钥并将密钥转移到b.abc.com上
[root@a ~]# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
/root/.ssh/id_rsa already exists.
Overwrite (y/n)? y
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
49:6f:9d:0c:03:78:94:7c:52:b1:91:4e:35:34:51:f3 root@a.abc.com
[root@a .ssh]# ssh-copy-id -i id_rsa.pub b.abc.com
10
The authenticity of host 'a.abc.com (192.168.10.99)' can't be established.
RSA key fingerprint is 0a:78:89:da:1e:1d:97:95:0b:8b:03:22:e7:af:22:5c.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'a.abc.com,192.168.10.99' (RSA) to the list of known hosts.
root@b.abc.com's password:
Now try logging into the machine, with "ssh 'b.abc.com'", and check in:
.ssh/authorized_keys
to make sure we haven't added extra keys that you weren't expecting.
此时就可以在a.abc.com上将任意的文件使用 “scp 文件或目录 b.abc.com:/目录”进行移动
也可以使用 “ssh b.abc.com '命令'” 在 b.abc.com 主机上远程执行命令
同步时钟频率
[root@a ~]# date
2012年 10月 06日 星期六 11:17:11 CST
[root@a ~]# hwclock -s
[root@a ~]# date
2012年 10月 06日 星期六 11:24:08 CST
[root@a ~]# ssh b.abc.com 'hwclock -s'
[root@a ~]# ssh b.abc.com 'hwclock'
2012年10月06日 星期六 11时24分33秒 -0.080474 seconds
[root@a ~]# modprobe drbd
[root@a ~]# ssh b.abc.com 'modprobe drbd'
新建磁盘分区。在b.abc.com 上进行相同的操作
[root@a ~]# fdisk /dev/sda
The number of cylinders for this disk is set to 2610.
There is nothing wrong with that, but this is larger than 1024,
and could in certain setups cause problems with:
1) software that runs at boot time (e.g., old versions of LILO)
2) booting and partitioning software from other OSs
(e.g., DOS FDISK, OS/2 FDISK)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
e
Selected partition 4
First cylinder (1420-2610, default 1420):
Using default value 1420
Last cylinder or +size or +sizeM or +sizeK (1420-2610, default 2610):
Using default value 2610
Command (m for help): n
First cylinder (1420-2610, default 1420):
Using default value 1420
Last cylinder or +size or +sizeM or +sizeK (1420-2610, default 2610):
Using default value 2610
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
WARNING: Re-reading the partition table failed with error 16: 设备或资源忙.
The kernel still uses the old table.
The new table will be used at the next reboot.
Syncing disks.
[root@a ~]# partprobe /dev/sda
[root@a ~]# cd /usr/share/doc/drbd83-8.3.8/
[root@a drbd83-8.3.8]# ll
总计 64
-rw-r--r-- 1 root root 31183 2010-06-02 ChangeLog
-rw-r--r-- 1 root root 17990 2008-11-24 COPYING
-rw-r--r-- 1 root root 133 2010-06-02 drbd.conf
-rw-r--r-- 1 root root 22 2010-06-04 file.list
-rw-r--r-- 1 root root 425 2010-03-02 README
[root@a drbd83-8.3.8]# cp drbd.conf /etc/
cp:是否覆盖“/etc/drbd.conf”? y
[root@a drbd83-8.3.8]# scp drbd.conf b.abc.com:/etc/
drbd.conf 100% 133 0.1KB/s
[root@a drbd83-8.3.8]# cd
[root@a ~]# vim /etc/drbd.conf
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example
include "drbd.d/global_common.conf";
include "drbd.d/*.res";
[root@a ~]# cd /etc/drbd.d
[root@a drbd.d]# ll
总计 4
-rwxr-xr-x 1 root root 1418 2010-06-04 global_common.conf
[root@a drbd.d]# cp global_common.conf global_common.conf.bak
[root@a drbd.d]# ll
总计 8
-rwxr-xr-x 1 root root 1418 2010-06-04 global_common.conf
-rwxr-xr-x 1 root root 1418 10-06 11:37 global_common.conf.bak
[root@a drbd.d]# vim global_common.conf
1 global {
2 usage-count no;
3 }
4 common {
5 protocol C;
6 startup {
7 wfc-timeout 120;
8 degr-wfc-timeout 120;
9 }
10 disk {
11 on-io-error detach;
12 fencing resource-only;
13 }
14 net {
15 cram-hmac-alg "sha1";
16 shared-secret "mydrbdlab";
17 }
18 syncer {
19 rate 100M;
20 }
21 }
[root@a drbd.d]# vim web.res
1 resource web {
2 on a.abc.com {
3 device /dev/drbd0;
4 disk /dev/sda5;
5 address 192.168.10.99:7789;
6 meta-disk internal;
7 }
8 on b.abc.com {
9 device /dev/drbd0;
10 disk /dev/sda5;
11 address 192.168.10.100:7789;
12 meta-disk internal;
13 }
14 }
[root@a drbd.d]# scp * b.abc.com:/etc/drbd.d/
global_common.conf 100% 425 0.4KB/s
global_common.conf.bak 100% 1418 1.4KB/s
web.res 100% 315 0.3KB/s
[root@a drbd.d]# drbdadm create-md web
Writing meta data...
initializing activity log
NOT initialized bitmap
New drbd meta data block successfully created.
[root@a drbd.d]# ssh b.abc.com 'drbdadm create-md web'
NOT initialized bitmap
Writing meta data...
initializing activity log
New drbd meta data block successfully created.
[root@a drbd.d]# service drbd start
Starting DRBD resources: [
web
Found valid meta data in the expected location, 9796272128 bytes into /dev/sda5.
d(web) s(web) n(web) ]..........
***************************************************************
DRBD's startup script waits for the peer node(s) to appear.
- In case this node was already a degraded cluster before the
reboot the timeout is 120 seconds. [degr-wfc-timeout]
- If the peer was available before the reboot the timeout will
expire after 120 seconds. [wfc-timeout]
(These values are for resource 'web'; 0 sec -> wait forever)
To abort waiting enter 'yes' [ 28]:
.
[root@a drbd.d]# drbdadm -- --overwrite-data-of-peer primary web
[root@a drbd.d]# service drbd status
drbd driver loaded OK; device status:
version: 8.3.8 (api:88/proto:86-94)
GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by mockbuild@builder10.centos.org, 2010-06-04 08:04:16
m:res cs ro ds p mount
... sync'ed: 0.9% (9260/9340)M delay_probe:
0:web SyncSource Primary/Secondary UpToDate/Inconsistent C
[root@a drbd.d]# watch -n 1 'cat /proc/drbd'
[root@a drbd.d]# mkfs -t ext3 -L drbdweb /dev/drbd0
mke2fs 1.39 (29-May-2006)
Filesystem label=drbdweb
OS type: Linux
Block size=4096 (log=2)
Fragment size=4096 (log=2)
1196032 inodes, 2391587 blocks
119579 blocks (5.00%) reserved for the super user
First data block=0
Maximum filesystem blocks=2449473536
73 block groups
32768 blocks per group, 32768 fragments per group
16384 inodes per group
Superblock backups stored on blocks:
32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632
Writing inode tables: done
Creating journal (32768 blocks):
done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 32 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
nfs共享服务相关设置
[root@a drbd.d]# vim /etc/exports
[root@a drbd.d]# scp /etc/exports b.abc.com:/etc/
exports 100% 50 0.1KB/s
[root@a drbd.d]# exportfs -rv
exporting *:/web
[root@a drbd.d]# ssh b.abc.com 'exportfs -rv'
exporting *:/web
[root@a drbd.d]# service portmap start
启动 portmap: [确定]
[root@a drbd.d]# chkconfig portmap on
[root@a drbd.d]# ssh b.abc.com 'service portmap start'
启动 portmap:[确定]
[root@a drbd.d]# ssh b.abc.com 'chkconfig portmap on'
[root@a drbd.d]# service nfs start
启动 NFS 服务: [确定]
关掉 NFS 配额: [确定]
启动 NFS 守护进程: [确定]
启动 NFS mountd: [确定]
[root@a drbd.d]# chkconfig nfs on
[root@a drbd.d]# ssh b.abc.com 'service nfs start'
启动 NFS 服务: [确定]
关掉 NFS 配额:[确定]
启动 NFS 守护进程:[确定]
启动 NFS mountd:[确定]
[root@a drbd.d]# ssh b.abc.com 'chkconfig nfs on'
[root@a drbd.d]# vim /etc/init.d/nfs #在b.abc.com主机上执行同样的操作
122 killproc nfsd -9
配置heartbeat的配置文件
[root@a drbd.d]# cd /usr/share/doc/heartbeat-2.1.4/
[root@a heartbeat-2.1.4]# ls
apphbd.cf DirectoryMap.txt HardwareGuide.html heartbeat_api.txt rsync.
authkeys faqntips.html HardwareGuide.txt logd.cf starts
AUTHORS faqntips.txt haresources README
ChangeLog GettingStarted.html hb_report.html Requirements.html
COPYING GettingStarted.txt hb_report.txt Requirements.txt
COPYING.LGPL ha.cf heartbeat_api.html rsync.html
[root@a heartbeat-2.1.4]# cp authkeys ha.cf haresources /etc/ha.d/
[root@a heartbeat-2.1.4]# cd /etc/ha.d
[root@a ha.d]# ll
总计 48
-rw-r--r-- 1 root root 645 10-06 11:58 authkeys
-rw-r--r-- 1 root root 10539 10-06 11:58 ha.cf
-rwxr-xr-x 1 root root 745 2009-07-25 harc
-rw-r--r-- 1 root root 5905 10-06 11:58 haresources
drwxr-xr-x 2 root root 4096 10-06 11:34 rc.d
-rw-r--r-- 1 root root 692 2009-07-25 README.config
drwxr-xr-x 2 root root 4096 10-06 11:34 resource.d
-rw-r--r-- 1 root root 7862 2009-07-25 shellfuncs
[root@a ha.d]# vim ha.cf
24 debugfile /var/log/ha-debug
29 logfile /var/log/ha-log
34 logfacility local0
48 keepalive 2
56 deadtime 10
76 udpport 694
95 bcast eth0 #heartbeat的监控网卡
214 node a.abc.com
215 node b.abc.com
[root@a ha.d]# echo "a.abc.com IPaddr::192.168.10.101/24/eth0 drbddisk::web Filesystem::/dev/drbd0::/web::ext3 killnfsd" >> /etc/ha.d/haresources
[root@a ha.d]# vim authkeys
27 auth 3
28 3 md5 123456
[root@a ha.d]# vim haresources
[root@a resource.d]# echo "killall -9 nfsd ; /etc/init.d/nfs restart ; exit 0" >> /etc/ha.d/resource.d/killnfsd
[root@a resource.d]# cd ../
[root@a ha.d]# chmod 600 /etc/ha.d/authkeys
[root@a ha.d]# chmod 755 /etc/ha.d/resource.d/killnfsd
[root@a ha.d]# ll
总计 48
-rw------- 1 root root 665 10-06 12:04 authkeys
-rw-r--r-- 1 root root 10576 10-06 12:01 ha.cf
-rwxr-xr-x 1 root root 745 2009-07-25 harc
-rw-r--r-- 1 root root 6003 10-06 12:05 haresources
drwxr-xr-x 2 root root 4096 10-06 11:34 rc.d
-rw-r--r-- 1 root root 692 2009-07-25 README.config
drwxr-xr-x 2 root root 4096 10-06 12:06 resource.d
-rw-r--r-- 1 root root 7862 2009-07-25 shellfuncs
[root@a ha.d]# scp ha.cf authkeys haresources b.abc.com:/etc/ha.d/
ha.cf 100% 10KB 10.3KB/s 00:00
authkeys 100% 665 0.7KB/s 00:00
haresources 100% 6003 5.9KB/s 00:00
[root@a ha.d]# scp resource.d/killnfsd b.abc.com:/etc/ha.d/resource.d/
killnfsd 100% 51 0.1KB/s 00:00
[root@a ha.d]# service heartbeat restart
Stopping High-Availability services:
[确定]
Waiting to allow resource takeover to complete:
[确定]
Starting High-Availability services:
2012/10/06_12:14:54 INFO: Resource is stopped
[确定]
[root@a ha.d]# ssh b.abc.com 'service heartbeat restart'
Stopping High-Availability services:
[确定]
Waiting to allow resource takeover to complete:
[确定]
Starting High-Availability services:
2012/10/06_12:15:31 INFO: Resource is stopped
[确定]
[root@a ha.d]# drbdadm -- --overwrite-data-of-peer primary web
[root@a ha.d]# service drbd status
drbd driver loaded OK; device status:
version: 8.3.8 (api:88/proto:86-94)
GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by mockbuild@builder10.centos.org, 2010-06-04 08:04:16
m:res cs ro ds p mounted fstype
0:web Connected Primary/Secondary UpToDate/UpToDate C
[root@a ha.d]# mount /dev/drbd0 /web
[root@a ha.d]# mount
/dev/sda2 on / type ext3 (rw)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
/dev/sda1 on /boot type ext3 (rw)
tmpfs on /dev/shm type tmpfs (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw)
/dev/hdc on /mnt/cdrom type iso9660 (ro)
nfsd on /proc/fs/nfsd type nfsd (rw)
/dev/drbd0 on /web type ext3 (rw)
test测试机上进行的配置
[root@love ~]# mkdir /mnt/nfs
[root@love ~]# mount 192.168.10.101:/web /mnt/nfs
[root@love ~]# cd /mnt/nfs
[root@love nfs]# vim /mnt/test.sh
# NFS failover probe: every 2 seconds, try to touch the file "x" in the
# current directory and print a timestamp before and after the attempt.
# Runs until interrupted (Ctrl-C).
while true; do
  # $(...) replaces legacy backticks; quoting prints date's output verbatim
  # instead of exposing it to word-splitting and globbing.
  printf '%s\n' "---->trying touch x:$(date)"
  # A failing touch is deliberately not fatal: the loop keeps running so the
  # moment the share becomes writable again shows up in the output.
  touch x
  printf '%s\n' "<-----done touch x:$(date)"
  echo
  sleep 2
done
[root@love nfs]# bash /mnt/test.sh #在当前目录下执行每隔两秒创建 文件“x” 的脚本
---->trying touch x:2012年 10月 06日 星期六 03:02:38 CST
<-----done touch x:2012年 10月 06日 星期六 03:02:39 CST
---->trying touch x:2012年 10月 06日 星期六 03:02:41 CST
touch: 无法触碰 “x”: Stale NFS file handle
<-----done touch x:2012年 10月 06日 星期六 03:02:41 CST
#关闭 a.abc.com 主机上的 heartbeat 服务后,NFS 访问中断一次,随后立刻恢复正常
---->trying touch x:2012年 10月 06日 星期六 03:02:52 CST
<-----done touch x:2012年 10月 06日 星期六 03:02:52 CST