首先查看多路径设备
[root@elt1 ~]#
[root@elt1 ~]# multipath -ll
mpathe (3600c0ff00026402cf4cf5d5601000000) dm-1 DotHill,DH3000
size=47G features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:0 sdb 8:16 active ready running
`-+- policy='round-robin 0' prio=1 status=active
`- 4:0:0:0 sdi 8:128 active ready running
mpathd (3600c0ff00026402cf5cf5d5601000000) dm-0 DotHill,DH3000
size=47G features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:1 sdc 8:32 active ready running
`-+- policy='round-robin 0' prio=1 status=active
`- 4:0:0:1 sdj 8:144 active ready running
mpathc (3600c0ff00026402cdbc95d5601000000) dm-4 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:2 sdd 8:48 active ready running
`-+- policy='round-robin 0' prio=1 status=active
`- 4:0:0:2 sdk 8:160 active ready running
mpathb (3600c0ff00026402cdbc95d5602000000) dm-3 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:3 sde 8:64 active ready running
`-+- policy='round-robin 0' prio=1 status=active
`- 4:0:0:3 sdl 8:176 active ready running
mpathh (3600c0ff00026402ce0c95d5604000000) dm-6 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=active
| `- 3:0:0:6 sdh 8:112 active ready running
`-+- policy='round-robin 0' prio=1 status=enabled
`- 4:0:0:6 sdo 8:224 active ready running
mpathg (3600c0ff00026402ce0c95d5603000000) dm-5 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:5 sdg 8:96 active ready running
`-+- policy='round-robin 0' prio=1 status=active
`- 4:0:0:5 sdn 8:208 active ready running
mpathf (3600c0ff00026402cdcc95d5601000000) dm-2 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:4 sdf 8:80 active ready running
`-+- policy='round-robin 0' prio=1 status=active
`- 4:0:0:4 sdm 8:192 active ready running
[root@elt1 ~]#
再查看多路径设备的配置
multipathd> show config
defaults {
verbosity 2
polling_interval 5
udev_dir "/dev"
multipath_dir "/lib64/multipath"
path_selector "round-robin 0"
path_grouping_policy failover
getuid_callout "/lib/udev/scsi_id --whitelisted --device=/dev/%n"
prio const
features "0"
path_checker directio
failback manual
rr_min_io 1000
rr_min_io_rq 1
max_fds 1048576
rr_weight uniform
queue_without_daemon no
hwtable_regex_match no
flush_on_last_del no
user_friendly_names no
find_multipaths no
log_checker_err always
retain_attached_hw_handler no
detect_prio no
reload_readwrite no
replace_wwid_whitespace no
force_sync no
config_dir "/etc/multipath/conf.d"
delay_watch_checks no
delay_wait_checks no
}
multipaths {
multipath {
wwid 3600c0ff00026402cdbc95d5602000000
alias mpathb
}
multipath {
wwid 3600c0ff00026402cdbc95d5601000000
alias mpathc
}
multipath {
wwid 3600c0ff00026402cf5cf5d5601000000
alias mpathd
}
multipath {
wwid 3600c0ff00026402cf4cf5d5601000000
alias mpathe
}
multipath {
wwid 3600c0ff00026402cdcc95d5601000000
alias mpathf
}
multipath {
wwid 3600c0ff00026402ce0c95d5603000000
alias mpathg
}
multipath {
wwid 3600c0ff00026402ce0c95d5604000000
alias mpathh
}
}
multipathd>
在上面的配置中,路径分组策略参数path_grouping_policy的值为failover,这是一种主动/被动的多路径设备冗余策略,使用这种路径分组策略,一个多路径设备下面的每条路径都会单独分为一组(多路径设备下面有多少条路径就有多少个分组),基于优先权的算法,其中一组会被选为活动的路径,选出的分组中的路径可以对存储进行数据的读写,其他分组中的路径则处于非活动状态。只有活动路径出现故障无法访问存储时,才会从其他非活动分组中再选出一个分组作为活动路径。
这里用多路径设备mpathh来演示多路径切换,将一条非活动的路径切换为活动路径,原来的活动路径则会自动变为非活动状态,首先查看多路径设备mpathh的路径及路径的状态:
[root@elt1 ~]# multipathd -k
multipathd> show map mpathh topology
mpathh (3600c0ff00026402ce0c95d5604000000) dm-6 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=active
| `- 3:0:0:6 sdh 8:112 active ready running
`-+- policy='round-robin 0' prio=1 status=enabled
`- 4:0:0:6 sdo 8:224 active ready running
multipathd>
根据上面的显示,多路径设备mpathh下面有两条路径,一条为sdh,一条为sdo。sdo的status=enabled表示此路径为非活动状态;sdh的status=active表示此路径为活动状态。当对多路径设备mpathh进行数据读写时,多路径程序将会调用sdh对存储进行读写,下面来做个测试:
[root@elt1 ~]# dd if=/dev/mapper/mpathh of=test bs=1M count=10240
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 31.7054 s, 339 MB/s
[root@elt1 ~]#
在执行dd命令时打开另一窗口查看sdh和sdo的读写:
[root@elt1 ~]# iostat -m 1 20|grep -E "sdh|sdo|Device"
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.80 0.08 0.05 30801 21504
sdo 2.31 0.18 0.23 71738 93184
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 549.00 64.50 0.00 64 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2568.00 307.00 0.00 307 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2559.00 306.38 0.00 306 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2554.00 306.25 0.00 306 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2578.00 312.38 0.00 312 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2575.00 308.25 0.00 308 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2557.00 305.65 0.00 305 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2570.00 309.72 0.00 309 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2557.00 306.00 0.00 306 0
sdo 1.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2551.00 305.25 0.00 305 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2554.00 305.75 0.00 305 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2581.00 303.00 0.00 303 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2563.00 307.12 0.00 307 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2586.00 305.12 0.00 305 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2584.00 307.75 0.00 307 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2572.00 309.50 0.00 309 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2574.00 311.12 0.00 311 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2564.00 305.38 0.00 305 0
sdo 0.00 0.00 0.00 0 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 2581.00 306.88 0.00 306 0
sdo 0.00 0.00 0.00 0 0
[root@elt1 ~]#
从上面的iostat命令输出结果可以看出,sdo并没有对存储进行读写,只有sdh对存储进行了读操作。下面继续演示将sdo切换为活动路径,然后再对多路径设备mpathh进行数据读写测试。
再次查看多路径设备mpathh的路径状态:
multipathd> show multipath mpathh topology
mpathh (3600c0ff00026402ce0c95d5604000000) dm-6 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=active
| `- 3:0:0:6 sdh 8:112 active ready running
`-+- policy='round-robin 0' prio=1 status=enabled
`- 4:0:0:6 sdo 8:224 active ready running
multipathd>
多路径设备mpathh下面有两组路径,第1组路径下面有一条路径sdh,第2组路径下面有一条路径sdo,当前活动的路径为sdh,也就是第1组。每个组都有一个组编号,第1组的组编号为1,第2组的组编号为2,当路径切换时会用到组编号。下面演示路径切换。
[root@elt1 ~]# multipathd -k
multipathd> help
multipath-tools v0.4.9 (04/04, 2009)
CLI commands reference:
list|show paths
list|show paths format $format
list|show status
list|show daemon
list|show maps|multipaths
list|show maps|multipaths status
list|show maps|multipaths stats
list|show maps|multipaths format $format
list|show maps|multipaths topology
list|show topology
list|show map|multipath $map topology
list|show config
list|show blacklist
list|show devices
list|show wildcards
add path $path
remove|del path $path
add map|multipath $map
remove|del map|multipath $map
switch|switchgroup map|multipath $map group $group
reconfigure
suspend map|multipath $map
resume map|multipath $map
resize map|multipath $map
disablequeueing map|multipath $map
restorequeueing map|multipath $map
disablequeueing maps|multipaths
restorequeueing maps|multipaths
reinstate path $path
fail path $path
paths count
forcequeueing daemon
restorequeueing daemon
quit|exit
map|multipath $map getprstatus
map|multipath $map setprstatus
map|multipath $map unsetprstatus
multipathd>
将第2组路径切换为活动路径:
multipathd> switch map mpathh group 2
ok
multipathd>
查看路径状态(在multipathd交互界面中执行show map mpathh topology):
mpathh (3600c0ff00026402ce0c95d5604000000) dm-6 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:6 sdh 8:112 active ready running
`-+- policy='round-robin 0' prio=1 status=enabled
`- 4:0:0:6 sdo 8:224 active ready running
发现第2组路径并没有活动,这可能是有延时的原因,路径状态还没有变成活动状态,下面用dd命令对多路径设备进行读操作并观察数据读写情况。
执行下面的命令,对多路径设备mpathh进行读数据操作:
[root@elt1 ~]# dd if=/dev/mapper/mpathh of=test bs=1M count=10240
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 72.2445 s, 149 MB/s
[root@elt1 ~]#
执行dd命令的同时打开另一窗口观察数据读写:
[root@elt1 ~]# iostat -m 1 20|grep -E "sdh|sdo|Device"
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.84 0.08 0.06 30797 21504
sdo 2.19 0.16 0.24 61494 93184
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 686.00 70.13 0.00 70 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3128.00 324.50 0.00 324 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3085.00 322.88 0.00 322 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3164.00 327.62 0.00 327 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3012.00 312.88 0.00 312 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 2584.00 279.50 0.00 279 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3064.00 314.88 0.00 314 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3041.00 315.62 0.00 315 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 2193.00 236.88 0.00 236 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 602.00 63.62 0.00 63 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 1139.00 122.37 0.00 122 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3175.00 348.25 0.00 348 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3211.00 350.25 0.00 350 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3152.00 336.75 0.00 336 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3138.00 340.50 0.00 340 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 1930.00 202.38 0.00 202 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 2851.00 303.12 0.00 303 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 3147.00 339.00 0.00 339 0
Device: tps MB_read/s MB_wrtn/s MB_read MB_wrtn
sdh 0.00 0.00 0.00 0 0
sdo 2642.00 281.75 0.00 281 0
[root@elt1 ~]#
经过多路径切换后的测试,sdo已成为多路径设备mpathh的活动路径,多路径设备mpathh通过sdo对数据进行读写操作。再次查看多路径的状态:
multipathd> show map mpathh topology
mpathh (3600c0ff00026402ce0c95d5604000000) dm-6 DotHill,DH3000
size=1.8T features='0' hwhandler='0' wp=rw
|-+- policy='round-robin 0' prio=1 status=enabled
| `- 3:0:0:6 sdh 8:112 active ready running
`-+- policy='round-robin 0' prio=1 status=active
`- 4:0:0:6 sdo 8:224 active ready running
multipathd>
路径sdo已经处于活动状态,到此多路径切换结束。
在上面的配置中,还有一个非常重要的参数failback,它控制的是故障路径恢复之后,是否切回优先级最高的路径组(即故障恢复)。此参数值为manual,当设为manual时,活动路径发生故障后多路径仍会自动切换到其他可用路径(故障切换),但原路径恢复后并不会自动切回,需要手动实现路径的切回。如果想在原路径恢复后立即自动切回,则failback的值要设置为immediate。failback共有如下几个值:
immediate 值指定立即恢复到包含活跃路径的最高级别路径组群。
manual 值指定不需要立即恢复,只有在操作者干预的情况下会发生恢复。
followover 值指定只有当路径组的第一个路径成为活跃路径时才执行自动恢复。这可让节点在另一个节点请求了故障切换(failover)时不会自动恢复。
大于 0 的数字值指定延迟执行故障恢复(failback)的时间,以秒表示。
默认值为 manual。
在生产环境中如果使用主动/被动的分组策略,最好将failback值设为immediate,这样在故障路径恢复后会立即切回优先级最高的路径组,以保证数据访问始终走首选路径(路径发生故障时向其他正常路径的切换是自动进行的,不受failback参数影响)。