Note: This document describes a test of using CRUSH to tier storage on hosts that contain two kinds of OSD disks, SSD and SATA.
All tests below were run on Ceph 0.94.x.
Test environment:
ceph-mon node x1
ceph-osd1: 10G*2, 20G*2
ceph-osd2: 10G*2, 20G*2
ceph-osd3: 10G*1, 20G*1
Assume the 10G disks are SSDs and the 20G disks are SATA. (The data partitions below are roughly 5G smaller than the raw disks, presumably because ceph-disk carved out the default 5G journal partition on each.)
ceph-osd1:
/dev/sdb1 5.0G 34M 5.0G 1% /var/lib/ceph/osd/ceph-0
/dev/sdc1 5.0G 34M 5.0G 1% /var/lib/ceph/osd/ceph-1
/dev/sdd1 15G 35M 15G 1% /var/lib/ceph/osd/ceph-2
/dev/sde1 15G 34M 15G 1% /var/lib/ceph/osd/ceph-3
ceph-osd2:
/dev/sdb1 5.0G 34M 5.0G 1% /var/lib/ceph/osd/ceph-4
/dev/sdc1 15G 34M 15G 1% /var/lib/ceph/osd/ceph-5
/dev/sdd1 15G 34M 15G 1% /var/lib/ceph/osd/ceph-6
/dev/sde1 5.0G 34M 5.0G 1% /var/lib/ceph/osd/ceph-7
ceph-osd3:
/dev/sdb1 5.0G 34M 5.0G 1% /var/lib/ceph/osd/ceph-8
/dev/sdc1 15G 34M 15G 1% /var/lib/ceph/osd/ceph-9
$ ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04997 root default
-2 0.01999 host ceph-osd1
0 0 osd.0 up 1.00000 1.00000
1 0 osd.1 up 1.00000 1.00000
2 0.00999 osd.2 up 1.00000 1.00000
3 0.00999 osd.3 up 1.00000 1.00000
-3 0.01999 host ceph-osd2
4 0 osd.4 up 1.00000 1.00000
5 0.00999 osd.5 up 1.00000 1.00000
6 0.00999 osd.6 up 1.00000 1.00000
7 0 osd.7 up 1.00000 1.00000
-4 0.00999 host ceph-osd3
8 0 osd.8 up 1.00000 1.00000
9 0.00999 osd.9 up 1.00000 1.00000
Steps:
Export the CRUSH map:
$ ceph osd getcrushmap -o crushmap.map
Decompile the map into human-readable form:
$ crushtool -d crushmap.map -o crushmap.txt
Original CRUSH map:
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable straw_calc_version 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host ceph-osd1 {
id -2 # do not change unnecessarily
# weight 0.020
alg straw
hash 0 # rjenkins1
item osd.0 weight 0.000
item osd.1 weight 0.000
item osd.2 weight 0.010
item osd.3 weight 0.010
}
host ceph-osd2 {
id -3 # do not change unnecessarily
# weight 0.020
alg straw
hash 0 # rjenkins1
item osd.4 weight 0.000
item osd.5 weight 0.010
item osd.6 weight 0.010
item osd.7 weight 0.000
}
host ceph-osd3 {
id -4 # do not change unnecessarily
# weight 0.010
alg straw
hash 0 # rjenkins1
item osd.8 weight 0.000
item osd.9 weight 0.010
}
root default {
id -1 # do not change unnecessarily
# weight 0.050
alg straw
hash 0 # rjenkins1
item ceph-osd1 weight 0.020
item ceph-osd2 weight 0.020
item ceph-osd3 weight 0.010
}
# rules
rule replicated_ruleset {
ruleset 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
# end crush map
After editing:
Note: a new type, diskarray, is inserted between osd and host, splitting each host's OSDs into two groups (SSD and SATA). The OSDs are also given explicit weights of roughly their capacity in TB (5G ≈ 0.005, 15G ≈ 0.015); in the original map the 5G OSDs carried an auto-assigned weight of 0.000, and an OSD with CRUSH weight 0 receives no data.
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable straw_calc_version 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
# types
type 0 osd
type 1 diskarray
type 2 host
type 3 chassis
type 4 rack
type 5 row
type 6 pdu
type 7 pod
type 8 room
type 9 datacenter
type 10 region
type 11 root
# buckets
diskarray ceph-osd1-ssd {
id -1
alg straw
hash 0
item osd.0 weight 0.005
item osd.1 weight 0.005
}
diskarray ceph-osd1-sata {
id -2
alg straw
hash 0
item osd.2 weight 0.015
item osd.3 weight 0.015
}
diskarray ceph-osd2-ssd {
id -3
alg straw
hash 0
item osd.4 weight 0.005
item osd.7 weight 0.005
}
diskarray ceph-osd2-sata {
id -4
alg straw
hash 0
item osd.5 weight 0.015
item osd.6 weight 0.015
}
diskarray ceph-osd3-ssd {
id -5
alg straw
hash 0
item osd.8 weight 0.005
}
diskarray ceph-osd3-sata {
id -6
alg straw
hash 0
item osd.9 weight 0.015
}
root ssd {
id -7
alg straw
hash 0
item ceph-osd1-ssd weight 0.010
item ceph-osd2-ssd weight 0.010
item ceph-osd3-ssd weight 0.005
}
root sata {
id -8
alg straw
hash 0
item ceph-osd1-sata weight 0.030
item ceph-osd2-sata weight 0.030
item ceph-osd3-sata weight 0.015
}
# rules
rule ssd_ruleset {
ruleset 0
type replicated
min_size 1
max_size 4
step take ssd
step chooseleaf firstn 0 type diskarray
step emit
}
rule sata_ruleset {
ruleset 1
type replicated
min_size 1
max_size 5
step take sata
step chooseleaf firstn 0 type diskarray
step emit
}
# end crush map
Recompile the edited map to binary:
$ crushtool -c crushmapnew.txt -o crushmapnew.map
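Before injecting it, the compiled map can optionally be sanity-checked offline with crushtool's test mode. A minimal sketch, assuming ruleset 0 is ssd_ruleset and ruleset 1 is sata_ruleset as in the map above, and using 2 replicas purely as an example:
$ crushtool -i crushmapnew.map --test --rule 0 --num-rep 2 --show-utilization
$ crushtool -i crushmapnew.map --test --rule 1 --num-rep 2 --show-mappings
--show-utilization prints how many test objects each OSD would receive, which makes it easy to spot OSDs that would get no data at all.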
Inject it into Ceph:
$ ceph osd setcrushmap -i crushmapnew.map
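The new hierarchy and rules can then be checked against the running cluster; a quick sketch:
$ ceph osd tree                      # should now list the ssd and sata roots instead of default
$ ceph osd crush rule ls
$ ceph osd crush rule dump ssd_ruleset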
Create a pool of each type:
$ ceph osd pool create ssdpool 128 ssd_ruleset
$ ceph osd pool create satapool 128 sata_ruleset
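To confirm that each pool picked up the intended ruleset and that objects land on the expected OSDs, something like the following can be used (testobj and /etc/hosts are just placeholder names; on 0.94 the pool property is called crush_ruleset, newer releases rename it to crush_rule):
$ ceph osd pool get ssdpool crush_ruleset
$ ceph osd pool get satapool crush_ruleset
$ rados -p ssdpool put testobj /etc/hosts
$ ceph osd map ssdpool testobj
ceph osd map prints the PG and the acting set for the object, which should contain only OSDs that sit under the ssd root.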
Note:
On 0.94, each ceph-osd must be configured with:
osd_crush_update_on_start = false
Otherwise, when an OSD starts it will automatically move itself back under its host bucket.
This has not been tested on 10.2.x.
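For reference, a minimal sketch of the corresponding ceph.conf fragment, to be placed on the OSD nodes (restart the OSDs after changing it):
[osd]
# keep OSDs from moving themselves back under their host bucket on start
osd_crush_update_on_start = false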