crush rule主要作用:
从OSD Map中的哪个节点开始查找
使用哪个节点作为故障隔离域
定位副本的搜索模式(广度优先 or 深度优先)
查看规则内容
ceph osd crush rule dump
本文介绍在使用ceph时,如何配置实现创建不同性能的pool,以用于不同的场景。
环境介绍
(ceph-mon)[root@node01 /]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-6 6.00000 root stat
9 1.00000 osd.9 up 1.00000 1.00000
10 1.00000 osd.10 up 1.00000 1.00000
11 1.00000 osd.11 up 1.00000 1.00000
12 1.00000 osd.12 up 1.00000 1.00000
13 1.00000 osd.13 up 1.00000 1.00000
14 1.00000 osd.14 up 1.00000 1.00000
-5 9.00000 root ssd
0 1.00000 osd.0 up 1.00000 1.00000
1 1.00000 osd.1 up 1.00000 1.00000
2 1.00000 osd.2 up 1.00000 1.00000
3 1.00000 osd.3 up 1.00000 1.00000
4 1.00000 osd.4 up 1.00000 1.00000
5 1.00000 osd.5 up 1.00000 1.00000
6 1.00000 osd.6 up 1.00000 1.00000
7 1.00000 osd.7 up 1.00000 1.00000
8 1.00000 osd.8 up 1.00000 1.00000
-1 15.00000 root default
-2 5.00000 host 172.21.196.11
0 1.00000 osd.0 up 1.00000 1.00000
2 1.00000 osd.2 up 1.00000 1.00000
4 1.00000 osd.4 up 1.00000 1.00000
11 1.00000 osd.11 up 1.00000 1.00000
13 1.00000 osd.13 up 1.00000 1.00000
-3 5.00000 host 172.21.196.13
1 1.00000 osd.1 up 1.00000 1.00000
3 1.00000 osd.3 up 1.00000 1.00000
5 1.00000 osd.5 up 1.00000 1.00000
10 1.00000 osd.10 up 1.00000 1.00000
14 1.00000 osd.14 up 1.00000 1.00000
-4 5.00000 host 172.21.196.12
6 1.00000 osd.6 up 1.00000 1.00000
7 1.00000 osd.7 up 1.00000 1.00000
8 1.00000 osd.8 up 1.00000 1.00000
9 1.00000 osd.9 up 1.00000 1.00000
12 1.00000 osd.12 up 1.00000 1.00000
这里是我已经配置好的osd信息,未配置之前是没有root stat,root ssd这2部分内容的,一共有15个osd,0–8是SSD磁盘,9–14是SATA磁盘。
获取crush map
(ceph-mon)[root@node01 /]# ceph osd getcrushmap -o crushmapgot
got crush map from osdmap epoch 380
当前目录下会生成一个crushmapgot文件。
反编译crush map
(ceph-mon)[root@node01 /]# crushtool -d crushmapgot -o decrushmap
当前目录下会生成一个decrushmap文件。
修改crush map
(ceph-mon)[root@node01 /]# cat decrushmap
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host 172.21.196.11 {
id -2 # do not change unnecessarily
# weight 5.000
alg straw
hash 0 # rjenkins1
item osd.0 weight 1.000
item osd.2 weight 1.000
item osd.4 weight 1.000
item osd.11 weight 1.000
item osd.13 weight 1.000
}
host 172.21.196.13 {
id -3 # do not change unnecessarily
# weight 5.000
alg straw
hash 0 # rjenkins1
item osd.1 weight 1.000
item osd.3 weight 1.000
item osd.5 weight 1.000
item osd.10 weight 1.000
item osd.14 weight 1.000
}
host 172.21.196.12 {
id -4 # do not change unnecessarily
# weight 5.000
alg straw
hash 0 # rjenkins1
item osd.6 weight 1.000
item osd.7 weight 1.000
item osd.8 weight 1.000
item osd.9 weight 1.000
item osd.12 weight 1.000
}
root default {
id -1 # do not change unnecessarily
# weight 15.000
alg straw
hash 0 # rjenkins1
item 172.21.196.11 weight 5.000
item 172.21.196.13 weight 5.000
item 172.21.196.12 weight 5.000
}
#####新增2个bucket
-------------------------------------------------------------
root ssd {
id -5 # do not change unnecessarily
# weight 9.000
alg straw
hash 0 # rjenkins1
item osd.0 weight 1.000
item osd.1 weight 1.000
item osd.2 weight 1.000
item osd.3 weight 1.000
item osd.4 weight 1.000
item osd.5 weight 1.000
item osd.6 weight 1.000
item osd.7 weight 1.000
item osd.8 weight 1.000
}
root stat {
id -6 # do not change unnecessarily
# weight 6.000
alg straw
hash 0 # rjenkins1
item osd.9 weight 1.000
item osd.10 weight 1.000
item osd.11 weight 1.000
item osd.12 weight 1.000
item osd.13 weight 1.000
item osd.14 weight 1.000
}
-------------------------------------------------------------------------
# rules
rule replicated_ruleset {
ruleset 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule disks {
ruleset 1
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
##新增2个rule
----------------------------------------------------------------------
rule ssd {
ruleset 2
type replicated
min_size 1
max_size 10
step take ssd
step chooseleaf firstn 0 type osd
step emit
}
rule stat {
ruleset 3
type replicated
min_size 1
max_size 10
step take stat
step chooseleaf firstn 0 type osd
step emit
}
-----------------------------------------------------------------------
# end crush map
以上分割线内部内容为新增内容,主要就是增加了2个bucket和2个rules。
# rules
rule replicated_ruleset {
ruleset 0 #rule编号
type replicated #定义pool类型为replicated(还有erasure code模式)
min_size 1 #pool中最小指定的副本数量不能小于1
max_size 10 #pool中最大指定的副本数量不能大于10
step take default #定义pg查找副本的入口点
step chooseleaf firstn 0 type host #选叶子节点、深度优先、隔离host
step emit #结束
}
编译crush map
(ceph-mon)[root@node01 /]# crushtool -c decrushmap -o newcrushmap
注入crush map
(ceph-mon)[root@node01 /]# ceph osd setcrushmap -i newcrushmap
创建资源池
(ceph-mon)[root@node01 /]# ceph osd pool create ssd_pool2 256 256
pool 'ssd_pool2' created
(ceph-mon)[root@node01 /]# ceph osd pool create sata_pool2 256 256
pool 'sata_pool2' created
(ceph-mon)[root@node01 /]# ceph osd dump|grep ssd_pool2
pool 34 'ssd_pool2' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 256 pgp_num 256 last_change 381 flags hashpspool stripe_width 0
(ceph-mon)[root@node01 /]# ceph osd dump|grep sata_pool2
pool 35 'sata_pool2' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 256 pgp_num 256 last_change 383 flags hashpspool stripe_width 0
注意:刚刚创建的两个资源池ssd_pool2 和sata_pool2 的 crush_ruleset 都是0,下面需要修改。
修改资源池存储规则
(ceph-mon)[root@node01 /]# ceph osd pool set ssd_pool2 crush_ruleset 2
set pool 34 crush_ruleset to 2
(ceph-mon)[root@node01 /]# ceph osd pool set sata_pool2 crush_ruleset 3
set pool 35 crush_ruleset to 3
(ceph-mon)[root@node01 /]# ceph osd dump|grep ssd_pool2
pool 34 'ssd_pool2' replicated size 3 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 256 pgp_num 256 last_change 385 flags hashpspool stripe_width 0
(ceph-mon)[root@node01 /]# ceph osd dump|grep sata_pool2
pool 35 'sata_pool2' replicated size 3 min_size 1 crush_ruleset 3 object_hash rjenkins pg_num 256 pgp_num 256 last_change 386 flags hashpspool stripe_width 0
修改为对应的rule。
验证
验证前先看看ssd_pool2 和sata_pool2 里面是否有对象
(ceph-mon)[root@node01 /]# rados ls -p ssd_pool2
(ceph-mon)[root@node01 /]# rados ls -p sata_pool2
是空的, 用rados命令 添加对象到两个资源池中
(ceph-mon)[root@node01 /]# rados -p ssd_pool2 put test_object1 /etc/hosts
(ceph-mon)[root@node01 /]# rados -p sata_pool2 put test_object2 /etc/hosts
(ceph-mon)[root@node01 /]# rados ls -p ssd_pool2
test_object1
(ceph-mon)[root@node01 /]# rados ls -p sata_pool2
test_object2
查看对象的分布情况
(ceph-mon)[root@node01 /]# ceph osd map ssd_pool2 test_object1
osdmap e392 pool 'ssd_pool2' (34) object 'test_object1' -> pg 34.d5066e42 (34.42) -> up ([7,2,1], p7) acting ([7,2,1], p7)
(ceph-mon)[root@node01 /]# ceph osd map sata_pool2 test_object2
osdmap e392 pool 'sata_pool2' (35) object 'test_object2' -> pg 35.c5cfe5e9 (35.e9) -> up ([12,14,10], p12) acting ([12,14,10], p12)
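test_object1 的三个副本在osd.7、osd.2、osd.1上(均属于0–8的SSD磁盘),test_object2 的三个副本在osd.12、osd.14、osd.10上(均属于9–14的SATA磁盘),说明两个资源池已按各自的rule分布数据。需要注意:新增的两个rule以osd作为故障隔离域(type osd),因此同一对象的多个副本可能落在同一台主机上,例如osd.10和osd.14都位于主机172.21.196.13。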