See: https://ceph.readthedocs.io/en/latest/rados/operations/crush-map/
(1) Manually editing the CRUSH map rules
In production, make this kind of adjustment only while the affected disks are still empty, or plan the CRUSH map rules ahead of time and put them in place before data lands on the cluster.
1. Assess the current layout
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.02939 root default
-3 0.00980 host k-master
0 hdd 0.00490 osd.0 up 1.00000 1.00000
1 hdd 0.00490 osd.1 up 1.00000 1.00000
-5 0.00980 host k-worker001
2 hdd 0.00490 osd.2 up 1.00000 1.00000
4 ssd 0.00490 osd.4 up 1.00000 1.00000
-7 0.00980 host k-worker002
3 hdd 0.00490 osd.3 up 1.00000 1.00000
5 ssd 0.00490 osd.5 up 1.00000 1.00000
[root@k-master ceph]#
As you can see, there is currently only one root in the hierarchy: default.
Since the cluster contains both ssd and hdd disks, we will write custom rules that separate the two disk types, so that pools for different purposes can be created on the appropriate devices.
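Before touching the map, it helps to confirm how the cluster already classifies its devices. A minimal, read-only sketch (safe to run at any time):

# List the device classes CRUSH currently knows about (expected here: hdd, ssd)
ceph osd crush class ls

# Show which OSDs belong to each class
ceph osd crush class ls-osd ssd
ceph osd crush class ls-osd hdd

# Display the CRUSH hierarchy including the per-class "shadow" trees
ceph osd crush tree --show-shadow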
2. Export the existing CRUSH map for backup and analysis
[root@k-master ceph]# ceph osd getcrushmap -o crushmap.bin
18
[root@k-master ceph]# file crushmap.bin
crushmap.bin: MS Windows icon resource - 16 icons, 1-colors
[root@k-master ceph]# ls
ceph.bootstrap-mds.keyring ceph.bootstrap-rgw.keyring ceph-deploy-ceph.log etc swift_openrc.sh
ceph.bootstrap-mgr.keyring ceph.client.admin.keyring ceph.mon.keyring rbdmap testrbd
ceph.bootstrap-osd.keyring ceph.conf crushmap.bin s3test.py
[root@k-master ceph]# crushtool -d crushmap.bin -o crushmap.txt
[root@k-master ceph]# cat crushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class ssd
device 5 osd.5 class ssd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host k-master {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
id -9 class ssd # do not change unnecessarily
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.005
item osd.1 weight 0.005
}
host k-worker001 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
id -10 class ssd # do not change unnecessarily
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.2 weight 0.005
item osd.4 weight 0.005
}
host k-worker002 {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
id -11 class ssd # do not change unnecessarily
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.3 weight 0.005
item osd.5 weight 0.005
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
id -12 class ssd # do not change unnecessarily
# weight 0.029
alg straw2
hash 0 # rjenkins1
item k-master weight 0.010
item k-worker001 weight 0.010
item k-worker002 weight 0.010
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
# end crush map
[root@k-master ceph]#
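Before editing anything, the exported binary map can be exercised offline with crushtool to see which OSDs the existing replicated_rule would select. A sketch, assuming rule id 0 and 2 replicas:

# Simulate placements for rule 0 (replicated_rule) with 2 replicas
# over a few sample inputs; no cluster state is touched.
crushtool -i crushmap.bin --test --show-mappings --rule 0 --num-rep 2 --min-x 0 --max-x 9

# Summarize how evenly the rule would spread data across OSDs
crushtool -i crushmap.bin --test --show-utilization --rule 0 --num-rep 2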
3. Modify the rules and apply them
[1] The modified rule file
[root@k-master ceph]# cat crushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class ssd
device 5 osd.5 class ssd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host k-master {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
id -9 class ssd # do not change unnecessarily
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.005
item osd.1 weight 0.005
}
host k-worker001 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.2 weight 0.005
}
host k-worker002 {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.3 weight 0.005
}
host k-worker001-ssd {
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.4 weight 0.005
}
host k-worker002-ssd {
# weight 0.010
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.005
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
# weight 0.029
alg straw2
hash 0 # rjenkins1
item k-master weight 0.005
item k-worker001 weight 0.005
item k-worker002 weight 0.005
}
root ssd {
# weight 0.029
alg straw2
hash 0 # rjenkins1
item k-worker001-ssd weight 0.005
item k-worker002-ssd weight 0.005
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule demo_rule {
id 10
type replicated
min_size 1
max_size 10
step take ssd
step chooseleaf firstn 0 type host
step emit
}
# end crush map
[root@k-master ceph]#
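Before loading the edited map into the cluster, it is worth compiling it and testing the new rule offline; if demo_rule (id 10) is correct, every simulated placement should land only on osd.4 and osd.5. A sketch (the compile step is the same one used below; the --test run is the extra check):

# Compile the edited text map; crushtool reports syntax errors at this point
crushtool -c crushmap.txt -o crushmap-new.bin

# Simulate placements for the new rule (id 10) with 2 replicas;
# only the ssd OSDs (osd.4 / osd.5) should appear in the mappings
crushtool -i crushmap-new.bin --test --show-mappings --rule 10 --num-rep 2 --min-x 0 --max-x 9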
[2] Apply the new map
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.02939 root default
-3 0.00980 host k-master
0 hdd 0.00490 osd.0 up 1.00000 1.00000
1 hdd 0.00490 osd.1 up 1.00000 1.00000
-5 0.00980 host k-worker001
2 hdd 0.00490 osd.2 up 1.00000 1.00000
4 ssd 0.00490 osd.4 up 1.00000 1.00000
-7 0.00980 host k-worker002
3 hdd 0.00490 osd.3 up 1.00000 1.00000
5 ssd 0.00490 osd.5 up 1.00000 1.00000
[root@k-master ceph]# vim crushmap.txt
[root@k-master ceph]# crushtool -c crushmap.txt -o crushmap-new.bin
[root@k-master ceph]# ll crushmap-new.bin
-rw-r--r-- 1 root root 1920 Jul 29 15:35 crushmap-new.bin
[root@k-master ceph]# ceph osd setcrushmap -i crushmap-new.bin
19
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-12 0.00998 root ssd
-10 0.00499 host k-worker001-ssd
4 ssd 0.00499 osd.4 up 1.00000 1.00000
-11 0.00499 host k-worker002-ssd
5 ssd 0.00499 osd.5 up 1.00000 1.00000
-1 0.01497 root default
-3 0.00499 host k-master
0 hdd 0.00499 osd.0 up 1.00000 1.00000
1 hdd 0.00499 osd.1 up 1.00000 1.00000
-5 0.00499 host k-worker001
2 hdd 0.00499 osd.2 up 1.00000 1.00000
-7 0.00499 host k-worker002
3 hdd 0.00499 osd.3 up 1.00000 1.00000
[root@k-master ceph]#
As you can see, there are now two roots in the hierarchy: ssd and default.
4. Change a pool's rule for testing
[root@k-master ceph]# ceph osd lspools
1 rbd,3 ceph,4 k8s,6 ceph-demo,7 ceph-tt,8 rbd-test,9 .rgw.root,10 default.rgw.control,11 default.rgw.meta,12 default.rgw.log,13 default.rgw.buckets.index,14 default.rgw.buckets.data,15 cephfs_metadata,16 cephfs_data,
Get the pool's current rule:
[root@k-master ceph]# ceph osd pool get ceph-demo crush_rule
crush_rule: replicated_rule
List the existing rules:
[root@k-master ceph]# ceph osd crush rule ls
replicated_rule
demo_rule
Set the pool's rule to demo_rule:
[root@k-master ceph]# ceph osd pool set ceph-demo crush_rule demo_rule
set pool 6 crush_rule to demo_rule
[root@k-master ceph]# ceph osd pool get ceph-demo crush_rule
crush_rule: demo_rule
[root@k-master ceph]#
Create an image and check where its data lands:
[root@k-master ceph]# rbd create ceph-demo/crush-demo.img --size 3G
[root@k-master ceph]# ceph osd map ceph-demo crush-demo.img
osdmap e255 pool 'ceph-demo' (6) object 'crush-demo.img' -> pg 6.d267742c (6.c) -> up ([4,5], p4) acting ([4,5], p4)
[root@k-master ceph]#
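Switching a pool to a different CRUSH rule makes its existing PGs remap, so some data movement follows. A sketch for watching the migration and confirming that all of ceph-demo's PGs now sit on the ssd OSDs:

# Overall cluster and per-pool recovery status
ceph -s
ceph osd pool stats ceph-demo

# List the pool's PGs with their acting OSDs;
# after the rule change they should all map to osd.4 / osd.5
ceph pg ls-by-pool ceph-demo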
5. Clean up and restore
[root@k-master ceph]# ceph osd pool set ceph-demo crush_rule replicated_rule
set pool 6 crush_rule to replicated_rule
[root@k-master ceph]# clear
Restore the backed-up CRUSH map:
[root@k-master ceph]# ceph osd setcrushmap -i crushmap.bin
20
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.02939 root default
-3 0.00980 host k-master
0 hdd 0.00490 osd.0 up 1.00000 1.00000
1 hdd 0.00490 osd.1 up 1.00000 1.00000
-5 0.00980 host k-worker001
2 hdd 0.00490 osd.2 up 1.00000 1.00000
4 ssd 0.00490 osd.4 up 1.00000 1.00000
-7 0.00980 host k-worker002
3 hdd 0.00490 osd.3 up 1.00000 1.00000
5 ssd 0.00490 osd.5 up 1.00000 1.00000
[root@k-master ceph]#
[root@k-master ceph]# ceph osd crush rule ls
replicated_rule
[root@k-master ceph]#
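Restoring the original map triggers the reverse migration, so before moving on to the command-line approach it is sensible to wait until the cluster is healthy again, e.g.:

# Wait for recovery to finish before making further CRUSH changes
ceph -s
ceph health detail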
(2) Adjusting the CRUSH map with command-line tools
1. A series of add and move operations
[root@k-master ceph]# ceph osd crush add-bucket ssd root
added bucket ssd type root to crush map
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-13 0 root ssd
-1 0.02939 root default
-3 0.00980 host k-master
0 hdd 0.00490 osd.0 up 1.00000 1.00000
1 hdd 0.00490 osd.1 up 1.00000 1.00000
-5 0.00980 host k-worker001
2 hdd 0.00490 osd.2 up 1.00000 1.00000
4 ssd 0.00490 osd.4 up 1.00000 1.00000
-7 0.00980 host k-worker002
3 hdd 0.00490 osd.3 up 1.00000 1.00000
5 ssd 0.00490 osd.5 up 1.00000 1.00000
[root@k-master ceph]# ceph osd crush add-bucket k-worker001-ssd host
added bucket k-worker001-ssd type host to crush map
[root@k-master ceph]# ceph osd crush add-bucket k-worker002-ssd host
added bucket k-worker002-ssd type host to crush map
[root@k-master ceph]#
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-15 0 host k-worker002-ssd
-14 0 host k-worker001-ssd
-13 0 root ssd
-1 0.02939 root default
-3 0.00980 host k-master
0 hdd 0.00490 osd.0 up 1.00000 1.00000
1 hdd 0.00490 osd.1 up 1.00000 1.00000
-5 0.00980 host k-worker001
2 hdd 0.00490 osd.2 up 1.00000 1.00000
4 ssd 0.00490 osd.4 up 1.00000 1.00000
-7 0.00980 host k-worker002
3 hdd 0.00490 osd.3 up 1.00000 1.00000
5 ssd 0.00490 osd.5 up 1.00000 1.00000
[root@k-master ceph]#
[root@k-master ceph]# ceph osd crush move k-worker002-ssd root=ssd
moved item id -15 name 'k-worker002-ssd' to location {root=ssd} in crush map
[root@k-master ceph]# ceph osd crush move k-worker001-ssd root=ssd
moved item id -14 name 'k-worker001-ssd' to location {root=ssd} in crush map
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-13 0 root ssd
-14 0 host k-worker001-ssd
-15 0 host k-worker002-ssd
-1 0.02939 root default
-3 0.00980 host k-master
0 hdd 0.00490 osd.0 up 1.00000 1.00000
1 hdd 0.00490 osd.1 up 1.00000 1.00000
-5 0.00980 host k-worker001
2 hdd 0.00490 osd.2 up 1.00000 1.00000
4 ssd 0.00490 osd.4 up 1.00000 1.00000
-7 0.00980 host k-worker002
3 hdd 0.00490 osd.3 up 1.00000 1.00000
5 ssd 0.00490 osd.5 up 1.00000 1.00000
[root@k-master ceph]#
[root@k-master ceph]# ceph osd crush move osd.4 host=k-worker001-ssd root=ssd
moved item id 4 name 'osd.4' to location {host=k-worker001-ssd,root=ssd} in crush map
[root@k-master ceph]# ceph osd crush move osd.5 host=k-worker002-ssd root=ssd
moved item id 5 name 'osd.5' to location {host=k-worker002-ssd,root=ssd} in crush map
[root@k-master ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-13 0.00980 root ssd
-14 0.00490 host k-worker001-ssd
4 ssd 0.00490 osd.4 up 1.00000 1.00000
-15 0.00490 host k-worker002-ssd
5 ssd 0.00490 osd.5 up 1.00000 1.00000
-1 0.01959 root default
-3 0.00980 host k-master
0 hdd 0.00490 osd.0 up 1.00000 1.00000
1 hdd 0.00490 osd.1 up 1.00000 1.00000
-5 0.00490 host k-worker001
2 hdd 0.00490 osd.2 up 1.00000 1.00000
-7 0.00490 host k-worker002
3 hdd 0.00490 osd.3 up 1.00000 1.00000
[root@k-master ceph]#
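If these changes ever need to be rolled back by hand later, the moves can be reversed and the empty buckets removed. A sketch, assuming the original locations shown earlier:

# Move the SSD OSDs back under their original hosts in root "default"
ceph osd crush move osd.4 host=k-worker001 root=default
ceph osd crush move osd.5 host=k-worker002 root=default

# Remove the now-empty buckets (a bucket must be empty before removal)
ceph osd crush remove k-worker001-ssd
ceph osd crush remove k-worker002-ssd
ceph osd crush remove ssd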
2. Create a rule
[root@k-master ceph]# ceph osd crush rule ls
replicated_rule
[root@k-master ceph]# ceph osd crush rule create-replicated ssd-demo ssd host ssd
[root@k-master ceph]# ceph osd crush rule ls
replicated_rule
ssd-demo
[root@k-master ceph]#
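The create-replicated arguments are <name> <root> <failure-domain-type> [<device-class>]: here the rule is named ssd-demo, draws from root ssd, spreads replicas across hosts, and is restricted to class ssd. A hedged sketch of an analogous rule for the hdd devices (the hdd-demo name is hypothetical), plus a way to inspect what was generated:

# General form: ceph osd crush rule create-replicated <name> <root> <failure-domain> [<class>]
# Hypothetical companion rule that keeps data on hdd OSDs under root "default"
ceph osd crush rule create-replicated hdd-demo default host hdd

# Dump a rule's compiled steps to see its take/chooseleaf/emit sequence
ceph osd crush rule dump ssd-demo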
3. Test the rule
[root@k-master ceph]# ceph osd pool set ceph-demo crush_rule ssd-demo
set pool 6 crush_rule to ssd-demo
[root@k-master ceph]# ceph osd pool get ceph-demo crush_rule
crush_rule: ssd-demo
[root@k-master ceph]# rbd create ceph-demo/ssd-demo.img --size 3G
[root@k-master ceph]# ceph osd map ceph-demo crush-demo.img
osdmap e292 pool 'ceph-demo' (6) object 'crush-demo.img' -> pg 6.d267742c (6.c) -> up ([4,5], p4) acting ([4,5], p4)
[root@k-master ceph]#
(3) Things to watch out for when editing the CRUSH map
1. Back up the map before editing.
2. Plan the CRUSH map well from the start; otherwise, changing it later will trigger large data migrations.
3. Set osd_crush_update_on_start to false, so that OSDs do not re-register themselves at their default CRUSH location on startup and overwrite manual placement.
[root@k-master ceph]# ceph daemon /var/run/ceph/ceph-osd.1.asok config show | grep 'osd_crush_update_on_start'
"osd_crush_update_on_start": "true",
[root@k-master ceph]# tail -n 2 ceph.conf
[osd]
osd crush update on start = false
[root@k-master ceph]# ceph-deploy --overwrite-conf config push k-master k-worker001 k-worker002
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (2.0.1): /usr/bin/ceph-deploy --overwrite-conf config push k-master k-worker001 k-worker002
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] overwrite_conf : True
[ceph_deploy.cli][INFO ] subcommand : push
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] cd_conf :
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] client : ['k-master', 'k-worker001', 'k-worker002']
[ceph_deploy.cli][INFO ] func :
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.config][DEBUG ] Pushing config to k-master
[k-master][DEBUG ] connected to host: k-master
[k-master][DEBUG ] detect platform information from remote host
[k-master][DEBUG ] detect machine type
[k-master][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
[ceph_deploy.config][DEBUG ] Pushing config to k-worker001
[k-worker001][DEBUG ] connected to host: k-worker001
[k-worker001][DEBUG ] detect platform information from remote host
[k-worker001][DEBUG ] detect machine type
[k-worker001][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
[ceph_deploy.config][DEBUG ] Pushing config to k-worker002
[k-worker002][DEBUG ] connected to host: k-worker002
[k-worker002][DEBUG ] detect platform information from remote host
[k-worker002][DEBUG ] detect machine type
[k-worker002][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
[root@k-master ceph]# systemctl restart ceph-osd.target
[root@k-master ceph]# ssh k-worker001
Last login: Wed Jul 29 14:45:39 2020 from 192.168.43.201
[root@k-worker001 ~]# systemctl restart ceph-osd.target
[root@k-worker001 ~]# exit
logout
Connection to k-worker001 closed.
[root@k-master ceph]# ssh k-worker002
Last login: Wed Jul 29 14:45:38 2020 from 192.168.43.201
[root@k-worker002 ~]# systemctl restart ceph-osd.target
[root@k-worker002 ~]# exit
logout
Connection to k-worker002 closed.
[root@k-master ceph]# ceph daemon /var/run/ceph/ceph-osd.1.asok config show | grep 'osd_crush_update_on_start'
"osd_crush_update_on_start": "false",
[root@k-master ceph]#
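On releases with the centralized configuration database (Mimic/Nautilus and later), the same option can likely be set without editing ceph.conf and pushing it with ceph-deploy; treat this as an alternative sketch rather than the method used above:

# Store the option in the cluster's central config (applies to all OSDs)
ceph config set osd osd_crush_update_on_start false

# Confirm the value the daemons will pick up
ceph config get osd osd_crush_update_on_start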