Ceph基于不同性能磁盘创建Pool

crush rule主要作用:
从OSD Map中的哪个节点开始查找
使用哪个节点作为故障隔离域
定位副本的搜索模式(广度优先 or 深度优先)


查看规则内容

ceph osd crush rule dump

本文介绍在使用ceph时,如何配置实现创建不同性能的pool,以用于不同的场景。

环境介绍

(ceph-mon)[root@node01 /]# ceph osd tree
ID WEIGHT   TYPE NAME              UP/DOWN REWEIGHT PRIMARY-AFFINITY 
-6  6.00000 root stat                                                
 9  1.00000     osd.9                   up  1.00000          1.00000 
10  1.00000     osd.10                  up  1.00000          1.00000 
11  1.00000     osd.11                  up  1.00000          1.00000 
12  1.00000     osd.12                  up  1.00000          1.00000 
13  1.00000     osd.13                  up  1.00000          1.00000 
14  1.00000     osd.14                  up  1.00000          1.00000 
-5  9.00000 root ssd                                                 
 0  1.00000     osd.0                   up  1.00000          1.00000 
 1  1.00000     osd.1                   up  1.00000          1.00000 
 2  1.00000     osd.2                   up  1.00000          1.00000 
 3  1.00000     osd.3                   up  1.00000          1.00000 
 4  1.00000     osd.4                   up  1.00000          1.00000 
 5  1.00000     osd.5                   up  1.00000          1.00000 
 6  1.00000     osd.6                   up  1.00000          1.00000 
 7  1.00000     osd.7                   up  1.00000          1.00000 
 8  1.00000     osd.8                   up  1.00000          1.00000 
-1 15.00000 root default                                             
-2  5.00000     host 172.21.196.11                                   
 0  1.00000         osd.0               up  1.00000          1.00000 
 2  1.00000         osd.2               up  1.00000          1.00000 
 4  1.00000         osd.4               up  1.00000          1.00000 
11  1.00000         osd.11              up  1.00000          1.00000 
13  1.00000         osd.13              up  1.00000          1.00000 
-3  5.00000     host 172.21.196.13                                   
 1  1.00000         osd.1               up  1.00000          1.00000 
 3  1.00000         osd.3               up  1.00000          1.00000 
 5  1.00000         osd.5               up  1.00000          1.00000 
10  1.00000         osd.10              up  1.00000          1.00000 
14  1.00000         osd.14              up  1.00000          1.00000 
-4  5.00000     host 172.21.196.12                                   
 6  1.00000         osd.6               up  1.00000          1.00000 
 7  1.00000         osd.7               up  1.00000          1.00000 
 8  1.00000         osd.8               up  1.00000          1.00000 
 9  1.00000         osd.9               up  1.00000          1.00000 
12  1.00000         osd.12              up  1.00000          1.00000 

这里是我已经配置好的osd信息,未配置之前是没有root stat,root ssd这2部分内容的(其中stat这个bucket对应SATA磁盘),一共有15个osd,0–8是SSD磁盘,9–14是SATA磁盘。

获取crush map

(ceph-mon)[root@node01 /]# ceph osd getcrushmap -o crushmapgot 
got crush map from osdmap epoch 380

当前目录下会生成一个crushmapgot文件。

反编译crush map

(ceph-mon)[root@node01 /]#  crushtool -d crushmapgot -o decrushmap

当前目录下会生成一个decrushmap文件。

修改crush map

(ceph-mon)[root@node01 /]# cat decrushmap 
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host 172.21.196.11 {
    id -2       # do not change unnecessarily
    # weight 5.000
    alg straw
    hash 0  # rjenkins1
    item osd.0 weight 1.000
    item osd.2 weight 1.000
    item osd.4 weight 1.000
    item osd.11 weight 1.000
    item osd.13 weight 1.000
}
host 172.21.196.13 {
    id -3       # do not change unnecessarily
    # weight 5.000
    alg straw
    hash 0  # rjenkins1
    item osd.1 weight 1.000
    item osd.3 weight 1.000
    item osd.5 weight 1.000
    item osd.10 weight 1.000
    item osd.14 weight 1.000
}
host 172.21.196.12 {
    id -4       # do not change unnecessarily
    # weight 5.000
    alg straw
    hash 0  # rjenkins1
    item osd.6 weight 1.000
    item osd.7 weight 1.000
    item osd.8 weight 1.000
    item osd.9 weight 1.000
    item osd.12 weight 1.000
}
root default {
    id -1       # do not change unnecessarily
    # weight 15.000
    alg straw
    hash 0  # rjenkins1
    item 172.21.196.11 weight 5.000
    item 172.21.196.13 weight 5.000
    item 172.21.196.12 weight 5.000
}

#####新增2个bucket
# -------------------------------------------------------------
root ssd {
    id -5       # do not change unnecessarily
    # weight 9.000
    alg straw
    hash 0  # rjenkins1
    item osd.0 weight 1.000
    item osd.1 weight 1.000
    item osd.2 weight 1.000
    item osd.3 weight 1.000
    item osd.4 weight 1.000
    item osd.5 weight 1.000
    item osd.6 weight 1.000
    item osd.7 weight 1.000
    item osd.8 weight 1.000
}
root stat {
    id -6       # do not change unnecessarily
    # weight 6.000
    alg straw
    hash 0  # rjenkins1
    item osd.9 weight 1.000
    item osd.10 weight 1.000
    item osd.11 weight 1.000
    item osd.12 weight 1.000
    item osd.13 weight 1.000
    item osd.14 weight 1.000
}
# -------------------------------------------------------------------------

# rules
rule replicated_ruleset {
    ruleset 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}
rule disks {
    ruleset 1
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}

##新增2个rule
# ----------------------------------------------------------------------
rule ssd {
    ruleset 2
    type replicated
    min_size 1
    max_size 10
    step take ssd
    step chooseleaf firstn 0 type osd
    step emit
}
rule stat {
    ruleset 3
    type replicated
    min_size 1
    max_size 10
    step take stat
    step chooseleaf firstn 0 type osd
    step emit
}
# -----------------------------------------------------------------------
# end crush map

以上分割线内部内容为新增内容,主要就是增加了2个bucket和2个rules。

# rules
rule replicated_ruleset {
    ruleset 0   #rule编号
    type replicated   #定义pool类型为replicated(还有erasure code,即纠删码模式)
    min_size 1        #pool中最小指定的副本数量不能小于1
    max_size 10       #pool中最大指定的副本数量不能大于10 
    step take default #定义pg查找副本的入口点
    step chooseleaf firstn 0 type host  #选叶子节点、深度优先、隔离host
    step emit  #结束

}

编译crush map

(ceph-mon)[root@node01 /]# crushtool -c decrushmap -o newcrushmap

注入crush map

(ceph-mon)[root@node01 /]# ceph osd setcrushmap -i newcrushmap

创建资源池

(ceph-mon)[root@node01 /]#  ceph osd pool create ssd_pool2   256  256
pool 'ssd_pool2' created
(ceph-mon)[root@node01 /]#  ceph osd pool create sata_pool2   256  256
pool 'sata_pool2' created
(ceph-mon)[root@node01 /]# ceph osd dump|grep ssd_pool2
pool 34 'ssd_pool2' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 256 pgp_num 256 last_change 381 flags hashpspool stripe_width 0
(ceph-mon)[root@node01 /]# ceph osd dump|grep sata_pool2
pool 35 'sata_pool2' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 256 pgp_num 256 last_change 383 flags hashpspool stripe_width 0

注意:刚刚创建的两个资源池ssd_pool2 和sata_pool2 的 crush_ruleset 都是0,下面需要修改。

修改资源池存储规则

(ceph-mon)[root@node01 /]# ceph osd pool set ssd_pool2 crush_ruleset 2
set pool 34 crush_ruleset to 2
(ceph-mon)[root@node01 /]# ceph osd pool set sata_pool2 crush_ruleset 3
set pool 35 crush_ruleset to 3
(ceph-mon)[root@node01 /]# ceph osd dump|grep ssd_pool2
pool 34 'ssd_pool2' replicated size 3 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 256 pgp_num 256 last_change 385 flags hashpspool stripe_width 0
(ceph-mon)[root@node01 /]# ceph osd dump|grep sata_pool2
pool 35 'sata_pool2' replicated size 3 min_size 1 crush_ruleset 3 object_hash rjenkins pg_num 256 pgp_num 256 last_change 386 flags hashpspool stripe_width 0

修改为对应的rule。

验证

验证前先看看ssd_pool2 和sata_pool2 里面是否有对象

(ceph-mon)[root@node01 /]# rados ls -p ssd_pool2
(ceph-mon)[root@node01 /]# rados ls -p sata_pool2

是空的, 用rados命令 添加对象到两个资源池中

(ceph-mon)[root@node01 /]# rados -p ssd_pool2 put test_object1 /etc/hosts
(ceph-mon)[root@node01 /]# rados -p sata_pool2 put test_object2 /etc/hosts
(ceph-mon)[root@node01 /]# rados ls -p ssd_pool2
test_object1
(ceph-mon)[root@node01 /]# rados ls -p sata_pool2
test_object2

查看对象的分布情况

(ceph-mon)[root@node01 /]# ceph osd map ssd_pool2 test_object1
osdmap e392 pool 'ssd_pool2' (34) object 'test_object1' -> pg 34.d5066e42 (34.42) -> up ([7,2,1], p7) acting ([7,2,1], p7)
(ceph-mon)[root@node01 /]# ceph osd map sata_pool2 test_object2
osdmap e392 pool 'sata_pool2' (35) object 'test_object2' -> pg 35.c5cfe5e9 (35.e9) -> up ([12,14,10], p12) acting ([12,14,10], p12)

test_object1 的三个副本在osd.7、osd.2、osd.1上(均为SSD磁盘,属于root ssd),test_object2 的三个副本在osd.12、osd.14、osd.10上(均为SATA磁盘,属于root stat),说明两个资源池已按各自的crush rule落到对应性能的磁盘上。

你可能感兴趣的:(Ceph基于不同性能磁盘创建Pool)