Ceph Crushmap新特性:class

下面是ceph version 12.2.1 luminous 安装后,查看crushmap文件的结果:

[root@osdnode1 ~]# cat crushmapdump-decompiled 
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
#第一个段落是可调参数
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class nvme
device 19 osd.19 class nvme
device 20 osd.20 class nvme
#id大于等于0的设备列表,它们是叶子节点。我发现跟常见的文档、指导手册相比,
#多了class hdd/nvme,一开始我也不知道它是干啥用的
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host osdnode1 {
        id -3           # do not change unnecessarily
        id -4 class hdd         # do not change unnecessarily
        id -9 class nvme                # do not change unnecessarily
        # weight 22.192
        alg straw2
        hash 0  # rjenkins1
        item osd.0 weight 3.638
        item osd.1 weight 3.638
        item osd.2 weight 3.638
        item osd.3 weight 3.638
        item osd.4 weight 3.638
        item osd.6 weight 3.638
        item osd.18 weight 0.364
}
#与之前的版本相比,每个bucket的id不再是一个,而是三个,这也引起了我的注意。
#第一个id后面没有任何说明,第二、第三个id后面都带着class属性。
host osdnode2 {
        id -5           # do not change unnecessarily
        id -6 class hdd         # do not change unnecessarily
        id -10 class nvme               # do not change unnecessarily
        # weight 22.192
        alg straw2
        hash 0  # rjenkins1
        item osd.5 weight 3.638
        item osd.7 weight 3.638
        item osd.8 weight 3.638
        item osd.11 weight 3.638
        item osd.12 weight 3.638
        item osd.14 weight 3.638
        item osd.19 weight 0.364
}
host osdnode3 {
        id -7           # do not change unnecessarily
        id -8 class hdd         # do not change unnecessarily
        id -11 class nvme               # do not change unnecessarily
        # weight 22.192
        alg straw2
        hash 0  # rjenkins1
        item osd.9 weight 3.638
        item osd.10 weight 3.638
        item osd.13 weight 3.638
        item osd.15 weight 3.638
        item osd.16 weight 3.638
        item osd.17 weight 3.638
        item osd.20 weight 0.364
}
root default {
        id -1           # do not change unnecessarily
        id -2 class hdd         # do not change unnecessarily
        id -12 class nvme               # do not change unnecessarily
        # weight 66.577
        alg straw2
        hash 0  # rjenkins1
        item osdnode1 weight 22.192
        item osdnode2 weight 22.192
        item osdnode3 weight 22.192
}

# rules
rule replicated_rule {
        id 0
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}

rule ssd-pool {
        ruleset 1
        type replicated
        min_size 1
        max_size 10
        step take default class nvme
        step chooseleaf firstn 0 type osd
        step emit
}

rule sata-pool {
        ruleset 2
        type replicated
        min_size 1
        max_size 10
        step take default class hdd
        step chooseleaf firstn 0 type osd
        step emit
}



# end crush map

class属性带来的两点变化

  1. 每个device定义了class;
  2. 每个bucket,如果它包含了n种class的device,则会有n+1个id编号:其中一个id代表包含所有device的bucket本身,其余n个id后面各带一个class属性,分别代表这个bucket里面只具备对应class属性的那部分子节点。

引用方法

在crush rule里面的take语句,既可以只引用bucket的名字,选择该bucket下所有的资源;也可以写成“<bucket名> class <xxx>”的形式,只选择该bucket下特定class的资源。
比如

        step take default class nvme

就是引用了名称为default的bucket里面所有类型为nvme的osd。

好处

有了这个新特性,使用ssd硬盘作为cache层的配置被大大简化:原来需要在crushmap里面写多个资源树,现在只需要一个,用class属性来区分即可。

你可能感兴趣的:(技术分享)