Ceph CRUSH调整实例:在同一个主机中存在两种类型磁盘

说明:本文档针对同一个主机中同时存在SSD和SATA两种磁盘的OSD的情况,测试如何使用CRUSH对它们进行分层。
以下测试环境均为Ceph 0.94.x。

测试环境:

ceph-mon    节点1
ceph-osd1   10G*2  20G*2
ceph-osd2   10G*2  20G*2
ceph-osd3   10G*1  20G*1
假设10G为SSD,20G为SATA

ceph-osd1:
/dev/sdb1                5.0G   34M  5.0G   1% /var/lib/ceph/osd/ceph-0
/dev/sdc1                5.0G   34M  5.0G   1% /var/lib/ceph/osd/ceph-1
/dev/sdd1                 15G   35M   15G   1% /var/lib/ceph/osd/ceph-2
/dev/sde1                 15G   34M   15G   1% /var/lib/ceph/osd/ceph-3


ceph-osd2:
/dev/sdb1                5.0G   34M  5.0G   1% /var/lib/ceph/osd/ceph-4
/dev/sdc1                 15G   34M   15G   1% /var/lib/ceph/osd/ceph-5
/dev/sdd1                 15G   34M   15G   1% /var/lib/ceph/osd/ceph-6
/dev/sde1                5.0G   34M  5.0G   1% /var/lib/ceph/osd/ceph-7

ceph-osd3:
/dev/sdb1                5.0G   34M  5.0G   1% /var/lib/ceph/osd/ceph-8
/dev/sdc1                 15G   34M   15G   1% /var/lib/ceph/osd/ceph-9

$ ceph osd tree
ID WEIGHT  TYPE NAME          UP/DOWN REWEIGHT PRIMARY-AFFINITY 
-1 0.04997 root default                                         
-2 0.01999     host ceph-osd1                                   
 0       0         osd.0           up  1.00000          1.00000 
 1       0         osd.1           up  1.00000          1.00000 
 2 0.00999         osd.2           up  1.00000          1.00000 
 3 0.00999         osd.3           up  1.00000          1.00000 
-3 0.01999     host ceph-osd2                                   
 4       0         osd.4           up  1.00000          1.00000 
 5 0.00999         osd.5           up  1.00000          1.00000 
 6 0.00999         osd.6           up  1.00000          1.00000 
 7       0         osd.7           up  1.00000          1.00000 
-4 0.00999     host ceph-osd3                                   
 8       0         osd.8           up  1.00000          1.00000 
 9 0.00999         osd.9           up  1.00000          1.00000 

操作:

导出crush map

$ ceph osd getcrushmap -o crushmap.map

将二进制的crush map反编译为可读的文本格式

$ crushtool -d crushmap.map -o crushmap.txt

原先的crush map:

# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable straw_calc_version 1

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host ceph-osd1 {
    id -2        # do not change unnecessarily
    # weight 0.020
    alg straw
    hash 0    # rjenkins1
    item osd.0 weight 0.000
    item osd.1 weight 0.000
    item osd.2 weight 0.010
    item osd.3 weight 0.010
}
host ceph-osd2 {
    id -3        # do not change unnecessarily
    # weight 0.020
    alg straw
    hash 0    # rjenkins1
    item osd.4 weight 0.000
    item osd.5 weight 0.010
    item osd.6 weight 0.010
    item osd.7 weight 0.000
}
host ceph-osd3 {
    id -4        # do not change unnecessarily
    # weight 0.010
    alg straw
    hash 0    # rjenkins1
    item osd.8 weight 0.000
    item osd.9 weight 0.010
}
root default {
    id -1        # do not change unnecessarily
    # weight 0.050
    alg straw
    hash 0    # rjenkins1
    item ceph-osd1 weight 0.020
    item ceph-osd2 weight 0.020
    item ceph-osd3 weight 0.010
}

# rules
rule replicated_ruleset {
    ruleset 0
    type replicated
    min_size 1
    max_size 10
    step take default
    step chooseleaf firstn 0 type host
    step emit
}

# end crush map

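编辑后(将crushmap.txt修改后另存为crushmapnew.txt):
说明:增加了一个介于osd和host之间的type(diskarray),将一个主机上的资源按磁盘类型分隔为两组(ssd和sata)。
编辑后的map中不再保留host桶和default根,各diskarray桶直接挂在新建的ssd和sata两个root之下;由于每个diskarray只包含同一台主机上的同类磁盘,规则中以diskarray为单位选择副本,仍可保证副本分布在不同主机上。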

# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable straw_calc_version 1

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9

# types
type 0 osd
type 1 diskarray
type 2 host
type 3 chassis
type 4 rack
type 5 row
type 6 pdu
type 7 pod
type 8 room
type 9 datacenter
type 10 region
type 11 root

# buckets
diskarray ceph-osd1-ssd {
    id -1
    alg straw
    hash 0
    item osd.0 weight 0.005
    item osd.1 weight 0.005
}

diskarray ceph-osd1-sata {
    id -2
    alg straw
    hash 0
    item osd.2 weight 0.015
    item osd.3 weight 0.015
}

diskarray ceph-osd2-ssd {
    id -3
    alg straw
    hash 0
    item osd.4 weight 0.005
    item osd.7 weight 0.005
}

diskarray ceph-osd2-sata {
    id -4
    alg straw
    hash 0
    item osd.5 weight 0.015
    item osd.6 weight 0.015
}

diskarray ceph-osd3-ssd {
    id -5
    alg straw
    hash 0
    item osd.8 weight 0.005
}

diskarray ceph-osd3-sata {
    id -6
    alg straw
    hash 0
    item osd.9 weight 0.015
}    

root ssd {
    id -7
    alg straw
    hash 0
    item ceph-osd1-ssd weight 0.010
    item ceph-osd2-ssd weight 0.010
    item ceph-osd3-ssd weight 0.005
}

root sata {
    id -8
    alg straw
    hash 0
    item ceph-osd1-sata weight 0.030
    item ceph-osd2-sata weight 0.030
    item ceph-osd3-sata weight 0.015
}

# rules
rule ssd_ruleset {
    ruleset 0
    type replicated
    min_size 1
    max_size 4
    step take ssd
    step chooseleaf firstn 0 type diskarray
    step emit
}

rule sata_ruleset {
    ruleset 1
    type replicated
    min_size 1
    max_size 5
    step take sata
    step chooseleaf firstn 0 type diskarray
    step emit
}

# end crush map

重新编译为二进制:

$ crushtool -c crushmapnew.txt -o crushmapnew.map

导入ceph:

$ ceph osd setcrushmap -i crushmapnew.map

创建不同类型的pool:

$ ceph osd pool create ssdpool 128 ssd_ruleset
$ ceph osd pool create satapool 128 sata_ruleset

注意:
0.94版本中,需要在ceph.conf的[osd]段中为ceph osd设置:

osd_crush_update_on_start = false

否则在OSD启动时,OSD会自动更新自己在CRUSH中的位置,被移回以主机名命名的host这个容器下,导致上面自定义的分层失效。

10.2.x版本未测试。

你可能感兴趣的:(Ceph)