RGW数据分布及寻址
RGW是一个对象处理网关。数据实际存储在ceph集群中。利用librados的接口,与ceph集群通信。RGW主要存储三类数据:元数据(metadata)、索引数据(bucket index)、数据(data)。这三类数据一般存储在不同的pool中,元数据也分多种元数据,存在不同的ceph pool中。
1、 Metadata
元数据信息包括:user,bucket,以及bucket.instance。其中:
user: 主要是对象存储的用户信息
bucket:主要维护bucket name与bucket instance id之间的映射信息
bucket.instance:维护了bucket instance信息
查看user的元数据如下:
radosgw-admin metadata list user:
//user相关的数据信息
[
"ups3",
"56789abcdef0123456789abcdef0123456789abcdef0123456789abcdef01234",
"testx$9876543210abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
"0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
"test",
"testid"
]
radosgw-admin metadata get user:testid:
//获取用户testid的数据信息
{
"key": "user:testid",
"ver": {
"tag": "_cAI-F3h-MuIUGNtXh2RgHlf",
"ver": 1
},
"mtime": "2018-11-05 08:45:47.819290Z",
"data": {
"user_id": "testid",
"display_name": "M. Tester",
"email": "[email protected]",
"suspended": 0,
"max_buckets": 1000,
"auid": 0,
"subusers": [],
"keys": [
{
"user": "testid",
"access_key": "0555b35654ad1656d804",
"secret_key": "h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="
}
],
"swift_keys": [],
"caps": [],
"op_mask": "read, write, delete",
"default_placement": "",
"placement_tags": [],
"bucket_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"user_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"temp_url_keys": [],
"type": "rgw",
"mfa_ids": [],
"attrs": [
{
"key": "user.rgw.idtag",
"val": ""
}
]
}
}
radosgw-admin metadata list bucket:
//bucket相关的元数据
[
"first"
]
radosgw-admin metadata get bucket:first:
//bucket相关的元数据
{
"key": "bucket:first",
"ver": {
"tag": "_Hx7x_Mor001U3WM8zfUAH6C",
"ver": 1
},
"mtime": "2018-11-05 09:14:01.135441Z",
"data": {
"bucket": {
"name": "first",
"marker": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
"bucket_id": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"owner": "testid",
"creation_time": "2018-11-05 09:13:59.491939Z",
"linked": "true",
"has_bucket_info": "false"
}
}
radosgw-admin metadata list bucket.instance:
//bucket.instance相关的元数据
[
"first:b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1"
]
radosgw-admin metadata get bucket.instance:first:{bucket_id}
//bucket.instance相关的元数据
{
"key": "bucket.instance:first:b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
"ver": {
"tag": "_CUSoooY8ectqQPoZELOd_BE",
"ver": 1
},
"mtime": "2018-11-05 09:14:01.132899Z",
"data": {
"bucket_info": {
"bucket": {
"name": "first",
"marker": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
"bucket_id": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"creation_time": "2018-11-05 09:13:59.491939Z",
"owner": "testid",
"flags": 0,
"zonegroup": "98906840-3c9c-4402-beec-1616bcf2e07c",
"placement_rule": "default-placement",
"has_instance_obj": "true",
"quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"num_shards": 0,
"bi_shard_hash_type": 0,
"requester_pays": "false",
"has_website": "false",
"swift_versioning": "false",
"swift_ver_location": "",
"index_type": 0,
"mdsearch_config": [],
"reshard_status": 0,
"new_bucket_instance_id": ""
},
"attrs": [
{
"key": "user.rgw.acl",
"val": "AgKNAAAAAwIXAAAABgAAAHRlc3RpZAkAAABNLiBUZXN0ZXIEA2oAAAABAQAAAAYAAAB0ZXN0aWQPAAAAAQAAAAYAAAB0ZXN0aWQFAzsAAAACAgQAAAAAAAAABgAAAHRlc3RpZAAAAAAAAAAAAgIEAAAADwAAAAkAAABNLiBUZXN0ZXIAAAAAAAAAAAAAAAAAAAAA"
},
{
"key": "user.rgw.idtag",
"val": ""
}
]
}
}
2、Bucket Index
bucket index主要维护的是一个bucket中object的索引信息。一个bucket对应一个或多个rados object(开启bucket shards下)。维护的是一个key-val的map结构,map存放在object的omap(rocksdb)中,key对应的rgw object,val是关于rgw object的一些元数据信息,检索bucket的存放的object时,需要这些信息。omap也包含一个Header,其存放的是bucket account info,如此bucket中Object的个数,总的size等。
3、Data
rgw object内容,存放在一个或多个rados object中。这些rados object分为head和tail两部分,head最多可以容纳4MB的数据(见下文manifest中的max_head_size),如果一个rgw object的大小小于4MB,那么只有head。否则剩余的数据会按照集群rados object的大小条带化分割成多个rados object。
数据检索路径
- .rgw.root
- {zone}.rgw.control
- {zone}.rgw.meta
- {zone}.rgw.log
- {zone}.rgw.buckets.index
- {zone}.rgw.buckets.data
- {zone}.rgw.buckets.non-ec
在Pool: {zone}.rgw.meta利用namespace隔离多个存储空间:
- root: bucket及bucket-instance
- users.keys: 用户key
- users.email:用户Email,object的key值=email
- users.swift: swift账号
- users.uid: s3用户及用户的Bucket信息
- roles:
- heap:
对于Pool: {zone}.rgw.log也包含多个namespace:
- gc: 垃圾清理
- lc: lifecycle
- reshard:
当检索对象存储中的一个object时,会包含三个要素:user,bucket,object。user主要是RGW用于获取user id验证ACL;bucket及object用于确定object在pool中的位置。
User
user数据存储在{zone}.rgw.meta:users.uid中,如下:
[root@luminous1 ~]# rados -p upc.rgw.meta -N users.uid ls
ups3
ups3.buckets
包含两部分: ups3: user本身信息; ups3.buckets: 用户所属的bucket。
ups3: 用户的基本信息,及ACL/Bucket Quota/User Quota等;对应struct RGWUserInfo, 定义于rgw_common.h。
ups3.buckets:用户所属的Buckets,key-value结构,存放于omap结构中;对应struct cls_user_bucket_entry,定义于cls_user_types.h,数据操作如下:
# rados -p upc.rgw.meta -N users.uid listomapkeys ups3.buckets
first
# rados -p upc.rgw.meta -N users.uid getomapval ups3.buckets first ups3_bucket
Writing to ups3_bucket
# ceph-dencoder import ups3_bucket type cls_user_bucket_entry decode dump_json
{
"bucket": {
"name": "first",
"marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4"
},
"size": 1887,
"size_rounded": 4096,
"creation_time": "2019-07-22 02:48:17.945890Z",
"count": 1,
"user_stats_sync": "true"
}
通过{uid}.buckets可以查到用户拥有哪些bucket,并且可以获得这些bucket的上述基本数据(如size、count、creation_time等)。
Bucket
Bucket信息存放在{zone}.rgw.meta:root中,如下:
[root@luminous1 ~]# rados -p upc.rgw.meta -N root ls
.bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4
first
first: 记录了bucket与bucket_instance_id的对应关系,其对应于数据结构:struct RGWBucketEntryPoint
.bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4: bucket instance;寻址方式:.bucket.meta.{tenant}:{bucket.name}:{bucket_id};对应结构体:struct RGWBucketInfo。
其中Bucket ACL及IAM Policy存放在bucket instance object的attr中。如下:
# rados -p upc.rgw.meta -N root get .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 bucket_instance
通过读取bucket-instance object获取bucket的元数据信息
# ceph-dencoder import bucket_instance type RGWBucketInfo decode dump_json
{
"bucket": {
"name": "first",
"marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"creation_time": "2019-07-22 02:48:17.945890Z",
"owner": "ups3",
"flags": 0,
"zonegroup": "2e7ac7db-8e21-43ed-9f3c-5a061ce1c7e3",
"placement_rule": "default-placement",
"has_instance_obj": "true",
"quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"num_shards": 0,
"bi_shard_hash_type": 0,
"requester_pays": "false",
"has_website": "false",
"swift_versioning": "false",
"swift_ver_location": "",
"index_type": 0,
"mdsearch_config": [],
"reshard_status": 0,
"new_bucket_instance_id": ""
}
获取Bucket ACL及IAM Policy数据如下:
ACL:
#rados -p upc.rgw.meta -N root getxattr .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 user.rgw.acl > acl
#ceph-dencoder type RGWAccessControlPolicy import acl decode dump_json
{
"acl": {
"acl_user_map": [
{
"user": "ups3",
"acl": 15
}
],
"acl_group_map": [],
"grant_map": [
{
"id": "ups3",
"grant": {
"type": {
"type": 0
},
"id": "ups3",
"email": "",
"permission": {
"flags": 15
},
"name": "S3 User",
"group": 0,
"url_spec": ""
}
}
]
},
"owner": {
"id": "ups3",
"display_name": "S3 User"
}
}
Bucket Policy:
#rados -p upc.rgw.meta -N root getxattr .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 user.rgw.iam-policy
{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow",
"Principal": {"AWS": ["arn:aws:iam::usfolks:user/fred"]},
"Action": "s3:PutObjectAcl",
"Resource": [
"arn:aws:s3:::first/*"
]
}]
}
Object
- Bucket Index:
Bucket Index: Bucket中包含的Object信息,都存放在一个或多个Object的 omap 中。此omap为一个key-value结构,key为object的名称,value对应struct rgw_bucket_dir_entry(定义于cls_rgw_types.h)。
Bucket Index Object:
pool: '{zone}.rgw.buckets.index';
Object名称:1. 无shard下,'.dir.{bucket_id}';2. shard,'.dir.{bucket_id}.{shard_id}'。
如下:
[root@luminous1 ~]# rados -p upc.rgw.buckets.index ls
.dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4
在此bucket下,有一个object: ntp.conf:
[root@luminous1 ~]# rados -p upc.rgw.buckets.index listomapkeys .dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4
ntp.conf
检索value:
[root@luminous1 ~]# rados -p upc.rgw.buckets.index getomapval .dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 ntp.conf object_key
Writing to object_key
[root@luminous1 ~]# ceph-dencoder type rgw_bucket_dir_entry import object_key decode dump_json
{
"name": "ntp.conf",
"instance": "",
"ver": {
"pool": 9,
"epoch": 1
},
"locator": "",
"exists": "true",
"meta": {
"category": 1,
"size": 1887,
"mtime": "2019-07-22 02:48:36.088530Z",
"etag": "385c339c343a2495fd4479c992bfeb10",
"owner": "ups3",
"owner_display_name": "S3 User",
"content_type": "text/plain",
"accounted_size": 1887,
"user_data": ""
},
"tag": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44192.174",
"flags": 0,
"pending_map": [],
"versioned_epoch": 0
}
omap header记录了以下统计信息:
[root@luminous1 ~]# rados -p upc.rgw.buckets.index getomapheader .dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 index_object_header
Writing to index_object_header
[root@luminous1 ~]# ceph-dencoder type rgw_bucket_dir_header import index_object_header decode dump_json
{
"ver": 33,
"master_ver": 0,
"stats": [
1,
{
"total_size": 1887,
"total_size_rounded": 4096,
"num_entries": 1,
"actual_size": 1887
},
3,
{
"total_size": 0,
"total_size_rounded": 0,
"num_entries": 0,
"actual_size": 0
},
{
"reshard_status": "none",
"new_bucket_instance_id": "",
"num_shards": -1
}
]
}
- Object Data:
对象存储object的数据存放在pool: {zone}.rgw.buckets.data中。object的构成及寻址分为以下两类:
一个RGW Object可以由一个或多个rados object构成。其中第一个rados object是此RGW Object的head对象,主要包含一些元数据信息,如manifest、ACLs、content type、ETag以及用户自定义的metadata。这些metadata存放在此head对象的xattr中。其中manifest描述了此rgw object的分布情况。同时,此head对象最多可额外容纳4MB数据,如果RGW Object大小小于4MB,那么此RGW Object就不会分片,只有此head对象。
如下检索:
[root@luminous1 ~]# rados -p upc.rgw.buckets.data listxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf
user.rgw.acl
user.rgw.content_type
user.rgw.etag
user.rgw.idtag
user.rgw.manifest
user.rgw.pg_ver
user.rgw.source_zone
user.rgw.tail_tag
user.rgw.x-amz-content-sha256
user.rgw.x-amz-date
user.rgw.x-amz-meta-s3cmd-attrs
user.rgw.x-amz-storage-class
- 非multipart上传的object
目前bucket下有一个ntp.conf,大小<4MB。检索其manifest:
# rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf user.rgw.manifest > manifest
# ceph-dencoder type RGWObjManifest import manifest decode dump_json
{
"objs": [],
"obj_size": 1887,
"explicit_objs": "false",
"head_size": 1887,
"max_head_size": 4194304,
"prefix": ".Y3GeEIYgfMSqzZKW6xUfX-dPtPSH50f_",
"rules": [
{
"key": 0,
"val": {
"start_part_num": 0,
"start_ofs": 4194304,
"part_size": 0,
"stripe_max_size": 4194304,
"override_prefix": ""
}
}
],
"tail_instance": "",
"tail_placement": {
"bucket": {
"name": "first",
"marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"placement_rule": "default-placement"
}
}
如上:
max_head_size: 表示head对象最大size;
head_size: 表示当前head 对象size;
prefix: 用于在rados中分片object的寻址。
RGW Object ACL:
# rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf user.rgw.acl > object_acl
# ceph-dencoder type RGWAccessControlPolicy import object_acl decode dump_json
{
"acl": {
"acl_user_map": [
{
"user": "ups3",
"acl": 15
}
],
"acl_group_map": [],
"grant_map": [
{
"id": "ups3",
"grant": {
"type": {
"type": 0
},
"id": "ups3",
"email": "",
"permission": {
"flags": 15
},
"name": "S3 User",
"group": 0,
"url_spec": ""
}
}
]
},
"owner": {
"id": "ups3",
"display_name": "S3 User"
}
}
上传一个>4MB的RGW Object,检索其manifest信息:
# dd if=/dev/zero of=./rgw_object bs=1024 count=13000
# s3cmd put rgw_object s3://first
# rados -p upc.rgw.buckets.data ls
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_1
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_3
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_2
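如上,Object分为4个rados对象:1个head和3个shadow分片。按head、shadow 1、shadow 2、shadow 3的顺序手动拼接(head中存放了前4MB数据,必须放在最前),检查md5值与原文件相同:
# md5sum rgw_object
315e281f1e162ea635b56f7e0a2e25d8 rgw_object
# rados -p upc.rgw.buckets.data get 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object head
# cat head > rgw_object_down
# rados -p upc.rgw.buckets.data get 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_1 1
# cat 1 >> rgw_object_down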
# rados -p upc.rgw.buckets.data get 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_2 2
# cat 2 >> rgw_object_down
# rados -p upc.rgw.buckets.data get 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_3 3
# cat 3 >> rgw_object_down
# md5sum rgw_object_down
315e281f1e162ea635b56f7e0a2e25d8 rgw_object_down
Manifest信息:
[root@luminous1 ~]# rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object user.rgw.manifest > manifest
[root@luminous1 ~]# ceph-dencoder type RGWObjManifest import manifest decode dump_json
{
"objs": [],
"obj_size": 13312000,
"explicit_objs": "false",
"head_size": 4194304,
"max_head_size": 4194304,
"prefix": ".KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_",
"rules": [
{
"key": 0,
"val": {
"start_part_num": 0,
"start_ofs": 4194304,
"part_size": 0,
"stripe_max_size": 4194304,
"override_prefix": ""
}
}
],
"tail_instance": "",
"tail_placement": {
"bucket": {
"name": "first",
"marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"placement_rule": "default-placement"
}
}
根据manifest检索对象:
- head: {bucket_id}_{object_name},最大容纳4MB大小数据;
- shadow分片: {bucket_id}__shadow_{prefix}{id} //prefix本身以"_"结尾;id根据RGW Object大小及Rados Object(stripe_max_size)大小计算。
- multipart object
对于一个大的RGW Object,会被切割成多个独立的RGW Object上传,称为multipart。multipart的优势是断点续传。s3cmd默认切割大小为15MB。
在此,上传一个约46MB(48128000字节)大小的Object。
[root@luminous1 ~]# s3cmd put rgw_object s3://first
upload: 'rgw_object' -> 's3://first/rgw_object' [part 1 of 4, 15MB] [1 of 1]
15728640 of 15728640 100% in 1s 11.80 MB/s done
upload: 'rgw_object' -> 's3://first/rgw_object' [part 2 of 4, 15MB] [1 of 1]
15728640 of 15728640 100% in 1s 13.61 MB/s done
upload: 'rgw_object' -> 's3://first/rgw_object' [part 3 of 4, 15MB] [1 of 1]
15728640 of 15728640 100% in 1s 14.76 MB/s done
upload: 'rgw_object' -> 's3://first/rgw_object' [part 4 of 4, 920kB] [1 of 1]
942080 of 942080 100% in 0s 6.38 MB/s done
分成了四个部分上传,查看rados对象:
[root@luminous1 ~]# rados -p upc.rgw.buckets.data ls | sort
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.4
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1_1
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1_2
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1_3
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2_1
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2_2
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2_3
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3_1
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3_2
1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3_3
包含了三类对象:head,multipart,shadow。
- head: {bucket_id}_{object_name},只在xattr中存元数据,并不实际包含object data;
- multipart: multipart分段首对象,{bucket_id}__multipart_{prefix}.{multipart_id},其中`multipart_id`根据`manifest`计算;
- shadow: 从属于multipart的分段对象,{bucket_id}__shadow_{prefix}.{multipart_id}_{shadow_id},`shadow_id`:根据`manifest.rule.part_size`及 `manifest.rule.stripe_max_size`计算。
multipart下的manifest:
# rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object user.rgw.manifest > manifest
# ceph-dencoder type RGWObjManifest import manifest decode dump_json
{
"objs": [],
"obj_size": 48128000,
"explicit_objs": "false",
"head_size": 0,
"max_head_size": 0,
"prefix": "rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd",
"rules": [
{
"key": 0,
"val": {
"start_part_num": 1,
"start_ofs": 0,
"part_size": 15728640,
"stripe_max_size": 4194304,
"override_prefix": ""
}
},
{
"key": 47185920,
"val": {
"start_part_num": 4,
"start_ofs": 47185920,
"part_size": 942080,
"stripe_max_size": 4194304,
"override_prefix": ""
}
}
],
"tail_instance": "",
"tail_placement": {
"bucket": {
"name": "first",
"marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
"tenant": "",
"explicit_placement": {
"data_pool": "",
"data_extra_pool": "",
"index_pool": ""
}
},
"placement_rule": "default-placement"
}
}
所有的object的检索是根据上述manifest信息构建object index:
- head_size: head对象大小,此处为0,表示无object data,只有xattr的元数据
- obj_size: RGW Object的原始大小
- prefix: object index前缀
- rules: multipart 及 shadow object检索信息。如在此manifest表示有4个multipart,其中1-3的大小为15MB,第4个为920KB
- rules.start_part_num: multipart 序号
- rules.part_size: 此multipart的分段大小
- rules.stripe_max_size: rados object的大小
在以上的信息中,此RGW Object大小为48128000字节,分为4段,前三段各15MB,最后一段为920KB。同时每段存储在rados集群中的条带化大小为4MB。因此每个15MB大小的分段又分为4个rados object:1个multipart首对象及3个shadow分片;920KB大小的分段只有1个multipart首对象。
.rgw.root:
包含的都是zone,zonegroup,realm等信息
# rados -p .rgw.root ls
period_config.b30c383b-25d3-46aa-8ef1-aedbd0196579
default.realm
periods.383529f4-1566-4061-a4ed-5a39188845dc.latest_epoch
default.zone.b30c383b-25d3-46aa-8ef1-aedbd0196579
zone_names.upc
periods.383529f4-1566-4061-a4ed-5a39188845dc.1
realms.b30c383b-25d3-46aa-8ef1-aedbd0196579
default.zonegroup.b30c383b-25d3-46aa-8ef1-aedbd0196579
realms_names.sh
zonegroup_info.2e7ac7db-8e21-43ed-9f3c-5a061ce1c7e3
realms.b30c383b-25d3-46aa-8ef1-aedbd0196579.control
zone_info.1c60b268-0a5d-4718-ad02-e4b5bce824bf
zonegroups_names.pd