[root@yhg-2 cmds]# rados -p .rgw.buckets.index listomapvals .dir.yhg-yhg.5457.29 --cluster yhg | more afuhan value: (198 bytes) : 0000 : 08 03 c0 00 00 00 06 00 00 00 61 66 75 68 61 6e : ..........afuhan 0010 : 02 00 00 00 00 00 00 00 01 04 03 75 00 00 00 01 : ...........u.... 0020 : 09 00 00 00 00 00 00 00 fc b7 10 57 00 00 00 00 : ...........W.... 0030 : 20 00 00 00 62 62 62 38 61 61 65 35 37 63 31 30 : ...bbb8aae57c10 0040 : 34 63 64 61 34 30 63 39 33 38 34 33 61 64 35 65 : 4cda40c93843ad5e 0050 : 36 64 62 38 03 00 00 00 78 78 31 11 00 00 00 5a : 6db8....xx1....Z 0060 : 6f 6e 65 20 75 73 65 72 20 66 6f 72 20 79 68 67 : one user for yhg 0070 : 18 00 00 00 61 70 70 6c 69 63 61 74 69 6f 6e 2f : ....application/ 0080 : 6f 63 74 65 74 2d 73 74 72 65 61 6d 09 00 00 00 : octet-stream.... 0090 : 00 00 00 00 00 00 00 00 00 00 00 00 01 01 02 00 : ................ 00a0 : 00 00 01 02 04 0f 00 00 00 79 68 67 2d 79 68 67 : .........yhg-yhg 00b0 : 2e 31 34 31 30 32 2e 37 00 00 00 00 00 00 00 00 : .14102.7........ 00c0 : 00 00 00 00 00 00 : ......
// 获取region配置 [root@yhg-2 cmds]# radosgw-admin region get --cluster yhg > /tmp/region // 将 'bucket_index_max_shards'的值修改为 4 [root@yhg-2 cmds]# vim /tmp/region // 更新region配置 [root@yhg-2 cmds]# radosgw-admin region put --cluster yhg < /tmp/region // 更新到region map [root@yhg-2 cmds]# radosgw-admin regionmap update --cluster yhg
[client.radosgw.yhg-yhg-yhg-2] rgw frontends = "civetweb port=80" rgw bucket index max shards = 4
// 查看bucket的id [root@yhg-2 cmds]# radosgw-admin metadata get bucket:mmm --cluster yhg | grep bucket_id "bucket_id": "yhg-yhg.14236.1" // 查看bucket 实例信息 [root@yhg-2 cmds]# radosgw-admin metadata get bucket.instance:mmm:yhg-yhg.14236.1 --cluster yhg | grep shard "num_shards": 4, "bi_shard_hash_type": 0 // 查看.dir对象分片 [root@yhg-2 cmds]# rados -p .rgw.buckets.index ls --cluster yhg | grep "yhg-yhg.14236.1" .dir.yhg-yhg.14236.1.2 .dir.yhg-yhg.14236.1.0 .dir.yhg-yhg.14236.1.1 .dir.yhg-yhg.14236.1.3
2593 int RGWRados::init_bucket_index(rgw_bucket& bucket, int num_shards) 2594 { 2595 librados::IoCtx index_ctx; // context for new bucket 2596 // 创建到集群中bucket.index_pool的IoCtx,用于后续对该pool进行操作 // 本例中bucket.index_pool是 .rgw.buckets.index 2597 int r = open_bucket_index_ctx(bucket, index_ctx); 2598 if (r < 0) 2599 return r; 2600 // 拼接bucket的.dir.XXX对象名字 // 例如:.dir.yhg-yhg.5457.24 // 其中, '.dir'为统一前缀,yhg-yhg.5457.24为bucket id/marker 2601 string dir_oid = dir_oid_prefix; 2602 dir_oid.append(bucket.marker); 2603 // 获取.dir.XXX对象map。如果没有打开index shard特性,该map中只有一个 // 项,就是<0, '.dir.XXX'>。 // 如果打开了index shard特性,.dir.XXX会被分成num_shards个对象。 // 名字为.dir.XXX.$NUM 2604 map<int, string> bucket_objs; 2605 get_bucket_index_objects(dir_oid, num_shards, bucket_objs); 2606 // 调用了ceph osd端cls操作 'rgw bucket_init_index', // 即调用了CLSRGWConcurrentIO() 2607 return CLSRGWIssueBucketIndexInit(index_ctx, bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); 2608 }
对于index shard配置为4时,bucket的index会被分成4个.dir分片对象(后缀为.0 ~ .3):
[root@yhg-2 ~]# rados -p .rgw.buckets.index ls --cluster yhg | grep "yhg-yhg.14236.1" .dir.yhg-yhg.14236.1.2 .dir.yhg-yhg.14236.1.0 .dir.yhg-yhg.14236.1.1 .dir.yhg-yhg.14236.1.3
// cls/rgw/cls_rgw_client.h 246 int operator()() { 247 int ret = 0; 248 iter = objs_container.begin(); 249 for (; iter != objs_container.end() && max_aio-- > 0; ++iter) { // 最终调用了issue_bucket_index_init_op // 即,调用了集群端cls操作, 'rgw bucket_init_index' 250 ret = issue_op(iter->first, iter->second);
// cls/rgw/cls_rgw.cc 566 int rgw_bucket_init_index(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
// key为 header 542 struct rgw_bucket_dir_header { 543 map<uint8_t, rgw_bucket_category_stats> stats; 544 uint64_t tag_timeout; 545 uint64_t ver; // 本次操作时,只有ver被设置为了1 546 uint64_t master_ver; 547 string max_marker
// RGWRados::Bucket::UpdateIndex::prepare 3468 r = index_op.prepare(CLS_RGW_OP_ADD); // RGWRados::Bucket::UpdateIndex::complete 3487 r = index_op.complete(poolid, epoch, size, 3488 ut, etag, content_type, &acl_bl, 3489 meta.category, meta.remove_objs);
// rgw/rgw_rados.h 1529 int get_bucket_shard(BucketShard **pbs) { 1530 if (!bs_initialized) { 1531 int r = bs.init(bucket_info.bucket, obj); // rgw/rgw_rados.cc 4589 int RGWRados::open_bucket_index_shard(rgw_bucket& bucket, librados::IoCtx& index_ctx, 4590 const string& obj_key, string *bucket_obj, int *shard_id) // 建立index base对象所在pool的io上下文, // 并返回拼接好的index base对象名字.dir.${bucket.marker} 4592 string bucket_oid_base; 4593 int ret = open_bucket_index_base(bucket, index_ctx, bucket_oid_base); // 从bucket meta对象(.bucket.meta.${bucket.name}:${bucket.marker})中读出 // bucket的描述信息 4599 // Get the bucket info 4600 RGWBucketInfo binfo; 4601 ret = get_bucket_instance_info(obj_ctx, bucket, binfo, NULL, NULL); // 采用简单的hash算法,计算出shard id,并拼接出bucket index对象的名字 // 比如,.dir.yhg-yhg.14236.1.1 4605 ret = get_bucket_index_object(bucket_oid_base, obj_key, binfo.num_shards, 4606 (RGWBucketInfo::BIShardsHashType)binfo.bucket_index_shard_hash_type, bucket_obj, shard_id)
(gdb) print *bs $62 = { store = 0x3301c70, bucket = { name = "mmm", data_pool = "dpool1", data_extra_pool = ".rgw.buckets.extra", index_pool = ".rgw.buckets.index", marker = "yhg-yhg.14236.1", bucket_id = "yhg-yhg.14236.1", oid = ".bucket.meta.mmm:yhg-yhg.14236.1" }, shard_id = 1, index_ctx = { io_ctx_impl = 0x7f5074008d10 }, bucket_obj = ".dir.yhg-yhg.14236.1.1" }
(gdb) print binfo $53 = { bucket = { name = "mmm", data_pool = "dpool1", data_extra_pool = ".rgw.buckets.extra", index_pool = ".rgw.buckets.index", marker = "yhg-yhg.14236.1", bucket_id = "yhg-yhg.14236.1", oid = ".bucket.meta.mmm:yhg-yhg.14236.1" }, owner = "xx1", flags = 0, region = "yhg", creation_time = 1461035367, placement_rule = "default-placement", has_instance_obj = true, objv_tracker = { read_version = { ver = 1, tag = "_TrpC7B0VOdoBEkokzucAQtd" }, write_version = { ver = 0, tag = "" } }, ep_objv = { ver = 0, tag = "" }, quota = { max_size_kb = -1, max_objects = -1, enabled = false, max_size_soft_threshold = -1, max_objs_soft_threshold = -1 }, num_shards = 4, bucket_index_shard_hash_type = 0 '\000', static NUM_SHARDS_BLIND_BUCKET = 4294967295 }
6589 BucketShard bs(this); 6590 int ret = bs.init(bucket, obj_instance)
// rgw/rgw_rados.cc 2396 /** 2397 * get listing of the objects in a bucket. 2398 * bucket: bucket to list contents of 2399 * max: maximum number of results to return 2400 * prefix: only return results that match this prefix 2401 * delim: do not include results that match this string. 2402 * Any skipped results will have the matching portion of their name 2403 * inserted in common_prefixes with a "true" mark. 2404 * marker: if filled in, begin the listing with this object. 2405 * result: the objects are put in here. 2406 * common_prefixes: if delim is filled in, any matching prefixes are placed 2407 * here. 2408 */ 2409 int RGWRados::Bucket::List::list_objects(int max, vector<RGWObjEnt> *result, 2410 map<string, bool> *common_prefixes, 2411 bool *is_truncated)
8084 int RGWRados::cls_bucket_list(rgw_bucket& bucket, rgw_obj_key& start, const string& prefix, 8085 uint32_t num_entries, bool list_versions, map<string, RGWObjEnt>& m, 8086 bool *is_truncated, rgw_obj_key *last_entry, 8087 bool (*force_check_filter)(const string& name)) ... 8092 // key - oid (for different shards if there is any) 8093 // value - list result for the corresponding oid (shard), it is filled by the AIO callback 8094 map<int, string> oids; // 存放shard id // CLSRGWIssueBucketList列举的结果存放在list_results中 8095 map<int, struct rgw_cls_list_ret> list_results; // oids中存放bucket index shard 对象的名字 // 比如, // (gdb) print oids // $75 = std::map with 4 elements = { // [0] = ".dir.yhg-yhg.14236.1.0", // [1] = ".dir.yhg-yhg.14236.1.1", // [2] = ".dir.yhg-yhg.14236.1.2", // [3] = ".dir.yhg-yhg.14236.1.3" // } // 调用了"4551 int RGWRados::open_bucket_index/get_bucket_index_objects" // 获取了oids的名字列表(bucket index分片的名字)。 8096 int r = open_bucket_index(bucket, index_ctx, oids); 8097 if (r < 0) 8098 return r; 8099 8100 cls_rgw_obj_key start_key(start.name, start.instance); // 对于oids中的所有对象,调用issue_op方法 // 详见:cls/rgw/cls_rgw_client.h:246 // issue_op中调用了 osd 端cls 函数 'rgw bucket_list' 8101 r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries, list_versions, 8102 oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
(gdb) print list_results $85 = std::map with 4 elements = { [0] = { dir = { header = { stats = std::map with 1 elements = { [1 '\001'] = { total_size = 18, total_size_rounded = 8192, num_entries = 2 } }, tag_timeout = 0, ver = 7, master_ver = 0, max_marker = "00000000006.219.3" }, m = std::map with 2 elements = { ["h4h4"] = { key = { name = "h4h4", instance = "" }, ver = { pool = 1, epoch = 8 }, locator = "", exists = true, meta = { category = 1 '\001', size = 9, mtime = { tv = { tv_sec = 1461035810, tv_nsec = 0 } }, etag = "bbb8aae57c104cda40c93843ad5e6db8", owner = "xx1", owner_display_name = "Zone user for yhg", content_type = "application/octet-stream", accounted_size = 9 }, pending_map = std::multimap with 0 elements, index_ver = 4, tag = "yhg-yhg.14236.5", flags = 0, versioned_epoch = 0 }, ["sbsb"] = { key = { name = "sbsb", instance = "" }, ver = { pool = 1, epoch = 99 }, locator = "", exists = true, meta = { category = 1 '\001', size = 9, mtime = { tv = { tv_sec = 1461055490, tv_nsec = 0 } }, etag = "bbb8aae57c104cda40c93843ad5e6db8", owner = "xx1", owner_display_name = "Zone user for yhg", content_type = "application/octet-stream", accounted_size = 9 }, pending_map = std::multimap with 0 elements, index_ver = 6, tag = "yhg-yhg.14236.29", flags = 0, versioned_epoch = 0 } } }, is_truncated = false }, [1] = { dir = { header = { stats = std::map with 1 elements = { [1 '\001'] = { total_size = 10485760, total_size_rounded = 10485760, num_entries = 1 } }, tag_timeout = 0, ver = 11, master_ver = 0, max_marker = "00000000010.67.3" }, m = std::map with 1 elements = { ["tttt"] = { key = { name = "tttt", instance = "" }, ver = { pool = 1, epoch = 13 }, locator = "", exists = true, meta = { category = 1 '\001', size = 10485760, mtime = { tv = { tv_sec = 1461132772, tv_nsec = 0 } }, etag = "219c7b0c38567750b218389f15c57e82", owner = "xx1", owner_display_name = "Zone user for yhg", content_type = "application/octet-stream", accounted_size = 10485760 }, pending_map = 
std::multimap with 0 elements, index_ver = 10, tag = "yhg-yhg.14236.49", flags = 0, versioned_epoch = 0 } } }, is_truncated = false }, [2] = { dir = { header = { stats = std::map with 1 elements = { [1 '\001'] = { total_size = 9, total_size_rounded = 4096, num_entries = 1 } }, tag_timeout = 0, ver = 11, master_ver = 0, max_marker = "00000000010.101.3" }, m = std::map with 1 elements = { ["h5h5"] = { key = { name = "h5h5", instance = "" }, ver = { pool = 1, epoch = 34 }, locator = "", exists = true, meta = { category = 1 '\001', size = 9, mtime = { tv = { tv_sec = 1461053473, tv_nsec = 0 } }, etag = "bbb8aae57c104cda40c93843ad5e6db8", owner = "xx1", owner_display_name = "Zone user for yhg", content_type = "application/octet-stream", accounted_size = 9 }, pending_map = std::multimap with 0 elements, index_ver = 10, tag = "yhg-yhg.14236.26", flags = 0, versioned_epoch = 0 } } }, is_truncated = false }, [3] = { dir = { header = { stats = std::map with 0 elements, tag_timeout = 0, ver = 1, master_ver = 0, max_marker = "" }, m = std::map with 0 elements }, is_truncated = false } }
375 struct rgw_cls_list_ret 376 { 377 rgw_bucket_dir dir; 378 bool is_truncated; 584 struct rgw_bucket_dir { 585 struct rgw_bucket_dir_header header; 586 std::map<string, struct rgw_bucket_dir_entry> m; 542 struct rgw_bucket_dir_header { 543 map<uint8_t, rgw_bucket_category_stats> stats; 544 uint64_t tag_timeout; 545 uint64_t ver; 546 uint64_t master_ver; 547 string max_marker; 516 struct rgw_bucket_category_stats { 517 uint64_t total_size; 518 uint64_t total_size_rounded; 519 uint64_t num_entries;
403 int rgw_bucket_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out) // 读取bucket index 对象上的omap header // (gdb) print new_dir.header // $2 = { // stats = std::map with 1 elements = { // [1 '\001'] = { // total_size = 18, // total_size_rounded = 8192, // num_entries = 2 // } // }, // tag_timeout = 0, // ver = 7, // master_ver = 0, // max_marker = "00000000006.219.3" // } // 415 struct rgw_cls_list_ret ret; 416 struct rgw_bucket_dir& new_dir = ret.dir; 417 int rc = read_bucket_header(hctx, &new_dir.header); 425 map<string, bufferlist> keys; 426 string start_key; 427 encode_list_index_key(hctx, op.start_obj, &start_key); // 没有什么作用 // 读取bucket index 对象的omap的各个k/v entry 428 rc = get_obj_vals(hctx, start_key, op.filter_prefix, op.num_entries + 1, &keys);
(gdb) print new_dir.m $5 = std::map with 2 elements = { ["h4h4"] = { key = { name = "h4h4", instance = "" }, ver = { pool = 1, epoch = 8 }, locator = "", exists = true, meta = { category = 1 '\001', size = 9, mtime = { tv = { tv_sec = 1461035810, tv_nsec = 0 } }, etag = "bbb8aae57c104cda40c93843ad5e6db8", owner = "xx1", owner_display_name = "Zone user for yhg", content_type = "application/octet-stream", accounted_size = 9 }, pending_map = std::multimap with 0 elements, index_ver = 4, tag = "yhg-yhg.14236.5", flags = 0, versioned_epoch = 0 }, ["sbsb"] = { key = { name = "sbsb", instance = "" }, ver = { pool = 1, epoch = 99 }, locator = "", exists = true, meta = { category = 1 '\001', size = 9, mtime = { tv = { tv_sec = 1461055490, tv_nsec = 0 } }, etag = "bbb8aae57c104cda40c93843ad5e6db8", owner = "xx1", owner_display_name = "Zone user for yhg", content_type = "application/octet-stream", accounted_size = 9 }, pending_map = std::multimap with 0 elements, index_ver = 6, tag = "yhg-yhg.14236.29", flags = 0, versioned_epoch = 0 } }