Ceph RBD请求过程的分析

Ceph RBD请求过程的分析
RBD是ceph中提供块存储的客户端服务,之所以说是客户端服务是相对于RADOS而言,RBD是基于librados API开发的一个块存储服务。
本文会通过rbd的一个写入操作来介绍RBD对RADOS的请求过程。

以通过rbd向RADOS中导入一个文件为例,介绍整个请求过程:

rbd -p {pool_name} import {local_file_path} {rbd_image}

rbd 是一个可执行的二进制程序,该命令会启动一个进程来完成该文件的导入工作。程序运行时会到默认的目录下打开该程序需要的配置文件(ceph.conf)加载一些配置项,
并解析命令行中传入的参数,接着进行参数的安全检查。在必要的一些检查结束之后,就会根据解析出来的命令做对应具体的请求处理操作:
主要的变量

  librbd::RBD rbd;      //操作rbd的实例对象
  librados::IoCtx io_ctx, dest_io_ctx;  //与pool关联的上下文
  librbd::Image image; //镜像

1.初始化rados对象:

    cerr << "rbd: couldn't initialize rados!" << std::endl;
    return EXIT_FAILURE;
  }

2.与rados服务端建立连接:

 if (talk_to_cluster && rados.connect() < 0) {
    cerr << "rbd: couldn't connect to the cluster!" << std::endl;
    return EXIT_FAILURE;

3.初始化io_ctx,io_ctx对应一个pool池,保存与该pool相关的一些信息。

  if (talk_to_cluster && opt_cmd != OPT_IMPORT) {
    r = rados.ioctx_create(poolname, io_ctx);
    if (r < 0) {
      cerr << "rbd: error opening pool " << poolname << ": "
       << cpp_strerror(-r) << std::endl;
      return -r;
    }

4.打开本地要上传的文件:

      r = -errno;
      cerr << "rbd: error opening " << path << std::endl;
      goto done2;
    }

5.先创建该镜像:

                stripe_unit, stripe_count);

/**
 * Create the image `imgname` through the pool context `io_ctx`.
 *
 * Format 1 images are created with rbd.create(), which is not passed any
 * striping arguments; a striping request that differs from the default is
 * therefore refused with -EINVAL before the call is made. Every other
 * format is created with rbd.create3(), which receives the feature bits
 * and the striping parameters.
 *
 * @param order  in/out pointer forwarded to the create call; it is also
 *               read to derive the default stripe unit (1 << *order).
 * @return 0 on success, otherwise the negative error code.
 */
static int do_create(librbd::RBD &rbd, librados::IoCtx& io_ctx,
             const char *imgname, uint64_t size, int *order,
             int format, uint64_t features,
             uint64_t stripe_unit, uint64_t stripe_count)
{
  if (format != 1) {
    const int rc = rbd.create3(io_ctx, imgname, size, features, order,
                               stripe_unit, stripe_count);
    return rc < 0 ? rc : 0;
  }

  // Only look at *order once the caller has actually asked for striping.
  const bool striping_requested = stripe_unit || stripe_count;
  if (striping_requested) {
    const uint64_t default_stripe_unit = 1ull << *order;
    if (stripe_unit != default_stripe_unit && stripe_count != 1) {
      cerr << "non-default striping not allowed with format 1; use --image-format 2"
           << std::endl;
      return -EINVAL;
    }
  }

  const int rc = rbd.create(io_ctx, imgname, size, order);
  return rc < 0 ? rc : 0;
}

6.打开image镜像,该过程中创建了与该镜像绑定的ImageCtx类实例:


 /**
  * Open the image `name` (optionally at snapshot `snap_name`) in the pool
  * bound to `io_ctx` and attach the resulting context to `image`.
  *
  * A fresh ImageCtx is allocated first. If `image` already carries a
  * context, that one is closed and image.ctx is reset to NULL before the
  * new image is opened.
  *
  * @return 0 on success with image.ctx pointing at the new ImageCtx;
  *         otherwise the negative code returned by librbd::open_image(),
  *         in which case image.ctx is left NULL.
  */
 int RBD::open(IoCtx& io_ctx, Image& image, const char *name,
        const char *snap_name)
  {
    ImageCtx *ictx = new ImageCtx(name, "", snap_name, io_ctx, false);
    TracepointProvider::initialize(get_cct(io_ctx));
    tracepoint(librbd, open_image_enter, ictx, ictx->name.c_str(), ictx->id.c_str(), ictx->snap_name.c_str(), ictx->read_only);

    // Drop any image this handle was previously bound to. The cast needs an
    // explicit target type: image.ctx is stored as an opaque image_ctx_t.
    if (image.ctx != NULL) {
      close_image(reinterpret_cast<ImageCtx*>(image.ctx));
      image.ctx = NULL;
    }

    // NOTE(review): ictx is not released here on failure; presumably
    // librbd::open_image() disposes of it on error -- confirm.
    int r = librbd::open_image(ictx);
    if (r < 0) {
      tracepoint(librbd, open_image_exit, r);
      return r;
    }

    image.ctx = (image_ctx_t) ictx;
    tracepoint(librbd, open_image_exit, 0);
    return 0;
  }

7.从打开的本地文件中循环读取数据,写入到打开的image中。

  while ((readlen = ::read(fd, p + blklen, reqlen)) >= 0) {
    blklen += readlen;
    // if read was short, try again to fill the block before writing
    if (readlen && ((size_t)readlen < reqlen)) {
      reqlen -= readlen;
      continue;
    }
    if (!from_stdin)
      pc.update_progress(image_pos, size);

    bufferlist bl(blklen);
    bl.append(p, blklen);
    // resize output image by binary expansion as we go for stdin
    if (from_stdin && (image_pos + (size_t)blklen) > size) {
      size *= 2;
      r = image.resize(size);
      if (r < 0) {
    cerr << "rbd: can't resize image during import" << std::endl;
    goto done;
      }
    }

    // write as much as we got; perhaps less than imgblklen
    // but skip writing zeros to create sparse images
    if (!bl.is_zero()) {
    //在AioImportContext的构造方法中实现了数据的写入过程。
      new AioImportContext(*throttle, image, bl, image_pos);
    }

    // done with whole block, whether written or not
    image_pos += blklen;
    // if read had returned 0, we're at EOF and should quit
    if (readlen == 0)
      break;
    blklen = 0;
    reqlen = imgblklen;
  }
...

8.将读取的数据m_bufferlist,写入到镜像的m_offset偏移位置处:

 int r = image.aio_write2(m_offset, m_bufferlist.length(), m_bufferlist,
                 m_aio_completion, op_flags);
...

9.提交异步写入请求:

    submit_aio_write(ictx, off, len, bl.c_str(), get_aio_completion(c),
                     op_flags);

/**
 * Hand an asynchronous image write to librbd.
 *
 * When the rbd_non_blocking_aio configuration option is off, the write is
 * issued directly via librbd::aio_write(). When it is on, the request is
 * wrapped in a C_AioWriteWQ and pushed onto the image's aio_work_queue
 * instead.
 *
 * @param ictx      image context the write targets
 * @param off       offset within the image
 * @param len       number of bytes in buf
 * @param buf       data to write
 * @param c         completion passed along with the request
 * @param op_flags  flags forwarded unchanged to the write
 */
void submit_aio_write(librbd::ImageCtx *ictx, uint64_t off, size_t len,
                      const char *buf, librbd::AioCompletion *c, int op_flags) {
  if (!ictx->cct->_conf->rbd_non_blocking_aio) {
    librbd::aio_write(ictx, off, len, buf, c, op_flags);
    return;
  }

  C_AioWriteWQ *queued_write = new C_AioWriteWQ(ictx, off, len, buf, c,
                                                op_flags);
  ictx->aio_work_queue->queue(queued_write);
}

10.librbd执行写入操作:

librbd::aio_write(ictx, off, len, buf, c, op_flags);

11.将本次写入操作映射到对应的对象:

    vector<ObjectExtent> extents;
    if (len > 0) {
      Striper::file_to_extents(ictx->cct, ictx->format_string,
                   &ictx->layout, off, clip_len, 0, extents);
    }

12.将写入的数据根据对象进行聚合:

...
 for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) {
      ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length
             << " from " << p->buffer_extents << dendl;
      // assemble extent
      bufferlist bl;
      for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin();
       q != p->buffer_extents.end();
       ++q) {
    bl.append(buf + q->first, q->second);
      }
...

13.创建异步写请求的回调对象。

 C_AioWrite *req_comp = new C_AioWrite(cct, c);

14.rbd支持cache和非cache两种写入操作:

      if (ictx->object_cacher) {
    c->add_request();
    ictx->write_to_cache(p->oid, bl, p->length, p->offset, req_comp, op_flags);
      } else {
    AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset,
                     bl, snapc, req_comp);
    c->add_request();

    req->set_op_flags(op_flags);
    req->send();
      }

15.以非cache写入操作为例,创建异步写请求对象。

    AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset,
                     bl, snapc, req_comp);

待续。。。。。。。。

你可能感兴趣的:(ceph)