bluefs

在 BlueStore 中，元数据存储在 RocksDB 里，而 RocksDB 需要通过 RocksEnv 获得运行环境。所以 Ceph 通过一个简单的文件系统 BlueFS 来实现 RocksEnv 的接口。
/// Excerpt of the BlueFS class: only the members discussed in this article
/// are shown; the rest of the class is omitted.
class BlueFS {
public:
  CephContext* cct;

  // BlueFS supports the three kinds of block device listed below; each
  // constant is the slot index used for that device.
  static constexpr unsigned MAX_BDEV = 3;
  static constexpr unsigned BDEV_WAL = 0;
  static constexpr unsigned BDEV_DB = 1;
  static constexpr unsigned BDEV_SLOW = 2;

  // In-memory image of the file system: together these two maps hold every
  // directory and every file.
  // NOTE(review): the template arguments were lost when the snippet was
  // extracted; they are restored here from the upstream Ceph declaration.
  mempool::bluefs::map<string, DirRef> dir_map;              ///< dirname -> Dir
  mempool::bluefs::unordered_map<uint64_t,FileRef> file_map; ///< ino -> File
};
bluefs的初始化流程如下:
int BlueStore::_open_db(bool create, bool to_repair_db)
{
  int r;
  assert(!db);
  string fn = path + "/db";
  string options;
  stringstream err;
  ceph::shared_ptr merge_op(new Int64ArrayMergeOperator);

  string kv_backend;
  std::vector cfs;
  #读取元数据
  if (create) {
    kv_backend = cct->_conf->bluestore_kvbackend;
  } else {
    r = read_meta("kv_backend", &kv_backend);
    if (r < 0) {
      derr << __func__ << " unable to read 'kv_backend' meta" << dendl;
      return -EIO;
    }
  }
  #根据元数据在内存中新建bluefs
    bluefs = new BlueFS(cct);

    #读取块设置的元数据
    // shared device
    if (read_meta("path_block", &bfn) < 0) {
      bfn = path + "/block";
    }
	#添加设备
    r = bluefs->add_block_device(bluefs_shared_bdev, bfn);
    if (r < 0) {
      derr << __func__ << " add block device(" << bfn << ") returned: " 
	   << cpp_strerror(r) << dendl;
      goto free_bluefs;
    }

      // align to bluefs's alloc_size
      initial = P2ROUNDUP(initial, cct->_conf->bluefs_alloc_size);
      // put bluefs in the middle of the device in case it is an HDD
      uint64_t start = P2ALIGN((bdev->get_size() - initial) / 2,
			       cct->_conf->bluefs_alloc_size);
	#添加设备的存储空间
      bluefs->add_block_extent(bluefs_shared_bdev, start, initial);
      bluefs_extents.insert(start, initial);
    }
   
    if (create) {
	#格式化文件系统
      bluefs->mkfs(fsid);
    }
	#mount文件系统
    r = bluefs->mount();
    if (r < 0) {
      derr << __func__ << " failed bluefs mount: " << cpp_strerror(r) << dendl;
      goto free_bluefs;
    }

}
/// Abridged excerpt of BlueFS::mount: open the superblock, create the
/// per-device allocators, replay the log, then remove the extents already
/// owned by files from the allocators' free space.
///
/// NOTE(review): the tail of the function is omitted in this excerpt; the
/// `out:` label targeted by the gotos and the return statements are not
/// shown here.
int BlueFS::mount()
{
  dout(1) << __func__ << dendl;

  int r = _open_super();
  if (r < 0) {
    derr << __func__ << " failed to open super: " << cpp_strerror(r) << dendl;
    goto out;
  }

  // Reset the per-device interval sets to one empty entry per device slot.
  block_all.clear();
  block_all.resize(MAX_BDEV);
  // Create one allocator for each attached block device.
  _init_alloc();
  // Replay the log; per the article this is what rebuilds dir_map and
  // file_map from the logged entries.
  r = _replay(false, false);
  if (r < 0) {
    derr << __func__ << " failed to replay log: " << cpp_strerror(r) << dendl;
    // The allocators were already created above, so tear them down again.
    _stop_alloc();
    goto out;
  }

  // Initialise the free lists: every extent referenced by an existing file
  // is removed from the free space of the allocator for its device.
  for (auto& p : file_map) {
    dout(30) << __func__ << " noting alloc for " << p.second->fnode << dendl;
    for (auto& q : p.second->fnode.extents) {
      alloc[q.bdev]->init_rm_free(q.offset, q.length);
    }
  }

  // ... (remainder of mount() omitted in this excerpt) ...
}
void BlueFS::_init_alloc()
{
  dout(20) << __func__ << dendl;
  alloc.resize(MAX_BDEV);
  pending_release.resize(MAX_BDEV);
  for (unsigned id = 0; id < bdev.size(); ++id) {
  #block id为null,则退出
    if (!bdev[id]) {
      continue;
    }
    assert(bdev[id]->get_size());
	为每个block 设备创建一个Allocator
    alloc[id] = Allocator::create(cct, cct->_conf->bluefs_allocator,
				  bdev[id]->get_size(),
				  cct->_conf->bluefs_alloc_size);
    interval_set& p = block_all[id];
    for (interval_set::iterator q = p.begin(); q != p.end(); ++q) {
	#并添加这个块设备的起始地址和接收地址
      alloc[id]->init_add_free(q.get_start(), q.get_len());
    }
  }
}
可见在 _init_alloc 中会为每个 block 设备创建一个 Allocator，并把 block_all 中记录的该设备的每个区间（起始地址和长度）作为空闲空间添加到这个 Allocator 中。

你可能感兴趣的:(ceph)