了解分布式存储的朋友一定知道 lvm2，以及 PV、LV、VG 等概念。这里简单看看 lvm2 是如何和内核交互的，为下一步开发自己的 lvm 做准备。
首先看 lvcreate 的调用走向。希望你自己摸索过 lvm，也熟悉 vfs、fs 子系统，这样一看就知道是什么意思，然后大家一起交流。
lvm: dev_manager.c
/* * Add LV and any known dependencies */ static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree, struct logical_volume *lv) { //... if (!_add_dev_to_dtree(dm, dtree, lv, NULL)) return_0; //... return 1; }
_add_dev_to_dtree ---》int _info() ---》 dm_task_run()
然后就是 dm 库
int dm_task_run(struct dm_task *dmt) { repeat_ioctl:/*关键就是这个*/ if (!(dmi = _do_dm_ioctl(dmt, command, _ioctl_buffer_double_factor))) return 0; if (dmi->flags & DM_BUFFER_FULL_FLAG) { switch (dmt->type) { case DM_DEVICE_LIST_VERSIONS: case DM_DEVICE_LIST: case DM_DEVICE_DEPS: case DM_DEVICE_STATUS: case DM_DEVICE_TABLE: case DM_DEVICE_WAITEVENT: _ioctl_buffer_double_factor++; dm_free(dmi); goto repeat_ioctl;/*这里其实是在循环决定操作*/ default: log_error("WARNING: libdevmapper buffer too small for data"); } //... }
static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command, unsigned repeat_count) { struct dm_ioctl *dmi; dmi = _flatten(dmt, repeat_count);/*dm_task结构字段合法性检查*/ if (!dmi) { log_error("Couldn't create ioctl argument."); return NULL; } if (dmt->type == DM_DEVICE_TABLE) dmi->flags |= DM_STATUS_TABLE_FLAG; dmi->flags |= DM_EXISTS_FLAG; /* FIXME */ if (dmt->no_open_count) dmi->flags |= DM_SKIP_BDGET_FLAG; //... #ifdef DM_IOCTLS if (ioctl(_control_fd, command, dmi) < 0) {/*注意这里的dmi */ if (errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) || (dmt->type == DM_DEVICE_MKNODES) || (dmt->type == DM_DEVICE_STATUS))) dmi->flags &= ~DM_EXISTS_FLAG; /* FIXME */ else { if (_log_suppress) log_verbose("device-mapper: %s ioctl " "failed: %s", _cmd_data_v4[dmt->type].name, strerror(errno)); else log_error("device-mapper: %s ioctl " "failed: %s", _cmd_data_v4[dmt->type].name, strerror(errno)); dm_free(dmi); return NULL; } } #else /* Userspace alternative for testing */ #endif return dmi; }
看一下 command的取值:
int dm_task_run(struct dm_task *dmt) { struct dm_ioctl *dmi; unsigned command; //... command = _cmd_data_v4[dmt->type].cmd; //... }
对应一个全局数组
/*
 * Table of device-mapper commands.  Every entry holds the command name, the
 * ioctl request code sent to the kernel and a three-part version number
 * (presumably the interface version that introduced the command - confirm).
 *
 * dm_task_run() selects the request with _cmd_data_v4[dmt->type].cmd, so the
 * position of an entry is significant and must not be changed.
 *
 * Several names share one request code: "suspend"/"resume" both use
 * DM_DEV_SUSPEND, "status"/"table" both use DM_TABLE_STATUS and
 * "info"/"mknodes" both use DM_DEV_STATUS.
 */
static struct cmd_data _cmd_data_v4[] = {
	{"create", DM_DEV_CREATE, {4, 0, 0}},
	{"reload", DM_TABLE_LOAD, {4, 0, 0}},
	{"remove", DM_DEV_REMOVE, {4, 0, 0}},
	{"remove_all", DM_REMOVE_ALL, {4, 0, 0}},
	{"suspend", DM_DEV_SUSPEND, {4, 0, 0}},
	{"resume", DM_DEV_SUSPEND, {4, 0, 0}},
	{"info", DM_DEV_STATUS, {4, 0, 0}},
	{"deps", DM_TABLE_DEPS, {4, 0, 0}},
	{"rename", DM_DEV_RENAME, {4, 0, 0}},
	{"version", DM_VERSION, {4, 0, 0}},
	{"status", DM_TABLE_STATUS, {4, 0, 0}},
	{"table", DM_TABLE_STATUS, {4, 0, 0}},
	{"waitevent", DM_DEV_WAIT, {4, 0, 0}},
	{"names", DM_LIST_DEVICES, {4, 0, 0}},
	{"clear", DM_TABLE_CLEAR, {4, 0, 0}},
	{"mknodes", DM_DEV_STATUS, {4, 0, 0}},
	/* The remaining commands are only compiled in when the headers define them. */
#ifdef DM_LIST_VERSIONS
	{"versions", DM_LIST_VERSIONS, {4, 1, 0}},
#endif
#ifdef DM_TARGET_MSG
	{"message", DM_TARGET_MSG, {4, 2, 0}},
#endif
#ifdef DM_DEV_SET_GEOMETRY
	{"setgeometry", DM_DEV_SET_GEOMETRY, {4, 6, 0}},
#endif
};
每一项的第一个字段就是在 shell 里输入的命令名，关键在于下面这个结构体：
/* Description of one device-mapper command, as stored in _cmd_data_v4. */
struct cmd_data {
	const char *name;	/* command name; also printed in ioctl failure messages */
	const int cmd;		/* request code passed to ioctl() */
	const int version[3];	/* three-part version number attached to the command */
};
接着往下看
/*
 * Request code of the "create" command: combines the DM_IOCTL type, the
 * DM_DEV_CREATE_CMD command number and the size of struct dm_ioctl.
 */
#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
/* ... (further definitions elided in this excerpt) ... */
/*
 * Build a read+write request code; the third argument is a type whose
 * sizeof() is handed to _IOC.
 */
#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
比如我们通过shell 调用[root]\> dmsetup create kko txt
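此时传给 ioctl 的 command 值是 3241737475，也就是十六进制的 0xC138FD03：最高两位（0xC0000000）表示读写方向 _IOC_READ|_IOC_WRITE，0x138（312）是 sizeof(struct dm_ioctl)，0xFD 是 type，0x03 是 nr。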
在 dm-ioctl.h 里面可以看到
#define DM_IOCTL 0xfd
关键的 _IOWR 的 nr 参数（命令序号）取自下面这个枚举：
/*
 * Command numbers of the device-mapper ioctl interface.  A number is used as
 * the "nr" argument when a request code is built (DM_DEV_CREATE is made from
 * DM_DEV_CREATE_CMD).  The values are written out explicitly so the number
 * of each command can be read directly.
 */
enum {
	/* Top level cmds */
	DM_VERSION_CMD          = 0,
	DM_REMOVE_ALL_CMD       = 1,
	DM_LIST_DEVICES_CMD     = 2,

	/* device level cmds */
	DM_DEV_CREATE_CMD       = 3,
	DM_DEV_REMOVE_CMD       = 4,
	DM_DEV_RENAME_CMD       = 5,
	DM_DEV_SUSPEND_CMD      = 6,
	DM_DEV_STATUS_CMD       = 7,
	DM_DEV_WAIT_CMD         = 8,

	/* Table level cmds */
	DM_TABLE_LOAD_CMD       = 9,
	DM_TABLE_CLEAR_CMD      = 10,
	DM_TABLE_DEPS_CMD       = 11,
	DM_TABLE_STATUS_CMD     = 12,

	/* Added later */
	DM_LIST_VERSIONS_CMD    = 13,
	DM_TARGET_MSG_CMD       = 14,
	DM_DEV_SET_GEOMETRY_CMD = 15
};
所以 DM_DEV_CREATE_CMD 就是 3，对应上面 C138FD03 的最低字节 03
明白这几个宏之后，就去内核看看处理
device_mapper 主要的代码都在 /drivers/md 里面
首先看一下和上面 ioctl 一一对应的一个结构体
1 先注册fs 的操作类
/*
 * File operations of the device-mapper control device.  Native ioctl calls
 * are routed to dm_ctl_ioctl() and compat ioctl calls to
 * dm_compat_ctl_ioctl(); the file is opened with nonseekable_open and
 * llseek is noop_llseek.
 */
static const struct file_operations _ctl_fops = {
	.owner		= THIS_MODULE,
	.open		= nonseekable_open,
	.llseek		= noop_llseek,
	.unlocked_ioctl	= dm_ctl_ioctl,
	.compat_ioctl	= dm_compat_ctl_ioctl,
};
当用户调用 ioctl 的时候(上面分析过) 通过字符设备 就会调用内核的.unlocked_ioctl
2 接下来就是dm_ctl_ioctl()->ctl_ioctl() -> lookup_ioctl()
static ioctl_fn lookup_ioctl(unsigned int cmd) { static struct { int cmd; ioctl_fn fn; } _ioctls[] = { {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ {DM_REMOVE_ALL_CMD, remove_all}, {DM_LIST_DEVICES_CMD, list_devices}, {DM_DEV_CREATE_CMD, dev_create}, {DM_DEV_REMOVE_CMD, dev_remove}, {DM_DEV_RENAME_CMD, dev_rename}, {DM_DEV_SUSPEND_CMD, dev_suspend}, {DM_DEV_STATUS_CMD, dev_status}, {DM_DEV_WAIT_CMD, dev_wait}, {DM_TABLE_LOAD_CMD, table_load}, {DM_TABLE_CLEAR_CMD, table_clear}, {DM_TABLE_DEPS_CMD, table_deps}, {DM_TABLE_STATUS_CMD, table_status}, {DM_LIST_VERSIONS_CMD, list_versions}, {DM_TARGET_MSG_CMD, target_message}, {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry} }; return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; }
按上面的 shell [root]\>dmsetup create xxx
gdb
就会去调用 dev_create
/*
 * Handler of the "create" command: validates the requested name, creates a
 * mapped_device and registers it under that name (and uuid, if one was
 * given), then fills param with the device status.
 *
 * Returns 0 on success, otherwise the non-zero result of check_name(),
 * dm_create() or dm_hash_insert().
 */
static int dev_create(struct dm_ioctl *param, size_t param_size)
{
	struct mapped_device *md;
	const char *uuid;
	int minor_nr = DM_ANY_MINOR;
	int err;

	/* param->name is the name typed after "dmsetup create". */
	err = check_name(param->name);
	if (err)
		return err;

	/*
	 * Only a request flagged DM_PERSISTENT_DEV_FLAG carries its own minor
	 * number in param->dev; otherwise DM_ANY_MINOR is passed on
	 * (presumably leaving the choice of a free minor to dm_create - confirm).
	 */
	if (param->flags & DM_PERSISTENT_DEV_FLAG)
		minor_nr = MINOR(huge_decode_dev(param->dev));

	/* Allocate and initialise the mapped_device. */
	err = dm_create(minor_nr, &md);
	if (err)
		return err;

	/*
	 * Insert the device into the global name hash
	 * (static struct list_head _name_buckets[NUM_BUCKETS]).
	 * An empty uuid string is passed as NULL.
	 */
	uuid = *param->uuid ? param->uuid : NULL;
	err = dm_hash_insert(param->name, uuid, md);
	if (err) {
		dm_put(md);
		dm_destroy(md);
		return err;
	}

	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
	__dev_status(md, param);
	dm_put(md);

	return 0;
}
接着gdb 就会发现第二次调用 ioctl 的地方
/*
 * Handler of the table load command: builds a new dm_table from the ioctl
 * parameters and stores it as the inactive table (hc->new_map) of the
 * addressed device.
 *
 * Returns -ENXIO when the device cannot be found or has left the hash table,
 * -EINVAL when the new table's type differs from the type already set on the
 * device, or the non-zero result of dm_table_create(), populate_table() or
 * dm_setup_md_queue().  Every exit after find_device() succeeded goes
 * through "out", which calls dm_put(md).
 */
static int table_load(struct dm_ioctl *param, size_t param_size)
{
	int r;
	struct hash_cell *hc;
	struct dm_table *t;
	struct mapped_device *md;

	/* Look up the mapped_device this request refers to in the hash table. */
	md = find_device(param);
	if (!md)
		return -ENXIO;

	/* Create a dm_table structure. */
	r = dm_table_create(&t, get_mode(param), param->target_count, md);
	if (r)
		goto out;

	/*
	 * Per the article: sets up the required dm_target structures through
	 * dm_table_add_target and keeps them in a btree belonging to md
	 * (helper not shown here - confirm).
	 */
	r = populate_table(t, param, param_size);
	if (r) {
		dm_table_destroy(t);
		goto out;
	}

	/* Protect md->type and md->queue against concurrent table loads. */
	dm_lock_md_type(md);
	if (dm_get_md_type(md) == DM_TYPE_NONE)
		/* Initial table load: acquire type of table. */
		dm_set_md_type(md, dm_table_get_type(t));
	else if (dm_get_md_type(md) != dm_table_get_type(t)) {
		DMWARN("can't change device type after initial table load.");
		dm_table_destroy(t);
		dm_unlock_md_type(md);
		r = -EINVAL;
		goto out;
	}

	/* setup md->queue to reflect md's type (may block) */
	r = dm_setup_md_queue(md); /* initialise the DM request queue */
	if (r) {
		DMWARN("unable to set up device queue for new table.");
		dm_table_destroy(t);
		dm_unlock_md_type(md);
		goto out;
	}
	dm_unlock_md_type(md);

	/* stage inactive table */
	down_write(&_hash_lock);
	hc = dm_get_mdptr(md);
	if (!hc || hc->md != md) {
		DMWARN("device has been removed from the dev hash table.");
		dm_table_destroy(t);
		up_write(&_hash_lock);
		r = -ENXIO;
		goto out;
	}

	/* A previously staged table that was never activated is dropped. */
	if (hc->new_map)
		dm_table_destroy(hc->new_map);
	hc->new_map = t; /* store the new table in the hash cell */
	up_write(&_hash_lock);

	param->flags |= DM_INACTIVE_PRESENT_FLAG;
	__dev_status(md, param);

out:
	dm_put(md);

	return r;
}
接着gdb 就会发现第三次调用 ioctl 的地方
/*
 * Handler shared by suspend and resume.  DM_SUSPEND_FLAG in param->flags
 * selects do_suspend(); without it do_resume() runs, which (per the article)
 * activates the mapped device with the new dm_table.
 *
 * Returns the result of whichever helper was called.
 */
static int dev_suspend(struct dm_ioctl *param, size_t param_size)
{
	return (param->flags & DM_SUSPEND_FLAG) ? do_suspend(param)
						: do_resume(param);
}
=======================================================
这里补充一下关于 lvm2 伟大的逻辑卷管理:
大家都知道 lvm2 会把自己的标签（label）写在 PV 开头的几个扇区里（默认是第 2 个 512 字节扇区），卷组的元数据则保存在其后的 metadata area 中。这些读写主要通过 dev-io.c
的 _io() 方法直接操作原始磁盘来完成。这块尤其重要，后面再看。
static int _io(struct device_area *where, void *buffer, int should_write)
而所有备份、灾难恢复的秘密都在 archive.c 文件中
外部的接口主要有:
int vgcfgrestore(struct cmd_context *cmd, int argc, char **argv)
—>
int archive_display(struct cmd_context *cmd, const char *vg_name)
--->
int archive_list(struct cmd_context *cmd, const char *dir, const char *vgname)
--->
/* * Returns a list of archive_files. */ static struct list *_scan_archive(struct dm_pool *mem, const char *vgname, const char *dir)
明白了吧。所以你可以在 [root]\> 输入 vgcfgrestore --list <卷组名> 来列出该卷组的归档文件
别的关于 vgcfg 大家可以自己看
对于备份 vgcfgbackup
主要的流程就是
--->int process_each_vg()
然后会去找当前有效的 VG，再通过 _process_one_vg 处理每一个 VG，接下来就是通过
->>>
vg = vg_read(cmd, vg_name, vgid, &consistent)
(这里的struct cmd_context *cmd 里面记录了)
struct cmd_context { //.... struct list config_files; int config_valid; struct config_tree *cft; struct config_tree *cft_override; struct config_info default_settings; struct config_info current_settings; struct archive_params *archive_params; struct backup_params *backup_params; /* List of defined tags */ struct list tags; int hosttags; /*目录的的路径 */ char sys_dir[PATH_MAX];/*一般是: /etc/lvm*/ char dev_dir[PATH_MAX];/*一般是: /dev/*/ char proc_dir[PATH_MAX];/*一般是: /proc*/ };
关键的部分到了
->>>
archiver.c:
int backup(struct volume_group *vg)
int __backup(struct volume_group *vg)
backup_to_file(name, desc, vg);
好的 lvm 的基本 skeleton 就这样了。。 别的都能看懂, 我不分析了