virsh save c75_test /home/data/c75_test_save_cmd.mem
virsh restore /home/data/c75_test_save_cmd.mem
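The file produced by virsh save is not a bare QEMU migration stream: libvirt prepends its own header, defined below, followed by the domain XML, before the QEMU migration data.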
#define QEMU_SAVE_MAGIC "LibvirtQemudSave"
#define QEMU_SAVE_PARTIAL "LibvirtQemudPart"
struct _virQEMUSaveHeader {
char magic[sizeof(QEMU_SAVE_MAGIC)-1];
uint32_t version;
uint32_t data_len;
uint32_t was_running;
uint32_t compressed;
uint32_t cookieOffset;
uint32_t unused[14];
};
typedef struct _virQEMUSaveData virQEMUSaveData;
typedef virQEMUSaveData *virQEMUSaveDataPtr;
struct _virQEMUSaveData {
virQEMUSaveHeader header;
char *xml;
char *cookie;
};
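This layout can be checked directly against a file produced by virsh save. Below is a minimal standalone sketch (a hypothetical test program, not libvirt code; it assumes the header fields are stored in host byte order):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define QEMU_SAVE_MAGIC "LibvirtQemudSave"

struct hdr {
    char magic[sizeof(QEMU_SAVE_MAGIC) - 1];
    uint32_t version;
    uint32_t data_len;      /* length of the domain XML that follows */
    uint32_t was_running;
    uint32_t compressed;
    uint32_t cookieOffset;
    uint32_t unused[14];
};

int main(int argc, char **argv)
{
    struct hdr h;
    FILE *fp = argc > 1 ? fopen(argv[1], "rb") : NULL;

    if (!fp || fread(&h, sizeof(h), 1, fp) != 1) {
        fprintf(stderr, "cannot read header\n");
        return 1;
    }
    if (memcmp(h.magic, QEMU_SAVE_MAGIC, sizeof(h.magic)) != 0) {
        fprintf(stderr, "not a libvirt save image\n");
        return 1;
    }
    printf("version=%u xml_len=%u was_running=%u compressed=%u\n",
           h.version, h.data_len, h.was_running, h.compressed);
    fclose(fp);
    return 0;
}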
typedef struct SaveStateEntry {
QTAILQ_ENTRY(SaveStateEntry) entry; // all SaveStateEntry instances are kept on a queue maintained by the global savevm_state.handlers; entry links this element into that queue
char idstr[256]; /* QEMU groups migratable state of the same kind (timer, ram, dirty-bitmap, apic, ...) into one SaveStateEntry; idstr names that kind */
int instance_id; /* different se instances sharing the same idstr are told apart by instance_id */
/* idstr only names the kind of entry; several instances of the same
 * kind can sit on the savevm_state.handlers list, distinguished by
 * instance_id or alias_id
 */
int alias_id;
int version_id;
/* version id read from the stream,
 * i.e. the VMState version received from the source
 */
int load_version_id;
int section_id; /* migratable state travels in sections; QEMU assigns every se added to the savevm_state.handlers list an id, increasing from 0 */
/* section id read from the stream */
int load_section_id;
const SaveVMHandlers *ops; // hooks for live state such as RAM: setup before the transfer, the iterative transfer itself, and so on
const VMStateDescription *vmsd; // device state description, including the hooks that save and load the device state
void *opaque;
CompatEntry *compat;
int is_ram; // distinguishes whether this SaveStateEntry carries RAM or device state
} SaveStateEntry;
typedef struct SaveState {
QTAILQ_HEAD(, SaveStateEntry) handlers;
int global_section_id;
uint32_t len;
const char *name;
uint32_t target_page_bits;
uint32_t caps_count;
MigrationCapability *capabilities;
} SaveState;
static SaveState savevm_state = {
.handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
.global_section_id = 0,
};
void ram_mig_init(void)
{
qemu_mutex_init(&XBZRLE.lock);
register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
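register_savevm_live() is what links a SaveStateEntry into savevm_state.handlers. A simplified sketch of what it does internally (details vary across QEMU versions; the function name register_savevm_live_sketch is hypothetical):

static int register_savevm_live_sketch(const char *idstr, int version_id,
                                       const SaveVMHandlers *ops, void *opaque)
{
    SaveStateEntry *se = g_new0(SaveStateEntry, 1);

    se->version_id = version_id;   /* 4 for "ram" */
    se->section_id = savevm_state.global_section_id++;
    se->ops = ops;                 /* &savevm_ram_handlers */
    se->opaque = opaque;           /* &ram_state */
    se->is_ram = 1;                /* set when ops->save_setup is provided */
    pstrcpy(se->idstr, sizeof(se->idstr), idstr);
    se->instance_id = calculate_new_instance_id(se->idstr);
    savevm_state_handler_insert(se);  /* append to savevm_state.handlers */
    return 0;
}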
#define QEMU_VM_SECTION_START 0x01
#define QEMU_VM_SECTION_PART 0x02
#define QEMU_VM_SECTION_END 0x03
#define QEMU_VM_SECTION_FULL 0x04
#define QEMU_VM_SUBSECTION 0x05
#define QEMU_VM_VMDESCRIPTION 0x06
#define QEMU_VM_CONFIGURATION 0x07
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define QEMU_VM_FILE_VERSION 0x00000003
void qemu_savevm_state_header(QEMUFile *f)
{
trace_savevm_state_header();
qemu_put_be32(f, QEMU_VM_FILE_MAGIC); // send the magic; 0x5145564d is ASCII "QEVM"
qemu_put_be32(f, QEMU_VM_FILE_VERSION); // send the stream version
if (migrate_get_current()->send_configuration) { // send the configuration section if it is enabled
qemu_put_byte(f, QEMU_VM_CONFIGURATION);
vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
}
}
int qemu_loadvm_state(QEMUFile *f)
{
......
v = qemu_get_be32(f);
if (v != QEMU_VM_FILE_MAGIC) { // check the magic
error_report("Not a migration stream");
return -EINVAL;
}
......
if (v != QEMU_VM_FILE_VERSION) { // check the version
error_report("Unsupported migration stream version");
return -ENOTSUP;
}
......
}
static const VMStateDescription vmstate_configuration = {
.name = "configuration",
.version_id = 1,
.pre_load = configuration_pre_load,
.post_load = configuration_post_load,
.pre_save = configuration_pre_save, // fills in the VMState data before it is saved
.fields = (VMStateField[]) {
VMSTATE_UINT32(len, SaveState),
VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription*[]) {
&vmstate_target_page_bits,
&vmstate_capabilites,
NULL
}
};
static int configuration_pre_save(void *opaque)
{
SaveState *state = opaque;
const char *current_name = MACHINE_GET_CLASS(current_machine)->name; // look up the machine type
......
state->len = strlen(current_name); // length of the machine type name
state->name = current_name; // the machine type name itself
......
}
qemu_loadvm_state
if (migrate_get_current()->send_configuration) {
if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { // abort if there is no configuration section
error_report("Configuration section missing");
qemu_loadvm_state_cleanup();
return -EINVAL;
}
ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
......
}
vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0) // eventually invokes configuration_post_load below
static int configuration_post_load(void *opaque, int version_id)
{
SaveState *state = opaque;
const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
/* compare the received machine type with the local one; abort on mismatch */
if (strncmp(state->name, current_name, state->len) != 0) {
error_report("Machine type received is '%.*s' and local is '%s'",
(int) state->len, state->name, current_name);
return -EINVAL;
}
......
}
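For example, restoring a stream saved on a 'pc-i440fx-2.8' machine onto a destination started with a different machine type fails here with the "Machine type received is ..." error above (machine type chosen purely for illustration).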
void qemu_savevm_state_setup(QEMUFile *f)
{
SaveStateEntry *se;
Error *local_err = NULL;
int ret;
trace_savevm_state_setup();
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_setup) {
continue;
}
if (se->ops && se->ops->is_active) {
if (!se->ops->is_active(se->opaque)) {
continue;
}
}
save_section_header(f, se, QEMU_VM_SECTION_START);
ret = se->ops->save_setup(f, se->opaque);
save_section_footer(f, se);
......
}
}
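save_section_header() defines the on-wire framing of a section: the section type byte and section id are always sent, while the idstr, instance_id and version_id only accompany START and FULL sections, since PART and END sections can be matched to the earlier START by section_id alone. A sketch close to the QEMU implementation (the exact layout is version dependent):

static void save_section_header(QEMUFile *f, SaveStateEntry *se,
                                uint8_t section_type)
{
    qemu_put_byte(f, section_type);      /* START/PART/END/FULL */
    qemu_put_be32(f, se->section_id);

    if (section_type == QEMU_VM_SECTION_FULL ||
        section_type == QEMU_VM_SECTION_START) {
        size_t len = strlen(se->idstr);

        qemu_put_byte(f, len);           /* idstr length and bytes */
        qemu_put_buffer(f, (uint8_t *)se->idstr, len);
        qemu_put_be32(f, se->instance_id);
        qemu_put_be32(f, se->version_id);
    }
}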
static int ram_save_setup(QEMUFile *f, void *opaque)
{
RAMState **rsp = opaque;
RAMBlock *block;
......
/* send the total RAM size */
qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);
/* walk every RAMBlock on ram_list.blocks and send its idstr and used length */
RAMBLOCK_FOREACH_MIGRATABLE(block) {
qemu_put_byte(f, strlen(block->idstr));
qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
qemu_put_be64(f, block->used_length);
if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
qemu_put_be64(f, block->page_size);
}
if (migrate_ignore_shared()) {
qemu_put_be64(f, block->mr->addr);
qemu_put_byte(f, ramblock_is_ignored(block) ? 1 : 0);
}
}
......
/* signal the end of the section */
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
qemu_fflush(f);
......
}
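On the destination, ram_load() parses this setup section back. A simplified sketch of the matching read path (assuming none of the optional postcopy/ignore-shared fields were sent; based on the RAM_SAVE_FLAG_MEM_SIZE branch of ram_load, version dependent):

/* the be64 sent by ram_save_setup, with the flag bits masked off */
uint64_t total_ram_bytes = addr;

while (total_ram_bytes) {
    char id[256];
    ram_addr_t length;
    int len = qemu_get_byte(f);            /* idstr length */

    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;
    length = qemu_get_be64(f);             /* used_length on the source */

    /* find the local RAMBlock with the same idstr; a name or size
     * mismatch aborts the migration */
    RAMBlock *block = qemu_ram_block_by_name(id);
    if (!block || block->used_length != length) {
        return -EINVAL;
    }
    total_ram_bytes -= length;
}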
struct RAMBlock {
struct rcu_head rcu;
struct MemoryRegion *mr;
uint8_t *host; /* HVA; QEMU obtains this memory from the host via malloc */
uint8_t *colo_cache; /* For colo, VM's ram cache */
ram_addr_t offset; /* offset of this block within QEMU's ram_addr_t address space */
ram_addr_t used_length; /* length of the memory currently in use */
ram_addr_t max_length; /* length of the memory allocated */
void (*resized)(const char*, uint64_t length, void *host);
uint32_t flags;
/* Protected by iothread lock. */
char idstr[256];
/* RCU-enabled, writes protected by the ramlist lock */
QLIST_ENTRY(RAMBlock) next; /* next member of the list */
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
int fd;
size_t page_size;
/* dirty bitmap used during migration */
unsigned long *bmap;
/* bitmap of pages that haven't been sent even once
* only maintained and used in postcopy at the moment
* where it's used to send the dirtymap at the start
* of the postcopy phase
*/
unsigned long *unsentmap;
/* bitmap of already received pages in postcopy */
unsigned long *receivedmap;
};
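The relationship between these fields during migration can be summarized as follows (illustrative fragment, not QEMU code):

/* For a guest RAM address 'addr' that falls inside 'block': */
ram_addr_t in_block = addr - block->offset;        /* offset within the block */
uint8_t *hva = block->host + in_block;             /* host virtual address of the page */
unsigned long page = in_block >> TARGET_PAGE_BITS; /* bit index into block->bmap */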
cat /proc/<qemu_pid>/maps | less
This lists every virtual memory area of the QEMU process; the guest RAM regions can be located among them.
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
SaveStateEntry *se;
int ret = 1;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_iterate) {
continue;
}
if (se->ops && se->ops->is_active) {
if (!se->ops->is_active(se->opaque)) {
continue;
}
}
if (se->ops && se->ops->is_active_iterate) {
if (!se->ops->is_active_iterate(se->opaque)) {
continue;
}
}
......
/* found a suitable SaveStateEntry: first write the PART section header */
save_section_header(f, se, QEMU_VM_SECTION_PART);
/* call save_live_iterate; for the ram section this is ram_save_iterate */
ret = se->ops->save_live_iterate(f, se->opaque);
/* done sending: mark the end of the section */
save_section_footer(f, se);
......
}
ram_save_iterate
ram_find_and_save_block
pss.block = rs->last_seen_block
/* start from the first RAMBlock kept on ram_list */
if (!pss.block) {
pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
}
/* look for a RAMBlock with dirty pages according to the bitmap */
find_dirty_block(rs, &pss, &again)
/* find the next dirty page in the bitmap; returns the page index */
pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page)
ram_save_host_page(rs, &pss, last_stage)
/* send the dirty page */
ram_save_target_page(rs, pss, last_stage)
ram_save_page
save_normal_page
save_page_header
qemu_put_buffer_async
save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE)
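migration_bitmap_find_dirty() and the companion helper that clears a page's bit before it is sent are thin wrappers around the bitmap primitives. Sketches close to the migration/ram.c implementation (version dependent):

static inline unsigned long migration_bitmap_find_dirty(RAMState *rs,
                                                        RAMBlock *rb,
                                                        unsigned long start)
{
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;

    /* returns 'size' when no dirty bit exists at or after 'start' */
    return find_next_bit(rb->bmap, size, start);
}

static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret = test_and_clear_bit(page, rb->bmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }
    return ret;
}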
/**
 * save_page_header: write page header to wire
 *
 * If the page belongs to a RAMBlock other than the last one sent,
 * the block identification (idstr) is written along with it.
 *
 * Returns the number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * in the lower bits, it contains flags
 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
ram_addr_t offset)
{
size_t size, len;
if (block == rs->last_sent_block) {
offset |= RAM_SAVE_FLAG_CONTINUE;
}
qemu_put_be64(f, offset); /* send the page's offset within its RAMBlock; the low bits carry flags */
size = 8;
if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
len = strlen(block->idstr);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)block->idstr, len);
size += 1 + len;
rs->last_sent_block = block;
}
return size;
}
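Note the effect of RAM_SAVE_FLAG_CONTINUE here: because consecutive dirty pages usually belong to the same RAMBlock, most page headers shrink to the single 8-byte offset-plus-flags word; the idstr is only retransmitted when the sender moves on to a different block.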
/*
* directly send the page to the stream
*
* Returns the number of pages written.
*
* @rs: current RAM state
* @block: block that contains the page we want to send
* @offset: offset inside the block for the page
* @buf: the page to be sent
* @async: send the page asynchronously
*/
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
uint8_t *buf, bool async)
{
ram_counters.transferred += save_page_header(rs, rs->f, block,
offset | RAM_SAVE_FLAG_PAGE);
if (async) {
qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
migrate_release_ram() &
migration_in_postcopy());
} else {
qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
}
ram_counters.transferred += TARGET_PAGE_SIZE;
ram_counters.normal++;
return 1;
}
/*
* Return true if continue to the next iteration directly, false
* otherwise.
*/
static MigIterateState migration_iteration_run(MigrationState *s)
{
uint64_t pending_size, pend_pre, pend_compat, pend_post;
bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
/* pending_size: the total amount of dirty data still to be sent */
qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
&pend_compat, &pend_post);
pending_size = pend_pre + pend_compat + pend_post;
trace_migrate_pending(pending_size, s->threshold_size,
pend_pre, pend_compat, pend_post);
/* while the remaining dirty data exceeds the threshold, keep migrating */
if (pending_size && pending_size >= s->threshold_size) {
/* Still a significant amount to transfer */
if (migrate_postcopy() && !in_postcopy &&
pend_pre <= s->threshold_size &&
atomic_read(&s->start_postcopy)) {
if (postcopy_start(s)) {
error_report("%s: postcopy failed to start", __func__);
}
return MIG_ITERATE_SKIP;
}
/* Just another iteration step */
qemu_savevm_state_iterate(s->to_dst_file,
s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
} else { /* below the threshold: enter the completion phase */
trace_migration_thread_low_pending(pending_size);
migration_completion(s);
return MIG_ITERATE_BREAK;
}
return MIG_ITERATE_RESUME;
}
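threshold_size itself is derived from the measured bandwidth and the configured downtime limit. A sketch of the update, close to migration_update_counters() (version dependent):

uint64_t transferred, time_spent;
double bandwidth;

/* bytes sent and time spent during the last iteration window */
transferred = qemu_ftell(s->to_dst_file) - s->iteration_initial_bytes;
time_spent = current_time - s->iteration_start_time;
bandwidth = (double)transferred / time_spent; /* bytes per ms */

/* the amount of data that could still be sent within the allowed
 * downtime: once the remaining dirty data fits below this,
 * completion can start */
s->threshold_size = bandwidth * s->parameters.downtime_limit;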
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks)
{
QJSON *vmdesc;
int vmdesc_len;
SaveStateEntry *se;
int ret;
......
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops ||
(in_postcopy && se->ops->has_postcopy &&
se->ops->has_postcopy(se->opaque)) ||
(in_postcopy && !iterable_only) ||
!se->ops->save_live_complete_precopy) {
continue;
}
if (se->ops && se->ops->is_active) {
if (!se->ops->is_active(se->opaque)) {
continue;
}
}
trace_savevm_section_start(se->idstr, se->section_id);
save_section_header(f, se, QEMU_VM_SECTION_END);
ret = se->ops->save_live_complete_precopy(f, se->opaque);
trace_savevm_section_end(se->idstr, se->section_id, ret);
save_section_footer(f, se);
......
}
......
}
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks)
{
......
/* a JSON object records every migrated VMState; if needed it is sent to the destination when migration completes */
vmdesc = qjson_new();
json_prop_int(vmdesc, "page_size", qemu_target_page_size());
json_start_array(vmdesc, "devices");
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
/* a SaveStateEntry without a vmsd (and without a save_state op) is a RAM-style section; skip it */
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
continue;
}
if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
trace_savevm_section_skip(se->idstr, se->section_id);
continue;
}
......
json_start_object(vmdesc, NULL);
json_prop_str(vmdesc, "name", se->idstr);
json_prop_int(vmdesc, "instance_id", se->instance_id);
/* write a FULL-type section header */
save_section_header(f, se, QEMU_VM_SECTION_FULL);
/* migrate the VMState */
ret = vmstate_save(f, se, vmdesc);
......
/* append the section footer */
save_section_footer(f, se);
json_end_object(vmdesc);
}
......
}
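The resulting vmdesc describes every device section that was sent; its shape is roughly as follows (illustrative only; the exact field set depends on the devices present):

{
  "page_size": 4096,
  "devices": [
    { "name": "timer", "instance_id": 0, ... },
    { "name": "apic", "instance_id": 0, ... },
    ...
  ]
}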
Q: In the migration implementation, what is the relationship between SaveStateEntry and section, and why are both described as basic units of migration?
A: There is no one-to-one correspondence between them. SaveStateEntry is the data structure QEMU designed to organize migratable state; what it gives QEMU is "the set of operations for migrating one kind of state". A section, on the other hand, is the basic unit of the migration data stream on the wire, and exists mainly to keep the two ends synchronized. Strictly speaking, the section is the basic unit of the data stream, while SaveStateEntry is just the in-memory encapsulation of migratable state and its handlers; a single SaveStateEntry may produce many sections (START, PART, END, FULL) over the course of a migration.