Continuing from the previous post, OpenStack Cold Migration/Resize Source Code Analysis (Part 1).
/nova_queens/nova/compute/manager.py
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@wrap_instance_fault
def resize_instance(self, context, instance, image,
migration, instance_type, clean_shutdown,
request_spec=None):
"""开始将正在运行的实例迁移到另一台主机。
这是从目标主机的“prep_resize”例程启动的,并在源主机上运行。
"""
try:
self._resize_instance(context, instance, image, migration,
instance_type, clean_shutdown, request_spec)
except Exception:
with excutils.save_and_reraise_exception():
self._revert_allocation(context, instance, migration)
def _resize_instance(self, context, instance, image,
migration, instance_type, clean_shutdown,
request_spec):
# Pass instance_state=instance.vm_state because we can resize a STOPPED server,
# and we don't want to set it back to ACTIVE if migrate_disk_and_power_off
# raises InstanceFaultRollback.
instance_state = instance.vm_state
with self._error_out_instance_on_exception(
context, instance, instance_state=instance_state), \
errors_out_migration_ctxt(migration):
# Get the network info
network_info = self.network_api.get_instance_nw_info(context,
instance)
migration.status = 'migrating'
migration.save()
instance.task_state = task_states.RESIZE_MIGRATING
instance.save(expected_task_state=task_states.RESIZE_PREP)
# Get the block device mappings for the instance
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
self._send_resize_instance_notifications(
context, instance, bdms, network_info,
fields.NotificationPhase.START)
block_device_info = self._get_instance_block_device_info(
context, instance, bdms=bdms)
# Get the power-off timeout and retry interval for the instance
timeout, retry_interval = self._get_power_off_values(
instance, clean_shutdown)
# Power off the instance and migrate its disk files;
# see migrate_disk_and_power_off in the libvirt driver.py
disk_info = self.driver.migrate_disk_and_power_off(
context, instance, migration.dest_host,
instance_type, network_info,
block_device_info,
timeout, retry_interval)
# Disconnect the instance's volume connections
self._terminate_volume_connections(context, instance, bdms)
# Start migrating the instance's network
self.network_api.migrate_instance_start(context,
instance,
migration)
migration.status = 'post-migrating'
migration.save()
# Update the instance's host/node records to point at the destination
instance.host = migration.dest_compute
instance.node = migration.dest_node
instance.task_state = task_states.RESIZE_MIGRATED
instance.save(expected_task_state=task_states.RESIZE_MIGRATING)
# RPC to the destination host to finish the resize/migration;
# see the finish_resize implementation in nova/compute/manager.py
self.compute_rpcapi.finish_resize(context, instance,
migration, image, disk_info, migration.dest_compute,
request_spec)
self._send_resize_instance_notifications(
context, instance, bdms, network_info,
fields.NotificationPhase.END)
self.instance_events.clear_events_for_instance(instance)
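A quick aside on the error handling at the top of resize_instance: oslo.utils' save_and_reraise_exception() lets the manager revert the placement allocation without swallowing the original error. A minimal, standalone sketch of that pattern (cleanup_allocation is a hypothetical stand-in for self._revert_allocation):

from oslo_utils import excutils

def cleanup_allocation():
    # hypothetical stand-in for self._revert_allocation(context, instance, migration)
    print("reverting placement allocation after a failed resize")

def do_resize():
    raise RuntimeError("migrate_disk_and_power_off blew up")

try:
    try:
        do_resize()
    except Exception:
        # The context manager stashes the in-flight exception, lets the cleanup
        # code in the block run, then re-raises the original exception on exit.
        with excutils.save_and_reraise_exception():
            cleanup_allocation()
except RuntimeError as exc:
    print("the caller still sees the original failure: %s" % exc)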
/nova_queens/nova/virt/libvirt/driver.py
def migrate_disk_and_power_off(self, context, instance, dest,
flavor, network_info,
block_device_info=None,
timeout=0, retry_interval=0):
LOG.debug("Starting migrate_disk_and_power_off",
instance=instance)
# Get the ephemeral disk info
ephemerals = driver.block_device_info_get_ephemerals(block_device_info)
# get_bdm_ephemeral_disk_size() will return 0 if the new instance's requested
# block device mappings contain no ephemeral devices. However, we still want to
# check whether the original instance's ephemeral_gb was set and make sure the
# newly requested flavor's ephemeral size is not smaller.
eph_size = (block_device.get_bdm_ephemeral_disk_size(ephemerals) or
instance.flavor.ephemeral_gb)
# Check whether the migration would shrink any disk.
root_down = flavor.root_gb < instance.flavor.root_gb
ephemeral_down = flavor.ephemeral_gb < eph_size
# Check whether the instance is booted from a volume
booted_from_volume = self._is_booted_from_volume(block_device_info)
# Resizing local disks down is not supported
if (root_down and not booted_from_volume) or ephemeral_down:
reason = _("Unable to resize disk down.")
raise exception.InstanceFaultRollback(
exception.ResizeError(reason=reason))
# NOTE(dgenin): Migration is not supported for LVM backed instances.
# In other words, an LVM-backed instance that is not booted from a volume
# (i.e. image-booted on local storage) cannot be migrated.
if CONF.libvirt.images_type == 'lvm' and not booted_from_volume:
reason = _("Migration is not supported for LVM backed instances")
raise exception.InstanceFaultRollback(
exception.MigrationPreCheckError(reason=reason))
# Copy the disks to the destination.
# Rename the instance directory to +_resize first so shared storage for the
# instance directory can be used/detected.
inst_base = libvirt_utils.get_instance_path(instance)
inst_base_resize = inst_base + "_resize"
# Check whether the instance path is on shared storage
shared_instance_path = self._is_path_shared_with(dest, inst_base)
# Try to create the instance directory on the remote compute node;
# if this fails, the exception is passed up the stack so the failure is
# caught here as early as possible
if not shared_instance_path:
try:
# Non-shared storage: create the instance directory on the destination host over SSH
self._remotefs.create_dir(dest, inst_base)
except processutils.ProcessExecutionError as e:
reason = _("not able to execute ssh command: %s") % e
raise exception.InstanceFaultRollback(
exception.ResizeError(reason=reason))
# Power off the instance
self.power_off(instance, timeout, retry_interval)
# Disconnect attached volumes
block_device_mapping = driver.block_device_info_get_mapping(
block_device_info)
for vol in block_device_mapping:
connection_info = vol['connection_info']
self._disconnect_volume(context, connection_info, instance)
# Collect the instance's disk information
# (paths, types and sizes of the root, ephemeral and swap disks)
disk_info = self._get_instance_disk_info(instance, block_device_info)
try:
os.rename(inst_base, inst_base_resize)
# If we are migrating the instance over a shared instance path, create the directory here.
# If it is a remote node, the directory has already been created above.
if shared_instance_path:
# Shared storage: treat the destination as the local host
dest = None
fileutils.ensure_tree(inst_base)
on_execute = lambda process: \
self.job_tracker.add_job(instance, process.pid)
on_completion = lambda process: \
self.job_tracker.remove_job(instance, process.pid)
# Copy the instance's local disk files to the destination
for info in disk_info:
# Assume inst_base == dirname(info['path'])
img_path = info['path']
fname = os.path.basename(img_path)
from_path = os.path.join(inst_base_resize, fname)
# We will not copy the swap disk here; we rely on finish_migration to recreate it for us.
# This is fine because the OS is shut down, and since recreating a swap disk is very cheap
# it is more efficient than copying it locally or over the network. It also means we never
# have to resize it.
if fname == 'disk.swap':
continue
# Whether to enable compression for the copy
compression = info['type'] not in NO_COMPRESSION_TYPES
# Non-shared storage: remote copy (scp) to the destination host
# Shared storage: local copy with cp
libvirt_utils.copy_image(from_path, img_path, host=dest,
on_execute=on_execute,
on_completion=on_completion,
compression=compression)
# Make sure disk.info is written to the new path to avoid re-inspecting the disks
# and possibly changing their format; copy the disk.info file as well
src_disk_info_path = os.path.join(inst_base_resize, 'disk.info')
if os.path.exists(src_disk_info_path):
dst_disk_info_path = os.path.join(inst_base, 'disk.info')
libvirt_utils.copy_image(src_disk_info_path,
dst_disk_info_path,
host=dest, on_execute=on_execute,
on_completion=on_completion)
# Handle migration of vTPM data, if any
libvirt_utils.save_and_migrate_vtpm_dir(
instance.uuid, inst_base_resize, inst_base, dest,
on_execute, on_completion)
except Exception:
with excutils.save_and_reraise_exception():
self._cleanup_remote_migration(dest, inst_base,
inst_base_resize,
shared_instance_path)
return jsonutils.dumps(disk_info)
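For reference, the value returned here (and later passed to finish_resize as disk_info) is a JSON-serialized list of per-disk dicts collected by _get_instance_disk_info. The sample below is hand-written just to show the rough shape; the keys are as I remember them from the Queens libvirt driver, and the values are made up:

import json  # jsonutils.dumps emits plain JSON, so the stdlib json module illustrates the shape

sample = '''[
  {"type": "qcow2", "path": "/var/lib/nova/instances/<uuid>/disk",
   "virt_disk_size": 21474836480, "backing_file": "/var/lib/nova/instances/_base/<hash>",
   "disk_size": 1638400, "over_committed_disk_size": 21473198080},
  {"type": "qcow2", "path": "/var/lib/nova/instances/<uuid>/disk.local",
   "virt_disk_size": 1073741824, "backing_file": "", "disk_size": 197120,
   "over_committed_disk_size": 1073544704}
]'''

for info in json.loads(sample):
    # migrate_disk_and_power_off iterates entries like these, skipping disk.swap
    print(info['path'], info['type'])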
/nova_queens/nova/compute/manager.py
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@errors_out_migration
@wrap_instance_fault
def finish_resize(self, context, disk_info, image, instance,
migration, request_spec=None):
"""
完成迁移过程。设置新传输的磁盘并在其新主机上打开实例。
"""
try:
# Call _finish_resize_helper in this same file
self._finish_resize_helper(context, disk_info, image, instance,
migration, request_spec)
except Exception:
with excutils.save_and_reraise_exception():
# At this point resize_instance (running on the source) has already updated the
# instance host/node values to point at this (dest) compute, so we want to keep
# the allocation against the dest node resource provider intact and drop the
# allocation against the source node resource provider.
# If the user tries to recover the server with a hard reboot it will happen on
# this host, so that is where the allocation should live.
# Note this is the same method called from confirm_resize to clean up the
# source node allocations held by the migration record.
"""
在 finish_resize 失败后删除源节点实例上旧flavor的分配。 您可以通过硬重启来恢复实例。
"""
LOG.info('Deleting allocations for old flavor on source node '
'%s after finish_resize failure. You may be able to '
'recover the instance by hard rebooting it.',
migration.source_compute, instance=instance)
self._delete_allocation_after_move(
context, instance, migration)
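The allocation comments above lean on the Queens "migration-based allocation" model: during a resize the source-node usage is held by the migration record's UUID while the instance UUID holds the destination-node usage. A purely illustrative snapshot of what placement might report for the two consumers (provider UUIDs and resource amounts are invented):

# Hypothetical GET /allocations/{consumer_uuid} responses during an in-progress resize.
allocations_by_consumer = {
    # the migration UUID "owns" the old flavor's usage on the source node
    'migration-uuid': {'allocations': {
        'source-node-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20}},
    }},
    # the instance UUID owns the new flavor's usage on the destination node
    'instance-uuid': {'allocations': {
        'dest-node-rp-uuid': {'resources': {'VCPU': 4, 'MEMORY_MB': 4096, 'DISK_GB': 40}},
    }},
}

# On finish_resize failure the source-node allocation held by the migration is
# deleted; confirm_resize does the same cleanup; revert_resize instead moves the
# usage back to the source node.
for consumer, alloc in allocations_by_consumer.items():
    print(consumer, list(alloc['allocations']))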
Same file (nova/compute/manager.py)
def _finish_resize_helper(self, context, disk_info, image, instance,
migration):
"""完成迁移过程。如果迁移过程失败,调用者必须恢复实例的分配。"""
...
# Call _finish_resize in this same file
network_info = self._finish_resize(context, instance, migration,
disk_info, image_meta, bdms)
...
Same file
def _finish_resize(self, context, instance, migration, disk_info,
image_meta, bdms):
resize_instance = False
...
# Compare the old and new instance_type_id on the migration to tell a cold migration from a resize
if old_instance_type_id != new_instance_type_id:
...
if old_instance_type[key] != instance_type[key]:
resize_instance = True
...
# Set up networking on the destination host
self.network_api.setup_networks_on_host(context, instance,
migration['dest_compute'])
...
# Get the instance's current network info
network_info = self.network_api.get_instance_nw_info(context, instance)
# Update the instance's task_state in the database to RESIZE_FINISH
instance.task_state = task_states.RESIZE_FINISH
instance.save(expected_task_state=task_states.RESIZE_MIGRATED)
...
# Update all volume attachments using the destination host connector
# so that BDM.connection_info is refreshed before driver.finish_migration is called;
# otherwise the driver would not know how to connect the volumes to this host.
self._update_volume_attachments(context, instance, bdms)
block_device_info = self._get_instance_block_device_info(
context, instance, refresh_conn_info=True, bdms=bdms)
# If the original vm_state was STOPPED, do not automatically power the instance on after the migration
power_on = old_vm_state != vm_states.STOPPED
try:
# Call into the libvirt driver:
# see finish_migration in nova/virt/libvirt/driver.py
self.driver.finish_migration(context, migration, instance,
disk_info,
network_info,
image_meta, resize_instance,
block_device_info, power_on)
...
nova_queens/nova/virt/libvirt/driver.py
def finish_migration(self, context, migration, instance, disk_info,
network_info, image_meta, resize_instance,
block_device_info=None, power_on=True):
LOG.debug("Starting finish_migration", instance=instance)
# Build the guest disk mapping (only root_gb and ephemeral_gb disks matter here)
block_disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance,
image_meta,
block_device_info)
# Create the instance's image files on the destination
self._create_image(context, instance, block_disk_info['mapping'],
block_device_info=block_device_info,
ignore_bdi_for_swap=True,
fallback_from_host=migration.source_compute)
# Deserialize the disk info sent from the source (entries such as disk, disk.local, disk.swap)
disk_info = jsonutils.loads(disk_info)
...
if (disk_name != 'disk.config' and
info['type'] == 'raw' and CONF.use_cow_images):
self._disk_raw_to_qcow2(info['path'])
# Generate the guest (domain) XML for the instance
xml = self._get_guest_xml(context, instance, network_info,
block_disk_info, image_meta,
block_device_info=block_device_info)
# Create the domain and its network from the generated XML and configuration
guest = self._create_domain_and_network(context, xml, instance,
network_info,
block_device_info=block_device_info,
power_on=power_on,
vifs_already_plugged=True,
post_xml_callback=gen_confdrive)
# If the instance was powered on before the migration, power it on now (at this point the migration is essentially complete)
if power_on:
timer = loopingcall.FixedIntervalLoopingCall(
self._wait_for_running,
instance)
timer.start(interval=0.5).wait()
# Migration done: sync the guest's time
guest.sync_guest_time()
LOG.debug("finish_migration finished successfully.", instance=instance)
nova_queens/nova/api/openstack/compute/servers.py
@wsgi.response(204)
@wsgi.expected_errors((400, 404, 409))
@wsgi.action('confirmResize')
def _action_confirm_resize(self, req, id, body):
context = req.environ['nova.context']
context.can(server_policies.SERVERS % 'confirm_resize')
# Load the instance from the database
instance = self._get_server(context, req, id)
try:
# Call confirm_resize in nova/compute/api.py
self.compute_api.confirm_resize(context, instance)
except exception.InstanceUnknownCell as e:
raise exc.HTTPNotFound(explanation=e.format_message())
except exception.MigrationNotFound:
msg = _("Instance has not been resized.")
raise exc.HTTPBadRequest(explanation=msg)
except exception.InstanceIsLocked as e:
raise exc.HTTPConflict(explanation=e.format_message())
except exception.InstanceInvalidState as state_error:
common.raise_http_conflict_for_instance_invalid_state(state_error,
'confirmResize', id)
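For context, this handler is what the confirmResize server action maps to on the wire: a POST to the server's action URL with a {"confirmResize": null} body (revertResize, covered below, uses the same pattern). A rough sketch using requests; the endpoint, token and server id are placeholders you would normally obtain from Keystone:

import requests

NOVA_ENDPOINT = 'http://controller:8774/v2.1'   # placeholder
TOKEN = '<keystone-token>'                      # placeholder
SERVER_ID = '<instance-uuid>'                   # placeholder

resp = requests.post(
    '%s/servers/%s/action' % (NOVA_ENDPOINT, SERVER_ID),
    headers={'X-Auth-Token': TOKEN, 'Content-Type': 'application/json'},
    json={'confirmResize': None},  # use {'revertResize': None} to revert instead
)
# confirmResize answers 204 No Content on success; revertResize answers 202.
print(resp.status_code)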
nova_queens/nova/compute/api.py
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=[vm_states.RESIZED])
def confirm_resize(self, context, instance, migration=None):
"""确认迁移/调整大小并删除“旧”实例。"""
elevated = context.elevated()
# If the caller did not pass a migration, look up this instance's migration record
if migration is None:
# i.e. the record whose status is 'finished' (the resize completed on the destination)
migration = objects.Migration.get_by_instance_and_status(
elevated, instance.uuid, 'finished')
# Mark the migration as being confirmed
migration.status = 'confirming'
migration.save()
# Record the start of the CONFIRM_RESIZE action for the instance
self._record_action_start(context, instance,
instance_actions.CONFIRM_RESIZE)
# RPC call to confirm_resize in nova/compute/manager.py
self.compute_rpcapi.confirm_resize(context,
instance,
migration,
migration.source_compute)
nova_queens/nova/compute/manager.py
@wrap_exception()
@wrap_instance_event(prefix='compute')
@errors_out_migration
@wrap_instance_fault
def confirm_resize(self, context, instance, migration):
"""确认迁移/调整大小并删除“旧”实例。
这是从 API 调用并在源主机上运行的。
此时目标主机上不需要发生任何事情,因为实例已经在那里运行。 这个例程只是清理源主机。
"""
@utils.synchronized(instance.uuid)
def do_confirm_resize(context, instance, migration_id):
LOG.debug("Going to confirm migration %s", migration_id,
instance=instance)
try:
# Fetch this instance's resize record from the migrations table
migration = objects.Migration.get_by_id(
context.elevated(), migration_id)
except exception.MigrationNotFound:
LOG.error("Migration %s is not found during confirmation",
migration_id, instance=instance)
return
# If the migration has already been confirmed, it cannot be confirmed again
if migration.status == 'confirmed':
LOG.info("Migration %s is already confirmed",
migration_id, instance=instance)
return
# If the migration status is neither 'finished' nor 'confirming', the data is inconsistent; return immediately
elif migration.status not in ('finished', 'confirming'):
LOG.warning("Unexpected confirmation status '%(status)s' "
"of migration %(id)s, exit confirmation process",
{"status": migration.status, "id": migration_id},
instance=instance)
return
# Load the instance from the database; if it has already been deleted, do nothing and return here
expected_attrs = ['metadata', 'system_metadata', 'flavor']
try:
# Look up the instance by UUID
instance = objects.Instance.get_by_uuid(
context, instance.uuid,
expected_attrs=expected_attrs)
except exception.InstanceNotFound:
LOG.info("Instance is not found during confirmation",
instance=instance)
return
...
# Call _confirm_resize in this same file
self._confirm_resize(
...
# Invoke the synchronized inner function defined above
do_confirm_resize(context, instance, migration.id)
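The @utils.synchronized(instance.uuid) decorator above serializes confirmations per instance; under the hood nova.utils builds it on oslo.concurrency's lockutils. A minimal sketch of the same locking pattern (the 'nova-' prefix mirrors what I recall nova using, so treat it as an assumption):

from oslo_concurrency import lockutils

# nova.utils.synchronized is (roughly) a prefixed decorator factory like this
synchronized = lockutils.synchronized_with_prefix('nova-')

@synchronized('11111111-2222-3333-4444-555555555555')  # e.g. the instance UUID
def do_confirm_resize():
    # Only one thread at a time may run this block for the same lock name,
    # so concurrent confirms of the same instance are serialized.
    print('confirming resize under the per-instance lock')

do_confirm_resize()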
Same file
def _confirm_resize(self, context, instance, migration=None):
"""销毁源实例。"""
# Send the resize.confirm.start notification
self._notify_about_instance_usage(context, instance,
"resize.confirm.start")
compute_utils.notify_about_instance_action(context, instance,
self.host, action=fields.NotificationAction.RESIZE_CONFIRM,
phase=fields.NotificationPhase.START)
# Delete the stashed migration/flavor information; the instance now uses the new flavor for good
old_instance_type = instance.old_flavor
instance.old_flavor = None
instance.new_flavor = None
instance.system_metadata.pop('old_vm_state', None)
instance.save()
# Tear down networking for the instance on the source host
self.network_api.setup_networks_on_host(context, instance,
migration.source_compute, teardown=True)
# Get the instance's current network info
network_info = self.network_api.get_instance_nw_info(context,
instance)
# NOTE(adrianc): Populate old PCI device in VIF profile
# to allow virt driver to properly unplug it from Hypervisor.
pci_mapping = (instance.migration_context.
get_pci_mapping_for_migration(True))
network_info = self._get_updated_nw_info_with_pci_mapping(
network_info, pci_mapping)
# Get the BDMs here and pass them along to the driver;
# call confirm_migration in the libvirt driver.py
self.driver.confirm_migration(context, migration, instance,
network_info)
# Mark the migration as confirmed
migration.status = 'confirmed'
with migration.obj_as_admin():
migration.save()
rt = self._get_resource_tracker()
rt.drop_move_claim(context, instance, migration.source_node,
old_instance_type, prefix='old_')
instance.drop_migration_context()
# NOTE(mriedem): The old_vm_state could be STOPPED but the user
# might have manually powered up the instance to confirm the
# resize/migrate, so we need to check the current power state
# on the instance and set the vm_state appropriately. We default
# to ACTIVE because if the power state is not SHUTDOWN, we
# assume _sync_instance_power_state will clean it up.
p_state = instance.power_state
vm_state = None
if p_state == power_state.SHUTDOWN:
vm_state = vm_states.STOPPED
LOG.debug("Resized/migrated instance is powered off. "
"Setting vm_state to '%s'.", vm_state,
instance=instance)
else:
vm_state = vm_states.ACTIVE
instance.vm_state = vm_state
instance.task_state = None
instance.save(expected_task_state=[None, task_states.DELETING,
task_states.SOFT_DELETING])
# Send the resize.confirm.end notification
self._notify_about_instance_usage(
context, instance, "resize.confirm.end",
network_info=network_info)
compute_utils.notify_about_instance_action(context, instance,
self.host, action=fields.NotificationAction.RESIZE_CONFIRM,
phase=fields.NotificationPhase.END)
nova_queens/nova/virt/libvirt/driver.py
def confirm_migration(self, context, migration, instance, network_info):
"""确认调整大小,摧毁源虚拟机。"""
# Call _cleanup_resize in this same file
self._cleanup_resize(context, instance, network_info)
Same file
def _cleanup_resize(self, context, instance, network_info):
inst_base = libvirt_utils.get_instance_path(instance)
target = inst_base + '_resize'
if os.path.exists(target):
# Remove the _resize directory, retrying up to 5 times to ride out transient failures
utils.execute('rm', '-rf', target, delay_on_retry=True,
attempts=5)
...
if instance.host != CONF.host:
# Undefine the domain via libvirt
self._undefine_domain(instance)
# Clean up network (VIF) and firewall configuration
self.unplug_vifs(instance, network_info)
self.unfilter_instance(instance, network_info)
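The "rm -rf ... attempts=5" call above goes through nova.utils.execute, which is a thin wrapper around oslo.concurrency's processutils. A minimal sketch of the retry knobs being used (the path is just an illustration):

from oslo_concurrency import processutils

# Run the command, retrying up to 5 times; delay_on_retry makes processutils
# sleep between attempts instead of retrying immediately.
out, err = processutils.execute(
    'rm', '-rf', '/tmp/example-instance-dir_resize',  # illustrative path only
    attempts=5, delay_on_retry=True)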
nova_queens/nova/api/openstack/compute/servers.py
@wsgi.response(202)
@wsgi.expected_errors((400, 404, 409))
@wsgi.action('revertResize')
def _action_revert_resize(self, req, id, body):
context = req.environ['nova.context']
context.can(server_policies.SERVERS % 'revert_resize')
# Load the instance from the database
instance = self._get_server(context, req, id)
try:
# Call revert_resize in nova/compute/api.py
self.compute_api.revert_resize(context, instance)
...
nova_queens/nova/compute/api.py
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=[vm_states.RESIZED])
def revert_resize(self, context, instance):
"""恢复调整大小,删除过程中的“新”实例。"""
# 回滚resize,并将新实例删除
elevated = context.elevated()
# Get the migration record (status 'finished') from the migrations table
migration = objects.Migration.get_by_instance_and_status(
elevated, instance.uuid, 'finished')
# If this was a resize down, reverting it might go over quota.
self._check_quota_for_upsize(context, instance, instance.flavor,
instance.old_flavor)
...
# Set the instance's task_state to RESIZE_REVERTING
instance.task_state = task_states.RESIZE_REVERTING
instance.save(expected_task_state=[None])
# Set the migration status to 'reverting'
migration.status = 'reverting'
migration.save()
# Record the start of the REVERT_RESIZE action
self._record_action_start(context, instance,
instance_actions.REVERT_RESIZE)
# During the initial resize the conductor updated RequestSpec.flavor to point at the new flavor,
# so we need to point the RequestSpec back at the old flavor, otherwise subsequent move
# operations through the scheduler would use the wrong flavor.
try:
reqspec = objects.RequestSpec.get_by_instance_uuid(
context, instance.uuid)
reqspec.flavor = instance.old_flavor
reqspec.save()
...
# RPC call to revert_resize in nova/compute/manager.py
self.compute_rpcapi.revert_resize(context, instance,
migration,
migration.dest_compute)
nova_queens/nova/compute/manager.py
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@errors_out_migration
@wrap_instance_fault
def revert_resize(self, context, instance, migration):
"""
销毁目标机器上的新实例。 回滚resize改变,并给源主机上的旧实例开机。
"""
# revert_resize is essentially a resize back to the old size, so we need to send a usage event here.
compute_utils.notify_usage_exists(self.notifier, context, instance,
current_period=True)
with self._error_out_instance_on_exception(context, instance):
# Tear down networking on the destination host
self.network_api.setup_networks_on_host(context, instance,
teardown=True)
# Convert the migration object to a primitive for the network API
migration_p = obj_base.obj_to_primitive(migration)
# Start migrating the instance's network back
self.network_api.migrate_instance_start(context,
instance,
migration_p)
# Get the instance's network info and block device info
network_info = self.network_api.get_instance_nw_info(context,
instance)
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
block_device_info = self._get_instance_block_device_info(
context, instance, bdms=bdms)
destroy_disks = not self._is_instance_storage_shared(
context, instance, host=migration.source_compute)
# Destroy the new instance on the destination host
self.driver.destroy(context, instance, network_info,
block_device_info, destroy_disks)
# Disconnect the volume connections
self._terminate_volume_connections(context, instance, bdms)
# Update the migration status
migration.status = 'reverted'
...
# RPC back to the source host to finish the revert there;
# see finish_revert_resize in nova/compute/manager.py
self.compute_rpcapi.finish_revert_resize(context, instance,
migration, migration.source_compute)
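All of these compute_rpcapi calls (finish_resize, revert_resize, finish_revert_resize, ...) eventually become an oslo.messaging cast targeted at the nova-compute service on a specific host. A stripped-down sketch of that pattern; the real wrapper in nova/compute/rpcapi.py adds version pinning and object serialization on top, and the CONF/version values here are placeholders:

import oslo_messaging as messaging
from oslo_config import cfg

CONF = cfg.CONF  # assumes transport_url etc. are already configured

transport = messaging.get_rpc_transport(CONF)
target = messaging.Target(topic='compute', version='5.0')  # version is illustrative
client = messaging.RPCClient(transport, target)

def finish_revert_resize(context, instance, migration, host):
    # Pin the call to the compute service running on the given host and
    # fire-and-forget ("cast") the method with its kwargs.
    cctxt = client.prepare(server=host)
    cctxt.cast(context, 'finish_revert_resize',
               instance=instance, migration=migration)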
Same file
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@errors_out_migration
@wrap_instance_fault
def finish_revert_resize(self, context, instance, migration):
"""
完成在源主机上恢复调整大小的后半部分。
恢复原始源实例状态(活动/关闭)并恢复数据库中调整大小的属性。
"""
...
# Send the resize.revert.start notification
self._notify_about_instance_usage(
context, instance, "resize.revert.start")
compute_utils.notify_about_instance_action(context, instance,
self.host, action=fields.NotificationAction.RESIZE_REVERT,
phase=fields.NotificationPhase.START, bdms=bdms)
# Revert the instance's database records (flavor, host, node)
old_vm_state = instance.system_metadata.pop('old_vm_state',
vm_states.ACTIVE)
self._set_instance_info(instance, instance.old_flavor)
instance.old_flavor = None
instance.new_flavor = None
instance.host = migration.source_compute
instance.node = migration.source_node
instance.save()
self._revert_allocation(context, instance, migration)
# Set up networking on the source host
self.network_api.setup_networks_on_host(context, instance,
migration.source_compute)
migration_p = obj_base.obj_to_primitive(migration)
...
migration_p['dest_compute'] = migration.source_compute
# Finish migrating the instance's network back to the source
self.network_api.migrate_instance_finish(context,
instance,
migration_p)
network_info = self.network_api.get_instance_nw_info(context,
instance)
...
self._update_volume_attachments(context, instance, bdms)
# Get the block device info (refreshing the connection info)
block_device_info = self._get_instance_block_device_info(
context, instance, refresh_conn_info=True, bdms=bdms)
power_on = old_vm_state != vm_states.STOPPED
# Finish reverting the migration: call finish_revert_migration in nova/virt/libvirt/driver.py
self.driver.finish_revert_migration(context, instance,
network_info,
block_device_info, power_on)
instance.drop_migration_context()
instance.launched_at = timeutils.utcnow()
# Save the instance's task_state change
instance.save(expected_task_state=task_states.RESIZE_REVERTING)
self._complete_volume_attachments(context, bdms)
LOG.info("Updating instance to original state: '%s'",
old_vm_state, instance=instance)
# Set the instance's vm_state according to the power-on decision
if power_on:
instance.vm_state = vm_states.ACTIVE
instance.task_state = None
instance.save()
else:
instance.task_state = task_states.POWERING_OFF
instance.save()
self.stop_instance(context, instance=instance,
clean_shutdown=True)
# Send the resize.revert.end notification
self._notify_about_instance_usage(
context, instance, "resize.revert.end")
compute_utils.notify_about_instance_action(context, instance,
self.host, action=fields.NotificationAction.RESIZE_REVERT,
phase=fields.NotificationPhase.END, bdms=bdms)
nova_queens/nova/virt/libvirt/driver.py
def finish_revert_migration(self, context, instance, network_info,
block_device_info=None, power_on=True):
LOG.debug("Starting finish_revert_migration",
instance=instance)
# Get the instance path and the corresponding _resize path
inst_base = libvirt_utils.get_instance_path(instance)
inst_base_resize = inst_base + "_resize"
# If we are recovering from a failed migration, make sure there is no leftover
# same-host base directory that would conflict.
# Concretely: if the inst_base_resize path exists, the conflicting inst_base is
# cleaned up and inst_base_resize is moved back into place.
if os.path.exists(inst_base_resize):
self._cleanup_failed_migration(inst_base)
utils.execute('mv', inst_base_resize, inst_base)
# Roll back the resize snapshot on the root disk, if one exists
root_disk = self.image_backend.by_name(instance, 'disk')
if root_disk.exists():
try:
root_disk.rollback_to_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
except exception.SnapshotNotFound:
LOG.warning("Failed to rollback snapshot (%s)",
libvirt_utils.RESIZE_SNAPSHOT_NAME)
finally:
root_disk.remove_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME,
ignore_errors=True)
# Build the disk info for the guest
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance,
instance.image_meta,
block_device_info)
# Generate the guest XML from the info and configuration above, then recreate the domain
xml = self._get_guest_xml(context, instance, network_info, disk_info,
instance.image_meta,
block_device_info=block_device_info)
self._create_domain_and_network(context, xml, instance, network_info,
block_device_info=block_device_info,
power_on=power_on,
vifs_already_plugged=True)
if power_on:
timer = loopingcall.FixedIntervalLoopingCall(
self._wait_for_running,
instance)
timer.start(interval=0.5).wait()
LOG.debug("finish_revert_migration finished successfully.",
instance=instance)
That is roughly the detailed flow of cold migration/Resize. I have not yet dug into the specifics of quota handling during the preparation phase, or the resource and quota details during rollback.
If you want to do enhancement work here, or add a new feature such as live flavor resizing, those points are basically unavoidable.
But this lazy author still has to sleep in, cook good food, feed the cats, play games and watch NASA's latest footage, so who knows when that will happen.