这篇博文开始,我们来解析一下Nova虚拟机动态迁移的实现源码。这里不会再像前面那样对代码进行逐行的详细解析,目的是来整理Nova虚拟机动态迁移的实现步骤和相关源码部分架构。
首先我们来看虚拟机动态迁移在OpenStack Nova中的API请求处理函数/nova/api/openstack/compute/contrib/admin_actions.py----def _migrate_live:
def _migrate_live(self, req, id, body):
    """Handle the ``os-migrateLive`` admin action for a server.

    :param req: incoming request; the Nova context is read from
        ``req.environ["nova.context"]``
    :param id: identifier of the instance to migrate
    :param body: request body; ``body["os-migrateLive"]`` must provide
        ``block_migration``, ``disk_over_commit`` and ``host``
    :returns: a ``webob.Response`` with status 202
    :raises exc.HTTPBadRequest: when a required key is missing or when
        the migration request fails for any reason
    """
    context = req.environ["nova.context"]
    authorize(context, 'migrateLive')

    try:
        params = body["os-migrateLive"]
        block_migration = params["block_migration"]
        disk_over_commit = params["disk_over_commit"]
        host = params["host"]
    except (TypeError, KeyError):
        msg = _("host and block_migration must be specified.")
        raise exc.HTTPBadRequest(explanation=msg)

    try:
        instance = self.compute_api.get(context, id)
        self.compute_api.live_migrate(context, instance, block_migration,
                                      disk_over_commit, host)
    except (exception.ComputeServiceUnavailable,
            exception.InvalidHypervisorType,
            exception.UnableToMigrateToSelf,
            exception.DestinationHypervisorTooOld) as ex:
        raise exc.HTTPBadRequest(explanation=ex.format_message())
    except Exception:
        # Format with an explicit mapping rather than locals(): the
        # placeholders then no longer depend on local variable names.
        if host is None:
            msg = _("Live migration of instance %(id)s to another host"
                    " failed") % {'id': id}
        else:
            msg = _("Live migration of instance %(id)s to host %(host)s"
                    " failed") % {'id': id, 'host': host}
        LOG.exception(msg)
        # Any other failure is logged and reported to the client as 400.
        raise exc.HTTPBadRequest(explanation=msg)

    return webob.Response(status_int=202)
在这个方法中我们可以看到,实现虚拟机动态迁移主要实现的语句就是:
self.compute_api.live_migrate(context, instance, block_migration, disk_over_commit, host)
我们接着看方法live_migrate的源码:
-
-
@check_instance_state(vm_state=[vm_states.ACTIVE])
def live_migrate(self, context, instance, block_migration,
                 disk_over_commit, host_name):
    """Mark the instance as migrating and hand the job to the scheduler.

    :param host_name: requested destination; a falsy value is logged as
        "another host" and forwarded unchanged to the scheduler
    """
    target_label = host_name or "another host"
    LOG.debug(_("Going to try to live migrate instance to %s"),
              target_label, instance=instance)

    # Record the MIGRATING task state; expected_task_state=None is
    # forwarded together with the update.
    state_change = {'task_state': task_states.MIGRATING,
                    'expected_task_state': None}
    instance = self.update(context, instance, **state_change)

    self.scheduler_rpcapi.live_migration(context, block_migration,
                                         disk_over_commit, instance,
                                         host_name)
这个方法中,主要实现了两部分内容,更新记录中的实例信息和在主题topic的模式上发送实时迁移虚拟机的消息。
我们先来看更新记录中的实例信息这部分内容,主要是通过调用update方法实现的。具体来看方法update的实现源码:
def update(self, context, instance, **kwargs):
    """Apply ``kwargs`` to ``instance`` and return the updated copy.

    Delegates to ``self._update`` and keeps only the second element of
    the pair it returns.
    """
    previous, refreshed = self._update(context, instance, **kwargs)
    return refreshed
def _update(self, context, instance, **kwargs):
    """Persist ``kwargs`` on the instance and announce the change.

    :returns: ``(old, new)`` as plain dicts built from the two references
        returned by ``self.db.instance_update_and_get_original``
    """
    refs = self.db.instance_update_and_get_original(
        context, instance['uuid'], kwargs)
    before, after = refs

    # Report the change; the notification is tagged with service "api".
    notifications.send_update(context, before, after, service="api")

    return dict(before.iteritems()), dict(after.iteritems())
方法_update主要实现了两方面的内容,一是实现更新实例信息,二是发送通知,来报告实例中发生的任何改变。
先来看方法instance_update_and_get_original的实现源码:
def instance_update_and_get_original(context, instance_uuid, values):
    """Update an instance through the backend and forward it to cells.

    :returns: the value returned by
        ``IMPL.instance_update_and_get_original``; its element 1 is what
        gets passed to the cells API
    """
    result = IMPL.instance_update_and_get_original(context, instance_uuid,
                                                   values)
    try:
        cells_api = cells_rpcapi.CellsAPI()
        cells_api.instance_update_at_top(context, result[1])
    except Exception:
        # Best effort: a failed cells notification is only logged.
        LOG.exception(_("Failed to notify cells of instance update"))
    return result
def instance_update_and_get_original(context, instance_uuid, values):
    """Update the instance and return ``(old_instance, new_instance)``.

    Thin wrapper around ``_instance_update`` that always asks for a copy
    of the instance as it was before the update.
    """
    old_and_new = _instance_update(context, instance_uuid, values,
                                   copy_old_instance=True)
    return old_and_new
def _instance_update(context, instance_uuid, values, copy_old_instance=False):
    """Apply ``values`` to the instance identified by ``instance_uuid``.

    ``values`` is modified in place: ``expected_task_state``, ``metadata``
    and ``system_metadata`` are popped from it before the remaining keys
    are written to the instance.

    :param copy_old_instance: when true, a shallow copy of the instance
        taken before the update is returned as the first tuple element
    :returns: ``(old_instance_or_None, updated_instance)``
    :raises exception.InvalidUUID: ``instance_uuid`` is not UUID-like
    :raises exception.UnexpectedTaskStateError: the stored task state is
        not one of the expected ones
    """
    session = get_session()

    if not uuidutils.is_uuid_like(instance_uuid):
        raise exception.InvalidUUID(instance_uuid)

    with session.begin():
        row = _instance_get_by_uuid(context, instance_uuid, session=session)

        if "expected_task_state" in values:
            # A scalar expectation is normalised to a one-element tuple.
            allowed = values.pop("expected_task_state")
            if not isinstance(allowed, (tuple, list, set)):
                allowed = (allowed,)
            current = row["task_state"]
            if current not in allowed:
                raise exception.UnexpectedTaskStateError(actual=current,
                                                         expected=allowed)

        # Only validate uniqueness when the hostname actually changes
        # (compared case-insensitively).
        current_hostname = row['hostname'] or ''
        if "hostname" in values:
            if values["hostname"].lower() != current_hostname.lower():
                _validate_unique_server_name(context, session,
                                             values['hostname'])

        previous = copy.copy(row) if copy_old_instance else None

        # Both metadata flavours are handled by the same in-place helper
        # and removed from ``values``.
        metadata_kinds = (
            ('metadata', models.InstanceMetadata),
            ('system_metadata', models.InstanceSystemMetadata),
        )
        for key, model in metadata_kinds:
            if values.get(key) is not None:
                _instance_metadata_update_in_place(context, row, key, model,
                                                   values.pop(key), session)

        row.update(values)
        row.save(session=session)

    return (previous, row)
再来看方法_update中实现发送通知,来报告实例中发生的任何改变的实现内容,即调用了方法send_update来实现的:
def send_update(context, old_instance, new_instance, service=None, host=None):
    """Send a notification describing a change to an instance.

    Nothing is sent when both ``CONF.notify_on_any_change`` and
    ``CONF.notify_on_state_change`` are unset. A vm_state change (or a
    task_state change when ``notify_on_state_change`` is
    "vm_and_task_state") goes through ``send_update_with_states``; any
    other change goes through ``_send_instance_update_notification``,
    whose failures are logged rather than raised.
    """
    if not (CONF.notify_on_any_change or CONF.notify_on_state_change):
        return

    vm_before = old_instance["vm_state"]
    vm_after = new_instance["vm_state"]
    task_before = old_instance["task_state"]
    task_after = new_instance["task_state"]

    if vm_before != vm_after:
        state_changed = True
    else:
        mode = CONF.notify_on_state_change
        state_changed = bool(mode and
                             mode.lower() == "vm_and_task_state" and
                             task_before != task_after)

    if state_changed:
        send_update_with_states(context, new_instance, vm_before,
                                vm_after, task_before, task_after,
                                service, host)
        return

    try:
        _send_instance_update_notification(context, new_instance,
                                           service=service, host=host)
    except Exception:
        LOG.exception(_("Failed to send state update notification"),
                      instance=new_instance)
我们再回到方法live_migrate中,来看第二部分主要实现的内容,即调用方法live_migration来实现在主题topic的模式上发送实时迁移虚拟机的消息。来看方法live_migration:
def live_migration(self, ctxt, block_migration, disk_over_commit,
                   instance, dest):
    """Send a ``live_migration`` message via ``self.call``.

    The instance is converted with ``jsonutils.to_primitive`` before it
    is placed in the message.

    :returns: whatever ``self.call`` returns
    """
    message = self.make_msg('live_migration',
                            block_migration=block_migration,
                            disk_over_commit=disk_over_commit,
                            instance=jsonutils.to_primitive(instance),
                            dest=dest)
    return self.call(ctxt, message)
这里调用方法call来实现在一个主题topic上发送一条消息(进行实时迁移操作),并等待响应。这里将调用/nova/scheduler/manager.py----live_migration这个方法来执行实时迁移的调度的方法,并返回实例当前运行的主机。具体来看方法的实现源码:
def live_migration(self, context, instance, dest, block_migration,
                   disk_over_commit):
    """Ask the scheduler driver to live-migrate ``instance``.

    :returns: the driver's ``schedule_live_migration`` result
    :raises: any exception raised by the driver is re-raised after the
        instance state has been updated through
        ``_set_vm_state_and_notify``
    """
    try:
        return self.driver.schedule_live_migration(
            context, instance, dest, block_migration, disk_over_commit)
    except (exception.ComputeServiceUnavailable,
            exception.InvalidHypervisorType,
            exception.UnableToMigrateToSelf,
            exception.DestinationHypervisorTooOld,
            exception.InvalidLocalStorage,
            exception.InvalidSharedStorage) as ex:
        # Listed failures: keep the current vm_state and clear task_state.
        request_spec = {'instance_properties': {'uuid': instance['uuid']}}
        with excutils.save_and_reraise_exception():
            updates = {'vm_state': instance['vm_state'],
                       'task_state': None,
                       'expected_task_state': task_states.MIGRATING}
            self._set_vm_state_and_notify('live_migration', updates,
                                          context, ex, request_spec)
    except Exception as ex:
        # Anything else puts the instance into the ERROR vm_state.
        with excutils.save_and_reraise_exception():
            self._set_vm_state_and_notify('live_migration',
                                          dict(vm_state=vm_states.ERROR),
                                          context, ex, {})
def schedule_live_migration(self, context, instance, dest,
                            block_migration, disk_over_commit):
    """Validate (or choose) a destination and start the live migration.

    With an explicit ``dest`` the destination, common and compute-side
    checks are run once. With ``dest`` set to None a host is requested
    repeatedly; each candidate that fails a check with
    ``exception.Invalid`` is added to the ignore list and another one is
    requested. Finally the migration is started on the instance's
    current host.
    """
    self._live_migration_src_check(context, instance)

    if dest is not None:
        # Caller named the destination: run every check exactly once.
        self._live_migration_dest_check(context, instance, dest)
        self._live_migration_common_check(context, instance, dest)
        migrate_data = self.compute_rpcapi.\
            check_can_live_migrate_destination(context, instance, dest,
                                               block_migration,
                                               disk_over_commit)
    else:
        # The source host itself is never a candidate.
        rejected_hosts = [instance['host']]
        while dest is None:
            dest = self._live_migration_dest_check(context, instance, dest,
                                                   rejected_hosts)
            try:
                self._live_migration_common_check(context, instance, dest)
                migrate_data = self.compute_rpcapi.\
                    check_can_live_migrate_destination(context, instance,
                                                       dest,
                                                       block_migration,
                                                       disk_over_commit)
            except exception.Invalid:
                # Candidate unsuitable: remember it and ask again.
                rejected_hosts.append(dest)
                dest = None

    self.compute_rpcapi.live_migration(context,
                                       host=instance['host'],
                                       instance=instance,
                                       dest=dest,
                                       block_migration=block_migration,
                                       migrate_data=migrate_data)
可以看见,在方法schedule_live_migration中,主要进行了三部分的内容,第一,如果目标主机未指定(dest为None),则由调度算法选取一个目标主机,并且进行相关的检测,确保能够进行实时迁移操作;第二,如果目标主机存在,则直接进行相关的检测操作,确保能够进行实时迁移操作;第三,执行迁移操作。
在前两部分的内容中,分别调用了三个方法_live_migration_dest_check、_live_migration_common_check和
check_can_live_migrate_destination。我们分别来看这三个方法:
首先来看方法_live_migration_dest_check,具体来看它的源码:
def _live_migration_dest_check(self, context, instance_ref, dest,
                               ignore_hosts=None):
    """Choose a destination host, or validate the one supplied.

    When ``dest`` is None a request spec is built from the instance, its
    instance type and its image, and the first host returned by
    ``self.select_hosts`` is returned without running the checks below.
    Otherwise ``dest`` is checked and returned.

    :param ignore_hosts: passed to ``select_hosts`` as the
        ``ignore_hosts`` filter property
    :raises exception.UnableToMigrateToSelf: ``dest`` is the source host
    :raises exception.ComputeServiceUnavailable: no compute service is
        found for ``dest``, or the service is not up
    """
    if dest is None:
        flavor = db.instance_type_get(context,
                                      instance_ref['instance_type_id'])
        image = self.image_service.show(context, instance_ref['image_ref'])
        spec = {'instance_properties': instance_ref,
                'instance_type': flavor,
                'instance_uuids': [instance_ref['uuid']],
                'image': image}
        candidates = self.select_hosts(context, spec,
                                       {'ignore_hosts': ignore_hosts})
        return candidates[0]

    if dest == instance_ref['host']:
        raise exception.UnableToMigrateToSelf(
            instance_id=instance_ref['uuid'], host=dest)

    try:
        dest_service = db.service_get_by_compute_host(context, dest)
    except exception.NotFound:
        raise exception.ComputeServiceUnavailable(host=dest)

    if not self.servicegroup_api.service_is_up(dest_service):
        raise exception.ComputeServiceUnavailable(host=dest)

    self._assert_compute_node_has_enough_memory(context, instance_ref, dest)

    return dest
这个方法中,将会判断是否定义了目标主机dest,如果没有定义目标主机,将会调用合适的scheduler算法来获取合适的主机,作为实时迁移的目标主机。然后会针对目标主机进行一系列的检查操作。
再来看方法_live_migration_common_check,看看它的源码实现:
- def _live_migration_common_check(self, context, instance_ref, dest):
-
-
-
-
- dservice_ref = self._get_compute_info(context, dest)
- src = instance_ref['host']
-
- oservice_ref = self._get_compute_info(context, src)
-
-
- orig_hypervisor = oservice_ref['hypervisor_type']
- dest_hypervisor = dservice_ref['hypervisor_type']
- if orig_hypervisor != dest_hypervisor:
- raise exception.InvalidHypervisorType()
-
-
- orig_hypervisor = oservice_ref['hypervisor_version']
- dest_hypervisor = dservice_ref['hypervisor_version']
- if orig_hypervisor > dest_hypervisor:
- raise exception.DestinationHypervisorTooOld()
在这个方法中,主要实现了对源主机和目标主机上的虚拟机管理程序的类型和版本进行检查。
最后来看方法check_can_live_migrate_destination,这个方法实现的是在目标主机上检测是否可以进行实时迁移,并将检测结果返回给源主机。来看看它的源码实现:
def check_can_live_migrate_destination(self, ctxt, instance, destination,
                                       block_migration, disk_over_commit):
    """Send ``check_can_live_migrate_destination`` via ``self.call``.

    The topic is computed by ``_compute_topic`` from ``self.topic`` and
    ``destination``.

    :returns: whatever ``self.call`` returns
    """
    message = self.make_msg('check_can_live_migrate_destination',
                            instance=jsonutils.to_primitive(instance),
                            block_migration=block_migration,
                            disk_over_commit=disk_over_commit)
    target_topic = _compute_topic(self.topic, ctxt, destination, None)
    return self.call(ctxt, message, topic=target_topic)
这里调用call方法实现在一个主题topic上发送一条远程消息,实现在目标主机上进行检测是否可以进行实时迁移,并等待响应。
接下来将会执行/nova/compute/manager.py中的方法check_can_live_migrate_destination,这个方法实现了在目标主机上进行检测是否可以进行实时迁移。具体来看方法的实现代码:
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def check_can_live_migrate_destination(self, ctxt, instance,
                                       block_migration=False,
                                       disk_over_commit=False):
    """Run the destination checks, then ask the source to run its own.

    The driver's cleanup hook is always called after the source check,
    whether or not that check succeeded.

    :returns: the source check's result, extended with
        ``dest_check_data['migrate_data']`` when that key is present
    """
    source_info = self._get_compute_info(ctxt, instance['host'])
    local_info = self._get_compute_info(ctxt, CONF.host)

    dest_check_data = self.driver.check_can_live_migrate_destination(
        ctxt, instance, source_info, local_info,
        block_migration, disk_over_commit)

    try:
        result = self.compute_rpcapi.check_can_live_migrate_source(
            ctxt, instance, dest_check_data)
    finally:
        self.driver.check_can_live_migrate_destination_cleanup(
            ctxt, dest_check_data)

    if dest_check_data and 'migrate_data' in dest_check_data:
        result.update(dest_check_data['migrate_data'])
    return result
这个方法中继而调用了以下方法,这里不再做一一解释:
def check_can_live_migrate_destination(self, ctxt, instance_ref,
                                       src_compute_info, dst_compute_info,
                                       block_migration=False,
                                       disk_over_commit=False):
    """Run the destination-side checks and prepare data for the source.

    :param ctxt: not used by this method
    :param instance_ref: not used by this method
    :param src_compute_info: source host info; ``cpu_info`` is read
    :param dst_compute_info: destination host info;
        ``disk_available_least`` is read for block migrations
    :param block_migration: when true, the available disk is computed
    :param disk_over_commit: echoed back in the returned dict
    :returns: dict with ``filename``, ``block_migration``,
        ``disk_over_commit`` and ``disk_available_mb`` (None unless
        ``block_migration`` is true)
    """
    disk_available_mb = None
    if block_migration:
        # disk_available_least is treated as GB: convert it to MB and
        # subtract the configured reservation.
        disk_available_gb = dst_compute_info['disk_available_least']
        disk_available_mb = ((disk_available_gb * 1024) -
                             CONF.reserved_host_disk_mb)

    self._compare_cpu(src_compute_info['cpu_info'])

    # The file name is handed back so the source side can look for it.
    filename = self._create_shared_storage_test_file()

    return {"filename": filename,
            "block_migration": block_migration,
            "disk_over_commit": disk_over_commit,
            "disk_available_mb": disk_available_mb}
def check_can_live_migrate_source(self, ctxt, instance, dest_check_data):
    """Send ``check_can_live_migrate_source`` via ``self.call``.

    The topic is computed by ``_compute_topic`` from ``self.topic`` and
    ``instance``.

    :returns: whatever ``self.call`` returns
    """
    message = self.make_msg('check_can_live_migrate_source',
                            instance=jsonutils.to_primitive(instance),
                            dest_check_data=dest_check_data)
    target_topic = _compute_topic(self.topic, ctxt, None, instance)
    return self.call(ctxt, message, topic=target_topic)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def check_can_live_migrate_source(self, ctxt, instance, dest_check_data):
    """Record whether the instance is volume backed, then ask the driver.

    ``dest_check_data`` is modified in place: ``is_volume_backed`` is
    set before it is passed to the driver.

    :returns: the driver's ``check_can_live_migrate_source`` result
    """
    bdms = self.conductor_api.block_device_mapping_get_all_by_instance(
        ctxt, instance)
    dest_check_data['is_volume_backed'] = (
        self.compute_api.is_volume_backed_instance(ctxt, instance, bdms))

    return self.driver.check_can_live_migrate_source(ctxt, instance,
                                                     dest_check_data)
def check_can_live_migrate_source(self, ctxt, instance_ref,
                                  dest_check_data):
    """Check on the source host that the requested migration is possible.

    :param dest_check_data: dict providing ``filename``,
        ``block_migration`` and optionally ``is_volume_backed``; for a
        block migration ``disk_available_mb`` and ``disk_over_commit``
        are read as well
    :returns: ``dest_check_data`` with ``is_shared_storage`` added
    :raises exception.InvalidLocalStorage: block migration requested but
        the shared-storage test file was found
    :raises exception.InvalidSharedStorage: plain live migration
        requested, the test file was not found and the instance is not
        volume backed
    """
    source = CONF.host
    filename = dest_check_data["filename"]
    block_migration = dest_check_data["block_migration"]
    is_volume_backed = dest_check_data.get('is_volume_backed', False)

    shared = self._check_shared_storage_test_file(filename)

    if block_migration and shared:
        reason = _("Block migration can not be used "
                   "with shared storage.")
        raise exception.InvalidLocalStorage(reason=reason, path=source)

    if block_migration:
        self._assert_dest_node_has_enough_disk(
            ctxt, instance_ref,
            dest_check_data['disk_available_mb'],
            dest_check_data['disk_over_commit'])
    elif not (shared or is_volume_backed):
        reason = _("Live migration can not be used "
                   "without shared storage.")
        raise exception.InvalidSharedStorage(reason=reason, path=source)

    dest_check_data.update(is_shared_storage=shared)
    return dest_check_data
我们再来看方法schedule_live_migration中的第三部分内容,即执行迁移操作。
先来回顾其中实现这部分的源码:
-
# Start the migration on the host the instance currently runs on.
source_host = instance['host']
self.compute_rpcapi.live_migration(
    context,
    host=source_host,
    instance=instance,
    dest=dest,
    block_migration=block_migration,
    migrate_data=migrate_data)
def live_migration(self, ctxt, instance, dest, block_migration, host,
                   migrate_data=None):
    """Send a ``live_migration`` message via ``self.cast``.

    The topic is computed by ``_compute_topic`` from ``self.topic`` and
    ``host``. Nothing is returned.
    """
    message = self.make_msg('live_migration',
                            instance=jsonutils.to_primitive(instance),
                            dest=dest,
                            block_migration=block_migration,
                            migrate_data=migrate_data)
    target_topic = _compute_topic(self.topic, ctxt, host, None)
    self.cast(ctxt, message, topic=target_topic)
def live_migration(self, context, instance, dest, block_migration,
                   disk_over_commit):
    """Ask the scheduler driver to live-migrate ``instance``.

    :returns: the driver's ``schedule_live_migration`` result
    :raises: any exception raised by the driver is re-raised after the
        instance state has been updated through
        ``_set_vm_state_and_notify``
    """
    try:
        return self.driver.schedule_live_migration(
            context, instance, dest, block_migration, disk_over_commit)
    except (exception.ComputeServiceUnavailable,
            exception.InvalidHypervisorType,
            exception.UnableToMigrateToSelf,
            exception.DestinationHypervisorTooOld,
            exception.InvalidLocalStorage,
            exception.InvalidSharedStorage) as ex:
        # Listed failures: keep the current vm_state and clear task_state.
        request_spec = {'instance_properties': {'uuid': instance['uuid']}}
        with excutils.save_and_reraise_exception():
            updates = {'vm_state': instance['vm_state'],
                       'task_state': None,
                       'expected_task_state': task_states.MIGRATING}
            self._set_vm_state_and_notify('live_migration', updates,
                                          context, ex, request_spec)
    except Exception as ex:
        # Anything else puts the instance into the ERROR vm_state.
        with excutils.save_and_reraise_exception():
            self._set_vm_state_and_notify('live_migration',
                                          dict(vm_state=vm_states.ERROR),
                                          context, ex, {})
def live_migration(self, ctxt, instance_ref, dest,
                   post_method, recover_method, block_migration=False,
                   migrate_data=None):
    """Start ``self._live_migration`` through ``greenthread.spawn``.

    All arguments are handed to ``_live_migration`` unchanged; nothing
    is returned.
    """
    worker_args = (ctxt, instance_ref, dest, post_method, recover_method,
                   block_migration, migrate_data)
    greenthread.spawn(self._live_migration, *worker_args)
def _live_migration(self, ctxt, instance_ref, dest, post_method,
                    recover_method, block_migration=False,
                    migrate_data=None):
    """Run the libvirt migration, then poll until the instance is gone.

    :param ctxt: passed through to the callbacks
    :param instance_ref: instance being migrated; ``instance_ref["name"]``
        is used to look up the libvirt domain
    :param dest: destination, interpolated into ``CONF.live_migration_uri``
    :param post_method: called as ``post_method(ctxt, instance_ref, dest,
        block_migration, migrate_data)`` once ``self.get_info`` raises
        ``exception.NotFound`` for the instance
    :param recover_method: called as ``recover_method(ctxt, instance_ref,
        dest, block_migration)`` when the migration call raises; the
        exception is then re-raised
    :param block_migration: selects ``CONF.block_migration_flag`` instead
        of ``CONF.live_migration_flag``
    :param migrate_data: forwarded to ``post_method`` unchanged
    """
    try:
        if block_migration:
            flag_names = CONF.block_migration_flag.split(',')
        else:
            flag_names = CONF.live_migration_flag.split(',')

        # OR together the libvirt constants named in the option. A plain
        # loop is used instead of the builtin reduce(), which does not
        # exist on Python 3.
        migration_flags = 0
        for flag_name in flag_names:
            migration_flags |= getattr(libvirt, flag_name.strip())

        dom = self._lookup_by_name(instance_ref["name"])
        dom.migrateToURI(CONF.live_migration_uri % dest,
                         migration_flags,
                         None,
                         CONF.live_migration_bandwidth)

    except Exception as e:
        with excutils.save_and_reraise_exception():
            # Explicit mapping instead of locals(): the placeholder no
            # longer depends on the local variable's name.
            LOG.error(_("Live Migration failure: %(e)s") % {'e': e},
                      instance=instance_ref)
            recover_method(ctxt, instance_ref, dest, block_migration)

    # Created without a function so that the poller below can refer to
    # ``timer`` in order to stop it.
    timer = utils.FixedIntervalLoopingCall(f=None)

    def wait_for_live_migration():
        """Stop polling and run post_method once get_info fails."""
        try:
            # Only the lookup matters here; the value is discarded.
            self.get_info(instance_ref)['state']
        except exception.NotFound:
            timer.stop()
            post_method(ctxt, instance_ref, dest, block_migration,
                        migrate_data)

    timer.f = wait_for_live_migration
    timer.start(interval=0.5).wait()
至此,Nova虚拟机动态迁移的实现机制和实现源码解析完成。
博文中不免有不正确的地方,欢迎朋友们不吝批评指正,谢谢大家了!
博客原文地址:http://blog.csdn.net/gaoxingnengjisuan