该分析主要分析主流程,分支流程不进行追溯
nova/api/openstack/compute/contrib/admin_actions.py
@wsgi.action('
os-migrateLive')
def
_migrate_live(self, req, id, body):
"""Permit admins to (live) migrate a server to a new host"""
context = req.environ["nova.context"]
authorize(context, 'migrateLive')
try:
# If true, migration is blocked
block_migration = body["os-migrateLive"]["block_migration"]
# If true, disk over commit is allowed.
disk_over_commit = body["os-migrateLive"]["disk_over_commit"]
# Name of the new host. dest host
host = body["os-migrateLive"]["host"]
except (TypeError, KeyError):
msg = _("host and block_migration must be specified.")
raise exc.HTTPBadRequest(explanation=msg)
try:
instance = self.compute_api.get(context, id)
#这里的compute_api是nova.compute.api.API()
self.
compute_api.live_migrate(context, instance, block_migration,
disk_over_commit, host)
except exception.ComputeServiceUnavailable as ex:
nova/compute/api.py
#注意,check_instance_state是装饰器:它在live_migrate方法体执行之前检查vm的状态(这里要求vm_state为ACTIVE),而不是在live_migrate返回之后对迁移后的vm进行检查
@check_instance_state(vm_state=[vm_states.ACTIVE])
def live_migrate(self, context, instance, block_migration,
                 disk_over_commit, host):
    """Start a live migration of ``instance`` towards ``host``.

    Logs the request, sets the instance task state to MIGRATING through
    ``self.update`` and hands the request to the scheduler RPC API.

    The decorator is configured with ``vm_state=[vm_states.ACTIVE]``;
    presumably it rejects instances in any other state before this body
    runs -- confirm against ``check_instance_state``.

    :param context: security context
    :param instance: instance to migrate
    :param block_migration: forwarded unchanged to the scheduler
    :param disk_over_commit: forwarded unchanged to the scheduler
    :param host: name of the destination host
    """
    LOG.debug(_("Going to try to live migrate instance to %s"),
              host, instance=instance)

    # Whatever self.update returns is what gets forwarded below.
    state_change = {
        'task_state': task_states.MIGRATING,
        'expected_task_state': None,
    }
    migrating_instance = self.update(context, instance, **state_change)

    # scheduler_rpcapi is the scheduler RPC client
    # (nova.scheduler.rpcapi according to the surrounding write-up).
    self.scheduler_rpcapi.live_migration(context, block_migration,
                                         disk_over_commit,
                                         migrating_instance, host)
nova/scheduler/rpcapi.py
def live_migration(self, ctxt, block_migration, disk_over_commit,
                   instance, dest):
    """Send a 'live_migration' message via ``self.call`` and return the reply.

    The instance is converted with ``jsonutils.to_primitive`` before it is
    placed in the message.

    :param ctxt: request context passed to ``self.call``
    :param block_migration: forwarded in the message
    :param disk_over_commit: forwarded in the message
    :param instance: instance to migrate
    :param dest: destination host
    :returns: whatever ``self.call`` returns
    """
    # NOTE(comstud): Call vs cast so we can get exceptions back, otherwise
    # this call in the scheduler driver doesn't return anything.
    payload = {
        'block_migration': block_migration,
        'disk_over_commit': disk_over_commit,
        'instance': jsonutils.to_primitive(instance),
        'dest': dest,
    }
    message = self.make_msg('live_migration', **payload)
    return self.call(ctxt, message)
使用scheduler的rpcapi的call方法发送 live_migration 的请求到消息队列,实际上这个消息的最终处理者是nova-compute
注意,此处使用的是call方法,会一直等待调用返回(区别于cast方法的直接返回)
nova/compute/manager.py
#这里是在源主机上发起migrate过程,调用了两个方法pre_live_migration和live_migration
#第一个方法是通过compute_rpcapi发送一个pre_live_migration的请求给目的主机(dest)的nova-compute进行动态迁移的预处理
#另一个方法是调用源主机上的libvirt驱动进行实际的迁移过程(迁移到dest)
def live_migration(self, context, dest, instance,
                   block_migration=False, migrate_data=None):
    """Run the pre-migration step for ``dest``, then start the migration.

    If the pre-migration step raises, the failure is logged,
    ``self._rollback_live_migration`` is invoked and the exception is
    re-raised through ``excutils.save_and_reraise_exception``.

    :param context: security context
    :param instance: instance dict
    :param dest: destination host
    :param block_migration: if true, prepare for block migration
    :param migrate_data: implementation specific params
    """
    try:
        # Disk information is only collected for block migration.
        disk = (self.driver.get_instance_disk_info(instance['name'])
                if block_migration else None)

        # Send the pre-live-migration request through the compute RPC API.
        self.compute_rpcapi.pre_live_migration(
            context, instance, block_migration, disk, dest)
    except Exception:
        with excutils.save_and_reraise_exception():
            # locals() supplies ``dest`` for the message placeholder.
            LOG.exception(_('Pre live migration failed at %(dest)s'),
                          locals(), instance=instance)
            self._rollback_live_migration(context, instance, dest,
                                          block_migration)

    # Executing live migration
    # live_migration might raises exceptions, but
    # nothing must be recovered in this version.
    # With compute_driver=libvirt.LibvirtDriver (the write-up author's
    # local configuration) self.driver is
    # nova.virt.libvirt.driver.LibvirtDriver.
    on_success = self._post_live_migration
    on_failure = self._rollback_live_migration
    self.driver.live_migration(context, instance, dest,
                               on_success, on_failure,
                               block_migration, migrate_data)
nova/compute/manager.py
#注意此处是目的主机上的预处理
def pre_live_migration(self, context, instance,
                       block_migration=False, disk=None):
    """Preparations for live migration at dest host.

    Initializes volume connections, looks up the instance network info,
    runs the driver's pre-live-migration hook, sets up networks on this
    host and ensures the filtering rules exist; for block migration the
    driver's ``pre_block_migration`` is called last.

    :param context: security context
    :param instance: dict of instance data
    :param block_migration: if true, prepare for block migration
    :param disk: passed to the driver's ``pre_block_migration``
    :raises exception.FixedIpNotFoundForInstance: if the network info
        reports no fixed IPs
    """
    instance_uuid = instance['uuid']

    # If any volume is mounted, prepare here.
    # Fetch the block device info of the instance.
    block_device_info = self._get_instance_volume_block_device_info(
        context, instance_uuid)
    if not block_device_info['block_device_mapping']:
        LOG.info(_('Instance has no volume.'), instance=instance)

    # assign the volume to host system
    # needed by the lefthand volume driver and maybe others
    connector = self.driver.get_volume_connector(instance)
    for bdm in self._get_instance_volume_bdms(context, instance_uuid):
        volume = self.volume_api.get(context, bdm['volume_id'])
        self.volume_api.initialize_connection(context, volume, connector)

    # Fetch the network configuration of the instance.
    network_info = self._get_instance_nw_info(context, instance)

    # TODO(tr3buchet): figure out how on the earth this is necessary
    if not network_info.fixed_ips():
        raise exception.FixedIpNotFoundForInstance(
            instance_uuid=instance_uuid)

    # Let the virt driver do its own pre-live-migration work.
    self.driver.pre_live_migration(
        context, instance, block_device_info,
        self._legacy_nw_info(network_info))

    # NOTE(tr3buchet): setup networks on destination host
    self.network_api.setup_networks_on_host(context, instance, self.host)

    # Creating filters to hypervisors and firewalls.
    # An example is that nova-instance-instance-xxx,
    # which is written to libvirt.xml(Check "virsh nwfilter-list")
    # This nwfilter is necessary on the destination host.
    # In addition, this method is creating filtering rule
    # onto destination host.
    self.driver.ensure_filtering_rules_for_instance(
        instance, self._legacy_nw_info(network_info))

    # Preparation for block migration
    if block_migration:
        self.driver.pre_block_migration(context, instance, disk)
nova/virt/libvirt/driver.py
#注意此处是目的主机上的预处理
#该方法实际上是在目的主机上创建网络及防火墙相关内容的处理过程
def pre_live_migration(self, context, instance_ref, block_device_info,
                       network_info):
    """Preparation live migration.

    Connects every volume found in the block device mapping, then plugs
    the instance's VIFs, retrying up to
    ``FLAGS.live_migration_retry_count`` times with a one second pause
    between attempts.

    :param context: security context (not used inside this method)
    :param instance_ref: instance that is being migrated
    :param block_device_info: block device info from which the mapping is
        extracted
    :param network_info: network info handed to ``plug_vifs``
    :raises exception.ProcessExecutionError: when ``plug_vifs`` still fails
        on the last allowed attempt
    """
    # Establishing connection to volume server.
    block_device_mapping = driver.block_device_info_get_mapping(
        block_device_info)
    for vol in block_device_mapping:
        connection_info = vol['connection_info']
        # Keep only the part after the last "/" of the mount device.
        mount_device = vol['mount_device'].rpartition("/")[2]
        self.volume_driver_method('connect_volume',
                                  connection_info,
                                  mount_device)

    # We call plug_vifs before the compute manager calls
    # ensure_filtering_rules_for_instance, to ensure bridge is set up
    # Retry operation is necessary because continuously request comes,
    # concurrent request occurs to iptables, then it complains.
    max_retry = FLAGS.live_migration_retry_count
    for cnt in range(max_retry):
        try:
            self.plug_vifs(instance_ref, network_info)
            break
        except exception.ProcessExecutionError:
            if cnt == max_retry - 1:
                raise
            # Format with an explicit mapping: there is no local named
            # ``hostname`` in this method, so a %(hostname)s placeholder
            # fed from locals() raises KeyError and aborts the retry loop.
            # The instance is attached to the log record instead.
            LOG.warn(_("plug_vifs() failed %(cnt)d. "
                       "Retry up to %(max_retry)d.")
                     % {'cnt': cnt, 'max_retry': max_retry},
                     instance=instance_ref)
            greenthread.sleep(1)
#该方法实际上是创建新实例的路径及下载相关镜像的处理过程
def pre_block_migration(self, ctxt, instance, disk_info_json):
    """Preparation block migration.

    Creates the instance directory under ``FLAGS.instances_path`` and, for
    every disk described in ``disk_info_json``, either creates an image
    directly or populates it through the image backend cache when the disk
    reports a backing file. Afterwards the kernel and ramdisk images are
    fetched when the instance references them.

    :params ctxt: security context
    :params instance:
        nova.db.sqlalchemy.models.Instance object
        instance object that is migrated.
    :params disk_info_json:
        json strings specified in get_instance_disk_info
    :raises exception.DestinationDiskExists: if the instance directory
        already exists
    """
    disk_info = jsonutils.loads(disk_info_json)

    # make instance directory
    instance_dir = os.path.join(FLAGS.instances_path, instance['name'])
    if os.path.exists(instance_dir):
        raise exception.DestinationDiskExists(path=instance_dir)
    os.mkdir(instance_dir)

    # Author's note: when the migrated VM's system image was created from
    # a backing file, that backing file is downloaded here and stored at
    # the same location as on the original path.
    for info in disk_info:
        base = os.path.basename(info['path'])
        # Get image type and create empty disk image, and
        # create backing file in case of qcow2.
        instance_disk = os.path.join(instance_dir, base)
        if not info['backing_file']:
            libvirt_utils.create_image(info['type'], instance_disk,
                                       info['disk_size'])
        else:
            # Creating backing file follows same way as spawning instances.
            cache_name = os.path.basename(info['backing_file'])
            # Remove any size tags which the cache manages
            cache_name = cache_name.split('_')[0]

            image = self.image_backend.image(instance['name'],
                                             instance_disk,
                                             FLAGS.libvirt_images_type)
            image.cache(fetch_func=libvirt_utils.fetch_image,
                        context=ctxt,
                        filename=cache_name,
                        image_id=instance['image_ref'],
                        user_id=instance['user_id'],
                        project_id=instance['project_id'],
                        size=info['virt_disk_size'])

    # if image has kernel and ramdisk, just download
    # following normal way.
    # NOTE(review): the pasted text lost its indentation; placing this
    # section after the disk loop and nesting the ramdisk check under the
    # kernel check is a reconstruction -- confirm against upstream.
    if instance['kernel_id']:
        libvirt_utils.fetch_image(ctxt,
                                  os.path.join(instance_dir, 'kernel'),
                                  instance['kernel_id'],
                                  instance['user_id'],
                                  instance['project_id'])
        if instance['ramdisk_id']:
            libvirt_utils.fetch_image(ctxt,
                                      os.path.join(instance_dir,
                                                   'ramdisk'),
                                      instance['ramdisk_id'],
                                      instance['user_id'],
                                      instance['project_id'])
nova/virt/libvirt/driver.py
#这里是在源主机上开始真正进行动态迁移(通过migrateToURI把vm迁移到目的主机dest)
def live_migration(self, ctxt, instance_ref, dest,
                   post_method, recover_method, block_migration=False,
                   migrate_data=None):
    """Spawning live_migration operation for distributing high-load.

    The actual work is done by ``self._live_migration``, which is started
    in a green thread; this method does not wait for it and returns None.

    :params ctxt: security context
    :params instance_ref: instance object that is migrated
    :params dest: destination host
    :params post_method: callback handed to ``_live_migration``
    :params recover_method: callback handed to ``_live_migration``
    :params block_migration: handed to ``_live_migration``
    :params migrate_data: accepted but not forwarded to
        ``_live_migration``
    """
    # The docstring must be closed before this call; otherwise the spawn
    # statement becomes part of the string literal and never runs.
    greenthread.spawn(self._live_migration, ctxt, instance_ref, dest,
                      post_method, recover_method, block_migration)
def _live_migration(self, ctxt, instance_ref, dest, post_method,
                    recover_method, block_migration=False):
    """Do live migration.

    Issues ``migrateToURI`` on the instance's domain and then polls every
    0.5 seconds until ``self.get_info`` raises ``exception.NotFound``, at
    which point ``post_method`` is invoked. If starting the migration
    raises, the error is logged, ``recover_method`` is invoked and the
    exception is re-raised.

    :params ctxt: security context
    :params instance_ref:
        nova.db.sqlalchemy.models.Instance object
        instance object that is migrated.
    :params dest: destination host
    :params post_method:
        post operation method.
        expected nova.compute.manager.post_live_migration.
    :params recover_method:
        recovery method when any exception occurs.
        expected nova.compute.manager.recover_live_migration.
    :params block_migration: selects ``FLAGS.block_migration_flag``
        instead of ``FLAGS.live_migration_flag``
    """
    # Do live migration.
    try:
        if block_migration:
            flag_spec = FLAGS.block_migration_flag
        else:
            flag_spec = FLAGS.live_migration_flag

        # The flag string is a comma separated list of attribute names on
        # the libvirt module; their values are OR-ed into a single value.
        migration_flags = reduce(
            lambda acc, bit: acc | bit,
            [getattr(libvirt, name.strip())
             for name in flag_spec.split(',')])

        domain = self._lookup_by_name(instance_ref["name"])
        domain.migrateToURI(FLAGS.live_migration_uri % dest,
                            migration_flags,
                            None,
                            FLAGS.live_migration_bandwidth)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            # locals() supplies ``e`` for the message placeholder.
            LOG.error(_("Live Migration failure: %(e)s") % locals(),
                      instance=instance_ref)
            recover_method(ctxt, instance_ref, dest, block_migration)

    # Waiting for completion of live_migration.
    timer = utils.LoopingCall(f=None)

    def _poll_until_instance_gone():
        """waiting for live migration completion"""
        try:
            # Only the lookup matters here; the value itself is discarded.
            self.get_info(instance_ref)['state']
        except exception.NotFound:
            timer.stop()
            post_method(ctxt, instance_ref, dest, block_migration)

    # The callback is attached after construction because it needs to
    # reference ``timer`` itself.
    timer.f = _poll_until_instance_gone
    timer.start(interval=0.5).wait()