nova-scheduler是Nova的调度器,主要作用是通过调度策略选择虚拟机实例的落点。当大规模部署时,我们需要通过定制调度策略,去满足我们的特殊需求,最典型的就是负载均衡。
nova-scheduler同nova-conductor一样,作为rpcserver对外提供rpcapi。
Icehouse版本的nova-scheduler对外暴露的rpcapi较少:
1.select_destinations 返回满足指定要求和过滤条件的节点列表
2.run_instance 根据请求条件和过滤属性选取出合适的节点,然后通过rpc调用节点nova-compute的run_instance api
3. prep_resize 对虚拟机实例进行resize前要先进行调度,然后通过rpc调用节点nova-compute的prep_resize api
下面分析一下run_instance的流程:
1.nova-scheduler有很多驱动,有随机节点选择调度器ChanceScheduler、过滤称重调度器FilterScheduler、带缓存调度器CachingScheduler(FilterScheduler的子类,缓存节点信息,可以加速过滤称重过程),默认使用的驱动是FilterScheduler
2.每种驱动都需要实现schedule_run_instance和select_destinations这两个方法,另外还可以选择性地实现run_periodic_tasks方法(可选)
3.每当nova-scheduler接收到run_instance请求时,就会调用对应驱动的schedule_run_instance方法
4.在FilterScheduler的schedule_run_instance中,会先根据request_spec请求规格和filter_properties进行过滤和称重,request_spec里面包含的信息有“要创建的实例数量”、“实例的块设备信息包括镜像id”、“使用的镜像信息”、“实例类型包括flavor信息”、“元数据”、“安全组”等,filter_properties里面则主要包括“实例的flavor信息”,下面使用代码进行说明
# Instance groups: OpenStack lets instances be grouped under a policy. For
# example, when several instances are used to build a distributed storage
# system, placing them on different hosts (for high availability) can be
# achieved through an instance group.
#
# The caller invokes this as
# ``self._setup_instance_group(context, filter_properties)`` and the function
# takes no ``self``, so it is declared as a static method.
@staticmethod
def _setup_instance_group(context, filter_properties):
    """Fold instance-group constraints into ``filter_properties``.

    Looks for a ``group`` entry in ``filter_properties['scheduler_hints']``.
    When one is present, the matching instance group is loaded; if any of
    its policies is ``'affinity'`` or ``'anti-affinity'``,
    ``filter_properties`` is updated in place:

    * ``group_hosts`` becomes the union of the hosts already listed there
      and the hosts returned by ``group.get_hosts(context)``;
    * ``group_policies`` is set to the group's policies.

    :param context: request context, passed through to the group lookups.
    :param filter_properties: dict of filter properties; mutated in place.
    :returns: ``True`` when the group hosts were updated (the caller must
              then keep ``group_hosts`` current as hosts get chosen),
              ``False`` otherwise.
    """
    update_group_hosts = False
    # 'scheduler_hints' may be absent or None; treat both as "no hints".
    scheduler_hints = filter_properties.get('scheduler_hints') or {}
    # Per the original note, the hint may be a group uuid or a group name.
    group_hint = scheduler_hints.get('group', None)
    if group_hint:
        # Load the InstanceGroup object referred to by the hint.
        group = instance_group_obj.InstanceGroup.get_by_hint(context,
                                                             group_hint)
        policies = set(('anti-affinity', 'affinity'))
        # Only (anti-)affinity policies influence host filtering.
        if any((policy in policies) for policy in group.policies):
            # The group's host list has to be tracked from here on.
            update_group_hosts = True
            filter_properties.setdefault('group_hosts', set())
            # Hosts already present in the filter properties.
            user_hosts = set(filter_properties['group_hosts'])
            # Hosts reported by the group for its current members.
            group_hosts = set(group.get_hosts(context))
            filter_properties['group_hosts'] = user_hosts | group_hosts
            filter_properties['group_policies'] = group.policies
    return update_group_hosts
def _schedule(self, context, request_spec, filter_properties,
              instance_uuids=None):
    """Select hosts for the requested instances by filtering and weighing.

    One host is chosen per instance. After each choice the chosen host's
    cached state is charged with the instance's resources so that the next
    iteration filters and weighs against the updated state.

    :param context: request context; an elevated copy is used to list
                    host states.
    :param request_spec: dict; ``instance_properties`` is required,
                         ``instance_type`` and ``num_instances`` are
                         optional.
    :param filter_properties: dict of filter properties; mutated in place.
    :param instance_uuids: optional list of instance uuids; when given, its
                           length determines how many hosts are selected.
    :returns: list of chosen weighed hosts, where index ``i`` is the host
              for instance ``i``. The list may be shorter than the number
              of instances if filtering runs out of hosts.
    """
    elevated = context.elevated()
    instance_properties = request_spec['instance_properties']
    instance_type = request_spec.get("instance_type", None)

    # Whether the instance group's host list must be kept up to date.
    update_group_hosts = self._setup_instance_group(context,
                                                    filter_properties)

    # Options from the scheduler's JSON configuration file (empty by
    # default, per the original note).
    config_options = self._get_configuration_options()

    properties = instance_properties.copy()
    if instance_uuids:
        properties['uuid'] = instance_uuids[0]
    # Per the original note: updates the retry bookkeeping, logs earlier
    # scheduling errors and raises "no valid host" once the configured
    # maximum number of attempts (default 3) is exceeded. That logic lives
    # in _populate_retry and is not visible here.
    self._populate_retry(filter_properties, properties)

    filter_properties.update({'context': context,
                              'request_spec': request_spec,
                              'config_options': config_options,
                              'instance_type': instance_type})

    # Copy request_spec details into filter_properties (per the original
    # note: project_id, os_type and pci_requests).
    self.populate_filter_properties(request_spec,
                                    filter_properties)

    # States of all usable hosts. Per the original note the return value
    # is an iterator, so it should only be traversed once.
    hosts = self._get_all_host_states(elevated)

    selected_hosts = []
    if instance_uuids:
        num_instances = len(instance_uuids)
    else:
        num_instances = request_spec.get('num_instances', 1)

    # Schedule the instances one at a time.
    for num in xrange(num_instances):
        # Keep only the hosts that satisfy the requirements. The result
        # feeds the next iteration, so the candidate set only shrinks.
        hosts = self.host_manager.get_filtered_hosts(hosts,
                                                     filter_properties,
                                                     index=num)
        if not hosts:
            # Can't get any more locally.
            break

        LOG.debug(_("Filtered %(hosts)s"), {'hosts': hosts})

        # Then weigh (rank) the hosts that passed the filters.
        weighed_hosts = self.host_manager.get_weighed_hosts(
            hosts, filter_properties)

        LOG.debug(_("Weighed %(hosts)s"), {'hosts': weighed_hosts})

        # Clamp the configured subset size to [1, len(weighed_hosts)].
        scheduler_host_subset_size = CONF.scheduler_host_subset_size
        if scheduler_host_subset_size > len(weighed_hosts):
            scheduler_host_subset_size = len(weighed_hosts)
        if scheduler_host_subset_size < 1:
            scheduler_host_subset_size = 1

        # Pick one host at random from the leading slice of the ranking.
        chosen_host = random.choice(
            weighed_hosts[0:scheduler_host_subset_size])
        selected_hosts.append(chosen_host)

        # An instance will land on this host, so charge its cached state
        # before scheduling the next instance (per the original note:
        # memory, disk, vCPUs and instance count).
        chosen_host.obj.consume_from_instance(instance_properties)
        if update_group_hosts is True:
            # Record the host in the instance group's host set.
            filter_properties['group_hosts'].add(chosen_host.obj.host)

    # When several instances are created, the host at a given index is the
    # destination of the instance with the same index.
    return selected_hosts
def _provision_resource(self, context, weighed_host, request_spec,
                        filter_properties, requested_networks,
                        injected_files, admin_password, is_first_time,
                        instance_uuid=None, legacy_bdm_in_spec=True):
    """Record the scheduling decision and ask the chosen host to build.

    Emits a ``scheduler.run_instance.scheduled`` notification, updates the
    instance's database record and, if the instance still exists, casts
    ``run_instance`` to the compute service on the chosen host.

    :param context: request context.
    :param weighed_host: the chosen weighed host; ``weighed_host.obj``
                         supplies ``host`` and ``nodename``.
    :param request_spec: request spec dict; its ``instance_uuids`` entry is
                         overwritten with ``[instance_uuid]``.
    :param filter_properties: filter properties dict, forwarded to compute.
    :param requested_networks: forwarded to compute unchanged.
    :param injected_files: forwarded to compute unchanged.
    :param admin_password: forwarded to compute unchanged.
    :param is_first_time: forwarded to compute unchanged.
    :param instance_uuid: uuid of the instance being provisioned.
    :param legacy_bdm_in_spec: forwarded to compute unchanged.
    """
    request_spec['instance_uuids'] = [instance_uuid]
    payload = dict(request_spec=request_spec,
                   weighted_host=weighed_host.to_dict(),
                   instance_id=instance_uuid)
    self.notifier.info(context,
                       'scheduler.run_instance.scheduled', payload)

    try:
        # Update the instance's scheduling data in the database (per the
        # original note: node, host and scheduled_at).
        updated_instance = driver.instance_update_db(context,
                                                     instance_uuid)
    except exception.InstanceNotFound:
        # The instance was removed while scheduling was in progress; log
        # it and skip the build request.
        LOG.warning(_("Instance disappeared during scheduling"),
                    context=context, instance_uuid=instance_uuid)
    else:
        # Add extra information about the chosen host to
        # filter_properties.
        scheduler_utils.populate_filter_properties(filter_properties,
                                                   weighed_host.obj)

        # RPC to nova-compute's run_instance on the chosen host, i.e.
        # tell that compute node to create the instance.
        self.compute_rpcapi.run_instance(
            context,
            instance=updated_instance,
            host=weighed_host.obj.host,
            request_spec=request_spec,
            filter_properties=filter_properties,
            requested_networks=requested_networks,
            injected_files=injected_files,
            admin_password=admin_password, is_first_time=is_first_time,
            node=weighed_host.obj.nodename,
            legacy_bdm_in_spec=legacy_bdm_in_spec)
def schedule_run_instance(self, context, request_spec,
                          admin_password, injected_files,
                          requested_networks, is_first_time,
                          filter_properties, legacy_bdm_in_spec):
    """Schedule every instance in the request and start provisioning.

    Emits ``scheduler.run_instance.start`` / ``.end`` notifications around
    the work. Hosts are selected for all instances up front via
    ``_schedule``; each instance is then provisioned on its host. A failure
    for one instance is passed to ``driver.handle_schedule_error`` and is
    not re-raised, so the remaining instances are still processed.

    :param context: request context.
    :param request_spec: request spec dict; must contain
                         ``instance_uuids`` (removed from the dict here)
                         and ``instance_properties``.
    :param admin_password: forwarded to ``_provision_resource``.
    :param injected_files: forwarded to ``_provision_resource``.
    :param requested_networks: forwarded to ``_provision_resource``.
    :param is_first_time: forwarded to ``_provision_resource``.
    :param filter_properties: filter properties dict; mutated in place.
    :param legacy_bdm_in_spec: forwarded to ``_provision_resource``.
    """
    payload = dict(request_spec=request_spec)
    self.notifier.info(context, 'scheduler.run_instance.start', payload)

    instance_uuids = request_spec.get('instance_uuids')
    LOG.info(_("Attempting to build %(num_instances)d instance(s) "
               "uuids: %(instance_uuids)s"),
             {'num_instances': len(instance_uuids),
              'instance_uuids': instance_uuids})
    # Pass the argument to the logger instead of formatting eagerly, in
    # line with the other log calls in this file.
    LOG.debug(_("Request Spec: %s"), request_spec)

    # Hosts selected by filtering and weighing, one per instance.
    weighed_hosts = self._schedule(context, request_spec,
                                   filter_properties, instance_uuids)

    instance_uuids = request_spec.pop('instance_uuids')

    # _schedule stored the context in filter_properties; drop it before
    # the properties are forwarded.
    filter_properties.pop('context', None)

    for num, instance_uuid in enumerate(instance_uuids):
        request_spec['instance_properties']['launch_index'] = num

        try:
            try:
                # Take this instance's destination off the front of the
                # selected host list.
                weighed_host = weighed_hosts.pop(0)
                LOG.info(_("Choosing host %(weighed_host)s "
                           "for instance %(instance_uuid)s"),
                         {'weighed_host': weighed_host,
                          'instance_uuid': instance_uuid})
            except IndexError:
                # Fewer hosts were selected than instances requested.
                raise exception.NoValidHost(reason="")

            # Update the instance's database record and ask nova-compute
            # (over RPC) to create the instance.
            self._provision_resource(context, weighed_host,
                                     request_spec,
                                     filter_properties,
                                     requested_networks,
                                     injected_files, admin_password,
                                     is_first_time,
                                     instance_uuid=instance_uuid,
                                     legacy_bdm_in_spec=legacy_bdm_in_spec)
        except Exception as ex:
            # Not re-raised: the loop goes on to the next instance.
            driver.handle_schedule_error(context, ex, instance_uuid,
                                         request_spec)
        # scrub retry host list in case we're scheduling multiple
        # instances:
        retry = filter_properties.get('retry', {})
        retry['hosts'] = []

    self.notifier.info(context, 'scheduler.run_instance.end', payload)