本博客欢迎转载,但请注明出处 http://blog.csdn.net/ringoshen/article/details/51321540
由于能力与时间有限,文章内容难免错漏,望大家多加指正,相互进步!
nova的服务在功能上主要分为应用层和逻辑层两类,nova-api就是应用层的服务,而nova-scheduler、nova-conductor等则是逻辑层服务。本文主要通过注释的方式讲述nova-api服务的启动流程。
首先看一下main函数,之后针对几个重要的部分简单跟踪一下。
# nova/cmd/api.py
def main():
    """Start every enabled nova API as a multi-worker WSGI service.

    Parses the command line and configuration files, sets up logging and
    the Guru Meditation Report hook, then creates one ``WSGIService`` per
    entry in ``CONF.enabled_apis``, hands each to a process launcher and
    finally blocks in ``launcher.wait()``.
    """
    # Load the command-line arguments and set up the database and RPC.
    # argv is the argument list of the service start command, e.g.
    # ['/usr/local/bin/nova-api', '--config-file', '/etc/nova/nova.conf']
    config.parse_args(sys.argv)
    # Configure the service log file through oslo_log.log.
    logging.setup(CONF, "nova")
    # Optional monkey patching of modules; per the article's author this is
    # disabled by default.
    utils.monkey_patch()
    # Effectively imports every module under ``objects``; the registered
    # objects are needed when receiving RPC messages.
    objects.register_all()
    # Guru Meditation Report: according to the wiki it lets an admin inspect
    # the details of the running system. It is triggered by a signal and can
    # produce a very detailed report.
    gmr.TextGuruMeditation.setup_autorun(version)
    # Create the ProcessLauncher object, i.e. the service launcher.
    launcher = service.process_launcher()
    # Create the APIs; the author's configuration is
    # enabled_apis = osapi_compute,metadata
    for api in CONF.enabled_apis:
        # SSL is used only for APIs listed in enabled_ssl_apis (none in the
        # author's default setup).
        should_use_ssl = api in CONF.enabled_ssl_apis
        # Build the WSGIService object for this API.
        server = service.WSGIService(api, use_ssl=should_use_ssl)
        # Start the service with at least one worker process.
        launcher.launch_service(server, workers=server.workers or 1)
    # Supervise the API workers: respawn the ones that exit abnormally and
    # stop them when a termination signal arrives.
    launcher.wait()
config.parse_args主要是设置默认的配置,其中最主要的是RPC的设置和初始化;我的环境中RPC的默认driver是rabbitmq。
# nova/config.py
def parse_args(argv, default_config_files=None, configure_db=True,
               init_rpc=True):
    """Register option defaults, parse the configuration, init RPC and DB.

    :param argv: full argument list of the service command; ``argv[0]``
                 (the program name) is dropped before parsing.
    :param default_config_files: forwarded unchanged to ``CONF``.
    :param configure_db: when true, configure the sqlalchemy API.
    :param init_rpc: when true, initialise RPC from the parsed config.
    """
    # Defaults used by oslo_log, then its command-line/config options.
    log.set_defaults(_DEFAULT_LOGGING_CONTEXT_FORMAT, _DEFAULT_LOG_LEVELS)
    log.register_options(CONF)

    # Database defaults.
    options.set_defaults(
        CONF,
        connection=_DEFAULT_SQL_CONNECTION,
        sqlite_db='nova.sqlite')

    # RPC defaults: the default control exchange is 'nova'.
    rpc.set_defaults(control_exchange='nova')

    # NOTE(review): the article's author had not yet studied the next three
    # defaults (cache, debugger CLI options, middleware defaults).
    cache.configure(CONF)
    debugger.register_cli_opts()
    config.set_middleware_defaults()

    # Parse the command-line arguments and the configuration files.
    cli_args = argv[1:]
    CONF(cli_args,
         project='nova',
         version=version.version_string(),
         default_config_files=default_config_files)

    # Everything above only set defaults. RPC initialisation is more
    # involved (the author notes it goes through stevedore) and concerns the
    # objects TRANSPORT, NOTIFICATION_TRANSPORT, LEGACY_NOTIFIER, NOTIFIER.
    if init_rpc:
        rpc.init(CONF)

    # Configure the sqlalchemy API.
    if configure_db:
        sqlalchemy_api.configure(CONF)
gmr(Guru Meditation Report)模块主要用于让管理员了解服务运行时的详细状态,下面是wiki里的一个示例输出,可以看一下效果。
如果有兴趣,可以看一下wiki的详细解释:Guru Meditation Reports。
========================================================================
==== Guru meditation report ====
========================================================================
UUID: 6d4faf99-480c-4d0c-90be-b7d090ecd83e
Time: Mon, 18 Feb 2013 18:01:11 +0000
========================================================================
==== Config ====
========================================================================
DEFAULT:
allow_resize_to_same_host=True
allow_same_net_traffic=True
allowed_direct_url_schemes=[]
allowed_rpc_exception_modules=['nova.openstack.common.exception', 'nova.exception', 'cinder.exception', 'exceptions']
api_paste_config=/etc/nova/api-paste.ini
api_rate_limit=True
auth_strategy=keystone
auto_assign_floating_ip=False
backdoor_port=None
bandwidth_poll_interval=600
base_dir_name=_base
bindir=/home/berrange/src/cloud/nova/bin
... ... ... ...
... ... ... ...
Thread ID 3
/usr/lib/python2.7/site-packages/eventlet/hubs/hub.py:177 in switch
return self.greenlet.switch()
/usr/lib/python2.7/site-packages/eventlet/greenthread.py:30 in sleep
hub.switch()
/home/berrange/src/cloud/nova/nova/utils.py:596 in _inner
greenthread.sleep(interval)
/usr/lib/python2.7/site-packages/eventlet/greenthread.py:192 in main
result = function(*args, **kwargs)
========================================================================
==== Package ====
========================================================================
Vendor: OpenStack Foundation
Product: OpenStack Nova
Version: 2013.1
========================================================================
# nova/service.py
def process_launcher():
    """Return a new ``service.ProcessLauncher`` built from the global CONF."""
    launcher = service.ProcessLauncher(CONF)
    return launcher
可以简单看一下ProcessLauncher的初始化,这个类有点意思,之后的操作会详细使用这个类。
# oslo_service/service.py
class ProcessLauncher(object):
    """Launch a service with a given number of workers."""

    def __init__(self, conf, wait_interval=0.01):
        """Constructor.

        :param conf: an instance of ConfigOpts
        :param wait_interval: The interval to sleep for between checks
                              of child process exit.
        """
        conf.register_opts(_options.service_opts)
        self.conf = conf
        self.wait_interval = wait_interval

        # Bookkeeping that starts out empty / idle.
        self.children = {}
        self.sigcaught = None
        self.running = True
        self.launcher = None

        # The write end of the pipe is kept as a raw descriptor; the read
        # end is wrapped in an eventlet green pipe.
        read_fd, write_fd = os.pipe()
        self.writepipe = write_fd
        self.readpipe = eventlet.greenio.GreenPipe(read_fd, 'r')

        # Install signal handling last, once all state above exists.
        self.signal_handler = SignalHandler()
        self.handle_signal()
下面以osapi_compute为例,跟踪WSGIService对象的创建过程。
# nova/service.py
class WSGIService(service.Service):
    """Provides ability to launch API from a 'paste' configuration."""

    def __init__(self, name, loader=None, use_ssl=False, max_url_len=None):
        """Initialize, but do not start the WSGI server.

        :param name: The name of the WSGI server given to the loader
                     (the article traces name = 'osapi_compute').
        :param loader: Loads the WSGI application using the given name;
                       a ``wsgi.Loader()`` is created when omitted.
        :param use_ssl: stored and forwarded to ``wsgi.Server``.
        :param max_url_len: forwarded to ``wsgi.Server``.
        :returns: None
        :raises: exception.InvalidInput when the resolved worker count is
                 less than 1.
        """
        self.name = name
        # NOTE(danms): Name can be metadata, os_compute, or ec2, per
        # nova.service's enabled_apis
        # e.g. binary = 'nova-osapi_compute'
        self.binary = 'nova-%s' % name
        self.topic = None
        # Per the author, the manager is None except for the metadata API,
        # where it comes from the configuration.
        # NOTE(review): not explored further in the article.
        self.manager = self._get_manager()

        # Load the app; the author's app definitions live in
        # /etc/nova/api-paste.ini. In the author's trace load_app returns a
        # nova.api.openstack.urlmap.URLMap, which routes a URL to an app.
        self.loader = loader or wsgi.Loader()
        self.app = self.loader.load_app(name)

        # inherit all compute_api worker counts from osapi_compute
        is_compute_api = name.startswith('openstack_compute_api')
        workers_prefix = 'osapi_compute' if is_compute_api else name

        # Listen address and port (author's values: '0.0.0.0' and 8774).
        self.host = getattr(CONF, '%s_listen' % name, "0.0.0.0")
        self.port = getattr(CONF, '%s_listen_port' % name, 0)

        # Number of worker processes; falls back to
        # processutils.get_worker_count() when no value is configured
        # (2 on the author's 2-core machine).
        configured_workers = getattr(
            CONF, '%s_workers' % workers_prefix, None)
        self.workers = configured_workers or processutils.get_worker_count()
        if self.workers and self.workers < 1:
            details = {'worker_name': '%s_workers' % name,
                       'workers': str(self.workers)}
            msg = _("%(worker_name)s value of %(workers)s is invalid, "
                    "must be greater than 0") % details
            raise exception.InvalidInput(msg)

        # SSL is off unless the caller asks for it.
        self.use_ssl = use_ssl

        # Create the WSGI server object; it is not started here.
        self.server = wsgi.Server(
            name,
            self.app,
            host=self.host,
            port=self.port,
            use_ssl=self.use_ssl,
            max_url_len=max_url_len)
        # Pull back actual port used (8774 in the author's trace).
        self.port = self.server.port
        self.backdoor_port = None
Loader.load_app其实是直接调用paste.deploy.loadapp进行加载;关于loadapp,之后的文章再进行简单的描述。
# nova/wsgi.py
class Loader(object):
    """Used to load WSGI applications from paste configurations."""

    def __init__(self, config_path=None):
        """Resolve the paste configuration file to load apps from.

        :param config_path: path of the paste config; falls back to
                            ``CONF.api_paste_config`` when empty
                            ('/etc/nova/api-paste.ini' for the author).
        :raises: exception.ConfigNotFound when no usable file is found.
        """
        self.config_path = None

        path = config_path or CONF.api_paste_config
        if os.path.isabs(path):
            # An absolute path is accepted only if it exists.
            if os.path.exists(path):
                self.config_path = path
        else:
            # A relative path is looked up through the config machinery.
            self.config_path = CONF.find_file(path)

        if self.config_path:
            return
        raise exception.ConfigNotFound(path=path)

    def load_app(self, name):
        """Return the paste URLMap wrapped WSGI application.

        :param name: Name of the application to load.
        :returns: Paste URLMap object wrapping the requested application.
        :raises: `nova.exception.PasteAppNotFound`
        """
        # This simply delegates to paste.deploy.loadapp.
        try:
            LOG.debug("Loading app %(name)s from %(path)s",
                      {'name': name, 'path': self.config_path})
            config_uri = "config:%s" % self.config_path
            return deploy.loadapp(config_uri, name=name)
        except LookupError:
            LOG.exception(_LE("Couldn't lookup app: %s"), name)
            raise exception.PasteAppNotFound(name=name,
                                             path=self.config_path)
# nova/wsgi.py
class Server(service.ServiceBase):
    """Server class to manage a WSGI server, serving a WSGI application."""

    default_pool_size = CONF.wsgi_default_pool_size

    def __init__(self, name, app, host='0.0.0.0', port=0, pool_size=None,
                 protocol=eventlet.wsgi.HttpProtocol, backlog=128,
                 use_ssl=False, max_url_len=None):
        """Initialize, but do not start, a WSGI server.

        :param name: name of the server, also used in the logger name.
        :param app: the WSGI application to serve (the previously loaded
                    app).
        :param host: address to bind to.
        :param port: port to bind to.
        :param pool_size: green thread pool size; ``default_pool_size`` is
                          used when empty.
        :param protocol: protocol class stored for the eventlet server.
        :param backlog: listen backlog, must be at least 1.
        :param use_ssl: stored as ``_use_ssl``.
        :param max_url_len: stored as ``_max_url_len``.
        :raises: exception.InvalidInput when ``backlog`` is less than 1.
        """
        # Allow operators to customize http requests max header line size.
        eventlet.wsgi.MAX_HEADER_LINE = CONF.max_header_line

        self.name = name
        self.app = app
        self._server = None

        # eventlet-related settings follow.
        self._protocol = protocol
        # Green thread pool (size 1000 in the author's trace).
        self.pool_size = pool_size or self.default_pool_size
        self._pool = eventlet.GreenPool(self.pool_size)
        self._logger = logging.getLogger("nova.%s.wsgi.server" % self.name)
        self._use_ssl = use_ssl
        self._max_url_len = max_url_len
        # Client socket timeout (900 in the author's trace); a falsy
        # configured value becomes None.
        self.client_socket_timeout = CONF.client_socket_timeout or None

        if backlog < 1:
            raise exception.InvalidInput(
                reason=_('The backlog must be more than 0'))

        # e.g. bind_addr = ('0.0.0.0', 8774)
        bind_addr = (host, port)
        try:
            # e.g. addrinfo = (2, 1, 6, '', ('0.0.0.0', 8774))
            addrinfo = socket.getaddrinfo(host,
                                          port,
                                          socket.AF_UNSPEC,
                                          socket.SOCK_STREAM)[0]
            family, bind_addr = addrinfo[0], addrinfo[-1]
        except Exception:
            # Fall back to AF_INET with the address as given.
            family = socket.AF_INET

        try:
            # Create the green listening socket.
            self._socket = eventlet.listen(bind_addr, family,
                                           backlog=backlog)
        except EnvironmentError:
            LOG.error(_LE("Could not bind to %(host)s:%(port)s"),
                      {'host': host, 'port': port})
            raise

        # Record the address actually bound.
        sockname = self._socket.getsockname()
        (self.host, self.port) = sockname[0:2]
        LOG.info(_LI("%(name)s listening on %(host)s:%(port)s"),
                 {'name': self.name, 'host': self.host, 'port': self.port})
# oslo_service/service.py
def launch_service(self, service, workers=1):
    """Fork worker processes for ``service`` until ``workers`` are running.

    :param service: the service to run; its type is validated first.
    :param workers: number of child processes to start.
    """
    # Type check on the service.
    _check_service_base(service)
    # Bundle the arguments into a ServiceWrapper (a struct-like holder).
    wrapper = ServiceWrapper(service, workers)
    LOG.info(_LI('Starting %d workers'), wrapper.workers)
    # Keep forking children while the launcher is running and the worker
    # quota has not been reached.
    while self.running:
        if len(wrapper.children) >= wrapper.workers:
            break
        self._start_child(wrapper)
接下来看看创建子进程的代码。
# oslo_service/service.py
def _start_child(self, wrap):
    """Fork one worker process for the wrapped service.

    In the parent, the child's PID is recorded in ``wrap.children`` and
    ``self.children`` and then returned. In the child, the service is run
    and the process ends with ``os._exit``, so the child never returns
    from this method.

    :param wrap: wrapper holding the service, the worker count, the fork
                 timestamps and the set of child PIDs.
    :returns: PID of the new child (in the parent process only).
    """
    if len(wrap.forktimes) > wrap.workers:
        # Limit ourselves to one process a second (over the period of
        # number of workers * 1 second). This will allow workers to
        # start up quickly but ensure we don't fork off children that
        # die instantly too quickly.
        if time.time() - wrap.forktimes[0] < wrap.workers:
            LOG.info(_LI('Forking too fast, sleeping'))
            time.sleep(1)
        wrap.forktimes.pop(0)
    # Append the current time to the list of fork times.
    wrap.forktimes.append(time.time())
    # Fork a child: os.fork() returns 0 in the child and the child's PID
    # (> 0) in the parent.
    pid = os.fork()
    # The following branch runs only in the child process.
    if pid == 0:
        # =============== child process ===============
        # Create the in-child launcher and start the service in a green
        # thread.
        self.launcher = self._child_process(wrap.service)
        # Loop waiting for the service to exit or a signal to arrive; leave
        # the loop unless the signal asks for a restart (SIGHUP while
        # daemonized), in which case the launcher is restarted.
        while True:
            self._child_process_handle_signal()
            status, signo = self._child_wait_for_exit_or_signal(
                self.launcher)
            if not _is_sighup_and_daemon(signo):
                self.launcher.wait()
                break
            self.launcher.restart()
        os._exit(status)
        # =============================================
    LOG.debug('Started child %d', pid)
    wrap.children.add(pid)
    self.children[pid] = wrap
    return pid
# oslo_service/service.py
def _child_process(self, service):
    """Prepare a freshly forked child and start ``service`` inside it.

    :param service: the service this child should run.
    :returns: the ``Launcher`` that is running the service.
    """
    self._child_process_handle_signal()

    # Reopen the eventlet hub so that the epoll fd is not shared with the
    # parent and/or siblings, which would be bad.
    eventlet.hubs.use_hub()

    # Only the parent may keep the write end of the pipe open.
    os.close(self.writepipe)
    # Green thread that watches for the parent closing the pipe.
    eventlet.spawn_n(self._pipe_watcher)

    # Reseed the random number generator in the child.
    random.seed()

    # Create a service launcher and start the service with it.
    child_launcher = Launcher(self.conf)
    child_launcher.launch_service(service)
    return child_launcher
# oslo_service/service.py
class Launcher(object):
    def launch_service(self, service):
        """Validate ``service`` and hand it to the services container.

        :param service: the service to run.
        """
        # Check the service type.
        _check_service_base(service)
        # Pass on the backdoor port; per the author it is used to create a
        # backdoor_server for inspecting the process.
        backdoor_port = self.backdoor_port
        service.backdoor_port = backdoor_port
        # Run the service.
        self.services.add(service)
# oslo_service/service.py
class Services(object):
    def add(self, service):
        """Register ``service`` and start it in a new green thread.

        :param service: the service to run.
        """
        # Remember the service.
        self.services.append(service)
        # Create a green thread in the thread group that runs the service.
        runner = self.run_service
        self.tg.add_thread(runner, service, self.done)

    @staticmethod
    def run_service(service, done):
        """Service start wrapper.

        :param service: service to run
        :param done: event to wait on until a shutdown is triggered
        :returns: None
        """
        try:
            # Start the service.
            service.start()
        except Exception:
            LOG.exception(_LE('Error starting thread.'))
            raise SystemExit(1)
        # Reached only when start() succeeded: block until shutdown.
        done.wait()
之后我们可以看一下service.start()的跟踪。
# nova/service.py
class WSGIService(service.Service):
    def start(self):
        # NOTE: the beginning of this method is elided in the article.
        ... ...
        ... ...
        # The manager was mentioned earlier: per the author only the
        # metadata API has one by default. The calls below show that it
        # provides hooks around server start.
        # NOTE(review): their exact purpose is not explored in the article.
        if self.manager:
            self.manager.init_host()
            self.manager.pre_start_hook()
            if self.backdoor_port is not None:
                self.manager.backdoor_port = self.backdoor_port
        # Start the WSGI server.
        self.server.start()
        if self.manager:
            self.manager.post_start_hook()
# nova/wsgi.py
class Server(service.ServiceBase):
    def start(self):
        """Start serving a WSGI application."""
        # NOTE: part of this method is elided in the article; ``dup_socket``
        # is defined in the elided code.
        ... ...
        ... ...
        wsgi_kwargs = {
            'func': eventlet.wsgi.server,  # eventlet function that runs the HTTP server
            'sock': dup_socket,  # listening socket to serve on
            'site': self.app,  # the WSGI application
            'protocol': self._protocol,  # eventlet.wsgi.HttpProtocol by default
            'custom_pool': self._pool,  # green thread pool
            'log': self._logger,
            'log_format': CONF.wsgi_log_format,
            'debug': False,
            'keepalive': CONF.wsgi_keep_alive,
            'socket_timeout': self.client_socket_timeout
        }
        # Only pass a URL length limit when one was configured.
        if self._max_url_len:
            wsgi_kwargs['url_length_limit'] = self._max_url_len
        # Spawn the server through utils.spawn.
        self._server = utils.spawn(**wsgi_kwargs)
# nova/utils.py
def spawn(func, *args, **kwargs):
    """Run ``func`` in an eventlet green thread, carrying the context over.

    The request context that is current when ``spawn`` is called is captured
    and re-stored inside the new green thread before ``func`` runs.

    :param func: callable to run.
    :param args: positional arguments for ``func``.
    :param kwargs: keyword arguments for ``func``.
    :returns: whatever ``eventlet.spawn`` returns.
    """
    captured_context = common_context.get_current()

    @functools.wraps(func)
    def run_with_context(*inner_args, **inner_kwargs):
        # NOTE: If update_store is not called after spawn it won't be
        # available for the logger to pull from threadlocal storage.
        if captured_context is not None:
            captured_context.update_store()
        return func(*inner_args, **inner_kwargs)

    # Delegates directly to eventlet's spawn.
    return eventlet.spawn(run_with_context, *args, **kwargs)
之后可以看一下日志文件的输出,可以见到2个服务分别开启了2个子进程workers
launcher.wait()会循环等待终止信号,并在循环中重启已经挂掉的worker。
对于信号(signal)处理这一块我还不是很清楚,没有深入了解其代码,所以在这边挖个坑等以后填上。
# oslo_service/service.py
class ProcessLauncher(object):
    def wait(self):
        """Loop waiting on children to die and respawning as necessary.

        Returns early when the loop ends without a caught signal. Otherwise,
        after the loop is left (or the green thread is killed), a graceful
        shutdown is started through ``self.stop()``.
        """
        systemd.notify_once()
        # Optionally dump the full configuration at debug level.
        if self.conf.log_options:
            LOG.debug('Full set of CONF:')
            self.conf.log_opt_values(LOG, logging.DEBUG)
        try:
            while True:
                self.handle_signal()
                self._respawn_children()
                # No signal means that stop was called. Don't clean up here.
                if not self.sigcaught:
                    return
                signame = self.signal_handler.signals_to_name[self.sigcaught]
                LOG.info(_LI('Caught %s, stopping children'), signame)
                # Anything other than "SIGHUP while daemonized" ends the
                # loop and leads to shutdown below.
                if not _is_sighup_and_daemon(self.sigcaught):
                    break
                # Restart path: reload the configuration, reset each
                # distinct service, then terminate the current children.
                self.conf.reload_config_files()
                for service in set(
                        [wrap.service for wrap in self.children.values()]):
                    service.reset()
                for pid in self.children:
                    os.kill(pid, signal.SIGTERM)
                self.running = True
                self.sigcaught = None
        except eventlet.greenlet.GreenletExit:
            LOG.info(_LI("Wait called after thread killed. Cleaning up."))
        # if we are here it means that we are trying to do graceful shutdown.
        # add alarm watching that graceful_shutdown_timeout is not exceeded
        if (self.conf.graceful_shutdown_timeout and
                self.signal_handler.is_signal_supported('SIGALRM')):
            signal.alarm(self.conf.graceful_shutdown_timeout)
        self.stop()