我用的tornado版本为3.1.1,python版本为2.7.3,下面是一个简单的例子:
from tornado.httpserver import HTTPServer
from tornado.ioloop import IOLoop
from tornado.web import Application, RequestHandler, asynchronous
class MainHandler(RequestHandler):
    """Request handler mapped to "/" that replies to GET with a fixed text."""

    @asynchronous
    def get(self):
        # The request is finished explicitly, right away, with the body text.
        self.finish("Hello, world")
if __name__ == "__main__":
    # Route "/" to MainHandler, listen on port 8888, then run the IOLoop.
    http_server = HTTPServer(Application([(r"/", MainHandler),]))
    http_server.listen(8888)
    IOLoop.instance().start()
这样一个高性能的web服务就完成了,核心只有一个IOLoop和一个HTTPServer,我们从上往下看,先看HTTPServer。
HTTPServer继承自TCPServer,它只负责将接收到的新连接的socket添加到IOLoop中。
def listen(self, port, address=""):
    """Bind listening sockets for ``port``/``address`` and register them.

    The sockets are obtained from ``bind_sockets`` and then passed to
    ``self.add_sockets``.
    """
    sockets = bind_sockets(port, address=address)
    self.add_sockets(sockets)
def add_sockets(self, sockets):
    """Start accepting connections on each of the given listening sockets.

    Every socket is recorded in ``self._sockets`` keyed by its file
    descriptor, and an accept handler is installed for it that passes new
    connections to ``self._handle_connection``.
    """
    # Resolve the IOLoop lazily if none has been set yet.
    if self.io_loop is None:
        self.io_loop = IOLoop.current()
    for sock in sockets:
        self._sockets[sock.fileno()] = sock
        add_accept_handler(sock, self._handle_connection, io_loop=self.io_loop)
首先将HTTPServer的监听socket添加到IOLoop中,添加完成后在accept_handler中接受新连接,接受到新连接后调用self._handle_connection。
def add_accept_handler(sock, callback, io_loop=None):
    """Register ``sock`` with the IOLoop so new connections reach ``callback``.

    When the IOLoop reports ``sock`` as readable, connections are accepted in
    a loop and ``callback(connection, address)`` is called for each one.

    ``io_loop`` defaults to ``IOLoop.current()`` when not given.
    """
    if io_loop is None:
        io_loop = IOLoop.current()

    def accept_handler(fd, events):
        # Keep accepting until accept() reports that nothing is pending.
        while True:
            try:
                connection, address = sock.accept()
            except socket.error as e:
                # EWOULDBLOCK/EAGAIN: stop and hand control back to the loop.
                if e.args[0] in (errno.EWOULDBLOCK, errno.EAGAIN):
                    return
                # ECONNABORTED: skip this connection and try the next one.
                if e.args[0] == errno.ECONNABORTED:
                    continue
                raise
            callback(connection, address)

    io_loop.add_handler(sock.fileno(), accept_handler, IOLoop.READ)
在_handle_connection中创建一个IOStream对象,传给handle_stream,并且在handle_stream中初始化一个HTTPConnection对象。
def _handle_connection(self, connection, address):
    """Wrap an accepted connection in a stream and pass it to handle_stream.

    When ``self.ssl_options`` is set, the socket is first wrapped for
    server-side SSL with the handshake deferred
    (``do_handshake_on_connect=False``).  Errors raised while creating the
    stream or inside ``handle_stream`` are logged instead of propagated.
    """
    if self.ssl_options is not None:
        assert ssl, "Python 2.6+ and OpenSSL required for SSL"
        try:
            connection = ssl_wrap_socket(connection,
                                         self.ssl_options,
                                         server_side=True,
                                         do_handshake_on_connect=False)
        except ssl.SSLError as err:
            # SSL_ERROR_EOF: just close the connection; anything else
            # propagates.
            if err.args[0] == ssl.SSL_ERROR_EOF:
                return connection.close()
            else:
                raise
        except socket.error as err:
            # ECONNABORTED / EINVAL while wrapping: close quietly.
            if err.args[0] in (errno.ECONNABORTED, errno.EINVAL):
                return connection.close()
            else:
                raise
    try:
        if self.ssl_options is not None:
            stream = SSLIOStream(connection, io_loop=self.io_loop, max_buffer_size=self.max_buffer_size)
        else:
            stream = IOStream(connection, io_loop=self.io_loop, max_buffer_size=self.max_buffer_size)
        self.handle_stream(stream, address)
    except Exception:
        app_log.error("Error in connection callback", exc_info=True)
def handle_stream(self, stream, address):
    """Create an ``HTTPConnection`` for a newly accepted stream.

    The connection receives this server's request callback and its
    ``no_keep_alive``, ``xheaders`` and ``protocol`` settings.
    """
    HTTPConnection(stream, address, self.request_callback, self.no_keep_alive, self.xheaders, self.protocol)
到HTTPConnection初始化时,新的连接已经接受,并初始化了IOStream对象,就可以开始读请求过来的数据了,读完之后交给_header_callback,实际是交给_on_headers解析数据。
def __init__(self, stream, address, request_callback, no_keep_alive=False,
             xheaders=False, protocol=None):
    """Store the connection settings and start reading the request headers.

    After saving its arguments and resetting per-request state, the
    constructor installs a close callback on ``stream`` and asks the stream
    to read up to the blank line (``\\r\\n\\r\\n``); the data read is delivered
    to ``self._on_headers`` through ``self._header_callback``.
    """
    self.stream = stream
    self.address = address
    self.address_family = stream.socket.family
    self.request_callback = request_callback
    self.no_keep_alive = no_keep_alive
    self.xheaders = xheaders
    self.protocol = protocol
    self._clear_request_state()
    # The header callback is _on_headers wrapped by stack_context.
    self._header_callback = stack_context.wrap(self._on_headers)
    self.stream.set_close_callback(self._on_connection_close)
    self.stream.read_until(b"\r\n\r\n", self._header_callback)
这里还啰嗦几句,Application的__call__方法首先会调用该请求对应Handler的父类RequestHandler的_execute方法,这里的几个逻辑解释一下。
def _execute(self, transforms, *args, **kwargs):
    """Executes this request with the given output transforms.

    Steps, in order: reject methods not in ``SUPPORTED_METHODS`` with
    ``HTTPError(405)``; decode the positional and keyword path arguments;
    run the XSRF cookie check when it applies; then call ``prepare()`` and
    continue with ``_execute_method`` once its result is complete.  Any
    exception raised along the way goes to ``_handle_request_exception``.
    """
    self._transforms = transforms
    try:
        if self.request.method not in self.SUPPORTED_METHODS:
            raise HTTPError(405)
        self.path_args = [self.decode_argument(arg) for arg in args]
        self.path_kwargs = dict((k, self.decode_argument(v, name=k))
                                for (k, v) in kwargs.items())
        # If XSRF cookies are turned on, reject form submissions without
        # the proper cookie
        if self.request.method not in ("GET", "HEAD", "OPTIONS") and \
                self.application.settings.get("xsrf_cookies"):
            self.check_xsrf_cookie()
        self._when_complete(self.prepare(), self._execute_method)
    except Exception as e:
        self._handle_request_exception(e)
def _when_complete(self, result, callback):
    """Invoke ``callback`` once ``result`` is complete.

    * ``result`` is ``None``: ``callback`` runs immediately.
    * ``result`` is a finished ``Future``: its value must be ``None``
      (otherwise ``ValueError``), then ``callback`` runs.
    * ``result`` is an unfinished ``Future``: this method is re-scheduled
      on the current IOLoop for when the future resolves.
    * anything else: ``ValueError``.

    Every exception raised here, including one from ``callback`` itself, is
    passed to ``self._handle_request_exception`` instead of propagating.
    """
    try:
        if result is None:
            callback()
        elif isinstance(result, Future):
            if result.done():
                if result.result() is not None:
                    raise ValueError('Expected None, got %r' % result)
                callback()
            else:
                # Delayed import of IOLoop because it's not available
                # on app engine
                from tornado.ioloop import IOLoop
                IOLoop.current().add_future(
                    result, functools.partial(self._when_complete,
                                              callback=callback))
        else:
            raise ValueError("Expected Future or None, got %r" % result)
    except Exception as e:
        self._handle_request_exception(e)
def _execute_method(self):
    """Call the handler method named after the HTTP verb.

    Nothing happens if the request is already finished.  Otherwise the
    method whose name is the lower-cased request method (e.g. ``get``) is
    called with the decoded path arguments, and ``_execute_finish`` runs
    once its result is complete.
    """
    if not self._finished:
        method = getattr(self, self.request.method.lower())
        self._when_complete(method(*self.path_args, **self.path_kwargs),
                            self._execute_finish)
def _execute_finish(self):
    """Finish the request automatically, unless that is disabled or done.

    ``self.finish()`` is called only when ``self._auto_finish`` is true and
    the request has not been finished yet.
    """
    if self._auto_finish and not self._finished:
        self.finish()
再看IOLoop,这个模块是异步机制的核心,它包含了一系列已经打开的文件描述符和每个描述符的处理器(handlers)。
def add_handler(self, fd, handler, events):
    """Register ``handler`` to be called for ``events`` on descriptor ``fd``.

    The handler is stored (wrapped by ``stack_context``) under ``fd``, and
    the descriptor is registered with the polling implementation.  The
    ``ERROR`` event is always added to the requested events.
    """
    self._handlers[fd] = stack_context.wrap(handler)
    self._impl.register(fd, events | self.ERROR)
接下来IOLoop就要开始工作了,看start()方法(代码比较长,只保留了主要部分):
def start(self):
    """Run the I/O loop (abridged excerpt; ``# [...]`` marks omitted code).

    Each iteration of the loop:

    1. runs the callbacks queued in ``self._callbacks``;
    2. picks a poll timeout (0 when new callbacks were queued meanwhile);
    3. exits if the loop is no longer marked as running;
    4. polls for ready file descriptors;
    5. dispatches each ready descriptor to its registered handler, logging
       handler exceptions instead of letting them stop the loop.
    """
    # [...]
    self._running = True
    # [...]
    while True:
        poll_timeout = 3600.0
        # Swap the callback list out under the lock, then run the callbacks
        # outside of it.
        with self._callback_lock:
            callbacks = self._callbacks
            self._callbacks = []
        for callback in callbacks:
            self._run_callback(callback)
        # [... poll_timeout is tuned here based on self._timeouts ...]
        if self._callbacks:
            # Callbacks were added while running the batch above, so poll
            # without waiting.
            poll_timeout = 0.0
        if not self._running:
            break
        # [...]
        try:
            # Fetch the events that are ready. poll() returns as soon as
            # there is a result; otherwise it blocks until poll_timeout
            # expires.
            event_pairs = self._impl.poll(poll_timeout)
        except Exception as e:
            # A poll interrupted by EINTR is simply retried on the next
            # iteration.
            if (getattr(e, 'errno', None) == errno.EINTR or
                (isinstance(getattr(e, 'args', None), tuple) and
                 len(e.args) == 2 and e.args[0] == errno.EINTR)):
                continue
            else:
                raise
        # [...]
        # Pop one fd at a time from the set of pending fds and run
        # its handler. Since that handler may perform actions on
        # other file descriptors, there may be reentrant calls to
        # this IOLoop that update self._events
        self._events.update(event_pairs)
        while self._events:
            fd, events = self._events.popitem()
            try:
                # Run the handler. For a listening socket this is
                # accept_handler from netutil, which accepts the socket and
                # calls TCPServer._handle_connection; that method creates an
                # IOStream instance for asynchronous reads and writes.
                self._handlers[fd](fd, events)
            except (OSError, IOError) as e:
                if e.args[0] == errno.EPIPE:
                    # Happens when the client closes the connection
                    pass
                else:
                    app_log.error("Exception in I/O handler for fd %s",
                                  fd, exc_info=True)
            except Exception:
                app_log.error("Exception in I/O handler for fd %s",
                              fd, exc_info=True)
    # [...]
这里看下SelectIOLoop的_impl(即_Select)的poll:
def poll(self, timeout):
    """Wait for activity on the registered descriptors using ``select``.

    Returns the items of a dict mapping each ready file descriptor to the
    bitwise OR of the ``IOLoop.READ`` / ``IOLoop.WRITE`` / ``IOLoop.ERROR``
    flags reported for it.
    """
    # select() returns once there is a result; otherwise it blocks until
    # the timeout expires.
    readable, writeable, errors = select.select(self.read_fds, self.write_fds, self.error_fds, timeout)
    events = {}
    # A descriptor can appear in several lists, so the flags are OR-ed.
    for fd in readable:
        events[fd] = events.get(fd, 0) | IOLoop.READ
    for fd in writeable:
        events[fd] = events.get(fd, 0) | IOLoop.WRITE
    for fd in errors:
        events[fd] = events.get(fd, 0) | IOLoop.ERROR
    return events.items()
至此,可以清晰地看到tornado是如何工作的了!核心就两点:使用epoll模型,保证高并发时接受请求的高效性;将可能阻塞的方法都放到IOLoop里面去循环执行,即程序上的异步,保证CPU的高利用率。这样高并发时,tornado一直在接受请求并一直在努力顺畅地工作,性能自然就上去了。
参考:
http://www.tornadoweb.org/en/stable/
http://golubenco.org/understanding-the-code-inside-tornado-the-asynchronous-web-server-powering-friendfeed.html
http://blog.csdn.net/goldlevi/article/details/7047726