Tornado 是一个 Python Web 框架和异步网络库,基于 IO 事件循环。
本文分析的版本为 v3.0.0,先从早期版本看起。
# 文件 ioloop.py
class PollIOLoop(IOLoop):
    def start(self):
        """Event loop: block in poll() until some fds are ready, then
        dispatch each ready (fd, events) pair to its registered handler."""
        while True:
            ...
            try:
                # Blocks for at most poll_timeout waiting for I/O events.
                event_pairs = self._impl.poll(poll_timeout)
            except Exception as e:
                ...
            self._events.update(event_pairs)
            while self._events:
                fd, events = self._events.popitem()
                # Invoke the handler registered via add_handler() for this fd.
                self._handlers[fd](fd, events)
当有新请求时,IOLoop
便会从 poll
的阻塞中返回读事件,然后调用对应的事件 handler。
在上一节中我们注册了如下所示的 handler
因为设置了 socket 为非阻塞,所以 accept 这里不会阻塞。参考高性能网络编程(一)----accept建立连接
# 文件 netutil.py
def add_accept_handler(sock, callback, io_loop=None):
    """Register a READ handler on ``io_loop`` that accepts connections on
    the non-blocking listening socket ``sock`` and calls
    ``callback(connection, address)`` for each accepted connection."""
    if io_loop is None:
        io_loop = IOLoop.current()

    def accept_handler(fd, events):
        # The listening socket is non-blocking, so accept() never blocks;
        # loop until the accept queue is drained.
        while True:
            try:
                connection, address = sock.accept()
            except socket.error as e:
                # EWOULDBLOCK/EAGAIN: no more pending connections — wait
                # for the next READ event.
                if e.args[0] in (errno.EWOULDBLOCK, errno.EAGAIN):
                    return
                raise
            callback(connection, address)

    io_loop.add_handler(sock.fileno(), accept_handler, IOLoop.READ)
上述代码中调用的 callback
即是 TCPServer._handle_connection
# 文件 tcpserver.py
class TCPServer(object):
    def _handle_connection(self, connection, address):
        """Wrap an accepted ``connection`` in an IOStream (or SSLIOStream
        when SSL is configured) and pass it to ``handle_stream``."""
        if self.ssl_options is not None:
            assert ssl, "Python 2.6+ and OpenSSL required for SSL"
            try:
                connection = ssl_wrap_socket(connection,
                                             self.ssl_options,
                                             server_side=True,
                                             do_handshake_on_connect=False)
            except ssl.SSLError as err:
                # Peer hung up during the handshake — just drop the socket.
                if err.args[0] == ssl.SSL_ERROR_EOF:
                    return connection.close()
                else:
                    raise
            except socket.error as err:
                # Connection aborted before the handshake finished.
                if err.args[0] == errno.ECONNABORTED:
                    return connection.close()
                else:
                    raise
        try:
            if self.ssl_options is not None:
                stream = SSLIOStream(connection, io_loop=self.io_loop)
            else:
                stream = IOStream(connection, io_loop=self.io_loop)
            self.handle_stream(stream, address)
        except Exception:
            app_log.error("Error in connection callback", exc_info=True)
TCPServer._handle_connection
方法将连接封装成 IOStream
再进行处理(IOStream
主要用来读写连接中的数据),再深入 handle_stream
方法
# 文件 httpserver.py
class HTTPServer(TCPServer):
    def __init__(self, request_callback, no_keep_alive=False, io_loop=None,
                 xheaders=False, ssl_options=None, protocol=None, **kwargs):
        # request_callback is the Application instance passed in via
        # http_server = tornado.httpserver.HTTPServer(application).
        self.request_callback = request_callback
        self.no_keep_alive = no_keep_alive
        self.xheaders = xheaders
        self.protocol = protocol
        TCPServer.__init__(self, io_loop=io_loop, ssl_options=ssl_options,
                           **kwargs)

    def handle_stream(self, stream, address):
        # One HTTPConnection per accepted stream; it drives the HTTP
        # request/response cycle on this connection.
        HTTPConnection(stream, address, self.request_callback,
                       self.no_keep_alive, self.xheaders, self.protocol)
之后的调用链 iostream.py
BaseIOStream.read_until
-> BaseIOStream._try_inline_read
-> BaseIOStream._read_to_buffer
-> BaseIOStream._read_from_buffer
-> BaseIOStream._run_callback
class BaseIOStream(object):
    def _run_callback(self, callback, *args):
        """Schedule ``callback(*args)`` on the IOLoop rather than calling
        it inline, tracking it via ``_pending_callbacks``."""
        def wrapper():
            self._pending_callbacks -= 1
            try:
                callback(*args)
            except Exception:
                ...
            self._maybe_add_error_listener()
        # NullContext isolates the callback from the current stack context.
        with stack_context.NullContext():
            self._pending_callbacks += 1
            self.io_loop.add_callback(wrapper)
BaseIOStream._run_callback
方法往 ioloop 中添加了回调函数
其中参数中的 callback
便是 httpserver.py/HTTPConnection._on_headers
# 文件 httpserver.py
class HTTPConnection(object):
    def _on_headers(self, data):
        """Parse the request line and headers from ``data``, build an
        HTTPRequest, read the body if Content-Length is set, then hand the
        request to the application callback."""
        try:
            data = native_str(data.decode('latin1'))
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
            headers = httputil.HTTPHeaders.parse(data[eol:])

            # HTTPRequest wants an IP, not a full socket address
            if self.address_family in (socket.AF_INET, socket.AF_INET6):
                remote_ip = self.address[0]
            else:
                # Unix (or other) socket; fake the remote address
                remote_ip = '0.0.0.0'

            self._request = HTTPRequest(
                connection=self, method=method, uri=uri, version=version,
                headers=headers, remote_ip=remote_ip, protocol=self.protocol)

            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n")
                # Defer dispatch until the whole body has been read.
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            # request_callback is the Application instance.
            self.request_callback(self._request)
        except _BadRequestException as e:
            gen_log.info("Malformed HTTP request from %s: %s",
                         self.address[0], e)
            self.close()
            return
根据读取的数据,解析出 HTTP 请求 header ,通过 Content-Length 字段读取请求 body ,
最后调用 self.request_callback(self._request)
self.request_callback
即是 application。
继续深入,
# 文件 web.py
class Application(object):
    def __call__(self, request):
        """Called by HTTPServer to execute the request."""
        transforms = [t(request) for t in self.transforms]
        handler = None
        args = []
        kwargs = {}
        handlers = self._get_host_handlers(request)
        if not handlers:
            handler = RedirectHandler(
                self, request, url="http://" + self.default_host + "/")
        else:
            for spec in handlers:
                match = spec.regex.match(request.path)  # match handler by path
                if match:
                    handler = spec.handler_class(self, request, **spec.kwargs)
                    if spec.regex.groups:
                        # None-safe wrapper around url_unescape to handle
                        # unmatched optional groups correctly
                        def unquote(s):
                            if s is None:
                                return s
                            return escape.url_unescape(s, encoding=None)
                        # Pass matched groups to the handler.  Since
                        # match.groups() includes both named and unnamed groups,
                        # we want to use either groups or groupdict but not both.
                        # Note that args are passed as bytes so the handler can
                        # decide what encoding to use.
                        if spec.regex.groupindex:
                            kwargs = dict(
                                (str(k), unquote(v))
                                for (k, v) in match.groupdict().items())
                        else:
                            args = [unquote(s) for s in match.groups()]
                    break
        if not handler:
            handler = ErrorHandler(self, request, status_code=404)

        # In debug mode, re-compile templates and reload static files on every
        # request so you don't need to restart to see changes
        if self.settings.get("debug"):
            with RequestHandler._template_loader_lock:
                for loader in RequestHandler._template_loaders.values():
                    loader.reset()
            StaticFileHandler.reset()

        handler._execute(transforms, *args, **kwargs)  # run the handler
        return handler
先是通过 request.path
找到对应的 handler
,同时解析出定义的参数,
之后调用 handler._execute(transforms, *args, **kwargs)
# 文件 web.py
class RequestHandler(object):
    def _execute(self, transforms, *args, **kwargs):
        """Executes this request with the given output transforms."""
        self._transforms = transforms
        try:
            if self.request.method not in self.SUPPORTED_METHODS:
                raise HTTPError(405)
            self.path_args = [self.decode_argument(arg) for arg in args]
            self.path_kwargs = dict((k, self.decode_argument(v, name=k))
                                    for (k, v) in kwargs.items())
            # If XSRF cookies are turned on, reject form submissions without
            # the proper cookie
            if self.request.method not in ("GET", "HEAD", "OPTIONS") and \
                    self.application.settings.get("xsrf_cookies"):
                self.check_xsrf_cookie()
            self.prepare()
            if not self._finished:
                # Dispatch to the user-defined get()/post()/... method.
                getattr(self, self.request.method.lower())(
                    *self.path_args, **self.path_kwargs)
                if self._auto_finish and not self._finished:
                    self.finish()
        except Exception as e:
            self._handle_request_exception(e)
处理完请求后,便是生成响应并返回
主要是三个方法 write
finish
flush
# 文件 web.py
class RequestHandler(object):
    def write(self, chunk):
        """Writes the given chunk to the output buffer."""
        if self._finished:
            raise RuntimeError("Cannot write() after finish(). May be caused "
                               "by using async operations without the "
                               "@asynchronous decorator.")
        # Dicts are serialized as JSON with the matching Content-Type.
        if isinstance(chunk, dict):
            chunk = escape.json_encode(chunk)
            self.set_header("Content-Type", "application/json; charset=UTF-8")
        chunk = utf8(chunk)
        self._write_buffer.append(chunk)

    def finish(self, chunk=None):
        """Finishes this response, ending the HTTP request."""
        if self._finished:
            raise RuntimeError("finish() called twice. May be caused "
                               "by using async operations without the "
                               "@asynchronous decorator.")
        if chunk is not None:
            self.write(chunk)

        # Automatically support ETags and add the Content-Length header if
        # we have not flushed any content yet.
        if not self._headers_written:
            if (self._status_code == 200 and
                    self.request.method in ("GET", "HEAD") and
                    "Etag" not in self._headers):
                etag = self.compute_etag()
                if etag is not None:
                    self.set_header("Etag", etag)
                    inm = self.request.headers.get("If-None-Match")
                    if inm and inm.find(etag) != -1:
                        self._write_buffer = []
                        self.set_status(304)
            if self._status_code == 304:
                assert not self._write_buffer, "Cannot send body with 304"
                self._clear_headers_for_304()
            elif "Content-Length" not in self._headers:
                content_length = sum(len(part) for part in self._write_buffer)
                self.set_header("Content-Length", content_length)

        if hasattr(self.request, "connection"):
            # Now that the request is finished, clear the callback we
            # set on the IOStream (which would otherwise prevent the
            # garbage collection of the RequestHandler when there
            # are keepalive connections)
            self.request.connection.stream.set_close_callback(None)

        if not self.application._wsgi:
            # Serialize headers + buffered body and write to the stream.
            self.flush(include_footers=True)
            # Complete the request (connection may close or be reused).
            self.request.finish()
            self._log()
        self._finished = True
        self.on_finish()

    def flush(self, include_footers=False, callback=None):
        """Flushes the current output buffer to the network."""
        if self.application._wsgi:
            raise Exception("WSGI applications do not support flush()")

        chunk = b"".join(self._write_buffer)
        self._write_buffer = []
        if not self._headers_written:
            self._headers_written = True
            for transform in self._transforms:
                self._status_code, self._headers, chunk = \
                    transform.transform_first_chunk(
                        self._status_code, self._headers, chunk, include_footers)
            headers = self._generate_headers()
        else:
            for transform in self._transforms:
                chunk = transform.transform_chunk(chunk, include_footers)
            headers = b""

        # Ignore the chunk and only write the headers for HEAD requests
        if self.request.method == "HEAD":
            if headers:
                self.request.write(headers, callback=callback)
            return

        self.request.write(headers + chunk, callback=callback)
write
方法暂存响应内容,finish
方法完成响应
从中可以看出,最后处理响应是调用 self.request
的方法,即 httpserver.py/HTTPRequest
一层层追溯下去,最后是调用 iostream.py/BaseIOStream
返回响应
个人想法:
这里跟我之前看的腾讯 phxrpc 处理连接的方式不同,phxrpc 拿到连接后会尝试读取数据,如果不可读,会为这个连接注册读事件;tornado 这里则是拿到连接后直接尝试读取数据(socket 为非阻塞,读不到数据时再注册读事件等待)。还有一点不同的是,phxrpc 有一个单独的线程负责 accept ,读写数据由另外的线程处理;tornado 则是 accept 和读写都在同一个线程中处理。