概述
本文分析了CherryPy wsgiserver的模块,分析它如何基于socket,来处理HTTP请求的。
类
一些主要的类,如下图所示:
- HTTPServer:代表了HTTP服务程序,基于socket,诊听在某个端口上,比如:localhost:8080。
- ThreadPool:代表了线程池,有一个消息队列,所有的线程都等在这个消息队列上。
- WorkerThread:代表了一个HTTP请求的处理线程,它由ThreadPool统一管理,它等着ThreadPool消息队列中的消息。
- HTTPConnection:代表了一个HTTP连接,WorkerThread的实际处理体。
- HTTPRequest:代表了一个HTTP请求,HTTP请求和响应都由该类处理。
处理流程
HTTPServer首先创建诊听的socket,位于HTTPServer.bind()中。
def bind(self, family, type, proto=0):
"""Create (or recreate) the actual socket object."""
self.socket = socket.socket(family, type, proto)
prevent_socket_inheritance(self.socket)
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
if self.nodelay and not isinstance(self.bind_addr, str):
self.socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
if self.ssl_adapter is not None:
self.socket = self.ssl_adapter.bind(self.socket)
# If listening on the IPV6 any address ('::' = IN6ADDR_ANY),
# activate dual-stack. See http://www.cherrypy.org/ticket/871.
if (hasattr(socket, 'AF_INET6') and family == socket.AF_INET6
and self.bind_addr[0] in ('::', '::0', '::0.0.0.0')):
try:
self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
except (AttributeError, socket.error):
# Apparently, the socket option is not available in
# this machine's TCP stack
pass
self.socket.bind(self.bind_addr)
然后,进入accept循环,位于start()函数中。
while self.ready:
try:
self.tick()
except (KeyboardInterrupt, SystemExit):
raise
except:
self.error_log("Error in HTTPServer.tick", level=logging.ERROR,
traceback=True)
if self.interrupt:
while self.interrupt is True:
# Wait for self.stop() to complete. See _set_interrupt.
time.sleep(0.1)
if self.interrupt:
raise self.interrupt
tick函数中,主要是对请求的socket,包装成HTTPConnection,然后丢到ThreadPool的队列中。
def tick(self):
"""Accept a new connection and put it on the Queue."""
try:
s, addr = self.socket.accept()
if self.stats['Enabled']:
self.stats['Accepts'] += 1
if not self.ready:
return
prevent_socket_inheritance(s)
if hasattr(s, 'settimeout'):
s.settimeout(self.timeout)
makefile = CP_makefile
ssl_env = {}
# if ssl cert and key are set, we try to be a secure HTTP server
if self.ssl_adapter is not None:
try:
s, ssl_env = self.ssl_adapter.wrap(s)
except NoSSLError:
msg = ("The client sent a plain HTTP request, but "
"this server only speaks HTTPS on this port.")
buf = ["%s 400 Bad Request\r\n" % self.protocol,
"Content-Length: %s\r\n" % len(msg),
"Content-Type: text/plain\r\n\r\n",
msg]
wfile = makefile(s, "wb", DEFAULT_BUFFER_SIZE)
try:
wfile.write("".join(buf).encode('ISO-8859-1'))
except socket.error:
x = sys.exc_info()[1]
if x.args[0] not in socket_errors_to_ignore:
raise
return
if not s:
return
makefile = self.ssl_adapter.makefile
# Re-apply our timeout since we may have a new socket object
if hasattr(s, 'settimeout'):
s.settimeout(self.timeout)
conn = self.ConnectionClass(self, s, makefile)
if not isinstance(self.bind_addr, basestring):
# optional values
# Until we do DNS lookups, omit REMOTE_HOST
if addr is None: # sometimes this can happen
# figure out if AF_INET or AF_INET6.
if len(s.getsockname()) == 2:
# AF_INET
addr = ('0.0.0.0', 0)
else:
# AF_INET6
addr = ('::', 0)
conn.remote_addr = addr[0]
conn.remote_port = addr[1]
conn.ssl_env = ssl_env
self.requests.put(conn)
except socket.timeout:
# The only reason for the timeout in start() is so we can
# notice keyboard interrupts on Win32, which don't interrupt
# accept() by default
return
except socket.error:
x = sys.exc_info()[1]
if self.stats['Enabled']:
self.stats['Socket Errors'] += 1
if x.args[0] in socket_error_eintr:
# I *think* this is right. EINTR should occur when a signal
# is received during the accept() call; all docs say retry
# the call, and I *think* I'm reading it right that Python
# will then go ahead and poll for and handle the signal
# elsewhere. See http://www.cherrypy.org/ticket/707.
return
if x.args[0] in socket_errors_nonblocking:
# Just try again. See http://www.cherrypy.org/ticket/479.
return
if x.args[0] in socket_errors_to_ignore:
# Our socket was closed.
# See http://www.cherrypy.org/ticket/686.
return
raise
值得注意的是,HTTPConnection在初始化的时候,会将socket包装成文件形式的对象。之所以这样做,需要对HTTP请求的处理,像对文本文件那样。
def __init__(self, server, sock, makefile=CP_fileobject):
self.server = server
self.socket = sock
self.rfile = makefile(sock, "rb", self.rbufsize)
self.wfile = makefile(sock, "wb", self.wbufsize)
self.requests_seen = 0
此时,HTTPConnection会被WorkerThread线程接收到,拿到HTTPConnection对象后,直接处理之。
def run(self):
self.server.stats['Worker Threads'][self.getName()] = self.stats
try:
self.ready = True
while True:
conn = self.server.requests.get()
if conn is _SHUTDOWNREQUEST:
return
self.conn = conn
if self.server.stats['Enabled']:
self.start_time = time.time()
try:
conn.communicate()
finally:
conn.close()
if self.server.stats['Enabled']:
self.requests_seen += self.conn.requests_seen
self.bytes_read += self.conn.rfile.bytes_read
self.bytes_written += self.conn.wfile.bytes_written
self.work_time += time.time() - self.start_time
self.start_time = None
self.conn = None
except (KeyboardInterrupt, SystemExit):
exc = sys.exc_info()[1]
self.server.interrupt = exc
conn.communicate()中也直接转调HTTPRequest处理实际的HTTP请求。
while True:
# (re)set req to None so that if something goes wrong in
# the RequestHandlerClass constructor, the error doesn't
# get written to the previous request.
req = None
req = self.RequestHandlerClass(self.server, self)
# This order of operations should guarantee correct pipelining.
req.parse_request()
if self.server.stats['Enabled']:
self.requests_seen += 1
if not req.ready:
# Something went wrong in the parsing (and the server has
# probably already made a simple_response). Return and
# let the conn close.
return
request_seen = True
req.respond()
if req.close_connection:
return
req.parse_request()的处理流程也不复杂,先读取请求的那一行,然后读取头,具体参考 HTTP协议。
def parse_request(self):
"""Parse the next HTTP request start-line and message-headers."""
self.rfile = SizeCheckWrapper(self.conn.rfile,
self.server.max_request_header_size)
try:
success = self.read_request_line()
except MaxSizeExceeded:
self.simple_response("414 Request-URI Too Long",
"The Request-URI sent with the request exceeds the maximum "
"allowed bytes.")
return
else:
if not success:
return
try:
success = self.read_request_headers()
except MaxSizeExceeded:
self.simple_response("413 Request Entity Too Large",
"The headers sent with the request exceed the maximum "
"allowed bytes.")
return
else:
if not success:
return
self.ready = True
req.respond()流程稍许有点复杂,它会去调用用户定义的wsgi app,来获取HTTP的响应。详细的流程不再本文范围之内,不再详述。
def respond(self):
"""Process the current request."""
response = self.req.server.wsgi_app(self.env, self.start_response)
try:
for chunk in response:
# "The start_response callable must not actually transmit
# the response headers. Instead, it must store them for the
# server or gateway to transmit only after the first
# iteration of the application return value that yields
# a NON-EMPTY string, or upon the application's first
# invocation of the write() callable." (PEP 333)
if chunk:
if isinstance(chunk, unicodestr):
chunk = chunk.encode('ISO-8859-1')
self.write(chunk)
finally:
if hasattr(response, "close"):
response.close()
总结
- 通过对源码的分析,可以看到CherryPy是如何包装socket的,先包成HTTPConnection,然后包成伪文件。
- 对Python语言而言,有些技巧可以在源码中看到。实际上HTTPServer等类,有些参数是可以修改的,比如,你可以实现自己的HTTPConnection,然后设置到HTTPServer中。