在Web应用中,服务器把网页传给浏览器,实际上就是把网页的HTML代码发送给浏览器,让浏览器显示出来。而浏览器和服务器之间的传输协议是HTTP,所以:
HTML是一种用来定义网页的文本,会HTML,就可以编写网页;
HTTP是在网络上传输HTML的协议,用于浏览器和服务器的通信。
HTTP是Hyper Text Transfer Protocol(超文本传输协议)的缩写。它的发展是万维网协会(World Wide Web Consortium)和Internet工程任务组IETF(Internet Engineering Task Force)合作的结果,他们最终发布了一系列的RFC:RFC 1945定义了HTTP/1.0版本;其中最著名的是RFC 2616,它定义了今天普遍使用的版本——HTTP/1.1。
HTTP协议(HyperText Transfer Protocol,超文本传输协议)是用于从WWW服务器传输超文本到本地浏览器的传送协议。它可以使浏览器更加高效,使网络传输减少。它不仅保证计算机正确快速地传输超文本文档,还确定传输文档中的哪一部分,以及哪部分内容首先显示(如文本先于图形)等。
HTTP是一个基于TCP/IP通信协议来传递数据(HTML文件、图片文件、查询结果等)的协议。
HTTP是一个应用层协议,由请求和响应构成,是一个标准的客户端服务器模型。HTTP是一个无状态的协议。
下面写一个最简单的Web服务器,对任何请求都返回固定的页面内容。代码如下:
import socket
import multiprocessing
import time
import os
def clientP(newSocket):
    """Serve one HTTP request on ``newSocket`` with a fixed greeting.

    Reads up to 1024 bytes of the request, echoes it to stdout, answers with
    ``200 OK`` and the body ``Hello World!``, then closes the socket.

    :param newSocket: connected client socket as returned by ``accept()``.
    """
    # HTTP requires CRLF line endings; os.linesep is just "\n" on POSIX.
    crlf = '\r\n'
    try:
        # Undecodable bytes are replaced so a stray byte in the request cannot
        # abort the handler before a response is sent.
        recvData = newSocket.recv(1024).decode('gbk', errors='replace')
        print(recvData)
        responseLine = 'HTTP/1.1 200 OK' + crlf
        responseHeader = 'Server: wangzy' + crlf
        responseHeader += 'Date: %s' % time.ctime() + crlf
        responseBody = 'Hello World!'
        sendData = (responseLine + responseHeader + crlf + responseBody).encode('gbk')
        # sendall() keeps writing until every byte is sent; send() may stop early.
        newSocket.sendall(sendData)
    finally:
        # Always release the connection, even if reading or sending failed.
        newSocket.close()
def main():
    """Accept connections on port 7777 forever, one child process per client."""
    # The context manager closes the listening socket when the loop is left
    # (for example on Ctrl-C) instead of leaking it.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as severSocket:
        # Allow an immediate restart while the old port is still winding down.
        severSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        severSocket.bind(('', 7777))
        severSocket.listen(10)
        while True:
            newSocket, clientAddr = severSocket.accept()
            p = multiprocessing.Process(target=clientP, args=(newSocket,))
            p.start()
            # The connection was handed to the child process; the parent
            # drops its own handle.
            newSocket.close()


if __name__ == '__main__':
    main()
结果如下:
第一行:
GET / HTTP/1.1
GET表示一个读取请求,将从服务器获得网页数据,/表示URL的路径,URL总是以/开头,/就表示首页,最后的HTTP/1.1指示采用的HTTP协议版本是1.1。目前HTTP协议的版本就是1.1,但是大部分服务器也支持1.0版本,主要区别在于1.1版本允许多个HTTP请求复用一个TCP连接,以加快传输速度。
HTTP的常用请求方法可以跟数据库的CRUD(增删改查)操作对应起来:
CREATE:POST
READ:GET
UPDATE:PUT
DELETE:DELETE
打开网页,查看开发者工具:
200表示一个成功的响应,后面的OK是说明。
如果返回的不是200,那么往往有其他的功能,例如
失败的响应有404 Not Found:网页不存在
500 Internal Server Error:服务器内部出错
等等...
HTTP格式
HTTP GET请求的格式:
GET /path HTTP/1.1
Header1: Value1
Header2: Value2
Header3: Value3
每个Header一行一个,换行符是\r\n。
HTTP POST请求的格式:
POST /path HTTP/1.1
Header1: Value1
Header2: Value2
Header3: Value3
body data goes here...
当遇到连续两个\r\n时,Header部分结束,后面的数据全部是Body。
HTTP响应的格式:
HTTP/1.1 200 OK
Header1: Value1
Header2: Value2
Header3: Value3
body data goes here...
HTTP响应如果包含body,也是通过\r\n\r\n来分隔的。
请再次注意,Body的数据类型由Content-Type头来确定,如果是网页,Body就是文本,如果是图片,Body就是图片的二进制数据。
当存在Content-Encoding时,Body数据是被压缩的,最常见的压缩方式是gzip,所以,看到Content-Encoding: gzip时,需要将Body数据先解压缩,才能得到真正的数据。压缩的目的在于减少Body的大小,加快网络传输。
显示需要的页面
import socket
import re
from multiprocessing import Process
# Root directory that static files are served from.
HTML_ROOT_DIR = "./html"


def _resolve_static_path(url_path):
    """Map a request path to a file under HTML_ROOT_DIR.

    :param url_path: path component of the request, e.g. ``/index.html``.
    :return: absolute file path, or ``None`` when the path would escape the
        root directory (``..`` segments, symlinks pointing outside).
    """
    import os

    root = os.path.realpath(HTML_ROOT_DIR)
    target = os.path.realpath(os.path.join(root, url_path.lstrip("/")))
    try:
        inside = os.path.commonpath([root, target]) == root
    except ValueError:
        # commonpath() rejects paths it cannot compare (e.g. different drives).
        inside = False
    return target if inside else None


def handle_client(client_socket):
    """Handle one client request by serving a static file.

    Reads up to 1024 bytes, extracts the requested path from the request
    line, and replies with the file's raw bytes (``200``), with ``404`` when
    the file cannot be read or lies outside HTML_ROOT_DIR, or with ``400``
    when the request line cannot be parsed. The socket is always closed.

    :param client_socket: connected client socket as returned by ``accept()``.
    """
    try:
        request_data = client_socket.recv(1024)
        request_text = request_data.decode("utf-8", errors="replace")
        print("request data:", request_text)

        # The request line looks like 'GET / HTTP/1.1'; capture the path.
        request_lines = request_text.splitlines()
        request_start_line = request_lines[0] if request_lines else ""
        match = re.match(r"\w+ +(/[^ ]*) ", request_start_line)

        response_headers = "Server: My server\r\n"
        if match is None:
            # Empty or malformed request: answer instead of crashing.
            response_start_line = "HTTP/1.1 400 Bad Request\r\n"
            response_body = b"Bad request!"
        else:
            # Drop any query string; only the path selects the file.
            file_name = match.group(1).split("?", 1)[0]
            if "/" == file_name:
                file_name = "/index.html"
            try:
                file_path = _resolve_static_path(file_name)
                if file_path is None:
                    raise FileNotFoundError(file_name)
                # Bytes are sent as-is so images and other binary files work.
                with open(file_path, "rb") as file:
                    response_body = file.read()
                response_start_line = "HTTP/1.1 200 OK\r\n"
            except (OSError, ValueError):
                # Missing file, directory, permission problem or invalid path.
                response_start_line = "HTTP/1.1 404 Not Found\r\n"
                response_body = b"The file is not found!"

        response_head = response_start_line + response_headers + "\r\n"
        response = response_head.encode("utf-8") + response_body
        print("response data:", response.decode("utf-8", errors="replace"))

        # sendall() keeps writing until the whole response has been sent.
        client_socket.sendall(response)
    finally:
        client_socket.close()
def main():
    """Program entry point: listen on port 8000, one handler process per client."""
    # The context manager closes the listening socket when the accept loop is
    # left (for example on Ctrl-C) instead of leaking it.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
        server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server_socket.bind(("", 8000))
        server_socket.listen(128)
        while True:
            client_socket, client_address = server_socket.accept()
            print("[%s, %s]用户连接上了" % client_address)
            handle_client_process = Process(target=handle_client, args=(client_socket,))
            handle_client_process.start()
            # The connection was handed to the handler process; the parent
            # drops its own handle.
            client_socket.close()


if __name__ == "__main__":
    main()
结果如下:
使用类创建服务器并实现动态资源的申请
import socket
import multiprocessing
import re
import os
import time
class MyServer(object):
    """Small multi-process HTTP server for static files and ``.py`` handlers.

    A path ending in ``.py`` is treated as a module name; the module's
    ``application(env, startResponse)`` callable produces the response.
    Every other path is read as a gbk-encoded text file below ``HTMLPATH``.
    """

    # HTTP requires CRLF line endings; os.linesep is just "\n" on POSIX.
    _CRLF = '\r\n'

    def __init__(self, pork=8888):
        """Create the listening socket and bind it on all interfaces.

        :param pork: TCP port to bind (the misspelled parameter name is kept
            so existing keyword callers keep working).
        """
        serverSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Allow an immediate restart while the old port is still winding down.
        serverSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        serverSocket.bind(('', pork))
        self.serverSocket = serverSocket
        self.HTMLPATH = './html'

    def start(self):
        """Accept connections forever, handling each in its own process."""
        self.serverSocket.listen()
        while True:
            newSocket, clientAddr = self.serverSocket.accept()
            multiprocessing.Process(target=self.clientHander, args=(newSocket,)).start()
            # The connection was handed to the handler process; the parent
            # drops its own handle.
            newSocket.close()

    def clientHander(self, newSocket):
        """Read one request from ``newSocket``, send the response, close it.

        Any unexpected failure while building the response is reported to the
        client as ``500``.

        :param newSocket: connected client socket as returned by ``accept()``.
        """
        crlf = self._CRLF
        try:
            try:
                recvData = newSocket.recv(1024).decode('gbk')
                # Request line: '<METHOD> <PATH> <VERSION>'.
                fileName = re.split(r' +', recvData.splitlines()[0])[1]
                if fileName.endswith('.py'):
                    responseLine, responseHeader, responseBody = self._runScript(fileName)
                else:
                    responseLine, responseHeader, responseBody = self._readStatic(fileName)
            except Exception:
                # Last-resort boundary: the client still gets a valid reply.
                responseLine = 'HTTP/1.1 500 ERROR' + crlf
                responseHeader = 'Server: Wangzy' + crlf
                responseBody = '服务器忙,稍后再试。。。'
            sendData = (responseLine + responseHeader + crlf + responseBody).encode('gbk')
            # sendall() keeps writing until the whole response has been sent.
            newSocket.sendall(sendData)
        finally:
            newSocket.close()

    def _notFound(self, message):
        """Return the ``(statusLine, headers, body)`` triple for a 404 reply."""
        return ('HTTP/1.1 404 NOT FOUND' + self._CRLF,
                'Server: Wangzy' + self._CRLF,
                message)

    def _runScript(self, fileName):
        """Run the ``application`` callable of the module named by ``fileName``.

        :param fileName: request path ending in ``.py``, e.g. ``/mytime.py``.
        :return: ``(statusLine, headers, body)`` strings.
        """
        pyname = fileName[1:-3]
        print(pyname)
        # SECURITY: the module name comes straight from the request. Only a
        # plain identifier is accepted (no dots or path separators), but any
        # importable top-level module can still be loaded this way -- replace
        # with an explicit allow-list before exposing this server.
        if not pyname.isidentifier():
            return self._notFound('找不到PY文件')
        try:
            pyModule = __import__(pyname)
        except ImportError:
            return self._notFound('找不到PY文件')
        env = {}
        responseBody = pyModule.application(env, self.startResponse)
        return self.responseLine, self.responseHeader, responseBody

    def _readStatic(self, fileName):
        """Read a gbk-encoded text file below ``HTMLPATH``.

        :param fileName: request path; ``/`` selects ``index.html``.
        :return: ``(statusLine, headers, body)`` strings; a 404 triple when
            the file is missing or the path points outside ``HTMLPATH``.
        """
        relative = 'index.html' if '/' == fileName else fileName.lstrip('/')
        root = os.path.realpath(self.HTMLPATH)
        filePath = os.path.realpath(os.path.join(root, relative))
        try:
            inside = os.path.commonpath([root, filePath]) == root
        except ValueError:
            # commonpath() rejects paths it cannot compare.
            inside = False
        if not inside:
            # '..' segments must not reach files outside the document root.
            return self._notFound('找不到html文件')
        try:
            with open(filePath, 'r', encoding='gbk') as file:
                responseBody = file.read()
        except FileNotFoundError:
            return self._notFound('找不到html文件')
        return ('HTTP/1.1 200 OK' + self._CRLF,
                'Server: Wangzy' + self._CRLF,
                responseBody)

    def startResponse(self, status, responseHeaders):
        """Record the status line and headers chosen by an application.

        :param status: full status line without line ending,
            e.g. ``'HTTP/1.1 200 OK'``.
        :param responseHeaders: iterable of ``(name, value)`` string pairs.
        :return: None
        """
        # The status line needs its own terminator, otherwise it is fused
        # with the first header on the wire.
        self.responseLine = status + self._CRLF
        self.responseHeader = ''.join(
            k + ':' + v + self._CRLF for k, v in responseHeaders
        )


if __name__ == '__main__':
    server = MyServer()
    server.start()
动态申请的py文件写法:
import time
def application(env, startResponse):
    """Dynamic page: report the server's current local time as plain text.

    :param env: request environment dict supplied by the server (unused).
    :param startResponse: callable ``(status, headers)`` through which the
        status line and the list of ``(name, value)`` headers are reported.
    :return: the response body as ``str``.
    """
    status = 'HTTP/1.1 200 OK'
    # Send a real timestamp instead of the literal placeholder 'today'.
    # NOTE: %a/%b follow the process locale; under the default "C" locale
    # this yields the usual 'Sun, 06 Nov 1994 08:49:37 GMT' layout.
    httpDate = time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime())
    responseHeaders = [('Server', 'Wangzy'), ('Date', httpDate), ('Content-Type', 'text/plain')]
    startResponse(status, responseHeaders)
    # time.ctime() already returns str; no extra conversion is needed.
    responseBody = time.ctime()
    return responseBody
结果如下:
如果请求的路径不存在
以上例子中就用到了WSGI接口。
上面的application()函数就是符合WSGI标准的一个HTTP处理函数,它接收两个参数:
environ:一个包含所有HTTP请求信息的dict对象;
start_response:一个发送HTTP响应的函数。
整个application()函数本身没有涉及到任何解析HTTP的部分,也就是说,把底层web服务器解析部分和应用程序逻辑部分进行了分离,这样开发者就可以专心做一个领域了
不过,等等,这个application()函数怎么调用?如果我们自己调用,两个参数environ和start_response我们没法提供,返回的str也没法发给浏览器。
所以application()函数必须由WSGI服务器来调用。有很多符合WSGI规范的服务器。而我们此时的web服务器项目的目的就是做一个既可以解析静态网页又可以解析动态网页的服务器。
下面来编写框架和服务器,实现功能的分离。
服务器代码如下:
import socket
import multiprocessing
import re
import os
import myFramework
class MyServer(object):
    """HTTP server that hands every request to a WSGI-style application.

    The application is called as ``application(env, startResponse)`` and
    must return the response body as ``str``.
    """

    # HTTP requires CRLF line endings; os.linesep is just "\n" on POSIX.
    _CRLF = '\r\n'

    def __init__(self, application):
        """Create the (still unbound) listening socket.

        :param application: callable ``(env, startResponse) -> str``.
        """
        serverSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Allow an immediate restart while the old port is still winding down.
        serverSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.serverSocket = serverSocket
        self.application = application

    def bind(self, port=9999):
        """Bind the listening socket to ``port`` on all interfaces."""
        self.serverSocket.bind(('', port))

    def start(self):
        """Accept connections forever, handling each in its own process."""
        self.serverSocket.listen()
        while True:
            newSocket, clientAddr = self.serverSocket.accept()
            multiprocessing.Process(target=self.clientHander, args=(newSocket,)).start()
            # The connection was handed to the handler process; the parent
            # drops its own handle.
            newSocket.close()

    def clientHander(self, newSocket):
        """Parse one request, run the application, send its response.

        ``env`` holds each request header under its own name, plus
        ``PATH_INFO`` (request path) and ``METHOD`` (request method). Any
        failure while parsing or inside the application is answered with
        ``500``. The socket is always closed.

        :param newSocket: connected client socket as returned by ``accept()``.
        """
        crlf = self._CRLF
        try:
            try:
                recvData = newSocket.recv(1024).decode('gbk')
                requestLines = recvData.splitlines()
                # Request line: '<METHOD> <PATH> <VERSION>'.
                method, fileName = re.split(r' +', requestLines[0])[:2]
                env = {}
                # Walk the header LINES (not the characters of the request).
                for item in requestLines[1:]:
                    if not item:
                        break  # blank line marks the end of the headers
                    # Split on the first colon only: values such as
                    # 'localhost:8888' contain colons themselves.
                    k, sep, v = item.partition(':')
                    if sep:
                        env[k.strip()] = v.strip()
                # Set last so a client header cannot override these keys.
                env['PATH_INFO'] = fileName
                env['METHOD'] = method
                responseBody = self.application(env, self.startResponse)
                responseHead = self.responseHeader
            except Exception:
                # Last-resort boundary: the client still gets a valid reply.
                responseHead = 'HTTP/1.1 500 ERROR' + crlf + 'Server: Wangzy' + crlf
                responseBody = '服务器忙,稍后再试。。。'
            # Every header line already ends in CRLF, so one more CRLF is
            # exactly the blank line that separates headers from body.
            sendData = (responseHead + crlf + responseBody).encode('gbk')
            # sendall() keeps writing until the whole response has been sent.
            newSocket.sendall(sendData)
        finally:
            newSocket.close()

    def startResponse(self, status, responseHeader):
        """Record the status line and headers chosen by the application.

        :param status: full status line without line ending,
            e.g. ``'HTTP/1.1 200 OK'``.
        :param responseHeader: iterable of ``(name, value)`` string pairs.
        """
        crlf = self._CRLF
        self.responseHeader = status + crlf + ''.join(
            k + ':' + v + crlf for k, v in responseHeader
        )


if __name__ == '__main__':
    server = MyServer(myFramework.application)
    server.bind(8888)
    server.start()
框架代码如下:
import time
class Application(object):
    """Tiny WSGI-style framework: static files plus a table of handlers.

    Paths under ``/static`` are read as gbk-encoded text files below
    ``filePath``; any other path is looked up in ``urls``.
    """

    def __init__(self, urls):
        """
        :param urls: iterable of ``(path, handler)`` pairs; each handler is
            called as ``handler(env, startResponse)`` and returns the body.
        """
        self.urls = urls
        self.filePath = './html'

    def __call__(self, env, startResponse):
        """Handle one request.

        :param env: request dict; ``PATH_INFO`` holds the requested path.
        :param startResponse: callable ``(status, headers)`` used to report
            the status line and the header list.
        :return: the response body as ``str``.
        """
        fileName = env.get('PATH_INFO') or '/'

        # Static access: match '/static' as a whole path segment only.
        if fileName == '/static' or fileName.startswith('/static/'):
            return self._serveStatic(fileName[len('/static'):], startResponse)

        # Dynamic access: first route whose path equals the request path.
        for k, v in self.urls:
            if k == fileName:
                return v(env, startResponse)

        startResponse('HTTP/1.1 404 NOT FOUND', [('Server', 'Wangzy')])
        return 'py文件找不到!'

    def _serveStatic(self, relPath, startResponse):
        """Serve the file ``relPath`` (relative to ``filePath``) or a 404."""
        import os

        responseHeaders = [('Server', 'Wangzy')]
        if relPath in ('', '/'):
            relPath = '/index.html'
        # Build the path in a local variable: appending to self.filePath
        # would corrupt the root directory for every later request.
        root = os.path.realpath(self.filePath)
        target = os.path.realpath(os.path.join(root, relPath.lstrip('/')))
        try:
            inside = os.path.commonpath([root, target]) == root
        except ValueError:
            # commonpath() rejects paths it cannot compare.
            inside = False
        # '..' segments must not reach files outside the document root.
        if inside:
            try:
                with open(target, 'r', encoding='gbk') as file:
                    responseBody = file.read()
            except OSError:
                # Missing file, directory or permission problem: 404 below.
                pass
            else:
                startResponse('HTTP/1.1 200 OK', responseHeaders)
                return responseBody
        startResponse('HTTP/1.1 404 NOT FOUND', responseHeaders)
        return 'HTML文件找不到!'
def showtime(env, startResponse):
    """Dynamic handler: answer ``200 OK`` with the server's current time.

    :param env: request environment dict (unused).
    :param startResponse: callable receiving the status line and header list.
    :return: ``time.ctime()`` as the response body.
    """
    startResponse('HTTP/1.1 200 OK', [('Server', 'Wangzy')])
    return time.ctime()
def shownews(env, startResponse):
    """Dynamic handler: answer ``200 OK`` with a fixed news placeholder.

    :param env: request environment dict (unused).
    :param startResponse: callable receiving the status line and header list.
    :return: the placeholder news text as the response body.
    """
    startResponse('HTTP/1.1 200 OK', [('Server', 'Wangzy')])
    return '今天的新闻是。。。。'
# Routing table: request path -> handler callable taking (env, startResponse).
urls = [
('/showtime', showtime),
('/shownews', shownews)
]
# Framework object handed to the server; calling it handles one request.
application = Application(urls)
结果如下:
这里就实现了框架和服务器的功能分离,但是现在还存在一个问题,我们在服务器中导入了框架,并且把需要调用的框架对象给写死了,如果需要应用其他框架,还需要修改服务器代码,这就违反了代码的开闭原则。
为了使我们的服务器具有通用性,可以匹配任何符合wsgi的框架,我们修改服务器代码如下:
import socket
import multiprocessing
import re
import os
import sys
class MyServer(object):
    """HTTP server that hands every request to a WSGI-style application.

    The application is called as ``application(env, startResponse)`` and
    must return the response body as ``str``.
    """

    # HTTP requires CRLF line endings; os.linesep is just "\n" on POSIX.
    _CRLF = '\r\n'

    def __init__(self, application):
        """Create the (still unbound) listening socket.

        :param application: callable ``(env, startResponse) -> str``.
        """
        serverSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Allow an immediate restart while the old port is still winding down.
        serverSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.serverSocket = serverSocket
        self.application = application

    def bind(self, port=9999):
        """Bind the listening socket to ``port`` on all interfaces."""
        self.serverSocket.bind(('', port))

    def start(self):
        """Accept connections forever, handling each in its own process."""
        self.serverSocket.listen()
        while True:
            newSocket, clientAddr = self.serverSocket.accept()
            multiprocessing.Process(target=self.clientHander, args=(newSocket,)).start()
            # The connection was handed to the handler process; the parent
            # drops its own handle.
            newSocket.close()

    def clientHander(self, newSocket):
        """Parse one request, run the application, send its response.

        ``env`` holds each request header under its own name, plus
        ``PATH_INFO`` (request path) and ``METHOD`` (request method). Any
        failure while parsing or inside the application is answered with
        ``500``. The socket is always closed.

        :param newSocket: connected client socket as returned by ``accept()``.
        """
        crlf = self._CRLF
        try:
            try:
                recvData = newSocket.recv(1024).decode('gbk')
                requestLines = recvData.splitlines()
                # Request line: '<METHOD> <PATH> <VERSION>'.
                method, fileName = re.split(r' +', requestLines[0])[:2]
                env = {}
                # Walk the header LINES (not the characters of the request).
                for item in requestLines[1:]:
                    if not item:
                        break  # blank line marks the end of the headers
                    # Split on the first colon only: values such as
                    # 'localhost:8888' contain colons themselves.
                    k, sep, v = item.partition(':')
                    if sep:
                        env[k.strip()] = v.strip()
                # Set last so a client header cannot override these keys.
                env['PATH_INFO'] = fileName
                env['METHOD'] = method
                responseBody = self.application(env, self.startResponse)
                responseHead = self.responseHeader
            except Exception:
                # Last-resort boundary: the client still gets a valid reply.
                responseHead = 'HTTP/1.1 500 ERROR' + crlf + 'Server: Wangzy' + crlf
                responseBody = '服务器忙,稍后再试。。。'
            # Every header line already ends in CRLF, so one more CRLF is
            # exactly the blank line that separates headers from body.
            sendData = (responseHead + crlf + responseBody).encode('gbk')
            # sendall() keeps writing until the whole response has been sent.
            newSocket.sendall(sendData)
        finally:
            newSocket.close()

    def startResponse(self, status, responseHeader):
        """Record the status line and headers chosen by the application.

        :param status: full status line without line ending,
            e.g. ``'HTTP/1.1 200 OK'``.
        :param responseHeader: iterable of ``(name, value)`` string pairs.
        """
        crlf = self._CRLF
        self.responseHeader = status + crlf + ''.join(
            k + ':' + v + crlf for k, v in responseHeader
        )
def _loadApplication(spec):
    """Import the object named by a ``module:attribute`` string.

    :param spec: e.g. ``'myFramework:application'``.
    :return: the attribute looked up on the imported module.
    :raises ValueError: if ``spec`` is not of the form ``module:attribute``.
    :raises ImportError: if the module cannot be imported.
    :raises AttributeError: if the module lacks the attribute.
    """
    import importlib

    moduleName, sep, attrName = spec.partition(':')
    if not (moduleName and sep and attrName):
        raise ValueError('expected module:attribute, got %r' % spec)
    print(moduleName)
    print(attrName)
    # import_module() returns the leaf module for dotted names, whereas
    # __import__('a.b') would return the top-level package 'a'.
    myModule = importlib.import_module(moduleName)
    return getattr(myModule, attrName)


def main():
    """Start the server for the application named on the command line.

    Usage: ``python <script> module:attribute``. Exits with an error message
    when the argument is missing or the application cannot be loaded.
    """
    print(sys.argv)
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit('usage: %s module:application' % sys.argv[0])
    try:
        application = _loadApplication(sys.argv[1])
    except (ValueError, ImportError, AttributeError) as err:
        sys.exit('cannot load application %r: %s' % (sys.argv[1], err))
    server = MyServer(application)
    server.bind(8888)
    server.start()


if __name__ == '__main__':
    main()
框架代码如下:
import time
class Application(object):
    """Tiny WSGI-style framework: static files plus a table of handlers.

    Paths under ``/static`` are read as gbk-encoded text files below
    ``filePath``; any other path is looked up in ``urls``.
    """

    def __init__(self, urls):
        """
        :param urls: iterable of ``(path, handler)`` pairs; each handler is
            called as ``handler(env, startResponse)`` and returns the body.
        """
        self.urls = urls
        self.filePath = './html'

    def __call__(self, env, startResponse):
        """Handle one request.

        :param env: request dict; ``PATH_INFO`` holds the requested path.
        :param startResponse: callable ``(status, headers)`` used to report
            the status line and the header list.
        :return: the response body as ``str``.
        """
        fileName = env.get('PATH_INFO') or '/'

        # Static access: match '/static' as a whole path segment only.
        if fileName == '/static' or fileName.startswith('/static/'):
            return self._serveStatic(fileName[len('/static'):], startResponse)

        # Dynamic access: first route whose path equals the request path.
        for k, v in self.urls:
            if k == fileName:
                return v(env, startResponse)

        startResponse('HTTP/1.1 404 NOT FOUND', [('Server', 'Wangzy')])
        return 'py文件找不到!'

    def _serveStatic(self, relPath, startResponse):
        """Serve the file ``relPath`` (relative to ``filePath``) or a 404."""
        import os

        responseHeaders = [('Server', 'Wangzy')]
        if relPath in ('', '/'):
            relPath = '/index.html'
        # Build the path in a local variable: appending to self.filePath
        # would corrupt the root directory for every later request.
        root = os.path.realpath(self.filePath)
        target = os.path.realpath(os.path.join(root, relPath.lstrip('/')))
        try:
            inside = os.path.commonpath([root, target]) == root
        except ValueError:
            # commonpath() rejects paths it cannot compare.
            inside = False
        # '..' segments must not reach files outside the document root.
        if inside:
            try:
                with open(target, 'r', encoding='gbk') as file:
                    responseBody = file.read()
            except OSError:
                # Missing file, directory or permission problem: 404 below.
                pass
            else:
                startResponse('HTTP/1.1 200 OK', responseHeaders)
                return responseBody
        startResponse('HTTP/1.1 404 NOT FOUND', responseHeaders)
        return 'HTML文件找不到!'
def showtime(env, startResponse):
    """Dynamic handler: answer ``200 OK`` with the server's current time.

    :param env: request environment dict (unused).
    :param startResponse: callable receiving the status line and header list.
    :return: ``time.ctime()`` as the response body.
    """
    startResponse('HTTP/1.1 200 OK', [('Server', 'Wangzy')])
    return time.ctime()
def shownews(env, startResponse):
    """Dynamic handler: answer ``200 OK`` with a fixed news placeholder.

    :param env: request environment dict (unused).
    :param startResponse: callable receiving the status line and header list.
    :return: the placeholder news text as the response body.
    """
    startResponse('HTTP/1.1 200 OK', [('Server', 'Wangzy')])
    return '今天的新闻是。。。。'
# Routing table: request path -> handler callable taking (env, startResponse).
urls = [
('/showtime', showtime),
('/shownews', shownews)
]
# Framework object loaded by the server; calling it handles one request.
application = Application(urls)
此时,我们如果运行服务器会报错:
在终端用命令运行服务器