目录
一、asyncio模块 - 检测网络IO,实现应用程序级别切换
1-1 原始方式 - 仅支持TCP级别请求
1-2 使 asyncio 支持Http协议请求
1-2-1 自定义报头
1-2-2 aiohttp模块封装报头
1-2-3 requests.get方法封装报头
二、gevent模块 优化io阻塞
三、grequests模块
四、twisted 框架 - 自动检测IO并切换
五、tornado
一、asyncio模块 - 检测网络IO,实现应用程序级别切换
1-1 原始方式 - 仅支持TCP级别请求
import asyncio


async def task(task_id, senconds):
    """Print a start marker, wait ``senconds`` seconds, then print an end marker.

    ``asyncio.sleep`` stands in for an I/O wait: while this coroutine is
    suspended on it, the event loop switches to the other pending tasks.

    :param task_id: label printed in the start/end messages.
    :param senconds: how long to wait, in seconds (spelling kept for
        backward compatibility with keyword callers).
    """
    print('%s is start' % task_id)
    await asyncio.sleep(senconds)
    print('%s is end' % task_id)


async def main():
    """Run three demo tasks concurrently inside a single thread."""
    # ``asyncio.wait`` no longer accepts bare coroutine objects on current
    # Python versions; ``gather`` wraps each coroutine in a task itself.
    await asyncio.gather(
        task(task_id="任务1", senconds=3),
        task("任务2", 2),
        task(task_id="任务3", senconds=1),
    )


if __name__ == '__main__':
    # An explicitly created loop avoids the deprecated implicit-loop lookup
    # performed by ``asyncio.get_event_loop()`` outside a running loop.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(main())
    finally:
        loop.close()
1-2 使 asyncio 支持Http协议请求
1-2-1 自定义报头
注意:发送需要 SSL 的请求时,必须确保当前 Python 环境中已安装 pyopenssl 模块。
import asyncio
import requests
import uuid
from urllib.parse import quote

user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'


def parse_page(host, res):
    """Default callback: report the body size and save it to ``<uuid1>.html``.

    :param host: host the body was downloaded from (used in the log line).
    :param res: response body as bytes.
    """
    print('%s 解析结果 %s' % (host, len(res)))
    with open('%s.html' % (uuid.uuid1()), 'wb') as f:
        f.write(res)


async def get_page(host, port=80, url='/', callback=parse_page, ssl=False):
    """Download one page over a raw asyncio stream using a hand-built request.

    asyncio streams only move bytes over TCP, so the HTTP request line and
    headers are assembled manually here.

    :param host: server host name.
    :param port: TCP port; replaced by 443 when ``ssl`` is true.
    :param url: request target (path plus optional query string).
    :param callback: called as ``callback(host, body_bytes)`` once the body
        has been read.
    :param ssl: passed to ``asyncio.open_connection``; a true value enables TLS.
    """
    # TLS-protected pages are served on 443, so switch the port first.
    if ssl:
        port = 443
    scheme = 'https' if ssl else 'http'
    print('下载 %s://%s:%s%s' % (scheme, host, port, url))

    # Step 1 (I/O): open the TCP connection. The original always connected
    # to 443, which broke plain-HTTP requests; use the resolved port.
    recv, send = await asyncio.open_connection(host=host, port=port, ssl=ssl)
    try:
        # Step 2: build the HTTP request head. Non-ASCII characters are not
        # valid in a request line, so percent-encode them; '%' stays safe so
        # already-escaped targets pass through unchanged.
        # (A POST would use "POST %s HTTP/1.0" plus a body after the blank
        # line, e.g. "name=egon&password=123".)
        request_target = quote(url, safe='/?&=%:@+,;~')
        request_headers = 'GET %s HTTP/1.0\r\nHost: %s\r\nUser-agent: %s\r\n\r\n' % (
            request_target, host, user_agent)
        request_headers = request_headers.encode('utf-8')

        # Step 3 (I/O): hand the bytes to the transport, then wait until the
        # write buffer has been flushed.
        send.write(request_headers)
        await send.drain()

        # Step 4 (I/O): read response headers up to the blank line. An empty
        # read means EOF; stop there instead of looping forever.
        while True:
            line = await recv.readline()
            if line in (b'\r\n', b''):
                break
            print('%s Response headers:%s' % (host, line))

        # Step 5 (I/O): read the response body until EOF.
        text = await recv.read()

        # Step 6: pass the body to the callback.
        callback(host, text)
    finally:
        # Step 7: close the connection even if a step above failed. Only the
        # writer has close(); the reader side has no close method.
        send.close()


async def main():
    """Fetch the demo pages concurrently."""
    await asyncio.gather(
        get_page('www.baidu.com', url='/s?wd=柯基', ssl=True),
        get_page('www.cnblogs.com', url='/', ssl=True),
    )


if __name__ == '__main__':
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(main())
    finally:
        loop.close()
1-2-2 aiohttp模块封装报头
import aiohttp
import asyncio


async def get_page(url):
    """Fetch ``url`` with aiohttp, print the raw body, and return 1.

    :param url: absolute URL to request with GET.
    :return: the constant ``1``, so the caller can see one result per task.
    """
    print('GET:%s' % url)
    # aiohttp 3.x no longer supports ``yield from aiohttp.request(...)``;
    # requests go through a ClientSession, and the ``async with`` blocks
    # release the response and the session even on error.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            data = await response.read()
    print(url, data)
    return 1


async def main():
    """Run both downloads concurrently and return their results in order."""
    return await asyncio.gather(
        get_page('https://www.python.org/doc'),
        get_page('https://www.openstack.org'),
    )


if __name__ == '__main__':
    loop = asyncio.new_event_loop()
    try:
        results = loop.run_until_complete(main())
    finally:
        loop.close()
    print('=====>', results)  # [1, 1]
1-2-3 requests.get方法封装报头
import requests
import asyncio


async def get_page(func, *args):
    """Run a blocking call in the default executor and await its result.

    :param func: blocking callable (e.g. ``requests.get``); its return value
        must expose ``url`` and ``text`` attributes.
    :param args: positional arguments for ``func``; ``args[0]`` is logged as
        the URL.
    :return: the constant ``1``.
    """
    print('GET:%s' % args[0])
    # The original assigned the loop to a misspelled name (``loog``) and then
    # silently used the module-level ``loop``; bind the running loop locally.
    loop = asyncio.get_event_loop()
    future = loop.run_in_executor(None, func, *args)
    response = await future
    print(response.url, len(response.text))
    return 1


async def main():
    """Run both downloads concurrently and return their results in order."""
    return await asyncio.gather(
        get_page(requests.get, 'https://www.python.org/doc'),
        get_page(requests.get, 'https://www.openstack.org'),
    )


if __name__ == '__main__':
    loop = asyncio.new_event_loop()
    try:
        results = loop.run_until_complete(main())
    finally:
        loop.close()
    print('=====>', results)  # [1, 1]
二、gevent模块 优化io阻塞
from gevent import monkey

# Patch the standard library before anything else is imported so that the
# blocking socket calls made by requests become cooperative.
monkey.patch_all()

import gevent
import requests
from gevent.pool import Pool


def get_page(url):
    """Download ``url`` with requests, print its text length, and return 1."""
    print('GET:%s' % url)
    page = requests.get(url)
    print(url, len(page.text))
    return 1


# Without a pool the same jobs could be started via gevent.spawn(...) and
# collected with gevent.joinall(...); the pool variant below caps the number
# of greenlets running at once.
pool = Pool(2)
workers = [
    pool.spawn(get_page, target)
    for target in ('https://www.python.org/doc', 'https://www.openstack.org')
]
gevent.joinall(workers)
# Each greenlet's ``value`` holds the return value of get_page.
print(*(worker.value for worker in workers))
三、grequests模块
# pip3 install grequests
import grequests

# The first URL is expected to fail so the exception handler gets exercised.
request_list = [
    grequests.get(address)
    for address in ('https://wwww.xxxx.org/doc1', 'https://www.openstack.org')
]


def exception_handler(request, exception):
    """Report a failed request; called by ``grequests.map`` on errors."""
    failed_url = request.url
    print("%s Request failed" % failed_url)


# Send all requests and collect the responses. Without ``exception_handler``
# the call would simply be ``grequests.map(request_list)``.
response_list = grequests.map(
    request_list,
    exception_handler=exception_handler,
)
print(response_list)
四、twisted 框架 - 自动检测IO并切换
# Installation troubleshooting (Windows).
#
# These notes were originally a plain ''' string; the backslash-U sequence in
# the Windows path is an invalid escape there and made the script a
# SyntaxError, so they are kept as comments instead.
#
# Problem 1: error: Microsoft Visual C++ 14.0 is required. Get it with
#   "Microsoft Visual C++ Build Tools":
#   http://landinghub.visualstudio.com/visual-cpp-build-tools
#   https://www.lfd.uci.edu/~gohlke/pythonlibs/#twisted
#   pip3 install C:\Users\Administrator\Downloads\Twisted-17.9.0-cp36-cp36m-win_amd64.whl
#   pip3 install twisted
#
# Problem 2: ModuleNotFoundError: No module named 'win32api'
#   https://sourceforge.net/projects/pywin32/files/pywin32/
#
# Problem 3: openssl
#   pip3 install pyopenssl

# ---- Basic Twisted usage ----
# NOTE(review): getPage is deprecated in Twisted (since 16.7); newer code is
# expected to use twisted.web.client.Agent. Kept here to match the tutorial.
from twisted.internet import defer, reactor
from twisted.web.client import getPage


def all_done(arg):
    """Stop the reactor once every request has succeeded or failed."""
    # print(arg)
    reactor.stop()


def callback(res):
    """Print one downloaded page body and return 1."""
    print(res)
    return 1


urls = [
    'http://www.baidu.com',
    'http://www.bing.com',
    'https://www.python.org',
]

defer_list = []
for url in urls:
    # getPage takes the URL as bytes; the codec name was typo'd as 'utf=-8'.
    obj = getPage(url.encode('utf-8'))
    obj.addCallback(callback)
    defer_list.append(obj)

# Fires after all page deferreds have fired, whatever their outcome.
defer.DeferredList(defer_list).addBoth(all_done)
reactor.run()

# ---- Detailed getPage usage (POST with form data and headers) ----
# NOTE: a Twisted reactor cannot be restarted after reactor.stop(); run this
# example as its own script rather than after the one above.
from twisted.internet import reactor
from twisted.web.client import getPage
import urllib.parse


def one_done(arg):
    """Print the result (or failure) of the request and stop the reactor."""
    print(arg)
    reactor.stop()


post_data = urllib.parse.urlencode({'check_data': 'adf'})
post_data = bytes(post_data, encoding='utf8')
headers = {b'Content-Type': b'application/x-www-form-urlencoded'}
response = getPage(
    bytes('http://dig.chouti.com/login', encoding='utf8'),
    method=bytes('POST', encoding='utf8'),
    postdata=post_data,
    cookies={},
    headers=headers,
)
response.addBoth(one_done)
reactor.run()
五、tornado
# ---- Example 1: fire the requests; the IOLoop never stops by itself ----
from tornado.httpclient import AsyncHTTPClient
from tornado.httpclient import HTTPRequest
from tornado import ioloop


def handle_response(response):
    """Print the error or the body of one finished request.

    Stopping the loop would need a counter of outstanding requests and a
    call to ``ioloop.IOLoop.current().stop()``; see the second example.

    :param response: the HTTPResponse produced by ``fetch``.
    """
    if response.error:
        print("Error:", response.error)
    else:
        print(response.body)


def func():
    """Start one asynchronous fetch per URL."""
    url_list = [
        'http://www.baidu.com',
        'http://www.bing.com',
    ]
    for url in url_list:
        print(url)
        http_client = AsyncHTTPClient()
        # The ``callback`` argument of fetch() was removed in Tornado 6.0, so
        # attach the handler to the returned future. ``raise_error=False``
        # makes failures arrive as ``response.error`` instead of exceptions.
        ioloop.IOLoop.current().add_future(
            http_client.fetch(HTTPRequest(url), raise_error=False),
            lambda future: handle_response(future.result()),
        )


ioloop.IOLoop.current().add_callback(func)
# NOTE: nothing stops this loop, so start() never returns here; run the
# second example as its own script.
ioloop.IOLoop.current().start()

# ---- Example 2: count outstanding requests so the loop can be stopped ----
# The example above cannot finish even after all requests are done; a counter
# solves that.
from tornado.httpclient import AsyncHTTPClient
from tornado.httpclient import HTTPRequest
from tornado import ioloop

count = 0  # number of requests still in flight


def handle_response(response):
    """Print the outcome of one request and stop the loop after the last one.

    :param response: the HTTPResponse produced by ``fetch``.
    """
    global count
    if response.error:
        print("Error:", response.error)
    else:
        print(len(response.body))
    count -= 1  # one request finished
    if count == 0:
        ioloop.IOLoop.current().stop()


def func():
    """Start one asynchronous fetch per URL and count each as outstanding."""
    global count
    url_list = [
        'http://www.baidu.com',
        'http://www.bing.com',
    ]
    for url in url_list:
        print(url)
        http_client = AsyncHTTPClient()
        ioloop.IOLoop.current().add_future(
            http_client.fetch(HTTPRequest(url), raise_error=False),
            lambda future: handle_response(future.result()),
        )
        count += 1  # one more request in flight


ioloop.IOLoop.current().add_callback(func)
ioloop.IOLoop.current().start()