方法一:
import asyncio
import requests

async def main():
    """Run two blocking requests.get calls concurrently on the default executor."""
    loop = asyncio.get_event_loop()
    # run_in_executor(None, ...) hands the blocking call to the loop's default
    # thread pool, so the two HTTP requests overlap instead of running serially.
    fut_baidu = loop.run_in_executor(None, requests.get, 'http://www.baidu.com')
    fut_python = loop.run_in_executor(None, requests.get, 'http://www.python.org')
    resp_baidu = await fut_baidu
    resp_python = await fut_python
    print(resp_baidu.text)
    print(resp_python.text)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
方法二:
copy from liaoxuefeng
https://www.liaoxuefeng.com/wiki/0014316089557264a6b348958f449949df42a6d3a2e542c000/001432090954004980bd351f2cd4cc18c9e6c06d855c498000
import asyncio

async def wget(host):
    """Fetch '/' from *host* over a raw TCP connection and print the response headers.

    Rewritten from the legacy ``@asyncio.coroutine`` / ``yield from`` form:
    that decorator was deprecated in Python 3.8 and removed in 3.11, so the
    generator-based version no longer runs on modern interpreters.
    """
    print('wget %s...' % host)
    # open_connection yields a (StreamReader, StreamWriter) pair.
    reader, writer = await asyncio.open_connection(host, 80)
    header = 'GET / HTTP/1.0\r\nHost: %s\r\n\r\n' % host
    writer.write(header.encode('utf-8'))
    await writer.drain()
    while True:
        line = await reader.readline()
        # Headers end at the first blank CRLF line.
        if line == b'\r\n':
            break
        print('%s header > %s' % (host, line.decode('utf-8').rstrip()))
    # Ignore the body, close the socket
    writer.close()

loop = asyncio.get_event_loop()
# Modern asyncio.wait() rejects bare coroutines — wrap each one in a Task.
tasks = [loop.create_task(wget(host))
         for host in ['www.sina.com.cn', 'www.sohu.com', 'www.163.com']]
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
方法三:
在一个网页上,一个工程师做了一个探索。发现,当使用 aiohttp+asyncio 的时候是比 requests 要快很多倍的。所以,推荐使用原装的库。
import aiohttp
import asyncio

async def fetch(session, url):
    """Return the decoded response body of *url* fetched through *session*."""
    async with session.get(url) as resp:
        return await resp.text()

async def main():
    # A single ClientSession is reused for every request it issues.
    async with aiohttp.ClientSession() as session:
        html = await fetch(session, 'http://python.org')
        print(html)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
import aiohttp
import asyncio
import time
import requests

async def fetch(session, url):
    """Return the decoded response body of *url* fetched through *session*."""
    async with session.get(url) as resp:
        return await resp.text()

async def main():
    async with aiohttp.ClientSession() as session:
        await fetch(session, 'http://python.org')

# Time a single fetch through aiohttp + asyncio ...
start = time.time()
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
print(time.time() - start)

# ... then the same single fetch through plain blocking requests.
start = time.time()
res_text = requests.get('http://python.org').text
print(time.time() - start)
输出结果是:
3.109215259552002
2.7337353229522705
目前来看,只爬一个页面的话,其实反而request会快点。
但是当爬取的数目提高的时候,情况就发生了变化。
import aiohttp
import asyncio
import time
import requests

async def fetch(session, url):
    """Return the decoded response body of *url* fetched through *session*."""
    async with session.get(url) as response:
        return await response.text()

async def main():
    async with aiohttp.ClientSession() as session:
        await fetch(session, 'http://python.org')

# Time two concurrent fetches through aiohttp + asyncio.
st = time.time()
loop = asyncio.get_event_loop()
# FIX: asyncio.wait() no longer accepts bare coroutines (deprecated in 3.8,
# removed in 3.11); asyncio.gather wraps them into Tasks itself.
loop.run_until_complete(asyncio.gather(main(), main()))
et = time.time()
print(et - st)

# Time two sequential fetches through blocking requests for comparison.
st = time.time()
res_text = requests.get('http://python.org').text
res_text_2 = requests.get('http://python.org').text
et = time.time()
print(et - st)
输出:
3.1400091648101807
5.478497505187988
当然啦,这是因为第一个其实是采用是协程技术,所以,差别有点大也是可以理解的。
所以,我们要接着探究。
这个非常有意思,因为两者都是基于协程的,至于谁更优,对比一下就可以知道结果了。
文件结构:
三个文件放在同一个目录下:
main_Test.py
import asyncio_Test
import gevent_Test
import matplotlib.pyplot as plt

# Timing curves: index i holds the wall-clock time for (Begin + i) requests.
asyncio_list = []
gevent_list = []
N = 50
Begin = 1
url = 'http://www.python.org'

for offset in range(N):
    request_count = Begin + offset
    asyncio_list.append(asyncio_Test.f(request_count, url))
    gevent_list.append(gevent_Test.f(request_count, url))

plt.plot(asyncio_list, label='asyncio')
plt.plot(gevent_list, label='gevent')
plt.legend()
plt.savefig('1.png')
plt.show()
asyncio_Test.py
import aiohttp
import asyncio
import time

async def fetch(session, url):
    """Return the decoded response body of *url* fetched through *session*."""
    async with session.get(url) as response:
        return await response.text()

async def main(Target_url):
    async with aiohttp.ClientSession() as session:
        await fetch(session, Target_url)

def f(Times, Target_url):
    """Fetch Target_url `Times` times concurrently; return elapsed seconds."""
    st = time.time()
    loop = asyncio.get_event_loop()
    # BUG FIX: `[main(Target_url)] * Times` called main() ONCE and replicated
    # the same coroutine object; asyncio.wait() de-duplicates its argument,
    # so only a single request was ever made regardless of Times. Build
    # `Times` distinct coroutines instead so the benchmark measures what it
    # claims to.
    task = [main(Target_url) for _ in range(Times)]
    loop.run_until_complete(asyncio.wait(task))
    et = time.time()
    return et - st
gevent_Test.py
import gevent
from gevent import monkey
import time
import requests

# Patch the socket module so blocking requests calls cooperatively yield.
monkey.patch_socket()

def request_f(Target_url):
    """Fetch Target_url once; the body is discarded (timing only)."""
    res_text = requests.get(Target_url).text

def f(Times, Target_url):
    """Fetch Target_url `Times` times concurrently; return elapsed seconds."""
    st = time.time()
    # BUG FIX: `[gevent.spawn(...)] * Times` called spawn() ONCE and repeated
    # the same greenlet object, so only ONE request ran regardless of Times.
    # Spawn a distinct greenlet per request instead.
    WaitList = [gevent.spawn(request_f, Target_url) for _ in range(Times)]
    gevent.joinall(WaitList)
    et = time.time()
    return et - st
对比效果如下:
可以发现,当只爬取一个网页的时候,其实,反而用gevent+request会更快。
随着每次爬取的网页数目的累积,只有当爬取的数目接近 20 个的时候,asyncio 才普遍低于 gevent(虽然也是基本接近)。
相差只有0.01秒左右(我直接把数值输出来过)。(审阅注:上面两个测速脚本都用列表乘法 `[...] * Times` 复制了同一个协程/greenlet 对象,实际并没有并发发起 Times 个请求,因此测出的数值和结论仅供参考。)