前言
py的世界里虽有GIL,但是进程,线程的使用还是有一些用武之地的.
本文对比的是
threading
multiprocessing,
concurrent.futures
asyncio(3.5 版本以后的协程)
gevent(常称monkey_patch)
至于源码,有兴趣的,研究研究,还是很有收获的。改天再开篇。
asyncio 的协程是在生成器(yield / yield from)的基础上演化而来的,整体生态还不够成熟,常被吐槽;但就我目前的使用体验来看,还是不错的。
monkey_patch 经过长时间的线上验证,及代码不侵入性,性能还是挺可靠的。
基于并发的考虑,这些都是不错的思路。
threading 及 multiprocessing
这两个的 API 差不多,不太一样的地方在于如何拿到返回结果:multiprocessing 可以借助 Manager 的共享字典把结果带回来;threading 要拿到返回结果,一般有两种思路:用队列,或者用类再封装一层。
multiprocessing返回结果
def req_multi(url: str, result: dict):
    """Fetch ``url`` and record the response in ``result`` under that URL.

    ``result`` is mutated in place; nothing is returned.
    """
    result[url] = requests.get(url)
def main_multi():
    """Fetch each URL in its own process and print the collected responses.

    A ``Manager`` dict is handed to every worker so the results written in
    the child processes are visible to the parent.
    """
    urls = ["http://www.baidu.com", "http://www.zhihu.com"]
    # The with-block shuts the manager's server process down on exit.
    with multiprocessing.Manager() as process_manager:
        result_dict = process_manager.dict()
        reqs = [
            multiprocessing.Process(target=req_multi, args=(i, result_dict))
            for i in urls
        ]
        # Start every worker before joining any, so the requests overlap.
        for proc in reqs:
            proc.start()
        for proc in reqs:
            proc.join()
        # Print while the manager is alive; the proxy is unusable afterwards.
        print(result_dict)
结果
![image.png](https://upload-images.jianshu.io/upload_images/1500770-b608536e103201be.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
threading返回结果
queue的方式
# Module-level queue through which worker threads hand back their results.
q = queue.Queue()


def req_threading(url: str):
    """Fetch ``url`` and put a one-entry ``{url: response}`` dict on ``q``."""
    response = requests.get(url)
    q.put({url: response})
def main_threading_queue():
    """Fetch each URL in its own thread and print what the workers queued.

    Each worker runs ``req_threading``; the results are read back from the
    module-level queue ``q`` once every thread has finished.
    """
    urls = ["http://www.baidu.com", "http://www.zhihu.com"]
    workers = [threading.Thread(target=req_threading, args=(i,)) for i in urls]
    # Start every thread before joining any, so the requests overlap.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    # A Queue object is always truthy, so ``while q`` would never end and
    # ``q.get()`` would block forever once the queue is drained. All
    # producers have been joined, so ``empty()`` is reliable here.
    while not q.empty():
        print(q.get())
class封装的方式
class MainThread(threading.Thread):
    """Thread that requests a list of URLs and keeps the response on itself.

    Read ``result`` after the thread has finished.

    NOTE(review): ``run`` stores every response under the single key
    ``fn_name``, so with several URLs only the last response survives.
    Confirm whether keying by URL was intended.
    """

    def __init__(self, fn_name: str, url: list):
        super().__init__()
        self.fn_name = fn_name
        self.url = url
        self.result = {}

    def run(self):
        """Request each URL in order, storing the response under ``fn_name``."""
        for target in self.url:
            self.result[self.fn_name] = requests.get(target)
异步的方式(concurrent.futures)
import concurrent.futures
def main_future():
    """Submit one ``req_futrue`` call per URL to a process pool and print the results.

    Raises:
        concurrent.futures.TimeoutError: if a result is not ready within
            10 seconds of being asked for.
    """
    urls = ["http://www.baidu.com", "http://www.zhihu.com"]
    # The with-block shuts the pool down (waiting for its workers) even when
    # a ``result()`` call raises, so no worker processes are leaked.
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as pool:
        futures = [pool.submit(req_futrue, i) for i in urls]
        # Alternative: pool.map(req_futrue, urls) yields the results in order.
        r = [future.result(timeout=10) for future in futures]
    print(r)
asyncio
import gevent
from gevent import socket
# from gevent import monkey; monkey.patch_all()
import time
start_time = time.time()
async def main_gevnet():
    """Call ``req_1`` and ``req_2`` once each against the Baidu home page.

    NOTE(review): the two return values are bound but never awaited or
    used. If ``req_1``/``req_2`` are coroutine functions, no request is
    actually sent. Confirm, and ``await`` them if so.
    """
    urls = ["http://www.baidu.com", "http://www.baidu.com"]
    target = urls[0]
    job_1 = req_1(target)
    job_2 = req_2(target)
import asyncio

# asyncio.get_event_loop() is deprecated when no loop is running, and asking
# for the loop on every iteration is redundant: create one loop explicitly,
# reuse it for all runs, and close it afterwards.
loop = asyncio.new_event_loop()
try:
    for _ in range(100):
        loop.run_until_complete(main_gevnet())
finally:
    loop.close()
# Report the elapsed time once, after the last run
# (assumed placement: TODO confirm it was not meant to print per iteration).
print(time.time() - start_time)
gevent
import gevent
from gevent import socket
# from gevent import monkey; monkey.patch_all()
import time
start_time = time.time()
async def main_gevnet():
    """Run ``req_1`` and ``req_2`` once per URL as greenlets and wait for all.

    NOTE(review): declared ``async`` although nothing is awaited; kept that
    way so callers that drive it through an event loop keep working.
    """
    urls = ["http://www.baidu.com", "http://www.baidu.com"]
    # gevent.spawn() already schedules the greenlet, so no start() is needed.
    jobs_1 = [gevent.spawn(req_1, i) for i in urls]
    jobs_2 = [gevent.spawn(req_2, i) for i in urls]
    # Block until every greenlet has finished.
    gevent.joinall(jobs_1 + jobs_2)