2023-03-29
2.17s/p
Python 中的多线程受 GIL 制约,表面上是并行,实际上只是并发;对纯计算密集型任务,线程池(以及基于线程池的协程)的耗时与串行基本相同,只有多进程才能真正并行地缩短耗时;
Deal | 耗时 | 备注 |
---|---|---|
普通的循环执行 | 22.68 | 串行 |
线程池执行 | 22.10 | 并发 |
进程池执行 | 6.95 | 并行 |
协程执行(多线程) | 22.39 | 并发 |
协程执行(多进程) | 6.67 | 并行 |
协程执行(多进程,更直接的方式,无嵌套调用) | 6.88 | 并行 |
协程执行(多进程,并测试异步迭代器的使用) | 7.52 | 并行 |
1s/p
I/O密集型的任务,多线程表面是并行,实际也可以当做并行看待,因为等待 I/O 时基本不受GIL的影响;并且线程/协程的创建与调度开销比进程小,耗时甚至比进程池更短;
Deal | 耗时 | 备注 |
---|---|---|
普通的循环执行 | 25.05 | 串行 |
线程池执行 | 5.01 | 并行 |
进程池执行 | 6.12 | 并行 |
协程执行(多线程) | 5.01 | 并行 |
协程执行(多进程) | 5.04 | 并行 |
协程执行(多进程,更直接的方式,无嵌套调用) | 5.04 | 并行 |
协程执行(多进程,并测试异步迭代器的使用) | 5.03 | 并行 |
# pip install nest_asyncio
import nest_asyncio
nest_asyncio.apply() # PyCharm中 不需要这两句
import time
import asyncio
import concurrent.futures
from multiprocessing import Pool
# Number of workers given to every thread pool / process pool used by the benchmarks in this file.
POOL_SIZE = 5
def download_file(row, delay=1.0):
    """Simulate one blocking unit of work and hand the input back.

    This is a plain (non-coroutine) function, so it can be called directly
    or submitted to a thread pool / process pool.

    Args:
        row: Opaque work item; it is not inspected, only returned.
        delay: Seconds to block in ``time.sleep`` to simulate an I/O-bound
            wait. Defaults to 1 second, the value the benchmark was
            originally hard-coded to.

    Returns:
        The ``row`` argument, unchanged.
    """
    # I/O-bound simulation.
    time.sleep(delay)
    # To benchmark a CPU-bound workload instead, replace the sleep above
    # with a busy loop such as:
    #     total = 0
    #     for i in range(10000000):
    #         total += i
    #         total -= i
    return row
async def download_prepare(row, pool):
    """Run the blocking ``download_file`` in an executor and await its result.

    A coroutine can only ``await`` other awaitables; to call an ordinary
    blocking function without stalling the event loop, the call is handed to
    a thread/process pool via ``loop.run_in_executor``, which returns a
    Future that can be awaited.

    Args:
        row: Work item forwarded to ``download_file``.
        pool: Executor passed to ``run_in_executor`` (``None`` selects the
            loop's default executor).

    Returns:
        Whatever ``download_file(row)`` returned.
    """
    # Inside a coroutine a loop is always running, so ask for it explicitly.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(pool, download_file, row)
class Reader(object):
    """Asynchronous iterator that schedules one download task per input row.

    Each step of ``async for`` pulls the next row from ``datalist``, wraps
    ``download_prepare(row, pool)`` in an ``asyncio.Task`` and yields that
    task. Iteration ends when the rows are exhausted.
    """

    def __init__(self, datalist, pool=None):
        """Store an iterator over ``datalist`` and the executor to use.

        Args:
            datalist: Iterable of work items.
            pool: Executor forwarded to ``download_prepare``; may be ``None``.
        """
        self.datalist = iter(datalist)
        self.pool = pool

    async def readrow(self):
        """Schedule the next row.

        Returns:
            The ``asyncio.Task`` created for the next row, or ``None`` when
            there are no rows left.
        """
        # Keep the try body minimal: only next() signals exhaustion.
        try:
            row = next(self.datalist)
        except StopIteration:
            return None
        # create_task needs a coroutine object; the Task it returns is awaitable.
        return asyncio.create_task(download_prepare(row, self.pool))

    def __aiter__(self):
        """Return self; the reader is its own asynchronous iterator."""
        return self

    async def __anext__(self):
        """Return the next scheduled task or raise ``StopAsyncIteration``."""
        task = await self.readrow()
        if task is None:
            raise StopAsyncIteration
        return task
# 普通的循环执行
def main(datalist):
    """Serial baseline: process every row one after another in the caller's thread.

    Args:
        datalist: Iterable of work items, each passed to ``download_file``.
    """
    for item in datalist:
        download_file(item)
# 线程池执行
def main1(datalist):
    """Run ``download_file`` for every row on a thread pool and wait for all.

    Args:
        datalist: Iterable of work items, each submitted to the pool.

    The returned futures are not inspected, so results are discarded.
    """
    # The context manager calls shutdown(wait=True) on exit, even if a
    # submit() call raises, so worker threads are never leaked.
    with concurrent.futures.ThreadPoolExecutor(POOL_SIZE) as threadpool:
        for row in datalist:
            # submit() returns immediately; the calling thread keeps going
            # while the workers run.
            threadpool.submit(download_file, row)
# 进程池执行
def main2(datalist):
    """Run ``download_file`` for every row on a ``multiprocessing.Pool``.

    Args:
        datalist: Iterable of work items; ``Pool.map`` blocks until every
            item has been processed.
    """
    pool = Pool(POOL_SIZE)
    try:
        pool.map(download_file, datalist)
    finally:
        # Always release the worker processes, even if map() raised.
        pool.close()  # stop accepting new work
        pool.join()  # block until the worker processes have exited
# 协程执行(多线程)
def main3(datalist):
    """Drive the rows through coroutines backed by a thread pool.

    Each row becomes a ``download_prepare`` coroutine whose blocking part
    runs on a ``ThreadPoolExecutor``; all coroutines are awaited together.

    Args:
        datalist: Iterable of work items. An empty iterable is a no-op.

    Raises:
        Exception: The first exception raised by any row's coroutine is
            propagated instead of being left unretrieved on a task.
    """

    async def _run_all():
        # Coroutines must be scheduled while a loop is running, so the
        # fan-out happens inside this coroutine rather than in sync code.
        with concurrent.futures.ThreadPoolExecutor(max_workers=POOL_SIZE) as pool:
            await asyncio.gather(
                *(download_prepare(row, pool) for row in datalist)
            )

    asyncio.run(_run_all())
# 协程执行(多进程)
def main4(datalist):
    """Drive the rows through coroutines backed by a process pool.

    Each row becomes a ``download_prepare`` coroutine whose blocking part
    runs on a ``ProcessPoolExecutor``; all coroutines are awaited together.

    Args:
        datalist: Iterable of work items. An empty iterable is a no-op.

    Raises:
        Exception: The first exception raised by any row's coroutine is
            propagated instead of being left unretrieved on a task.
    """

    async def _run_all():
        # Coroutines must be scheduled while a loop is running, so the
        # fan-out happens inside this coroutine rather than in sync code.
        with concurrent.futures.ProcessPoolExecutor(max_workers=POOL_SIZE) as pool:
            await asyncio.gather(
                *(download_prepare(row, pool) for row in datalist)
            )

    asyncio.run(_run_all())
# 协程执行(多进程,更直接的方式,无嵌套调用)
def main5(datalist):
    """Run the rows on a process pool via ``run_in_executor`` directly.

    Unlike the ``download_prepare``-based variants there is no intermediate
    coroutine per row: each row is handed straight to
    ``loop.run_in_executor`` and the resulting futures are awaited together.

    Args:
        datalist: Iterable of work items. An empty iterable is a no-op.

    Raises:
        Exception: The first exception raised by ``download_file`` in a
            worker is propagated.
    """

    async def _run_all():
        # get_running_loop() is only valid while a loop is running, and the
        # futures must belong to the same loop that awaits them, so both
        # steps live inside this coroutine.
        loop = asyncio.get_running_loop()
        with concurrent.futures.ProcessPoolExecutor(max_workers=POOL_SIZE) as pool:
            futs = [
                loop.run_in_executor(pool, download_file, row)
                for row in datalist
            ]
            await asyncio.gather(*futs)

    asyncio.run(_run_all())
# 协程执行(多进程,并测试异步迭代器的使用)
async def main6(datalist):
    """Run the rows on a process pool, scheduling them via ``Reader``.

    ``Reader`` is consumed with ``async for``; every step yields a task that
    is already scheduled. Once all tasks are collected they are awaited
    together.

    Args:
        datalist: Iterable of work items. An empty iterable is a no-op.

    Raises:
        Exception: The first exception raised by any task is propagated.
    """
    with concurrent.futures.ProcessPoolExecutor(max_workers=POOL_SIZE) as pool:
        tasks = [task async for task in Reader(datalist, pool)]
        # gather() accepts zero awaitables, unlike asyncio.wait().
        await asyncio.gather(*tasks)
if __name__ == "__main__":
    # Benchmark driver: time each execution strategy on the same 25 rows.
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))

    # Five distinct rows repeated five times (25 work items in total).
    datalist = [
        "0_0_0_0_353",
        "1_1_0_0_353",
        "2_2_0_0_353",
        "3_4_0_0_353",
        "4_5_0_0_353",
    ] * 5

    # (label printed with the result, callable taking the row list)
    benchmarks = [
        ("串行:普通的循环执行——执行时间:", main),
        ("并发:线程池执行——执行时间:", main1),
        ("并行:进程池执行——执行时间:", main2),
        ("并发:协程执行(多线程)——执行时间:", main3),
        ("并行:协程执行(多进程)——执行时间:", main4),
        ("并行:协程执行(多进程,更直接的方式,无嵌套调用) ——执行时间:", main5),
        # main6 is itself a coroutine, so it needs an event loop to run in.
        (
            "并行:协程执行(多进程,并测试异步迭代器的使用) ——执行时间:",
            lambda rows: asyncio.run(main6(rows)),
        ),
    ]

    for label, runner in benchmarks:
        # perf_counter() is the clock intended for measuring elapsed time;
        # time.time() can jump if the system clock is adjusted.
        started = time.perf_counter()
        runner(datalist)
        print(label, time.perf_counter() - started, "秒")

    print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))