Python多进程多线程测试

今天在工作中遇到爬虫效率问题,在此处记录多进程、多线程测试脚本

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'Seven'
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import time


def gcd(pair):
    """Return the greatest common divisor of a 2-tuple of positive ints.

    Deliberately brute-force (counts down from min(a, b)) so each call is a
    CPU-bound workload for the thread/process benchmarks below. Takes a
    single tuple argument so it can be fed directly to Executor.map.
    """
    a, b = pair
    # The first candidate that divides both numbers is, by construction,
    # the greatest common divisor; 1 always matches, so next() never raises.
    return next(
        d for d in range(min(a, b), 0, -1)
        if a % d == 0 and b % d == 0
    )


# Fixed set of large integer pairs fed to gcd() — sized so each call takes a
# measurable amount of CPU time, making the benchmark differences visible.
numbers = [
    (1963309, 2265973), (1879675, 2493670), (2030677, 3814172),
    (1551645, 2229620), (1988912, 4736670), (2198964, 7876293)
]


def thread_map_test():
    """Benchmark gcd over `numbers` using ThreadPoolExecutor.map.

    Prints the computed results and the elapsed time. Uses
    time.perf_counter() rather than time.time(): perf_counter is monotonic
    and high-resolution, so it is the correct clock for measuring elapsed
    durations (time.time can jump backwards/forwards with NTP adjustments).
    """
    start_time = time.perf_counter()
    with ThreadPoolExecutor(max_workers=4) as pool:
        # map returns a lazy iterator; materialize it before the pool shuts
        # down so all task results are collected.
        results = list(pool.map(gcd, numbers))
    end_time = time.perf_counter()
    print(f'运行结果:{results}')
    print(f'多线程map运行时长:{end_time - start_time}')


def thread_submit_test():
    """Benchmark gcd over `numbers` using ThreadPoolExecutor.submit.

    Submits one task per pair, then collects results in submission order so
    the output matches the input order. Uses time.perf_counter() instead of
    time.time() because perf_counter is monotonic and high-resolution — the
    right clock for elapsed-time measurement.
    """
    start_time = time.perf_counter()
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(gcd, pair) for pair in numbers]
        # .result() blocks until each future completes; iterating in
        # submission order preserves the input ordering of `numbers`.
        results = [future.result() for future in futures]
    end_time = time.perf_counter()
    print(f'运行结果:{results}')
    print(f'多线程submit运行时长:{end_time - start_time}')


def process_map_test():
    """Benchmark gcd over `numbers` using ProcessPoolExecutor.map.

    True CPU parallelism: each pair is dispatched to a worker process, so
    the GIL does not serialize the work. Uses time.perf_counter() instead
    of time.time() because perf_counter is monotonic and high-resolution —
    the right clock for elapsed-time measurement.
    """
    start_time = time.perf_counter()
    with ProcessPoolExecutor(max_workers=4) as pool:
        # map returns a lazy iterator; materialize it before the pool shuts
        # down so all worker results are collected.
        results = list(pool.map(gcd, numbers))
    end_time = time.perf_counter()
    print(f'运行结果:{results}')
    print(f'多进程map运行时长:{end_time - start_time}')


def process_submit_test():
    """Benchmark gcd over `numbers` using ProcessPoolExecutor.submit.

    Submits one task per pair to the worker processes, then collects the
    results in submission order so output matches input order. Uses
    time.perf_counter() instead of time.time() because perf_counter is
    monotonic and high-resolution — the right clock for elapsed-time
    measurement.
    """
    start_time = time.perf_counter()
    with ProcessPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(gcd, pair) for pair in numbers]
        # .result() blocks until each future completes; iterating in
        # submission order preserves the input ordering of `numbers`.
        results = [future.result() for future in futures]
    end_time = time.perf_counter()
    print(f'运行结果:{results}')
    print(f'多进程submit运行时长:{end_time - start_time}')


if __name__ == '__main__':
    # Run every benchmark variant in the same fixed order. The guard is
    # required: ProcessPoolExecutor re-imports this module in each worker,
    # and without it the benchmarks would recurse on import.
    for benchmark in (thread_map_test, thread_submit_test,
                      process_map_test, process_submit_test):
        benchmark()

 

当使用多进程/多线程调用的函数需要两个参数,其中一个参数在每次调用中变化、另一个参数保持固定不变时,可参照如下代码使用 functools.partial 固定不变的参数进行传参:

# NOTE: this fragment additionally requires `from functools import partial`.
# partial() pre-binds the keyword argument that stays the same across calls;
# map() then supplies only the per-call varying values from `variable_changed`.
with ProcessPoolExecutor(max_workers=4) as pool:
    pool.map(partial(data_crawl, variable_constant=variable_constant), variable_changed)

 

你可能感兴趣的:(个人笔记)