I recently wrote a small program that searches every file on the computer's disk. It works, but a full run takes 5 to 6 minutes, which is far from acceptable. The obvious way to speed it up is multithreading or multiprocessing, and that is where I ran into something odd. I am running the official CPython build, and under CPython multiprocessing turned out to be much faster than multithreading, more than twice as fast. The reason is the GIL (Global Interpreter Lock): all threads in a process share a single GIL, so only one thread executes Python bytecode at a time, while each process gets its own interpreter with its own GIL, so processes can truly run in parallel.
Then a new problem appeared: Python multiprocessing raised AttributeError: Can't pickle local object.
Sample code (for testing):
from threading import Thread
from multiprocessing import Process, Queue as mQueue
from queue import Queue
import time


def single_test():
    # Baseline: sum 1..9,999,999 in a single thread
    my_sum = 0
    for i in range(1, 10000000):
        my_sum += i
    print("Single-thread result:", my_sum)


def thread_test():
    def sum_func(q, start, end):
        my_sum = 0
        for i in range(start, end):
            my_sum += i
        q.put(my_sum)

    def run_thread():
        q = Queue()
        # Split the range in half, one thread per half
        t1 = Thread(target=sum_func, args=(q, 1, 5000000))
        t2 = Thread(target=sum_func, args=(q, 5000000, 10000000))
        t1.start()
        t2.start()
        t1.join()
        t2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("Multi-thread result:", my_sum)

    run_thread()


def process_test():
    # BUG: sum_process_func is a local (nested) function, so multiprocessing
    # cannot pickle it when it spawns the child processes
    def sum_process_func(q, start, end):
        my_sum = 0
        for i in range(start, end):
            my_sum += i
        q.put(my_sum)

    def run_process():
        q = mQueue()
        p1 = Process(target=sum_process_func, args=(q, 1, 5000000))
        p2 = Process(target=sum_process_func, args=(q, 5000000, 10000000))
        p1.start()
        p2.start()
        p1.join()
        p2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("Multi-process result:", my_sum)

    run_process()


if __name__ == "__main__":
    t0 = time.time()
    single_test()
    t1 = time.time()
    thread_test()
    t2 = time.time()
    process_test()
    t3 = time.time()
    print(f"Single-thread time: {t1 - t0}s")
    print(f"Multi-thread time: {t2 - t1}s")
    print(f"Multi-process time: {t3 - t2}s")
On Windows, or any platform where multiprocessing uses the spawn start method, process_test() in the version above fails with an error along the lines of AttributeError: Can't pickle local object 'process_test.<locals>.sum_process_func'. The fix is to hoist sum_process_func to module level, where pickle can find it by its qualified name:

from threading import Thread
from multiprocessing import Process, Queue as mQueue
from queue import Queue
import time


def single_test():
    my_sum = 0
    for i in range(1, 10000000):
        my_sum += i
    print("Single-thread result:", my_sum)


def thread_test():
    def sum_func(q, start, end):
        my_sum = 0
        for i in range(start, end):
            my_sum += i
        q.put(my_sum)

    def run_thread():
        q = Queue()
        t1 = Thread(target=sum_func, args=(q, 1, 5000000))
        t2 = Thread(target=sum_func, args=(q, 5000000, 10000000))
        t1.start()
        t2.start()
        t1.join()
        t2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("Multi-thread result:", my_sum)

    run_thread()


# Moved to module level: now picklable, so Process(target=...) works
def sum_process_func(q, start, end):
    my_sum = 0
    for i in range(start, end):
        my_sum += i
    q.put(my_sum)


def process_test():
    def run_process():
        q = mQueue()
        p1 = Process(target=sum_process_func, args=(q, 1, 5000000))
        p2 = Process(target=sum_process_func, args=(q, 5000000, 10000000))
        p1.start()
        p2.start()
        p1.join()
        p2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("Multi-process result:", my_sum)

    run_process()


if __name__ == "__main__":
    t0 = time.time()
    single_test()
    t1 = time.time()
    thread_test()
    t2 = time.time()
    process_test()
    t3 = time.time()
    print(f"Single-thread time: {t1 - t0}s")
    print(f"Multi-thread time: {t2 - t1}s")
    print(f"Multi-process time: {t3 - t2}s")
The root cause: Python closures and other locally defined functions do not support pickle (serialization), and multiprocessing needs to pickle the target function to hand it to the child process under the spawn start method (the default on Windows and macOS). Under fork, the default on Linux, the child inherits the parent's memory instead, which is why the nested version can appear to work there.
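You can reproduce the failure with pickle alone, no multiprocessing involved. A minimal sketch (top_level and factory are made-up names for illustration):

import pickle


def top_level():
    return 42


def factory():
    # A local function: its qualified name is 'factory.<locals>.local',
    # so there is no importable path for pickle to record
    def local():
        return 42
    return local


pickle.dumps(top_level)  # fine: pickled by module-level qualified name
pickle.dumps(factory())  # raises AttributeError: Can't pickle local object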
If hoisting the function to module level is not an option, the serializer itself can be swapped:
1. Replace pickle: the pathos library serializes with dill, which can handle closures and locally defined functions. Consider the following code:
from pathos.multiprocessing import ProcessingPool as Pool
Project page:
https://github.com/uqfoundation/pathos
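For completeness, here is a rough sketch of what the pathos route could look like on the same workload (my own illustration, not code from the pathos docs; ProcessingPool.map mirrors the stdlib Pool.map interface). Because pathos serializes with dill, the worker function is allowed to stay local:

from pathos.multiprocessing import ProcessingPool as Pool


def process_test():
    # A local function: stdlib multiprocessing would refuse to pickle this,
    # but pathos serializes it with dill
    def partial_sum(bounds):
        start, end = bounds
        return sum(range(start, end))

    # Same split as the examples above: 1..4999999 and 5000000..9999999
    results = Pool().map(partial_sum, [(1, 5000000), (5000000, 10000000)])
    print("Multi-process result:", sum(results))


if __name__ == "__main__":
    process_test()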