参照c++的线程池,使用python的threading库实现线程池。
import collections
import threading
import time
import traceback
# 线程池的任务,包含一个可调用对象和一个参数数组
class ThreadTask(object):
    """A unit of work for the thread pool: a callable plus its argument list.

    Attributes are set directly from the constructor arguments:
        task: The callable to run.
        args: The argument list handed to ``task`` as a single parameter.
    """

    def __init__(self, job, args=None):
        """Store the callable and its arguments.

        Args:
            job: Callable that will be invoked as ``job(args)``.
            args: List of arguments for the job. When omitted, a fresh empty
                list is created for this task.
        """
        self.task = job
        # A default of ``list()`` would be evaluated once and shared by every
        # task created without arguments; build a new list per instance.
        self.args = args if args is not None else []
# 线程池对象
class ThreadPool(object):
    """Fixed-size pool of worker threads that run queued tasks in FIFO order.

    A task is any object exposing ``task`` (a callable) and ``args`` (the
    single argument passed to that callable). Worker threads are daemon
    threads; call ``shutdown()`` to let them drain the queue and exit before
    the program ends.
    """

    def __init__(self, thread_length):
        """Create the task queue and start ``thread_length`` worker threads.

        Args:
            thread_length: Number of worker threads to start.
        """
        # FIFO queue of pending tasks; deque pops from the left in O(1).
        self.task_list = collections.deque()
        self.task_lock = threading.RLock()
        # The condition shares task_lock, so "queue is empty" is checked and
        # the thread goes to sleep under one lock. With two separate locks a
        # notification sent between the check and the wait would be lost.
        self.task_condition = threading.Condition(self.task_lock)
        self.thread_length = thread_length
        # Set by shutdown(); tells idle workers to exit instead of sleeping.
        self._stopping = False
        self.thread_list = [
            threading.Thread(name='Thread ' + str(i),
                             target=self.thread_work, daemon=True)
            for i in range(thread_length)
        ]
        for thread in self.thread_list:
            thread.start()

    def add_task(self, new_task):
        """Append a task to the queue and wake one sleeping worker.

        Args:
            new_task: Object with a callable ``task`` attribute and an
                ``args`` attribute.

        Returns:
            True if the task was queued, False if the pool has been shut down.
        """
        with self.task_condition:
            if self._stopping:
                return False
            self.task_list.append(new_task)
            # Notify after the append and on every call: one new task needs
            # at most one additional worker.
            self.task_condition.notify()
        return True

    def thread_work(self):
        """Worker loop: take tasks from the queue and run them.

        Sleeps on the condition while the queue is empty. Returns once
        ``shutdown()`` has been called and the queue has been drained.
        """
        while True:
            with self.task_condition:
                # Loop around wait() so the queue state is re-checked after
                # every wake-up.
                while not self.task_list and not self._stopping:
                    self.task_condition.wait()
                if not self.task_list:
                    return
                temp_task = self.task_list.popleft()
            # Run the task outside the lock so other workers can proceed.
            try:
                temp_task.task(temp_task.args)
            except Exception:
                # Report the failure but keep this worker alive for later tasks.
                traceback.print_exc()

    def shutdown(self, wait=True):
        """Stop accepting tasks and let workers exit once the queue is empty.

        Args:
            wait: When True, block until every worker thread has finished.
        """
        with self.task_condition:
            self._stopping = True
            self.task_condition.notify_all()
        if wait:
            current = threading.current_thread()
            for thread in self.thread_list:
                # A task that calls shutdown() must not join its own thread.
                if thread is not current:
                    thread.join()
线程池的好处就是,对于需要并发的任务,可以不反复地创建和销毁线程(这些操作对于操作系统来说,消耗比较大),实现了对线程的重用。
而线程池对性能的提升并不是很高,测试一下:
# 大任务
def big_job(arg):
    """Run a CPU-heavy dummy workload, then record the run's end time.

    Args:
        arg: ``[job_index, run_id]``. The end time of run ``run_id`` is
            stored when ``job_index`` is the last index (``JOB_COUNT - 1``).
    """
    for _ in range(0, 100000):
        a = 9 * 9
    _record_end(arg)


# Small job
def small_job(arg):
    """Run a trivial dummy workload, then record the run's end time.

    Args:
        arg: ``[job_index, run_id]``. The end time of run ``run_id`` is
            stored when ``job_index`` is the last index (``JOB_COUNT - 1``).
    """
    a = 9 * 9
    _record_end(arg)


def _record_end(arg):
    """Store the current time in end1/end2/end3 for the last job of a run.

    Does nothing unless ``arg[0]`` is the last job index; ``arg[1]`` selects
    which of the three globals is written (1, 2 or 3).
    """
    global end1, end2, end3
    if arg[0] != JOB_COUNT - 1:
        return
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement intended for measuring elapsed intervals.
    now = time.perf_counter()
    if arg[1] == 1:
        end1 = now
    elif arg[1] == 2:
        end2 = now
    elif arg[1] == 3:
        end3 = now


def _run_in_pool(job, thread_count, run_id):
    """Queue JOB_COUNT jobs on a new pool and return the start timestamp.

    Args:
        job: The workload callable (``big_job`` or ``small_job``).
        thread_count: Number of worker threads for the pool.
        run_id: Tag passed to every job so it knows which end time to set.
    """
    begin = time.perf_counter()
    tp = ThreadPool(thread_count)
    for x in range(0, JOB_COUNT):
        tp.add_task(ThreadTask(job, [x, run_id]))
    return begin


# Number of jobs submitted per benchmark run.
JOB_COUNT = 10000

# End timestamps of the three runs; 0 means "not finished yet".
end1 = 0
end2 = 0
end3 = 0

if __name__ == '__main__':
    # Workload under test; switch to small_job to benchmark tiny tasks.
    job = big_job

    # Each run is awaited before the next one starts so that the runs do not
    # compete with each other for CPU time. The end time is written by the job
    # with the last index, so it only approximates "all jobs finished".
    begin1 = _run_in_pool(job, 5, 1)
    while end1 == 0:
        # Sleep instead of spinning so the waiting main thread does not
        # steal CPU time from the workers being measured.
        time.sleep(0.001)

    begin2 = _run_in_pool(job, 100, 2)
    while end2 == 0:
        time.sleep(0.001)

    # Serial baseline: run every job directly in the main thread.
    begin3 = time.perf_counter()
    for x in range(0, JOB_COUNT):
        job([x, 3])

    print('5 thread used '+str(end1-begin1))
    print('100 thread used '+str(end2-begin2))
    print('1 thread used '+str(end3-begin3))
使用big_job的情况下,少量线程比串行执行快:
---------Big Job--------
5 thread used 8.3851792898501
100 thread used 57.50439627036223
1 thread used 31.54597691246157
使用small_job的情况下,串行执行快:
---------Small Job--------
5 thread used 0.026785925509466475
100 thread used 0.04961936958315384
1 thread used 0.005536336222317945
毕竟线程之间的切换有性能消耗,且线程池中线程有锁,会阻塞和睡眠,都会影响性能。