【莫烦Python】Threading 学会多线程 Python
【2021最新版】Python 并发编程实战,用多线程、多进程、多协程加速程序运行
【莫烦Python】Multiprocessing 让你的多核计算机发挥真正潜力 Python
import threading
import time
def thread_job():
    """Simulate a slow task: print a start marker, sleep ten times for 0.1 s, print a finish marker."""
    print("T1 start\n")
    naps_left = 10
    while naps_left > 0:
        time.sleep(0.1)
        naps_left -= 1
    print("T1 finish\n")
def T2_job():
    """Print the T2 start and finish markers back to back, with no work in between."""
    for message in ("T2 start\n", "T2 finish\n"):
        print(message)
def main():
    """Start the T1 and T2 threads, wait for T1 only, then print a completion marker."""
    slow_worker = threading.Thread(name='T1', target=thread_job)
    quick_worker = threading.Thread(name='T2', target=T2_job)
    for worker in (slow_worker, quick_worker):
        worker.start()
    # Only T1 is joined here; the join on T2 is left disabled.
    slow_worker.join()
    print("all done\n")
    # Useful calls for inspecting threads while experimenting:
    # threading.active_count(), threading.enumerate(), threading.current_thread()
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
多线程爬虫的例子
blog_spider.py
import requests
# URLs for pages 1 through 50; the page number goes into the "#p<N>" suffix.
urls = [f"http://www.cnblogs.com/#p{page}" for page in range(1, 51)]
def craw(url, timeout=10):
    """Fetch *url* with an HTTP GET and print the URL and the length of the response text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before requests gives up.
            Without a timeout a stalled connection would block the calling
            thread indefinitely, and any ``join()`` on that thread with it.
    """
    r = requests.get(url, timeout=timeout)
    print(url, len(r.text))
# Smoke test: fetch the first page only when this file is run as a script,
# so that importing the module does not trigger a network request.
if __name__ == "__main__":
    craw(urls[0])
01.multi_thread_craw.py
import blog_spider
import threading
import time
def multi_thread():
    """Crawl every URL in ``blog_spider.urls`` at once, using one thread per URL."""
    print("multi_thread begin")
    # args must be a tuple: the trailing comma makes ``(url,)`` a 1-tuple,
    # whereas ``(url)`` would just be the string itself.
    workers = [
        threading.Thread(target=blog_spider.craw, args=(url,))
        for url in blog_spider.urls
    ]
    for worker in workers:
        worker.start()
    # Wait for every worker before printing the end marker.
    for worker in workers:
        worker.join()
    print("multi_thread end")
if __name__ == "__main__":
    # Measure the wall-clock time of the whole threaded crawl.
    started_at = time.time()
    multi_thread()
    elapsed = time.time() - started_at
    print("multi_thread cost:", elapsed, "seconds")
知识点2.threading.Thread 会丢弃目标函数的 return 返回值,所以要用队列保存结果——用于多线程数据通信的 queue.Queue
生产者消费者爬虫例子来说明这几个概念:
好处:添加(put)和获取(get)默认都会阻塞——队列已满时 put 会等到有空位才继续执行后面的代码,队列为空时 get 会等到有元素才返回
import threading
import time
from queue import Queue
def job(l, q):
    """Square every element of *l* in place and publish the list on *q*.

    Args:
        l: Mutable list of numbers; it is modified in place.
        q: Queue (any object with ``put``) that receives the squared list.

    Returns:
        The same list object *l*. ``threading.Thread`` discards its target's
        return value, which is why the result is also put on the queue.
    """
    for index, value in enumerate(l):
        l[index] = value ** 2
    q.put(l)
    return l
def multithreading():
    """Square four small lists on separate threads and print the results collected from a queue."""
    q = Queue()
    data = [[1, 2, 3], [3, 4, 5], [4, 4, 4], [5, 5, 5]]
    workers = []
    for chunk in data:
        worker = threading.Thread(target=job, args=(chunk, q))
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()
    # Each worker put exactly one entry on the queue, so drain one per worker.
    results = [q.get() for _ in workers]
    print(results)
# Run the queue demo only when this file is executed as a script.
if __name__ == "__main__":
    multithreading()
知识点3.锁,锁住第一个线程,等它处理完后再进行下一个——对共享内存的处理
import threading
def job1():
    """Add 1 to the shared counter ``A`` ten times, printing the value after each step.

    The whole loop runs while holding the module-level ``lock``, so its
    increments and prints are not interleaved with another thread that takes
    the same lock.
    """
    global A
    # ``with`` releases the lock even if the loop raises, unlike a manual
    # acquire()/release() pair.
    with lock:
        for _ in range(10):
            A += 1
            print("job1", A)
def job2():
    """Add 10 to the shared counter ``A`` ten times, printing the value after each step.

    The whole loop runs while holding the module-level ``lock``, so its
    increments and prints are not interleaved with another thread that takes
    the same lock.
    """
    global A
    # ``with`` releases the lock even if the loop raises, unlike a manual
    # acquire()/release() pair.
    with lock:
        for _ in range(10):
            A += 10
            print("job2", A)
if __name__ == "__main__":
    # Shared state read by job1/job2: the counter and the lock that guards it.
    A = 0
    lock = threading.Lock()
    workers = [threading.Thread(target=job1), threading.Thread(target=job2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
区别1.Queue 是 multiprocessing 里的一个功能 q=mp.Queue()
区别2.进程池 job可以有返回值了,map&async
import multiprocessing as mp
def job(x):
    """Return *x* multiplied by itself."""
    squared = x * x
    return squared
def multicore():
    """Demonstrate three ways of getting results back from a two-process pool.

    Prints the squares of 0..9 via ``Pool.map``, the square of 2 via a single
    ``apply_async`` call, and the squares of 0..9 again via a list of
    ``apply_async`` calls.
    """
    # The context manager shuts the pool down on exit; previously it was never
    # closed. Every result is fetched inside the block because leaving it
    # terminates the workers.
    with mp.Pool(processes=2) as pool:
        # map: splits the inputs across the pool's worker processes.
        res = pool.map(job, range(10))
        print(res)
        # apply_async: submits one call, which runs in a single worker.
        res = pool.apply_async(job, (2,))
        print(res.get())
        # A list of apply_async calls gives the same effect as map.
        multi_res = [pool.apply_async(job, (i,)) for i in range(10)]
        print([pending.get() for pending in multi_res])
# Run the pool demo only when this file is executed as a script.
if __name__ == "__main__":
    multicore()
区别3.共享内存