Python多线程编程

Python多线程编程

1 GIL锁

Python多线程因为GIL的存在,它们并不能利用CPU多核优势,一个Python进程中,只允许有一个线程处于运行状态.但是在做阻塞的系统调用时,当前线程会释放GIL,让别的线程有执行机会.

2 threading

#对于io操作来说,多线程和多进程性能差别不大
#1.通过Thread类实例化

import time
import threading

def get_detail_html(url):
    print("get detail html started")
    time.sleep(2)
    print("get detail html end")


def get_detail_url(url):
    print("get detail url started")
    time.sleep(4)
    print("get detail url end")

if __name__ == "__main__":
    thread1 = threading.Thread(target=get_detail_url)
    thread2 = threading.Thread(target=get_detail_html)
    start_time = time.time()
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()

    print("last time: {}".format(time.time() - start_time))
get detail url started
get detail html started
get detail html end
get detail url end
last time: 4.001551389694214
#2. 通过集成Thread来实现多线程

class GetDetailHtml(threading.Thread):
    def __init__(self, name):
        super().__init__(name=name)

    def run(self):
        print("get detail html started")
        time.sleep(2)
        print("get detail html end")


class GetDetailUrl(threading.Thread):
    def __init__(self, name):
        super().__init__(name=name)

    def run(self):
        print("get detail url started")
        time.sleep(4)
        print("get detail url end")

if  __name__ == "__main__":
    thread1 = GetDetailHtml("get_detail_html")
    thread2 = GetDetailUrl("get_detail_url")
    start_time = time.time()
    thread1.start()
    thread2.start()

    thread1.join()
    thread2.join()

    print ("last time: {}".format(time.time()-start_time))
get detail url started
get detail html started
get detail html end
get detail url end
last time: 4.0013439655303955

3 线程中通信与同步

3.1 共享变量通信

from threading import Lock, RLock #可重入的锁

# Lock在同一个线程里面, 只能调用一次, 否则会出现死锁
# RLock在同一个线程里面,可以连续调用多次acquire, 一定要注意acquire的次数要和release的次数相等
total = 0
lock = RLock()
def add():
    global lock
    global total
    for i in range(1000000):
        lock.acquire()
        total += 1
        lock.release()

def desc():
    global total
    global lock
    for i in range(1000000):
        lock.acquire()
        total -= 1
        lock.release()

import threading
thread1 = threading.Thread(target=add)
thread2 = threading.Thread(target=desc)
thread1.start()
thread2.start()


thread1.join()
thread2.join()
print(total)

#1. 用锁会影响性能
#2. 锁会引起死锁
0

3.2 queue通信

#通过queue的方式进行线程间同步
from queue import Queue
import time
import threading

def get_detail_html(queue):
    #爬取文章详情页
    url = queue.get()
    print("get detail html started")
    time.sleep(2)
    print(url)
    print("get detail html end")


def get_detail_url(queue):
    # 爬取文章列表页
    print("get detail url started")
    time.sleep(4)
    queue.put("https://www.baidu.com/")
    print("get detail url end")



if  __name__ == "__main__":
    detail_url_queue = Queue(maxsize=1000)
    thread_detail_url = threading.Thread(target=get_detail_url, args=(detail_url_queue,))
    html_thread = threading.Thread(target=get_detail_html, args=(detail_url_queue,))
    thread_detail_url.start()
    html_thread.start()
    detail_url_queue.join()
    html_thread.join()
get detail url started
get detail url end
get detail html started
https://www.baidu.com/
get detail html end

4 Condition


import threading
from threading import Condition
#通过condition完成协同读诗

class XiaoAi(threading.Thread):
    def __init__(self, cond):
        super().__init__(name="小爱")
        self.cond = cond

    def run(self):
        with self.cond:
            self.cond.wait()
            print("{} : 在 ".format(self.name))
            self.cond.notify()

            self.cond.wait()
            print("{} : 好啊 ".format(self.name))
            self.cond.notify()

            self.cond.wait()
            print("{} : 君住长江尾 ".format(self.name))
            self.cond.notify()

            self.cond.wait()
            print("{} : 共饮长江水 ".format(self.name))
            self.cond.notify()

            self.cond.wait()
            print("{} : 此恨何时已 ".format(self.name))
            self.cond.notify()

            self.cond.wait()
            print("{} : 定不负相思意 ".format(self.name))
            self.cond.notify()

class TianMao(threading.Thread):
    def __init__(self, cond):
        super().__init__(name="天猫精灵")
        self.cond = cond

    def run(self):
        with self.cond:
            print("{} : 小爱同学 ".format(self.name))
            self.cond.notify()
            self.cond.wait()

            print("{} : 我们来对古诗吧 ".format(self.name))
            self.cond.notify()
            self.cond.wait()

            print("{} : 我住长江头 ".format(self.name))
            self.cond.notify()
            self.cond.wait()

            print("{} : 日日思君不见君 ".format(self.name))
            self.cond.notify()
            self.cond.wait()

            print("{} : 此水几时休 ".format(self.name))
            self.cond.notify()
            self.cond.wait()

            print("{} : 只愿君心似我心 ".format(self.name))
            self.cond.notify()
            self.cond.wait()



if __name__ == "__main__":
    cond = threading.Condition()
    xiaoai = XiaoAi(cond)
    tianmao = TianMao(cond)

    #启动顺序很重要
    #在调用with cond之后才能调用wait或者notify方法
    #condition有两层锁, 一把底层锁会在线程调用了wait方法的时候释放, 上面的锁会在每次调用wait的时候分配一把并放入到cond的等待队列中,等到notify方法的唤醒
    xiaoai.start()
    tianmao.start()
天猫精灵 : 小爱同学 
小爱 : 在 
天猫精灵 : 我们来对古诗吧 
小爱 : 好啊 
天猫精灵 : 我住长江头 
小爱 : 君住长江尾 
天猫精灵 : 日日思君不见君 
小爱 : 共饮长江水 
天猫精灵 : 此水几时休 
小爱 : 此恨何时已 
天猫精灵 : 只愿君心似我心 
小爱 : 定不负相思意 

5 Semaphore


#Semaphore 是用于控制进入数量的锁
#文件, 读、写, 写一般只是用于一个线程写,读可以允许有多个

#做爬虫
import threading
import time

class HtmlSpider(threading.Thread):
    def __init__(self, url, sem):
        super().__init__()
        self.url = url
        self.sem = sem

    def run(self):
        time.sleep(2)
        print("got html text success")
        self.sem.release()

class UrlProducer(threading.Thread):
    def __init__(self, sem):
        super().__init__()
        self.sem = sem

    def run(self):
        for i in range(20):
            self.sem.acquire()
            html_thread = HtmlSpider("https://baidu.com/{}".format(i), self.sem)
            html_thread.start()

if __name__ == "__main__":
    sem = threading.Semaphore(3)
    url_producer = UrlProducer(sem)
    url_producer.start()
got html text success
......
got html text success
got html text success

6 ThreadPoolExecutor


from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED

#未来对象,task的返回容器
#线程池, 为什么要线程池
#主线程中可以获取某一个线程的状态或者某一个任务的状态,以及返回值
#当一个线程完成的时候我们主线程能立即知道
#futures可以让多线程和多进程编码接口一致
import time

def get_html(times):
    time.sleep(times)
    print("get page {} success".format(times))
    return times

executor = ThreadPoolExecutor(max_workers=2)

#要获取已经成功的task的返回
urls = [3,2,4]
all_task = [executor.submit(get_html, (url)) for url in urls]
# wait(all_task, return_when=FIRST_COMPLETED)
# print("main")
for future in as_completed(all_task):
    data = future.result()
    print("get {} page".format(data))
#通过executor的map获取已经完成的task的值
# for data in executor.map(get_html, urls):
#     print("get {} page".format(data))
get page 2 success
get 2 page
get page 3 success
get 3 page
get page 4 success
get 4 page

你可能感兴趣的:(Python)