[python爬虫之路day14]:多线程下载斗图吧表情包,再也不用担心表情包不够用了!!!!!

先补充一个小知识,GIL全局解释器锁:
全局解释器锁(Global Interpreter Lock)是计算机程序设计语言解释器用于同步线程的工具,使得在同一进程内任何时刻仅有一个线程在执行。常见例子有CPython(Jython不使用GIL)与Ruby MRI。
简言之:
由于GIL的存在,CPython的多线程在同一时刻只有一个线程在执行Python字节码,不能利用多核CPU做并行计算;但线程在等待网络、磁盘等I/O时会释放GIL,所以对爬虫这类I/O密集型任务,多线程依然能明显提速。
好了,来看正题。
首先我们先用之前的办法同步下载:

import re
import requests
from lxml import etree
from urllib import request
import os
def parse_page(url):
    """Download every non-GIF sticker image found on one listing page.

    The page is fetched with a browser-like User-Agent. Every ``<img>`` tag
    inside the ``page-content text-center`` container whose class is not
    ``gif`` is then saved into the hard-coded ``biaoqing`` folder. The file
    name is the tag's ``alt`` text (with punctuation and characters that are
    unsafe in file names removed) plus the extension of its
    ``data-original`` URL.

    Args:
        url: Address of the listing page to scrape.
    """
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36')
    headers = {"User-Agent": user_agent}
    response = requests.get(url, headers=headers)
    html = etree.HTML(response.text)  # parse the HTML into an element tree
    imgs = html.xpath('//div[@class="page-content text-center"]//img[@class!="gif"]')

    # urlretrieve() uses the globally installed opener, so install one that
    # sends the same browser User-Agent as the page request. It is installed
    # once, before the loop, because it does not depend on the image.
    opener = request.build_opener()
    opener.addheaders = [('User-Agent', user_agent)]
    request.install_opener(opener)

    for img in imgs:
        img_url = img.get('data-original')
        if not img_url:
            # A tag without a data-original attribute has nothing to download.
            continue
        # The alt attribute may be missing; treat that as an empty title.
        # Besides the original punctuation, also drop \ / " < > | so the alt
        # text can neither inject path separators nor produce a name that is
        # invalid on Windows.
        alt = re.sub(r'[??.。,!!::*\\/"<>|]', "", img.get('alt') or "")
        stem, suffix = os.path.splitext(img_url)
        if not alt:
            # Fall back to the URL's own base name so images without a usable
            # title do not all overwrite the same extension-only file.
            alt = os.path.basename(stem)
        filename = alt + suffix
        request.urlretrieve(img_url, r'C:\python38\new  project\biaoqing/' + filename)
        print(filename + '下载成功')

def main():
    """Scrape the listing pages one after another (currently only page 1)."""
    url_template = 'http://www.doutula.com/photo/list/?page=%d'
    page_no = 1
    # Upper bound is exclusive: pages 1 <= page_no < 2 are visited.
    while page_no < 2:
        parse_page(url_template % page_no)
        page_no += 1


if __name__ == '__main__':
    main()

发现下载得可慢了,然后来看看多线程吧!!!速度马上就提上来了!

import re
import requests
from lxml import etree
from urllib import request
import os
from queue import Queue
import threading
class Procuder(threading.Thread):
    """Producer thread: turns listing-page URLs into image download jobs.

    Each worker repeatedly takes a page URL from ``page_queue``, scrapes it,
    and puts one ``(img_url, filename)`` tuple per image onto ``img_queue``.
    The worker exits as soon as ``page_queue`` has no URL left.
    """

    # Browser-like request headers used for the listing-page request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36"}

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues; remaining arguments go to ``threading.Thread``.

        Args:
            page_queue: Queue of listing-page URLs to scrape.
            img_queue: Queue receiving ``(img_url, filename)`` tuples.
        """
        super().__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Scrape pages until ``page_queue`` is drained."""
        # Local import: the surrounding script only imports ``Queue``.
        from queue import Empty

        while True:
            # Checking empty() and then calling a blocking get() is racy when
            # several producers share the queue: another worker can take the
            # last URL in between, leaving this one blocked forever. A
            # non-blocking get makes "take one or stop" a single step.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            self.parse_page(url)

    def parse_page(self, url):
        """Scrape one listing page and enqueue a download job per image.

        Args:
            url: Address of the listing page to scrape.
        """
        response = requests.get(url, headers=self.headers)
        html = etree.HTML(response.text)  # parse the HTML into an element tree
        imgs = html.xpath('//div[@class="page-content text-center"]//img[@class!="gif"]')

        # install_opener() is process-wide: any later request.urlretrieve()
        # call in this process will send this browser User-Agent.
        opener = request.build_opener()
        opener.addheaders = [('User-Agent', self.headers['User-Agent'])]
        request.install_opener(opener)

        for img in imgs:
            img_url = img.get('data-original')
            if not img_url:
                # A tag without a data-original attribute has nothing to download.
                continue
            # The alt attribute may be missing; treat that as an empty title.
            # Besides the original punctuation, also drop \ / " < > | so the
            # alt text can neither inject path separators nor produce a name
            # that is invalid on Windows.
            alt = re.sub(r'[??.。,!!::*\\/"<>|]', "", img.get('alt') or "")
            stem, suffix = os.path.splitext(img_url)
            if not alt:
                # Fall back to the URL's own base name so untitled images do
                # not all map to the same extension-only file name.
                alt = os.path.basename(stem)
            filename = alt + suffix
            self.img_queue.put((img_url, filename))
class Consumer(threading.Thread):
    """Consumer thread: downloads the images queued as ``(img_url, filename)``.

    The worker keeps taking jobs from ``img_queue``. It stops once no job has
    arrived for ``idle_timeout`` seconds and ``page_queue`` is empty.

    NOTE: this is a heuristic. If a page is still being parsed for longer
    than ``idle_timeout`` after ``page_queue`` was drained, the worker can
    exit before that page's jobs arrive; raise ``idle_timeout`` if needed.
    """

    # Prefix the file name is appended to (unchanged from the original script).
    save_dir = r'C:\python38\new  project\biaoqing/'
    # Seconds to wait for a new job before re-checking whether pages remain.
    idle_timeout = 3

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues; remaining arguments go to ``threading.Thread``.

        Args:
            page_queue: Queue of listing-page URLs, read only to detect that
                no pages are left.
            img_queue: Queue of ``(img_url, filename)`` download jobs.
        """
        super().__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Download queued images until the work has run out."""
        # Local import: the surrounding script only imports ``Queue``.
        from queue import Empty

        while True:
            # A blocking get() after an empty() check can hang forever when
            # another consumer takes the last job in between, and checking
            # both queues up front makes the worker quit while pages are
            # still being parsed. Waiting with a timeout avoids both.
            try:
                img_url, filename = self.img_queue.get(timeout=self.idle_timeout)
            except Empty:
                if self.page_queue.empty():
                    break
                continue
            try:
                request.urlretrieve(img_url, self.save_dir + filename)
            except (OSError, ValueError) as err:
                # URLError/HTTPError and file errors are OSError subclasses;
                # ValueError is raised for a malformed URL. One bad job must
                # not kill the worker, so report it and move on.
                print(filename + '下载失败: ' + str(err))
                continue
            print(filename + '下载完成')

def main():
    """Queue listing pages 1-9, then start five producers and five consumers."""
    page_queue = Queue(50)
    img_queue = Queue(1000)

    url_template = 'http://www.doutula.com/photo/list/?page=%d'
    for page_no in range(1, 10):
        page_queue.put(url_template % page_no)

    # All producers are created and started first, then all consumers;
    # both kinds of worker share the same pair of queues.
    for worker_cls in (Procuder, Consumer):
        for _ in range(5):
            worker_cls(page_queue, img_queue).start()


if __name__ == '__main__':
    main()

来看看我保存的海量表情包:
[python爬虫之路day14]:多线程下载斗图吧表情包,再也不用担心表情包不够用了!!!!!_第1张图片
再也不怕斗不过图了!!
想不想像我一样每天提升一点点!快来关注互粉和我一起学爬虫吧!记录我的小白爬虫之路。
此处再次鸣谢B站up主神奇的老黄的爬虫学习视频。
链接在此:
https://www.bilibili.com/video/av44518113?p=73

你可能感兴趣的:(爬虫小白学习)