# 多线程练习 (multithreading practice)


__author__ = 'sixkery'

'''多线程下载高清图片,因为图片太大,故用多线程下载图片
把图片的 url 放入队列,提取队列里的url,并下载。'''


import functools
import os
import threading
import time
from queue import Empty, Queue
from urllib import request

import requests


# Decorator that reports how long the wrapped callable took to run.
def run_time(func):
    """Wrap *func* so its elapsed run time is printed after each call.

    The wrapper forwards all positional and keyword arguments to *func*
    and returns whatever *func* returns, so decorating a function does not
    change its result. If *func* raises, the exception propagates and no
    timing line is printed.
    """
    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print('程序运行时间{}'.format(end - start))
        return result
    return wrapper


class spider:
    """Download images from the Unsplash listing with worker threads.

    ``product_url`` fills a queue with image download URLs, and ``download``
    (run concurrently by ``run``) drains that queue, saving each image as a
    ``.jpg`` file inside the ``图片`` directory.
    """

    def __init__(self):
        """Set up crawler state and make sure the output directory exists."""
        self.thread_num = 10  # number of worker threads started by run()
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36'}
        self.timeout = 30  # seconds; without it a stalled request would hang forever
        self.qurl = Queue()  # queue holding the image download URLs
        self.num = 1  # ordinal printed for the next successfully saved image
        self._lock = threading.Lock()  # guards self.num and the progress print

        # Create the folder that receives the downloaded images; exist_ok
        # avoids the separate "does it exist?" check.
        self.path = os.path.join(os.path.abspath('.'), '图片')
        os.makedirs(self.path, exist_ok=True)
        # NOTE: this changes the process-wide working directory. It is kept
        # for backward compatibility; download() itself writes to the
        # absolute self.path and does not depend on it.
        os.chdir(self.path)

    def product_url(self):
        """Fetch the photo listing and enqueue every item's download URL.

        Raises:
            requests.RequestException: if the listing request fails, times
                out, or returns an HTTP error status.
        """
        for i in range(1):
            url = 'https://unsplash.com/napi/photos?page={}&per_page=12&order_by=latest'.format(i)
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            # Fail with a clear HTTP error instead of a confusing KeyError or
            # TypeError while indexing an error payload below.
            response.raise_for_status()
            for item in response.json():
                self.qurl.put(item['links']['download'])

    def download(self):
        """Worker loop: pull URLs off the queue and save each image to disk.

        Returns as soon as the queue is empty. A URL whose request fails is
        reported and skipped so that one bad download does not stop the
        worker.
        """
        while True:
            # get_nowait() never blocks. Checking empty() and then calling a
            # blocking get() would let a worker that loses the race for the
            # last URL wait forever.
            try:
                url = self.qurl.get_nowait()
            except Empty:
                return

            try:
                response = requests.get(url, headers=self.headers, timeout=self.timeout)
                response.raise_for_status()
            except requests.RequestException as exc:
                # Do not write an error page to disk as if it were an image.
                print('图片下载失败: {} ({})'.format(url, exc))
                continue

            # The file name is the second-to-last path segment of the URL.
            title = url.split('/')[-2]
            with open(os.path.join(self.path, title + '.jpg'), 'wb') as f:
                f.write(response.content)

            # The counter is shared by all workers, so read, print and
            # increment it as one atomic step.
            with self._lock:
                print('第{}张图片下载成功'.format(self.num))
                self.num += 1

    @run_time
    def run(self):
        """Collect the URLs, then download them with ``thread_num`` threads."""
        self.product_url()

        ths = [threading.Thread(target=self.download)
               for _ in range(self.thread_num)]
        for th in ths:
            th.start()
        # Wait for every worker so the reported run time covers all downloads.
        for th in ths:
            th.join()



# Script entry point: build the spider and start the threaded download.
if __name__ == '__main__':
    crawler = spider()
    crawler.run()




# 你可能感兴趣的:(多线程练习)