download pics with multithreads

python3.4.2
最简单

show me the code

  • download.py
import json
import logging
import os
from pathlib import Path
from urllib.request import urlopen, Request
logger = logging.getLogger(__name__)

def get_links(client_id):
    """Fetch the gallery listing from the Imgur API and return its item links.

    Args:
        client_id: Imgur API client ID, sent as ``Client-ID <id>`` in the
            ``Authorization`` header.

    Returns:
        A lazy ``map`` iterator over the ``link`` value of every entry in
        the ``data`` list of the JSON response.
    """
    headers = {'Authorization': 'Client-ID {}'.format(client_id)}
    req = Request('https://api.imgur.com/3/gallery/', headers=headers, method='GET')
    with urlopen(req) as resp:
        # read() instead of readall(): the response object only offers
        # readall() on Python <= 3.4, while read() works on every version.
        data = json.loads(resp.read().decode('utf-8'))
    return map(lambda item: item['link'], data['data'])

def download_link(directory, link):
    """Download ``link`` into ``directory``.

    The target file is named after the last path segment of the URL.

    Args:
        directory: Path object of the directory to write into; it must
            support the ``/`` operator and the result must have ``open``.
        link: URL of the resource to fetch.
    """
    logger.info('Downloading %s', link)
    download_path = directory / os.path.basename(link)
    with urlopen(link) as image, download_path.open('wb') as f:
        # read() instead of readall(): readall() is missing from the
        # response objects returned by urlopen on Python 3.5+.
        f.write(image.read())

def setup_download_dir():
    """Return the relative ``images`` directory path, creating it if absent.

    Returns:
        ``Path('images')``, resolved against the current working directory
        when used.
    """
    target = Path('images')
    if target.exists():
        # Already present: nothing to create.
        return target
    target.mkdir()
    return target
  • single.py
import logging
import os
from time import time

from download import setup_download_dir, get_links, download_link

# Logging setup
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.getLogger('requests').setLevel(logging.CRITICAL)
logger = logging.getLogger(__name__)


def main():
    """Download every ``.jpg`` gallery link sequentially and print the elapsed time.

    Raises:
        Exception: if the ``IMGUR_CLIENT_ID`` environment variable is unset
            or empty.
    """
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [link for link in get_links(client_id) if link.endswith('.jpg')]
    # One download at a time, in the order the API returned the links.
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))


if __name__ == '__main__':
    main()
  • using threads
from queue import Queue
from threading import Thread
class DownloadWorker(Thread):
    """Worker thread that processes ``(directory, link)`` tasks from a queue.

    Each iteration takes one task from the queue (blocking while the queue
    is empty), downloads it with ``download_link`` and then reports the task
    as done. A ``queue.join()`` in the main thread stays blocked until every
    queued task has been reported done.
    """

    def __init__(self, queue):
        """Store the shared task queue.

        Args:
            queue: queue object providing ``get()`` and ``task_done()``,
                whose items are ``(directory, link)`` pairs.
        """
        super().__init__()
        self.queue = queue

    def run(self):
        """Loop forever: fetch a task, download it, acknowledge it."""
        # Local import so the class does not depend on a module-level logger.
        import logging
        log = logging.getLogger(__name__)
        while True:
            task = self.queue.get()
            try:
                directory, link = task
                download_link(directory, link)
            except Exception:
                # A single failed download must not kill the worker, or the
                # remaining tasks would lose one of their consumers.
                log.exception('Download task %r failed', task)
            finally:
                # Always acknowledge the task; a missing task_done() would
                # leave queue.join() blocked forever.
                self.queue.task_done()


def main(num_workers=8):
    """Download every ``.jpg`` gallery link with a pool of worker threads.

    Args:
        num_workers: number of ``DownloadWorker`` threads to start
            (defaults to 8).

    Raises:
        Exception: if the ``IMGUR_CLIENT_ID`` environment variable is unset
            or empty.
    """
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [link for link in get_links(client_id) if link.endswith('.jpg')]
    # Single queue used to hand tasks to all worker threads.
    queue = Queue()
    # All workers share the one queue and pull their tasks from it.
    for _ in range(num_workers):
        worker = DownloadWorker(queue)
        # Daemon threads let the main thread exit even while the workers
        # are still blocked waiting on the queue.
        worker.daemon = True
        worker.start()
    # Enqueue one (directory, link) task per picture.
    for link in links:
        logger.info('Queueing %s', link)
        queue.put((download_dir, link))
    # Block until every queued task has been marked done by a worker.
    queue.join()
    print('Took {}'.format(time() - ts))

8核,快了4倍

你可能感兴趣的:(download pics with multithreads)