# imgur.py

#!/usr/bin/env python

# -*- coding: utf-8 -*-

 

from __future__ import with_statement

import sys

import os

import urllib2

from urlparse import urlparse

import random

import re

import gevent

from gevent import monkey

# Patch the blocking standard-library modules (gevent's default set) so that
# calls made through them cooperate with gevent greenlets.
monkey.patch_all()

 

def get(url, headers=None):
  """Open *url* and return the response object.

  A fresh opener is built for every call and is also installed as the
  urllib2 default opener.

  Args:
    url: Address to request.
    headers: Optional iterable of ``(name, value)`` pairs that replaces the
      opener's default headers. When omitted, the defaults are left alone.

  Returns:
    The file-like response object returned by the opener.

  Raises:
    SystemExit: With status -1 when the request fails with
      ``urllib2.HTTPError`` or ``urllib2.URLError``; the failure is
      reported on stderr first.
  """
  setup = urllib2.build_opener()
  # The opener reads its default headers from ``addheaders`` (no underscore).
  if headers is not None:
    setup.addheaders = list(headers)
  urllib2.install_opener(setup)
  request = urllib2.Request(url)
  try:
    # Building the Request does not touch the network; opening it does, so
    # this is the call the error handler has to guard.
    return setup.open(request)
  except (urllib2.HTTPError, urllib2.URLError):
    # sys.exc_info() avoids the version-specific "except ..., e" syntax.
    sys.stderr.write('Failed to fetch %s: %s\n' % (url, sys.exc_info()[1]))
    sys.exit(-1)

 

def is_url(url):
  """Tell whether *url* points at imgur.com or one of its subdomains.

  The parsed hostname is compared, so a port, user info or upper-case
  letters in the address do not matter, and look-alike hosts such as
  ``notimgur.com`` or ``imgur.com.example.org`` are rejected.

  Args:
    url: Address to inspect. It needs a scheme (or a leading ``//``) for a
      host to be recognised at all.

  Returns:
    True when the host is ``imgur.com`` or ends with ``.imgur.com``,
    otherwise False.
  """
  # hostname is None when the URL carries no network location.
  host = urlparse(url).hostname or ''
  return host == 'imgur.com' or host.endswith('.imgur.com')

 

def fetch(url):
  """Download the no-script page of the album that *url* refers to.

  The album key is taken from the second segment of the URL path, for
  example ``KEY`` in ``/a/KEY``.

  Args:
    url: Album address.

  Returns:
    A ``(page_source, key)`` tuple.

  Raises:
    IndexError: When the URL path has fewer than two segments.
  """
  segments = urlparse(url).path.split('/')
  # segments[0] is the empty string in front of the leading slash.
  key = segments[2]
  page = get('https://imgur.com/a/%s/noscript' % key)
  return page.read(), key

 

def get_or_create_folder(key, folder=None):
  """Return the download directory, creating it when it does not exist.

  Args:
    key: Album key; used as the directory name when *folder* is None.
    folder: Optional directory name that takes precedence over *key*.

  Returns:
    The directory name that was selected.
  """
  target = key if folder is None else folder
  if os.path.exists(target):
    return target
  os.makedirs(target)
  return target

  

def fetch_images(foldername, images):
  """Download a single image into *foldername* and report it on stdout.

  Args:
    foldername: Directory the image file is written to.
    images: Sequence whose first item is the image URL and whose second
      item is the file name to store it under.
  """
  # Sleeps for either 0 or 0.0001 seconds, chosen at random, before starting.
  gevent.sleep(random.randint(0, 1) * 0.0001)
  source, filename = images[0], images[1]
  # Download before opening the destination so a failed request does not
  # leave an empty file behind.
  response = get(source)
  try:
    payload = response.read()
  finally:
    response.close()
  with open(os.path.join(foldername, filename), 'wb') as img:
    img.write(payload)
  # Parenthesised single argument: same output under the Python 2 print
  # statement and the print function.
  print('Done:\t%s' % source)

 

def save(url, folder=None):
  """Collect the image links of an album and prepare its download folder.

  Args:
    url: Album address, passed on to ``fetch``.
    folder: Optional directory name; the album key is used when omitted.

  Returns:
    A ``(foldername, images)`` tuple. ``images`` is a list with one
    ``(image_url, file_name, extension)`` tuple per matching ``<img>`` tag.
  """
  page_source, album_key = fetch(url)
  # Groups: full image URL, file name with extension, bare extension.
  image_tag = r'<img src="(http\:\/\/i\.imgur\.com\/([a-zA-Z0-9]{5}\.(jpg|png|gif)))"'
  images = re.findall(image_tag, page_source)
  return get_or_create_folder(album_key, folder), images

 

 

if __name__ == '__main__':
  # Command line: <script> <album-url> [folder]
  if len(sys.argv) < 2:
    # Passing a string to sys.exit prints it on stderr and exits with 1,
    # instead of crashing with an IndexError traceback.
    sys.exit('Usage: %s <album-url> [folder]' % sys.argv[0])
  url = sys.argv[1]
  # The destination folder is optional; None lets save() pick the name.
  folder = sys.argv[2] if len(sys.argv) > 2 else None
  foldername, images = save(url, folder=folder)
  # Spawn one greenlet per image, then wait until all of them have finished.
  threads = [gevent.spawn(fetch_images, foldername, image) for image in images]
  gevent.joinall(threads)

 

# You may also be interested in: (img)