https://stackoverflow.com/questions/13747964/urllib2-urlopen-read-timeout-block
def download_urls(url_and_path_list, num_concurrent, skip_existing):
# prepare the queue
queue = Queue.Queue()
for url_and_path in url_and_path_list:
queue.put(url_and_path)
# start the requested number of download threads to download the files
threads = []
for _ in range(num_concurrent):
t = DownloadThread(queue, skip_existing)
t.daemon = True
t.start()
queue.join()
class DownloadThread(threading.Thread):
def __init__(self, queue, skip_existing):
super(DownloadThread, self).__init__()
self.queue = queue
self.skip_existing = skip_existing
def run(self):
while True:
#grabs url from queue
url, path = self.queue.get()
if self.skip_existing and exists(path):
# skip if requested
self.queue.task_done()
continue
try:
urllib.urlretrieve(url, path)
except IOError:
print "Error downloading url '%s'." % url
#signals to queue job is done
self.queue.task_done()