"""Crawl a numeric range of gallery pages and download the images they link to.

For every page id in the configured range the page is fetched, scanned for
``http:`` links ending in ``jpg``/``png``/``gif``, and each reachable link is
downloaded into ``targetDir`` on its own thread.
"""
import http.client
import os
import random
import re
import socket
import sys
import threading
import urllib.request

# Directory that downloaded pictures are written to.
targetDir = r"C:\pic"

PAGE_URL_TEMPLATE = "http://www.xxx.com/html/tupian/xxx/%d.html"
# Page ids are crawled over range(FIRST_PAGE_ID, LAST_PAGE_ID).
# NOTE(review): an earlier comment in this script listed 71460 as a start id —
# presumably a previous run; confirm before widening the range.
FIRST_PAGE_ID = 86981
LAST_PAGE_ID = 131306
# Timeout (seconds) applied to every socket operation of the crawl.
REQUEST_TIMEOUT_SECONDS = 2
# After a page fails to load, this many following page ids are not requested.
PAGES_SKIPPED_AFTER_FAILURE = 4

# Same pattern the script has always used: group 1 is the whole link.
IMAGE_LINK_RE = re.compile(r'(http:[^\s]*?(jpg|png|gif))')

# Failures that mean "this request did not work" rather than "the program is
# broken".  URLError, HTTPError and socket timeouts are all OSError subclasses;
# ValueError covers malformed URLs.  KeyboardInterrupt is deliberately NOT
# included so that Ctrl-C still stops the crawl.
NETWORK_ERRORS = (OSError, http.client.HTTPException, ValueError)


def destFile(path):
    """Return the local file path under ``targetDir`` for the remote *path*.

    The file name is the text after the last ``/`` of *path* (the whole of
    *path* when it contains no ``/``), prefixed with a random integer between
    1 and 10000 so that equally named images are unlikely to overwrite each
    other.  ``targetDir`` is created if it does not exist yet.
    """
    # exist_ok=True makes this safe when several download threads get here at
    # the same time; a separate isdir()/mkdir() pair can race and raise.
    os.makedirs(targetDir, exist_ok=True)
    name = path.rpartition('/')[2]
    prefix = '%d' % random.randint(1, 10000)
    return os.path.join(targetDir, prefix + name)


def getPic(link):
    """Download *link* into ``targetDir``; report (but do not raise) failures.

    Downloading is best effort: a failed picture must not stop the crawl, so
    network and file-system errors are written to stderr and swallowed.
    """
    try:
        urllib.request.urlretrieve(link, destFile(link))
    except NETWORK_ERRORS as err:
        print("failed to download %s: %s" % (link, err), file=sys.stderr)


def findImageLinks(page):
    """Return the sorted, de-duplicated image links found in *page*.

    *page* may be ``str`` or ``bytes``.  Bytes are decoded (undecodable bytes
    are replaced) instead of being passed through ``str()``: the ``b'...'``
    repr turns real newlines into the two characters ``\\n``, which the
    pattern does not treat as whitespace, so separate lines could be glued
    into one bogus link.
    """
    if isinstance(page, (bytes, bytearray)):
        page = page.decode("utf-8", errors="replace")
    # Sorting gives a reproducible processing order from run to run.
    return sorted({match.group(1) for match in IMAGE_LINK_RE.finditer(page)})


def fetchPage(url):
    """Return the raw body of *url*, or ``None`` if it cannot be fetched."""
    try:
        with urllib.request.urlopen(urllib.request.Request(url)) as response:
            return response.read()
    except NETWORK_ERRORS:
        return None


def isReachable(link):
    """Return True if *link* can be opened, closing the probe connection."""
    try:
        with urllib.request.urlopen(link):
            return True
    except NETWORK_ERRORS:
        return False


def startDownloads(links):
    """Start one download thread per reachable link and return the threads.

    Each link is printed and probed first.  Scanning stops at the first
    unreachable link, and only the links accepted before it are downloaded.
    NOTE(review): stopping (rather than skipping just that link) is the
    script's long-standing behaviour — presumably a dead link is taken as a
    sign the image host is down; confirm this is intended.

    The threads are not joined here; being non-daemon threads they keep the
    interpreter alive until their downloads finish.
    """
    threads = []
    for link in links:
        print(link)
        if not isReachable(link):
            break
        threads.append(threading.Thread(target=getPic, args=(link,)))
    for index, thread in enumerate(threads):
        thread.start()
        print(index)
    return threads


def main():
    """Crawl every page id in the configured range and download its images."""
    # Set once, up front, so the very first page request cannot hang forever.
    socket.setdefaulttimeout(REQUEST_TIMEOUT_SECONDS)
    pagesToSkip = 0
    for pageId in range(FIRST_PAGE_ID, LAST_PAGE_ID):
        if pagesToSkip:
            pagesToSkip -= 1
            continue
        content = fetchPage(PAGE_URL_TEMPLATE % pageId)
        print(pageId)
        if content is None:
            pagesToSkip = PAGES_SKIPPED_AFTER_FAILURE
            continue
        print("*************************************")
        startDownloads(findImageLinks(content))


if __name__ == "__main__":
    main()