多线程下载 sis001“网友自拍贴图”版面的图片。这个程序并不能下载全部版面,而是只下载第二页的内容,因为第二页是最新的内容。你需要一个 sis001 上 5 级以上用户的用户名和密码,否则无法访问此版面。
#!/usr/bin/python
# -*- coding: cp936 -*-
#coding utf-8
import urllib
import urllib2
import re
import cookielib
import Queue
import threading
# First-version worker that fetches one page through `opener`.
# NOTE(review): this copy of the function is damaged -- indentation has been
# lost and the raw-string pattern on the `re_img` line is cut off, so the rest
# of the function body and the start of the loop that follows are missing.
def downPic(tiezi_url):
# NOTE(review): the request URL comes from q.get(), not from the tiezi_url
# parameter -- confirm which source is intended.
req = urllib2.Request(q.get(),None,headers)
# Read the whole response body.
tiezi_html = opener.open(req).read()
#print tiezi_html
# NOTE(review): unterminated regex literal; the original pattern cannot be
# recovered from this text.
re_img = re.compile(r'\0:
# Start a thread that runs downPic with tiezi_url as its only argument.
th = threading.Thread(target=downPic,args=(tiezi_url,) )
th.start()
# Tail of a conditional whose opening lines are missing; this branch breaks out
# of an enclosing loop.
else:
break
=====
高效
#!/usr/bin/python
# -*- coding: cp936 -*-
#coding utf-8
import urllib
import urllib2
import re
import cookielib
import Queue
import threading
import socket
import time
import sys
import random
def log(message):
    """Append one timestamped line to ``log.txt`` in the current directory.

    message -- text to record. The line written is the ``time.ctime()``
    stamp, a single space, the message, and a trailing newline.
    """
    # ``with`` closes the handle even if the write raises, and the handle
    # gets its own name instead of shadowing this function.
    with open("log.txt", "a") as logfile:
        logfile.write(time.ctime() + " " + message + "\n")
def getPic():
    """Download one image named by the next item taken from the queue ``q``.

    The item is used as the URL when it starts with "http"; otherwise it is
    appended to the forum root URL. The response body is saved under
    ``.\\img\\`` as the last path segment of the URL plus a random number
    plus ".jpg". Any failure while fetching or saving is written to the log
    as "<item>||<exception type>||<exception value>" and is not re-raised.
    """
    i = q.get()
    # Only the way the URL is formed differs between the two cases; the
    # download, save and error handling are shared.
    if re.match("http", i):
        img_url = i
    else:
        img_url = "http://38.103.161.185/forum/%s" % i
    filename = re.split(r'/', img_url)
    try:
        req = urllib2.Request(img_url, None, headers)
        res = opener.open(req).read()
        # The random suffix keeps files with the same base name from
        # overwriting each other.
        savefile = '.\\img\\' + filename[-1] + str(int(random.random()*100000000)) + '.jpg'
        # ``with`` closes the output file even if the write fails.
        with open(savefile, 'wb') as out:
            out.write(res)
    except Exception:
        # Best-effort download: record the failure and carry on. The
        # traceback object is deliberately not kept in a local.
        etype, value = sys.exc_info()[:2]
        errormsg = i + "||" + str(etype) + "||" + str(value)
        log(errormsg)
# Second-version worker for one page; takes the page URL and a queue.
# NOTE(review): this copy is damaged -- indentation is lost and the raw-string
# pattern on the `re_img` line is cut off, so whatever the function did with
# the page and with q, and the start of the loop that follows, is missing.
def downPic(tiezi_url,q):
# Request the page with the module-level headers and read the whole body.
req = urllib2.Request(tiezi_url,None,headers)
tiezi_html = opener.open(req).read()
#print tiezi_html
# NOTE(review): unterminated regex literal; the original pattern cannot be
# recovered from this text.
re_img = re.compile(r'\0:
# Start a thread running getPic, which takes its own work item from q.
th = threading.Thread(target=getPic)
th.start()
#print "Queue %s"%q.qsize()
# Tail of a conditional whose opening lines are missing; this branch breaks out
# of an enclosing loop.
else:
break
# User-agent header passed to every Request built below.
# NOTE(review): the value has an opening "(" with no matching ")" -- confirm
# whether that is intentional.
headers ={"User-agent":"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"}
# Default timeout, in seconds, for sockets created from here on.
socket.setdefaulttimeout(30)
# Cookie jar plus an opener that uses it, so cookies received on one request
# are sent on later requests made through the same opener.
cj = cookielib.CookieJar()
#proxy = urllib2.ProxyHandler({'http': '127.0.0.1:8087'})
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
#urllib2.install_opener(opener)
# Login form fields.
# NOTE(review): the two hash-named keys carry "xxx" placeholders -- presumably
# the username and password fields; "formhash" is a fixed literal that may
# need to match the live login page -- TODO confirm both.
data = {
"formhash":"3fec4925",
"referer":"index.php",
"loginfield":"username",
"240aa46b3893fb57c436c0a3785b61e7":"xxx",
"ea32b1cadbde4b66ca614e0bb593d1c9":"xxx",
"questionid":"0",
"answer":"",
"cookietime":"2592000",
"loginmode":"",
"styleid":"",
"loginsubmit":"true"}
# URL-encode the form and POST it to the login endpoint. The response object
# is kept in `content` but its body is not read here.
post_data = urllib.urlencode(data)
req = urllib2.Request("http://38.103.161.185/forum/logging.php?action=login&",post_data,headers)
content=opener.open(req)
#print content.read()
# Fetch the board listing page through the same opener and keep its HTML.
# NOTE(review): the URL ends in "-1" while the file's introductory text talks
# about the second page -- confirm which page is intended.
req2 = urllib2.Request("http://38.103.161.185/forum/forum-62-1.html",None,headers)
board_html = opener.open(req2).read()
#print board_html
re_link = re.compile(r'\