多线程下载sis001的网友自拍贴图版面的图片

多线程下载sis001的网友自拍贴图版面的图片。这个程序并不能下载全部版面,而是只下载第二页的内容,因为第二页是最新的内容。你需要一个sis001 5级以上用户的用户名和密码,否则无法访问此版面。

#!/usr/bin/python
# -*- coding: cp936 -*-
#coding utf-8


import urllib
import urllib2
import re
import cookielib
import Queue
import threading


def downPic(tiezi_url):
    req = urllib2.Request(q.get(),None,headers)
    tiezi_html = opener.open(req).read()
    #print tiezi_html
    re_img = re.compile(r'\0:
        th = threading.Thread(target=downPic,args=(tiezi_url,) )
        th.start()
    else:
        break

=====

高效

#!/usr/bin/python
# -*- coding: cp936 -*-
#coding utf-8


import urllib
import urllib2
import re
import cookielib
import Queue
import threading
import socket
import time
import sys
import random

def log(message):
    """Append one timestamped line to ``log.txt`` in the working directory.

    message -- text to record. The line written is ``time.ctime()``, a
               single space, the message, and a trailing newline.
    """
    # ``with`` closes the handle even when the write raises; the handle also
    # no longer reuses (and shadows) this function's own name.
    with open("log.txt", "a") as log_file:
        log_file.write(time.ctime() + " " + message + "\n")
	
def getPic():
    """Take one image reference from the global queue ``q`` and save it.

    The queue entry is used as the download URL when it starts with
    "http"; otherwise it is appended to the forum base URL. The response
    body is written to ``.\\img\\`` under a name made of the last URL path
    segment, a random integer and ``.jpg``.

    Relies on the module-level ``q``, ``headers``, ``opener`` and ``log``.
    Any failure while downloading or saving is recorded through ``log``
    (queue entry, exception type, exception value) instead of being raised,
    so one bad image does not kill the worker thread.
    """
    i = q.get()

    # Both kinds of entry go through the same download path; only the URL
    # differs.
    if re.match("http", i):
        img_url = i
    else:
        img_url = "http://38.103.161.185/forum/%s" % i
    filename = re.split(r'/', img_url)

    try:
        req = urllib2.Request(img_url, None, headers)
        response = opener.open(req)
        try:
            res = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        # The random suffix keeps images that share a basename from
        # overwriting each other.
        savefile = ('.\\img\\' + filename[-1]
                    + str(int(random.random() * 100000000)) + '.jpg')
        with open(savefile, 'wb') as out:
            out.write(res)
    except Exception:
        # Best-effort download: log and carry on. ``Exception`` (rather than
        # a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        etype, value = sys.exc_info()[:2]
        errormsg = i + "||" + str(etype) + "||" + str(value)
        log(errormsg)
	

def downPic(tiezi_url,q):
    req = urllib2.Request(tiezi_url,None,headers)
    tiezi_html = opener.open(req).read()
    #print tiezi_html
    re_img = re.compile(r'\0:
            th = threading.Thread(target=getPic)
            th.start()
            #print "Queue %s"%q.qsize()
        else:
            break


# --- Script setup: shared HTTP state, forum login, board page fetch ---------

# Request headers sent with every request built below.
# NOTE(review): the User-agent value has no closing ")" -- confirm whether
# that is intentional.
headers ={"User-agent":"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"}
# Default timeout, in seconds, for sockets created from here on.
socket.setdefaulttimeout(30)
# Cookie jar attached to the opener so cookies set by the login response are
# sent with later requests made through the same opener.
cj = cookielib.CookieJar()
#proxy = urllib2.ProxyHandler({'http': '127.0.0.1:8087'})
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
#urllib2.install_opener(opener)
# Login form fields.
# NOTE(review): the two hash-named keys with "xxx" values are presumably the
# username and password fields -- replace the placeholders before running.
# NOTE(review): "formhash" is hard-coded; presumably a per-site/session token
# that may need refreshing -- confirm.
data =  {
    "formhash":"3fec4925",
    "referer":"index.php",
    "loginfield":"username",
    "240aa46b3893fb57c436c0a3785b61e7":"xxx",
    "ea32b1cadbde4b66ca614e0bb593d1c9":"xxx",
    "questionid":"0",
    "answer":"",
    "cookietime":"2592000",
    "loginmode":"",
    "styleid":"",
    "loginsubmit":"true"}
post_data = urllib.urlencode(data)
# Supplying a data argument makes urllib2 issue a POST request.
req = urllib2.Request("http://38.103.161.185/forum/logging.php?action=login&",post_data,headers)
# The login response body is not inspected; the request is made for the
# cookies it stores in ``cj``.
content=opener.open(req)
#print content.read()
# Fetch the board index page through the same cookie-carrying opener.
req2 = urllib2.Request("http://38.103.161.185/forum/forum-62-1.html",None,headers)
board_html = opener.open(req2).read()
#print board_html
re_link = re.compile(r'\



你可能感兴趣的:(Python)