之前写的一直没成功,原因是没有使用HTTPS相关的函数。这次仔细研究了一下,有几个需要注意的点:一个是POST模拟登录的时候,header中的cookie值,不同的网站应该会有不同的要求;另一个是GET页面的时候,需要带上POST得到的response中的set-cookie值,这样后续的请求才能沿用登录成功后的会话。
写完POST和GET页面后,顺便写了个简单的命令行实现。
import io
import sys

try:  # Python 2 module names (the script was written for Python 2.7).
    import httplib
    import urllib
    import urllib2
    import cookielib
    from urllib import urlencode as _urlencode
except ImportError:  # Python 3 renamed these modules; keep the old names bound.
    import http.client as httplib
    import http.cookiejar as cookielib
    import urllib.parse
    import urllib.request as urllib2
    from urllib.parse import urlencode as _urlencode

# File the downloaded change-log page is written to and later parsed from.
file_text = "build_change.txt"
# Parsed change entries, keyed by their 0-based position in the page.
resultTable = dict()
# Server that both the login POST and the page GET are sent to.
host = 'buuuuuuu.knight.com'


def Login(username, password, csrf='Gy2O70iSjOTbWhWgBLvf4HDuf4jUe4RP'):
    """POST the login form over HTTPS and return the session cookie string.

    The CSRF token is sent twice: as the ``csrfmiddlewaretoken`` form field
    and as the ``csrftoken`` request cookie.

    Args:
        username: Account name submitted in the form.
        password: Password submitted in the form.
        csrf: CSRF token to use for both the form field and the cookie.

    Returns:
        The value of the response's ``set-cookie`` header, or ``None`` when
        the server did not send one.
    """
    url = '/login/'
    origin = 'https://%s' % host
    values = {
        'username': username,
        'password': password,
        'next': '',
        'csrfmiddlewaretoken': csrf,
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'keep-alive',
        'Cookie': 'csrftoken=%s' % csrf,
        'Referer': origin + url,
        'Origin': origin,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    }
    body = _urlencode(values)
    conn = httplib.HTTPSConnection(host, 443)
    try:
        conn.request("POST", url, body, headers)
        response = conn.getresponse()
        print('Login:  %s %s' % (response.status, response.reason))
        return response.getheader("set-cookie")
    finally:
        # Always release the socket, even when the request raises.
        conn.close()


def GetHtml(_url, cookie):
    """GET ``_url`` from ``host`` over HTTPS and save the body to ``file_text``.

    Args:
        _url: Request path on ``host``.
        cookie: Cookie header value (as returned by ``Login``). When it is
            empty or ``None`` no Cookie header is sent.
    """
    get_headers = {
        # Must name the server we actually connect to.
        'Host': host,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    }
    if cookie:
        get_headers['Cookie'] = cookie

    conn = httplib.HTTPSConnection(host)
    try:
        conn.request("GET", _url, None, get_headers)
        res2 = conn.getresponse()
        print("Get %s: %s %s" % (_url, res2.status, res2.reason))
        data = res2.read()
    finally:
        conn.close()

    # The body is raw bytes; write it unmodified (binary mode).
    with open(file_text, "wb") as fp:
        fp.write(data)


def ParseHtml():
    """Extract every ``class="change-body"`` section of ``file_text``.

    For each line containing ``class="change-body"`` an entry is stored in
    ``resultTable``: the text between the first and second ``>`` of that
    line, followed by every subsequent line up to and including the first
    one containing ``</div>`` (or up to end of file).  Entries are keyed
    0, 1, 2, ... in page order.  ``resultTable`` is emptied first so a rerun
    never keeps entries from a previous page.
    """
    resultTable.clear()
    position = 0
    with io.open(file_text, "r", encoding="utf-8", errors="replace") as fp:
        lines = iter(fp)
        for line in lines:
            if 'class="change-body"' not in line:
                continue
            pieces = line.split(">")
            # Guard against a marker line without any '>' (tag split over lines).
            entry = [pieces[1] if len(pieces) > 1 else ""]
            # Consume the rest of this section from the same iterator so the
            # outer loop resumes right after the closing </div>.
            for body_line in lines:
                entry.append(body_line)
                if "</div>" in body_line:
                    break
            resultTable[position] = "".join(entry)
            position += 1
    print("Parse html success.")


def _to_text(value):
    """Return ``value`` as text, decoding byte strings as UTF-8."""
    if isinstance(value, bytes):
        return value.decode("utf-8", "replace")
    return value


def GenerateResultTxt():
    """Write all ``resultTable`` entries to ``build_change_result.txt``.

    Entries are written in ascending key order, each preceded by a line of
    dashes.
    """
    separator = u"-------------------------------------------------------------------------------------------\n"
    with io.open("build_change_result.txt", "w", encoding="utf-8") as f:
        for key in sorted(resultTable):
            f.write(separator)
            f.write(_to_text(resultTable[key]))
    print("Generate result success : build_change_result.txt .")


def Help():
    """Print the command-line usage text."""
    usage = (
        '-h : help',
        '-u : username(must)',
        '-p : password(must)',
        '-c : csrftoken(optional)',
        '-s : sandbox build id(must)',
        'For example:',
        '[1] python BuildChange.py -h',
        '[2] python BuildChange.py -u u -p p -s s1 s2',
        '[3] python BuildChange.py -u u -p p -c c -s s1 s2',
    )
    print("\n".join(usage))


def _parse_options(com):
    """Collect the -u/-p/-c/-s option values from an argv-style list."""
    options = {'username': "", 'password': "", 'csrf': "", 'sid1': "", 'sid2': ""}
    single_value_flags = {'-u': 'username', '-p': 'password', '-c': 'csrf'}
    length = len(com)
    i = 1
    while i < length:
        flag = com[i]
        if flag in single_value_flags and i + 1 < length:
            options[single_value_flags[flag]] = com[i + 1]
            i += 2  # skip the value so it is not re-read as a flag
        elif flag == '-s' and i + 2 < length:
            options['sid1'] = com[i + 1]
            options['sid2'] = com[i + 2]
            i += 3
        else:
            i += 1
    return options


def ParseParam(com):
    """Run the tool from an argv-style list.

    ``-h`` as the first argument prints the usage text and stops.  Otherwise
    ``-u``, ``-p`` and ``-s <id1> <id2>`` are required (``-c`` is optional);
    when any is missing an error is printed.  With valid arguments the
    function logs in, downloads the change log between the two build ids,
    parses it and writes the result file.

    Args:
        com: Argument list in ``sys.argv`` layout (index 0 is the script).
    """
    if len(com) > 1 and com[1] == '-h':
        Help()
        return

    options = _parse_options(com)
    username = options['username']
    password = options['password']
    csrf = options['csrf']
    sid1 = options['sid1']
    sid2 = options['sid2']

    if username == "" or password == "" or sid1 == "" or sid2 == "":
        print('[Error] Parameter error!')
        print('[Error] You can use "python BuildChange.py -h" to see how can use this script. ')
        return

    if csrf == "":
        cookie = Login(username, password)
    else:
        cookie = Login(username, password, csrf)
    if not cookie:
        # Without a session cookie the GET below cannot be authenticated.
        print('[Error] Login failed: no set-cookie header in the response.')
        return

    # NOTE(review): the doubled slashes are kept exactly as in the original
    # script; confirm against the server's URL scheme.
    _url = "//changelog//between//%s//and//%s/" % (sid1, sid2)
    GetHtml(_url, cookie)
    ParseHtml()
    GenerateResultTxt()


# Example invocation:
# C:\Python27\python.exe C:\Users\knight\Desktop\build\BuildChange.py -u xux -p KKKKKKKK -s 1859409 1858525
if __name__ == "__main__":
    ParseParam(sys.argv)