python爬虫学习日记(2)--分析POST信息(MD5加密)

这次依然以黎明大学的教务管理系统为例。之前我以为它的POST很简单,后来才发现它对提交的信息做了"加密"(其实是MD5哈希),而且是用其他字段的值参与计算另一个字段。所以这次重新分析一遍。这里涉及到javascript,我并不会,但是通过百度都能查到,下面看我是怎么一步步挤牙膏的。

1.读表单

我们先来读表单的内容,选取可能被加密的

<input class="tx1" id="txt_pewerwedsdfsdff" 
style="WIDTH:134px;HEIGHT:20px" 
type="password" maxLength="25" 
name="txt_pewerwedsdfsdff" 
value="" onblur="chkpwd(this)" 
onkeyup="chkpwd(this)">


<input  class="tx1" id="txt_sdertfgsadscxcadsads" 
name="txt_sdertfgsadscxcadsads" 
style="WIDTH:134px;HEIGHT:20px" 
onblur="chkyzm(this)" onkeyup="chkyzm(this)" 
maxlength="4">


这里我们可以看到,这两个输入框都绑定了javascript事件处理函数:onblur 和 onkeyup。


2.查资料

通过查阅资料:

onkeyup 事件会在键盘按键被松开时发生。(瞬间执行)

onblur 事件会在对象失去焦点时发生。(焦点离开输入框后执行,例如点击页面上的其他位置)


接下来我们看这两个事件调用的函数:chkpwd()和chkyzm()


3.整理分析代码

原来的代码很乱,我给他整理了

// Handler for the password box: fills the hidden field dsdsdsdsdxcxdfgfg
// with a digest derived from the txt_asmcdefsddsd field and the typed password.
function chkpwd(obj) {
    if (obj.value == '') {
        // Nothing typed: the hidden field just mirrors the (empty) value.
        document.all.dsdsdsdsdxcxdfgfg.value = obj.value;
        return;
    }
    var userId = document.all.txt_asmcdefsddsd.value;
    // Step 1: MD5 of the password, first 30 characters, upper-cased.
    var pwdDigest = md5(obj.value).substring(0, 30).toUpperCase();
    // Step 2: userId + step-1 digest + the fixed suffix '11317', hashed and
    // trimmed/upper-cased the same way.
    var s = md5(userId + pwdDigest + '11317').substring(0, 30).toUpperCase();
    document.all.dsdsdsdsdxcxdfgfg.value = s;
}


// Handler for the captcha box: fills the hidden field fgfggfdgtyuuyyuuckjg
// with a digest derived from the upper-cased captcha text.
function chkyzm(obj) {
    if (obj.value == '') {
        // Nothing typed: the hidden field just mirrors the (empty) value.
        document.all.fgfggfdgtyuuyyuuckjg.value = obj.value.toUpperCase();
        return;
    }
    // Step 1: MD5 of the upper-cased captcha, first 30 characters, upper-cased.
    var codeDigest = md5(obj.value.toUpperCase()).substring(0, 30).toUpperCase();
    // Step 2: append the fixed suffix '11317', hash again, trim/upper-case again.
    var s = md5(codeDigest + '11317').substring(0, 30).toUpperCase();
    document.all.fgfggfdgtyuuyyuuckjg.value = s;
}


其实我看不懂这些代码,但是我可以查,我先从参数开始看起

把this传给obj,这里的this我们要知道他是谁

没有找到详细的回答,但我从下面的代码猜测

obj.value

在内联事件处理代码(比如 onblur="chkpwd(this)")里,this 指的就是触发事件的那个元素本身,也就是这个输入框;传进函数后它就是 obj,而 obj.value 就是输入框里当前的内容。


我们再接着读代码,

toUpperCase() 方法用于把字符串转换为大写。

substring() 方法用于提取字符串中介于两个指定下标之间的字符。

比如substring(0,30)就是取下标0到29的字符,也就是前30个字符(MD5的十六进制结果共32位,这里只保留前30位)。


整个过程就是反复地转大写、取前30位,再加上MD5。严格来说MD5是一种哈希(散列)算法,而不是可以解密的加密。

md5()这个函数可以在他导入的js看到

<script type="text/javascript" src="../js/md5.js"></script>

这里我们就不打开了,代码太长,而且MD5可以直接用python标准库hashlib来计算。


4.尝试实现

# -*- coding: utf-8 -*-
import urllib2
import urllib
import cookielib
import cStringIO
import re
import os
import hashlib

# Avoid encoding errors #
# NOTE(review): this is a Python 2-only workaround. reload(sys) is needed
# because sys.setdefaultencoding is removed from the sys module at startup;
# the call then switches the interpreter-wide default codec to UTF-8, which
# affects every implicit str/unicode conversion in the process.
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

##########################################################
# Cookie handling #

# Jar that collects the cookies sent back by the server #
cookieJar = cookielib.MozillaCookieJar()

# Processor that stores response cookies in the jar and replays them on
# later requests made through the opener #
cookieSupport = urllib2.HTTPCookieProcessor(cookieJar)

# Two handlers used for debugging: debuglevel=1 makes the underlying
# connection print the HTTP exchange #
httpHandler = urllib2.HTTPHandler(debuglevel=1)
httpsHandler = urllib2.HTTPSHandler(debuglevel=1)

# Build the opener from the cookie processor and BOTH debug handlers
# (httpsHandler used to be created but never installed) #
opener = urllib2.build_opener(cookieSupport, httpHandler, httpsHandler)

# Install the opener globally so plain urllib2.urlopen() calls use it too #
urllib2.install_opener(opener)

##########################################################
# Endpoint definitions #

# Root of the site; both addresses below are built from it #
_SITE_ROOT = "http://jwmis.lmu.cn"

# Login page address #
loginpage = _SITE_ROOT + "/_data/index_LOGIN.aspx"

# Page that processes the POSTed form (same address as the login page) #
postURL = loginpage

##########################################################
# Test section: check whether the cookie was obtained successfully #
# The triple-quoted block below is disabled code: it is a bare string
# expression, so nothing inside it is executed. #

'''
LoginCookies = urllib2.urlopen(loginpage)
#打印cookies,成功打印,说明cookie获取成功#
print cookieJar
'''

##########################################################
# Build the information to be sent #

# Credentials and the derived password digest #

userid = raw_input("userid:\n")
password = raw_input("password:\n")

# Port of the page's chkpwd():
#   md5(userid + md5(password)[0:30].upper() + '11317')[0:30].upper()
# chkpwd() leaves the hidden field empty when no password was typed, so the
# empty case is passed through unhashed here as well.
if password != '':
    password_digest = hashlib.md5(password).hexdigest()[0:30].upper()
    salted_login = userid + password_digest + '11317'
    dsdsdsdsdxcxdfgfg = hashlib.md5(salted_login).hexdigest()[0:30].upper()
else:
    dsdsdsdsdxcxdfgfg = password

# Hidden ASP.NET state, captcha handling and the POST payload #

def _hidden_field(html, name, default=None):
    """Return the value attribute of the <input> tag called *name* in *html*.

    *default* is returned when the tag, or its value attribute, is missing.
    """
    tag = re.search(r'<input\b[^>]*\bname=["\']%s["\'][^>]*>' % re.escape(name),
                    html, re.I)
    if tag is None:
        return default
    value = re.search(r'\bvalue=["\']([^"\']*)["\']', tag.group(0), re.I)
    if value is None:
        return default
    return value.group(1)


# Load the login page before the captcha, in the order a browser would, and
# read the hidden state fields from it. A __VIEWSTATE pasted from an old
# browser session goes stale as soon as the page changes, so the value the
# server has just issued is posted back instead.
login_html = urllib2.urlopen(loginpage).read()
viewstate = _hidden_field(login_html, '__VIEWSTATE')
if viewstate is None:
    raise RuntimeError("__VIEWSTATE not found in " + loginpage)
# Fall back to the previously captured value if the page omits this field.
viewstate_generator = _hidden_field(login_html, '__VIEWSTATEGENERATOR', 'CAA0A5A7')

# Download the captcha image, save it so the user can look at it, then ask
# for the text. urlopen() uses the installed opener, so the session cookie
# issued here is kept in cookieJar. #
im_url = 'http://jwmis.lmu.cn/sys/ValidateCode.aspx'
im_data = urllib2.urlopen(im_url).read()
with open('Code.png', 'wb') as f:
    f.write(im_data)
validateCode = raw_input("Code:\n")

# Port of the page's chkyzm():
#   md5(md5(code.upper())[0:30].upper() + '11317')[0:30].upper()
# chkyzm() leaves the hidden field empty when no captcha was typed, so the
# empty case is passed through unhashed here as well.
code = validateCode.upper()
if validateCode != '':
    code_digest = hashlib.md5(code).hexdigest()[0:30].upper()
    fgfggfdgtyuuyyuuckjg = hashlib.md5(code_digest + '11317').hexdigest()[0:30].upper()
else:
    fgfggfdgtyuuyyuuckjg = code

# Form fields of the login request #
PostData = {
    '__VIEWSTATE' : viewstate,
    '__VIEWSTATEGENERATOR' : viewstate_generator,
    'pcInfo' : 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0Windows NT 6.3; WOW645.0 (Windows) SN:NULL',
    'typeName' : '学生',
    'dsdsdsdsdxcxdfgfg' : dsdsdsdsdxcxdfgfg,
    'fgfggfdgtyuuyyuuckjg' : fgfggfdgtyuuyyuuckjg,
    'Sel_Type' : 'STU',
    'txt_asmcdefsddsd' : userid,
    'txt_pewerwedsdfsdff' : password,
    'txt_sdertfgsadscxcadsads' : validateCode,
    'sbtState' : ''
}

# Build the value of the Cookie header #
# Every cookie in the jar becomes "name=value"; the pairs are separated by
# ";" with no trailing separator.
cookie = ";".join(entry.name + "=" + entry.value for entry in cookieJar)

# Headers of the POST request #
# Content-Length is deliberately not set: urllib2 fills it in from the actual
# body when the header is absent, whereas a fixed number stops matching as
# soon as any field changes length.
# Accept-Encoding is deliberately not set either: urllib2 does not decompress
# responses, so asking for gzip/deflate could hand back a body that prints
# as binary garbage.
headers = {
    'Host' : 'jwmis.lmu.cn',
    'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/36.0',
    'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language' : 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    'Cookie' : cookie,
    'Connection' : 'keep-alive',
    'Content-Type' : 'application/x-www-form-urlencoded',
}

# Encode the form fields as an application/x-www-form-urlencoded body #
data = urllib.urlencode(PostData)
print("data:###############")
print(data)

# Build the request; passing data makes urllib2 issue a POST #
request = urllib2.Request(postURL, data, headers)

# Submit the login form exactly once. Sending the same request a second time
# would replay the same captcha answer. #
try:
    result = opener.open(request)
except urllib2.HTTPError:
    # Report the failure, then let the error propagate instead of carrying
    # on without a response.
    print("error")
    raise
status = result.getcode()
print("yes")

# Print the returned page #
print(result.read())

你可能感兴趣的:(python爬虫)