import requests
from lxml import etree
# Browser-like request headers; passed as ``headers=`` to the HTTP calls
# made by the functions below.
header = {
    'User-Agent': (
        'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) '
        'Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10'
    ),
}
def main():
    """Run the login flow end to end.

    Opens one HTTP session, uses it to fetch the captcha image and the
    hidden form fields, then submits the login form with the same session
    so cookies set by the first request are reused by the second.
    """
    # The context manager closes the session (and its pooled connections)
    # on exit, including when one of the steps raises.
    with requests.Session() as s:
        view1, view2 = download_indentifying_code(s)
        login(view1, view2, s)
def download_indentifying_code(s):
    """Fetch the login page, save its captcha image, and return hidden form state.

    The captcha image referenced by ``<img id="imgCode">`` is downloaded with
    the same session and written to ``indentifyingCode.png`` in the current
    working directory.

    Args:
        s: Session-like object exposing ``get(url, headers=...)``; ``main``
            passes a ``requests.Session``.

    Returns:
        A 2-tuple ``(viewstate, viewstategenerator)``. Each element is the
        list produced by the XPath query for the ``value`` attribute of the
        ``__VIEWSTATE`` / ``__VIEWSTATEGENERATOR`` input (empty if the input
        is not found).

    Raises:
        IndexError: The login page contains no ``img`` with id ``imgCode``.
        requests.HTTPError: The login page or the captcha image request
            returned an error status.
    """
    from urllib.parse import urljoin

    url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
    r = s.get(url, headers=header)
    # Fail early instead of parsing an error page.
    r.raise_for_status()
    tree = etree.HTML(r.text)

    image_src = tree.xpath('//img[@id="imgCode"]/@src')
    if not image_src:
        # Same exception type the bare ``image_src[0]`` would raise, but with
        # a message that says what was actually missing.
        raise IndexError('captcha image (img#imgCode) not found on login page')
    # Resolve the src against the page URL so root-relative, page-relative
    # and absolute src values all yield a valid URL.
    image_url = urljoin(url, str(image_src[0]))

    r_image = s.get(image_url, headers=header)
    # Do not write an HTTP error body to disk as if it were the captcha.
    r_image.raise_for_status()
    with open('indentifyingCode.png', 'wb') as fp:
        fp.write(r_image.content)

    viewstate = tree.xpath('//div/input[@id="__VIEWSTATE"]/@value')
    viewstate_generator = tree.xpath('//div/input[@id="__VIEWSTATEGENERATOR"]/@value')
    return viewstate, viewstate_generator
def login(view1, view2, s):
    """Submit the login form and save the response page to ``gushi.html``.

    Prompts on stdin for the captcha text, posts the form with the given
    session, and writes the response body to ``gushi.html`` (UTF-8) in the
    current working directory.

    Args:
        view1: Value sent as the ``__VIEWSTATE`` form field.
        view2: Value sent as the ``__VIEWSTATEGENERATOR`` form field.
        s: Session-like object exposing ``post(url=..., headers=..., data=...)``;
            ``main`` passes the same session used to fetch the captcha.
    """
    # The URL must not carry surrounding whitespace: a trailing space would
    # otherwise become part of the ``from`` query value sent to the server.
    post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
    code = input('请输入验证码:')
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from the environment or a prompt instead of keeping them in source.
    formdata = {
        '__VIEWSTATE': view1,
        '__VIEWSTATEGENERATOR': view2,
        'from': 'http://so.gushiwen.org/user/collect.aspx',
        'email': '[email protected]',
        'pwd': '123456789',
        'code': code,
        'denglu': '登录',
    }
    r = s.post(url=post_url, headers=header, data=formdata)
    with open('gushi.html', 'w', encoding='utf8') as fp:
        fp.write(r.text)
# Script entry point: run the login flow only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()