# Python爬虫-验证码登录

from urllib.parse import urljoin

import requests
from lxml import etree

# Browser-like request headers (a Firefox-on-Ubuntu User-Agent string) that the
# request helpers in this file pass as ``headers=``.
header = {
    'User-Agent': ' '.join((
        'Mozilla/5.0',
        '(X11; U; Linux x86_64; zh-CN; rv:1.9.2.10)',
        'Gecko/20100922',
        'Ubuntu/10.10 (maverick)',
        'Firefox/3.6.10',
    )),
}

def main():
    """Run the captcha login flow: fetch the captcha, then post the login form.

    A single ``requests.Session`` is shared by both steps and is closed when
    the flow finishes, whether it succeeds or raises.
    """
    # Using the session as a context manager closes it (and its pooled
    # connections) even if one of the steps below raises.
    with requests.Session() as s:
        # Save the captcha image locally and collect the hidden form fields.
        view1, view2 = download_indentifying_code(s)
        # Send the login POST on the same session.
        login(view1, view2, s)


def download_indentifying_code(s):
    """Fetch the login page, save its captcha image, and return the form tokens.

    The captcha image is written to ``indentifyingCode.png`` in the current
    working directory.

    Args:
        s: The ``requests.Session`` used for both GET requests.

    Returns:
        A ``(viewstate, viewstate_generator)`` tuple with the XPath results for
        the ``value`` attributes of the ``__VIEWSTATE`` and
        ``__VIEWSTATEGENERATOR`` inputs. Each element is the list returned by
        ``xpath`` (empty when the input is not present on the page).

    Raises:
        requests.HTTPError: If the login page or the image request returns an
            error status.
        IndexError: If the page has no ``<img id="imgCode">`` element.
    """
    url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
    # A timeout keeps the script from hanging forever on an unresponsive server.
    r = s.get(url, headers=header, timeout=10)
    # Fail early instead of parsing an error page.
    r.raise_for_status()
    tree = etree.HTML(r.text)
    captcha_srcs = tree.xpath('//img[@id="imgCode"]/@src')
    if not captcha_srcs:
        raise IndexError('captcha image (img#imgCode) not found on the login page')
    # urljoin resolves root-relative, page-relative and absolute src values;
    # for a root-relative src it yields the same URL as plain concatenation.
    image_src = urljoin(url, str(captcha_srcs[0]))
    # Download the captcha image.
    r_image = s.get(image_src, headers=header, timeout=10)
    # Do not save an HTTP error body as if it were the captcha image.
    r_image.raise_for_status()
    with open('indentifyingCode.png', 'wb') as fp:
        fp.write(r_image.content)
    # Look up the two hidden fields the login form requires.
    viewstate = tree.xpath('//div/input[@id="__VIEWSTATE"]/@value')
    viewstate_generator = tree.xpath('//div/input[@id="__VIEWSTATEGENERATOR"]/@value')
    return viewstate, viewstate_generator

def login(view1, view2, s):
    """Prompt for the captcha text, post the login form, and save the response.

    The response body is written to ``gushi.html`` (UTF-8) in the current
    working directory so the result of the login attempt can be inspected.

    Args:
        view1: Value submitted as the ``__VIEWSTATE`` form field.
        view2: Value submitted as the ``__VIEWSTATEGENERATOR`` form field.
        s: The ``requests.Session`` used to send the POST request.
    """
    # No surrounding whitespace: it is not part of the URL, and a trailing
    # space would otherwise be sent as part of the ``from`` query value.
    post_url = 'https://so.gushiwen.org/user/login.aspx?from=http%3a%2f%2fso.gushiwen.org%2fuser%2fcollect.aspx'
    # Ask the user to type the captcha text.
    code = input('请输入验证码:')
    # NOTE(review): the credentials below are hard-coded in the source; consider
    # reading them from the environment or a prompt instead.
    formdata = {
        '__VIEWSTATE': view1,
        '__VIEWSTATEGENERATOR': view2,  # token field
        'from': 'http://so.gushiwen.org/user/collect.aspx',
        'email': '[email protected]',
        'pwd': '123456789',
        'code': code,
        'denglu': '登录',
    }
    # A timeout keeps the script from hanging forever on an unresponsive server.
    r = s.post(url=post_url, headers=header, data=formdata, timeout=10)
    with open('gushi.html', 'w', encoding='utf8') as fp:
        fp.write(r.text)

# Script entry point: run the login flow only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
    

# 你可能感兴趣的:(Python爬虫,python)