Python自学记录--验证码识别模拟登录

#!/usr/bin/env python
# coding:utf-8

import requests
from hashlib import md5
from lxml import etree
import time

# Current Unix time in whole milliseconds. In Python 3, round() on a float
# with no ndigits argument already returns an int.
t = round(time.time() * 1000)

class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The constructor stores the account name, the MD5 hex digest of the
    password and the software id; these are sent as form fields with
    every request made by :meth:`PostPic` and :meth:`ReportError`.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        """Prepare the form fields and headers shared by all requests.

        Args:
            username: Account name, sent as the ``user`` form field.
            password: Plain-text password (``str``). It is UTF-8 encoded and
                only its MD5 hex digest is kept and sent (``pass2`` field).
            soft_id: Software id, sent as the ``softid`` form field.
            timeout: Value forwarded to ``requests.post(timeout=...)`` for
                every request. Defaults to 60 seconds; pass ``None`` to wait
                indefinitely (the behaviour before this parameter existed).
        """
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        # Without a timeout, requests waits forever on a stalled server.
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload an image for recognition and return the decoded JSON reply.

        Args:
            im: Image content as bytes.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html for the list.

        Returns:
            The response body parsed with ``Response.json()``.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised image and return the decoded JSON reply.

        Args:
            im_id: Id of the image whose recognition result was wrong.

        Returns:
            The response body parsed with ``Response.json()``.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()



def tranformImgCode(imgPath,imgType):
    """Send the image file at ``imgPath`` to Chaojiying and return the recognised text.

    Args:
        imgPath: Path of the captcha image; the file is read in binary mode.
        imgType: Captcha type code, forwarded to ``PostPic`` as ``codetype``.

    Returns:
        The value stored under ``'pic_str'`` in the service's JSON reply.

    Raises:
        KeyError: If the reply contains no ``'pic_str'`` entry.
    """
    # The three arguments are placeholders: account name, password, software id.
    chaojiying = Chaojiying_Client('超级鹰账号', '超级鹰密码', '软件ID')
    # Read inside a context manager so the file handle is closed right away
    # instead of being left open until garbage collection.
    with open(imgPath, 'rb') as image_file:
        im = image_file.read()
    return chaojiying.PostPic(im, imgType)['pic_str']

# print(tranformImgCode(r'D:\代码保存\超级鹰_Python\a.jpg',1902))

# A single Session is reused for every request below so that state such as
# cookies persists from the login page through to the login POST.
session = requests.Session()

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
}

# Step 1: fetch the login page, which embeds the captcha <img> element.
url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
login_page = session.get(url=url, headers=headers)
page_text = login_page.text

# Step 2: read the captcha image's src attribute and build its absolute URL,
# adding the millisecond timestamp ``t`` as a query parameter.
tree = etree.HTML(page_text)
captcha_src = tree.xpath('//img[@id="imgCode"]/@src')[0]
img_src = f'https://so.gushiwen.cn{captcha_src}?t={t}'
print(img_src)

# Step 3: download the captcha through the same session and save it locally.
captcha_file = r'D:\代码保存\a.jpg'
img_data = session.get(img_src, headers=headers).content
with open(captcha_file, 'wb') as fp:
    fp.write(img_data)

# Step 4: have the saved image recognised (captcha type code 1902).
code_text = tranformImgCode(captcha_file, 1902)
print(code_text)

# Step 5: submit the login form. 'email' and 'pwd' are placeholder
# credentials; the two __VIEWSTATE* values are hard-coded.
login_url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'
data = {
    '__VIEWSTATE': 'uoQIcMK9IobQkeY9MVAmtKoq33r3OLASfUdWmTdVKWkPtsNNpQMuqH92Trfdy05CDl1WFTmp5wys9JzcE+3wziAytRHxPGrKiZP9/29sMhAoT3AIsd0WDlhIqoU=',
    '__VIEWSTATEGENERATOR': 'C93BE1AE',
    'from': 'http://so.gushiwen.cn/user/collect.aspx',
    'email': '网站账号',
    'pwd': '网站密码',
    'code': code_text,
    'denglu': '登录',
}
login_response = session.post(url=login_url, headers=headers, data=data)
page_text_login = login_response.text

# Step 6: keep the HTML returned by the login request for inspection.
with open(r'D:\代码保存\gushiwen.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text_login)

你可能感兴趣的:(Python自学记录,python,爬虫)