带有验证码的模拟登录两种方式post和selenium

首先是验证码的识别

  • 如果验证码图片随着url地址改变而改变,即验证码的url地址是变化的,这种就很好办,直接请求并下载验证码利用打码平台进行验证码图片识别即可

  • 如果验证码图片的url地址不变,而验证码是随机改变的,就必须保证"获取验证码"和"提交登录"处于同一个会话(携带相同的cookies)中,有以下两种方法:

  • post方法:创建session,用它对验证码url发起get请求并下载图片,识别后得到验证码的识别结果;再利用同一个session发起post请求,把账号、密码和验证码识别结果作为表单数据提交,从而模拟登录

  • selenium方法:利用selenium打开登录页面,并用selenium记录下此时页面的cookies,然后用requests.get()携带这些cookies请求验证码url、下载验证码图片并交给打码平台识别,得到识别结果后,再用selenium输入账号、密码和验证码完成登录

打码平台使用的是超级鹰
此处以去哪儿网为例,尝试带有验证码的模拟登录
带有验证码的模拟登录两种方式post和selenium_第1张图片
post方法

import requests
from chaojiying import Chaojiying_Client
import re
import sys

# Chaojiying (captcha-solving service) account details.
account = ''
password = ''
id = ''
image_kind = 1902  # captcha type code: four English letters or digits
# URL of the captcha image.
img_url = 'https://user.qunar.com/captcha/api/image?k={en7mni(z&p=ucenter_login&c=ef7d278eca6d25aa6aec7272d57f0a9a'
# Login URL found by capturing the browser's login request.
login_url = 'https://user.qunar.com/passport/loginx.jsp'

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
}
# Use one session for both requests: it carries the cookies set by the captcha
# request over to the login request, which is what ties the submitted captcha
# answer to the image that was actually served.
sesson = requests.session()
response = sesson.get(img_url, headers=headers, timeout=10)
# Fail fast on an HTTP error instead of sending an error page to the solver.
response.raise_for_status()
captcha_bytes = response.content

# Keep a copy of the image on disk so it can be inspected by hand.
with open('D://验证码.jpg', 'wb') as fp:
    fp.write(captcha_bytes)

chaojiying = Chaojiying_Client(account, password, id)
# Submit the bytes already in memory; no second (unclosed) file handle needed.
image_num = chaojiying.PostPic(captcha_bytes, image_kind)
if image_num['err_str'] != 'OK':
    print('验证码识别失败')
    sys.exit('验证码识别错误,模拟登录失败')

print('验证码识别成功')
print('验证码是{0}'.format(image_num['pic_str']))
real_number = image_num['pic_str']  # recognised captcha text

form_data = {
    'loginType': 0,
    # Qunar account credentials (placeholders). The username must be a string
    # literal: a bare name here raises NameError when the dict is built.
    'username': 'xxxxxxx',
    'password': 'xxxxxxx',
    'remember': 0,
    'vcode': real_number,
}
headers = {
    'referer': 'https://user.qunar.com/passport/login.jsp',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
}
response = sesson.post(login_url, data=form_data, headers=headers, timeout=10)
print(response.headers)
print(response.cookies)
account_name = 'sxpc2379'  # account name expected to appear after login
# The login counts as successful when the account name shows up in the
# response headers. Escape it so it is matched literally, not as a regex.
match = re.search(re.escape(account_name), str(response.headers), re.S)
if match:
    print('模拟登录成功')
else:
    print('模拟登录失败')
    

selenium

import requests
from chaojiying import Chaojiying_Client# 必须先把官方的Api文件放到同级目录下
from selenium import webdriver
import time

# Request headers carrying a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.110 Safari/537.36'
    ),
}

def get_img_num(driver=None):
    """Download the captcha with the browser's cookies and return its text.

    The cookies of the Selenium session are copied into a plain ``requests``
    call so that the image is fetched under the same cookies as the page open
    in the browser. The image is saved to ``D://验证码.jpg`` and sent to the
    Chaojiying captcha-solving service.

    Args:
        driver: Selenium WebDriver whose cookies are reused. When omitted, the
            module-level ``driver`` variable is used, so existing
            zero-argument calls keep working.

    Returns:
        The recognised captcha string, or ``None`` when recognition failed.

    Raises:
        RuntimeError: if no driver was passed and no module-level ``driver``
            exists.
    """
    if driver is None:
        # Backward-compatible fallback for callers that publish the driver as
        # a module global instead of passing it in.
        driver = globals().get('driver')
    if driver is None:
        raise RuntimeError(
            'get_img_num() needs a Selenium driver: pass one in or define a '
            'module-level `driver` first'
        )

    # Chaojiying account details.
    account = ''
    password = ''
    soft_id = ''
    image_kind = 1902  # captcha type code: four English letters or digits

    img_url = 'https://user.qunar.com/captcha/api/image?k={en7mni(z&p=ucenter_login&c=ef7d278eca6d25aa6aec7272d57f0a9a'
    # Selenium returns a list of cookie dicts; requests wants name -> value.
    cookies = {one['name']: one['value'] for one in driver.get_cookies()}
    # Send the module-level headers too, so the request carries the same
    # browser-like User-Agent the script defines for its HTTP calls.
    response = requests.get(img_url, cookies=cookies, headers=headers, timeout=10)
    captcha_bytes = response.content

    # Keep a copy of the image on disk so it can be inspected by hand.
    with open('D://验证码.jpg', 'wb') as fp:
        fp.write(captcha_bytes)

    chaojiying = Chaojiying_Client(account, password, soft_id)
    # Submit the bytes already in memory; no second (unclosed) file handle needed.
    image_num = chaojiying.PostPic(captcha_bytes, image_kind)
    if image_num['err_str'] == 'OK':
        print('验证码识别成功')
        print('验证码是{0}'.format(image_num['pic_str']))
        return image_num['pic_str']

    print('验证码识别失败')
    return None


# The login form on this page is not inside an iframe; if it were, the driver
# would have to switch into that frame before locating any element.
def login():  # remember to click once on the home page first
    """Log in to Qunar through a Chrome window driven by Selenium.

    Opens the login page, switches to account/password mode, fills in the
    account, the password and the captcha text obtained from
    ``get_img_num()``, submits the form, then prints the browser cookies and
    the refreshed page source.

    Raises:
        RuntimeError: if the captcha could not be recognised.
    """
    # get_img_num() reads the module-level `driver` to copy the browser
    # cookies, so the driver has to be published as a global.
    global driver
    # Local import keeps this function self-contained. find_element(By, ...)
    # is used because the find_element_by_* helpers were removed in Selenium 4.
    from selenium.webdriver.common.by import By

    url = 'https://user.qunar.com/passport/login.jsp?ret=https%3A%2F%2Fwww.qunar.com%2F%3Fex_track%3Dauto_4e23ad93'
    driver = webdriver.Chrome()
    # Set once: the implicit wait applies to every later element lookup.
    driver.implicitly_wait(10)
    driver.get(url)

    # Switch the login mode to account + password.
    first_button = driver.find_element(By.CLASS_NAME, 'pwd-login')
    first_button.click()

    button1 = driver.find_element(By.CLASS_NAME, 'radio_normal')
    button1.click()

    # Each XPath needs the closing `]` of the @class predicate to be valid.
    input1 = driver.find_element(By.XPATH, '//div[@class="field-login"]/div[1]/input')
    input1.clear()
    input1.send_keys('账号')

    input2 = driver.find_element(By.XPATH, '//div[@class="field-login"]/div[4]/input')
    input2.clear()
    input2.send_keys('密码')

    image_input = driver.find_element(By.XPATH, '//div[@class="field-login"]/div[5]/div/input')
    image_input.clear()
    image_num = get_img_num()
    if not image_num:
        # get_img_num() yields None when recognition fails; stop with a clear
        # error rather than passing None to send_keys().
        raise RuntimeError('captcha recognition failed; cannot submit the login form')
    image_input.send_keys(image_num)  # type in the captcha

    login_button = driver.find_element(By.ID, 'submit')  # the login button
    login_button.click()
    cookies = driver.get_cookies()
    print(cookies)
    driver.refresh()
    print(driver.page_source)


if __name__ == '__main__':
    login()
    

你可能感兴趣的:(爬虫)