Python selenium自动化模拟12306登录 验证码识别 滑动滑块

模拟12306登录

验证码识别平台

本次选择超级鹰
https://www.chaojiying.com/
12306 验证码识别 价格体系
Python selenium自动化模拟12306登录 验证码识别 滑动滑块_第1张图片
注册充值之后生成软件id(一元即可)
Python selenium自动化模拟12306登录 验证码识别 滑动滑块_第2张图片
下载官方demo
Python selenium自动化模拟12306登录 验证码识别 滑动滑块_第3张图片
官方demo.py

#!/usr/bin/env python
# coding:utf-8

import requests
from hashlib import md5


class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The instance stores the account parameters that are sent with every
    request, plus the HTTP headers and the request timeout.

    NOTE(review): both endpoints are plain ``http://``, so the account name
    and password hash travel unencrypted; switch to HTTPS if the service
    offers it -- TODO confirm.
    """

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the account parameters used by every API call.

        username: Chaojiying account name.
        password: plain-text password; only its MD5 hex digest is kept.
        soft_id: software ID generated in the Chaojiying user centre.
        timeout: seconds to wait for the server before ``requests`` raises
            a timeout error; pass ``None`` to wait indefinitely (the
            previous behaviour).
        """
        self.username = username
        # The API receives the MD5 hex digest (field 'pass2'), never the
        # plain-text password.
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        # Without a timeout a stalled connection would block the caller forever.
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        # The image is sent as a multipart file field named 'userfile'.
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php',
                          data=params, files=files, headers=self.headers,
                          timeout=self.timeout)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised image and return the decoded JSON response.

        im_id: ID of the image whose recognition result was wrong.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params, headers=self.headers, timeout=self.timeout)
        return r.json()


if __name__ == '__main__':
    # Replace the three placeholders with a real account name, password and
    # the software ID generated in the Chaojiying user centre.
    chaojiying = Chaojiying_Client('username', 'password', '软件ID')
    # Read the sample image as bytes; the context manager closes the file.
    with open('./chaojiying_Python/a.jpg', 'rb') as image_file:
        im = image_file.read()
    # The second argument is the captcha type code from the service's price
    # list; this article uses 9004. The former placeholder expression was an
    # undefined name and raised NameError when the demo was run.
    print(chaojiying.PostPic(im, 9004))

测试图片
Python selenium自动化模拟12306登录 验证码识别 滑动滑块_第4张图片
结果返回坐标
在这里插入图片描述

模拟12306登录

https://kyfw.12306.cn/otn/resources/login.html

编码流程

selenium打开12306

# Start a Chrome session and load the 12306 login page.
login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
driver = webdriver.Chrome()
driver.get(login_url)

输入用户名密码

# Click the second entry of the login-mode list (presumably the
# account/password tab -- verify against the page), then type the credentials.
driver.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a').click()
user_box = driver.find_element_by_xpath('//*[@id="J-userName"]')
user_box.send_keys(username)
password_box = driver.find_element_by_xpath('//*[@id="J-password"]')
password_box.send_keys(password)

获取验证码图片

不能单独请求验证码图片的 URL:每请求一次,图片就会刷新,得到的图片与当前登录页面上显示的验证码不一致。
因此采用对页面截图的方法来获取验证码图片。

方法一

截取全屏

# Save the full-page screenshot under the exact path that the cropping step
# opens ('./selenium/img.png'). Selenium writes PNG data, so the file name
# should end in .png rather than .jpg.
driver.save_screenshot('./selenium/img.png')

获取验证码图片的左上角、右下角坐标

# Locate the captcha image and build its bounding box as
# (left, top, right, bottom) for cropping the full-page screenshot.
img = driver.find_element_by_xpath('//*[@id="J-loginImg"]')
location = img.location  # top-left corner: x, y
size = img.size  # width and height
left = int(location['x'])
top = int(location['y'])
rangle = (left, top, left + size['width'], top + size['height'])

裁剪图片
导入用于裁剪图片的模块(Pillow)
Python 3 下的安装命令:

pip install pillow
from PIL import Image
# Open the full-page screenshot, cut out the captcha rectangle and save it.
Image.open('./selenium/img.png').crop(rangle).save('./selenium/code.png')
方法二

方法一太繁琐了,不适合懒人
一句搞定
直接定位验证码图片标签位置截图

# Screenshot only the captcha element. Keep the element in `img`: the click
# step later uses it as the origin for the recognised offsets.
img = driver.find_element_by_xpath('//*[@id="J-loginImg"]')
img.screenshot('./selenium/code.png')

超级鹰识别验证码

# NOTE(review): the account name, password and software ID are hard-coded;
# load them from a private config module or the environment instead.
chaojiying = Chaojiying_Client('bobo328410948', 'bobo328410948', '910710')
# Read the captcha image as bytes; the context manager closes the file.
with open('./selenium/code.png', 'rb') as captcha_file:
    im = captcha_file.read()
# Keep the recognised coordinate string in `result`, because the parsing
# step that follows reads it.
result = chaojiying.PostPic(im, 9004)['pic_str']
print(result)

动作链实现点击验证码

以某个元素为基准,定位

move_to_element_with_offset(ele, x, y)

将识别返回的坐标字符串(形如 "x1,y1|x2,y2")处理为列表 [ [x1, y1], [x2, y2], … ]

# Convert the recognised string ("x1,y1|x2,y2|...") into [[x1, y1], [x2, y2], ...].
# str.split('|') returns a single-item list when there is no '|', so the
# single-point case needs no separate branch.
all_list = []
for point in result.split('|'):
    fields = point.split(',')
    # Only the first two comma-separated fields are used, as x and y.
    all_list.append([int(fields[0]), int(fields[1])])
print(all_list)

根据处理过的坐标,进行动作链操作

# Click every recognised point, using the captcha image as the offset origin.
for x, y in all_list:
    clicker = ActionChains(driver)
    clicker.move_to_element_with_offset(img, x, y).click().perform()
    # Short pause between two clicks.
    time.sleep(0.5)

点击登录按钮

# Press the login button, then wait three seconds before the next step.
login_button = driver.find_element_by_xpath('//*[@id="J-login"]')
login_button.click()
time.sleep(3)

滑动滑块

使用selenium滑动会被12306检测到,需要伪装一下
Python selenium自动化模拟12306登录 验证码识别 滑动滑块_第5张图片

# Make navigator.webdriver read as undefined so that 12306 does not block Selenium.
script = 'Object.defineProperty(navigator,"webdriver",{get:()=>undefined,});'
driver.execute_script(script)
span = driver.find_element_by_xpath('//*[@id="nc_1_n1z"]')
# Queue press -> drag 350 px to the right -> release as one chain and perform
# it once. Previously release() was queued after perform() and never executed,
# so the mouse button was never let go.
action = ActionChains(driver)
action.click_and_hold(span).move_by_offset(350, 0).release().perform()

大功告成

完整代码

from selenium import webdriver
from selenium.webdriver import ActionChains
from chaojiying import Chaojiying_Client
from PIL import Image
from key import *
import time

# End-to-end 12306 login: open the page, type the credentials, solve the
# click captcha through Chaojiying, press login and drag the slider.
# NOTE(review): find_element_by_xpath is the Selenium 3 helper API; it was
# removed in Selenium 4.3, where find_element(By.XPATH, ...) replaces it.
driver = webdriver.Chrome()
try:
    driver.maximize_window()  # maximise the browser window
    driver.get('https://kyfw.12306.cn/otn/resources/login.html')
    # Make navigator.webdriver read as undefined so that 12306 does not block Selenium.
    script = 'Object.defineProperty(navigator,"webdriver",{get:()=>undefined,});'
    driver.execute_script(script)
    time.sleep(1)
    # Click the second entry of the login-mode list (presumably the
    # account/password tab -- verify against the page).
    driver.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a').click()
    time.sleep(1)
    driver.find_element_by_xpath('//*[@id="J-userName"]').send_keys(username)
    driver.find_element_by_xpath('//*[@id="J-password"]').send_keys(password)

    # Screenshot the captcha element directly. The alternative is to save a
    # full-page screenshot and crop it with PIL using img.location / img.size.
    img = driver.find_element_by_xpath('//*[@id="J-loginImg"]')
    img.screenshot('./selenium/code.png')

    # Send the captcha to Chaojiying; 'pic_str' holds the click coordinates.
    # NOTE(review): the account name, password and software ID are hard-coded;
    # load them from the private key module or the environment instead.
    chaojiying = Chaojiying_Client('bobo328410948', 'bobo328410948', '910710')
    with open('./selenium/code.png', 'rb') as captcha_file:
        im = captcha_file.read()
    result = chaojiying.PostPic(im, 9004)['pic_str']
    print(result)

    # Convert "x1,y1|x2,y2|..." into [[x1, y1], [x2, y2], ...].
    # str.split('|') returns a single-item list when there is no '|', so the
    # single-point case needs no separate branch.
    all_list = []
    for point in result.split('|'):
        fields = point.split(',')
        all_list.append([int(fields[0]), int(fields[1])])
    print(all_list)

    # Click every recognised point, using the captcha image as the offset origin.
    for x, y in all_list:
        ActionChains(driver).move_to_element_with_offset(
            img, x, y).click().perform()
        time.sleep(0.5)

    driver.find_element_by_xpath('//*[@id="J-login"]').click()
    time.sleep(3)

    # Drag the slider: press -> move 350 px right -> release, performed as one
    # chain. Previously release() was queued after perform() and never
    # executed, so the mouse button was never let go.
    span = driver.find_element_by_xpath('//*[@id="nc_1_n1z"]')
    action = ActionChains(driver)
    action.click_and_hold(span).move_by_offset(350, 0).release().perform()

    time.sleep(5)
finally:
    # Always close the browser, even when a step above raises.
    driver.quit()

你可能感兴趣的:(Python,python,selenium,定位,爬虫,后端)