滑块验证码之模拟人工滑速

前言

代码直接运行即可

此处使用 selenium 进行模拟,主要记录的难点是如何模拟人工滑速
具体原理和利用到的东西都有做注释,逻辑完整,小白还是可以尝试理解的

一、正常简单模拟滑动

目标网址:https://www.geetest.com/demo/slide-bind.html

import base64
import time
import ddddocr
from selenium import webdriver
from selenium.webdriver.common.by import By

def text_dis(bg, fg):
    """Compare two captcha images with ddddocr and return the slide distance.

    :param bg: path of the first image, handed to ``slide_comparison`` as its
        first argument (the caller passes the canvas that shows the gap)
    :param fg: path of the second image, handed over as the second argument
        (the caller passes the full background)
    :return: first element of the ``'target'`` entry of the comparison result,
        which the caller uses as the horizontal drag distance
    """
    def _read_bytes(path):
        # Both images are passed to ddddocr as raw bytes.
        with open(path, 'rb') as handle:
            return handle.read()

    # Detection and OCR models are switched off; only the comparison is used.
    comparer = ddddocr.DdddOcr(det=False, ocr=False)
    first_image = _read_bytes(bg)
    second_image = _read_bytes(fg)
    outcome = comparer.slide_comparison(first_image, second_image)
    target_box = outcome.get('target')
    return target_box[0]

def get_slide(username='13697028751', password='19961227ai'):
    """Log in on the GeeTest demo page and drag its slider captcha.

    The two captcha canvases are exported to ``./bg.png`` and ``./temp.png``,
    compared by ``text_dis``, and the resulting distance drives a simple
    three-step drag of the slider button.

    :param username: text typed into the ``username`` input; defaults to the
        value that used to be hard-coded here
    :param password: text typed into the ``password`` input; defaults to the
        value that used to be hard-coded here
    """
    options = webdriver.ChromeOptions()
    # This switch does not work with old browser versions.
    options.add_argument('--disable-blink-features=AutomationControlled')
    # ``options=`` is the keyword the second script in this file uses as well;
    # the old ``chrome_options=`` spelling is rejected by current Selenium.
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.get('https://www.geetest.com/demo/slide-bind.html')
        # Fill in account and password.
        driver.find_element(By.ID, 'username').send_keys(username)
        driver.find_element(By.ID, 'password').send_keys(password)
        time.sleep(2)
        driver.find_element(By.CSS_SELECTOR, 'div.btn').click()  # click "log in"
        time.sleep(2)

        # Export the first canvas (class geetest_canvas_bg) as a PNG data URL.
        img_src = driver.execute_script('return document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png");')
        print(img_src)
        # A data URL is "<header>,<base64 payload>"; keep the payload only.
        im_base64 = img_src.split(',')[1]
        im_bytes = base64.b64decode(im_base64)
        print(im_base64)
        print(im_bytes)
        print('-----------------------------------')
        with open('./bg.png', 'wb') as f:
            f.write(im_bytes)

        # Export the second canvas (class geetest_canvas_fullbg) the same way.
        temp = driver.execute_script("return document.getElementsByClassName('geetest_canvas_fullbg geetest_fade geetest_absolute')[0].toDataURL('image/png');")
        print(temp)
        temp_base64 = temp.split(',')[1]
        temp_bytes = base64.b64decode(temp_base64)
        with open('./temp.png', 'wb') as f:
            f.write(temp_bytes)

        distance = text_dis('bg.png', 'temp.png')  # compute the distance
        print('距离:', distance)

        # Drag the slider.
        slide = driver.find_element(By.CSS_SELECTOR, 'div.geetest_slider_button')
        action_chains = webdriver.ActionChains(driver)

        # Press the left button on the slider without releasing it, then move
        # in three steps with pauses in between. The net horizontal movement
        # is (distance - 10) + 10 - 10 = distance - 10 pixels.
        action_chains.click_and_hold(slide)
        action_chains.pause(0.2)
        action_chains.move_by_offset(distance - 10, 0)
        action_chains.pause(0.8)
        action_chains.move_by_offset(10, 0)
        action_chains.pause(1.4)
        action_chains.move_by_offset(-10, 0)
        action_chains.release()
        action_chains.perform()
        # Keep the window open for a while so the result can be inspected.
        time.sleep(20)
    finally:
        # Always shut the browser and its driver process down, even when one
        # of the steps above raised.
        driver.quit()

# Entry point of the first script: run the demo as soon as the file is executed.
get_slide()

扩展

selenium常用模拟操作

1、行为控制
perform --- 执行所有准备好的Action
reset_actions --- 清空所有准备好的Action  #  该方法在 selenium 3.141.0版本不生效
pause --- 设置Action之间的动作时间间隔

2、鼠标操作
click --- 鼠标左键点击(可以指定或不指定元素对象)
click_and_hold --- 鼠标左键点击但不释放(可以指定或不指定元素对象)
release --- 释放鼠标点击动作(可以指定或不指定在目标元素对象上释放)
context_click --- 鼠标右键点击(可以指定或不指定元素对象)
double_click --- 鼠标左键双击(可以指定或不指定元素对象)
drag_and_drop --- 鼠标左键在两个元素之间拖拽
drag_and_drop_by_offset --- 鼠标左键拖拽元素到目标偏移位置
move_by_offset --- 鼠标移动指定偏移
move_to_element --- 鼠标移动到指定元素
move_to_element_with_offset --- 鼠标移动到指定元素的指定偏移位置

二、应对滑速检测代码

目标网址:https://reg.jd.com/p/regPage

#!/usr/bin/env python 
# -*- coding:utf-8 -*-
import json
import random
import re
import time
import cv2
import base64
import os
import numpy as np
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


def get_random_float(min, max, digits=4):
    """Return a random float between ``min`` and ``max``, rounded.

    :param min: one bound of the range
    :param max: the other bound; callers in this file also pass it smaller
        than ``min``, which ``random.uniform`` accepts
    :param digits: number of decimal places kept in the result (default 4)
    :return: the rounded random value
    """
    # Honour ``digits``; it used to be ignored in favour of a fixed 4.
    return round(random.uniform(min, max), digits)

def base64_to_image(base64_code, img_name):
    """Decode a base64 string and write it to ``img_name`` as a binary file.

    The parent directory of ``img_name`` is created when it does not exist.

    :param base64_code: base64 payload (without any ``data:...;base64,`` prefix)
    :param img_name: path of the file to write
    :return: ``img_name``, unchanged
    """
    # Take the directory from the path itself. The former regex only matched
    # lowercase names, so any other file name made the whole path be created
    # as a directory and the subsequent open() fail.
    dir_path = os.path.dirname(img_name)
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    img_data = base64.b64decode(base64_code)
    # The context manager closes the file even if the write fails.
    with open(img_name, 'wb') as file:
        file.write(img_data)
    return img_name

class JD_Register(object):
    """Solve the slider captcha shown on the JD registration page.

    ``main`` runs the whole flow: open the page, fill in the phone number and
    request a code so the captcha appears, download the two captcha images,
    locate the gap by template matching, compute a drag track with varying
    speed and replay it on the slider.
    """

    def __init__(self, url, username, pwd=''):
        """Start Chrome and store the settings used by the other methods.

        :param url: address of the registration page
        :param username: phone number typed into the registration form
        :param pwd: password; stored on the instance, not read by this class
        """
        super(JD_Register, self).__init__()
        self.url = url          # page address
        options = ChromeOptions()
        # Start Chrome without its "enable-automation" switch.
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        self.driver = webdriver.Chrome(options=options)
        self.wait = WebDriverWait(self.driver, 10)  # explicit waits give up after 10 s
        self.username = username  # account data
        self.password = pwd
        self.target_path = "./target_reg.png"      # where the background image is saved
        self.template_path = "./template_reg.png"  # where the slider image is saved
        self.zoom = 1  # scale between the saved image and the image on the page

    def open_url(self, url=None):
        """Maximise the window and load ``url`` (``self.url`` when omitted)."""
        self.driver.maximize_window()
        self.driver.get(url if url else self.url)

    def main(self):
        """Run the full flow: open the page, trigger the captcha, slide it."""
        self.open_url()       # open the page
        self.loginOn()        # fill the form so the captcha opens
        self._crack_slider()  # drag the slider like a human would

    def loginOn(self):
        """Accept the agreement, type the phone number and request a code."""
        print("------------------------ 进度1:填写账号~")
        self.driver.find_element(By.XPATH, '/html/body/div[4]/div[2]/div/div[2]/button').click()  # agree
        time.sleep(2)
        # Type the number given to the constructor; a hard-coded number used
        # to be sent here, which silently ignored ``username``.
        self.driver.find_element(By.ID, 'form-phone').send_keys(self.username)
        time.sleep(2)
        self.driver.find_element(By.CLASS_NAME, 'form-item-getcode').click()  # "get code" button
        time.sleep(2)

    def _crack_slider(self):
        """Attempt the captcha repeatedly until the user declines to go on.

        After every attempt the user is asked on stdin whether to try again;
        answering ``y`` (any case) starts another attempt.

        :return: always ``False``, once the user stops
        """
        # A loop rather than self-recursion, so the number of retries is not
        # limited by the interpreter's recursion depth.
        while True:
            if self._get_pic():  # download the images
                # Template matching
                target = cv2.imread(self.target_path)             # background image
                template = cv2.imread(self.template_path)         # slider image
                distance = self._match_templet(target, template)  # x position of the gap
                tracks = self._get_tracks(distance * self.zoom)   # drag track
                self._slider_action(tracks)                       # replay it on the slider

            is_go_on = input('是否继续测试?y:是     其它:退出')
            if not is_go_on or is_go_on.lower() != 'y':
                return False
            print("开始下一次尝试")

    def _get_pic(self):
        """Save both captcha images locally and compute ``self.zoom``.

        :return: ``True`` when the images were saved, ``False`` when one of the
            image elements is falsy
        """
        time.sleep(1)
        target = self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="slideAuthCode"]/div/div[1]/div[2]/div[1]/img')))
        template = self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="slideAuthCode"]/div/div[1]/div[2]/div[2]/img')))
        if not (target and template):
            print("未找到缺口图片")
            return False

        print("------------------------ 进度2:下载滑块验证码图片")
        target_base64 = target.get_attribute('src')
        template_base64 = template.get_attribute('src')
        # The src attributes are data URLs; strip the "data:<type>;base64," header.
        target_base64_str = re.sub(r'data:[a-z]*/[a-z]*;base64,', '', target_base64)
        template_base64_str = re.sub(r'data:[a-z]*/[a-z]*;base64,', '', template_base64)
        base64_to_image(target_base64_str, self.target_path)
        base64_to_image(template_base64_str, self.template_path)

        time.sleep(1)
        # Only the size is needed; the context manager releases the file handle.
        with Image.open(self.target_path) as local_img:
            size_loc = local_img.size
        # NOTE(review): 364 is presumably the width in pixels at which the page
        # renders the image - confirm against the live page.
        self.zoom = 364 / int(size_loc[0])
        print("计算缩放比例 zoom = %f" % round(self.zoom, 4))
        return True

    def _match_templet(self, img_target, img_template):
        """
        Template matching, used to find the gap.

        :param img_target: background image containing the gap
        :param img_template: image of the slider piece
        :return: x coordinate of the best match, i.e. the distance to the gap
        """
        print("------------------------ 进度3:图片缺口模板匹配")

        # Prepare the slider image (its background is a source of matching error).
        tpl = self.__handle_slider_img(img_template)
        blurred = cv2.GaussianBlur(img_target, (3, 3), 0)    # Gaussian blur
        gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)     # grayscale
        rows, cols = tpl.shape[:2]  # array shape is (rows, columns) = (height, width)
        result = cv2.matchTemplate(gray, tpl, cv2.TM_CCOEFF_NORMED)  # match on grayscale images
        print("result = {}".format(len(np.where(result >= 0.5)[0])))

        # Location of the highest matching score.
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
        left_up = max_loc
        right_down = (left_up[0] + cols, left_up[1] + rows)
        # Outline the match on the caller's image; every colour channel is kept
        # inside the 0-255 range (one channel used to be 279).
        cv2.rectangle(img_target, left_up, right_down, (7, 255, 151), 2)
        print("验证码位移距离为:%d" % left_up[0])
        return left_up[0]

    def __handle_slider_img(self, image):
        """
        Turn the slider image into a binary mask.

        :param image: slider image as a cv2 (BGR) array
        :return: mask that is set where the grayscale image is 0 or 96, after a
            morphological opening
        """
        kernel = np.ones((8, 8), np.uint8)  # kernel used to remove foreground noise
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # grayscale

        # Paint pure-black pixels gray (96) with one vectorised assignment
        # instead of visiting every pixel in Python.
        gray[gray == 0] = 96

        # Keep only the pixels that are exactly 96.
        binary = cv2.inRange(gray, 96, 96)
        res = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)  # opening removes small white specks
        return res

    def _get_cookie(self):
        """Print the browser cookies, save them as JSON and quit the driver."""
        cookie_items = self.driver.get_cookies()
        ck_dict = {}
        for cookie in cookie_items:
            ck_dict[cookie['name']] = cookie['value']
        print("cookie = %s" % ck_dict)
        self._save_to_file(json.dumps(ck_dict, separators=(',', ':'), ensure_ascii=False))
        self.driver.quit()

    def _save_to_file(self, str_data):
        """Write ``str_data`` to ``../static/temp/cookie.txt`` (best effort).

        A failure to open or write the file is reported on stdout and not
        raised.

        :param str_data: text to store
        """
        try:
            # UTF-8 is set explicitly because the caller serialises with
            # ensure_ascii=False, so the text may contain non-ASCII characters.
            with open("../static/temp/cookie.txt", "w", encoding="utf-8") as file:
                file.write(str_data)
        except OSError:
            print("保存cookie异常")

    # ---- drag track computation ----
    def _get_tracks(self, distance):
        """
        Build the list of per-step moves for a given offset.

        The forward moves come from a uniformly-accelerated-motion formula with
        four acceleration phases (two speeding up, two slowing down); if they
        overshoot the target by more than 2 px, backward moves are added.

        :param distance: offset to cover
        :return: dict with ``'forward_tracks'`` and ``'back_tracks'`` lists
        """
        print('------------------------ 进度4:计算图片拖拽轨迹')
        track = []
        # Phase boundaries, each a random fraction of the distance.
        mid1 = round(distance * random.uniform(0.1, 0.2))
        mid2 = round(distance * random.uniform(0.65, 0.76))
        mid3 = round(distance * random.uniform(0.84, 0.88))

        # Start position, start velocity, time step.
        current, v, t = 0, 0, 0.2
        distance = round(distance)

        # Four acceleration phases.
        while current < distance:
            if current < mid1:
                a = random.randint(10, 15)
            elif current < mid2:
                a = random.randint(30, 40)
            elif current < mid3:
                a = -70
            else:
                a = random.randint(-25, -18)

            v0 = v              # initial velocity of this step
            v = v0 + a * t      # v = v0 + a*t
            v = v if v >= 0 else 0   # never move backwards in this phase
            move = v0 * t + 1 / 2 * a * (t ** 2)   # s = v0*t + a*t^2/2
            # A negative displacement is replaced by a 1 px step.
            move = round(move if move >= 0 else 1)
            current += move     # position so far
            track.append(move)  # add to the track

        # Overshoot handling: out_range is <= 0 after the loop.
        back_tracks = []
        out_range = distance - current
        print("当前= {}, 距离= {}, 超出范围 = {}".format(current, distance, out_range))

        if out_range < -8:
            sub = int(out_range + 8)
            back_tracks = [-1, sub, -3, -1, -1, -1, -1]
        elif out_range < -2:
            sub = int(out_range + 3)
            back_tracks = [-1, -1, sub]

        print("向前轨道= {}, 返回轨道={}".format(track, back_tracks))
        return {'forward_tracks': track, 'back_tracks': back_tracks}

    # ---- slider movement ----
    def _slider_action(self, tracks):
        """Replay a track on the slider with small random vertical offsets.

        :param tracks: dict as returned by ``_get_tracks``
        :return: ``True`` after the slider was released, ``False`` when the
            slider element is falsy
        """
        print("------------------------ 进度5:模拟人工移动滑块")
        # Locate the slider handle.
        slider = self.wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="slideAuthCode"]/div/div[2]/div[3]')))
        if not slider:
            print("未找到滑块")
            return False

        # Press the left button and keep it held for the whole drag.
        ActionChains(self.driver).click_and_hold(slider).perform()

        # Forward moves
        for track in tracks['forward_tracks']:
            yoffset_random = random.uniform(-2, 4)
            ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=yoffset_random).perform()

        time.sleep(random.uniform(0.06, 0.5))

        # Backward moves
        for back_tracks in tracks['back_tracks']:
            yoffset_random = random.uniform(-2, 2)
            ActionChains(self.driver).move_by_offset(xoffset=back_tracks, yoffset=yoffset_random).perform()

        # Jitter: a tiny move to the left followed by a tiny move to the right.
        ActionChains(self.driver).move_by_offset(xoffset=get_random_float(0, -1.67), yoffset=get_random_float(-1, 1)).perform()
        ActionChains(self.driver).move_by_offset(xoffset=get_random_float(0, 1.67), yoffset=get_random_float(-1, 1)).perform()

        time.sleep(get_random_float(0.2, 0.6))
        ActionChains(self.driver).release().perform()

        print("滑块移动成功")
        return True


if __name__ == '__main__':
    # Script entry point: build the solver for the JD registration page and
    # run the whole flow (constructing it already starts Chrome).
    c = JD_Register(url='https://reg.jd.com/p/regPage', username='13965216565')
    c.main()

你可能感兴趣的:(python-爬虫,学习,python)