解决滑块验证问题(selenium, opencv)

本文环境配置:

系统=>windows10:64位

工具=>PyCharm:2018.1.4

语言=>Python:3.6

第三方库

urllib3

opencv

selenium

PIL

geckodriver.exe(火狐)或 Chromedriver.exe  

本次主要是使用selenium模拟滑动验证

最终目标:实现自动点击滑块,并且拖动完成验证

解决滑块验证问题(selenium, opencv)_第1张图片

 代码实现如下:

import random

from scrapy.http import HtmlResponse
from selenium.common.exceptions import TimeoutException
import time
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image
import cv2
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import urllib3
from urllib3.connectionpool import xrange


class SeleniumMiddleware(object):
    """Solve a slider ("drag the puzzle piece into the gap") captcha.

    The workflow implemented by :meth:`process_request` is:

    1. open the captcha page in Firefox through Selenium,
    2. screenshot and crop the captcha picture to a file,
    3. locate the gap in that picture with OpenCV (:meth:`handle`),
    4. build a list of mouse offsets (:meth:`get_track`) and drag the slider
       handle along it (:meth:`move_to_gap`).

    The geometry constants below are in pixels of the cropped captcha image.
    """

    # Height of the piece body: rows scanned below ``top`` when probing for a
    # left knob, columns scanned when probing for a top knob, and the side of
    # the debug rectangle drawn by ``show``.
    box1 = 49
    # Vertical extent (plus a 20 px tolerance) below ``top`` in which a gap
    # contour is allowed to lie.
    box2 = 52
    # Size of the knob that may stick out of the piece on its left/top side.
    convex = 8
    # Column splitting the image: [0, cut) holds the piece, [cut, width) the
    # background that contains the gap.
    cut = 70

    # State filled in by ``load``/``getTop``/``handle`` for the current image.
    image = None
    width = 0
    high = 0
    left = 0
    top = 0

    # 1 when the piece was found to have a knob on that side, else 0.
    leftConvex = 0
    topConvex = 0

    def __init__(self):
        """Start a Firefox browser and a 20-second explicit wait bound to it."""
        self.options = Options()

        # Raw string: the path contains backslashes that are not valid escape
        # sequences.  ``options=`` replaces the deprecated ``firefox_options=``
        # keyword (accepted by Selenium 3.8 and later).
        self.browser = webdriver.Firefox(executable_path=r"F:\Python\geckodriver.exe",
                                         options=self.options)
        self.wait = WebDriverWait(self.browser, 20)

    def _is_piece_pixel(self, row, col):
        """Return True when all three channels of ``image[row, col]`` are non-zero."""
        pixel = self.image[row, col]
        return bool(pixel[0] != 0 and pixel[1] != 0 and pixel[2] != 0)

    @staticmethod
    def _find_contours(binary):
        """Return the outermost contours of a binary image.

        ``cv2.findContours`` returns ``(image, contours, hierarchy)`` on
        OpenCV 3.x but ``(contours, hierarchy)`` on 2.x and 4.x; the contours
        are always the second-to-last element.
        """
        return cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    def load(self, path):
        """Read the image at ``path`` and reset all per-image state.

        Returns:
            True when the image was read and has a positive size, else False.
        """
        self.width = 0
        self.high = 0
        self.top = 0
        self.left = 0
        self.leftConvex = 0
        self.topConvex = 0
        self.image = cv2.imread(path)
        if self.image is None:
            return False

        h, w, _ = self.image.shape
        if w <= 0 or h <= 0:
            return False

        self.width = w
        self.high = h
        return True

    def getTop(self):
        """Find the top row of the puzzle piece.

        Scans one fixed column (``convex * 2``) downwards and stops at the
        first pixel whose three channels are all non-zero.

        Returns:
            The row index (also stored in ``self.top``), or -1 when no image is
            loaded or no such pixel exists in the scanned range.
        """
        if self.image is None:
            return -1
        column = self.convex * 2
        end_row = self.high - (self.box1 + self.convex)
        for row in range(self.convex, end_row):
            if self._is_piece_pixel(row, column):
                self.top = row
                return row
        # Nothing found: report failure instead of the stale ``self.top`` so
        # callers checking for -1 actually notice.
        return -1

    def getLeftConvex(self):
        """Detect whether the piece has a knob sticking out on its left side.

        Scans column ``convex / 2`` over the ``box1`` rows starting at
        ``self.top``; sets ``self.leftConvex = 1`` and returns True on the first
        non-black pixel, otherwise returns False.
        """
        if self.image is None:
            return False
        column = int(self.convex / 2)
        for row in range(self.top, self.top + self.box1):
            if self._is_piece_pixel(row, column):
                self.leftConvex = 1
                return True
        return False

    def getTopConvex(self):
        """Detect whether the piece has a knob sticking out on its top side.

        Scans the row ``convex / 2`` pixels above ``self.top`` over ``box1``
        columns starting at ``convex``; sets ``self.topConvex = 1`` and returns
        True on the first non-black pixel, otherwise returns False.
        """
        if self.image is None:
            return False
        row = self.top - int(self.convex / 2)
        if row < 0:
            # A negative index would silently wrap to the bottom of the image.
            return False
        first_col = self.convex
        for col in range(first_col, first_col + self.box1):
            if self._is_piece_pixel(row, col):
                self.topConvex = 1
                return True
        return False

    def _piece_contours(self):
        """Return the outer contours found in the piece strip (columns < ``cut``)."""
        strip = self.image[0:self.high, 0:self.cut]
        strip = cv2.GaussianBlur(strip, (3, 3), 0)
        strip = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)
        # Everything that is not (almost) black becomes white.
        _, strip = cv2.threshold(strip, 1, 255, cv2.THRESH_BINARY)
        return self._find_contours(strip)

    def _gap_candidates(self):
        """Return simplified contours right of ``cut`` that could be the gap.

        A contour is kept only if it lies within 20 px of the vertical band
        occupied by the piece, its top edge is within 20 px of the piece's top
        (shifted by ``convex`` when the piece has a top knob), and its height
        is between 40 and 80 px.
        """
        area = self.image[0:self.high, self.cut:self.width]
        area = cv2.GaussianBlur(area, (7, 7), 0)
        area = cv2.cvtColor(area, cv2.COLOR_BGR2GRAY)
        area = cv2.Canny(area, 120, 255)
        _, area = cv2.threshold(area, 100, 255, cv2.THRESH_BINARY)

        min_y = self.top - 20
        max_y = self.top + self.box2 + 20
        knob_offset = self.convex if self.topConvex else 0

        kept = []
        for contour in self._find_contours(area):
            ys = contour[:, 0, 1]
            top = int(ys.min())
            bottom = int(ys.max())
            if top < min_y or bottom > max_y:
                continue
            if abs(self.top - (top - knob_offset)) > 20:
                continue
            if not 40 <= bottom - top <= 80:
                continue
            # Light polygon approximation (0.1% of the perimeter).
            epsilon = 0.001 * cv2.arcLength(contour, True)
            kept.append(cv2.approxPolyDP(contour, epsilon, True))
        return kept

    def handle(self, path):
        """Locate the gap in the captcha image stored at ``path``.

        Args:
            path: Path of the cropped captcha image; the piece is expected in
                the first ``cut`` columns on a black background.

        Returns:
            The horizontal drag distance as a float, or -1 when the image
            cannot be read, the piece cannot be found, or no plausible gap
            contour remains after filtering.
        """
        if not self.load(path):
            return -1
        if self.getTop() == -1:
            return -1
        self.getLeftConvex()
        self.getTopConvex()

        piece_contours = self._piece_contours()
        if len(piece_contours) == 0:
            return -1

        candidates = self._gap_candidates()
        if not candidates:
            # Previously this fell through and indexed an empty list.
            return -1

        # Pick the candidate whose shape is most similar to the piece
        # (method 1 is the first Hu-moment based comparison of matchShapes).
        piece = piece_contours[0]
        best = min(candidates, key=lambda c: cv2.matchShapes(piece, c, 1, 0.0))

        # Left-most x of the chosen contour, converted back to full-image
        # coordinates; skip the knob width when the piece has a left knob.
        left = int(best[:, 0, 0].min())
        if self.leftConvex:
            left += self.convex + 1
        self.left = left + self.cut

        # 349 is the width of the crop made in process_request (476 - 127).
        # NOTE(review): 280 and 14 look like the on-page track width and a
        # fixed offset found by calibration - confirm against the page.
        distance = (self.left - 14) * 280 / 349
        print('距离左边:', distance)
        return distance

    def show(self):
        """Display the loaded image with the detected gap outlined in red.

        Blocks until a key is pressed in the OpenCV window.
        """
        if self.image is None:
            return

        x1 = self.left
        y1 = self.top
        x2 = x1 + self.box1
        y2 = y1 + self.box1

        preview = self.image.copy()
        cv2.rectangle(preview, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
        cv2.imshow("image", preview)

        cv2.waitKey(0)
        cv2.destroyAllWindows()

    def get_track(self, distance):
        """Build the list of per-step mouse offsets for a drag of ``distance``.

        The forward track deliberately overshoots by 20 px, accelerating for
        the first 4/5 of the way and decelerating afterwards; ``back_tracks``
        then moves back those 20 px.

        Args:
            distance: Target offset in pixels.

        Returns:
            ``{'track': [...], 'back_tracks': [...]}`` with integer offsets.
            The forward steps sum exactly to ``round(distance + 20)``.
        """
        distance += 20  # overshoot first, back_tracks undoes it
        track = []
        # Exact (float) position of the simulated motion.
        current = 0
        # Integer pixels actually emitted so far.
        moved = 0
        # Threshold after which the motion decelerates.
        mid = distance * 4 / 5
        # Time step.
        t = 0.3
        # Current velocity.
        v = 0

        while current < distance:
            a = 3 if current < mid else -2
            v0 = v
            # x = v0*t + 1/2*a*t^2
            current += v0 * t + 1 / 2 * a * t * t
            # v = v0 + a*t
            v = v0 + a * t
            # Emit the difference between the rounded position and what has
            # already been emitted, capped at the target.  Rounding every step
            # on its own let the error accumulate and the last step overshoot.
            step = int(round(min(current, distance))) - moved
            moved += step
            track.append(step)

        # Slide back to the exact position; these sum to -20.
        back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1]
        return {'track': track, 'back_tracks': back_tracks}

    def move_to_gap(self, slider, tracks):
        """Drag the slider handle along ``tracks`` and release it.

        Sequence: press and hold the handle, replay the forward track with a
        random vertical jitter of up to 3 px per step, replay the backward
        track, wiggle 4 px forth and back, then release.

        Args:
            slider: The slider handle element.
            tracks: Dict with 'track' and 'back_tracks' lists, as returned by
                :meth:`get_track`.
        """
        ActionChains(self.browser).move_to_element(slider).perform()
        # Press and hold the handle.
        ActionChains(self.browser).click_and_hold(slider).perform()

        for x in tracks['track']:
            y = random.randint(-3, 3)
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=y).perform()

        time.sleep(0.6)
        for back_track in tracks['back_tracks']:
            ActionChains(self.browser).move_by_offset(xoffset=back_track, yoffset=0).perform()

        time.sleep(0.3)
        ActionChains(self.browser).move_by_offset(xoffset=4, yoffset=0).perform()  # overshoot a little
        time.sleep(0.9)
        ActionChains(self.browser).move_by_offset(xoffset=-4, yoffset=0).perform()  # and come back
        time.sleep(0.6)
        # Release the handle.
        ActionChains(self.browser).release().perform()

    def process_request(self):
        """Open the captcha page and, if a slider captcha is shown, solve it.

        Writes ``ele.png`` (element screenshot) and ``cut.png`` (cropped
        captcha) to the current working directory.  A Selenium timeout is
        reported on stdout rather than raised.
        """
        cut_path = './cut.png'
        try:
            self.browser.get(
                'https://www.anjuke.com/captcha-verify/?callback=shield&from=antispam&serialID=582d45fc4d908d19194f5c8be25b69e5_048c47fa2baa45768dee22e218344755&history=aHR0cHM6Ly9zaGFuZ2hhaS5hbmp1a2UuY29tL2NvbW11bml0eS9qaW5nYW4v')

            if '向右滑动滑块' not in self.browser.page_source:
                return

            print('出现验证码-----------------')
            # Paint the piece background black and hide the operate bar.
            self.browser.execute_script(
                "var captcha__puzzleImg = document.evaluate('//*[@class=\"dvc-captcha__puzzleImg\"]', document, null, 0, null).iterateNext();captcha__puzzleImg.style.backgroundColor='black';document.evaluate('//*[@class=\"dvc-operate\"]', document, null, 0, null).iterateNext().style.display='none';")
            time.sleep(1)

            # find_element(By...) works on Selenium 3 and 4; the
            # find_element_by_* helpers were removed in Selenium 4.3.
            img = self.browser.find_element(By.CSS_SELECTOR, '.info')
            img.screenshot('ele.png')

            with Image.open('./ele.png') as im:
                # Crop box is (left, upper, right, lower) in screenshot pixels.
                cro = im.crop((127, 70, 476, 250))
                cro.save(cut_path)
            time.sleep(3)

            # Read back the file that was just written instead of a hard-coded
            # absolute path that only exists on one machine.
            left = self.handle(cut_path)
            if left < 0:
                print('captcha gap not found, giving up')
                return

            slide = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'dvc-slider__handler')))
            track = self.get_track(left)
            print(track)
            self.move_to_gap(slide, track)

            # Retry the same drag while the captcha prompt is still shown.
            # NOTE(review): if the page swaps in a new picture after a failed
            # attempt, the gap position should be recomputed - confirm.
            for _ in range(7):
                if '向右滑动滑块' not in self.browser.page_source:
                    break
                time.sleep(1)
                self.move_to_gap(slide, track)

            time.sleep(20)

        except TimeoutException as e:
            print('超时', e)


if __name__ == "__main__":
    # Guarded so that importing this module does not launch a browser.
    text = SeleniumMiddleware()
    try:
        text.process_request()
    finally:
        # Always shut down the browser and its driver process, even when
        # solving the captcha raised.
        text.browser.quit()

小结。

其实上面的代码还可以进一步优化。例如,尝试三次滑动后如果仍然没有“验证成功”,就应该主动刷新页面,并使用 try 进入下一次验证过程。除此之外,以上只是对“滑块验证”进行了分析和模拟;实际情况是,通过“滑块验证”后,如果网站监测到是机器模拟,还会重新出现一个点击验证。关于网站监测到 selenium 模拟的问题,可参考:https://blog.csdn.net/qq_42782937/article/details/86502492

以上代码还有待继续完善,也欢迎看到这篇博文的人多多指正不足之处。

你可能感兴趣的:(解决滑块验证问题(selenium, opencv))