使用pyppeteer+ddddocr通过极验滑块验证码(附源码)

前言:

        验证码是大多数爬虫都需要克服的难题。pyppeteer是目前主流的浏览器自动化工具之一,它的优势在于不易被网站检测到;ddddocr则是目前主流的验证码识别第三方模块。接下来就通过pyppeteer和ddddocr等第三方模块完成极验滑块验证。

一:使用自动化工具打开网站

        目标网站:极验GeeTest行为验证4.0(适应型验证码:滑动验证、点选验证、图片验证),演示地址:https://www.geetest.com/adaptive-captcha-demo

        (图1:使用pyppeteer+ddddocr通过极验滑块验证码)

 

    # Browser launch options
    start_parm = {
        # Disable headless mode so the browser window is visible
        "headless": False,
        "args": [
            '--disable-infobars',  # hide the automation infobar
            '--no-sandbox',  # disable the sandbox
            '--start-maximized',  # start with a maximized window

        ],
    }
    browser = await launch(**start_parm)
    page = await browser.newPage()

    # Set the page viewport size
    await page.setViewport(viewport={'width': 1920, 'height': 1080})
    await page.goto('https://www.geetest.com/adaptive-captcha-demo')

二:控制鼠标定位到指定元素

   await page.waitForXPath('//div[@class="type-config"]')  # wait for the element to load
    botton1 = await page.xpath('//div[@class="tab-item tab-item-1"]')  # slider-puzzle captcha tab
    await botton1[0].click()
    await page.click('#captcha', options={
        'button': 'left',
        'clickCount': 2,
        'delay': 300,  # click delay (ms)
    })
    botton2 = await page.xpath('//*[@aria-label="点击按钮开始验证"]')  # "start verification" button
    await botton2[0].click()

三:提取滑块拼图照片url

elements_1 = await page.xpath(
            '//*[@id="captcha"]/div[2]/div[1]/div[4]/div[1]/div[2]/div/div/div[1]/div[1]/div[1]/@style')  # style attribute holding the slider-piece image URL
        elements_2 = await page.xpath(
            '//*[@id="captcha"]/div[2]/div[1]/div[4]/div[1]/div[2]/div/div/div[1]/div[2]/@style')  # style attribute holding the background image URL
        for element in elements_1:
            sc = await page.evaluate('(element) => element.textContent', element)
            sc_url = sc.split('"')[1].split('"')[0]  # extract the slider-piece image URL (text between the first pair of double quotes)
            with open('slice.png', 'wb')as f1:
                f1.write(requests.get(sc_url).content)
        for element in elements_2:
            bg = await page.evaluate('(element) => element.textContent', element)
            bg_url = bg.split('"')[1].split('"')[0]  # extract the background image URL (text between the first pair of double quotes)
            with open('bg.png', 'wb') as f2:
                f2.write(requests.get(bg_url).content)

四:获取拼图坐标偏移量

async def get_xy():
    """Match the slider piece against the background and return its x offset.

    Reads ``slice.png`` (slider piece) and ``bg.png`` (background) from the
    current working directory and passes their bytes to
    ``ddddocr.DdddOcr.slide_match``.

    Returns:
        The first element of the ``'target'`` entry in the match result, or
        ``False`` when the images cannot be read or matching fails.
    """
    det = ddddocr.DdddOcr(det=False, ocr=False)

    try:
        # Reading the images is part of the guarded path: a missing or
        # unreadable file is reported as a failed match instead of crashing.
        with open('slice.png', 'rb') as f:
            target_bytes = f.read()
        with open('bg.png', 'rb') as f:
            background_bytes = f.read()

        res = det.slide_match(target_bytes, background_bytes)
        print(res)
        return res.get('target')[0]
    except Exception as exc:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still propagate; the error is printed, not silently dropped.
        print(f'get_xy failed: {exc!r}')
        return False

五:操作鼠标移动滑块

        其中x、y是传给page.mouse.move的目标坐标,代码中的1116和641是作者环境下的基准值,需要根据自己电脑的分辨率和页面布局进行微调。

        if target:
            # print(target)
            botton3 = await page.xpath(
                '//*[@id="captcha"]/div[2]/div[1]/div[4]/div[1]/div[2]/div/div/div[2]/div/div[3]')
            await botton3[0].hover()  # hover the mouse over the element
            await page.mouse.down()  # press the mouse button
            await page.waitFor(500)
            # Hard-coded coordinates: 1116 and 641 are base values that must be tuned per machine
            x = 1116 + target
            y = 641
            await page.mouse.move(x, y, {'steps': 2})  # move the mouse
            await page.waitFor(500)
            await page.mouse.up()  # release the mouse button
            # NOTE(review): time.sleep is a blocking call inside async code; it stalls the event loop while it waits
            time.sleep(2)
            elements_3 = await page.xpath('//*[@id="captcha"]/div[2]/div[1]/div[3]/div[2]/div/div[2]/text()')
            msg = ''
            # Keeps the text of the last matched node (empty string if nothing matched)
            for element in elements_3:
                msg = await page.evaluate('(element) => element.textContent', element)
            if msg == '验证通过':
                break
            else:
                print(msg)
        else:  # could not get the offset: refresh the captcha
            botton4 = await page.xpath('//*[@aria-label="刷新验证"]')
            await botton4[0].click()

(图2:使用pyppeteer+ddddocr通过极验滑块验证码)

验证通过,欧耶~

六:完整代码

# coding:utf-8
import ddddocr
# coding:utf-8
import asyncio
import time

import requests
from pyppeteer.launcher import DEFAULT_ARGS

# Drop the automation flag from pyppeteer's default Chromium arguments.
# Guarded so the script does not raise ValueError if the flag is not in the list.
if "--enable-automation" in DEFAULT_ARGS:
    DEFAULT_ARGS.remove("--enable-automation")
from pyppeteer import launch


async def _save_style_image(page, style_nodes, filename):
    """Download the image referenced by each node's text and write it to *filename*.

    The text of each node is split on double quotes and the second piece is
    used as the image URL.
    """
    for node in style_nodes:
        style_text = await page.evaluate('(element) => element.textContent', node)
        image_url = style_text.split('"')[1]
        # Download before opening the file, so a failed request does not leave
        # a truncated image behind; the timeout keeps a stalled request from
        # hanging the script indefinitely.
        response = requests.get(image_url, timeout=10)
        with open(filename, 'wb') as image_file:
            image_file.write(response.content)


async def main():
    """Open the GeeTest demo page and retry the slider captcha until it passes.

    Each round saves the slider-piece and background images as ``slice.png``
    and ``bg.png``, asks ``get_xy`` for the horizontal offset, and drags the
    slider handle. If no offset is obtained, the captcha is refreshed. The
    loop ends when the result text equals '验证通过'. The browser is closed
    even when a step raises.
    """
    # Browser launch options
    start_parm = {
        # Disable headless mode so the browser window is visible
        "headless": False,
        "args": [
            '--disable-infobars',  # hide the automation infobar
            '--no-sandbox',  # disable the sandbox
            '--start-maximized',  # start with a maximized window
        ],
    }
    browser = await launch(**start_parm)
    try:
        page = await browser.newPage()

        # Set the page viewport size
        await page.setViewport(viewport={'width': 1920, 'height': 1080})
        await page.goto('https://www.geetest.com/adaptive-captcha-demo')
        # asyncio.sleep instead of time.sleep: the event loop keeps running
        # while we wait, so pyppeteer's browser connection is still serviced.
        await asyncio.sleep(2)
        await page.waitForXPath('//div[@class="type-config"]')  # wait for the element to load
        botton1 = await page.xpath('//div[@class="tab-item tab-item-1"]')  # slider-puzzle captcha tab
        await botton1[0].click()
        await page.click('#captcha', options={
            'button': 'left',
            'clickCount': 2,
            'delay': 300,  # click delay (ms)
        })
        botton2 = await page.xpath('//*[@aria-label="点击按钮开始验证"]')  # "start verification" button
        await botton2[0].click()
        while True:
            await asyncio.sleep(5)
            elements_1 = await page.xpath(
                '//*[@id="captcha"]/div[2]/div[1]/div[4]/div[1]/div[2]/div/div/div[1]/div[1]/div[1]/@style')  # slider-piece image style
            elements_2 = await page.xpath(
                '//*[@id="captcha"]/div[2]/div[1]/div[4]/div[1]/div[2]/div/div/div[1]/div[2]/@style')  # background image style
            await _save_style_image(page, elements_1, 'slice.png')
            await _save_style_image(page, elements_2, 'bg.png')
            target = await get_xy()  # x offset of the slider piece, or a falsy value on failure
            if target:
                botton3 = await page.xpath(
                    '//*[@id="captcha"]/div[2]/div[1]/div[4]/div[1]/div[2]/div/div/div[2]/div/div[3]')
                await botton3[0].hover()  # hover the mouse over the slider handle
                await page.mouse.down()  # press the mouse button
                await page.waitFor(500)
                # NOTE: 1116 and 641 are hard-coded base coordinates that have
                # to be tuned for the machine the script runs on.
                x = 1116 + target
                y = 641
                await page.mouse.move(x, y, {'steps': 2})  # drag the slider
                await page.waitFor(500)
                await page.mouse.up()  # release the mouse button
                await asyncio.sleep(2)
                elements_3 = await page.xpath('//*[@id="captcha"]/div[2]/div[1]/div[3]/div[2]/div/div[2]/text()')
                msg = ''
                # Keep the text of the last matched node (empty if none matched)
                for element in elements_3:
                    msg = await page.evaluate('(element) => element.textContent', element)
                if msg == '验证通过':
                    break
                print(msg)
            else:  # could not get the offset: refresh the captcha
                botton4 = await page.xpath('//*[@aria-label="刷新验证"]')
                await botton4[0].click()
        input('---验证通过---')
    finally:
        # Always shut the browser down, including when a step above raised
        await browser.close()


async def get_xy():
    """Match the slider piece against the background and return its x offset.

    Reads ``slice.png`` (slider piece) and ``bg.png`` (background) from the
    current working directory and passes their bytes to
    ``ddddocr.DdddOcr.slide_match``.

    Returns:
        The first element of the ``'target'`` entry in the match result, or
        ``False`` when the images cannot be read or matching fails.
    """
    det = ddddocr.DdddOcr(det=False, ocr=False)

    try:
        # Reading the images is part of the guarded path: a missing or
        # unreadable file is reported as a failed match instead of crashing.
        with open('slice.png', 'rb') as f:
            target_bytes = f.read()
        with open('bg.png', 'rb') as f:
            background_bytes = f.read()

        res = det.slide_match(target_bytes, background_bytes)
        print(res)
        return res.get('target')[0]
    except Exception as exc:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still propagate; the error is printed, not silently dropped.
        print(f'get_xy failed: {exc!r}')
        return False


if __name__ == '__main__':
    # Run the coroutine exactly once on the event loop. A bare `main()` call
    # only creates a coroutine object that is never awaited.
    asyncio.get_event_loop().run_until_complete(main())

 

你可能感兴趣的:(Python网络爬虫,爬虫,python)