抖音web爬虫【滑块验证码解决方法】

文章内容仅供参考学习,如有侵权请联系作者进行删除


实现过程:

1、通过node下载滑块的图片

2、通过python的cv2(OpenCV)对图片进行模板匹配,定位滑块缺口在背景图中的位置

3、根据识别出的位置,再通过node控制鼠标自动拖动滑块;经验证,识别成功率约为1/5

代码:

//nodejs
const puppeteer = require('puppeteer')
const request = require('request')
const fs = require('fs')
const execSync = require('child_process').execSync

/**
 * Applies the hand-tuned, range-dependent correction to the raw offset
 * printed by the Python matcher. Offsets from 134 to 150 are left unchanged.
 *
 * @param {number} rawOffset - Offset reported by python/p.py.
 * @returns {number} Corrected horizontal drag distance.
 */
function calibrateMoveSize(rawOffset) {
    if (rawOffset > 200) {
        return rawOffset - 65;
    }
    if (rawOffset > 150) {
        return rawOffset - 25;
    }
    if (rawOffset <= 70) {
        return rawOffset + 30;
    }
    if (rawOffset < 100) {
        return rawOffset + 10;
    }
    if (rawOffset < 134) {
        return rawOffset + 30;
    }
    return rawOffset;
}

/**
 * Opens the Douyin hot page in headless Chromium, downloads the slider
 * captcha images, asks python/p.py for the drag offset, drags the slider
 * and finally logs the page's cookie string.
 *
 * @returns {Promise<void>} Resolves once the cookie was logged and the browser closed.
 * @throws {Error} If the Python script prints no number, or the slider handle
 *     cannot be found or has no bounding box.
 */
async function run(){
    const options = {
        args: ['--no-sandbox'],
        headless: true,
    };
    // Launch the browser instance.
    const browser = await puppeteer.launch(options);

    // try/finally guarantees the browser process is closed even when a step fails.
    try {
        // Create a new page and navigate to the target URL.
        const page = await browser.newPage();
        await page.goto("https://www.douyin.com/hot");

        await sleep(3000);

        // Download the captcha background and the slider piece.
        const backgroundSrc = await page.$eval('#captcha-verify-image', el => el.src);
        const slideSrc = await page.$eval('.captcha_verify_img_slide', el => el.src);
        await downloadImg(backgroundSrc, "./python/background.jpeg");
        await downloadImg(slideSrc, "./python/slide.png");

        await sleep(2000);

        // p.py (the Python script shown further down) prints the detected offset.
        const output = execSync('python python/p.py');
        const rawOffset = parseInt(output.toString(), 10);
        if (Number.isNaN(rawOffset)) {
            throw new Error('python/p.py did not print a numeric offset: ' + output.toString().trim());
        }

        // Empirical correction of the matcher's result (not machine learning).
        const moveSize = calibrateMoveSize(rawOffset);

        // Locate the draggable slider handle.
        const element = await page.$('#secsdk-captcha-drag-wrapper>div:nth-child(2)');
        if (!element) {
            throw new Error('captcha slider handle not found');
        }
        // boundingBox() yields null when the element is not visible.
        const size = await element.boundingBox();
        if (!size) {
            throw new Error('captcha slider handle has no bounding box');
        }

        await page.mouse.move(size.x, size.y); // move to the top-left corner of the handle
        await page.mouse.down(); // press the mouse button
        // Drag right by moveSize + 20 pixels in 200 steps; y stays unchanged.
        await page.mouse.move(size.x+moveSize+20, size.y, {steps:200});
        await page.mouse.up(); // release the mouse button

        // Read the cookies after giving the page time to react.
        await sleep(3000);
        await page.setContent("");
        const cookie = await page.evaluate(() => document.cookie);

        // The author states this cookie can then be used for normal requests.
        console.log('cookie: '+cookie);
    } finally {
        await browser.close();
    }

}

/**
 * Streams the resource at `src` into the file at `path`.
 *
 * @param {string} src - URL handed to `request`.
 * @param {string} path - Destination file path.
 * @returns {Promise<void>} Resolves when the file has been fully written;
 *     rejects with the stream error if the download or the write fails.
 */
function downloadImg(src, path) {
  return new Promise(function (resolve, reject) {
    const writeStream = fs.createWriteStream(path);
    const readStream = request(src);

    // pipe() does not close the destination when the source fails, so
    // release the file handle here and surface the error to the caller
    // instead of leaving the promise pending forever.
    readStream.on("error", function (err) {
      writeStream.destroy();
      reject(err);
    });
    writeStream.on("error", reject);
    // "finish" fires after all data has been flushed to the file.
    writeStream.on("finish", function () {
      resolve();
    });

    readStream.pipe(writeStream);
  });
}

/**
 * Returns a promise that is fulfilled (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms){
    return new Promise(function (wake) {
        setTimeout(function () {
            wake();
        }, ms);
    });
}

// Start the scraper; report any failure and exit non-zero instead of
// leaving the returned promise unhandled.
run().catch(function (err) {
    console.error(err);
    process.exitCode = 1;
});

---------------------------------------------------------------------------------------------------------------------------------

//python
# coding=UTF-8

import cv2
import sys

def show(name):
    """Display the image ``name`` in a window titled 'Show' until a key is pressed.

    Despite its name, the ``name`` argument is the image passed to
    ``cv2.imshow``, not a file name.
    """
    window_title = 'Show'
    cv2.imshow(window_title, name)
    # waitKey(0) blocks until a key press; afterwards all windows are closed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def _tran_canny(image):
    """Return the Canny edge map of ``image`` after a 3x3 Gaussian blur."""
    # Blur first to reduce noise before edge detection.
    denoised = cv2.GaussianBlur(image, (3, 3), 0)
    # Canny thresholds: 50 (lower) and 150 (upper).
    edges = cv2.Canny(denoised, 50, 150)
    return edges


def detect_displacement(img_slider_path, image_background_path):
    """Find where the slider piece best matches the captcha background.

    Both images are loaded as grayscale, reduced to Canny edge maps and
    compared with normalized cross-correlation template matching.

    Args:
        img_slider_path: Path of the slider-piece image.
        image_background_path: Path of the captcha background image.

    Returns:
        The x coordinate of the best match's top-left corner minus the
        width of the slider image.

    Raises:
        OSError: If either image cannot be read or decoded.
    """
    # Flag 0 loads the image in grayscale mode.
    slider = cv2.imread(img_slider_path, 0)
    background = cv2.imread(image_background_path, 0)

    # cv2.imread reports failure by returning None instead of raising.
    if slider is None:
        raise OSError("cannot read slider image: %s" % img_slider_path)
    if background is None:
        raise OSError("cannot read background image: %s" % image_background_path)

    # Search for the (smaller) slider inside the (larger) background, using
    # the documented argument order: image to search first, template second.
    res = cv2.matchTemplate(_tran_canny(background), _tran_canny(slider), cv2.TM_CCOEFF_NORMED)
    # For TM_CCOEFF_NORMED the best match is at the location of the maximum.
    _, _, _, max_loc = cv2.minMaxLoc(res)

    # A grayscale image's shape is (height, width).
    slider_width = slider.shape[1]
    return max_loc[0] - slider_width

if __name__ == '__main__':
    import os

    # Resolve the images relative to this script file. sys.path[0] is not
    # guaranteed to be the script directory (e.g. with python -P).
    script_dir = os.path.dirname(os.path.abspath(__file__))
    displacement = detect_displacement(
        os.path.join(script_dir, "slide.png"),
        os.path.join(script_dir, "background.jpeg"),
    )
    # The Node.js caller parses this single printed number from stdout.
    print(displacement)

你可能感兴趣的:(爬虫,node.js,python)