使用 Selenium 识别小破站(bilibili)的滑块验证码

实现原理:使用 Selenium 模拟登录,分别截取带缺口和不带缺口的两张验证码图片,然后逐像素比对这两张图片,找出缺口所在的位置,从而得到滑块需要拖动的距离,最后使用动作链(ActionChains)拖动滑块。这样就实现了模拟登录 bilibili。

import random
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By


class LoginBili(object):
    """Log in to bilibili with Selenium and drag the slider captcha.

    The flow is: submit the login form, screenshot the captcha twice (once
    with canvas #3 visible, once with it hidden), find the first pixel column
    where the two screenshots differ, and drag the slider by that distance.
    """

    login_url = 'https://passport.bilibili.com/login'

    # Top-left corner and (width, height) of the captcha inside the page
    # screenshot.  These are hard-coded for the original author's window
    # layout; override them on a subclass or instance for another screen.
    captcha_origin = (1180, 390)
    captcha_size = (253, 160)

    # Two pixels count as different when any of R, G, B differs by at least
    # this much.
    pixel_threshold = 60

    # Added to the first differing column to get the drag distance (the
    # original author describes this value as the slider width).
    button_width = 42

    def __init__(self):
        """Start a Chrome session and store the (placeholder) credentials."""
        opt = webdriver.ChromeOptions()
        # Uncomment to run without a visible browser window.
        # opt.add_argument('--headless')
        self.driver = webdriver.Chrome(options=opt)

        self.username = 'xxx'
        self.pwd = 'xxx'

    def get_captcha(self):
        """Submit the login form and capture the two captcha screenshots.

        Returns:
            A tuple ``(img1, img2)`` of cropped images: ``img1`` is taken
            with canvas #3 shown, ``img2`` with canvas #3 hidden.  They are
            also written to ``img1.png`` and ``img2.png``.
        """
        # Fill in the credentials and press the login button.
        self.driver.implicitly_wait(10)
        self.driver.get(self.login_url)
        self.driver.maximize_window()
        self.driver.find_element(By.ID, 'login-username').send_keys(self.username)
        self.driver.find_element(By.ID, 'login-passwd').send_keys(self.pwd)
        self.driver.find_element(By.CLASS_NAME, 'btn-login').click()
        # Give the captcha widget time to render.
        time.sleep(3)

        # Reset the inline style of canvas #3 so it is displayed; per the
        # original author's note the gap is not visible in this state.
        self.driver.execute_script("document.querySelectorAll('canvas')[3].style=''")
        img1 = self.crop_img('img1.png')
        # Hide canvas #3; per the original author's note this reveals the gap.
        self.driver.execute_script("document.querySelectorAll('canvas')[3].style='display: none;'")
        img2 = self.crop_img('img2.png')

        return img1, img2

    def crop_img(self, img_file_name):
        """Screenshot the page and crop out the captcha area.

        The crop box comes from ``captcha_origin`` and ``captcha_size``
        rather than from the element's reported location, which the original
        author found unreliable.

        Args:
            img_file_name: Path the cropped captcha is saved to.

        Returns:
            The cropped captcha image.  The full screenshot is additionally
            saved as ``screen.png``.
        """
        # Let the style change made by the caller take effect.
        time.sleep(1)

        left, top = self.captcha_origin
        width, height = self.captcha_size
        box = (int(left), int(top), int(left + width), int(top + height))

        # Decode the PNG bytes of the whole page in memory.
        screen = Image.open(BytesIO(self.driver.get_screenshot_as_png()))
        captcha = screen.crop(box)
        captcha.save(img_file_name)
        screen.save('screen.png')

        return captcha

    @staticmethod
    def _pixels_differ(pixel1, pixel2, threshold):
        """Return True if any of the first three channels differs by >= threshold."""
        return any(abs(a - b) >= threshold for a, b in zip(pixel1[:3], pixel2[:3]))

    def compare_captcha_one(self, img1, img2, i, j):
        """Tell whether the pixel at ``(i, j)`` differs between two images.

        Args:
            img1: First image (must provide ``load()``).
            img2: Second image (must provide ``load()``).
            i: Column of the pixel.
            j: Row of the pixel.

        Returns:
            True when at least one of the R, G, B channels differs by
            ``pixel_threshold`` or more, otherwise False.
        """
        return self._pixels_differ(
            img1.load()[i, j], img2.load()[i, j], self.pixel_threshold
        )

    def compare_captcha(self, img1, img2):
        """Find how far the slider has to be dragged.

        The images are scanned column by column, top to bottom; the first
        differing pixel determines the result.

        Args:
            img1: First captcha image.
            img2: Second captcha image.

        Returns:
            The column index of the first differing pixel plus
            ``button_width``, or None when the images never differ.
        """
        # Fetch the pixel accessors once instead of once per pixel.
        pixels1 = img1.load()
        pixels2 = img2.load()
        # Only the area common to both images can be compared.
        width = min(img1.size[0], img2.size[0])
        height = min(img1.size[1], img2.size[1])

        for x in range(width):
            for y in range(height):
                if self._pixels_differ(pixels1[x, y], pixels2[x, y], self.pixel_threshold):
                    return x + self.button_width
        return None

    def move_button(self, move):
        """Drag the slider button ``move`` pixels to the right.

        The distance is split into random-sized steps so the drag is not a
        single jump.

        Args:
            move: Distance in pixels.  ``None`` or ``0`` (for example when no
                gap was found) means there is nothing to drag.

        Raises:
            ValueError: If ``move`` is negative.
        """
        if not move:
            return
        if move < 0:
            raise ValueError('move must not be negative, got %r' % (move,))

        button = self.driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
        actions = ActionChains(self.driver)
        # Press the button exactly once; every later step only moves.
        actions.click_and_hold(button)

        remaining = move
        while remaining > 0:
            # Start at 1 so every queued step makes progress.
            step = random.randint(1, remaining)
            actions.move_by_offset(xoffset=step, yoffset=0)
            remaining -= step

        actions.release().perform()

    def run(self):
        """Run the whole flow: capture, compare, then drag the slider."""
        img1, img2 = self.get_captcha()
        move_distance = self.compare_captcha(img1, img2)
        self.move_button(move_distance)


if __name__ == '__main__':
    # Script entry point: create the login bot and run the full flow.
    bot = LoginBili()
    bot.run()

你可能感兴趣的:(Python爬虫,定位,selenium)