爬虫进阶:验证码突破--7、滑动验证码破解

测试网址是网易cc,只是研究爬虫自动化

1、Iframe窗口切换

import time, re
from selenium import webdriver
chrome_option = webdriver.ChromeOptions()
driver = webdriver.Chrome(chrome_options=chrome_option)
driver.set_window_size(1440, 900)
driver.get("http://cc.163.com/")
driver.implicitly_wait(10)
if driver.find_element_by_id("browser-zoom-tips"):
    driver.find_element_by_xpath("//div[@class='zoom-closeBt']").click()
#这里有个iframe,找到iframe父节点

爬虫进阶:验证码突破--7、滑动验证码破解_第1张图片

ele=driver.find_element_by_xpath("//*[@id='js-login-urs']/iframe")
driver.switch_to_frame(ele)
driver.find_element_by_class_name("j-inputtext dlemail j-nameforslide").send_keys("********")
driver.find_element_by_class_name("j-inputtext dlpwd").send_keys("*********")    
#driver.find_element_by_xpath("//div[@class='yidun_slider']").click()

2、滑动验证图片获取
如下图所示,在元素查找中能获得滑块和背景图的class属性,这样我们可以通过定位这个属性,将这两张图片下载下来
爬虫进阶:验证码突破--7、滑动验证码破解_第2张图片

target = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'yidun_bg-img')))
template = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'yidun_jigsaw')))
target_link = target.get_attribute('src')
template_link = template.get_attribute('src')
target_img = Image.open(BytesIO(requests.get(target_link).content))
template_img = Image.open(BytesIO(requests.get(template_link).content))
target_img.save('target.jpg')
template_img.save('template.png')
local_img = Image.open('target.jpg')

3、获取匹配距离

 def match(self, target, template):
        img_rgb = cv2.imread(target)
        img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
        template = cv2.imread(template, 0)
        run = 1
        w, h = template.shape[::-1]
        print(w, h)
        res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
        run = 1    
        # 使用二分法查找阈值的精确值
        L = 0
        R = 1
        while run < 20:
            run += 1
            threshold = (R + L) / 2
            print(threshold)
            if threshold < 0:
                print('Error')
                return None
            loc = np.where(res >= threshold)
            print(len(loc[1]))
            if len(loc[1]) > 1:
                L += (R - L) / 2
            elif len(loc[1]) == 1:
                print('目标区域起点x坐标为:%d' % loc[1][0])
                break
            elif len(loc[1]) < 1:
                R -= (R - L) / 2
        return loc[1][0]

4、由于人滑动滑块是非匀速滑动的,有一定的加速,或者说是后移,所以在模拟滑动时,也需要加上这一块:

 def get_tracks(self, distance):
        """
        拿到移动轨迹,模仿人的滑动行为,先匀加速后均减速
        匀变速运动基本公式:
        ①:v=v0+at
        ②:s=v0t+½at²
        ③:v²-v0²=2as
        :param distance:需要移动的距离
        :return:存放每0.3秒移动的距离
        """
        distance += 20  # 先滑过一点,最后再反着滑动回来
        # 初速度
        v = 0
        # 单位时间为0.3s来统计轨迹,轨迹即0.3s内的位移
        t = 0.3
        # 位移/轨迹列表,列表内的一个元素代表0.3s的位移
        forward_tracks = []
        # 当前位移
        current = 0
        # 到达mid值开始减速
        mid = distance * 4 / 5
        while current < distance:
            if current < mid:
                # 加速度越小,单位时间的位移越小,模拟的轨迹就越多越详细
                a = 2
            else:
                a = -3
            # 初速度
            v0 = v
            # 0.3秒时间内的位移
            s = v0 * t + 0.5 * a * (t ** 2)
            # 当前的位置
            current += s
            # 添加到轨迹列表,round()为保留一位小数且该小数要进行四舍五入
            forward_tracks.append(round(s))
            # 速度已经达到v,该速度作为下次的初速度
            v = v0 + a * t

        # 反着滑动到准确位置
        back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1]  # 总共等于-20
        return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}

5、以下就是完整的代码

from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import logging
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import cv2
import numpy as np
from io import BytesIO
import time, requests   
LocalDay = time.strftime("%Y-%m-%d")
logFile = r'D:\log\{0}.log'.format(LocalDay)
logging.basicConfig(level=logging.DEBUG,
                 format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                 datefmt='%a, %d %b %Y %H:%M:%S',
                 filename=logFile,
                 filemode='a+')

console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)

class CrackSlider():
    """
    通过浏览器截图,识别验证码中缺口位置,获取需要滑动距离,并模仿人类行为破解滑动验证码
    """
    def __init__(self):
        self.url = 'http://cc.163.com/'
        self.driver = webdriver.Chrome(chrome_options=webdriver.ChromeOptions())
        self.wait = WebDriverWait(self.driver, 10)
        self.zoom = 1

    def open(self):
        self.driver.maximize_window()
        self.driver.get("http://cc.163.com/")
        self.driver.implicitly_wait(10)
        if self.driver.find_element_by_id("browser-zoom-tips"):
            self.driver.find_element_by_xpath("//div[@class='zoom-closeBt']").click()
        self.driver.find_element_by_xpath("//span[contains(.,'登录')]").click()
        ele = self.driver.find_element_by_xpath("//div[@id='js-login-urs']/iframe")
        self.driver.switch_to_frame(ele)
        self.driver.find_element_by_name("email").send_keys("[email protected]")
        self.driver.find_element_by_name("password").send_keys("huoyingchong64")
        #self.driver.find_element_by_xpath("//div[@class='yidun_slider']").click()

    def get_pic(self):
        time.sleep(2)
        target = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'yidun_bg-img')))
        template = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'yidun_jigsaw')))
        target_link = target.get_attribute('src')
        template_link = template.get_attribute('src')
        target_img = Image.open(BytesIO(requests.get(target_link).content))
        template_img = Image.open(BytesIO(requests.get(template_link).content))
        target_img.save('target.jpg')
        template_img.save('template.png')
        local_img = Image.open('target.jpg')
        size_loc = local_img.size
        self.zoom = 320 / int(size_loc[0])

    def get_tracks(self, distance):
        """
        拿到移动轨迹,模仿人的滑动行为,先匀加速后均减速
        匀变速运动基本公式:
        ①:v=v0+at
        ②:s=v0t+½at²
        ③:v²-v0²=2as
        :param distance:需要移动的距离
        :return:存放每0.3秒移动的距离
        """
        distance += 20  # 先滑过一点,最后再反着滑动回来
        # 初速度
        v = 0
        # 单位时间为0.3s来统计轨迹,轨迹即0.3s内的位移
        t = 0.3
        # 位移/轨迹列表,列表内的一个元素代表0.3s的位移
        forward_tracks = []
        # 当前位移
        current = 0
        # 到达mid值开始减速
        mid = distance * 4 / 5
        while current < distance:
            if current < mid:
                # 加速度越小,单位时间的位移越小,模拟的轨迹就越多越详细
                a = 2
            else:
                a = -3
            # 初速度
            v0 = v
            # 0.3秒时间内的位移
            s = v0 * t + 0.5 * a * (t ** 2)
            # 当前的位置
            current += s
            # 添加到轨迹列表,round()为保留一位小数且该小数要进行四舍五入
            forward_tracks.append(round(s))
            # 速度已经达到v,该速度作为下次的初速度
            v = v0 + a * t

        # 反着滑动到准确位置
        back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1]  # 总共等于-20
        return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}

    def match(self, target, template):
        img_rgb = cv2.imread(target)
        img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
        template = cv2.imread(template, 0)
        run = 1
        w, h = template.shape[::-1]
        print(w, h)
        res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
        run = 1

        # 使用二分法查找阈值的精确值
        L = 0
        R = 1
        while run < 20:
            run += 1
            threshold = (R + L) / 2
            print(threshold)
            if threshold < 0:
                print('Error')
                return None
            loc = np.where(res >= threshold)
            print(len(loc[1]))
            if len(loc[1]) > 1:
                L += (R - L) / 2
            elif len(loc[1]) == 1:
                print('目标区域起点x坐标为:%d' % loc[1][0])
                break
            elif len(loc[1]) < 1:
                R -= (R - L) / 2
        return loc[1][0]

    def crack_slider(self):
        slider = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'yidun_slider')))
        ActionChains(self.driver).click_and_hold(slider).perform()

        for track in tracks['forward_tracks']:
            ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()

        time.sleep(0.5)
        for back_tracks in tracks['back_tracks']:
            ActionChains(self.driver).move_by_offset(xoffset=back_tracks, yoffset=0).perform()

        ActionChains(self.driver).move_by_offset(xoffset=-4, yoffset=0).perform()
        ActionChains(self.driver).move_by_offset(xoffset=4, yoffset=0).perform()
        time.sleep(0.5)

        ActionChains(self.driver).release().perform()

        self.driver.find_element_by_id("dologin").click()

if __name__ == '__main__':
    cs = CrackSlider()
    cs.open()
    target = 'target.jpg'
    template = 'template.png'
    cs.get_pic()
    distance = cs.match(target, template)
    tracks = cs.get_tracks((distance +7) * 0.66)  # 对位移的缩放计算,*0.66本来应该是*cs.zoom的,不知道什么原因,比例总是错误
    cs.crack_slider()

参考地址:
http://www.cnblogs.com/llhtjwq/p/9210694.html
以上代码我的个人电脑测试没有问题,比例是按照我自己的电脑量身定做。。

你可能感兴趣的:(爬虫)