爬虫验证码:破解【点击旋转验证码】

这里破解某某动漫的点击翻转验证码为例:爬虫验证码:破解【点击旋转验证码】_第1张图片
分析页面的源码发现,这些翻转的图片,我们可以下载下来,既然可以获取到这些图片,那么这个的破解思路为:

  1. 获取1000张样本集合: 用selenium访问,通过截屏,以及切片,一次获取四张图片,然后点击"换一组",依次循环1000次
     
  2. 对获取的图片进行去重: 通过图片的rbg总值来去重,rgb总值相差3000以为,基本认为这是同一张图片,就只保留一张。
     
  3. 人为手动给去重的图片进行调整。
     
  4. 通过selenium来模拟登录,输入账号和密码
     
  5. 截屏,截取验证码图片
     
  6. 翻转每张验证码,记录翻转次数,通过和样本基本的每张图片的rgb进行一一比较,如果每个像素的rgb都小于100,那么这个翻转后的图片就是正确的位置,返回翻转次数。
     
  7. 依次翻转,记录各个图片需要翻转的次数
     
  8. 点击对应图片的元素,进行翻转
     
  9. 点击提交按钮,进行登录
     
  10. 判断是否登录成功

获取1000张样本集的代码:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import  By
from PIL import Image
import random
from io import BytesIO
import time


class Crack(object):
    def __init__(self, start_number, count):
        self.login_url = "http://www.1kkk.com/"
        self.start_number = start_number
        self.count = count
        self.chrome_options = webdriver.ChromeOptions()
        self.chrome_options.add_argument("--healess")
        self.browser = webdriver.Chrome()
        self.browser.maximize_window()
        self.wait = WebDriverWait(self.browser, 30)

    def login(self):
        """
        输入账号,密码
        :return:None
        """
        self.browser.get(self.login_url)
        self.browser.find_element_by_class_name("header-avatar").click()
        # 获取所有图片
        for num in range(self.start_number, self.start_number+self.count):
            self.image_png(num)
            self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "rotate-refresh"))).click()
            time.sleep(0.5)

    def save_screen_png(self):
        """
        获取网页截图
        :return: 截图对象
        """
        self.wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "rotate-background")))
        screen_image = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screen_image))
        screenshot.save("screenshot{}.png".format(random.randint(1, 5)))
        return screenshot

    def image_png(self, num):
        """
        通过获取网页截图,然后进行切片,返回四张图片
        :return:
        """
        screenshot = self.save_screen_png()
        images = []
        for num_2 in range(1, 5):
            # 依次获取5张图片,存入iamges列表中
            images.append(self.get_image_position(screenshot, num, num_2))
        # 获取整体四张图片的坐标
        # 进行切片

    def get_image_position(self, screenshot, number, number_2):
        """
        获取四张图片的下标
        :return: left, top, right, bottom
        """
        image = self.wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='rotate-background'][{}]".format(number_2))))
        location = image.location
        size = image.size
        top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size[
            'width']
        image = screenshot.crop((left, top, right, bottom))
        image.save("./static/total_images/image{}_{}.png".format(number, number_2))
        return image

    def __del__(self):
        self.browser.quit()


def download(start_number, count):
    """
    初始化登录类,下载图片
    :param start_number:开启位置
    :param count: 数量
    :return:
    """
    c = Crack(start_number, count)
    c.login()
    del c


def main():
    download(1, 1000)


if __name__ == '__main__':
    main()

图片去重的代码:

from PIL import Image
import os
import gevent
from gevent import monkey


monkey.patch_all()

# 图片数量
gCount = 0

# 列表,用来保存rgb
rgb_dif_list = []

# 当前保存图片的名称
gNumber = 0


def sum_rgb(image):
    """
    计算rgb的值
    :param images: 图片
    :return: rgb的值
    """
    num = 0
    for i in range(image.size[0]):
        for y in range(image.size[1]):
            pixel = image.load()[i, y]
            num = num + image.load()[i, y][0] + image.load()[i, y][1] + image.load()[i, y][2]
    return num


def check_have_in(num):
    """
    通过rgb的总值,来判断是否已经存在列表
    :param num: Ture or False
    :return: 
    """
    global rgb_dif_list
    if num in rgb_dif_list:
        # 如果存在,就得删除
        return True
    else:
        # 否则就将rgb存入列表中,更改名字,并返回False
        return False


def delete(image_url):
    """
    删除图片
    :param image_url: 图片的url
    :return:
    """
    print("删除图片:", image_url)
    os.remove(image_url)
        

def start_check(start_number, count):
    global rgb_dif_list
    global gCount
    global gNumber
    images_url = "./static/total_images/{}"
    save_url = "./static/images/{}"
    for number_1 in range(start_number, start_number + count):
        for number_2 in range(1, 5):
            image_url = images_url.format("image{}_{}.png".format(number_1, number_2))
            if os.path.isfile(image_url):
                image = Image.open(image_url)
                # 通过元素的rgb三个值相加的总数,通过列表保存,如果在列表中存在就添加,否则就删除
                rgb_num = sum_rgb(image)
                print("image{}_{}.png".format(number_1, number_2), rgb_num)
                # 判断该图片的rgb是否已经存在列表中
                if rgb_num > 4000000:
                    continue
                for num in range(rgb_num-3000, rgb_num+3000):
                    check_result = check_have_in(num)
                    # 判断结果,做响应处理
                    if check_result:
                        # 存在情况,退出
                        break
                else:
                    rgb_dif_list.append(rgb_num)
                    gCount += 1
                    # 不存在情况,更改名字
                    gNumber += 1
                    image.save(save_url.format("images{}.png".format(gNumber)))
    if start_number+count == 501:
        print("剩余图片总数为", gCount)


def main():
    gevent.joinall([
        gevent.spawn(start_check, 1, 100),
        gevent.spawn(start_check, 101, 100),
        gevent.spawn(start_check, 201, 100),
        gevent.spawn(start_check, 301, 100),
        gevent.spawn(start_check, 401, 100),
    ])
    # start_check(1, 10)


if __name__ == "__main__":
    main()

验证码破解:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import os
from PIL import Image
from io import BytesIO


class Crack(object):
    def __init__(self):
        self.login_url = "http://www.1kkk.com/"
        # self.chrome_options = webdriver.ChromeOptions()
        # self.chrome_options.add_argument("--healess")
        self.browser = webdriver.Chrome()
        self.browser.maximize_window()
        self.wait = WebDriverWait(self.browser, 10)
        self.browser.get(self.login_url)
        time.sleep(2)

    def login(self):
        """
        输入账号,密码
        :return:None
        """
        try:
            self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, "header-avatar"))).click()
        except TimeoutException:
            self.browser.refresh()
            self.login()
            return
        # self.browser.find_element_by_class_name("header-avatar").click()
        name_page = self.browser.find_element_by_name("txt_name")
        name_page.send_keys("18218299414")
        password_page = self.browser.find_element_by_name("txt_password")
        password_page.send_keys("shao0812")
        true_or_false = True
        while true_or_false:
            true_or_false = False
            # 获取四张需要旋转的图片
            images = self.image_png()
            # 获取整体四张图片的几次
            turn_num_list = []
            for image in images:
                turn_num_list.append(self.image_turn_num(image))
            # print(turn_num_list)
            for i in turn_num_list:
                if i == 5:
                    self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'rotate-refresh'))).click()
                    time.sleep(3)
                    true_or_false = True
        # 根据上面得到的旋转次数点击图片
        self.click_image(turn_num_list)
        # 结果正确,点击登录按钮
        self.click_submit()
        # todo: 如果旋转出问题,就得重新.来
        # try:
        if self.browser.find_element_by_css_selector(".tip.color-main").text == "请点击下方图片,旋转至正确方向~":
            # 如果登录不成功,将重新刷新页面登录
            self.browser.refresh()
            self.login()
        time.sleep(5)

    def click_image(self, turn_num_list):
        """
        通过算出来的点击次数,来点击图片
        :param turn_num_list: 四张图需要点击的次数
        :return: None
        """
        for i in range(0, len(turn_num_list)):
            if turn_num_list[i] == 0:
                continue
            image = self.wait.until(
                EC.presence_of_element_located((By.XPATH, "//div[@class='rotate-background'][{}]".format(i+1))))
            for _ in range(turn_num_list[i]):
                image.click()
                time.sleep(1)

    def save_screen_png(self):
        """
        获取网页截图
        :return: 截图对象
        """
        screen_image = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screen_image))
        # screenshot.save("screenshot.png")
        return screenshot

    def image_png(self):
        """
        通过获取网页截图,然后进行切片,返回四张图片
        :return:
        """
        screenshot = self.save_screen_png()
        images = []
        for num in range(1, 5):
            # 依次获取4张图片,存入iamges列表中
            images.append(self.get_image(screenshot, num))
        return images

    def get_image(self, screenshot, number):
        """
        获取四张图片的下标
        :return: left, top, right, bottom
        """
        image = self.wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='rotate-background'][{}]".format(number))))
        location = image.location
        size = image.size
        top, bottom, left, right = location['y'], location['y'] + size['height'], location['x'], location['x'] + size[
            'width']
        image = screenshot.crop((left, top, right, bottom))
        # image.save("image{}.png".format(number))
        return image

    def image_turn_num(self, image):
        """
        用获取的图片跟图片库的图片比较,
        :param image: 原图
        :return:
        """
        for i in range(0, 4):
            # 原图最多转三次
            dir_path = "./static/images/"
            change_image = image.rotate(-90*i)
            # change_image.save("change{}.png".format(i))
            for or_path in os.listdir(dir_path):
                or_image = Image.open(os.path.join(dir_path, or_path))
                result = self.examine_pixel(or_image, change_image)
                if result:
                    return i
        return 5

    def examine_pixel(self, image1, image2):
        """
        判断来个图片是否相等
        :param image1: 图片1
        :param image2: 图片2
        :return:
        """
        thredhold = 100
        for x in range(image1.size[0]):
            for y in range(image1.size[1]):
                pixel1 = image1.load()[x, y]
                pixel2 = image2.load()[x, y]
                if not (abs(pixel1[0] - pixel2[0]) < thredhold and abs(pixel1[1] - pixel2[1]) < thredhold and abs(pixel1[2] - pixel2[2]) < thredhold):
                    return False
        return True

    def click_submit(self):
        """
        点击登录按钮
        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, "btnLogin")))
        submit.click()

    def __del__(self):
        self.browser.quit()


def main():
    """pass"""
    c = Crack()
    c.login()


if __name__ == "__main__":
    main()

你可能感兴趣的:(爬虫)