首先将鼠标移至小块处,会出现滑动后的图片(即原图)
利用 selenium 找到验证码图片所在的元素,这里的定位方式为 By.CLASS_NAME,值为 'gt_box'
这样,我们就可以截取整个网页,再利用图片元素返回的位置和宽高,裁切出验证码图片并保存
接下来,点击小块并按住不放,出现带缺口的图片
通过同样的方法,可以利用selenium裁剪出带缺口的验证码
具体想法,比较两张图片,得到待拼合的滑块和缺口的距离,依照距离拖动鼠标,完成拼图
首先,滑块的x轴坐标是不变的,滑块的大小也是不变的,因此,我们只要比较滑块右侧区域内两张图片对应像素点的RGB数据,如果差距超过一定范围,则代表像素点不同,这样我们就找到了缺口位置。
人为的滑动一般是先快后慢,我们需要模拟这个滑动轨迹,否则会被识别为机器,验证无法通过。这里使用先加速后减速,并加入随机波动的方法拟合人为滑动,成功率较好。
问题
由于之前的显示比例不是100%,因此使用selenium截取元素图片时会出现位置上的偏差,截出的图片与我们想要的图片不同
解决方案
1.将显示比例改为100% (win10 桌面右击 显示设置)
2.对截图坐标进行放缩 (见代码)
import random
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
USER_NAME = '' # account name typed into the login form
PASSWORD = '' # password typed into the login form
MULTIPE = 1.5 # display scaling factor; the author's machine is set to 150%
BORDER = 6 # x coordinate of the slider's left edge inside the captcha image is 6
class CrackBili(object):
    """Drive the Bilibili login page and solve its slider captcha.

    The flow is: open the login page and type the credentials, screenshot
    the captcha before and after pressing the slider, locate the gap by
    comparing the two screenshots, then drag the slider along a
    human-looking track.  On failure the whole flow is repeated.
    """

    def __init__(self):
        self.url = 'https://passport.bilibili.com/login'
        self.browser = webdriver.Chrome()
        self.browser.maximize_window()
        # General wait (10 s) used when looking up page elements.
        self.wait = WebDriverWait(self.browser, 10)
        # Short wait (1 s) used only to poll for the captcha success text.
        self.wait_pass = WebDriverWait(self.browser, 1)
        self.user_name = USER_NAME
        self.password = PASSWORD

    def __del__(self):
        # __init__ may have failed before ``browser`` was assigned, so do
        # not assume the attribute exists.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole WebDriver session (and the driver
            # process), whereas close() only closes the current window.
            browser.quit()

    def get_slider(self):
        """
        Wait for the slider knob to become clickable.
        :return: the slider element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'gt_slider_knob')))

    def get_position(self):
        """
        Get the captcha position on the page.
        :return: tuple ``(top, bottom, left, right)`` built from the
            element's location and size
        """
        img = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'gt_box')))
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the page.
        :return: the screenshot as a PIL image
        """
        png = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png))

    def get_bili_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it.
        :param name: file name the cropped captcha is saved under
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', left, top, right, bottom)
        screenshot = self.get_screenshot()
        # Element coordinates are scaled by MULTIPE to match the screenshot.
        # crop() needs a real 4-tuple: a bare map() iterator cannot be
        # indexed, which recent Pillow versions do when validating the box.
        box = tuple(int(edge * MULTIPE) for edge in (left, top, right, bottom))
        captcha = screenshot.crop(box)
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page and type the user name and password.
        :return: None
        """
        self.browser.get(self.url)
        user_name = self.wait.until(
            EC.presence_of_element_located((By.ID, 'login-username')))
        password = self.wait.until(
            EC.presence_of_element_located((By.ID, 'login-passwd')))
        user_name.send_keys(self.user_name)
        password.send_keys(self.password)

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap.
        :param image1: captcha without the gap
        :param image2: captcha with the gap
        :return: x coordinate (in screenshot pixels) of the first column
            containing a differing pixel, or the start column if the
            images never differ
        """
        # The slider's right edge sits at x = 60 in the captcha image, so
        # the scan starts to the right of it (scaled to screenshot pixels).
        left = int(60 * MULTIPE)
        # Load the pixel accessors once instead of once per pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        for i in range(left, width):
            for j in range(height):
                if not self._pixels_close(pixels1[i, j], pixels2[i, j]):
                    return i
        return left

    @staticmethod
    def _pixels_close(pixel1, pixel2, threshold=60):
        """Return True when the first three channels each differ by less than threshold."""
        return (abs(pixel1[0] - pixel2[0]) < threshold
                and abs(pixel1[1] - pixel2[1]) < threshold
                and abs(pixel1[2] - pixel2[2]) < threshold)

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Check whether two images have (nearly) the same pixel at a position.
        :param image1: first image
        :param image2: second image
        :param x: x position
        :param y: y position
        :return: True if every one of the first three channels differs by
            less than 60, otherwise False
        """
        return self._pixels_close(image1.load()[x, y], image2.load()[x, y])

    def get_track(self, distance):
        """
        Build the list of per-step x offsets used to drag the slider.

        The slider accelerates over the first 3/4 of the distance and then
        decelerates, with a randomly chosen acceleration at every step.
        The returned steps always add up to ``round(distance)``.
        :param distance: total offset to cover
        :return: list of non-negative integer step offsets (empty when
            ``distance`` is not positive)
        """
        track = []
        # Exact (fractional) displacement simulated so far.
        current = 0
        # Integer displacement already handed out through ``track``.
        emitted = 0
        # Switch from acceleration to deceleration past this point.
        mid = distance * 3 / 4
        # Duration of one simulation step.
        t = 0.1
        v = 0
        # Lower bound on speed while decelerating, so the slider can never
        # stall or run backwards before the target is reached.
        min_speed = 2
        while current < distance:
            if current < mid:
                a = random.randint(2, 3)
            else:
                a = -random.randint(6, 7)
            v0 = v
            v = v0 + a * t
            if a < 0 and v < min_speed:
                v = min_speed
            # Average speed over the step; equals v0*t + a*t*t/2 whenever
            # the speed floor above did not kick in.
            move = (v0 + v) / 2 * t
            # Never overshoot the target.
            current = min(current + move, distance)
            # Emit the change of the rounded position rather than rounding
            # each small step on its own, so rounding errors cannot pile up.
            target = round(current)
            track.append(target - emitted)
            emitted = target
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.
        :param slider: the slider element
        :param track: list of x offsets, one per step
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button.
        :return: None
        """
        submit = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'btn-login')))
        submit.click()
        time.sleep(10)
        print('登录成功')

    def crack(self):
        """
        Solve the captcha and log in, retrying from scratch until it passes.
        :return: None
        """
        # A loop replaces the original self-recursion so that repeated
        # failures cannot grow the call stack.
        while True:
            # Open the page and type the user name and password.
            self.open()
            # Hover over the slider so the complete image is shown.
            slider = self.get_slider()
            ActionChains(self.browser).move_to_element(slider).perform()
            # Screenshot of the captcha without the gap.
            image1 = self.get_bili_image('captcha1.png')
            ActionChains(self.browser).click_and_hold(slider).perform()
            # Screenshot of the captcha with the gap.
            image2 = self.get_bili_image('captcha2.png')
            # Locate the gap.
            gap = self.get_gap(image1, image2)
            print('缺口位置', gap)
            # The screenshot is scaled by MULTIPE; convert back to page units.
            gap = int(gap / MULTIPE)
            # Subtract the slider's own left offset.
            gap -= BORDER
            # Build the movement track.
            track = self.get_track(gap)
            print('滑动轨迹', track)
            # Drag the slider.
            self.move_to_gap(slider, track)
            try:
                success = self.wait_pass.until(
                    EC.text_to_be_present_in_element(
                        (By.CLASS_NAME, 'gt_info_text'), '验证通过'))
                print(success)
            except TimeoutException:
                success = None
            if success:
                self.login()
                return
            # Otherwise fall through and retry.
# Script entry point: create the cracker and run one full login attempt.
if __name__ == '__main__':
    crack = CrackBili()
    crack.crack()
这里,我将获取网页、输入用户名密码、获取滑块这几个步骤单独拿了出来,主要是考虑到失败后重新刷新页面、重新输入用户名密码并重新获取滑块可能会比较耗时。不过这个版本的代码需要额外获取刷新验证码的按钮,实际耗时可能相差不大。
import random
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
USER_NAME = '' # account name typed into the login form
PASSWORD = '' # password typed into the login form
MULTIPE = 1.5 # display scaling factor; the author's machine is set to 150%
BORDER = 6 # x coordinate of the slider's left edge inside the captcha image is 6
class CrackBili(object):
    """Drive the Bilibili login page and solve its slider captcha.

    The caller opens the page and fetches the slider once; ``crack`` then
    screenshots the captcha before and after pressing the slider, locates
    the gap by comparing the two screenshots, and drags the slider along a
    human-looking track.  On failure the captcha is refreshed and the
    attempt is repeated on the same page.
    """

    def __init__(self):
        self.url = 'https://passport.bilibili.com/login'
        self.browser = webdriver.Chrome()
        self.browser.maximize_window()
        # General wait (10 s) used when looking up page elements.
        self.wait = WebDriverWait(self.browser, 10)
        # Short wait (1 s) used only to poll for the captcha success text.
        # The original assigned this to ``self.wait`` again, which left
        # ``wait_pass`` undefined even though ``crack`` relies on it.
        self.wait_pass = WebDriverWait(self.browser, 1)
        self.user_name = USER_NAME
        self.password = PASSWORD

    def __del__(self):
        # __init__ may have failed before ``browser`` was assigned, so do
        # not assume the attribute exists.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole WebDriver session (and the driver
            # process), whereas close() only closes the current window.
            browser.quit()

    def get_slider(self):
        """
        Wait for the slider knob to become clickable.
        :return: the slider element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'gt_slider_knob')))

    def get_position(self):
        """
        Get the captcha position on the page.
        :return: tuple ``(top, bottom, left, right)`` built from the
            element's location and size
        """
        img = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'gt_box')))
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the page.
        :return: the screenshot as a PIL image
        """
        png = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png))

    def get_bili_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it.
        :param name: file name the cropped captcha is saved under
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', left, top, right, bottom)
        screenshot = self.get_screenshot()
        # Element coordinates are scaled by MULTIPE to match the screenshot.
        # crop() needs a real 4-tuple: a bare map() iterator cannot be
        # indexed, which recent Pillow versions do when validating the box.
        box = tuple(int(edge * MULTIPE) for edge in (left, top, right, bottom))
        captcha = screenshot.crop(box)
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page and type the user name and password.
        :return: None
        """
        self.browser.get(self.url)
        user_name = self.wait.until(
            EC.presence_of_element_located((By.ID, 'login-username')))
        password = self.wait.until(
            EC.presence_of_element_located((By.ID, 'login-passwd')))
        user_name.send_keys(self.user_name)
        password.send_keys(self.password)

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap.
        :param image1: captcha without the gap
        :param image2: captcha with the gap
        :return: x coordinate (in screenshot pixels) of the first column
            containing a differing pixel, or the start column if the
            images never differ
        """
        # The slider's right edge sits at x = 60 in the captcha image, so
        # the scan starts to the right of it (scaled to screenshot pixels).
        left = int(60 * MULTIPE)
        # Load the pixel accessors once instead of once per pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        for i in range(left, width):
            for j in range(height):
                if not self._pixels_close(pixels1[i, j], pixels2[i, j]):
                    return i
        return left

    @staticmethod
    def _pixels_close(pixel1, pixel2, threshold=60):
        """Return True when the first three channels each differ by less than threshold."""
        return (abs(pixel1[0] - pixel2[0]) < threshold
                and abs(pixel1[1] - pixel2[1]) < threshold
                and abs(pixel1[2] - pixel2[2]) < threshold)

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Check whether two images have (nearly) the same pixel at a position.
        :param image1: first image
        :param image2: second image
        :param x: x position
        :param y: y position
        :return: True if every one of the first three channels differs by
            less than 60, otherwise False
        """
        return self._pixels_close(image1.load()[x, y], image2.load()[x, y])

    def get_track(self, distance):
        """
        Build the list of per-step x offsets used to drag the slider.

        The slider accelerates over the first 3/4 of the distance and then
        decelerates, with a randomly chosen acceleration at every step.
        The returned steps always add up to ``round(distance)``.
        :param distance: total offset to cover
        :return: list of non-negative integer step offsets (empty when
            ``distance`` is not positive)
        """
        track = []
        # Exact (fractional) displacement simulated so far.
        current = 0
        # Integer displacement already handed out through ``track``.
        emitted = 0
        # Switch from acceleration to deceleration past this point.
        mid = distance * 3 / 4
        # Duration of one simulation step.
        t = 0.10
        v = 0
        # Lower bound on speed while decelerating, so the slider can never
        # stall or run backwards before the target is reached.
        min_speed = 2
        while current < distance:
            if current < mid:
                a = random.randint(2, 3)
            else:
                a = -random.randint(6, 7)
            v0 = v
            v = v0 + a * t
            if a < 0 and v < min_speed:
                v = min_speed
            # Average speed over the step; equals v0*t + a*t*t/2 whenever
            # the speed floor above did not kick in.
            move = (v0 + v) / 2 * t
            # Never overshoot the target.
            current = min(current + move, distance)
            # Emit the change of the rounded position rather than rounding
            # each small step on its own, so rounding errors cannot pile up.
            target = round(current)
            track.append(target - emitted)
            emitted = target
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.
        :param slider: the slider element
        :param track: list of x offsets, one per step
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button.
        :return: None
        """
        submit = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'btn-login')))
        submit.click()
        time.sleep(10)
        print('登录成功')

    def refresh(self):
        """
        Click the captcha's refresh button to request a new puzzle.
        :return: None
        """
        refresh = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'gt_refresh_button')))
        refresh.click()

    def crack(self, slider):
        """
        Solve the captcha and log in, refreshing and retrying until it passes.
        :param slider: the slider element obtained from ``get_slider``
        :return: None
        """
        # A loop replaces the original self-recursion so that repeated
        # failures cannot grow the call stack.
        while True:
            # Hover over the slider so the complete image is shown.
            ActionChains(self.browser).move_to_element(slider).perform()
            # Screenshot of the captcha without the gap.
            image1 = self.get_bili_image('captcha1.png')
            ActionChains(self.browser).click_and_hold(slider).perform()
            # Screenshot of the captcha with the gap.
            image2 = self.get_bili_image('captcha2.png')
            # Locate the gap.
            gap = self.get_gap(image1, image2)
            # The screenshot is scaled by MULTIPE; convert back to page units.
            gap = int(gap / MULTIPE)
            print('缺口位置', gap)
            # Subtract the slider's own left offset.
            gap -= BORDER
            # Build the movement track.
            track = self.get_track(gap)
            print('滑动轨迹', track)
            # Drag the slider.
            self.move_to_gap(slider, track)
            try:
                success = self.wait_pass.until(
                    EC.text_to_be_present_in_element(
                        (By.CLASS_NAME, 'gt_info_text'), '验证通过'))
                print(success)
            except TimeoutException:
                success = None
            if success:
                self.login()
                return
            # Failed: hover over the slider again, request a new captcha
            # and retry on the same page.
            ActionChains(self.browser).move_to_element(slider).perform()
            self.refresh()
# Script entry point: the page is opened and the slider fetched only once,
# so retries inside crack() can reuse them.
if __name__ == '__main__':
    # Create the instance.
    crack = CrackBili()
    # Open the page and type the user name and password.
    crack.open()
    # Fetch the slider element.
    slider = crack.get_slider()
    crack.crack(slider)
Python3网络爬虫开发实战教程