现在极验验证码广泛应用于直播视频、金融服务、电子商务、游戏娱乐、政府企业等各大类型网站
对于这类验证,如果我们直接模拟表单请求,繁琐的认证参数与认证流程会让你蛋碎一地,我们可以用selenium驱动浏览器来解决这个问题,大致分为以下几个步骤:
1、输入用户名,密码
2、点击按钮验证,弹出没有缺口的图
3、获得没有缺口的图片
4、点击滑动按钮,弹出有缺口的图
5、获得有缺口的图片
6、对比两张图片,找出缺口,即滑动的位移
7、按照人的行为习惯,把总位移切成一段段小的位移
8、按照位移移动
9、完成登录
这里以某小说平台为例,自动登录获取cookies
代码如下:
import time
import re
import random
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from urllib.request import urlretrieve
from PIL import Image
from selenium.webdriver.common.action_chains import ActionChains
import pymysql
import json
import time
from selenium import webdriver
import win32api
import win32con
import win32clipboard
from ctypes import *
class yuewen():
    """Log in to https://open.yuewen.com/ with Selenium and solve its slider captcha.

    The flow is: fill in the credentials, download the captcha image with and
    without the gap, locate the gap by pixel comparison, drag the slider by the
    computed distance and finally collect the browser cookies.

    NOTE: ``input_username_password`` clicks at a fixed screen coordinate via
    the Win32 API, so the class only works on Windows with a visible
    (non-headless) browser window.
    """

    # Pixels added to the measured gap column before dragging. The original
    # code wrote this as ``offset -= -8`` (i.e. +8).
    # NOTE(review): confirm the sign is intentional.
    SLIDER_START_COMPENSATION = 8
    # First image column inspected when searching for the gap; presumably this
    # skips the area occupied by the slider piece itself - TODO confirm.
    GAP_SEARCH_START = 70
    # Two pixels count as equal when every RGB channel differs by less than this.
    CHANNEL_TOLERANCE = 100
    # Ratio between the downloaded image size and the size rendered in the page.
    IMAGE_SCALE = 2.428

    def __init__(self):
        """Start a maximized Chrome session and remember the site URL."""
        self.url = 'https://open.yuewen.com/'
        chrome_options = Options()
        chrome_options.add_argument("--no-sandbox")
        self.driver = webdriver.Chrome(options=chrome_options)
        self.driver.maximize_window()

    def input_username_password(self, account, password):
        """Open the site, fill in the login form and submit it.

        :param account: login name typed into the account field
        :param password: password typed into the password field
        """
        # Imported locally so the class does not need a new file-level import.
        from selenium.webdriver.common.by import By

        self.driver.get(self.url)
        time.sleep(1)
        # Open the sign-in dialog.
        self.driver.find_element(By.CLASS_NAME, 'op-button-sign').click()
        time.sleep(1)
        # Real OS-level left click at a fixed screen position; this depends on
        # the screen resolution and the maximized window layout.
        windll.user32.SetCursorPos(740, 260)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTUP, 0, 0, 0, 0)
        self.driver.find_element(By.CLASS_NAME, 'el-input__inner').send_keys(account)
        self.driver.find_element(By.NAME, 'password').send_keys(password)
        time.sleep(3)
        # Submit the form; the captcha is expected to appear afterwards.
        self.driver.find_element(By.CLASS_NAME, 'loginin').click()
        time.sleep(3)

    def _load_rgb(self, path):
        """Open *path*, return an in-memory RGB copy and close the file handle."""
        with Image.open(path) as picture:
            return picture.convert('RGB')

    def get_img(self):
        """Download the captcha image with the gap and the one without it.

        Switches the driver into the ``tcaptcha_iframe`` frame and leaves it
        there so the slider can be operated afterwards.

        :return: ``(image_with_gap, original_image)`` as RGB PIL images
        """
        from selenium.webdriver.common.by import By

        self.driver.switch_to.frame('tcaptcha_iframe')
        time.sleep(3)
        # URL of the image that contains the gap.
        src = self.driver.find_element(By.ID, 'slideBg').get_attribute('src')
        # The untouched image lives at the same URL with img_index switched to 0.
        src_bg = src.replace('img_index=1', 'img_index=0')
        urlretrieve(src, 'img1.png')
        urlretrieve(src_bg, 'img2.png')
        captcha1 = self._load_rgb('img1.png')
        captcha2 = self._load_rgb('img2.png')
        return captcha1, captcha2

    def resize_img(self, img, scale=None):
        """Shrink a downloaded image back to the size it has inside the page.

        :param img: PIL image to shrink
        :param scale: downloaded-size / page-size ratio; defaults to
            ``IMAGE_SCALE``
        :return: the resized image
        """
        if scale is None:
            scale = self.IMAGE_SCALE
        (x, y) = img.size
        x_resize = int(x // scale)
        y_resize = int(y // scale)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        return img.resize((x_resize, y_resize), Image.LANCZOS)

    def _channels_close(self, pixel1, pixel2):
        """Return True when the first three channels all differ by less than the tolerance."""
        limit = self.CHANNEL_TOLERANCE
        return all(abs(c1 - c2) < limit for c1, c2 in zip(pixel1[:3], pixel2[:3]))

    def is_pixel_equal(self, img1, img2, x, y):
        """Compare the pixel at ``(x, y)`` in both images.

        :param img1: image with the gap
        :param img2: original image
        :param x: column
        :param y: row
        :return: True when every RGB channel differs by less than
            ``CHANNEL_TOLERANCE``, otherwise False
        """
        return self._channels_close(img1.load()[x, y], img2.load()[x, y])

    def get_gap_offset(self, img1, img2):
        """Find the first column, from ``GAP_SEARCH_START`` on, where the images differ.

        :param img1: image with the gap
        :param img2: original image
        :return: the column index of the gap, or None when no differing pixel
            is found
        """
        # Load the pixel access objects once instead of once per pixel.
        pixels1, pixels2 = img1.load(), img2.load()
        width = min(img1.size[0], img2.size[0])
        height = min(img1.size[1], img2.size[1])
        for column in range(self.GAP_SEARCH_START, width):
            for row in range(height):
                if not self._channels_close(pixels1[column, row], pixels2[column, row]):
                    return column
        return None

    def get_track(self, offset):
        """Split the drag distance into four equal steps.

        :param offset: gap column returned by ``get_gap_offset``
        :return: list of four equal x-offsets whose sum is
            ``offset + SLIDER_START_COMPENSATION``
        """
        offset = offset + self.SLIDER_START_COMPENSATION
        step = offset / 4
        track = [step, step, step, step]
        print(track)
        return track

    def shake_mouse(self):
        """Nudge the pointer 2 px left and back right to imitate a hand tremor before release."""
        ActionChains(self.driver).move_by_offset(xoffset=-2, yoffset=0).perform()
        ActionChains(self.driver).move_by_offset(xoffset=2, yoffset=0).perform()

    def operate_slider(self, track):
        """Drag the captcha slider along *track* and release it.

        :param track: sequence of x-offsets to move through, in order
        """
        from selenium.webdriver.common.by import By

        slider_bt = self.driver.find_element(By.XPATH, '//div[@class="tc-drag-thumb"]')
        # Press and hold the slider handle.
        ActionChains(self.driver).click_and_hold(slider_bt).perform()
        # Forward movement with a tiny random pause (0 to 10 ms) after each step.
        for step in track:
            ActionChains(self.driver).move_by_offset(xoffset=step, yoffset=0).perform()
            time.sleep(random.random() / 100)
        time.sleep(random.random())
        # Small backwards correction, as a person overshooting would do.
        for step in (-1, -0.5, -1):
            time.sleep(random.random() / 100)
            ActionChains(self.driver).move_by_offset(xoffset=step, yoffset=0).perform()
        self.shake_mouse()
        time.sleep(random.random())
        # Let go of the slider handle.
        ActionChains(self.driver).release().perform()

    def login(self, account, password):
        """Run the whole login flow and return the session cookies.

        :param account: login name
        :param password: password
        :return: dict mapping cookie name to cookie value
        :raises RuntimeError: when no gap can be located in the captcha image
        """
        self.input_username_password(account, password)
        time.sleep(5)
        gap_img, full_img = self.get_img()
        gap_img = self.resize_img(gap_img)
        full_img = self.resize_img(full_img)
        distance = self.get_gap_offset(gap_img, full_img)
        if distance is None:
            raise RuntimeError('no gap found in the captcha image')
        self.operate_slider(self.get_track(distance))
        time.sleep(4)
        # get_img() left the driver inside the captcha iframe; go back to the
        # top-level page so the cookies are read in the site's own context.
        self.driver.switch_to.default_content()
        return {item["name"]: item["value"] for item in self.driver.get_cookies()}
if __name__ == '__main__':
    # Script entry point: log in once with the (masked) sample credentials.
    qq = yuewen()
    try:
        account = '56318****@qq.com'
        password = '********'
        cookies = qq.login(account, password)
    finally:
        # Always shut down the browser and its driver process, even when the
        # login fails, so no session is left running.
        qq.driver.quit()