本文环境配置:
系统=>windows10:64位
工具=>PyCharm:2018.1.4
语言=>Python:3.6
第三方库
urllib3
opencv
selenium
PIL
geckodriver.exe(火狐)或 Chromedriver.exe
本次主要是使用selenium模拟滑动验证
最终目标:实现自动点击滑块,并且拖动完成验证
代码实现如下:
import random
from scrapy.http import HtmlResponse
from selenium.common.exceptions import TimeoutException
import time
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from PIL import Image
import cv2
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import urllib3
from urllib3.connectionpool import xrange
class SeleniumMiddleware(object):
    """Solve a "drag the piece into the gap" slider captcha with Selenium and OpenCV.

    Workflow (see ``process_request``): open the captcha page, screenshot the
    puzzle, locate the gap with contour matching (``handle``), build a
    human-looking drag trajectory (``get_track``) and replay it on the slider
    handle (``move_to_gap``).
    """

    # Page opened by ``process_request`` when no URL is supplied.
    CAPTCHA_URL = 'https://www.anjuke.com/captcha-verify/?callback=shield&from=antispam&serialID=582d45fc4d908d19194f5c8be25b69e5_048c47fa2baa45768dee22e218344755&history=aHR0cHM6Ly9zaGFuZ2hhaS5hbmp1a2UuY29tL2NvbW11bml0eS9qaW5nYW4v'

    # Geometry constants, in pixels of the cropped screenshot.
    # NOTE(review): meanings below are inferred from how the values are used.
    box1 = 49    # height/width of the piece body used when scanning and drawing
    box2 = 52    # piece height used for the vertical tolerance band in handle()
    convex = 8   # size of a protruding tab on the piece
    cut = 70     # columns [0, cut) hold the piece; columns [cut, width) hold the gap

    # State filled in by load()/getTop()/getLeftConvex()/getTopConvex()/handle().
    image = None
    width = 0
    high = 0
    left = 0
    top = 0
    leftConvex = 0
    topConvex = 0

    def __init__(self, executable_path=r"F:\Python\geckodriver.exe"):
        """Start a Firefox session.

        Arguments:
            executable_path -- path to geckodriver; defaults to the location
                the original script used.
        """
        self.options = Options()
        # ``options=`` replaces the deprecated ``firefox_options=`` keyword.
        self.browser = webdriver.Firefox(executable_path=executable_path,
                                         options=self.options)
        self.wait = WebDriverWait(self.browser, 20)

    def _is_lit(self, row, col):
        """Return True when all colour channels of pixel (row, col) are non-zero."""
        return bool(self.image[row, col][:3].all())

    @staticmethod
    def _external_contours(binary):
        """Return the outermost contours of ``binary`` on any OpenCV version."""
        # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
        # (contours, hierarchy) on 2.x and 4.x; the contour list is always the
        # second-to-last element.
        return cv2.findContours(binary, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    def load(self, path):
        """Read the image at ``path`` and reset all detection state.

        Returns True on success, False when the file cannot be decoded or is empty.
        """
        self.width = 0
        self.high = 0
        self.top = 0
        self.left = 0
        self.leftConvex = 0
        self.topConvex = 0
        self.image = cv2.imread(path)
        if self.image is None:
            return False
        rows, cols = self.image.shape[:2]
        if cols <= 0 or rows <= 0:
            return False
        self.width = cols
        self.high = rows
        return True

    def getTop(self):
        """Find the top edge of the puzzle piece.

        Scans one fixed column downwards for the first fully non-black pixel,
        stores its row in ``self.top`` and returns it. Returns -1 when no image
        is loaded or no such pixel exists in the scanned range.
        """
        if self.image is None:
            return -1
        col = self.convex * 2
        last_row = self.high - (self.box1 + self.convex)
        for row in range(self.convex, last_row):
            if self._is_lit(row, col):
                self.top = row
                return row
        # The scan starts at row ``convex`` (> 0), so a hit can never be row 0;
        # report "not found" explicitly instead of returning the reset value 0.
        return -1

    def getLeftConvex(self):
        """Detect a tab on the piece's left side; sets ``self.leftConvex`` and returns a bool."""
        if self.image is None:
            return False
        col = self.convex // 2
        for row in range(self.top, self.top + self.box1):
            if self._is_lit(row, col):
                self.leftConvex = 1
                return True
        return False

    def getTopConvex(self):
        """Detect a tab on the piece's top side; sets ``self.topConvex`` and returns a bool."""
        if self.image is None:
            return False
        row = self.top - self.convex // 2
        if row < 0:
            # A negative index would silently wrap to the bottom of the image.
            return False
        for col in range(self.convex, self.convex + self.box1):
            if self._is_lit(row, col):
                self.topConvex = 1
                return True
        return False

    def handle(self, path):
        """Locate the gap in the captcha image stored at ``path``.

        Returns the horizontal drag distance for the slider, or -1 when the
        image cannot be read, the piece cannot be found, or no contour on the
        right-hand side looks like the gap.
        """
        if not self.load(path):
            return -1
        if self.getTop() == -1:
            return -1
        self.getLeftConvex()
        self.getTopConvex()

        # Left strip: blur -> grey -> binary -> outline of the piece itself.
        piece_strip = self.image[0:self.high, 0:self.cut]
        piece_strip = cv2.GaussianBlur(piece_strip, (3, 3), 0)
        piece_strip = cv2.cvtColor(piece_strip, cv2.COLOR_BGR2GRAY)
        _, piece_strip = cv2.threshold(piece_strip, 1, 255, cv2.THRESH_BINARY)
        piece_contours = self._external_contours(piece_strip)
        if len(piece_contours) <= 0:
            return -1
        # Contour order is arbitrary; take the biggest one so a stray speck of
        # noise is never mistaken for the piece.
        piece = max(piece_contours, key=cv2.contourArea)

        # Right part: blur -> grey -> edges -> binary -> candidate outlines.
        scene = self.image[0:self.high, self.cut:self.width]
        scene = cv2.GaussianBlur(scene, (7, 7), 0)
        scene = cv2.cvtColor(scene, cv2.COLOR_BGR2GRAY)
        scene = cv2.Canny(scene, 120, 255)
        _, scene = cv2.threshold(scene, 100, 255, cv2.THRESH_BINARY)

        candidates = []
        for contour in self._external_contours(scene):
            ys = contour[:, 0, 1]
            c_top = int(ys.min())
            c_bottom = int(ys.max())
            # Every point must lie inside the piece's row band (+/- 20 px).
            if c_top < self.top - 20 or c_bottom > self.top + self.box2 + 20:
                continue
            # Top edges must line up, allowing for a tab on top of the piece.
            aligned_top = c_top - self.convex if self.topConvex else c_top
            if abs(self.top - aligned_top) > 20:
                continue
            # Height must be plausible for a gap.
            if not 40 <= c_bottom - c_top <= 80:
                continue
            epsilon = 0.001 * cv2.arcLength(contour, True)
            candidates.append(cv2.approxPolyDP(contour, epsilon, True))
        if not candidates:
            # Previously this fell through and indexed an empty list.
            return -1

        # Most similar outline wins (method 1 is the I1 Hu-moment distance).
        best = min(candidates,
                   key=lambda c: cv2.matchShapes(piece, c, 1, 0.0))

        # Left-most x of the gap, translated back to full-image coordinates.
        self.left = int(best[:, 0, 0].min())
        if self.leftConvex:
            self.left += (self.convex + 1)
        self.left += self.cut

        # NOTE(review): 349 is the width of the crop made in process_request;
        # 14 and 280 look like the slider's start offset and travel - confirm.
        distance = (self.left - 14) * 280 / 349
        print('距离左边:', distance)
        return distance

    def show(self):
        """Debug helper: display the loaded image with the detected gap outlined."""
        if self.image is None:
            return
        x1 = int(self.left)
        y1 = int(self.top)
        preview = self.image.copy()
        cv2.rectangle(preview, (x1, y1), (x1 + self.box1, y1 + self.box1),
                      (0, 0, 255), thickness=2)
        cv2.imshow("image", preview)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    def get_track(self, distance):
        """Build a mouse trajectory for dragging the slider by ``distance`` pixels.

        The forward track deliberately overshoots by 20 px (accelerating for the
        first 4/5 of the way, then decelerating); ``back_tracks`` then pulls the
        slider back by exactly 20 px.

        Arguments:
            distance -- horizontal offset of the gap, in pixels.

        Returns:
            dict with 'track' (forward integer steps whose sum is
            ``round(distance + 20)``) and 'back_tracks' (steps summing to -20).
        """
        distance += 20  # overshoot first, the back track compensates
        track = []
        current = 0.0            # exact position reached so far
        emitted = 0              # integer pixels already handed out
        mid = distance * 4 / 5   # switch from accelerating to braking here
        t = 0.3                  # simulated time per step
        v = 0.0                  # current speed
        while current < distance:
            a = 3 if current < mid else -2
            # x = v0*t + 1/2*a*t^2, clamped so the last step cannot overshoot.
            current = min(current + v * t + 0.5 * a * t * t, distance)
            v += a * t
            # Emit the delta of the rounded cumulative position so rounding
            # errors cannot pile up across steps.
            step = round(current) - emitted
            emitted += step
            track.append(step)
        back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1]  # sums to -20
        return {'track': track, 'back_tracks': back_tracks}

    def move_to_gap(self, slider, tracks):
        """Drag ``slider`` along ``tracks`` and release it.

        Sequence: hover, press and hold, replay the forward track with a small
        random vertical jitter, replay the back track, wiggle 4 px right and
        left, then release.

        Arguments:
            slider -- the slider handle element.
            tracks -- dict produced by ``get_track``.
        """
        ActionChains(self.browser).move_to_element(slider).perform()
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in tracks['track']:
            y = random.randint(-3, 3)
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=y).perform()
        time.sleep(0.6)
        for back_track in tracks['back_tracks']:
            ActionChains(self.browser).move_by_offset(xoffset=back_track, yoffset=0).perform()
        time.sleep(0.3)
        # Nudge past the target and back, like a hand settling on the spot.
        ActionChains(self.browser).move_by_offset(xoffset=4, yoffset=0).perform()
        time.sleep(0.9)
        ActionChains(self.browser).move_by_offset(xoffset=-4, yoffset=0).perform()
        time.sleep(0.6)
        ActionChains(self.browser).release().perform()

    def process_request(self, url=None):
        """Open the captcha page and try to solve the slider captcha.

        Arguments:
            url -- page to open; defaults to ``CAPTCHA_URL``.

        A ``TimeoutException`` raised while waiting for the slider handle is
        reported and swallowed.
        """
        screenshot_path = './ele.png'
        crop_path = './cut.png'
        try:
            self.browser.get(url or self.CAPTCHA_URL)
            if '向右滑动滑块' not in self.browser.page_source:
                return
            print('出现验证码-----------------')
            # Paint the piece's background black and hide the slider bar so
            # the screenshot is easy to segment.
            self.browser.execute_script(
                "var captcha__puzzleImg = document.evaluate('//*[@class=\"dvc-captcha__puzzleImg\"]', document, null, 0, null).iterateNext();captcha__puzzleImg.style.backgroundColor='black';document.evaluate('//*[@class=\"dvc-operate\"]', document, null, 0, null).iterateNext().style.display='none';")
            time.sleep(1)
            img = self.browser.find_element(By.CSS_SELECTOR, '.info')
            img.screenshot(screenshot_path)
            with Image.open(screenshot_path) as im:
                im.crop((127, 70, 476, 250)).save(crop_path)
            time.sleep(3)
            # Analyse the file that was just written instead of a hard-coded
            # absolute path that only exists on the author's machine.
            left = self.handle(crop_path)
            if left < 0:
                print('未能识别缺口位置')
                return
            slide = self.wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, 'dvc-slider__handler')))
            track = self.get_track(left)
            print(track)
            self.move_to_gap(slide, track)
            for _ in range(7):
                if '向右滑动滑块' not in self.browser.page_source:
                    break  # captcha is gone, nothing left to retry
                time.sleep(1)
                self.move_to_gap(slide, track)
            time.sleep(20)
        except TimeoutException as e:
            print('超时', e)
if __name__ == "__main__":
    # Run the demo only when executed as a script, never as an import side effect.
    text = SeleniumMiddleware()
    try:
        text.process_request()
    finally:
        # Always shut down the browser and its driver process, even on failure.
        text.browser.quit()
其实上面的代码还可以进一步优化。例如,尝试三次滑动后如果仍未提示“验证成功”,就应该主动刷新页面,并用 try 进入下一轮验证。除此之外,以上只是对“滑块验证”的分析和模拟;实际情况是,通过“滑块验证”后,网站如果检测到是机器模拟,还会再弹出一个点击验证。关于网站检测 selenium 模拟的问题,可参考:https://blog.csdn.net/qq_42782937/article/details/86502492
以上代码还有待继续完善,也欢迎看到这篇博文的人多多指正不足之处。