一. 引入
1. 使用selenium抓取数据时,很多网站都要求先登录,登录之后它才会给我们数据;而登录的时候往往还会遇到验证码.最简单的方式是对接打码平台,但是这里我来介绍一种自己处理滑动验证码的简单方法.
二. 滑动验证码的处理
1. 前面的登录我就不过多的讲解了,直接上代码,这里模拟登录的是京东联盟(union.jd.com)的网站.
import base64
import re
import time
import numpy as np
import cv2
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from matplotlib import pyplot as plt
import random
from selenium.webdriver import ChromeOptions, ActionChains
# Start Chrome and load the target page.
driver = webdriver.Chrome()
driver.get('https://union.jd.com/index')
driver.implicitly_wait(10)
driver.maximize_window()

# Switch into the first frame; the login inputs are looked up inside it.
driver.switch_to.frame(0)
time.sleep(1)

# Fill in each credential field by element id, then click the submit button.
for field_id, field_value in (('loginname', '17740597'), ('nloginpwd', '0521')):
    driver.find_element(By.ID, field_id).send_keys(field_value)
driver.find_element(By.ID, 'paipaiLoginSubmit').click()
time.sleep(1)

# Keep the browser open until Enter is pressed, then close it.
input()
driver.quit()
2. 登录进去了就是这样的界面,接下来就是我们的重头戏了,通过cv2模块和其他的来进行查找.
3. 保存这两张图片(大图和小图).它们的地址就是img标签的src属性,内容是base64编码的字符串(data URI),需要先去掉前缀再解码成二进制数据,才可以保存为图片.
# Both captcha images are read from the `src` attribute of their <img> tags.
# The header matched by this pattern is stripped and the remainder is
# base64-decoded into the raw bytes written to disk.
data_uri_header = r'data:[a-z]*/[a-z]*;base64,'

big_url = driver.find_element(By.CSS_SELECTOR, 'div.JDJRV-bigimg>img').get_attribute('src')
small_url = driver.find_element(By.CSS_SELECTOR, 'div.JDJRV-smallimg>img').get_attribute('src')

# Big image: strip header -> decode -> save.
big_base64_str = re.sub(data_uri_header, '', big_url)
big_binary_content = base64.b64decode(big_base64_str)
with open('big.png', mode='wb') as f:
    f.write(big_binary_content)

# Small image: same steps.
small_base64_str = re.sub(data_uri_header, '', small_url)
small_binary_content = base64.b64decode(small_base64_str)
with open('small.png', mode='wb') as f:
    f.write(small_binary_content)
4. 计算图片的缩放的比例,因为图片在浏览器中加载出来的和原图是不一样的,浏览器的数据基本都是经过渲染的.
time.sleep(1)
# Read the saved image's pixel size. The context manager closes the file
# handle as soon as the size has been read instead of leaving it open.
with Image.open('big.png') as local_img:
    size_loc = local_img.size
# Scale factor from saved-image pixels to on-page pixels.
# NOTE(review): 281 is hard-coded; presumably the rendered width of the big
# image in the browser - confirm against the page.
zoom = 281 / int(size_loc[0])
print("计算缩放比例 zoom = %f" % round(zoom, 4))
5. 先来进行小图片的处理.
big_img = cv2.imread('big.png')
small_img = cv2.imread('small.png')

# --- Process the small image into a binary template ---
gray = cv2.cvtColor(small_img, cv2.COLOR_BGR2GRAY)
cv2.imwrite('small-gray1.png', gray)

# Recolour every pure-black pixel to 96 with one vectorised assignment.
# This replaces a per-pixel Python loop whose row/column names were swapped.
gray[gray == 0] = 96
cv2.imwrite('small-gray2.png', gray)

# Keep only the pixels whose value is exactly 96.
binary = cv2.inRange(gray, 96, 96)

# Morphological opening with an 8x8 kernel of ones.
kernel = np.ones((8, 8), np.uint8)
cv2.imwrite('small-kernel.png', kernel)
tpl = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
cv2.imwrite('small-gray3.png', tpl)
6.处理大图
"""模板处理"""
# Blur the big image, convert it to grayscale, and save both stages.
blurred = cv2.GaussianBlur(big_img, (3, 3), 0)
cv2.imwrite("target-blurred1.png", blurred)
target_img_gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
cv2.imwrite("big-blurred2.png", target_img_gray)

# An ndarray's shape is (rows, cols), i.e. (height, width).
height, width = tpl.shape[:2]

# Find where the template correlates best with the big image.
result = cv2.matchTemplate(target_img_gray, tpl, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

# Locations are (x, y): x grows with the width, y with the height.
left_up = max_loc
right_down = (left_up[0] + width, left_up[1] + height)
7.匹配出图片的位置,找到x轴的位置,这个距离就是需要移动的距离.
# Draw the matched region on the big image and save it for inspection.
# Colour components are 8-bit, so the largest valid value is 255
# (the previous 279 was out of range).
cv2.rectangle(big_img, left_up, right_down, (7, 255, 151), 2)
cv2.imwrite('dectected.png', big_img)

# Horizontal offset of the match, scaled from image pixels to page pixels.
distance = max_loc[0] * zoom
print('distance的大小为: ',distance)
8. 利用贝塞尔曲线来模拟人手的移动.我们在拖动验证码滑块的时候,速度总是先慢、再快、最后又变慢,这样的位移变化正好可以用一条贝塞尔曲线来近似.
def one_bezier_curve(a, b, t):
    """Evaluate a first-order Bezier curve (linear interpolation).

    :param a: value at ``t == 0``
    :param b: value at ``t == 1``
    :param t: curve parameter
    :return: ``(1 - t) * a + t * b``
    """
    start_weight = 1 - t
    end_weight = t
    return start_weight * a + end_weight * b
def n_bezier_curve(xs, n, k, t):
    """Evaluate an order-``n`` Bezier curve at parameter ``t``.

    The curve is defined by the ``n + 1`` control values
    ``xs[k], xs[k + 1], ..., xs[k + n]``. It is evaluated with De Casteljau's
    algorithm: adjacent values are linearly interpolated, level by level,
    until one value is left. This performs the same arithmetic as the
    doubly-recursive definition but in O(n^2) steps instead of O(2^n).

    :param xs: sequence of control values (one coordinate axis)
    :param n: order of the curve, at least 1
    :param k: index of the first control value to use
    :param t: curve parameter
    :return: the curve value at ``t``
    :raises ValueError: if ``n`` is less than 1
    :raises IndexError: if ``xs`` has no element at index ``k + n``
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    points = [xs[k + offset] for offset in range(n + 1)]
    for _ in range(n):
        # One De Casteljau level: interpolate every adjacent pair.
        points = [(1 - t) * left + t * right
                  for left, right in zip(points, points[1:])]
    return points[0]
def bezier_curve(xs, ys, num):
    """Sample points along the Bezier curve defined by the control points.

    The curve order is derived from the number of control points, so six
    control points give a fifth-order curve.

    :param xs: x coordinates of the control points
    :param ys: y coordinates of the control points (same length as ``xs``)
    :param num: number of points to sample, evenly spaced in t over [0, 1]
    :return: ``(b_xs, b_ys)``, two lists holding the sampled coordinates
    :raises ValueError: if ``xs`` and ``ys`` differ in length or hold fewer
        than two control points
    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys must have the same length")
    if len(xs) < 2:
        raise ValueError("at least two control points are required")
    n = len(xs) - 1
    b_xs, b_ys = [], []
    # linspace yields exactly `num` samples and ends exactly at t == 1.
    # arange with a float step can emit one extra sample past 1 because of
    # rounding, which would extrapolate beyond the final control point.
    for each in np.linspace(0.0, 1.0, num):
        b_xs.append(n_bezier_curve(xs, n, 0, each))
        b_ys.append(n_bezier_curve(ys, n, 0, each))
    return b_xs, b_ys
def get_random_range(min_, max_):
    """Return a random float between ``min_`` and ``max_``.

    The value is drawn directly from the requested range, so the call always
    finishes after one draw. Retrying ``random.random()`` recursively until
    it lands in range can exhaust the recursion limit for narrow ranges and
    never ends for ranges outside (0, 1).

    :param min_: lower bound of the range
    :param max_: upper bound of the range, greater than ``min_``
    :return: a float ``v`` with ``min_ <= v <= max_``
    :raises ValueError: if ``min_`` is not less than ``max_``
    """
    if not min_ < max_:
        raise ValueError("min_ must be less than max_")
    return random.uniform(min_, max_)
# Six control points: x runs 0.0..5.0; y starts at 0, passes a random
# fraction of the distance, and ends at the full distance.
xs = [float(index) for index in range(6)]
ys = [0, 0, distance * get_random_range(0.4, 0.8)] + [distance] * 3

# Plot the control polygon in blue.
plt.figure()
plt.plot(xs, ys, 'b')

# Sample the curve with a random point count and plot it in red.
num = random.randint(10, 15)
b_xs, b_ys = bezier_curve(xs, ys, num)
plt.plot(b_xs, b_ys, 'r')
plt.show()
print('贝塞尔曲线Y点位置b_ys:', b_ys)
print('贝塞尔曲线X点位置b_xs:', b_xs)

# Difference between each pair of consecutive sampled y values.
diff_y = [following - current for current, following in zip(b_ys, b_ys[1:])]
print('由Y位置求出每次移动的距离diff_y:', diff_y)
9. 当我们来进行滑动的时候总是会上下的抖动鼠标,很少是一直同一个高度.
# Mean step size: steps above it get sign +1, the rest -1.
mid = sum(diff_y) / len(diff_y)
symbol = [1] + [1 if step > mid else -1 for step in diff_y]
print(symbol)

# Jitter magnitude per step: |step| raised to a random exponent in
# (0.22, 0.35). A leading 0 is prepended as the first entry.
diff_three_sqrt = [0] + [pow(abs(step), get_random_range(0.22, 0.35)) for step in diff_y]

# Signed jitter = magnitude * sign, element by element.
diff_shake_y = [magnitude * sign for magnitude, sign in zip(diff_three_sqrt, symbol)]
print('每次抖动的距离diff_shake_y:', diff_shake_y)

# Running total of the jitter; only used for the yellow plot line.
diff_y_shake = [sum(diff_shake_y[:index]) + shake for index, shake in enumerate(diff_shake_y)]
print('抖动的总距离(用于绘图)diff_y_shake:', diff_y_shake)
plt.plot(b_xs, diff_y_shake, 'y')
plt.show()
10. 计算滑动的距离.
# Drop the leading placeholder entry from the jitter list.
diff_shake_y = diff_shake_y[1:]

# Horizontal steps rounded to whole numbers. The rounding-error accumulator
# that used to live here was never read, so it has been removed.
forward_tracks = [round(step) for step in diff_y]

# A single closing step covers whatever the rounded steps left over.
back_tracks = [distance - sum(forward_tracks)]

tracks = {'forward_tracks': forward_tracks, 'back_tracks': back_tracks, 'forward_tracks_y': diff_shake_y}
print(tracks)
print('移动的距离tracks', sum(tracks['forward_tracks']))
11. 滑动滑块来进行模拟人手的滑动
"""移动滑块"""
time.sleep(1)
# Press and hold the slider button.
slider = driver.find_element(By.CSS_SELECTOR, '.JDJRV-slide-btn')
ActionChains(driver).click_and_hold(slider).perform()

# Replay the forward steps, pairing each horizontal step with its jitter.
# NOTE(review): the y offsets (and the closing x offset) can be fractional;
# confirm how the installed Selenium version converts them to pixels.
for track_x, track_y in zip(tracks['forward_tracks'], tracks['forward_tracks_y']):
    ActionChains(driver).move_by_offset(xoffset=track_x, yoffset=track_y).perform()

# Apply the closing correction. The loop variable has its own name so it no
# longer rebinds the module-level `back_tracks` list.
for back_track in tracks['back_tracks']:
    yoffset_random = random.uniform(-2, 2)
    ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=yoffset_random).perform()

ActionChains(driver).release().perform()

# Keep the browser open until Enter is pressed, then close it.
input()
driver.quit()
三.完整的代码
import base64
import re
import time
import numpy as np
import cv2
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from matplotlib import pyplot as plt
import random
from selenium.webdriver import ChromeOptions, ActionChains
# Start Chrome and load the target page.
driver = webdriver.Chrome()
driver.get('https://union.jd.com/index')
driver.implicitly_wait(10)
driver.maximize_window()

# Switch into the first frame; the login inputs are looked up inside it.
driver.switch_to.frame(0)
time.sleep(1)

# Fill in each credential field by element id, then click the submit button.
for field_id, field_value in (('loginname', '1397'), ('nloginpwd', '5521')):
    driver.find_element(By.ID, field_id).send_keys(field_value)
driver.find_element(By.ID, 'paipaiLoginSubmit').click()
time.sleep(1)
# Both captcha images are read from the `src` attribute of their <img> tags.
# The header matched by this pattern is stripped and the remainder is
# base64-decoded into the raw bytes written to disk.
data_uri_header = r'data:[a-z]*/[a-z]*;base64,'

big_url = driver.find_element(By.CSS_SELECTOR, 'div.JDJRV-bigimg>img').get_attribute('src')
small_url = driver.find_element(By.CSS_SELECTOR, 'div.JDJRV-smallimg>img').get_attribute('src')

# Big image: strip header -> decode -> save.
big_base64_str = re.sub(data_uri_header, '', big_url)
big_binary_content = base64.b64decode(big_base64_str)
with open('big.png', mode='wb') as f:
    f.write(big_binary_content)

# Small image: same steps.
small_base64_str = re.sub(data_uri_header, '', small_url)
small_binary_content = base64.b64decode(small_base64_str)
with open('small.png', mode='wb') as f:
    f.write(small_binary_content)
time.sleep(1)
# Read the saved image's pixel size. The context manager closes the file
# handle as soon as the size has been read instead of leaving it open.
with Image.open('big.png') as local_img:
    size_loc = local_img.size
# Scale factor from saved-image pixels to on-page pixels.
# NOTE(review): 281 is hard-coded; presumably the rendered width of the big
# image in the browser - confirm against the page.
zoom = 281 / int(size_loc[0])
print("计算缩放比例 zoom = %f" % round(zoom, 4))
big_img = cv2.imread('big.png')
small_img = cv2.imread('small.png')

# --- Process the small image into a binary template ---
gray = cv2.cvtColor(small_img, cv2.COLOR_BGR2GRAY)
cv2.imwrite('small-gray1.png', gray)

# Recolour every pure-black pixel to 96 with one vectorised assignment.
# This replaces a per-pixel Python loop whose row/column names were swapped.
gray[gray == 0] = 96
cv2.imwrite('small-gray2.png', gray)

# Keep only the pixels whose value is exactly 96.
binary = cv2.inRange(gray, 96, 96)

# Morphological opening with an 8x8 kernel of ones.
kernel = np.ones((8, 8), np.uint8)
cv2.imwrite('small-kernel.png', kernel)
tpl = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
cv2.imwrite('small-gray3.png', tpl)
"""模板处理"""
# Blur the big image, convert it to grayscale, and save both stages.
blurred = cv2.GaussianBlur(big_img, (3, 3), 0)
cv2.imwrite("target-blurred1.png", blurred)
target_img_gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
cv2.imwrite("big-blurred2.png", target_img_gray)

# An ndarray's shape is (rows, cols), i.e. (height, width).
height, width = tpl.shape[:2]

# Find where the template correlates best with the big image.
result = cv2.matchTemplate(target_img_gray, tpl, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

# Locations are (x, y): x grows with the width, y with the height.
left_up = max_loc
right_down = (left_up[0] + width, left_up[1] + height)
# Draw the matched region on the big image and save it for inspection.
# Colour components are 8-bit, so the largest valid value is 255
# (the previous 279 was out of range).
cv2.rectangle(big_img, left_up, right_down, (7, 255, 151), 2)
cv2.imwrite('dectected.png', big_img)

# Horizontal offset of the match, scaled from image pixels to page pixels.
distance = max_loc[0] * zoom
print('distance的大小为: ',distance)
def one_bezier_curve(a, b, t):
    """Evaluate a first-order Bezier curve (linear interpolation).

    :param a: value at ``t == 0``
    :param b: value at ``t == 1``
    :param t: curve parameter
    :return: ``(1 - t) * a + t * b``
    """
    start_weight = 1 - t
    end_weight = t
    return start_weight * a + end_weight * b
def n_bezier_curve(xs, n, k, t):
    """Evaluate an order-``n`` Bezier curve at parameter ``t``.

    The curve is defined by the ``n + 1`` control values
    ``xs[k], xs[k + 1], ..., xs[k + n]``. It is evaluated with De Casteljau's
    algorithm: adjacent values are linearly interpolated, level by level,
    until one value is left. This performs the same arithmetic as the
    doubly-recursive definition but in O(n^2) steps instead of O(2^n).

    :param xs: sequence of control values (one coordinate axis)
    :param n: order of the curve, at least 1
    :param k: index of the first control value to use
    :param t: curve parameter
    :return: the curve value at ``t``
    :raises ValueError: if ``n`` is less than 1
    :raises IndexError: if ``xs`` has no element at index ``k + n``
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    points = [xs[k + offset] for offset in range(n + 1)]
    for _ in range(n):
        # One De Casteljau level: interpolate every adjacent pair.
        points = [(1 - t) * left + t * right
                  for left, right in zip(points, points[1:])]
    return points[0]
def bezier_curve(xs, ys, num):
    """Sample points along the Bezier curve defined by the control points.

    The curve order is derived from the number of control points, so six
    control points give a fifth-order curve.

    :param xs: x coordinates of the control points
    :param ys: y coordinates of the control points (same length as ``xs``)
    :param num: number of points to sample, evenly spaced in t over [0, 1]
    :return: ``(b_xs, b_ys)``, two lists holding the sampled coordinates
    :raises ValueError: if ``xs`` and ``ys`` differ in length or hold fewer
        than two control points
    """
    if len(xs) != len(ys):
        raise ValueError("xs and ys must have the same length")
    if len(xs) < 2:
        raise ValueError("at least two control points are required")
    n = len(xs) - 1
    b_xs, b_ys = [], []
    # linspace yields exactly `num` samples and ends exactly at t == 1.
    # arange with a float step can emit one extra sample past 1 because of
    # rounding, which would extrapolate beyond the final control point.
    for each in np.linspace(0.0, 1.0, num):
        b_xs.append(n_bezier_curve(xs, n, 0, each))
        b_ys.append(n_bezier_curve(ys, n, 0, each))
    return b_xs, b_ys
def get_random_range(min_, max_):
    """Return a random float between ``min_`` and ``max_``.

    The value is drawn directly from the requested range, so the call always
    finishes after one draw. Retrying ``random.random()`` recursively until
    it lands in range can exhaust the recursion limit for narrow ranges and
    never ends for ranges outside (0, 1).

    :param min_: lower bound of the range
    :param max_: upper bound of the range, greater than ``min_``
    :return: a float ``v`` with ``min_ <= v <= max_``
    :raises ValueError: if ``min_`` is not less than ``max_``
    """
    if not min_ < max_:
        raise ValueError("min_ must be less than max_")
    return random.uniform(min_, max_)
# Six control points: x runs 0.0..5.0; y starts at 0, passes a random
# fraction of the distance, and ends at the full distance.
xs = [float(index) for index in range(6)]
ys = [0, 0, distance * get_random_range(0.4, 0.8)] + [distance] * 3

# Plot the control polygon in blue.
plt.figure()
plt.plot(xs, ys, 'b')

# Sample the curve with a random point count and plot it in red.
num = random.randint(10, 15)
b_xs, b_ys = bezier_curve(xs, ys, num)
plt.plot(b_xs, b_ys, 'r')
plt.show()
print('贝塞尔曲线Y点位置b_ys:', b_ys)
print('贝塞尔曲线X点位置b_xs:', b_xs)

# Difference between each pair of consecutive sampled y values.
diff_y = [following - current for current, following in zip(b_ys, b_ys[1:])]
print('由Y位置求出每次移动的距离diff_y:', diff_y)
# Mean step size: steps above it get sign +1, the rest -1.
mid = sum(diff_y) / len(diff_y)
symbol = [1] + [1 if step > mid else -1 for step in diff_y]
print(symbol)

# Jitter magnitude per step: |step| raised to a random exponent in
# (0.22, 0.35). A leading 0 is prepended as the first entry.
diff_three_sqrt = [0] + [pow(abs(step), get_random_range(0.22, 0.35)) for step in diff_y]

# Signed jitter = magnitude * sign, element by element.
diff_shake_y = [magnitude * sign for magnitude, sign in zip(diff_three_sqrt, symbol)]
print('每次抖动的距离diff_shake_y:', diff_shake_y)

# Running total of the jitter; only used for the yellow plot line.
diff_y_shake = [sum(diff_shake_y[:index]) + shake for index, shake in enumerate(diff_shake_y)]
print('抖动的总距离(用于绘图)diff_y_shake:', diff_y_shake)
plt.plot(b_xs, diff_y_shake, 'y')
plt.show()
# Drop the leading placeholder entry from the jitter list.
diff_shake_y = diff_shake_y[1:]

# Horizontal steps rounded to whole numbers. The rounding-error accumulator
# that used to live here was never read, so it has been removed.
forward_tracks = [round(step) for step in diff_y]

# A single closing step covers whatever the rounded steps left over.
back_tracks = [distance - sum(forward_tracks)]

tracks = {'forward_tracks': forward_tracks, 'back_tracks': back_tracks, 'forward_tracks_y': diff_shake_y}
print(tracks)
print('移动的距离tracks', sum(tracks['forward_tracks']))
"""移动滑块"""
time.sleep(1)
# Press and hold the slider button.
slider = driver.find_element(By.CSS_SELECTOR, '.JDJRV-slide-btn')
ActionChains(driver).click_and_hold(slider).perform()

# Replay the forward steps, pairing each horizontal step with its jitter.
# NOTE(review): the y offsets (and the closing x offset) can be fractional;
# confirm how the installed Selenium version converts them to pixels.
for track_x, track_y in zip(tracks['forward_tracks'], tracks['forward_tracks_y']):
    ActionChains(driver).move_by_offset(xoffset=track_x, yoffset=track_y).perform()

# Apply the closing correction. The loop variable has its own name so it no
# longer rebinds the module-level `back_tracks` list.
for back_track in tracks['back_tracks']:
    yoffset_random = random.uniform(-2, 2)
    ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=yoffset_random).perform()

ActionChains(driver).release().perform()

# Keep the browser open until Enter is pressed, then close it.
input()
driver.quit()