import re
from PIL import Image
import pytesseract
# Automatically recognise the CAPTCHA shown on a page
def _binarize(gray, threshold):
    """Return a copy of *gray* with pixels below *threshold* set to 0, others to 255."""
    # Image.point evaluates the function once per possible 8-bit value and
    # applies the resulting lookup table, replacing a per-pixel Python loop.
    return gray.point(lambda value: 0 if value < threshold else 255)


def _remove_isolated_pixels(img):
    """Whiten, in place, dark pixels that have no dark 4-neighbour."""
    data = img.getdata()
    w, h = img.size
    # Border pixels are skipped so every inspected pixel has four neighbours.
    # Traversal order (x outer, y inner) is kept from the original code.
    for x in range(1, w - 1):
        for y in range(1, h - 1):
            if data[w * y + x] >= 50:
                continue  # centre pixel is not dark; nothing to clean
            neighbours = (
                data[w * (y - 1) + x],  # top
                data[w * y + (x - 1)],  # left
                data[w * (y + 1) + x],  # bottom
                data[w * y + (x + 1)],  # right
            )
            if not any(value < 10 for value in neighbours):
                img.putpixel((x, y), 255)


def get_pictures(driver, element_id='codeImg',
                 screenshot_path=r'D:\Honest\picture\poo1.png',
                 captcha_path=r'D:\Honest\picture\poo2.png',
                 threshold=190):
    """Screenshot the page, crop out the CAPTCHA image and OCR it.

    Args:
        driver: Selenium WebDriver showing the page that contains the CAPTCHA.
        element_id: ``id`` attribute of the CAPTCHA image element.
        screenshot_path: File the full-page screenshot is written to.
        captcha_path: File the cropped CAPTCHA image is written to.
        threshold: Grey level (0-255); pixels below it become black (0),
            all others white (255).

    Returns:
        At most the first four characters of the OCR result after removing
        everything except CJK characters (U+4E00-U+9FA5), digits and ASCII
        letters.
    """
    # Full-page screenshot; the CAPTCHA is cropped out of it below.
    driver.save_screenshot(screenshot_path)

    # find_element(by, value) is available in Selenium 3 and 4, whereas
    # find_element_by_id was removed in Selenium 4.3.
    captcha = driver.find_element('id', element_id)
    left = captcha.location['x']
    top = captcha.location['y']
    right = left + captcha.size['width']
    bottom = top + captcha.size['height']

    # NOTE(review): location/size come from the browser while the crop box is
    # applied to screenshot pixels; on a scaled display these may differ -
    # confirm on the target machine.
    # The context manager closes the screenshot file once we are done with it.
    with Image.open(screenshot_path) as page:
        captcha_img = page.crop((left, top, right, bottom))
        captcha_img.save(captcha_path)
        gray = captcha_img.convert("L")  # greyscale copy

    binary = _binarize(gray, threshold)
    _remove_isolated_pixels(binary)

    text = pytesseract.image_to_string(binary)  # image -> text
    # Strip every character that is not CJK, a digit or an ASCII letter.
    cleaned = re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", text)
    # Only the first four characters are kept.
    return cleaned[0:4]
# Usage example (lives in a separate test script):
from selenium import webdriver
from common.common_verification import get_pictures # 方法路径
def test_a():
    """Open the login page and fill in account, password and the OCR'd CAPTCHA."""
    driver = webdriver.Chrome()
    try:
        # WebDriver requires an absolute URL, scheme included.
        driver.get(r"http://www.123.com")
        # find_element(by, value) works on Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.3.
        # Account name
        driver.find_element('name', 'loginname').send_keys('123')
        # Password
        driver.find_element('name', 'password').send_keys('123')
        # CAPTCHA: this is a plain function, so pass the local driver
        # (there is no ``self`` in scope here).
        driver.find_element('id', 'code').send_keys(get_pictures(driver))
    finally:
        # Always shut the browser and driver process down, even on failure.
        driver.quit()
if __name__ == '__main__':
    # Run the login example only when executed as a script, not on import.
    test_a()