from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import random
def getRandomColor():
    """Return a random colour as an ``(r, g, b)`` tuple.

    Each channel is drawn with ``random.randint(0, 255)``; a draw of 255 is
    replaced by 0, so every returned channel lies in the range 0..254.

    :return: tuple of three ints ``(r, g, b)``
    """
    channels = []
    for _ in range(3):
        value = random.randint(0, 255)
        # 255 is never handed out for a channel; it is remapped to 0.
        channels.append(value if value != 255 else 0)
    return tuple(channels)
def getRandomStr():
    """Return a single random decimal digit as a string.

    :return: one character in ``'0'``..``'9'``
    """
    return str(random.randint(0, 9))
def generate_captcha():
    """Render one random 5-digit captcha and save it as a PNG file.

    A 150x50 white RGB image is created, five random digits are drawn on it
    in random colours, and a few noise lines, points and arcs are added.
    The result is written to ``captcha_images/<digits>.png``, so the file
    name doubles as the label; a captcha with the same digits overwrites the
    earlier file.

    The ``captcha_images/`` directory must already exist and the font file
    ``Lohit-Bengali.ttf`` must be resolvable by ``ImageFont.truetype``.
    """
    # White RGB canvas, 150 px wide and 50 px high.
    image = Image.new('RGB', (150, 50), (255, 255, 255))
    # Drawing handle bound to the canvas.
    draw = ImageDraw.Draw(image)
    # TrueType font used for the digits.
    font = ImageFont.truetype("Lohit-Bengali.ttf", size=32)

    label = ""
    for i in range(5):
        random_char = getRandomStr()
        label += random_char
        # Digits are placed 30 px apart, starting 10 px from the left edge.
        draw.text((10 + i * 30, 0), random_char, getRandomColor(), font=font)

    # Area in which noise is scattered.
    # NOTE(review): the canvas is 50 px high but noise only covers the top
    # 30 rows; the original values are kept - confirm whether this is wanted.
    width = 150
    height = 30

    # Noise lines: three black segments between random end points.
    for _ in range(3):
        x1 = random.randint(0, width)
        x2 = random.randint(0, width)
        y1 = random.randint(0, height)
        y2 = random.randint(0, height)
        draw.line((x1, y1, x2, y2), fill=(0, 0, 0))

    # Noise points and small arcs: five of each.
    for _ in range(5):
        draw.point(
            [random.randint(0, width), random.randint(0, height)],
            fill=getRandomColor(),
        )
        x = random.randint(0, width)
        y = random.randint(0, height)
        draw.arc((x, y, x + 4, y + 4), 0, 90, fill=(0, 0, 0))

    # Save under the label. The file handle is closed explicitly instead of
    # being left open for the garbage collector to reclaim.
    # (Use 'captcha_predict/' as the directory to build a prediction set.)
    with open(''.join(['captcha_images/', label, '.png']), 'wb') as out_file:
        image.save(out_file, 'png')
if __name__ == '__main__':
    # Script entry point: call the generator 150 times.
    for _ in range(150):
        generate_captcha()
# 对验证码进行处理:首先将图片的 RGB 值(每个通道 0~255)转为灰度值,再对灰度值进行二值化,得到像素值只有 0(黑)和 1(白)的图片;然后对该图片降噪、切分,得到训练集。
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
def binarizaion(path, threshold=220, show=True):
    """Load an image, convert it to grayscale and binarize it.

    Pixels whose gray value is ``<= threshold`` become 0 and all other
    pixels become 1.

    Args:
        path: path of the image file to open.
        threshold: gray level at or below which a pixel is set to 0.
            Defaults to 220.
        show: when true (the default), plot the binarized image with
            matplotlib and call ``plt.show()``; when false, nothing is
            plotted, which suits batch runs.

    Returns:
        A 2-D numpy array of 0/1 values with the same shape and dtype as
        the grayscale pixel array.
    """
    # Open inside a context manager so the underlying file is released.
    with Image.open(path) as img:
        gray = np.array(img.convert('L'))

    # Vectorised threshold: the comparison yields booleans, which are cast
    # back to the source dtype so the result holds plain 0/1 values.
    binary = (gray > threshold).astype(gray.dtype)

    if show:
        plt.figure('')
        plt.imshow(binary, cmap='gray')
        plt.axis('off')
        plt.show()
    return binary
def _count_black_neighbours(img_gray, row, col):
    """Count the in-bounds 8-neighbours of ``(row, col)`` that equal 0."""
    height, width = img_gray.shape
    count = 0
    for d_row in (-1, 0, 1):
        for d_col in (-1, 0, 1):
            if d_row == 0 and d_col == 0:
                continue
            n_row = row + d_row
            n_col = col + d_col
            # Explicit bounds check: an index of -1 would not raise but
            # silently address the opposite edge of the array.
            if 0 <= n_row < height and 0 <= n_col < width:
                if img_gray[n_row, n_col] == 0:
                    count += 1
    return count


def noiseReduction(img_gray, label):
    """Remove isolated black pixels in place and save the cleaned image.

    Every pixel that is not 1 (white) is inspected in row-major order. If
    fewer than 4 of its 8 neighbours are 0 (black), it is treated as noise
    and set to 1. Positions outside the image do not count as neighbours.
    The array is updated while scanning, so a pixel cleared earlier is seen
    as white by the pixels visited after it.

    The result is plotted with matplotlib and saved to
    ``clean_captcha_img/<label>.png``.

    Args:
        img_gray: 2-D array of 0/1 pixel values; modified in place.
        label: file-name stem used for the saved figure.
    """
    height, width = img_gray.shape
    for row in range(height):
        for col in range(width):
            # White pixels need no treatment.
            if img_gray[row, col] == 1:
                continue
            # Fewer than 4 black neighbours: treat the pixel as noise.
            if _count_black_neighbours(img_gray, row, col) < 4:
                img_gray[row, col] = 1

    plt.figure('')
    plt.imshow(img_gray, cmap='gray')
    plt.axis('off')
    plt.savefig(''.join(['clean_captcha_img/', label, '.png']))
def cutImg(label):
    """Cut a cleaned captcha image into five single-digit tiles.

    Opens ``clean_captcha_img/<label>.png`` and crops five boxes, each
    100 px wide and spanning rows 170..280, starting at x = 100. Tile ``i``
    is saved to ``cut_number/<label[i]>/<seq>.png``, where ``seq`` comes
    from ``get_save_seq``.

    Args:
        label: the captcha's digit string; its first five characters name
            the target sub-directories.
    """
    # NOTE(review): the crop boxes are hard-coded pixel coordinates,
    # presumably tuned to the figure size written by noiseReduction -
    # re-check them if the figure size or DPI changes.
    with Image.open(''.join(['clean_captcha_img/', label, '.png'])) as img:
        for index in range(5):
            digit = label[index]
            left = 100 * (1 + index)
            pic = img.crop((left, 170, left + 100, 280))
            plt.imshow(pic)
            seq = get_save_seq(digit)
            pic.save(''.join(['cut_number/', str(digit), '/', str(seq), '.png']))
def get_save_seq(num):
    """Return the next free sequence number in ``cut_number/<num>/``.

    Files there are expected to be named ``<int>.<ext>``. The result is one
    more than the largest number found, or 0 when the directory holds no
    numbered file. Entries whose stem is not an integer (for example hidden
    system files) are ignored.

    Args:
        num: name of the digit sub-directory, as a string.

    Returns:
        The sequence number to use for the next saved tile.
    """
    highest = -1
    for file_name in os.listdir(''.join(['cut_number/', num, '/'])):
        try:
            seq = int(file_name.split('.')[0])
        except ValueError:
            # Not a numbered tile; skip it.
            continue
        if seq > highest:
            highest = seq
    return highest + 1
def create_dir():
    """Create the ten digit directories ``cut_number/0`` .. ``cut_number/9``.

    The parent ``cut_number`` directory is created when missing, and
    directories that already exist are left untouched, so the function can
    be called again on later runs.
    """
    for digit in range(10):
        os.makedirs(''.join(['cut_number/', str(digit)]), exist_ok=True)
def img_2_clean():
    """Binarize and denoise every file found in ``captcha_images/``.

    The part of each file name before the first ``.`` is used as the label
    passed to ``noiseReduction``.
    """
    source_dir = 'captcha_images/'
    for file_name in os.listdir(source_dir):
        stem = file_name.split('.')[0]
        # Step 1: binarize; step 2: denoise and save under the label.
        binary = binarizaion(source_dir + file_name)
        noiseReduction(binary, stem)
def clean_to_cut():
    """Run ``cutImg`` for every file found in ``clean_captcha_img/``.

    The part of each file name before the first ``.`` is passed as the label.
    """
    for file_name in os.listdir('clean_captcha_img/'):
        cutImg(file_name.split('.')[0])
if __name__ == '__main__':
    # Pipeline: clean all captchas, create the digit folders, cut the tiles.
    img_2_clean()
    create_dir()
    clean_to_cut()

    # Afterwards every captcha is binarized and denoised once more.
    for name in os.listdir('captcha_images/'):
        stem = name.split('.')[0]
        binary = binarizaion('captcha_images/' + name)
        noiseReduction(binary, stem)