本博客主要探讨基于传统方法的验证码识别,重点是粘连、扭曲验证码识别中的各种字符分割技术。其实在验证码识别这一块,深度学习已经做得非常好了,识别效率与速度都不错(参见【验证码识别】)。我这里只是做一些技术探讨;关于投影法分割,请查看【基于投影的字符分割】。
import queue
from PIL import Image
def cfs(img, min_span=3):
    """Split a binarized image into column ranges, one per connected component.

    Pixels with value 0 that touch each other in any of the 8 neighbouring
    directions are grouped into one component by a flood fill.

    Args:
        img: Image-like object exposing ``load()`` (pixel access indexed as
            ``[x, y]``) and ``size`` as ``(width, height)``. It is expected to
            be binarized already, with 0 marking foreground pixels.
        min_span: A component is kept only when ``max_x - min_x`` is strictly
            greater than this value; narrower components are treated as noise.

    Returns:
        A list of ``(left, right)`` column ranges (``right`` exclusive), in the
        order the components are met by a column-by-column scan.
    """
    pixdata = img.load()
    w, h = img.size
    visited = set()
    offsets = [(-1, -1), (0, -1), (1, -1), (-1, 0), (1, 0), (-1, 1), (0, 1), (1, 1)]
    cuts = []
    for x in range(w):
        for y in range(h):
            if (x, y) in visited or pixdata[x, y] != 0:
                continue
            visited.add((x, y))
            # The seed column is part of the component's extent; leaving it
            # out would shift the left edge whenever the seed is the only
            # pixel of the component in its column.
            min_x = max_x = x
            # Traversal order does not affect the extent, so a plain list
            # used as a stack is enough.
            pending = [(x, y)]
            while pending:
                x_p, y_p = pending.pop()
                for x_offset, y_offset in offsets:
                    x_c, y_c = x_p + x_offset, y_p + y_offset
                    # Explicit bounds check instead of swallowing exceptions:
                    # a pixel accessor that accepts negative indices
                    # (presumably Pillow's does -- confirm) would wrap around
                    # and join components across opposite image edges.
                    if not (0 <= x_c < w and 0 <= y_c < h):
                        continue
                    if (x_c, y_c) in visited:
                        continue
                    visited.add((x_c, y_c))
                    if pixdata[x_c, y_c] == 0:
                        pending.append((x_c, y_c))
                        min_x = min(min_x, x_c)
                        max_x = max(max_x, x_c)
            if max_x - min_x > min_span:
                cuts.append((min_x, max_x + 1))
    return cuts
def binarizing(img, threshold):
    """Convert an image to grayscale and reduce it to two levels.

    Every pixel whose gray value is below ``threshold`` becomes 0 (black);
    every other pixel becomes 255 (white).

    Args:
        img: Image object providing ``convert("L")``.
        threshold: Gray level separating black from white.

    Returns:
        The image returned by ``img.convert("L")``, modified pixel by pixel.
    """
    gray = img.convert("L")  # grayscale version of the input
    pixels = gray.load()
    width, height = gray.size
    for row in range(height):
        for col in range(width):
            pixels[col, row] = 0 if pixels[col, row] < threshold else 255
    return gray
# Demo: binarize the sample captcha, then save one full-height crop per
# connected component found by cfs().
img = binarizing(Image.open('C98Q.png'), 200)
cuts = cfs(img)
w, h = img.size
# Output files are numbered starting from 10: ./10.png, ./11.png, ...
for i, (left, right) in enumerate(cuts):
    box = (left, 0, right, h)
    img.crop(box).save("./" + str(i+10) + ".png")
import numpy as np
import cv2
from PIL import Image
def getPoint(x,y,data,subdata=None):
    """Collect the points reachable from ``(x, y)`` with a gap-tolerant flood fill.

    Around each visited point the fill probes 1 to 5 steps along both axes,
    so it can jump over gaps of up to four cells. Every probed cell equal to
    1 is recorded, overwritten with 2 in ``data`` and then explored in turn.
    Points within 5 cells of the array border (or exactly 5 from the low
    edge) are recorded but not explored further.

    The traversal uses an explicit stack rather than recursion, so large
    regions cannot exhaust Python's recursion limit. Order and contents of
    the output match the recursive formulation: a discovered point appears
    once when found and once more when its own exploration finishes.

    Args:
        x: Seed index along axis 0 of ``data``.
        y: Seed index along axis 1 of ``data``.
        data: 2-D array, modified in place (visited cells become 2). The seed
            cell itself is only overwritten if another point probes it.
        subdata: Optional list to which ``(x, y)`` tuples are appended.

    Returns:
        The list that received the points: ``subdata`` itself when given,
        otherwise a new list.
    """
    # Probe order: for each distance 1..5 -> +axis1, -axis0, -axis1, +axis0.
    offsets = [
        off
        for step in range(1, 6)
        for off in ((0, step), (-step, 0), (0, -step), (step, 0))
    ]
    width, height = data.shape  # extents of axis 0 and axis 1 respectively
    if subdata is None:
        subdata = []
    # Each frame is (point x, point y, index of the next offset to probe);
    # it plays the role of one call in the recursive version.
    pending = [(x, y, 0)]
    while pending:
        cx, cy, start = pending.pop()
        descended = False
        # Only probe around points for which every offset stays inside.
        if cx > 5 and cy < height - 5 and cy > 5 and cx < width - 5:
            for i in range(start, len(offsets)):
                nx, ny = cx + offsets[i][0], cy + offsets[i][1]
                if data[nx][ny] == 1:
                    subdata.append((nx, ny))
                    data[nx][ny] = 2
                    # Resume this point after the new one is fully explored.
                    pending.append((cx, cy, i + 1))
                    pending.append((nx, ny, 0))
                    descended = True
                    break
        if not descended:
            subdata.append((cx, cy))
    return subdata
def getcell(data):
    """Extract the connected regions of ``data`` and save each one as an image.

    The array is scanned along axis 1 in the outer loop and axis 0 in the
    inner loop. Every cell equal to 1 that does not already belong to a
    region seeds a flood fill via ``getPoint``, which also modifies ``data``
    in place. Each region whose bounding box covers at least 8 cells is
    written to ``img2/<index>.png``, where ``index`` is the region's position
    in discovery order (skipped regions still consume an index).

    Args:
        data: 2-D array in which foreground cells have the value 1.

    Returns:
        A list of the arrays that were saved, one per written file. Each has
        the shape of the region's bounding box, with region cells set to 1
        and all other cells set to 255.
    """
    import os  # local import: only needed to create the output directory

    components = []
    claimed = set()  # every point already assigned to some region
    for y in range(data.shape[1]):
        for x in range(data.shape[0]):
            # The membership test is evaluated afresh for each cell, so one
            # already-claimed cell cannot suppress all later regions.
            if data[x][y] != 1 or (x, y) in claimed:
                continue
            component = []
            getPoint(x, y, data, component)  # flood fill; fills the list in place
            components.append(component)
            claimed.update(component)

    saved = []
    for index, component in enumerate(components):
        xs, ys = zip(*component)
        l, t = min(xs), min(ys)
        w = max(xs) - l + 1
        h = max(ys) - t + 1
        if w * h < 8:  # drop tiny specks
            continue
        # Start from an all-255 background and stamp the region cells as 1.
        img0 = np.full((w, h), 255.0)
        for x, y in component:
            img0[x - l][y - t] = 1
        os.makedirs('img2', exist_ok=True)
        Image.fromarray(img0).convert('RGB').save('img2/' + str(index) + '.png')
        saved.append(img0)
    return saved
if __name__=="__main__":
    filename='captcha1.png'
    # cv2.IMREAD_ANYDEPTH is the named constant for the flag value 2.
    data=cv2.imread(filename,cv2.IMREAD_ANYDEPTH)
    # imread signals an unreadable or missing file by returning None rather
    # than raising, so fail here with a clear message.
    if data is None:
        raise SystemExit("cannot read image: "+filename)
    # NOTE(review): getcell() only treats cells equal to 1 as foreground;
    # presumably the image must be binarized to 0/1 first -- confirm.
    allimg=getcell(data)
连通域:https://blog.csdn.net/fox64194167/article/details/80557242
连通域:https://blog.csdn.net/qq_32590631/article/details/78806388
泛洪填充:http://www.voidcn.com/article/p-vilyctpt-mr.html
python生成中文验证码:https://www.cnblogs.com/whu-zeng/p/4855480.html
Python生成验证码:https://blog.csdn.net/Dick633/article/details/83057808
区域填充之扫描线种子法的Python实现:https://blog.csdn.net/u013859301/article/details/53292523
基于HSV颜色的ROI区域提取:https://blog.csdn.net/xull88619814/article/details/82050800
文字切割垂直投影算法
https://blog.csdn.net/jylonger/article/details/88043031