汉字图片二值化提取

问题描述：对汉字图片进行二值化处理，使汉字（前景）与背景分离，并将结果统一为浅色背景、深色文字。
汉字图片二值化提取_第1张图片

import cv2
import matplotlib.pylab as plt
import numpy as np
%matplotlib inline
# Pattern for image files whose path contains non-ASCII (e.g. Chinese)
# characters: move the raw bytes through NumPy and let OpenCV decode/encode
# them in memory (presumably because cv2.imread / cv2.imwrite mishandle such
# paths on some platforms -- verify for your setup).
#   read:  img = cv2.imdecode(np.fromfile('x:/<non-ASCII dir>/xx.jpg', dtype=np.uint8), flags)
#          (flags is e.g. cv2.IMREAD_COLOR)
#   write: cv2.imencode('.jpg', img)[1].tofile('x:/<non-ASCII dir>/xx.jpg')

# Placeholder: replace the ellipsis with the actual image file paths.
img_paths = [...]

# Decode every file as a single-channel grayscale image.
imgs = [
    cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
    for path in img_paths
]

# Binarize each image; `img` holds the result for the current image only.
for o_img in imgs:
    img = threshold(o_img)

上面代码中用到的二值化函数 threshold 及其辅助函数 isBlackChar 定义如下（需要在调用之前先执行这些定义）：

def isBlackChar(img):
    """Report whether bright pixels outnumber dark pixels in ``img``.

    Pixels strictly above 100 count as bright and pixels strictly below 100
    count as dark; pixels equal to 100 are counted on neither side.  A
    bright majority is taken to mean dark characters on a light background.

    Returns a truthy value only when the bright count is strictly larger
    than the dark count.
    """
    bright_count = (img > 100).sum()
    dark_count = (img < 100).sum()
    return bright_count > dark_count

def threshold(o_img):
    """Binarize a grayscale character image.

    The image is smoothed, adaptively thresholded, and finally inverted if
    needed so that ``isBlackChar`` accepts the result (bright pixels are the
    majority, i.e. dark glyphs on a light background).

    Args:
        o_img: 8-bit single-channel image as a NumPy array, e.g. the result
            of ``cv2.imdecode(..., cv2.IMREAD_GRAYSCALE)``.

    Returns:
        The thresholded image with pixel values 0 or 255.

    Raises:
        ValueError: if ``o_img`` is ``None`` (``cv2.imdecode`` returns
            ``None`` when decoding fails) or contains no pixels.
    """
    if o_img is None or o_img.size == 0:
        raise ValueError(
            "threshold() requires a non-empty image; got None or an empty "
            "array (did image decoding fail?)"
        )

    # The adaptive-threshold neighbourhood spans the shorter image side.
    # cv2.adaptiveThreshold needs an odd block size greater than 1, so round
    # an even side down to odd and never go below 3 (tiny images would
    # otherwise yield an invalid size of 1).
    length = int(np.min(o_img.shape[:2]))
    if length % 2 == 0:
        length -= 1
    length = max(length, 3)

    # Noise suppression: 3x3 mean filter, 7x7 median filter, 3x3 mean filter.
    img = cv2.blur(o_img, (3, 3))
    img = cv2.medianBlur(img, 7)
    img = cv2.blur(img, (3, 3))

    # Two adaptive-threshold passes, mean-based then Gaussian-weighted.
    # NOTE(review): the second pass runs on the already binary output of the
    # first; kept as in the original -- confirm it is intentional.
    img = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, length, 2
    )
    img = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, length, 2
    )

    # Normalise polarity: flip the image when dark pixels are not the minority.
    if not isBlackChar(img):
        img = 255 - img
    return img



你可能感兴趣的：#图像处理与计算机视觉 #编程语言