Python 新手实战之机器学习实现简单验证码识别(二):图片二值化除去噪点

图片二值化去噪点

import os
from PIL import Image
import numpy as np
import pandas as pd
from pandas import DataFrame
from threading import Thread
import queue
import time


class Clean(Thread):
    """Worker thread that binarizes and de-noises captcha images.

    Each worker repeatedly takes a ``(filename, filepath)`` tuple from the
    task queue and processes it until the queue is empty. The thread starts
    itself as soon as it is constructed.
    """

    def __init__(self, que, out):
        """
        :param que: task queue holding ``(filename, filepath)`` tuples
        :type que: queue.Queue
        :param out: output queue; stored on the instance, not written to by
            the methods of this class
        :type out: queue.Queue
        """
        super().__init__()
        self.que = que
        self.out = out
        self.start()

    def to_grey(self, image, threshold=180):
        """Binarize an image into a 2-D array of 0 (dark) and 255 (light).

        :param image: PIL image; it is converted to RGB first so palette,
            RGBA and greyscale inputs are accepted as well
        :param threshold: grey values below this become 0, all others 255
        :return: integer array of shape ``(height, width)``
        """
        self.width, self.height = image.size
        # Widen to int32 before the weighted sum: uint8 arithmetic would wrap
        # around at 256 and corrupt the grey value.
        pixels = np.asarray(image.convert('RGB'), dtype=np.int32)
        # NOTE(review): the common integer luma approximation is
        # (38*R + 75*G + 15*B) >> 7; the weights here are applied in the
        # opposite channel order. Kept unchanged to preserve existing
        # output -- confirm the intended order.
        grey = (15 * pixels[..., 0] + 75 * pixels[..., 1] + 38 * pixels[..., 2]) >> 7
        # A fixed threshold is used; an adaptive one could work better.
        return np.where(grey < threshold, 0, 255)

    def clean_point(self, grey0, degree=200):
        """Remove isolated noise pixels from a binarized image.

        A pixel is set to 255 when more than 5 of the 9 pixels in its 3x3
        neighbourhood (itself included) are brighter than ``degree``.

        :param grey0: 2-D binarized array; it is modified in place
        :param degree: brightness above which a pixel counts as background
        :return: view of ``grey0`` without its one-pixel border, which is
            never de-noised and is therefore dropped
        """
        # Decide from an untouched snapshot so that pixels already cleaned
        # do not influence the decision for later pixels.
        snapshot = grey0.copy()
        rows, cols = snapshot.shape
        for x in range(1, rows - 1):
            for y in range(1, cols - 1):
                window = snapshot[x - 1:x + 2, y - 1:y + 2]
                if np.count_nonzero(window > degree) > 5:
                    grey0[x, y] = 255
        return grey0[1:-1, 1:-1]

    def clean(self, filename, filepath):
        """Load one image, binarize it and strip noise pixels.

        :param filename: label taken from the file name (not used here)
        :param filepath: path of the image file to open
        :return: the de-noised binary array without its border
        """
        # The context manager releases the file handle once pixels are read.
        with Image.open(filepath) as image:
            grey = self.to_grey(image)
        return self.clean_point(grey)

    def run(self):
        """Process queued images until the task queue is empty."""
        while True:
            try:
                filename, filepath = self.que.get(block=False)
            except queue.Empty:
                # An empty queue is the normal end of work, not an error.
                break
            try:
                res = self.clean(filename, filepath)
            except Exception as e:
                # One unreadable image must not stop the remaining work.
                print(f'{filepath}: {e}')
                continue
            finally:
                self.que.task_done()
            # An integer 0 from clean() is honoured as a stop signal; the
            # type guard avoids an ambiguous array comparison.
            if isinstance(res, int) and res == 0:
                break


class Manager:
    """Queue every file in ``./image`` and process them with worker threads.

    Construction does all the work: it lists the image directory, fills the
    task queue, starts the workers and blocks until they have finished.
    """

    def __init__(self, thread=1):
        """
        :param thread: number of worker threads to start
        """
        self.threads = []
        files = os.listdir('./image')
        filepaths = [os.path.join('./image/', file) for file in files]
        # The captcha text is the file name without its first 6 and last 4
        # characters.
        filenames = [file[6:-4] for file in files]
        self.que = queue.Queue()   # tasks: (filename, filepath)
        self.out = queue.Queue()   # results handed back by the workers
        self.init_work_queue(filenames, filepaths)
        self.init_thread(thread)
        self.wait_allcomplete()

    def init_thread(self, thread):
        """Create ``thread`` workers; each one starts itself on construction."""
        for _ in range(thread):
            self.threads.append(Clean(self.que, self.out))

    def init_work_queue(self, filenames, filepaths):
        """Put one ``(filename, filepath)`` task on the queue per image."""
        for task in zip(filenames, filepaths):
            self.que.put(task)

    def wait_allcomplete(self):
        """Block until every worker thread has finished."""
        for worker in self.threads:
            # Thread.isAlive() no longer exists on Python 3.9+;
            # is_alive() is the supported spelling.
            if worker.is_alive():
                worker.join()


if __name__ == '__main__':
    # Run the whole pipeline with two worker threads; Manager blocks until
    # all of them have finished.
    manager = Manager(2)

    # Drain the output queue into a list and build the frame once:
    # DataFrame.append is gone in pandas 2.0, and the former bare `except`
    # hid that failure by ending the loop silently.
    # Assumes each queued item is one row (dict or Series) -- TODO confirm
    # once the workers actually publish results.
    rows = []
    while True:
        try:
            rows.append(manager.out.get(block=False))
        except queue.Empty:
            break
    data = pd.DataFrame(rows)

    # Make sure the target directory exists before writing.
    os.makedirs('./csv', exist_ok=True)
    data.to_csv('./csv/res.csv')

后续的图片分割与数据处理将在下一节中完善。

你可能感兴趣的:(python)