import os
from PIL import Image
import numpy as np
import pandas as pd
from pandas import DataFrame
from threading import Thread
import queue
import time
class Clean(Thread):
    """Worker thread that binarises and de-noises images taken from a queue.

    Each worker repeatedly pulls ``(filename, filepath)`` tuples from the
    shared task queue until the queue is empty.
    """

    def __init__(self, que, out):
        """Store the queues and start the worker thread immediately.

        :param que: task queue holding ``(filename, filepath)`` tuples
        :param out: output queue; it is stored on the instance, but no
            method of this class currently writes to it
        """
        super(Clean, self).__init__()
        self.que = que
        self.out = out
        self.start()

    def to_grey(self, image):
        """Convert an image to a binarised (0 / 255) two-dimensional array.

        Also records the image dimensions in ``self.width`` / ``self.height``.

        :param image: PIL image; non-RGB modes are converted to RGB first
        :return: integer array of shape ``(height, width)`` containing 0
            where the weighted grey value is below 180 and 255 otherwise
        """
        if image.mode != "RGB":
            # The channel weighting below needs exactly three channels.
            image = image.convert("RGB")
        self.width, self.height = image.size
        # Widen before doing arithmetic: uint8 maths can wrap around
        # (e.g. 75 * 200 does not fit in 8 bits).
        pixels = np.asarray(image).astype(np.int32)
        # Integer-weighted grey value, scaled back down by 128 (>> 7).
        # NOTE(review): the weights 15/75/38 are applied to channels 0/1/2.
        # The common integer luma approximation uses 38/75/15 for R/G/B, so
        # this looks like BGR ordering -- confirm before changing, since the
        # 180 threshold may have been tuned against the current weights.
        grey = (15 * pixels[..., 0] + 75 * pixels[..., 1] + 38 * pixels[..., 2]) >> 7
        # Fixed-threshold binarisation (180 was chosen by the original author).
        binary = np.where(grey < 180, 0, 255)
        return binary.reshape((self.height, self.width))

    def clean_point(self, grey0, degree=200):
        """Remove isolated dark pixels from a binarised image.

        For every non-border pixel, the 3x3 neighbourhood (the pixel itself
        included) is inspected as it was *before* any modification. If more
        than 5 of those 9 values exceed ``degree``, the pixel is treated as
        noise and set to 255.

        ``grey0`` is modified in place.

        :param grey0: two-dimensional NumPy array of grey values
        :param degree: values strictly greater than this count as "bright"
        :return: view of ``grey0`` without its one-pixel border (the border
            is never de-noised and is not needed afterwards)
        """
        # The mask is a separate array, so writes to grey0 below cannot
        # influence the neighbourhood counts of later pixels.
        bright = grey0 > degree
        rows, cols = bright.shape
        counts = np.zeros((max(rows - 2, 0), max(cols - 2, 0)), dtype=int)
        # Sum the nine shifted windows: counts[i, j] is the number of bright
        # values in the 3x3 neighbourhood centred on grey0[i + 1, j + 1].
        for dx in range(3):
            for dy in range(3):
                counts += bright[dx:dx + rows - 2, dy:dy + cols - 2]
        interior = grey0[1:-1, 1:-1]
        interior[counts > 5] = 255
        return interior

    def clean(self, filename, filepath):
        """Load one image, binarise it and remove noise pixels.

        NOTE(review): the cleaned array is currently discarded and nothing is
        put on ``self.out``; ``filename`` is unused. Later processing steps
        are not implemented here. The method returns ``None``.

        :param filename: label associated with the image (currently unused)
        :param filepath: path of the image file to open
        """
        # Context manager so the underlying file handle is always released.
        with Image.open(filepath) as image:
            grey = self.to_grey(image)
        self.clean_point(grey)

    def run(self):
        """Process queued images until the task queue is empty.

        A failure while processing one image is printed and the worker moves
        on to the next task. Every task taken from the queue is marked done,
        whether it succeeded or not. A return value of ``0`` from ``clean``
        stops the worker.
        """
        while True:
            try:
                filename, filepath = self.que.get(block=False)
            except queue.Empty:
                # Normal termination: no work left.
                break
            try:
                res = self.clean(filename, filepath)
            except Exception as e:
                # One bad image must not prevent the remaining ones from
                # being processed.
                print(e)
                continue
            finally:
                self.que.task_done()
            if res == 0:
                break
class Manager:
    """Build the task queue from ``./image`` and run worker threads over it."""

    def __init__(self, thread=1):
        """Queue every file in ``./image``, start the workers and wait.

        The constructor blocks until all worker threads have finished.

        :param thread: number of worker threads to start
        """
        self.threads = []
        files = os.listdir('./image')  # names of the image files
        filepaths = [os.path.join('./image/', name) for name in files]
        # The label is sliced out of the file name: the first 6 and the last
        # 4 characters are dropped.
        # NOTE(review): presumably a fixed prefix and a 4-character extension
        # such as ".png" -- verify against the actual file naming scheme.
        filenames = [name[6:-4] for name in files]
        self.que = queue.Queue()  # task queue
        self.out = queue.Queue()  # output queue
        self.init_work_queue(filenames, filepaths)
        self.init_thread(thread)
        self.wait_allcomplete()

    def init_thread(self, thread):
        """Create ``thread`` workers sharing the task and output queues.

        :param thread: number of ``Clean`` workers to create
        """
        for _ in range(thread):
            self.threads.append(Clean(self.que, self.out))

    def init_work_queue(self, filenames, filepaths):
        """Fill the task queue with ``(filename, filepath)`` pairs.

        :param filenames: labels, one per image
        :param filepaths: image paths, in the same order as ``filenames``
        """
        for task in zip(filenames, filepaths):
            self.que.put(task)

    def wait_allcomplete(self):
        """Block until every worker thread that is still running has ended."""
        for item in self.threads:
            # Thread.isAlive() was removed in Python 3.9; is_alive() is the
            # supported spelling.
            if item.is_alive():
                item.join()
if __name__ == '__main__':
    # Run the whole pipeline with two worker threads; Manager() blocks until
    # every worker has finished.
    manager = Manager(2)

    # Drain the output queue. Only an empty queue ends the loop, so real
    # errors are no longer silently swallowed.
    rows = []
    while True:
        try:
            rows.append(manager.out.get(block=False))
        except queue.Empty:
            break

    # DataFrame.append() was removed in pandas 2.0; build the frame once from
    # the collected rows instead.
    # NOTE(review): assumes each queue item is a dict or Series describing
    # one row -- confirm once the workers actually write to the output queue.
    data = pd.DataFrame(rows)

    # to_csv() does not create missing directories.
    os.makedirs('./csv', exist_ok=True)
    data.to_csv('./csv/res.csv')
# 后续的图片分割与数据处理将在下一节完善。