# 可控制线程数,快速生成带标记验证码,最大线程数根据电脑性能自行设定,至少需要10万训练集。
# 线程过多会导致执行卡顿,速度甚至不如单线程,这时就需要控制线程数,使程序在执行期间始终保持较高速度运行,利用队列+最大线程数限制可解决问题。
# 控制原理:创建max_thread个线程,每个线程判断queue队列中是否还有数据,若有则继续在这max_thread个线程中执行,否则线程退出;直到队列为空,线程全部关闭。
import os
import time
import queue # 用于控制线程数
import random
import threading
from PIL import Image # 图像处理库
from captcha.image import ImageCaptcha # 验证码生成库 # pip install captcha
# Character sets the captcha text is drawn from.
# Digits
number_set = list('0123456789')
# Letters, interleaved as upper/lower pairs: 'A', 'a', 'B', 'b', ...
letter_set = [
    ch
    for upper in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    for ch in (upper, upper.lower())
]
# Full alphabet: digits first, then letters (order is part of the contract).
all_char_set = number_set + letter_set
all_char_set_len = len(all_char_set)
# Image size (names kept exactly as spelled; they are module-level API)
image_hight = 60
image_widht = 160
# Training set directory
exercise_path = 'exercise_set/'
# Validation set directory
verify_path = 'verify_set/'
# Inference set directory
inference_path = 'inference_set/'
class Create_Image(object):
    """Generate labelled captcha images with a bounded set of worker threads.

    Every captcha to produce is queued up front as ``<text>_<index>``. A fixed
    number of worker threads then drain the queue; each entry is rendered with
    ``ImageCaptcha`` and saved under ``self.path`` as
    ``<text>_<index>_<unix-seconds>.png``.
    """

    def __init__(self):
        self.count = 100000  # number of images to generate
        self.path = exercise_path  # output directory
        self.max_len = 4  # characters per captcha
        self.max_thread = 1000  # upper bound on worker threads

    def create_file(self, image_text_data):
        """Render the captcha image for one queue entry.

        Args:
            image_text_data: Queue entry of the form ``<text>_<index>``.

        Returns:
            A ``(image_text, image_file)`` tuple: the captcha text (everything
            before the first underscore) and the rendered image opened via
            ``PIL.Image.open``.
        """
        image = ImageCaptcha()
        image_text = image_text_data.split('_')[0]
        image_file = Image.open(image.generate(image_text))
        return image_text, image_file

    def create_text(self):
        """Return ``self.max_len`` random characters drawn from ``all_char_set``."""
        return ''.join(
            random.choice(all_char_set) for _ in range(self.max_len)
        )

    def makefile(self, que):
        """Worker loop: render and save captchas until ``que`` is drained.

        Args:
            que: A ``queue.Queue`` of ``<text>_<index>`` strings shared by all
                worker threads.
        """
        while True:
            # Use get_nowait() rather than "check empty(), then get()":
            # between those two calls another worker can take the last item,
            # leaving this thread blocked in get() forever and hanging join().
            try:
                q_name = que.get_nowait()
            except queue.Empty:
                return
            _, image = self.create_file(q_name)
            time_tamp = int(time.time())
            filename = q_name + '_' + str(time_tamp) + '.png'
            # os.path.join also works when self.path has no trailing separator.
            image.save(os.path.join(self.path, filename))
            print('保存 : %s' % (filename))

    def many_T(self):
        """Queue ``self.count`` captcha texts and process them with worker threads."""
        q = queue.Queue()
        for i in range(self.count):
            # The index suffix keeps queue entries (and file names) unique even
            # when the same random text is drawn twice.
            q.put(self.create_text() + '_' + str(i))

        # Never start more threads than there are items to process.
        worker_total = min(self.max_thread, self.count)
        workers = [
            threading.Thread(target=self.makefile, args=(q,))
            for _ in range(worker_total)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    def main(self):
        """Ensure the output directory exists, then generate all images."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.path, exist_ok=True)
        self.many_T()
def main():
    """Script entry point: build a ``Create_Image`` and run its ``main``."""
    generator = Create_Image()
    generator.main()


# Only generate images when executed as a script, not when imported.
if __name__ == '__main__':
    main()