环境:Windows 10,Anaconda3,GPU:GeForce 920M
生成验证码
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 10 10:25:12 2019
@author: ASUS
"""
from PIL import Image,ImageDraw,ImageFont
import random
import numpy as np
from scipy import ndimage
def randcolor():
    """Return a random colour as a ``#RRGGBB`` hex string.

    Each of the six hex digits is drawn independently and uniformly from
    ``0``-``F``.
    """
    hex_digits = "0123456789ABCDEF"
    # random.choice covers every digit. The previous randint(0, 14) bound
    # stopped one short of the last index, so 'F' could never be produced.
    return "#" + "".join(random.choice(hex_digits) for _ in range(6))
# Alphabet the captcha text is drawn from: digits, then lower-case letters,
# then upper-case letters.
character = list(
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)

size = (120, 30)                      # image size
image_type = "jpg"                    # format the images are saved in
mode = "RGB"                          # image mode
font_size = 36                        # font size of the captcha text
bg_color = (255, 255, 255)            # background colour
character_font_color = (0, 255, 255)  # text colour
font_file = "C:\\Windows\\Fonts\\Inkfree.ttf"  # font file

length = 4      # characters per captcha
line_num = 3    # interference lines per image

# Both counts are drawn once here, so every generated image shares them.
point_num = random.randint(5, 100)  # interference points per image
arc_num = random.randint(0, 3)      # arcs per image

num = 1000000   # number of captcha images to generate
# Generate `num` captcha images: random text on a plain background, overlaid
# with random noise points, lines and arcs, saved under the lower-cased text.

# The font never changes, so load it once instead of once per image.
font = ImageFont.truetype(font_file, font_size)

for _ in range(num):
    # `text` (not `str`) so the builtin is not shadowed.
    text = "".join(random.choice(character) for _ in range(length))

    img = Image.new(mode=mode, size=size, color=bg_color)
    draw = ImageDraw.Draw(img, mode=mode)

    # Centre the text on the canvas.
    # NOTE: ImageFont.getsize was removed in Pillow 10; on newer Pillow
    # versions the text extent has to come from font.getbbox instead.
    width, height = font.getsize(text)
    draw.text(((size[0] - width) / 2, (size[1] - height) / 2),
              text, character_font_color, font)

    # Noise points. randint is inclusive at both ends, so the upper bound is
    # size - 1 to keep every point on the canvas.
    for _ in range(point_num):
        x1 = random.randint(0, size[0] - 1)
        y1 = random.randint(0, size[1] - 1)
        draw.point([x1, y1], fill=randcolor())

    # Interference lines. The start point lies in the upper-left 3/5 of the
    # image and the end point at or beyond it. Integer division keeps the
    # randint bounds integral, and y1 now gets its own draw (it was
    # previously assigned to x1 by mistake).
    for _ in range(line_num):
        x1 = random.randint(0, size[0] * 3 // 5)
        y1 = random.randint(0, size[1] * 3 // 5)
        x2 = random.randint(x1, size[0])
        y2 = random.randint(y1, size[1])
        draw.line([x1, y1, x2, y2], fill=randcolor(),
                  width=random.randint(0, 3))

    # Arcs inside a random bounding box, with a random start/end angle.
    for _ in range(arc_num):
        x1 = random.randint(0, size[0] * 3 // 5)
        y1 = random.randint(0, size[1] * 3 // 5)
        x2 = random.randint(x1, size[0])
        y2 = random.randint(y1, size[1])
        start = random.uniform(0, 90)
        end = random.uniform(start, start * 2)
        draw.arc([(x1, y1), (x2, y2)], start, end, randcolor())

    # The file name is the lower-cased text, so captchas that differ only in
    # letter case overwrite each other.
    img.save("C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\captcha\\" + text.lower() + ".jpg")
    print(text)
TensorFlow训练
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 10 23:29:35 2019
@author: ASUS
"""
import tensorflow as tf
import numpy as np
from PIL import Image
import os
import random
import time
import sys
import cv2
# Make only CUDA device 0 visible to this process.
os.environ['CUDA_VISIBLE_DEVICES']='0'
# Label alphabet: ten digits followed by the 26 lower-case letters. The
# position of a character in this list is its class index.
character_set=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9','a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
'v', 'w', 'x', 'y', 'z']
# Image geometry in pixels; batches are stored as flat vectors of
# captcha_width*captcha_height values.
captcha_width=120
captcha_height=30
# Characters per captcha and number of classes per character.
character_num=4
character_type_num=36
# Used by the __main__ block to compute the train/test split point.
captcha_image_num=10000
captcha_image_train_percent=0.6
captcha_image_test_percent=0.4
# Directory of labelled captcha images (the file name stem is the label).
captcha_image_path="C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\captcha\\"
# Directory the trained checkpoint is written to and restored from.
MODEL_SAVE_PATH="C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\captcha_recognition_model\\"
# Default directory of images read by test().
captcha_test_path="C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\Test\\"
# All file names in the image directory, listed once at import time.
filelist=os.listdir(captcha_image_path)
# Number of image files; get_next_batch_data_and_label uses it as the
# wrap-around bound.
L=len(filelist)
batch_size=128
# Single-element list so buildNet() can record the step at which it saved the
# model and other code can read it back.
train_steps=[0]
# Log files, opened (and truncated) at import time: Filep receives prediction
# results, Filet the training summary line.
Filep =open("predict.txt","w+")
Filet =open("train.txt","w+")
def weightInit(shape,name):
    """Create a named weight variable of the given shape.

    The initial values are sampled from a truncated normal distribution
    with a standard deviation of 0.1.
    """
    return tf.Variable(
        initial_value=tf.truncated_normal(shape, stddev=0.1),
        name=name,
    )
def biasInit(shape,name):
    """Create a named bias variable of the given shape, initialised to 0.1."""
    init = tf.constant(0.1, shape=shape)
    # The name must be passed by keyword: the second positional parameter of
    # tf.Variable is `trainable`, so a positional name was silently dropped
    # and every bias ended up with the default name "Variable".
    return tf.Variable(initial_value=init, name=name)
def _encode_label(text):
    """Return the flat one-hot label vector for a captcha text.

    The vector has ``character_num`` consecutive groups of
    ``character_type_num`` entries; in group ``k`` the entry at the class
    index of ``text[k]`` is 1. Letters are matched case-insensitively, as
    before (upper- and lower-case letters share a class).

    Raises:
        ValueError: if *text* is shorter than ``character_num`` or contains
            a character that is not in ``character_set``.
    """
    if len(text) < character_num:
        raise ValueError("label %r has fewer than %d characters" % (text, character_num))
    label = np.zeros(character_type_num * character_num)
    for pos in range(character_num):
        ch = text[pos].lower()
        if ch not in character_set:
            # Previously such characters were mapped to an arbitrary class
            # by the ord() arithmetic; fail loudly instead.
            raise ValueError("unexpected character %r in label %r" % (text[pos], text))
        label[pos * character_type_num + character_set.index(ch)] = 1
    return label


def get_next_batch_data_and_label(batchsize,step,filelist,path=captcha_image_path):
    """Load one batch of grey-scale images and their one-hot labels.

    Args:
        batchsize: number of rows in the returned arrays.
        step: single-element list holding the current batch index. It is
            reset to 0 in place when the list wraps around; the caller is
            responsible for incrementing it between calls.
        filelist: image file names; the part before the first '.' is the
            label. Shuffled in place whenever the list wraps around.
        path: directory containing the files.

    Returns:
        ``(batch_data, batch_label)``: ``batch_data`` has one flattened
        image per row scaled to [0, 1]; ``batch_label`` has the matching
        one-hot label per row. If fewer than ``batchsize`` files are
        available the remaining rows stay zero.
    """
    batch_data = np.zeros([batchsize, captcha_width * captcha_height])
    batch_label = np.zeros([batchsize, character_type_num * character_num])

    index = step[0] * batchsize
    # Wrap around based on the list actually passed in (not the module-level
    # file count), and only when a full batch no longer fits, so the last
    # complete batch is not skipped.
    if index + batchsize > len(filelist):
        random.shuffle(filelist)
        step[0] = 0
        index = 0

    for row, file in enumerate(filelist[index:index + batchsize]):
        # The context manager closes the file handle as soon as the pixels
        # have been copied out.
        with Image.open(os.path.join(path, file)) as img:
            pixels = np.array(img.convert("L"))
        batch_data[row, :] = pixels.flatten() / 255
        batch_label[row, :] = _encode_label(file.split('.')[0])
    return batch_data, batch_label
def buildNet():
    """Build the captcha CNN, train it, and save a checkpoint.

    The graph consists of three 3x3 conv + ReLU + 2x2 max-pool stages
    (32, 64 and 64 filters), a 1024-unit fully connected layer with
    dropout, and a linear layer producing
    ``character_num * character_type_num`` logits.

    Training runs for up to 60000 steps with Adam (learning rate 0.001).
    Every 100 steps the accuracy and loss on one batch are evaluated,
    written to TensorBoard (``./captcha_log``) and appended to
    ``record.txt``. Training stops early once that accuracy exceeds 0.99.
    A checkpoint is written under ``MODEL_SAVE_PATH`` and the step it was
    saved at is stored in ``train_steps[0]``.
    """
    X = tf.placeholder(tf.float32, [None, captcha_width * captcha_height], name="data_input")
    Y = tf.placeholder(tf.float32, [None, character_type_num * character_num], name="label_input")
    x_input = tf.reshape(X, [-1, captcha_height, captcha_width, 1], name="x_input")
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # Stage 1: 1 -> 32 channels.
    W_conv1 = weightInit([3, 3, 1, 32], "W_conv11")
    bias_conv1 = biasInit([32], "bias_conv1")
    conv1 = tf.nn.relu(tf.nn.conv2d(x_input, W_conv1, strides=[1, 1, 1, 1], padding='SAME', name="conv1") + bias_conv1)
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name="conv1_pool")

    # Stage 2: 32 -> 64 channels.
    W_conv2 = weightInit([3, 3, 32, 64], "W_conv2")
    bias_conv2 = biasInit([64], "bias_conv2")
    conv2 = tf.nn.relu(tf.nn.conv2d(conv1, W_conv2, strides=[1, 1, 1, 1], padding='SAME', name="conv2") + bias_conv2)
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name="conv2_pool")

    # Stage 3: 64 -> 64 channels.
    W_conv3 = weightInit([3, 3, 64, 64], "W_conv3")
    bias_conv3 = biasInit([64], "bias_conv3")
    conv3 = tf.nn.relu(tf.nn.conv2d(conv2, W_conv3, strides=[1, 1, 1, 1], padding='SAME', name="conv3") + bias_conv3)
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name="conv3_pool")

    # Three stride-2 'SAME' pools shrink the 30x120 input to 4x15
    # (30 -> 15 -> 8 -> 4 and 120 -> 60 -> 30 -> 15), with 64 channels.
    W_fc1 = weightInit([15 * 4 * 64, 1024], "W_fc1")
    B_fc1 = biasInit([1024], "B_fc1")
    fc1 = tf.reshape(conv3, [-1, 15 * 4 * 64])
    fc1 = tf.nn.relu(tf.math.add(tf.matmul(fc1, W_fc1), B_fc1))
    fc1 = tf.nn.dropout(fc1, keep_prob)

    W_fc2 = weightInit([1024, character_num * character_type_num], "W_fc2")
    B_fc2 = biasInit([character_num * character_type_num], "B_fc2")
    output = tf.math.add(tf.matmul(fc1, W_fc2), B_fc2, name="output")

    with tf.name_scope('Loss'):
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y), name="loss")
        tf.summary.scalar("loss", loss)

    optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

    # Per-character accuracy: compare the arg-max class of each of the
    # character_num groups in the prediction with that of the label.
    predict = tf.reshape(output, [-1, character_num, character_type_num], name='predict')
    labels = tf.reshape(Y, [-1, character_num, character_type_num], name='lables')
    predict_max_index = tf.argmax(predict, axis=2, name='predict_max_index')
    labels_max_index = tf.argmax(labels, axis=2, name='labels_max_index')
    predict_correct_vec = tf.equal(predict_max_index, labels_max_index, name="predict_correct_vec")
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(predict_correct_vec, tf.float32), name="accuracy")
        tf.summary.scalar('accuracy', accuracy)

    print("sess begin ")
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    total_steps = 60000

    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./captcha_log", sess.graph)
        try:
            sess.run(tf.global_variables_initializer())
            step_ = [0]
            saved = False
            # Open the record file once for the whole run. Reopening it in
            # "w+" mode at every evaluation truncated it each time and never
            # closed the handle.
            with open("record.txt", "w+") as record:
                for steps in range(total_steps):
                    train_data, train_label = get_next_batch_data_and_label(batch_size, step_, filelist)
                    step_[0] = step_[0] + 1
                    sess.run(optimizer, feed_dict={X: train_data, Y: train_label, keep_prob: 0.75})
                    if steps % 100 != 0:
                        continue
                    # NOTE(review): the evaluation batch is drawn from the
                    # same file list and batch counter as the training
                    # batches, so this is not a held-out accuracy.
                    test_data, test_label = get_next_batch_data_and_label(batch_size, step_, filelist)
                    summary, acc, loss_ = sess.run([merged, accuracy, loss], feed_dict={X: test_data, Y: test_label, keep_prob: 1.0})
                    writer.add_summary(summary, steps)
                    s = "steps=\t" + str(steps) + "\t,accuracy=\t" + str(acc) + "\tloss=" + str(loss_) + "\t\n"
                    record.write(s)
                    record.flush()  # keep the file readable while training runs
                    print("steps=%d,accuracy=%f" % (steps, acc))
                    if acc > 0.99:
                        saver.save(sess, MODEL_SAVE_PATH + "captcha_recognition.model", global_step=steps)
                        train_steps[0] = steps
                        saved = True
                        break
            if not saved:
                # The accuracy target was never reached: still keep the
                # final weights so the training run is not lost.
                saver.save(sess, MODEL_SAVE_PATH + "captcha_recognition.model", global_step=total_steps)
                train_steps[0] = total_steps
        finally:
            # Flush and close the TensorBoard log even if training fails.
            writer.close()
def vec2label(predict_vec):
    """Convert predicted class indices into the captcha text they encode.

    Args:
        predict_vec: indexable sequence holding at least ``character_num``
            class indices into ``character_set``.

    Returns:
        The decoded string of ``character_num`` characters.
    """
    # Use the shared character_num constant rather than a hard-coded 4, and
    # avoid shadowing the builtin `str`.
    return "".join(character_set[predict_vec[i]] for i in range(character_num))
def test(path=captcha_test_path):
    """Run the saved model over every image in *path* and log the results.

    The expected answer for each image is the part of its file name before
    the first '.'. One line per image, followed by a summary line, is
    written to ``Filep`` and printed.

    Args:
        path: directory containing the test images.

    Raises:
        FileNotFoundError: if no checkpoint exists under ``MODEL_SAVE_PATH``.
    """
    # Honour the `path` argument (it used to be ignored in favour of the
    # module-level default).
    test_list = os.listdir(path)
    random.shuffle(test_list)

    # Locate the graph definition and the weights from the same checkpoint
    # instead of combining train_steps[0] with latest_checkpoint.
    checkpoint = tf.train.latest_checkpoint(MODEL_SAVE_PATH)
    if checkpoint is None:
        raise FileNotFoundError("no checkpoint found in " + MODEL_SAVE_PATH)

    # Import into a fresh graph so tensor names such as "keep_prob:0" refer
    # to the restored model even when buildNet() has already populated the
    # default graph in this process.
    graph = tf.Graph()
    with graph.as_default():
        Saver_ = tf.train.import_meta_graph(checkpoint + '.meta')

    num = 0
    with tf.Session(graph=graph) as sess:
        Saver_.restore(sess, checkpoint)
        input_data = graph.get_tensor_by_name("data_input:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")
        predict_max_index = graph.get_tensor_by_name("predict_max_index:0")
        for file in test_list:
            filename = os.path.join(path, file)
            with Image.open(filename) as img:
                print(file)
                img_arr = np.array(img.convert("L"))
            img_data = img_arr.flatten() / 255
            predict_vec = sess.run(predict_max_index, feed_dict={input_data: [img_data], keep_prob: 1.0})
            predict_val = np.squeeze(predict_vec)
            print(predict_val)
            predict_ans = vec2label(predict_val)
            right_ans = file.split('.')[0]
            if right_ans == predict_ans:
                num = num + 1
                print("正确答案:%s 预测答案:%s 预测正确" % (right_ans, predict_ans))
                s = "正确答案:\t" + right_ans + " \t预测答案:\t" + predict_ans + "\t预测正确\n"
            else:
                print("正确答案:%s 预测答案:%s 预测错误" % (right_ans, predict_ans))
                s = "正确答案:\t" + right_ans + " \t预测答案:\t" + predict_ans + "\t预测错误\n"
            Filep.write(s)

    # Guard against an empty test directory (previously a ZeroDivisionError).
    rate = num / len(test_list) if test_list else 0.0
    s = "正确预测数目:" + str(num) + " 验证码数目:" + str(len(test_list)) + " 正确率:" + str(rate) + "预测正确\n"
    Filep.write(s)
    print("正确预测数目:%d 验证码数目:%d 正确率:%f" % (num, len(test_list), rate))
if __name__=='__main__':
    # Script entry point: shuffle the image list, train the network, and
    # write timing information to the two log files.
    random.seed(time.time())
    # Shuffle before splitting so the split is random.
    # NOTE: buildNet() currently reads `filelist` directly, so train_list and
    # test_list are not consumed yet.
    random.shuffle(filelist)
    train_num = int(captcha_image_num * captcha_image_train_percent)
    train_list = filelist[:train_num]
    test_list = filelist[train_num:]
    print("filelist len:")
    l = len(filelist)
    print(l)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    train_start = time.perf_counter()
    buildNet()
    train_end = time.perf_counter()
    # train_steps is a single-element list: format its value, not the list.
    # "%d" applied to the list itself raises TypeError.
    s = "train over after take " + str(train_steps[0]) + " steps cost " + str(train_end - train_start) + " s\n"
    Filet.write(s)
    Filet.close()
    print("train over after take %d steps cost %s s" % (train_steps[0], (train_end - train_start)))

    test_start = time.perf_counter()
    #test(captcha_test_path)
    test_end = time.perf_counter()
    s = "Test over cost :" + str(test_end - test_start) + "s\n"
    Filep.write(s)
    Filep.close()
    print("Test over cost :%s s" % (test_end - test_start))
训练结果:
测试准确率在93%到95%之间
大约耗时2个小时训练
遇到的问题:
1. 最开始在每一个卷积层后都加了 dropout 层以防止过拟合,但训练正确率一直很低,在 0.01 到 0.05 之间浮动;后来把卷积层后的 dropout 层去掉,正确率就上去了,可能是加了过多 dropout 之后模型的拟合能力不够。
2. 第一次成功后调整了一些参数,把之前训练好的模型删除后打算重新训练,但再次训练时在 saver=tf.train.Saver() 处报错:At least two variables have the same name: Variable。给每一个变量都命名后仍然报错,最后发现是删除模型导致的,重启 IDE 后再次运行即可。
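3. 用 TensorBoard 进行可视化时报错:Fetch argument None has invalid type。(该错误通常出现在 sess.run 的 fetch 列表中含有 None 时,例如 tf.summary.merge_all() 在尚未定义任何 summary 的情况下返回了 None。)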
4. 再次测试时报错:InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'keep_prob_10' with dtype float。查找后发现,原因是原文件夹下留有之前运行出错时的日志文件,干扰了这次运行,彻底删除后重启即可解决。