TensorFlow识别验证码

环境:win10,anaconda3,gpu:Geforce 920M

生成验证码

# -*- coding: utf-8 -*-
"""
Created on Wed Apr 10 10:25:12 2019

@author: ASUS
"""

from PIL import Image,ImageDraw,ImageFont
import random
import numpy as np
from scipy import ndimage

def randcolor():
    """Return a random colour as a ``#RRGGBB`` hex string.

    Each of the six hex digits is drawn independently and uniformly from
    ``0``-``F``.
    """
    hex_digits="0123456789ABCDEF"
    # random.choice covers every digit; the previous randint(0,14) upper
    # bound could never reach index 15, so 'F' was never produced.
    return "#"+"".join(random.choice(hex_digits) for _ in range(6))
    
# Alphabet the captcha text is drawn from: digits, lowercase, then uppercase.
character=list(
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)

size=(120,30)  # image size handed to Image.new
image_type="jpg"  # format the images are saved in
mode="RGB"  # image mode
font_size=36  # font size of the captcha text
bg_color=(255,255,255)  # background colour
character_font_color=(0,255,255)  # colour of the captcha text
font_file="C:\\Windows\\Fonts\\Inkfree.ttf"  # font file
#font_type = ImageFont.truetype(font_path, font_size)  # font type
length=4  # number of characters per captcha
line_num=3  # number of interference lines
point_num=random.randint(5,100)  # number of noise points (drawn once at start-up)
arc_num=random.randint(0,3)  # number of arcs (drawn once at start-up)

num=1000000  # number of captcha images to generate

# The font is the same for every image, so load it once instead of per image.
# NOTE: FreeTypeFont.getsize (used below) was removed in Pillow 10; on newer
# Pillow versions it has to be replaced with getbbox/getlength.
font=ImageFont.truetype(font_file,font_size)

for j in range(num):
    # Random captcha text; named `text` so the builtin `str` is not shadowed.
    text="".join(random.choice(character) for _ in range(length))

    img=Image.new(mode=mode,size=size,color=bg_color)
    draw=ImageDraw.Draw(img,mode=mode)

    # Centre the text inside the image.
    width,height=font.getsize(text)
    draw.text(((size[0]-width)/2,(size[1]-height)/2),text,character_font_color,font)

    # Noise points in random colours.
    for i in range(point_num):
        x1=random.randint(0,size[0])
        y1=random.randint(0,size[1])
        draw.point([x1,y1],fill=randcolor())

    # Interference lines. Integer division keeps the randint bounds integral
    # (float bounds raise TypeError on Python 3.12+), and y1 is now really
    # assigned here: the original assigned x1 twice and reused a stale y1
    # left over from the noise-point loop.
    for i in range(line_num):
        x1=random.randint(0,size[0]*3//5)
        y1=random.randint(0,size[1]*3//5)
        x2=random.randint(x1,size[0])
        y2=random.randint(y1,size[1])
        draw.line([x1,y1,x2,y2],fill=randcolor(),width=random.randint(0,3))

    # Interference arcs inside a random bounding box.
    for i in range(arc_num):
        x1=random.randint(0,size[0]*3//5)
        y1=random.randint(0,size[1]*3//5)
        x2=random.randint(x1,size[0])
        y2=random.randint(y1,size[1])
        start=random.uniform(0,90)
        end=random.uniform(start,start*2)
        draw.arc([(x1,y1),(x2,y2)],start,end,randcolor())

    # The lower-cased captcha text is the file name and therefore the label.
    img.save("C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\captcha\\"+text.lower()+".jpg")
    print(text)
                                                                                        

 

TensorFlow训练

# -*- coding: utf-8 -*-
"""
Created on Wed Apr 10 23:29:35 2019

@author: ASUS
"""

import tensorflow as tf
import numpy as np
from PIL import Image
import os
import random
import time
import sys
import cv2

# Make only CUDA device 0 visible to this process.
os.environ['CUDA_VISIBLE_DEVICES']='0'

# Label alphabet: the ten digits followed by the lowercase letters.
character_set=list("0123456789abcdefghijklmnopqrstuvwxyz")

# Captcha geometry and label layout.
captcha_width=120
captcha_height=30
character_num=4
character_type_num=36
captcha_image_num=10000

# Train/test split fractions.
captcha_image_train_percent=0.6
captcha_image_test_percent=0.4

# Directories for the generated captchas, the saved model and the test images.
captcha_image_path="C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\captcha\\"
MODEL_SAVE_PATH="C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\captcha_recognition_model\\"
captcha_test_path="C:\\Users\\ASUS\\Documents\\Code\\python\\scrapy\\Test\\"

# Every file in the captcha directory, and how many there are.
filelist=os.listdir(captcha_image_path)
L=len(filelist)

batch_size=128
# One-element list so buildNet can report the step at which training stopped.
train_steps=[0]

# Log files for the prediction results and the training summary.
Filep =open("predict.txt","w+")
Filet =open("train.txt","w+")


def weightInit(shape,name):
    """Create a variable called `name`, initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(
        initial_value=tf.truncated_normal(shape,stddev=0.1),
        name=name,
    )

def biasInit(shape,name):
    """Create a bias variable called `name`, with every element set to 0.1.

    Args:
        shape: shape of the bias tensor.
        name: name given to the variable in the graph.

    Returns:
        The new tf.Variable.
    """
    init=tf.constant(0.1,shape=shape)
    # `name` has to be passed by keyword: the second positional parameter of
    # tf.Variable is `trainable`, so the original positional call left the
    # variable with the default name "Variable".
    return tf.Variable(init,name=name)
 
    
def get_next_batch_data_and_label(batchsize,step,filelist,path=captcha_image_path):
    """Load one batch of captcha images together with their one-hot labels.

    Args:
        batchsize: number of rows in the returned arrays.
        step: one-element list holding the current batch index. It is reset
            to 0 in place when the remaining files cannot fill another batch.
        filelist: image file names. The part of each name before the first
            '.' is the captcha text. The list is shuffled in place on
            wrap-around.
        path: directory that contains the files.

    Returns:
        (batch_data, batch_label): batch_data has shape
        [batchsize, captcha_width*captcha_height] with grey values scaled to
        0..1; batch_label has shape
        [batchsize, character_type_num*character_num] with one 1 per
        character position. Rows stay all-zero if fewer than `batchsize`
        files are available.
    """
    batch_data=np.zeros([batchsize,captcha_width*captcha_height])
    batch_label=np.zeros([batchsize,character_type_num*character_num])

    index=step[0]*batchsize
    # Compare against the list actually passed in rather than the module-level
    # length, so the function also works for a subset of the files.
    if index+batchsize>=len(filelist):
        random.shuffle(filelist)
        step[0]=0
        index=0

    for row,file in enumerate(filelist[index:index+batchsize]):
        # The context manager closes the image file once it has been converted.
        with Image.open(os.path.join(path,file)) as img:
            gray=img.convert("L")
        batch_data[row,:]=np.array(gray).flatten()/255

        text=file.split('.')[0]
        for j in range(character_num):
            code=ord(text[j])
            # Digits map to 0..9; letters of either case map to 10..35.
            if code<=ord('9'):
                offset=code-ord('0')
            elif code<=ord('Z'):
                offset=code-ord('A')+10
            else:
                offset=code-ord('a')+10
            batch_label[row][j*character_type_num+offset]=1

    return batch_data,batch_label
    
    
def buildNet():
    """Build the captcha CNN, train it, and save a checkpoint on success.

    Network: three 3x3 convolution + 2x2 max-pool stages (32, 64 and 64
    filters), a 1024-unit fully connected layer with dropout, and a linear
    output of character_num*character_type_num logits. The loss is the mean
    sigmoid cross-entropy, minimised with Adam (learning rate 0.001).

    Training runs for at most 60000 steps. Every 100 steps the next batch
    from ``filelist`` is evaluated with dropout disabled; the merged
    summaries go to ./captcha_log and one line is appended to record.txt.
    Once that accuracy exceeds 0.99 the model is saved under
    MODEL_SAVE_PATH, the step count is stored in ``train_steps[0]`` and
    training stops.

    NOTE: the evaluation batch is drawn from the same file list as the
    training batches, so the reported accuracy is not a held-out score.
    NOTE: if the accuracy never exceeds 0.99, no checkpoint is written.
    """
    batch_size=128
    X=tf.placeholder(tf.float32,[None,captcha_width*captcha_height],name="data_input")
    Y=tf.placeholder(tf.float32,[None,character_type_num*character_num],name="label_input")
    x_input=tf.reshape(X,[-1,captcha_height,captcha_width,1],name="x_input")
    keep_prob=tf.placeholder(tf.float32,name='keep_prob')

    # Convolution stage 1: 1 -> 32 channels.
    W_conv1=weightInit([3,3,1,32],"W_conv11")
    bias_conv1=biasInit([32],"bias_conv1")
    conv1=tf.nn.relu(tf.nn.conv2d(x_input,W_conv1,strides=[1,1,1,1],padding='SAME',name="conv1")+bias_conv1)
    conv1=tf.nn.max_pool(conv1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME',name="conv1_pool")

    # Convolution stage 2: 32 -> 64 channels.
    W_conv2=weightInit([3,3,32,64],"W_conv2")
    bias_conv2=biasInit([64],"bias_conv2")
    conv2=tf.nn.relu(tf.nn.conv2d(conv1,W_conv2,strides=[1,1,1,1],padding='SAME',name="conv2")+bias_conv2)
    conv2=tf.nn.max_pool(conv2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME',name="conv2_pool")

    # Convolution stage 3: 64 -> 64 channels.
    W_conv3=weightInit([3,3,64,64],"W_conv3")
    bias_conv3=biasInit([64],"bias_conv3")
    conv3=tf.nn.relu(tf.nn.conv2d(conv2,W_conv3,strides=[1,1,1,1],padding='SAME',name="conv3")+bias_conv3)
    conv3=tf.nn.max_pool(conv3,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME',name="conv3_pool")

    # Three SAME-padded 2x2 pools shrink a 30x120 input to 4x15, hence 15*4*64.
    W_fc1=weightInit([15*4*64,1024],"W_fc1")
    B_fc1=biasInit([1024],"B_fc1")
    fc1=tf.reshape(conv3,[-1,15*4*64])
    fc1=tf.nn.relu(tf.math.add(tf.matmul(fc1,W_fc1),B_fc1))
    fc1=tf.nn.dropout(fc1,keep_prob)

    # Output layer: one logit per (character position, character class).
    W_fc2=weightInit([1024,character_num*character_type_num],"W_fc2")
    B_fc2=biasInit([character_num*character_type_num],"B_fc2")
    output=tf.math.add(tf.matmul(fc1,W_fc2),B_fc2,name="output")
    with tf.name_scope('Loss'):
        loss=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output,labels=Y),name="loss")
        tf.summary.scalar("loss", loss)

    optimizer=tf.train.AdamOptimizer(0.001).minimize(loss)

    predict=tf.reshape(output,[-1,character_num,character_type_num],name='predict')
    labels=tf.reshape(Y,[-1,character_num,character_type_num],name='lables')

    # argmax over the class axis: the most likely character per position,
    # and the index of the 1 in each one-hot label.
    predict_max_index=tf.argmax(predict,axis=2,name='predict_max_index')
    labels_max_index=tf.argmax(labels,axis=2,name='labels_max_index')

    predict_correct_vec=tf.equal(predict_max_index,labels_max_index,name="predict_correct_vec")
    with tf.name_scope('accuracy'):
        accuracy=tf.reduce_mean(tf.cast(predict_correct_vec,tf.float32),name="accuracy")
        tf.summary.scalar('accuracy',accuracy)
    print("sess begin ")
    # merge_all must come after every tf.summary.* call above.
    merged = tf.summary.merge_all()
    saver=tf.train.Saver()
    with tf.Session() as sess:
        writer=tf.summary.FileWriter("./captcha_log",sess.graph)
        try:
            sess.run(tf.global_variables_initializer())
            step_=[0]
            # Open the record file once for the whole run. The original
            # reopened it with "w+" at every evaluation, which truncated the
            # earlier records and never closed the handles.
            with open("record.txt","w+") as record_file:
                for steps in range(60000):
                    train_data,train_label=get_next_batch_data_and_label(batch_size,step_,filelist)
                    step_[0]=step_[0]+1
                    sess.run(optimizer,feed_dict={X:train_data,Y:train_label,keep_prob:0.75})
                    if steps%100!=0:
                        continue
                    test_data,test_label=get_next_batch_data_and_label(batch_size,step_,filelist)
                    summary,acc,loss_=sess.run([merged,accuracy,loss],feed_dict={X:test_data,Y:test_label,keep_prob:1.0})
                    writer.add_summary(summary,steps)
                    s="steps=\t"+str(steps)+"\t,accuracy=\t"+str(acc)+"\tloss="+str(loss_)+"\t\n"
                    record_file.write(s)
                    print("steps=%d,accuracy=%f"%(steps,acc))
                    if acc>0.99:
                        saver.save(sess,MODEL_SAVE_PATH+"captcha_recognition.model",global_step=steps)
                        train_steps[0]=steps
                        break
        finally:
            # Flush and close the event file even if training fails.
            writer.close()

def vec2label(predict_vec):
    """Turn predicted class indices into the captcha string.

    Args:
        predict_vec: indexable holding at least `character_num` integer
            indices into `character_set`.

    Returns:
        The string made of the first `character_num` decoded characters.
    """
    # Use character_num instead of a hard-coded 4 so the decoder stays in
    # step with the label layout used for training.
    return "".join(character_set[predict_vec[i]] for i in range(character_num))

def test(path=captcha_test_path):
    """Run the most recent checkpoint over every image in `path`.

    For each file the predicted text is compared with the file name (the
    part before the first '.'). Every result and the final summary are
    printed and written to the module-level `Filep` log.

    Args:
        path: directory containing the test captcha images.

    Raises:
        FileNotFoundError: if MODEL_SAVE_PATH contains no checkpoint.
    """
    num=0
    # Honour the `path` argument; the original always read captcha_test_path.
    test_list=os.listdir(path)
    random.shuffle(test_list)

    # Take the meta graph from the same checkpoint the weights are restored
    # from. Building the name from train_steps[0] fails whenever training
    # did not run in this process.
    checkpoint=tf.train.latest_checkpoint(MODEL_SAVE_PATH)
    if checkpoint is None:
        raise FileNotFoundError("no checkpoint found in "+MODEL_SAVE_PATH)
    Saver_=tf.train.import_meta_graph(checkpoint+'.meta')

    with tf.Session() as sess:
        Saver_.restore(sess,checkpoint)
        graph=tf.get_default_graph()
        input_data=graph.get_tensor_by_name("data_input:0")
        keep_prob=graph.get_tensor_by_name("keep_prob:0")
        predict_max_index=graph.get_tensor_by_name("predict_max_index:0")
        for file in test_list:
            filename=os.path.join(path,file)
            print(file)
            with Image.open(filename) as img:
                gray=img.convert("L")
            img_data=np.array(gray).flatten()/255
            predict_vec = sess.run(predict_max_index, feed_dict={input_data:[img_data], keep_prob : 1.0})
            predict_val=np.squeeze(predict_vec)
            print(predict_val)
            predict_ans=vec2label(predict_val)
            right_ans=file.split('.')[0]
            if right_ans==predict_ans:
                num=num+1
                print("正确答案:%s 预测答案:%s  预测正确"%(right_ans,predict_ans))
                s="正确答案:\t"+right_ans+" \t预测答案:\t"+predict_ans+"\t预测正确\n"
                Filep.write(s)
            else:
                print("正确答案:%s 预测答案:%s  预测错误"%(right_ans,predict_ans))
                s="正确答案:\t"+right_ans+" \t预测答案:\t"+predict_ans+"\t预测错误\n"
                Filep.write(s)

    total=len(test_list)
    # Avoid ZeroDivisionError when the test directory is empty.
    rate=num/total if total else 0.0
    s="正确预测数目:"+str(num)+" 验证码数目:"+str(total)+" 正确率:"+str(rate)+"预测正确\n"
    Filep.write(s)
    print("正确预测数目:%d 验证码数目:%d 正确率:%f"%(num,total,rate))
        
if __name__=='__main__':
    # Script entry point: shuffle the captcha files, train the network and
    # write the timings to the train/predict log files.
    random.seed(time.time())
    random.shuffle(filelist)
    print("filelist len:")
    l=len(filelist)
    print(l)

    try:
        # time.clock() was removed in Python 3.8; perf_counter is the
        # replacement for measuring elapsed time.
        train_start=time.perf_counter()

        buildNet()

        train_end=time.perf_counter()
        # train_steps is a one-element list; format its value, not the list
        # (the original "%d" % list raised TypeError).
        s="train over after take "+str(train_steps[0])+" steps     cost "+str(train_end-train_start)+" s\n"
        Filet.write(s)
        print("train over after take %d steps cost %s s"%(train_steps[0],(train_end-train_start)))
        test_start=time.perf_counter()

        #test(captcha_test_path)

        test_end=time.perf_counter()
        s="Test over cost :"+str(test_end-test_start)+"s\n"
        Filep.write(s)
        print("Test over cost :%s s"%(test_end-test_start))
    finally:
        # Close both log files even if training fails.
        Filet.close()
        Filep.close()

 

训练结果:

 

测试准确率在93%到95%之间

大约耗时2个小时训练

遇到的问题:

1.最开始在每一个卷积层后都加了dropout层来防止过拟合,但训练时正确率一直很低,在0.01到0.05范围内浮动;后来把卷积层后的dropout层去掉,正确率就上去了,可能是因为加了dropout后网络的拟合能力不够

2.第一次成功后,调整了一些参数,我把之前训练好的模型删除后打算重新训练,但再次进行训练时在saver=tf.train.Saver()处报错,At least two variables have the same name: Variable,后来把每一个变量均命名后还是报错,发现是因为删除了模型导致的,重启IDE再次运行即可

3.用tensorboard进行可视化时报错:Fetch argument None has invalid type。原因是tf.summary.merge_all()要写在tf.summary.scalar()或tf.summary.histogram()等方法之后、tf.Session()之前;如果调用merge_all()时还没有定义任何summary,它会返回None,sess.run时就会报Fetch argument None has invalid type的错。

4.再次测试报错:InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'keep_prob_10' with dtype float,查找原因是在原文件夹下有之前运行错误的日志文件,干扰到这次的运行,彻底删除后重启即解决

 

 

你可能感兴趣的:(图像处理)