A CNN model built with TensorFlow for captcha recognition.
Current accuracy: around 92%.
Training time: 5 hours (the project ships with a pre-trained model, so reproducing the results takes far less than 5 hours).
TensorFlow version: 1.13.1
Experimental data: captcha images
Sample data:
import os
import random
from PIL import Image
import numpy as np
import tensorflow as tf
from datetime import datetime
The meaning of each parameter is documented in the code comments.
class Config(object):
    width = 160               # captcha image width
    height = 60               # captcha image height
    char_num = 4              # number of characters per captcha
    characters = range(10)    # candidate characters: digits 0-9
    test_folder = '/home/kesci/input/captcha2599/captcha/test'
    train_folder = '/home/kesci/input/captcha2599/captcha/train'
    validation_folder = '/home/kesci/input/captcha2599/captcha/validation'
    saver_folder = 'checkpoints'
    alpha = 1e-3              # learning rate
    Epoch = 100               # number of training epochs
    batch_size = 64           # batch size
    keep_prob = 0.5           # dropout keep probability
    print_per_batch = 20      # report metrics every N batches
    save_per_batch = 20
Sample captcha images:
read_data(): returns the image data (as a numpy.array) and the label (taken from the file name);
label2vec(): converts the file name into a label vector;
    e.g. label = '1327' -> label_vec = [0,1,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0]
load_data(): loads all images in a folder and returns the image array, the labels and the number of images.
class ReadData:
    def __init__(self):
        self.test_img = os.listdir(Config.test_folder)
        self.train_img = os.listdir(Config.train_folder)
        self.sample_num = len(self.train_img)

    def read_data(self, path):
        img = Image.open(path).convert('L')        # convert to grey-scale
        image_array = np.array(img)
        image_data = image_array.flatten() / 255.0
        # the label is the file name without its extension
        label = os.path.splitext(os.path.split(path)[1])[0]
        label_vec = self.label2vec(label)
        return image_data, label_vec

    @staticmethod
    def label2vec(label):
        """
        Convert a captcha label into a 40-dimensional one-hot vector.
        :param label: e.g. '1327'
        :return:
            [0,1,0,0,0,0,0,0,0,0,
             0,0,0,1,0,0,0,0,0,0,
             0,0,1,0,0,0,0,0,0,0,
             0,0,0,0,0,0,0,1,0,0]
        """
        label_vec = np.zeros(Config.char_num * len(Config.characters))
        for i, num in enumerate(label):
            idx = i * len(Config.characters) + int(num)
            label_vec[idx] = 1
        return label_vec

    def load_data(self, folder):
        """
        Load the samples in a folder.
        :param folder: directory containing the images
        :return:
            data: image data
            label: image labels
            size: number of images
        """
        if os.path.exists(folder):
            path_list = os.listdir(folder)
            size = len(path_list)
            data = np.zeros([size, Config.height * Config.width])
            label = np.zeros([size, Config.char_num * len(Config.characters)])
            for i, img_path in enumerate(path_list):
                path = '%s/%s' % (folder, img_path)
                data[i], label[i] = self.read_data(path)
            return data, label, size
        else:
            raise IOError('No such directory, please check again.')
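As a quick sanity check of the label encoding, here is a minimal usage sketch; the label '1327' is the example from above, and label2vec is a staticmethod, so no dataset is needed to run it:

# Minimal sketch of the one-hot label encoding for the example label '1327'.
vec = ReadData.label2vec('1327')
print(vec.reshape(Config.char_num, len(Config.characters)).astype(int))
# each row one-hot encodes one digit: row 0 -> '1', row 1 -> '3', row 2 -> '2', row 3 -> '7'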
The network uses three convolutional layers, each with a filter_size of 5 and followed by 2x2 max pooling; to reduce overfitting, dropout is applied after the fully connected layer. This reduces the 60x160 input image to an 8x20x64 feature map. The rough structure is as follows:
Model structure
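The 20 * 8 * 64 size used for the fully connected layer below follows from applying three rounds of 2x2 max pooling (SAME padding) to the 60x160 input; a small sketch to verify the arithmetic:

# Quick check of the flattened feature size: with SAME padding, each 2x2
# stride-2 pooling halves a dimension, rounding up.
h, w = Config.height, Config.width      # 60, 160
for _ in range(3):                      # three conv + max-pool blocks
    h, w = (h + 1) // 2, (w + 1) // 2
print(h, w, h * w * 64)                 # 8 20 10240 == 20 * 8 * 64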
class CNN:
    def __init__(self):
        self.input_x = tf.placeholder(
            tf.float32, [None, Config.width * Config.height], name='input_x')
        self.input_y = tf.placeholder(
            tf.float32, [None, Config.char_num * len(Config.characters)], name='input_y')
        self.keep_prob = tf.placeholder("float")
        self.training = tf.placeholder(tf.bool, name='is_training')
        self.CNN_model()

    @staticmethod
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    @staticmethod
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    @staticmethod
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    @staticmethod
    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    def CNN_model(self):
        x_image = tf.reshape(self.input_x,
                             [-1, Config.height, Config.width, 1], name='x_image')
        # batch normalization
        x_norm = tf.layers.batch_normalization(x_image,
                                               training=self.training, momentum=0.9)
        # convolution layer 1
        w_cv1 = self.weight_variable([5, 5, 1, 32])
        b_cv1 = self.bias_variable([32])
        h_cv1 = tf.nn.relu(self.conv2d(x_norm, w_cv1) + b_cv1)
        h_mp1 = self.max_pool_2x2(h_cv1)
        # convolution layer 2
        w_cv2 = self.weight_variable([5, 5, 32, 64])
        b_cv2 = self.bias_variable([64])
        h_cv2 = tf.nn.relu(self.conv2d(h_mp1, w_cv2) + b_cv2)
        h_mp2 = self.max_pool_2x2(h_cv2)
        # convolution layer 3
        w_cv3 = self.weight_variable([5, 5, 64, 64])
        b_cv3 = self.bias_variable([64])
        h_cv3 = tf.nn.relu(self.conv2d(h_mp2, w_cv3) + b_cv3)
        h_mp3 = self.max_pool_2x2(h_cv3)
        # fully connected layer
        W_fc1 = self.weight_variable([20 * 8 * 64, 128])
        b_fc1 = self.bias_variable([128])
        h_mp3_flat = tf.reshape(h_mp3, [-1, 20 * 8 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_mp3_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)
        # output layer
        W_fc2 = self.weight_variable([128, Config.char_num * len(Config.characters)])
        b_fc2 = self.bias_variable([Config.char_num * len(Config.characters)])
        output = tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_y, logits=output))
        predict = tf.reshape(output, [-1, Config.char_num,
                                      len(Config.characters)], name='predict')
        labels = tf.reshape(self.input_y, [-1, Config.char_num,
                                           len(Config.characters)], name='labels')
        self.predict_max_idx = tf.argmax(predict, axis=2, name='predict_max_idx')
        labels_max_idx = tf.argmax(labels, axis=2, name='labels_max_idx')
        predict_correct_vec = tf.equal(self.predict_max_idx, labels_max_idx)
        # ensure batch-norm statistics are updated before each training step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step = tf.train.AdamOptimizer(
                Config.alpha).minimize(self.loss)
        self.accuracy = tf.reduce_mean(tf.cast(predict_correct_vec, tf.float32))
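The 40-dimensional output is treated as four independent groups of 10 scores: sigmoid cross-entropy is applied element-wise, and predict is reshaped to [batch, 4, 10] so that an argmax over the last axis picks one digit per position. A minimal numpy sketch of that decoding step, using made-up logits:

# Decoding sketch with hypothetical logits for a single image.
logits = np.random.randn(Config.char_num * len(Config.characters))
digits = logits.reshape(Config.char_num, len(Config.characters)).argmax(axis=1)
print(''.join(str(d) for d in digits))   # a 4-character string such as '4076'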
Training & evaluation
next_batch(): a generator that yields the data batch by batch;
feed_data(): feeds data to the model;
    x: image data;
    y: image labels;
    keep_prob: dropout keep probability;
evaluate(): evaluates the model, used for the validation and test sets;
run_model(): training & evaluation.
class Run:
    def __init__(self):
        read = ReadData()
        self.test_x, self.test_y, self.test_num = read.load_data(folder=Config.test_folder)
        self.train_x, self.train_y, self.train_num = read.load_data(folder=Config.train_folder)
        self.val_x, self.val_y, self.val_num = read.load_data(folder=Config.validation_folder)
        print('Images for train :{}, for validation : {}, for test : {}'
              .format(self.train_num, self.val_num, self.test_num))
        self.run_model()

    @staticmethod
    def next_batch(x, y, length):
        if length % Config.batch_size == 0:
            times = int(length / Config.batch_size)
        else:
            times = int(length / Config.batch_size) + 1
        start_id = 0
        for _ in range(times):
            end_id = min(start_id + Config.batch_size, length)
            batch_data = x[start_id:end_id]
            batch_label = y[start_id:end_id]
            start_id = end_id
            yield batch_data, batch_label

    @staticmethod
    def feed_data(x, y, keep_prob, is_training=True):
        feed_dict = {model.input_x: x,
                     model.input_y: y,
                     model.keep_prob: keep_prob,
                     model.training: is_training}
        return feed_dict

    def evaluate(self, sess, val_x, val_y, val_size):
        total_loss = 0.
        total_acc = 0.
        for x_, y_ in self.next_batch(val_x, val_y, val_size):
            length = len(y_)
            feed_dict = self.feed_data(x_, y_, 1.0, False)
            val_acc, val_loss = sess.run([model.accuracy, model.loss], feed_dict=feed_dict)
            total_acc += val_acc * length
            total_loss += val_loss * length
        return total_acc / val_size, total_loss / val_size

    def run_model(self):
        saver = tf.train.Saver(max_to_keep=1)
        if not os.path.exists(Config.saver_folder):
            os.mkdir(Config.saver_folder)
        # local path where the model is saved
        save_path = os.path.join(Config.saver_folder, 'best_validation')
        total_batch = 0
        best_acc = 0
        last_improved_step = 0
        require_steps = 100
        flag = False
        start_time = datetime.now()
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # to train from scratch, comment out the following two lines
        saver = tf.train.Saver()
        saver.restore(sess=sess, save_path=save_path)
        for epoch in range(Config.Epoch):
            print('Epoch : {}'.format(epoch + 1))
            for x, y in self.next_batch(self.train_x, self.train_y, self.train_num):
                feed_dict = self.feed_data(x, y, Config.keep_prob, True)
                sess.run(model.train_step, feed_dict=feed_dict)
                if total_batch % Config.print_per_batch == 0:
                    # report accuracy and loss on the training and validation sets
                    feed_dict[model.keep_prob] = 1.0
                    feed_dict[model.training] = False
                    train_accuracy, train_loss = sess.run([model.accuracy, model.loss],
                                                          feed_dict=feed_dict)
                    val_acc, val_loss = self.evaluate(sess, self.val_x, self.val_y, self.val_num)
                    if val_acc > best_acc:
                        # record the best result so far
                        best_acc = val_acc
                        last_improved_step = total_batch
                        # save the model
                        saver.save(sess=sess, save_path=save_path)
                        improved = '*'
                    else:
                        improved = ''
                    msg = 'Step {:5}, train_acc:{:8.2%}, train_loss:{:6.2f},' \
                          ' val_acc:{:8.2%}, val_loss:{:6.2f}, improved:{:3}'
                    print(msg.format(total_batch, train_accuracy, train_loss, val_acc, val_loss, improved))
                if total_batch - last_improved_step > require_steps:
                    flag = True
                    break
                total_batch += 1
            if flag:
                print('No improvement for over {} steps, auto-stopping....'.format(require_steps))
                break
        end_time = datetime.now()
        time_diff = (end_time - start_time).seconds
        print('Time Usage : {:.2f} hours'.format(time_diff / 3600.0))
        # report accuracy on the test set
        test_acc, test_loss = self.evaluate(sess, self.test_x, self.test_y, self.test_num)
        print("Test accuracy:{:8.2%}, loss:{:6.2f}".format(test_acc, test_loss))
        sess.close()
model = CNN()
Run()
WARNING:tensorflow:From :36: batch_normalization (from tensorflow.python.layers.normalization) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.batch_normalization instead.
WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer.
WARNING:tensorflow:From :61: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Images for train :10000, for validation : 1000, for test : 1000
WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from checkpoints/best_validation
Epoch : 1
Step 0, train_acc: 99.22%, train_loss: 0.02, val_acc: 91.07%, val_loss: 0.08, improved:*
Step 20, train_acc: 98.44%, train_loss: 0.02, val_acc: 90.42%, val_loss: 0.07, improved:
Step 40, train_acc: 99.22%, train_loss: 0.02, val_acc: 91.25%, val_loss: 0.07, improved:*
Step 60, train_acc: 99.22%, train_loss: 0.02, val_acc: 90.93%, val_loss: 0.07, improved:
Step 80, train_acc: 98.83%, train_loss: 0.02, val_acc: 91.05%, val_loss: 0.07, improved:
Step 100, train_acc: 98.44%, train_loss: 0.02, val_acc: 91.20%, val_loss: 0.07, improved:
Step 120, train_acc: 99.22%, train_loss: 0.02, val_acc: 90.90%, val_loss: 0.08, improved:
Step 140, train_acc: 99.22%, train_loss: 0.02, val_acc: 90.88%, val_loss: 0.07, improved:
No improvement for over 100 steps, auto-stopping....
Time Usage : 0.30 hours
Test accuracy: 91.68%, loss: 0.07
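For completeness, here is an illustrative sketch (not part of the original project) of how the saved checkpoint could be used to recognise a single captcha image; the file name 'demo.jpg' is hypothetical:

def predict_single(image_path):
    # Hedged inference sketch: rebuild the graph, restore the best checkpoint
    # and decode one captcha image.
    tf.reset_default_graph()
    net = CNN()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, os.path.join(Config.saver_folder, 'best_validation'))
        img = Image.open(image_path).convert('L')
        image_data = np.array(img).flatten() / 255.0
        idx = sess.run(net.predict_max_idx,
                       feed_dict={net.input_x: [image_data],
                                  net.keep_prob: 1.0,
                                  net.training: False})
        return ''.join(str(i) for i in idx[0])

# print(predict_single('demo.jpg'))   # 'demo.jpg' is a hypothetical captcha image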