前言:自己构建CNN网络结构训练一个验证码识别的模型
分析
假定验证码中只包含数字和大小写字母,且每个验证码由4个字符组成,例如:kx3S
步骤如下:
1.收集数据,验证码的数据集合可以自己生成
生成的验证码如下:
代码如下:
def random_code_text(code_size=4):
    """
    Draw the characters of a random captcha.

    Each character is picked independently with ``random.choice`` from the
    module-level ``code_char_set``, so the same character may appear twice.

    :param code_size: number of characters to draw
    :return: list containing ``code_size`` characters
    """
    return [random.choice(code_char_set) for _ in range(code_size)]
def generate_code_image(code_size=4):
    """
    Create one random captcha and render it as an image array.

    :param code_size: number of characters in the captcha
    :return: tuple ``(code_text, code_image)``: the captcha string and the
        rendered picture converted with ``np.array``
    """
    renderer = ImageCaptcha()
    text = ''.join(random_code_text(code_size))
    # Render the string into a captcha stream that PIL is able to open.
    stream = renderer.generate(text)
    # To also keep a copy on disk, call:
    #   renderer.write(text, 'captcha/' + text + '.jpg')
    pixels = np.array(Image.open(stream))
    return text, pixels
2.数据处理
将生成的数据转化为网络模型所需要的格式,例如可以采用哑编码(one-hot)的格式
def text_2_vec(text):
    """
    One-hot encode a captcha string into a flat label vector.

    Character number ``k`` of ``text`` owns the slice
    ``[k * code_char_set_size, (k + 1) * code_char_set_size)`` of the result;
    inside that slice the entry at ``code_char_2_number_dict[ch]`` is set to 1.
    Positions beyond ``len(text)`` stay all-zero.

    :param text: captcha characters; each one must be a key of
        ``code_char_2_number_dict`` and there may be at most ``code_size``
        of them (a longer text raises ``IndexError``)
    :return: 1-D float array of length ``code_size * code_char_set_size``
    """
    vec = np.zeros((code_size, code_char_set_size))
    for position, ch in enumerate(text):
        vec[position][code_char_2_number_dict[ch]] = 1
    # flatten() returns a row-major copy, matching the layout described above.
    return vec.flatten()
3.网络构建
可以采用三层卷积的网络结构进行模型构建
def code_cnn(x, y):
    """
    Build the captcha-recognition CNN and return its output scores.

    Layer order::

        conv(5x5) -> relu6 -> max_pool -> conv(3x3) -> relu6 -> max_pool
        -> dropout -> conv(3x3) -> relu6 -> max_pool
        -> fully connected -> fully connected

    The original author's note: captcha images are simple data, so a small
    network is enough; about 80% accuracy is easy to reach with it, but going
    beyond that is hard.

    All variables are created under the ``net`` variable scope with a normal
    initializer (mean 0, stddev 0.1).

    :param x: input image tensor, 4-D: [number_sample, height, width, channels]
    :param y: label tensor; it is not used while building the network and is
        kept only so the signature stays unchanged
    :return: tensor of shape [number_sample, code_size * code_char_set_size]
        holding the raw (un-normalised) scores of the last layer
    """
    # Static shape of the input: [number_sample, height, width, channels].
    x_shape = x.get_shape()
    # kernel_size_k is the number of convolution kernels (output channels) of layer k.
    kernel_size_1 = 32
    kernel_size_2 = 64
    kernel_size_3 = 64
    unit_number_1 = 124
    unit_number_2 = code_size * code_char_set_size

    with tf.variable_scope('net', initializer=tf.random_normal_initializer(0, 0.1), dtype=tf.float32):
        with tf.variable_scope('conv1'):
            w = tf.get_variable('w', shape=[5, 5, x_shape[3], kernel_size_1])
            b = tf.get_variable('b', shape=[kernel_size_1])
            net = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu1'):
            # relu:  max(0, net)
            # relu6: min(6, max(0, net)), i.e. relu with an upper bound of 6.
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool1'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.variable_scope('conv2'):
            w = tf.get_variable('w', shape=[3, 3, kernel_size_1, kernel_size_2])
            b = tf.get_variable('b', shape=[kernel_size_2])
            net = tf.nn.conv2d(net, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu2'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool2'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.variable_scope('dropout1'):
            # The result must be assigned back: tf.nn.dropout returns a new
            # tensor, so discarding it leaves dropout out of the network.
            # keep_prob is not defined in this function; it is read from the
            # enclosing module scope.
            # NOTE(review): if keep_prob is a fixed value below 1, dropout is
            # also active when the graph is evaluated - confirm it is a
            # placeholder or set to 1.0 for evaluation.
            net = tf.nn.dropout(net, keep_prob=keep_prob)

        with tf.variable_scope('conv3'):
            w = tf.get_variable('w', shape=[3, 3, kernel_size_2, kernel_size_3])
            b = tf.get_variable('b', shape=[kernel_size_3])
            net = tf.nn.conv2d(net, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu3'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool3'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.variable_scope('fc1'):
            # Flatten every sample's feature maps into a single vector.
            net_shape = net.get_shape()
            net_sample_feature_number = net_shape[1] * net_shape[2] * net_shape[3]
            net = tf.reshape(net, shape=[-1, net_sample_feature_number])
            w = tf.get_variable('w', shape=[net_sample_feature_number, unit_number_1])
            b = tf.get_variable('b', shape=[unit_number_1])
            # NOTE(review): there is no activation between the two fully
            # connected layers; left as in the original design.
            net = tf.add(tf.matmul(net, w), b)

        with tf.variable_scope('softmax'):
            # Despite the scope name no softmax is applied here; the layer
            # outputs raw scores.
            w = tf.get_variable('w', shape=[unit_number_1, unit_number_2])
            b = tf.get_variable('b', shape=[unit_number_2])
            net = tf.add(tf.matmul(net, w), b)

    return net
4.训练模型
def _make_image_preprocessor(sess, height, width):
    """
    Return a function turning raw RGB batches into resized grayscale arrays.

    The grayscale and resize ops are added to the graph only once per input
    dtype and reused afterwards, so calling the returned function inside a
    training loop does not keep adding nodes to the graph.

    :param sess: session used to run the preprocessing ops
    :param height: target image height
    :param width: target image width
    :return: callable ``preprocess(images) -> numpy array``
    """
    ops_by_dtype = {}

    def preprocess(images):
        images = np.asarray(images)
        if images.dtype not in ops_by_dtype:
            # Keep the incoming dtype so the ops see the same kind of data as
            # when they were built directly on the numpy batch.
            raw = tf.placeholder(tf.as_dtype(images.dtype), shape=[None, None, None, 3])
            gray = tf.image.rgb_to_grayscale(raw)
            resized = tf.image.resize_images(gray, size=(height, width),
                                             method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            ops_by_dtype[images.dtype] = (raw, resized)
        raw, resized = ops_by_dtype[images.dtype]
        return sess.run(resized, feed_dict={raw: images})

    return preprocess


def train_code_cnn(model_path):
    """
    Train the captcha CNN and save it once it is accurate enough.

    Training runs on randomly generated batches of 64 captchas. Every 10 steps
    a freshly generated batch is evaluated; when both that accuracy and the
    accuracy of the current training batch exceed 0.7 the model is saved to
    ``model_path`` and training stops. There is no other stopping condition.

    :param model_path: checkpoint path handed to ``tf.train.Saver.save``
    :return: None
    """
    # 1. Placeholders for the grayscale input images and the one-hot labels.
    in_image_height = 60
    in_image_width = 160
    x = tf.placeholder(tf.float32, shape=[None, in_image_height, in_image_width, 1], name='x')
    y = tf.placeholder(tf.float32, shape=[None, code_size * code_char_set_size], name='y')

    # 2. Network output, viewed as one score vector per character position.
    network = code_cnn(x, y)
    logits_per_char = tf.reshape(network, [-1, code_size, code_char_set_size])
    labels_per_char = tf.reshape(y, [-1, code_size, code_char_set_size])

    # 3. Loss. Softmax cross-entropy needs labels that form a probability
    #    distribution along the class axis. The flat label vector sums to
    #    code_size, so the loss is computed per character position (where the
    #    one-hot label sums to 1) and then averaged.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits_per_char, labels=labels_per_char))

    # 4. Optimizer.
    train = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)

    # 5. Accuracy: fraction of character positions predicted correctly.
    max_idx_p = tf.argmax(logits_per_char, 2)
    max_idx_y = tf.argmax(labels_per_char, 2)
    correct = tf.equal(max_idx_p, max_idx_y)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # 6. Training loop.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Build the image preprocessing once instead of creating new graph
        # ops on every iteration.
        preprocess = _make_image_preprocessor(sess, in_image_height, in_image_width)

        sess.run(tf.global_variables_initializer())

        step = 1
        while True:
            # a. Fetch a training batch and convert it to resized grayscale.
            batch_x, batch_y = random_next_batch(batch_size=64, code_size=code_size)
            batch_x = preprocess(batch_x)

            # b. One optimisation step.
            _, cost_, accuracy_ = sess.run([train, cost, accuracy], feed_dict={x: batch_x, y: batch_y})
            print("Step:{}, Cost:{}, Accuracy:{}".format(step, cost_, accuracy_))

            # c. Every 10 steps evaluate on a freshly generated batch.
            if step % 10 == 0:
                test_batch_x, test_batch_y = random_next_batch(batch_size=64, code_size=code_size)
                test_batch_x = preprocess(test_batch_x)
                acc = sess.run(accuracy, feed_dict={x: test_batch_x, y: test_batch_y})
                print("测试集准确率:{}".format(acc))

                # Save and stop once both accuracies exceed 0.7.
                if acc > 0.7 and accuracy_ > 0.7:
                    saver.save(sess, model_path, global_step=step)
                    break
            step += 1
5.测试模型
6.保存并且做预测
随机输出一个结果如下:
详细代码可见:https://github.com/dctongsheng/Lenet