1. 创建自己的数据集:利用数字 0-9 和小写字母 a-n(共 24 个字符)生成训练数据集,每张验证码包含 4 个字符。
import itertools
import os
import random

from captcha.image import ImageCaptcha
# Alphabet the captchas are drawn from: the ten digits plus lowercase 'a'-'n'.
CHAR_SET = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n']
CHAR_LEN = len(CHAR_SET)
# Number of characters per captcha; this now actually drives the enumeration below.
CAPTCHA_LEN = 4
# Directory the generated images are written to.
OUTPUT_DIR = './tmp/mydata/'

# Create the target directory up front; writing into a missing directory
# presumably fails on the very first image otherwise.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# A single generator is reused for every image instead of being rebuilt
# CHAR_LEN ** CAPTCHA_LEN times inside the loop.
image = ImageCaptcha()

# Enumerate every CAPTCHA_LEN-character string over CHAR_SET. product() varies
# the last position fastest, i.e. the same order as the equivalent nested loops.
for chars in itertools.product(CHAR_SET, repeat=CAPTCHA_LEN):
    captcha_text = ''.join(chars)
    image.write(captcha_text, OUTPUT_DIR + captcha_text + '.jpg')
得到的数据集样本如下图所示:
2. 设计网络架构,进行训练
def model(self):
    """Build the captcha CNN graph and return its output tensor.

    ``self.X`` is reshaped to ``[-1, image_height, image_width, 1]`` and fed
    through three identical stages (3x3 convolution + bias + ReLU, 2x2
    max-pool with stride 2, dropout with ``self.keep_prob``) producing 32, 64
    and 128 channels. The result is flattened, passed through a 1024-unit
    ReLU layer with dropout, and finally through a linear layer with
    ``self.max_captcha * self.char_set_len`` outputs.

    Returns:
        The un-activated output tensor, created under the op name "output".
    """
    net = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1], name="input_x")
    print(">>> input x: {}".format(net))

    # Convolution stages 1..3: each stage's output channel count is the next
    # stage's input channel count. Variable/op names match wc1/bc1/conv1/...
    in_channels = 1
    for stage, out_channels in enumerate((32, 64, 128), start=1):
        shape_tag = "conv{}.shape-----".format(stage)
        kernel = tf.get_variable(name='wc{}'.format(stage),
                                 shape=[3, 3, in_channels, out_channels],
                                 dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer())
        bias = tf.Variable(self.b_alpha * tf.random_normal([out_channels]),
                           name="bc{}".format(stage))
        convolved = tf.nn.conv2d(net, kernel, strides=[1, 1, 1, 1], padding='SAME')
        net = tf.nn.relu(tf.nn.bias_add(convolved, bias), name="conv{}".format(stage))
        print(shape_tag, net.shape)
        net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME',
                             name="max_pool{}".format(stage))
        print(shape_tag, net.shape)
        net = tf.nn.dropout(net, self.keep_prob, name="dropout{}".format(stage))
        print(shape_tag, net.shape)
        in_channels = out_channels

    # Fully connected layer 1: its input width is the flattened size of the
    # last stage (height * width * channels).
    flat_size = net.shape[1] * net.shape[2] * net.shape[3]
    fc1_weights = tf.get_variable(shape=[flat_size, 1024], dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer(),
                                  name="fc1_w")
    fc1_bias = tf.Variable(self.b_alpha * tf.random_normal([1024]), name="fc1_b")
    hidden = tf.reshape(net, [-1, fc1_weights.get_shape().as_list()[0]])
    print("after reshape.shape-----", hidden.shape)
    hidden = tf.nn.relu(tf.add(tf.matmul(hidden, fc1_weights), fc1_bias))
    print("dense shape-----", hidden.shape)
    hidden = tf.nn.dropout(hidden, self.keep_prob)
    print("dense shape-----", hidden.shape)

    # Fully connected layer 2: one output per (captcha position, character).
    out_units = self.max_captcha * self.char_set_len
    fc2_weights = tf.get_variable(shape=[1024, out_units], dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer(),
                                  name="fc2_w")
    fc2_bias = tf.Variable(self.b_alpha * tf.random_normal([out_units]), name="fc2_b")
    return tf.add(tf.matmul(hidden, fc2_weights), fc2_bias, name="output")
def train_cnn(self, quantize=True):
    """Train the captcha CNN and checkpoint it to ``self.model_save_dir``.

    Builds the graph with ``self.model()``, minimises the mean sigmoid
    cross-entropy between the outputs and ``self.Y`` using Adam, and runs up
    to 5000 steps on 128-sample batches from ``self.get_batch``. Every 10th
    step a 100-sample batch is evaluated with dropout disabled; training
    stops early once whole-image accuracy exceeds 0.99. The model is saved
    every 500 iterations and once more when training finishes.

    Args:
        quantize: Accepted for interface compatibility; not used by this
            method.
    """
    y_predict = self.model()
    print(">>> input batch predict shape: {}".format(y_predict.shape))
    print(">>> End model test")

    # Loss: mean sigmoid cross-entropy over all outputs.
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_predict, labels=self.Y))
    # Optimiser step.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0003).minimize(cost)

    # Arg-max over the character axis gives one index per captcha position,
    # for the prediction and for the label.
    predict = tf.reshape(y_predict, [-1, self.max_captcha, self.char_set_len])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(self.Y, [-1, self.max_captcha, self.char_set_len]), 2)

    # Accuracy: per character, and per image (the min over positions is 1
    # only when every position of that image matches).
    correct = tf.cast(tf.equal(max_idx_p, max_idx_l), tf.float32)
    accuracy_char_count = tf.reduce_mean(correct)
    accuracy_image_count = tf.reduce_mean(tf.reduce_min(correct, axis=1))

    # Checkpoint writer/reader.
    saver = tf.train.Saver(tf.global_variables())
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Resume from an existing checkpoint when possible. A ValueError from
        # restore is treated as "the model folder holds no model files".
        if os.path.exists(self.model_save_dir):
            try:
                saver.restore(sess, self.model_save_dir)
            except ValueError:
                print("model文件夹为空,将创建新模型")

        for i in range(5000):
            step = i + 1  # 1-based step number used for reporting
            batch_x, batch_y = self.get_batch(i, size=128)
            _, cost_ = sess.run([optimizer, cost],
                                feed_dict={self.X: batch_x, self.Y: batch_y, self.keep_prob: 0.75})
            if step % 10 == 0:
                batch_x_test, batch_y_test = self.get_batch(i, size=100)
                # Fetch both metrics in one run so the forward pass over the
                # evaluation batch is executed once instead of twice.
                acc_char, acc_image = sess.run(
                    [accuracy_char_count, accuracy_image_count],
                    feed_dict={self.X: batch_x_test, self.Y: batch_y_test, self.keep_prob: 1.})
                print("{}次 >>>>>>字符率为 {} >>>>>>图片率为 {} >>>>>> loss {}".format(step, acc_char, acc_image, cost_))
                # Stop once image accuracy passes 99%; the save after the
                # loop persists the final weights.
                if acc_image > 0.99:
                    break
            # Periodic checkpoint every 500 iterations.
            if i % 500 == 0:
                saver.save(sess, self.model_save_dir)
        saver.save(sess, self.model_save_dir)
3. 对训练好的模型进行测试
def recognize_captcha(self):
    """Predict one randomly chosen captcha and display it with both texts.

    Picks a random entry from ``self.img_list``, loads it through
    ``self.gen_captcha_text_image``, restores the model from
    ``self.model_save_dir``, prints the true label next to the predicted
    character indices and the decoded text, and shows the image in a
    matplotlib window annotated with the original and predicted text.
    """
    sample = random.choice(self.img_list)
    label, captcha_array = self.gen_captcha_text_image(sample)

    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.text(0.1, 0.9, "origin:" + label, ha='center', va='center', transform=axes.transAxes)
    plt.imshow(captcha_array)

    # Prepare the network input: grayscale, flattened, divided by 255.
    gray = self.convert2gray(captcha_array)
    features = gray.flatten() / 255

    outputs = self.model()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, self.model_save_dir)
        per_position = tf.reshape(outputs, [-1, self.max_captcha, self.char_set_len])
        predict = tf.argmax(per_position, 2)
        text_list = sess.run(predict, feed_dict={self.X: [features], self.keep_prob: 1.})
        predict_text = text_list[0].tolist()
        print("正确: {} 预测: {}".format(label, predict_text))

        # Decode the predicted indices to characters, then show the result.
        p_text = "".join(str(self.char_set[index]) for index in predict_text)
        print(p_text)
        plt.text(20, 1, 'predict:{}'.format(p_text))
        plt.show()
预测结果: