卷积神经网络 实现验证码识别 ---- 识别率 99%

1. 创建自己的数据集,利用数字 0-9 和小写字母 a-n(共 24 个字符)生成 4 位验证码图片,作为我们的训练数据集。

import itertools
import os
import random

from captcha.image import ImageCaptcha

# Alphabet the captcha texts are drawn from: the ten digits plus letters a-n.
CHAR_SET = [
    '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
]
CHAR_LEN = len(CHAR_SET)

# Number of characters in every generated captcha text.
CAPTCHA_LEN = 4

# Path prefix every image is written under; created up front so the first
# write does not fail when the directory is missing.
OUTPUT_DIR = './tmp/mydata/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The generator does not depend on the text being rendered, so build it once
# instead of once per image.
image_generator = ImageCaptcha()

# Enumerate every CAPTCHA_LEN-character text over CHAR_SET. The order matches
# nested loops: the last position varies fastest.
for chars in itertools.product(CHAR_SET, repeat=CAPTCHA_LEN):
    captcha_text = ''.join(chars)
    image_generator.write(captcha_text, OUTPUT_DIR + captcha_text + '.jpg')

得到的数据集样本如下图所示:

卷积神经网络 实现验证码识别 ---- 识别率 99%_第1张图片

2. 设计网络架构,进行训练

    def model(self):
        """Build the CNN graph and return the un-normalised output tensor.

        ``self.X`` is reshaped to single-channel images of size
        ``image_height x image_width`` and passed through three 3x3
        convolution stages with 32, 64 and 128 filters. Each stage applies
        ReLU, 2x2 max-pooling with stride 2, and dropout driven by
        ``self.keep_prob``. The result is flattened, fed through a 1024-unit
        ReLU layer with dropout, and finally through a linear layer named
        "output" that has ``max_captcha * char_set_len`` units.

        Tensor shapes are printed after every step as a debugging aid.
        """
        x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1], name="input_x")
        print(">>> input x: {}".format(x))

        conv_strides = [1, 1, 1, 1]
        pool_window = [1, 2, 2, 1]

        # (stage number, input channels, output channels) for each conv stage.
        stage_specs = [(1, 1, 32), (2, 32, 64), (3, 64, 128)]

        features = x
        for stage, in_channels, out_channels in stage_specs:
            shape_tag = "conv{}.shape-----".format(stage)

            kernel = tf.get_variable(name='wc{}'.format(stage),
                                     shape=[3, 3, in_channels, out_channels],
                                     dtype=tf.float32,
                                     initializer=tf.contrib.layers.xavier_initializer())
            bias = tf.Variable(self.b_alpha * tf.random_normal([out_channels]),
                               name="bc{}".format(stage))

            # Convolution + bias + ReLU.
            convolved = tf.nn.conv2d(features, kernel, strides=conv_strides, padding='SAME')
            biased = tf.nn.bias_add(convolved, bias)
            features = tf.nn.relu(biased, name="conv{}".format(stage))
            print(shape_tag, features.shape)

            # 2x2 max-pooling with stride 2.
            features = tf.nn.max_pool(features, ksize=pool_window, strides=pool_window,
                                      padding='SAME', name="max_pool{}".format(stage))
            print(shape_tag, features.shape)

            # Dropout controlled by the keep_prob placeholder.
            features = tf.nn.dropout(features, self.keep_prob, name="dropout{}".format(stage))
            print(shape_tag, features.shape)

        # Number of values per sample once the last feature map is flattened.
        next_shape = features.shape[1] * features.shape[2] * features.shape[3]

        # Fully connected layer 1.
        wd1 = tf.get_variable(shape=[next_shape, 1024], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer(), name="fc1_w")
        bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]), name="fc1_b")
        flat_width = wd1.get_shape().as_list()[0]
        dense = tf.reshape(features, [-1, flat_width])
        print("after reshape.shape-----", dense.shape)
        dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
        print("dense shape-----", dense.shape)
        dense = tf.nn.dropout(dense, self.keep_prob)
        print("dense shape-----", dense.shape)

        # Fully connected layer 2: one output per (captcha position, character).
        output_units = self.max_captcha * self.char_set_len
        wout = tf.get_variable(shape=[1024, output_units], dtype=tf.float32,
                               initializer=tf.contrib.layers.xavier_initializer(), name="fc2_w")
        bout = tf.Variable(self.b_alpha * tf.random_normal([output_units]), name="fc2_b")
        y_predict = tf.add(tf.matmul(dense, wout), bout, name="output")
        return y_predict
 def train_cnn(self, quantize=True):
        """Train the network and checkpoint it to ``self.model_save_dir``.

        Builds the graph via ``self.model()``, minimises the mean sigmoid
        cross-entropy against ``self.Y`` with Adam (learning rate 0.0003) and
        runs at most 5000 iterations with batches of 128. Every 10th step the
        per-character and per-image accuracies are measured on a batch of 100
        with dropout disabled; training stops early once the per-image
        accuracy exceeds 0.99. The model is saved on iterations 0, 500,
        1000, ... and once more after the loop ends.

        Args:
            quantize: Accepted for interface compatibility; not used by this
                method.
        """
        y_predict = self.model()
        print(">>> input batch predict shape: {}".format(y_predict.shape))
        print(">>> End model test")

        # Loss: mean sigmoid cross-entropy over all output units.
        cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_predict, labels=self.Y))
        # Training op (Adam).
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0003).minimize(cost)

        # Arg-max over the character axis for predictions and labels.
        predict = tf.reshape(y_predict, [-1, self.max_captcha, self.char_set_len])
        max_idx_p = tf.argmax(predict, 2)
        max_idx_l = tf.argmax(tf.reshape(self.Y, [-1, self.max_captcha, self.char_set_len]), 2)

        # Per-character accuracy, and per-image accuracy (an image counts only
        # if every one of its characters is right, hence the min over axis 1).
        correct_pred = tf.cast(tf.equal(max_idx_p, max_idx_l), tf.float32)
        accuracy_char_count = tf.reduce_mean(correct_pred)
        accuracy_image_count = tf.reduce_mean(tf.reduce_min(correct_pred, axis=1))

        saver = tf.train.Saver(tf.global_variables())
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Resume from an existing checkpoint when one is available.
            if os.path.exists(self.model_save_dir):
                try:
                    saver.restore(sess, self.model_save_dir)
                except ValueError:
                    # Raised when the path exists but holds no checkpoint.
                    print("model文件夹为空,将创建新模型")

            for i in range(5000):
                step = i + 1
                batch_x, batch_y = self.get_batch(i, size=128)
                _, cost_ = sess.run([optimizer, cost],
                                    feed_dict={self.X: batch_x, self.Y: batch_y, self.keep_prob: 0.75})

                if step % 10 == 0:
                    # NOTE(review): this batch comes from the same get_batch
                    # call as the training batch - confirm whether it is
                    # held-out data before trusting the reported accuracy.
                    batch_x_test, batch_y_test = self.get_batch(i, size=100)
                    # Fetch both metrics in one run instead of evaluating the
                    # network twice on the same feed.
                    acc_char, acc_image = sess.run(
                        [accuracy_char_count, accuracy_image_count],
                        feed_dict={self.X: batch_x_test, self.Y: batch_y_test, self.keep_prob: 1.})
                    print("{}次 >>>>>>字符率为 {} >>>>>>图片率为 {} >>>>>> loss {}".format(step, acc_char, acc_image, cost_))
                    # Stop once per-image accuracy passes 99%; the save after
                    # the loop persists the final weights.
                    if acc_image > 0.99:
                        break

                # Periodic checkpoint.
                if i % 500 == 0:
                    saver.save(sess, self.model_save_dir)

            saver.save(sess, self.model_save_dir)

3. 对我们训练好的模型进行测试

 def recognize_captcha(self):
        """Predict the text of one randomly chosen image and display it.

        A random entry of ``self.img_list`` is loaded through
        ``self.gen_captcha_text_image`` and shown with its true label. The
        image is converted with ``self.convert2gray``, flattened and divided
        by 255, then fed to the model restored from ``self.model_save_dir``.
        The predicted indices are mapped through ``self.char_set``, printed,
        and drawn onto the plot.
        """
        chosen = random.choice(self.img_list)
        label, captcha_array = self.gen_captcha_text_image(chosen)

        # Show the image with its ground-truth label.
        figure = plt.figure()
        axes = figure.add_subplot(111)
        axes.text(0.1, 0.9, "origin:" + label, ha='center', va='center', transform=axes.transAxes)
        plt.imshow(captcha_array)

        # Prepare the network input: grayscale, flattened, divided by 255.
        gray = self.convert2gray(captcha_array)
        net_input = gray.flatten() / 255

        logits = self.model()

        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, self.model_save_dir)
            per_position = tf.reshape(logits, [-1, self.max_captcha, self.char_set_len])
            predict = tf.argmax(per_position, 2)
            text_list = sess.run(predict, feed_dict={self.X: [net_input], self.keep_prob: 1.})
            predict_text = text_list[0].tolist()

        print("正确: {}  预测: {}".format(label, predict_text))

        # Translate the predicted indices back into characters and show them.
        p_text = "".join(str(self.char_set[index]) for index in predict_text)
        print(p_text)
        plt.text(20, 1, 'predict:{}'.format(p_text))
        plt.show()

预测结果:

卷积神经网络 实现验证码识别 ---- 识别率 99%_第2张图片

你可能感兴趣的:(深度学习,机器学习,神经网络,深度学习)