Recognizing Complex Captchas with TensorFlow and Building a Production Environment (5): Designing the Neural Network

0x00 Introduction

The code in this post is adapted mainly from one of my earlier articles: Tensorflow应用之简单验证码识别 (simple captcha recognition with TensorFlow).

The network described here has 5 layers in total. If you want to design a more complex network, take VGG16 as a reference; it has 16 weight layers.

The 5-layer network consists of 3 convolutional layers followed by 2 fully connected layers. With it my accuracy eventually reached about 50%.
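
One detail worth checking before running main.py: the input size of the first fully connected layer (the hard-coded 11776 in the code below) depends entirely on the captcha image size. Each of the three max-pool layers halves the height and width (rounding up, because of the 'SAME' padding with stride 2), and the last convolution outputs 64 channels. A minimal sketch for deriving that number from your own image dimensions (the helper name is mine, not part of the original script):

def conv_output_size(image_height, image_width, channels=64, pool_layers=3):
    # Each 2x2 max pool with stride 2 and 'SAME' padding rounds the spatial
    # dimensions up to half; after three of them the 64-channel feature map
    # is flattened into the first fully connected layer.
    h, w = image_height, image_width
    for _ in range(pool_layers):
        h = (h + 1) // 2
        w = (w + 1) // 2
    return h * w * channels

print(conv_output_size(60, 160))  # 8 * 20 * 64 = 10240 for a 60x160 captcha

If this value does not match the first dimension of w_d, the reshape/matmul in the fully connected layer will fail with a shape error, so adjust 11776 accordingly.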

0x01 main.py

from gen_check_code import gen_captcha_text_and_image_new
from gen_check_code import number, alphabet
from test_check_code import get_test_captcha_text_and_image, get_test_sets_length
import numpy as np
import tensorflow as tf

text, image = gen_captcha_text_and_image_new()
print("验证码图像channel:", image.shape)  # (60, 160, 3)
# 图像大小
IMAGE_HEIGHT = image.shape[0]
IMAGE_WIDTH = image.shape[1]
image_shape = image.shape
MAX_CAPTCHA = len(text)
print("验证码文本最长字符数", MAX_CAPTCHA)  # 验证码最长4字符; 我全部固定为4,可以不固定. 如果验证码长度小于4,用'_'补齐


# Convert the color image to grayscale (color carries no useful information for captcha recognition)
# Grayscaling collapses the three RGB components into a single value
def convert2gray(img):
    if len(img.shape) > 2:
        gray = np.mean(img, -1)
        # The conversion above is faster; the standard luminance formula is:
        # r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        # gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
        return gray
    else:
        return img


""" 
cnn在图像大小是2的倍数时性能最高, 如果你用的图像大小不是2的倍数,可以在图像边缘补无用像素。 
np.pad(image,((2,3),(2,2)), 'constant', constant_values=(255,))  # 在图像上补2行,下补3行,左补2行,右补2行 
"""


char_set = number + alphabet   # 如果验证码长度小于4, '_'用来补齐
CHAR_SET_LEN = len(char_set)

# Convert text to a one-hot vector
def text2vec(text):
    text_len = len(text)
    if text_len > MAX_CAPTCHA:
        raise ValueError('The captcha is at most 4 characters long')

    vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)

    # Map a character to its index in char_set: '0'-'9' -> 0-9, 'a'-'z' -> 10-35
    def char2pos(c):
        try:
            if ord(c) <= ord('9'):
                k = ord(c) - ord('0')
            else:
                k = ord(c) - ord('a') + 10
        except TypeError:
            raise ValueError('No Map')
        return k

    for i, c in enumerate(text):
        idx = i * CHAR_SET_LEN + char2pos(c)
        vector[idx] = 1
    return vector


# Convert a one-hot vector back to text (the inverse of text2vec / char2pos)
def vec2text(vec):
    char_pos = vec.nonzero()[0]
    text = []
    for i, c in enumerate(char_pos):
        char_idx = c % CHAR_SET_LEN
        if char_idx < 10:
            char_code = char_idx + ord('0')
        elif char_idx < 36:
            char_code = char_idx - 10 + ord('a')
        else:
            raise ValueError('character index out of range for char_set')
        text.append(chr(char_code))
    return "".join(text)


# Generate one training batch
def get_next_batch(batch_size=128):
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])

    # Occasionally a generated image does not have the expected shape; retry until it does
    def wrap_gen_captcha_text_and_image():
        while True:
            text, image = gen_captcha_text_and_image_new()
            if image.shape == image_shape:
                return text, image

    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        image = convert2gray(image)

        batch_x[i, :] = image.flatten() / 255  # or (image.flatten() - 128) / 128 to center the values around 0
        batch_y[i, :] = text2vec(text)

    return batch_x, batch_y


####################################################################

X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT * IMAGE_WIDTH])
Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA * CHAR_SET_LEN])
keep_prob = tf.placeholder(tf.float32)  # dropout


# Define the CNN
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
    x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    # w_c1_alpha = np.sqrt(2.0/(IMAGE_HEIGHT*IMAGE_WIDTH)) #
    # w_c2_alpha = np.sqrt(2.0/(3*3*32))
    # w_c3_alpha = np.sqrt(2.0/(3*3*64))
    # w_d1_alpha = np.sqrt(2.0/(8*32*64))
    # out_alpha = np.sqrt(2.0/1024)

    # Three convolutional layers

    # First convolutional layer
    # Weights of the first layer
    w_c1 = tf.Variable(w_alpha * tf.random_normal([3, 3, 1, 32]))
    # Bias of the first layer
    b_c1 = tf.Variable(b_alpha * tf.random_normal([32]))
    # Convolution followed by a ReLU activation
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
    # 2x2 max pooling on conv1: every 2x2 block is reduced to a single value
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    # Dropout to reduce overfitting
    conv1 = tf.nn.dropout(conv1, keep_prob)

    # Second convolutional layer
    w_c2 = tf.Variable(w_alpha * tf.random_normal([3, 3, 32, 64]))
    b_c2 = tf.Variable(b_alpha * tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, keep_prob)

    # Third convolutional layer
    w_c3 = tf.Variable(w_alpha * tf.random_normal([3, 3, 64, 64]))
    b_c3 = tf.Variable(b_alpha * tf.random_normal([64]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)

    # Fully connected layer
    # Randomly initialized weights. The first dimension (11776 here) must equal
    # ceil(IMAGE_HEIGHT/8) * ceil(IMAGE_WIDTH/8) * 64, i.e. the flattened size of conv3
    # after the three 2x2 max-pool layers; adjust it if your image size differs.
    w_d = tf.Variable(w_alpha * tf.random_normal([11776, 2048]))
    # Randomly initialized bias
    b_d = tf.Variable(b_alpha * tf.random_normal([2048]))
    dense = tf.reshape(conv3, [-1, w_d.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)

    # Output layer: one logit per (character position, character class) pair
    w_out = tf.Variable(w_alpha * tf.random_normal([2048, MAX_CAPTCHA * CHAR_SET_LEN]))
    b_out = tf.Variable(b_alpha * tf.random_normal([MAX_CAPTCHA * CHAR_SET_LEN]))
    out = tf.add(tf.matmul(dense, w_out), b_out)
    # out = tf.nn.softmax(out)
    return out


# Train the network
def train_crack_captcha_cnn():
    output = crack_captcha_cnn()
    # Loss
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, Y))
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
    # What is the difference between softmax and sigmoid in the final classification layer?
    # sigmoid_cross_entropy treats each of the MAX_CAPTCHA * CHAR_SET_LEN outputs as an independent
    # binary label, while softmax_cross_entropy normalizes over one class axis, so it would have to be
    # applied per character position (on a [-1, MAX_CAPTCHA, CHAR_SET_LEN] reshape of the logits).
    # Optimizer: to speed up training, the learning rate should start large and then decay gradually
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            _, loss_ = sess.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
            print(step, loss_)

            # Evaluate the accuracy every 100 steps
            if step % 100 == 0:
                batch_x_test, batch_y_test = get_next_batch(1600)
                acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                print(step, acc)
                # Save the model and stop once the per-character accuracy is essentially 100%
                # and at least 20000 steps have been run
                if (acc > 0.9999999999) and (step > 20000):
                    saver.save(sess, "./crack_capcha.model", global_step=step)
                    break
            step += 1

# Train (comment out the line below if you only want to run the test)
train_crack_captcha_cnn()



# Test the network on the held-out test set
def crack_captcha():
    output = crack_captcha_cnn()

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint('.'))

        predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
        count = 0

        for i in range(get_test_sets_length()):
            text, image = get_test_captcha_text_and_image(i)
            image = convert2gray(image)
            captcha_image = image.flatten() / 255
            text_list = sess.run(predict, feed_dict={X: [captcha_image], keep_prob: 1})
            predict_text = text_list[0].tolist()
            # Map the predicted class indices back to characters (the inverse of char2pos)
            tmp = ''
            for char_idx in predict_text:
                if char_idx < 10:
                    char_code = char_idx + ord('0')
                else:
                    char_code = char_idx - 10 + ord('a')
                tmp = tmp + chr(char_code)
            predict_text = tmp

            if text == predict_text:
                count += 1
                check_result = ", prediction correct"
            else:
                check_result = ", prediction incorrect"
            print(str(i) + ':' + predict_text + check_result)

        print("Accuracy: " + str(count) + "/" + str(get_test_sets_length()))

# Test (uncomment the line below to run the test)
# crack_captcha()

0x02 Notes on the code

crack_captcha_cnn: defines the neural network
train_crack_captcha_cnn: trains the neural network
crack_captcha: tests the neural network (a short usage sketch follows below)
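
In main.py, which of the two modes runs is decided simply by which call at the bottom of the file is left uncommented. A minimal sketch of the intended workflow, using a flag instead of comments (this wrapper is my own illustration, not part of the original script):

if __name__ == '__main__':
    TRAIN = True  # set to False once a checkpoint has been written to the current directory

    if TRAIN:
        # trains until the stopping condition inside train_crack_captcha_cnn() is met
        # and saves ./crack_capcha.model-<step> checkpoint files
        train_crack_captcha_cnn()
    else:
        # restores the latest checkpoint from '.' and prints the accuracy over the test set
        crack_captcha()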

0x03 Notes on some parameters

get_next_batch(64) sets the training batch size to 64. In theory a larger batch lowers the randomness of the gradient estimate and makes training more stable, but it also makes OOM (out of memory) errors much more likely.
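
To get a feel for why the batch size matters, here is a rough, hedged estimate of the float32 memory occupied by just the input batch and the convolutional activations (it ignores pooled copies, gradients and the Adam optimizer state, which at least double the real footprint; the helper is mine, not part of main.py):

def rough_batch_footprint_mb(batch_size, height, width):
    # float32 elements held at once for one forward pass
    floats = batch_size * height * width                       # flattened input X
    floats += batch_size * height * width * 32                 # conv1 activations (32 channels, full resolution)
    floats += batch_size * (height // 2) * (width // 2) * 64   # conv2 activations after one 2x2 pool
    floats += batch_size * (height // 4) * (width // 4) * 64   # conv3 activations after two 2x2 pools
    return floats * 4 / (1024 ** 2)                            # 4 bytes per float32

print(rough_batch_footprint_mb(64, 60, 160))    # about 124 MB for a 60x160 captcha at batch size 64
print(rough_batch_footprint_mb(1600, 60, 160))  # the 1600-image evaluation batch needs roughly 25x that

Note that the 1600-image evaluation batch used every 100 steps is the more likely place to run out of memory, so that is the first number to shrink if you hit OOM.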

if (acc > 0.9999999999) and (step > 20000)

This condition decides when training stops: the model is saved and training ends only after the per-character accuracy on the evaluation batch is essentially 100% and at least 20000 steps have been run.

These values were chosen by my classmate YY, who helped me with the training, although I do not think they are a particularly good choice.
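
A more forgiving alternative, given here only as a hedged sketch (the threshold and patience values are illustrative, not taken from the original code), is to stop once the evaluated accuracy has stayed above a target for several consecutive checks:

def should_stop(acc_history, threshold=0.5, patience=3):
    # Stop once the last `patience` accuracy evaluations all exceed `threshold`.
    if len(acc_history) < patience:
        return False
    return all(a > threshold for a in acc_history[-patience:])

# Inside the training loop, after each accuracy evaluation:
#     acc_history.append(acc)
#     if should_stop(acc_history):
#         saver.save(sess, "./crack_capcha.model", global_step=step)
#         break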

0x04 Summary

Starting with the next post, we will move on to the actual training process.
