我们要使用卷积神经网络实现验证码的识别,具体流程大致为:
1、使用python的captcha模块生成验证码图片。
2、使用tensorflow搭建神经网络模型。
3、将数据喂入神经网络进行训练。
4、保存训练好的网络模型。
下面我们来看具体的细节。
一、定义字符集。验证码一般由数字和字母组成,练习的时候可以先只考虑数字的情况,这样模型训练得会快些。代码如下:
# Candidate characters for captcha text: decimal digits, lower-case letters
# and upper-case letters, each stored as a list of one-character strings.
number = list('0123456789')
alphabet = list('abcdefghijklmnopqrstuvwxyz')
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
二、下面我们要从给定的字符集中选择4个字符,生成160*60的验证码图片,并将图片转化为numpy数组。然后将选择的四个字符生成为词向量形式。
1、生成图片并转化为数组。
# Pick `captcha_size` characters at random (with replacement) from `char_set`
# and return them as a list.
# To sample from digits plus letters instead, switch to this signature:
# def random_captcha_text(char_set=number+alphabet+ALPHABET, captcha_size=4):
def random_captcha_text(char_set=number, captcha_size=4):
    """Return a list of ``captcha_size`` characters drawn from ``char_set``."""
    return [random.choice(char_set) for _ in range(captcha_size)]
# Render one random captcha; return its text and the image as a numpy array.
def gen_captcha_text_and_image():
    """Generate a captcha and return ``(captcha_text, captcha_image)``.

    ``captcha_text`` is the joined string from ``random_captcha_text()``.
    ``captcha_image`` is the rendered picture converted to PIL mode 'L'
    (single channel), inverted with ``255 - pixel`` and turned into a numpy
    array.
    """
    generator = ImageCaptcha()
    text = ''.join(random_captcha_text())
    rendered = generator.generate(text)
    # generator.write(text, text + '.jpg')  # uncomment to save the picture to disk
    grey = Image.open(rendered).convert('L')
    # Invert the picture (black <-> white); the article reports that the
    # model converges faster this way.
    inverted = grey.point(lambda px: 255 - px)
    return text, np.array(inverted)
2、传入验证码文本,转化为词向量的形式,假设我们现在只使用数字集0-9。那么就是10分类,我们用一个长度为10的向量来表示一个数字,比如[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]表示数字0,[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]表示数字1。我们有四个字符,所以是一个4*10的矩阵,再将这个矩阵拉平为一维的,就是长度为40的向量。
如果我们现在采用数字加大小写字母为字符集,那就是4*(10+26+26),再将矩阵拉平,就是长度为248的向量。代码如下:
# Convert captcha text into its flattened one-hot label vector.
def text2vec(text):
    """Encode ``text`` as a flat one-hot vector of length MAX_CAPTCHA * CHAR_SET_LEN.

    For the i-th character, element ``i * CHAR_SET_LEN + pos`` is set to 1,
    where ``pos`` is: '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 36-61,
    '_' -> 62. Text shorter than MAX_CAPTCHA leaves the trailing slots zero.

    Raises:
        ValueError: if ``text`` is longer than MAX_CAPTCHA, contains a
            character outside the mapping above, or contains a character
            whose position does not fit into CHAR_SET_LEN.
    """
    if len(text) > MAX_CAPTCHA:
        raise ValueError('验证码最长%d个字符' % MAX_CAPTCHA)
    vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)

    def char2pos(c):
        # Explicit range checks: punctuation lying between the digit and
        # letter ranges (e.g. ':' or '[') must be rejected instead of being
        # folded onto a valid position.
        if c == '_':
            return 62
        if '0' <= c <= '9':
            return ord(c) - 48
        if 'A' <= c <= 'Z':
            return ord(c) - 55
        if 'a' <= c <= 'z':
            return ord(c) - 61
        raise ValueError('No Map')

    for i, c in enumerate(text):
        pos = char2pos(c)
        # A position beyond the active character set would set a bit inside
        # a neighbouring character's slot (or past the end of the vector).
        if pos >= CHAR_SET_LEN:
            raise ValueError('No Map')
        vector[i * CHAR_SET_LEN + pos] = 1
    return vector
三、以上代码每次只生成一张验证码,当然每次传入网络一个样本也可以,但我们习惯一次喂入多个样本,所以我们还要一次性生成多张图片传入网络。代码如下。
def get_next_batch(batch_size=64):
    """Generate ``batch_size`` fresh captchas and return ``(batch_x, batch_y)``.

    ``batch_x`` has one flattened image per row (IMAGE_HEIGHT * IMAGE_WIDTH
    columns); ``batch_y`` has the matching ``text2vec`` label per row
    (MAX_CAPTCHA * CHAR_SET_LEN columns).
    """
    images = np.zeros((batch_size, IMAGE_HEIGHT * IMAGE_WIDTH))
    labels = np.zeros((batch_size, MAX_CAPTCHA * CHAR_SET_LEN))
    # Iterating a 2-D array yields row views, so writing through them fills
    # the batch arrays in place.
    for image_row, label_row in zip(images, labels):
        text, image = gen_captcha_text_and_image()
        image_row[:] = image.flatten()  # flatten the 2-D picture to 1-D
        label_row[:] = text2vec(text)
    return images, labels
四、现在图片生成好了,对应的词向量也生成好了,要开始搭建网络了。我们采用三层卷积、一层全连接层,最后是输出层。卷积过程等具体细节,可以参见我的上一篇文章《手写汉字识别》。代码如下:
def _conv_pool_dropout(inputs, in_channels, out_channels, w_alpha, b_alpha):
    """Apply 3x3 SAME convolution + bias + ReLU, then 2x2 max-pool and dropout."""
    weights = tf.Variable(tf.random_normal([3, 3, in_channels, out_channels], stddev=w_alpha))
    biases = tf.Variable(tf.random_normal([out_channels], stddev=b_alpha))
    conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
    activated = tf.nn.relu(tf.nn.bias_add(conv, biases))
    pooled = tf.nn.max_pool(activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return tf.nn.dropout(pooled, keep_prob)


def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
    """Build the captcha CNN on placeholder ``X`` and return the output logits.

    Architecture: three conv/pool/dropout stages (32, 64, 64 filters), one
    1024-unit fully connected layer with dropout, and a linear output layer
    of width MAX_CAPTCHA * CHAR_SET_LEN.

    Args:
        w_alpha: standard deviation for the random-normal weight initialisers.
        b_alpha: standard deviation for the random-normal bias initialisers.

    Returns:
        The un-activated output tensor (logits) of the last layer.
    """
    x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    conv1 = _conv_pool_dropout(x, 1, 32, w_alpha, b_alpha)
    conv2 = _conv_pool_dropout(conv1, 32, 64, w_alpha, b_alpha)
    conv3 = _conv_pool_dropout(conv2, 64, 64, w_alpha, b_alpha)

    # Derive the flattened feature size from conv3's static shape rather than
    # hard-coding it. For a 60x160 input, three SAME 2x2 pools give 8x20x64 =
    # 10240 features, the same count as the previous literal 8*32*40, so the
    # graph is unchanged for that size while other image sizes now work too.
    flat_dim = 1
    for dim in conv3.get_shape().as_list()[1:]:
        flat_dim *= dim

    # Fully connected layer.
    w_d = tf.Variable(tf.random_normal([flat_dim, 1024], stddev=w_alpha))
    b_d = tf.Variable(tf.random_normal([1024], stddev=b_alpha))
    dense = tf.reshape(conv3, [-1, flat_dim])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)

    # Output layer: one logit per (character position, character class).
    w_out = tf.Variable(tf.random_normal([1024, MAX_CAPTCHA*CHAR_SET_LEN], stddev=w_alpha))
    b_out = tf.Variable(tf.random_normal([MAX_CAPTCHA*CHAR_SET_LEN], stddev=b_alpha))
    out = tf.add(tf.matmul(dense, w_out), b_out)
    return out
五、网络构建好了,现在需要构建损失函数,以及准确率等等,并开始训练了。具体代码如下:
def train_crack_captcha_cnn():
    """Train the captcha CNN until the evaluated accuracy exceeds 90%.

    Builds the loss, Adam optimizer and accuracy ops on top of
    ``crack_captcha_cnn()``, then loops forever: one training step per
    iteration on a fresh batch of 64 captchas; every 10 steps it evaluates on
    a fresh batch of 100, prints ``step, loss, accuracy`` and writes a
    checkpoint. Training stops (after a final checkpoint) once accuracy > 0.9.

    Accuracy is measured per character (argmax over each character slot), not
    per whole captcha. Checkpoints go under "./model/".
    """
    output = crack_captcha_cnn()
    # Softmax or sigmoid cross-entropy may be used for the final layer;
    # the softmax variant would be:
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, Y))
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
    # To speed up training the learning rate should ideally start large and
    # decay over time; a fixed rate is used here.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # Per-character accuracy: compare the argmax of each character slot.
    predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            sess.run(optimizer, feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8})
            # Evaluate every 10 steps.
            if step % 10 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                # Dropout must be disabled (keep_prob = 1.0) while evaluating;
                # otherwise the reported loss/accuracy are noisy and biased low.
                acc, loss_ = sess.run([accuracy, loss], feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.0})
                print(step, loss_, acc)
                saver.save(sess, "./model/crack_capcha1.model", global_step=step)
                # Once accuracy exceeds 90%, save the model and stop training.
                if acc > 0.9:
                    saver.save(sess, "./model/crack_capcha.model", global_step=step)
                    break
            step += 1
整个结构基本就是这样,如果只采用数字集的话,基本一千次迭代,半小时左右,准确率就能到90%以上。如果采用数字加大小写字母,时间会稍微久一点。下面是完整的代码:
import tensorflow as tf
from captcha.image import ImageCaptcha
import numpy as np
from PIL import Image
import random
# Candidate characters for captcha text: decimal digits, lower-case letters
# and upper-case letters, each stored as a list of one-character strings.
number = list('0123456789')
alphabet = list('abcdefghijklmnopqrstuvwxyz')
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
# Pick `captcha_size` characters at random (with replacement) from `char_set`
# and return them as a list.
# To sample from digits plus letters instead, switch to this signature:
# def random_captcha_text(char_set=number+alphabet+ALPHABET, captcha_size=4):
def random_captcha_text(char_set=number, captcha_size=4):
    """Return a list of ``captcha_size`` characters drawn from ``char_set``."""
    return [random.choice(char_set) for _ in range(captcha_size)]
# Render one random captcha; return its text and the image as a numpy array.
def gen_captcha_text_and_image():
    """Generate a captcha and return ``(captcha_text, captcha_image)``.

    ``captcha_text`` is the joined string from ``random_captcha_text()``.
    ``captcha_image`` is the rendered picture converted to PIL mode 'L'
    (single channel), inverted with ``255 - pixel`` and turned into a numpy
    array.
    """
    generator = ImageCaptcha()
    text = ''.join(random_captcha_text())
    rendered = generator.generate(text)
    # generator.write(text, text + '.jpg')  # uncomment to save the picture to disk
    grey = Image.open(rendered).convert('L')
    # Invert the picture (black <-> white); the article reports that the
    # model converges faster this way.
    inverted = grey.point(lambda px: 255 - px)
    return text, np.array(inverted)
def text2vec(text):
    """Encode ``text`` as a flat one-hot vector of length MAX_CAPTCHA * CHAR_SET_LEN.

    For the i-th character, element ``i * CHAR_SET_LEN + pos`` is set to 1,
    where ``pos`` is: '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 36-61,
    '_' -> 62. Text shorter than MAX_CAPTCHA leaves the trailing slots zero.

    Raises:
        ValueError: if ``text`` is longer than MAX_CAPTCHA, contains a
            character outside the mapping above, or contains a character
            whose position does not fit into CHAR_SET_LEN.
    """
    if len(text) > MAX_CAPTCHA:
        raise ValueError('验证码最长%d个字符' % MAX_CAPTCHA)
    vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)

    def char2pos(c):
        # Explicit range checks: punctuation lying between the digit and
        # letter ranges (e.g. ':' or '[') must be rejected instead of being
        # folded onto a valid position.
        if c == '_':
            return 62
        if '0' <= c <= '9':
            return ord(c) - 48
        if 'A' <= c <= 'Z':
            return ord(c) - 55
        if 'a' <= c <= 'z':
            return ord(c) - 61
        raise ValueError('No Map')

    for i, c in enumerate(text):
        pos = char2pos(c)
        # A position beyond the active character set would set a bit inside
        # a neighbouring character's slot (or past the end of the vector).
        if pos >= CHAR_SET_LEN:
            raise ValueError('No Map')
        vector[i * CHAR_SET_LEN + pos] = 1
    return vector
def get_next_batch(batch_size=64):
    """Generate ``batch_size`` fresh captchas and return ``(batch_x, batch_y)``.

    ``batch_x`` has one flattened image per row (IMAGE_HEIGHT * IMAGE_WIDTH
    columns); ``batch_y`` has the matching ``text2vec`` label per row
    (MAX_CAPTCHA * CHAR_SET_LEN columns).
    """
    images = np.zeros((batch_size, IMAGE_HEIGHT * IMAGE_WIDTH))
    labels = np.zeros((batch_size, MAX_CAPTCHA * CHAR_SET_LEN))
    # Iterating a 2-D array yields row views, so writing through them fills
    # the batch arrays in place.
    for image_row, label_row in zip(images, labels):
        text, image = gen_captcha_text_and_image()
        image_row[:] = image.flatten()  # flatten the 2-D picture to 1-D
        label_row[:] = text2vec(text)
    return images, labels
def _conv_pool_dropout(inputs, in_channels, out_channels, w_alpha, b_alpha):
    """Apply 3x3 SAME convolution + bias + ReLU, then 2x2 max-pool and dropout."""
    weights = tf.Variable(tf.random_normal([3, 3, in_channels, out_channels], stddev=w_alpha))
    biases = tf.Variable(tf.random_normal([out_channels], stddev=b_alpha))
    conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
    activated = tf.nn.relu(tf.nn.bias_add(conv, biases))
    pooled = tf.nn.max_pool(activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return tf.nn.dropout(pooled, keep_prob)


def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
    """Build the captcha CNN on placeholder ``X`` and return the output logits.

    Architecture: three conv/pool/dropout stages (32, 64, 64 filters), one
    1024-unit fully connected layer with dropout, and a linear output layer
    of width MAX_CAPTCHA * CHAR_SET_LEN.

    Args:
        w_alpha: standard deviation for the random-normal weight initialisers.
        b_alpha: standard deviation for the random-normal bias initialisers.

    Returns:
        The un-activated output tensor (logits) of the last layer.
    """
    x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    conv1 = _conv_pool_dropout(x, 1, 32, w_alpha, b_alpha)
    conv2 = _conv_pool_dropout(conv1, 32, 64, w_alpha, b_alpha)
    conv3 = _conv_pool_dropout(conv2, 64, 64, w_alpha, b_alpha)

    # Derive the flattened feature size from conv3's static shape rather than
    # hard-coding it. For a 60x160 input, three SAME 2x2 pools give 8x20x64 =
    # 10240 features, the same count as the previous literal 8 * 32 * 40, so
    # the graph is unchanged for that size while other image sizes now work.
    flat_dim = 1
    for dim in conv3.get_shape().as_list()[1:]:
        flat_dim *= dim

    # Fully connected layer.
    w_d = tf.Variable(tf.random_normal([flat_dim, 1024], stddev=w_alpha))
    b_d = tf.Variable(tf.random_normal([1024], stddev=b_alpha))
    dense = tf.reshape(conv3, [-1, flat_dim])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)

    # Output layer: one logit per (character position, character class).
    w_out = tf.Variable(tf.random_normal([1024, MAX_CAPTCHA * CHAR_SET_LEN], stddev=w_alpha))
    b_out = tf.Variable(tf.random_normal([MAX_CAPTCHA * CHAR_SET_LEN], stddev=b_alpha))
    out = tf.add(tf.matmul(dense, w_out), b_out)
    return out
def train_crack_captcha_cnn():
    """Train the captcha CNN until the evaluated accuracy exceeds 90%.

    Builds the loss, Adam optimizer and accuracy ops on top of
    ``crack_captcha_cnn()``, then loops forever: one training step per
    iteration on a fresh batch of 64 captchas; every 10 steps it evaluates on
    a fresh batch of 100, prints ``step, loss, accuracy`` and writes a
    checkpoint. Training stops (after a final checkpoint) once accuracy > 0.9.

    Accuracy is measured per character (argmax over each character slot), not
    per whole captcha. Checkpoints go under "./model/".
    """
    output = crack_captcha_cnn()
    # Softmax or sigmoid cross-entropy may be used for the final layer;
    # the softmax variant would be:
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, Y))
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
    # To speed up training the learning rate should ideally start large and
    # decay over time; a fixed rate is used here.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # Per-character accuracy: compare the argmax of each character slot.
    predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            sess.run(optimizer, feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8})
            # Evaluate every 10 steps.
            if step % 10 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                # Dropout must be disabled (keep_prob = 1.0) while evaluating;
                # otherwise the reported loss/accuracy are noisy and biased low.
                acc, loss_ = sess.run([accuracy, loss], feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.0})
                print(step, loss_, acc)
                saver.save(sess, "./model/crack_capcha1.model", global_step=step)
                # Once accuracy exceeds 90%, save the model and stop training.
                if acc > 0.9:
                    saver.save(sess, "./model/crack_capcha.model", global_step=step)
                    break
            step += 1
if __name__ == '__main__':
    # Generate one sample captcha to inspect what the generator produces.
    text, image = gen_captcha_text_and_image()
    print("验证码图像channel:", image.shape)  # (60, 160)

    # Image size fed to the network.
    IMAGE_HEIGHT, IMAGE_WIDTH = 60, 160

    # Number of characters per captcha, taken from the sample text.
    MAX_CAPTCHA = len(text)
    print("验证码文本字符数", MAX_CAPTCHA)

    # Active character set (digits only); for digits plus letters use:
    # char_set = number + alphabet + ALPHABET
    char_set = number
    CHAR_SET_LEN = len(char_set)

    # Graph inputs: flattened images, flattened one-hot labels and the
    # dropout keep probability.
    X = tf.placeholder(tf.float32, shape=[None, IMAGE_HEIGHT * IMAGE_WIDTH])
    Y = tf.placeholder(tf.float32, shape=[None, MAX_CAPTCHA * CHAR_SET_LEN])
    keep_prob = tf.placeholder(tf.float32)  # dropout

    train_crack_captcha_cnn()