CAPTCHA Recognition in Practice
(1) Dataset
To recognize a CAPTCHA correctly, should we segment it into single characters first? That approach does not generalize, since characters overlap and distort differently. Instead, recognize the whole image at once.
Encode each label with one-hot vectors: NZPP -> [13, 25, 15, 15] -> a one-hot matrix of shape [4, 26]:
-> [[0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0],   # N = 13
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],   # Z = 25
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],   # P = 15
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0]]   # P = 15
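A minimal NumPy sketch of this encoding (the helper name encode_label is ours, just for illustration):

import numpy as np

def encode_label(text):
    # "NZPP" -> [13, 25, 15, 15] -> one-hot matrix of shape [4, 26]
    indices = [ord(c) - ord("A") for c in text]
    one_hot = np.zeros((len(indices), 26), dtype=np.float32)
    one_hot[np.arange(len(indices)), indices] = 1.0
    return one_hot

print(encode_label("NZPP").argmax(axis=1))  # [13 25 15 15]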
(2) How to use the features and target values in the dataset
(3) How to classify
How do we measure the loss? For MNIST classification it was cross entropy: a softmax mapping + cross entropy, because each image belongs to exactly one of the classes.
Here each sample has four correct positions among the 4 x 26 = 104 outputs, so softmax (which assumes a single true class) does not fit; we apply sigmoid cross entropy instead, treating every output as an independent binary decision.
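A toy TF 1.x example of this choice (the values are made up for the demo): the true vector for one sample has four 1s among 104 positions, which softmax cross entropy cannot represent, while sigmoid cross entropy scores every position as its own binary problem:

import tensorflow as tf

labels = tf.reshape(tf.one_hot([13, 25, 15, 15], depth=26), [1, 4 * 26])  # four 1s in one row
logits = tf.zeros([1, 4 * 26])  # dummy logits, just for the demo
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))
with tf.Session() as sess:
    print(sess.run(loss))  # ~0.693 = log(2): every position is an uninformed coin flip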
(4) Pipeline
1) Read the image data
   filename -> label value
2) Parse the csv file, turning a label like NZPP into [13, 25, 15, 15]
3) Join each filename to its label value
4) Build the convolutional neural network -> y_predict
5) Build the loss function
6) Optimize the loss
7) Compute the accuracy
8) Start the session and the queue-runner threads
The full code follows; at the end a test image is fed in for prediction.
import tensorflow as tf
import glob
import pandas as pd  # for reading the csv file
import numpy as np
import os

tf.app.flags.DEFINE_integer("is_train", 2, "1 to train the model, any other value to predict")
FLAGS = tf.app.flags.FLAGS
def read_pic():  # read the image data
    # 1. Build the filename queues
    # glob gives us the file list directly (previously we joined paths with os)
    file_names = glob.glob("./GenPics/*.jpg")
    file_names_test = glob.glob("./test/*.jpg")
    # print("file_names:\n", file_names)
    file_queue = tf.train.string_input_producer(file_names)
    file_queue_test = tf.train.string_input_producer(file_names_test)

    # 2. Read and decode
    # Reader that returns whole image files
    reader = tf.WholeFileReader()
    # Read stage
    filename, image = reader.read(file_queue)
    filename_test, image_test = reader.read(file_queue_test)
    # Decode stage
    decoded = tf.image.decode_jpeg(image)
    decoded_test = tf.image.decode_jpeg(image_test)
    # Pin down the static shape so the images can be batched
    decoded.set_shape([20, 80, 3])  # height, width, channels
    # print("decoded:\n", decoded)
    decoded_test.set_shape([20, 80, 3])
    # Cast from uint8: the convolution layers need float32 (or float64) input
    image_cast = tf.cast(decoded, tf.float32)
    image_cast_test = tf.cast(decoded_test, tf.float32)

    # 3. Batch
    filename_batch, image_batch = tf.train.batch([filename, image_cast], batch_size=100, num_threads=1, capacity=100)
    filename_batch_test, image_batch_test = tf.train.batch([filename_test, image_cast_test], batch_size=1, num_threads=1, capacity=1)
    return filename_batch, image_batch, filename_batch_test, image_batch_test
# Next, parse the csv file to build a table mapping file number to label
def parse_csv():
    # Read the file
    csv_data = pd.read_csv("./GenPics/labels.csv", names=["file_num", "chars"], index_col="file_num")
    # Turn each letter into its number, e.g. NZPP -> [13, 25, 15, 15]
    # Build an empty list and iterate
    labels = []
    for label in csv_data["chars"]:
        # print(label)
        letter = []
        for word in label:
            # print(word)
            letter.append(ord(word) - ord("A"))  # append the converted number to letter
        labels.append(letter)  # append letter to labels
    csv_data["labels"] = labels
    return csv_data
# Match each filename against csv_data: look the label up by file number
def filename2label(filename, csv_data):
    # print(filename)
    labels = []
    for file_name in filename:
        # filter keeps only the digit characters of the filename
        file_num = "".join(list(filter(str.isdigit, str(file_name))))
        # Look up the table
        target = csv_data.loc[int(file_num), "labels"]
        labels.append(target)
    # print(labels)
    return np.array(labels)
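# Example of the lookup in filename2label, with a hypothetical filename:
# b'./GenPics/123.jpg' -> digit characters "123" -> csv_data.loc[123, "labels"], e.g. [13, 25, 15, 15]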
# Weight/bias factory
def create_weights(shape):
    return tf.Variable(initial_value=tf.random_normal(shape=shape, stddev=0.01))
# Build the convolutional neural network to get y_predict
def create_model(x):
    # x has shape [None, 20, 80, 3]
    # 1) First convolution block
    with tf.variable_scope("conv1"):
        # Convolution layer: define the filters and bias
        conv1_weights = create_weights(shape=[5, 5, 3, 32])
        conv1_bias = create_weights(shape=[32])
        conv1_x = tf.nn.conv2d(input=x, filter=conv1_weights, strides=[1, 1, 1, 1], padding="SAME") + conv1_bias
        # Activation layer
        relu1_x = tf.nn.relu(conv1_x)
        # Pooling layer
        pool1_x = tf.nn.max_pool(value=relu1_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    # 2) Second convolution block
    with tf.variable_scope("conv2"):
        # Input shape is now [None, 10, 40, 32]
        # Convolution layer: define the filters and bias
        conv2_weights = create_weights(shape=[5, 5, 32, 64])
        conv2_bias = create_weights(shape=[64])
        conv2_x = tf.nn.conv2d(input=pool1_x, filter=conv2_weights, strides=[1, 1, 1, 1], padding="SAME") + conv2_bias
        # Activation layer
        relu2_x = tf.nn.relu(conv2_x)
        # Pooling layer
        pool2_x = tf.nn.max_pool(value=relu2_x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    # 3) Fully connected layer
    # Input shape is now [None, 5, 20, 64]
    with tf.variable_scope("full_connection"):
        # First flatten: [None, 5, 20, 64] -> [None, 5 * 20 * 64]
        # [None, 5 * 20 * 64] x [5 * 20 * 64, 4 * 26] = [None, 4 * 26], so the weights have shape [5 * 20 * 64, 4 * 26]
        x_fc = tf.reshape(pool2_x, shape=[-1, 5 * 20 * 64])  # note: reshape does not accept None, use -1
        weights_fc = create_weights(shape=[5 * 20 * 64, 4 * 26])
        bias_fc = create_weights(shape=[4 * 26])
        y_predict = tf.matmul(x_fc, weights_fc) + bias_fc
    return y_predict
if __name__ == "__main__":
    filename, image, filename_test, image_test = read_pic()  # pull the tensors out
    csv_data = parse_csv()

    # 1. Prepare the data
    x = tf.placeholder(tf.float32, shape=[None, 20, 80, 3])
    y_true = tf.placeholder(tf.float32, shape=[None, 4 * 26])  # the loss expects the target flattened to [None, 104]
    # 2. Build the model
    y_predict = create_model(x)
    # 3. Build the loss function
    loss_list = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_predict)
    loss = tf.reduce_mean(loss_list)
    # 4. Optimize the loss
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    # 5. Compute the accuracy: a sample counts as correct only if all 4 characters match
    equal_list = tf.reduce_all(
        tf.equal(tf.argmax(tf.reshape(y_predict, shape=[-1, 4, 26]), axis=2),
                 tf.argmax(tf.reshape(y_true, shape=[-1, 4, 26]), axis=2)), axis=1)
    accuracy = tf.reduce_mean(tf.cast(equal_list, tf.float32))

    # Initialize variables
    init = tf.global_variables_initializer()

    # (2) Collect the variables to display: the loss and the accuracy
    tf.summary.scalar("losses", loss)
    tf.summary.scalar("accuracy", accuracy)
    # (3) Merge all summary ops
    merged = tf.summary.merge_all()

    # Saver for saving and restoring the model
    saver = tf.train.Saver()
    # Open the session
    with tf.Session() as sess:
        # Initialize variables
        sess.run(init)
        # Start the queue-runner threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # (1) Create an events-file writer
        file_writer = tf.summary.FileWriter("./tmp/summary2/", graph=sess.graph)

        # Restore the model if a checkpoint exists
        if os.path.exists("./tmp/modelckpt2/checkpoint"):
            saver.restore(sess, "./tmp/modelckpt2/cnn_model")
        # Note: the directory passed to saver.save/saver.restore (modelckpt2 here) must be created
        # by hand beforehand, or saving will fail with an error;
        # the events-file directory, by contrast, is created automatically.

        if FLAGS.is_train == 1:
            for i in range(1000):
                filename_value, image_value = sess.run([filename, image])
                # print("filename_value:\n", filename_value)
                # print("image_value:\n", image_value)
                labels = filename2label(filename_value, csv_data)
                # Convert the labels to one-hot
                labels_value = tf.reshape(tf.one_hot(labels, depth=26), [-1, 4 * 26]).eval()
                _, error, accuracy_value = sess.run([optimizer, loss, accuracy], feed_dict={x: image_value, y_true: labels_value})
                print("step %d: loss %f, accuracy %f" % (i + 1, error, accuracy_value))
                # Run the merged summary op and write to the events file
                summary = sess.run(merged, feed_dict={x: image_value, y_true: labels_value})
                file_writer.add_summary(summary, i)
                if i % 100 == 0:
                    saver.save(sess, "./tmp/modelckpt2/cnn_model")
        else:
            # Not training: run the trained model on the test set
            for i in range(2):
                filename_value_test, image_value_test = sess.run([filename_test, image_test])
                labels_test = filename2label(filename_value_test, csv_data)
                labels_value_test = tf.reshape(tf.one_hot(labels_test, depth=26), [-1, 4 * 26]).eval()
                accuracy_value = sess.run(accuracy, feed_dict={x: image_value_test, y_true: labels_value_test})
                print("true values:\n", labels_test)
                y_predict_final = tf.argmax(tf.reshape(y_predict, shape=[-1, 4, 26]), axis=2)
                print("predicted values:\n", sess.run(y_predict_final, feed_dict={x: image_value_test, y_true: labels_value_test}))

        # Stop and join the threads
        coord.request_stop()
        coord.join(threads)
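To run this, assuming the script is saved as, say, captcha_cnn.py (the filename is ours): python captcha_cnn.py --is_train=1 trains for 1000 steps and saves a checkpoint every 100 steps, while running with the default --is_train=2 restores the latest checkpoint and prints the true and predicted labels for the images under ./test/.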