数据集和源代码来自 https://blog.csdn.net/qq_36148847/article/details/79306762
萌新入门,如果过程有误请指正
当时做的时候数据集还只有几百张,今天看发现已经扩充了很多
下面的代码是当时实验的时候的代码,只识别了六个字
对原博给出的代码进行测试后,发现过拟合问题严重、模型不稳定,因此做了以下修改:
1.更改了参数初始化方式:改成xavier初始化方法
参数初始化的目的是让神经网络在训练过程中能够学到有用的信息。Glorot认为:优秀的初始化应该使各层激活值的方差和状态梯度的方差在传播过程中保持一致(即Glorot条件),而Xavier初始化正是按照这一条件设计的。
2.更改了激活函数:改成leaky-relu
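ReLU将所有负值都置为零;相反,Leaky ReLU给所有负值赋予一个非零斜率。(注意:下面代码中relu()的alpha默认值为0,而conv_layer调用relu时没有传入alpha,因此实际运行的仍是普通ReLU;要真正使用Leaky ReLU,需要传入非零的alpha,例如0.01。)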
3.增加了正则化过程:L2正则化
结果训练集准确率是96.77%左右,当时测试集太小了,准确率算出来没意义。
4.准确率开始下降的时候停止训练,准确率不变的时候停止训练
还有其他修改想起来再继续补……增加了tensorboard监测,不过被我注释掉一些
import sys
import os
import time
import random
import matplotlib.pyplot as plt
import matplotlib.image as ima
import numpy as np
import tensorflow as tf
from PIL import Image
# Input geometry: the network input is a flat vector of SIZE values that is
# reshaped with WIDTH and HEIGHT before the first convolution.
WIDTH = 32
HEIGHT = 40
SIZE = WIDTH * HEIGHT  # 1280
# Number of classes; one numbered image directory is read per class.
NUM_CLASSES = 6
# Upper bound on training passes; the training loop may stop earlier.
iterations = 300
# Directory the checkpoint is saved to in train mode and restored from in
# predict mode.
SAVER_DIR = "D:/tf_car_license_dataset/province/"
from tensorflow.contrib.tensorboard.plugins import projector
# Province characters indexed by class id; predict mode prints
# PROVINCES[argmax of the network output].
# NOTE(review): presumably in the same order as the numbered class
# directories used for training -- verify against the data set.
PROVINCES = ("京","闽","粤","苏","沪","浙")
# Not referenced anywhere else in the visible code.
nProvinceIndex = 0
# Wall-clock start; train mode uses it to report the image-loading and
# training durations.
time_begin = time.time()
# Input nodes: a batch of flattened image pixel vectors and the matching
# one-hot class labels.
x = tf.placeholder(tf.float32, shape=[None, SIZE])
y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES])
# NOTE(review): the image loaders flatten pixels as w + h*width (row-major,
# `height` rows of `width` values) while this reshape uses [WIDTH, HEIGHT];
# this looks transposed. Predict mode builds its own reshape the same way, so
# both must be changed together -- confirm before touching.
x_image = tf.reshape(x, [-1, WIDTH, HEIGHT, 1])
# Early-stopping state for the training loop: `count` is the number of
# consecutive evaluations with unchanged validation accuracy, `pre_accuracy`
# is the accuracy seen at the previous evaluation.
count=0
pre_accuracy = 0
def relu(inputx, alpha=0., max_value=None):
    """Rectified linear unit with an optional leaky slope and upper clip.

    inputx: input tensor (the constants below are built as float32).
    alpha: slope applied to the negative part of the input. 0 (the
        default) gives plain ReLU; a small positive value gives leaky ReLU.
    max_value: if not None, positive activations are clipped to this value.

    Returns the activated tensor.
    """
    leaky = alpha != 0.
    if leaky:
        # Captured before inputx is overwritten by its positive part below.
        negative_part = tf.nn.relu(-inputx)
    inputx = tf.nn.relu(inputx)
    if max_value is not None:
        inputx = tf.clip_by_value(inputx, tf.cast(0., dtype=tf.float32),
                                  tf.cast(max_value, dtype=tf.float32))
    if leaky:
        # Only build the subtraction when it can change the result; with
        # alpha == 0 it would add ops for nothing and yield 0 * inf = NaN on
        # infinite activations.
        inputx -= tf.constant(alpha, dtype=tf.float32) * negative_part
    return inputx
# Convolution block: convolution -> bias + activation -> max pooling
def conv_layer(inputs, W, b, conv_strides, kernel_size, pool_strides, padding):
    """Apply one convolution, add the bias, activate and max-pool.

    inputs: input tensor passed to tf.nn.conv2d.
    W, b: convolution filter and bias.
    conv_strides: strides of the convolution.
    kernel_size: window size (`ksize`) of the max pooling, not of the
        convolution filter.
    pool_strides: strides of the max pooling.
    padding: padding mode of the convolution; the pooling always uses 'SAME'.

    Returns the pooled tensor.
    """
    convolved = tf.nn.conv2d(inputs, W, strides=conv_strides, padding=padding)
    # relu() is called without alpha, so this is a plain (non-leaky) ReLU.
    activated = relu(convolved + b)
    pooled = tf.nn.max_pool(activated, ksize=kernel_size,
                            strides=pool_strides, padding='SAME')
    return pooled
# Fully connected layer
def full_connect(inputs, W, b):
    """Return ReLU(inputs @ W + b) for a dense layer.

    inputs: 2-D input tensor (it is passed to tf.matmul).
    W, b: weight matrix and bias of the layer.
    """
    pre_activation = tf.matmul(inputs, W) + b
    return tf.nn.relu(pre_activation)
def _load_labeled_images(dir_pattern, threshold=230):
    """Load one labelled image set into flat binary arrays.

    dir_pattern: directory template with a single ``%s`` that is filled with
        each class index 0 .. NUM_CLASSES-1.
    threshold: pixels strictly brighter than this become 1, all others 0.

    Returns (images, labels, count): ``images`` has shape (count, SIZE),
    ``labels`` is one-hot with shape (count, NUM_CLASSES).
    """
    samples = []
    for label in range(NUM_CLASSES):
        class_dir = dir_pattern % label
        for root, _subdirs, files in os.walk(class_dir):
            for fname in files:
                # Join with the directory actually being walked so files in
                # sub-directories resolve to a real path.
                samples.append((os.path.join(root, fname), label))

    images = np.zeros((len(samples), SIZE), dtype=np.float32)
    labels = np.zeros((len(samples), NUM_CLASSES), dtype=np.float32)
    for row, (path, label) in enumerate(samples):
        # Context manager closes the file handle once the pixels are read.
        with Image.open(path) as img:
            width, height = img.size
            for h in range(height):
                for w in range(width):
                    # Binarise: only near-white pixels are kept as 1.
                    if img.getpixel((w, h)) > threshold:
                        images[row][w + h * width] = 1
        labels[row][label] = 1
    return images, labels, len(samples)


if __name__ == '__main__' and sys.argv[1] == 'train':
    # Train mode: load the training and validation sets, build the network,
    # train with early stopping and save the checkpoint to SAVER_DIR.
    # The directory templates can be changed to your own image folders; the
    # %s placeholder is the class label.
    input_images, input_labels, input_count = _load_labeled_images(
        "D:/tf_car_license_dataset/train_images/training-set/chinese-characters/%s/")
    val_images, val_labels, val_count = _load_labeled_images(
        "D:/tf_car_license_dataset/train_images/validation-set/chinese-characters/%s/")

    with tf.Session() as sess:
        # First convolution layer (2x2 max pooling with stride 2).
        W_conv1 = tf.get_variable('W_conv1', [5, 5, 1, 6],
                                  initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[6]), name="b_conv1")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Second convolution layer (1x1 pooling, i.e. no down-sampling).
        W_conv2 = tf.get_variable('W_conv2', [5, 5, 6, 16],
                                  initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv2")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Third convolution layer (1x1 pooling, i.e. no down-sampling).
        W_conv3 = tf.get_variable('W_conv3', [5, 5, 16, 120],
                                  initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b_conv3 = tf.Variable(tf.constant(0.1, shape=[120]), name="b_conv3")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L3_pool = conv_layer(L2_pool, W_conv3, b_conv3, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Fully connected layer.
        W_fc1 = tf.get_variable('W_fc1', [16*20*120, 256],
                                initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[256]), name="b_fc1")
        h_pool3_flat = tf.reshape(L3_pool, [-1, 16*20*120])
        h_fc1 = full_connect(h_pool3_flat, W_fc1, b_fc1)

        # Dropout.
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
        tf.summary.histogram('dropout_keep_probability', keep_prob)

        # Readout layer.
        W_fc2 = tf.get_variable('W_fc2', [256, NUM_CLASSES],
                                initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES]), name="b_fc2")
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        # L2 regularisation; only the convolution weights are penalised
        # (no biases, no fully connected weights).
        regularizer = tf.contrib.layers.l2_regularizer(0.0001)
        reg_term = regularizer(W_conv1) + regularizer(W_conv2) + regularizer(W_conv3)

        # Clip before the log so a zero probability cannot produce
        # log(0) = -inf and turn the loss into NaN.
        y_clipped = tf.clip_by_value(y_conv, 1e-10, 1.0)
        cross_entropy = (tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_clipped),
                                                       reduction_indices=[1]))
                         + reg_term)
        train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)
        c_entropy = tf.summary.scalar('cross_entropy', cross_entropy)

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        acc = tf.summary.scalar('accuracy', accuracy)

        # Saver, merged summaries and TensorBoard writers.
        saver = tf.train.Saver()
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('D:/mypath/train', sess.graph)
        test_writer = tf.summary.FileWriter('D:/mypath/val')

        sess.run(tf.global_variables_initializer())

        time_elapsed = time.time() - time_begin
        print("读取图片文件耗费时间:%d秒" % time_elapsed)
        time_begin = time.time()

        print ("一共读取了 %s 个训练图像, %s 个标签" % (input_count, input_count))

        # Split the training set into full batches plus one remainder batch
        # so that any number of images is supported (e.g. 150 images with a
        # batch size of 60 gives two batches of 60 and a final batch of 30).
        batch_size = 65
        batches_count = int(input_count / batch_size)
        remainder = input_count % batch_size
        print ("训练数据集分成 %s 批, 前面每批 %s 个数据,最后一批 %s 个数据" % (batches_count+1, batch_size, remainder))

        val_feed = {x: val_images, y_: val_labels, keep_prob: 1.0}

        # Training iterations.
        for it in range(iterations):
            for n in range(batches_count):
                lo = n * batch_size
                hi = lo + batch_size
                batch_feed = {x: input_images[lo:hi],
                              y_: input_labels[lo:hi],
                              keep_prob: 0.5}
                sess.run(train_step, feed_dict=batch_feed)
                if n % 10 == 0:
                    result = sess.run(merged, feed_dict=batch_feed)
                    # Use a step that keeps increasing across iterations;
                    # a bare `n` restarts at 0 on every pass.
                    writer.add_summary(result, it * batches_count + n)
            if remainder > 0:
                start_index = batches_count * batch_size
                # Slice up to input_count (exclusive) so the last sample is
                # included and a remainder of 1 does not give an empty batch.
                sess.run(train_step,
                         feed_dict={x: input_images[start_index:input_count],
                                    y_: input_labels[start_index:input_count],
                                    keep_prob: 0.5})

            # Every five iterations evaluate on the validation set and stop
            # when the accuracy drops after a plateau, or stays unchanged for
            # several evaluations at >= 90%.
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(feed_dict=val_feed)
                print ('第 %d 次训练迭代: 准确率 %0.5f%%' % (it, iterate_accuracy*100))
                if it % 10 == 0:
                    result = sess.run(merged, feed_dict=val_feed)
                    test_writer.add_summary(result, it)
                if iterate_accuracy == pre_accuracy:
                    count = count + 1
                elif count > 0 and pre_accuracy > iterate_accuracy:
                    break
                else:
                    count = 0
                pre_accuracy = iterate_accuracy
                if count > 4 and iterate_accuracy >= 0.9:
                    break

        # Close both writers so pending events are written to disk.
        writer.close()
        test_writer.close()
        print ('完成训练!')
        time_elapsed = time.time() - time_begin
        print ("训练耗费时间:%d秒" % time_elapsed)
        time_begin = time.time()

        # Save the trained model.
        if not os.path.exists(SAVER_DIR):
            print ('不存在训练数据保存目录,现在创建保存目录')
            os.makedirs(SAVER_DIR)
        saver_path = saver.save(sess, "%smodel.ckpt"%(SAVER_DIR))
if __name__ == '__main__' and sys.argv[1] == 'predict':
    # Predict mode: restore the checkpoint from SAVER_DIR, rebuild the forward
    # pass on top of the restored variables and classify the test images
    # 1.bmp .. 11.bmp.
    saver = tf.train.import_meta_graph("%smodel.ckpt.meta"%(SAVER_DIR))
    with tf.Session() as sess:
        model_file = tf.train.latest_checkpoint(SAVER_DIR)
        saver.restore(sess, model_file)
        graph = sess.graph

        x_image = tf.reshape(x, [-1, WIDTH, HEIGHT, 1])

        # First convolution layer (2x2 max pooling with stride 2).
        W_conv1 = graph.get_tensor_by_name("W_conv1:0")
        b_conv1 = graph.get_tensor_by_name("b_conv1:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Second convolution layer (1x1 pooling, i.e. no down-sampling).
        W_conv2 = graph.get_tensor_by_name("W_conv2:0")
        b_conv2 = graph.get_tensor_by_name("b_conv2:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Third convolution layer (1x1 pooling, i.e. no down-sampling).
        W_conv3 = graph.get_tensor_by_name("W_conv3:0")
        b_conv3 = graph.get_tensor_by_name("b_conv3:0")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L3_pool = conv_layer(L2_pool, W_conv3, b_conv3, conv_strides,
                             kernel_size, pool_strides, padding='SAME')

        # Fully connected layer.
        W_fc1 = graph.get_tensor_by_name("W_fc1:0")
        b_fc1 = graph.get_tensor_by_name("b_fc1:0")
        h_pool3_flat = tf.reshape(L3_pool, [-1, 16*20*120])
        h_fc1 = full_connect(h_pool3_flat, W_fc1, b_fc1)

        # Dropout (fed with keep_prob 1.0 below, so nothing is dropped).
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer and class probabilities.
        W_fc2 = graph.get_tensor_by_name("W_fc2:0")
        b_fc2 = graph.get_tensor_by_name("b_fc2:0")
        y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        for n in range(1, 12):
            path = "D:/tf_car_license_dataset/test_images/%s.bmp" % (n)
            img_data = np.zeros((1, SIZE), dtype=np.float32)
            # Context manager closes the file handle once the pixels are read.
            with Image.open(path) as img:
                width, height = img.size
                for h in range(height):
                    for w in range(width):
                        # NOTE(review): training binarises with "> 230" while
                        # this uses ">= 200" -- confirm the mismatch is
                        # intended.
                        if img.getpixel((w, h)) >= 200:
                            img_data[0][w + h * width] = 1
            result = sess.run(y_conv, feed_dict={x: img_data, keep_prob: 1.0})
            print("the result of picture",n," is:",PROVINCES[np.argmax(result)])