熟练掌握基于TensorFlow的深度卷积神经网络的原理,实现分类网络LeNet5,并在手写数字识别数据集MNIST上验证和评价LeNet5的分类性能。
1)数据集的介绍及初步探索,数据预处理主要步骤并解释处理的原因和基本方法。
2)算法原理介绍
3)实现算法的大致描述(介绍该算法的思路)
4)验证并简要分析评价算法得出的结果
5)相关书籍,论文或者博客参考地址(Reference)
图 2. 卷积神经网络架构图
图 3. LeNet-5模型结构图
数据处理过程
'''
定义需要使用到的常量
'''
import glob
import os.path
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile
# Root directory of the raw input images. According to the original note it
# holds five sub-directories, each containing all images of one class.
INPUT_DATA = '../../datasets/flower_photos'

# Destination file; the preprocessed image data is stored in NumPy format.
OUTPUT_FILE = '../../datasets/flower_processed_data.npy'

# Percentage of the images routed to the validation and test splits.
VALIDATION_PERCENTAGE = TEST_PERCENTAGE = 10
'''
定义数据处理过程
'''
# Read the images and split them into training, validation and test data.
def create_image_lists(sess, testing_percentage, validation_percentage):
    """Load the JPEG images under INPUT_DATA and split them into three sets.

    Every sub-directory of INPUT_DATA is treated as one class. Classes are
    numbered from 0 in the order in which os.walk visits the directories that
    contain at least one JPEG file. Each image is decoded, converted to
    float32, resized to 299x299 and randomly assigned to the validation,
    test or training split.

    Args:
        sess: TensorFlow session used to run the decode/resize ops.
        testing_percentage: chance (out of 100) that an image is placed in
            the test split.
        validation_percentage: chance (out of 100) that an image is placed in
            the validation split.

    Returns:
        A one-dimensional NumPy object array with six entries, in this order:
        training images, training labels, validation images, validation
        labels, test images, test labels. Each entry is a Python list.
    """
    # os.walk yields INPUT_DATA itself first; only the entries after it are
    # candidate class directories.
    sub_dirs = [entry[0] for entry in os.walk(INPUT_DATA)][1:]
    extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']

    # Containers for the three splits.
    training_images, training_labels = [], []
    testing_images, testing_labels = [], []
    validation_images, validation_labels = [], []

    # Build the decode/convert/resize pipeline once and feed the raw bytes
    # through a placeholder. Creating these ops per image would add new nodes
    # to the graph for every file that is processed.
    with sess.graph.as_default():
        raw_jpeg = tf.placeholder(tf.string, shape=[])
        image = tf.image.decode_jpeg(raw_jpeg)
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # 299x299 is the size chosen so the Inception-v3 model can consume
        # the images.
        image = tf.image.resize_images(image, [299, 299])

    current_label = 0
    for sub_dir in sub_dirs:
        # Collect all image files of this sub-directory.
        dir_name = os.path.basename(sub_dir)
        file_list = []
        seen = set()
        for extension in extensions:
            file_glob = os.path.join(INPUT_DATA, dir_name, '*.' + extension)
            for path in glob.glob(file_glob):
                # On case-insensitive file systems '*.jpg' and '*.JPG' match
                # the same files; keep each path only once so an image cannot
                # end up in two different splits.
                if path not in seen:
                    seen.add(path)
                    file_list.append(path)
        if not file_list:
            continue
        print("processing:", dir_name)

        for i, file_name in enumerate(file_list, start=1):
            # Close the file handle deterministically after reading.
            with gfile.FastGFile(file_name, 'rb') as image_file:
                image_raw_data = image_file.read()
            image_value = sess.run(image, feed_dict={raw_jpeg: image_raw_data})

            # Randomly pick the split for this image.
            chance = np.random.randint(100)
            if chance < validation_percentage:
                validation_images.append(image_value)
                validation_labels.append(current_label)
            elif chance < (testing_percentage + validation_percentage):
                testing_images.append(image_value)
                testing_labels.append(current_label)
            else:
                training_images.append(image_value)
                training_labels.append(current_label)

            if i % 200 == 0:
                print(i, "images processed.")
        current_label += 1

    # Shuffle the training data. Restoring the RNG state before the second
    # shuffle applies the same permutation to images and labels.
    state = np.random.get_state()
    np.random.shuffle(training_images)
    np.random.set_state(state)
    np.random.shuffle(training_labels)

    # The six lists have different lengths and element shapes, so fill an
    # object array explicitly instead of relying on np.asarray to accept a
    # ragged nested sequence (recent NumPy versions reject that).
    parts = [training_images, training_labels,
             validation_images, validation_labels,
             testing_images, testing_labels]
    result = np.empty(len(parts), dtype=object)
    for index, part in enumerate(parts):
        result[index] = part
    return result
'''
运行数据处理过程
'''
# Run the preprocessing inside a TensorFlow session and persist the result.
with tf.Session() as sess:
    processed_data = create_image_lists(
        sess,
        TEST_PERCENTAGE,
        VALIDATION_PERCENTAGE,
    )
    # Store the processed splits in NumPy format.
    np.save(OUTPUT_FILE, processed_data)
MNIST数据处理
MNIST是一个手写体数字识别数据集,其包含了60000 张图片作为训练数据,10000 张图片作为测试数据。在MNIST 数据集中的每一张图片都代表了0~9 中的一个数字,图片的大小都为28 * 28。TensorFlow对MNIST数据集做了很好的封装:它提供了一个类来处理MNIST 数据,这个类会自动下载并转化MNIST 数据的格式,将数据从原始的数据包中解析成训练和测试神经网络时使用的格式(每张28 * 28 的图片被展开成长度为784 的一维数组),同时TensorFlow会自动将MNIST数据集划分为训练集、验证集和测试集。
from tensorflow.examples.tutorials.mnist import input_data
'''
training dataset 和validating dataset组成了MNIST本身提供的训练数据集
'''
# The object returned by input_data.read_data_sets already splits MNIST into
# train, validation and test subsets.
mnist = input_data.read_data_sets("../../datasets/MNIST_data/", one_hot=True)

# Report the number of images in the training, validation and test subsets.
for caption, subset in (
        ("Training dataset size: ", mnist.train),
        ("Validating dataset size: ", mnist.validation),
        ("Testing dataset size: ", mnist.test)):
    print(caption, subset.num_examples)

# After processing, every image is a one-dimensional array of length
# 784 (28 * 28).
print("Example training data: ", mnist.train.images[0])
print("Example training dataset label: ", mnist.train.labels[0])

# mnist.train.next_batch reads a small part (batch_size examples) of the
# training data to be used as one training batch.
batch_size = 100
xs, ys = mnist.train.next_batch(batch_size)
print("X shape:", xs.shape)
print("Y shape:", ys.shape)
MNIST推理过程
import tensorflow as tf
'''
定义神经网络结构相关的参数
'''
# Layer sizes of the fully connected network.
INPUT_NODE = 784    # width of the input layer
OUTPUT_NODE = 10    # width of the output layer
LAYER1_NODE = 500   # width of the hidden layer
'''
通过tf.get_variable函数来获取变量
'''
def get_weight_variable(shape, regularizer):
    """Get the variable named "weights" in the current variable scope.

    The variable is obtained through tf.get_variable with a truncated-normal
    initializer (stddev 0.1).

    Args:
        shape: shape of the weight variable.
        regularizer: callable applied to the weights, or None. When given,
            its result is added to the 'losses' collection.

    Returns:
        The weight variable.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Identity comparison is the correct test for "no regularizer supplied".
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights
'''
定义神经网络的前向传播过程
'''
def inference(input_tensor, regularizer):
    """Build the forward pass of the two-layer fully connected network.

    Args:
        input_tensor: input fed to the first layer; it is multiplied by an
            [INPUT_NODE, LAYER1_NODE] weight matrix.
        regularizer: forwarded to get_weight_variable for both layers; may
            be None.

    Returns:
        The output of the second layer (no activation is applied to it).
    """
    # Hidden layer: affine transform followed by ReLU.
    with tf.variable_scope('layer1'):
        hidden_weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        hidden_biases = tf.get_variable(
            "biases", [LAYER1_NODE], initializer=tf.constant_initializer(0.0))
        hidden = tf.nn.relu(tf.matmul(input_tensor, hidden_weights) + hidden_biases)

    # Output layer: plain affine transform.
    with tf.variable_scope('layer2'):
        output_weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        output_biases = tf.get_variable(
            "biases", [OUTPUT_NODE], initializer=tf.constant_initializer(0.0))
        output = tf.matmul(hidden, output_weights) + output_biases

    return output
MNIST训练过程
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import os
'''
定义神经网络结构相关的参数
'''
# Optimisation hyper-parameters.
BATCH_SIZE = 100                # examples per training step
LEARNING_RATE_BASE = 0.8        # initial learning rate
LEARNING_RATE_DECAY = 0.99      # decay rate of the learning rate
REGULARIZATION_RATE = 0.0001    # scale passed to the L2 regularizer
TRAINING_STEPS = 30000          # number of training iterations
MOVING_AVERAGE_DECAY = 0.99     # decay of the exponential moving average

# Location and base name of the saved checkpoints.
MODEL_SAVE_PATH = "MNIST_model/"
MODEL_NAME = "mnist_model"
'''
定义训练过程
'''
def train(mnist):
    """Train the fully connected MNIST network and save checkpoints.

    Builds the graph (forward pass, regularized cross-entropy loss,
    exponentially decaying learning rate, moving averages of the trainable
    variables), then runs TRAINING_STEPS gradient-descent steps. Every 1000
    steps the batch loss is printed and a checkpoint is written under
    MODEL_SAVE_PATH.

    Args:
        mnist: dataset object providing train.num_examples and
            train.next_batch(batch_size).
    """
    # Input and label placeholders.
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Loss, learning rate, moving-average op and the training step.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # Labels are fed one-hot; argmax turns them into the class indices the
    # sparse cross-entropy op expects.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    # Group the gradient step and the moving-average update into one op.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Checkpoint writer.
    saver = tf.train.Saver()
    # Make sure the checkpoint directory exists before the first save so that
    # saver.save cannot fail on a missing parent directory.
    if not os.path.isdir(MODEL_SAVE_PATH):
        os.makedirs(MODEL_SAVE_PATH)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run(
                [train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
'''
main主函数入口
'''
def main(argv=None):
    """Load the MNIST data set with one-hot labels and start training.

    Args:
        argv: unused.
    """
    dataset = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(dataset)


if __name__ == '__main__':
    main()
MNIST评估过程
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import mnist_train
'''
每10秒加载一次最新的模型
'''
# Seconds to wait between two loads of the latest checkpoint.
EVAL_INTERVAL_SECS = 10
def evaluate(mnist):
    """Repeatedly evaluate the latest checkpoint on the validation set.

    Builds the inference graph in a fresh tf.Graph, restores the
    moving-average values of the variables from the newest checkpoint under
    mnist_train.MODEL_SAVE_PATH, prints the validation accuracy and sleeps
    EVAL_INTERVAL_SECS seconds before repeating. Returns as soon as no
    checkpoint can be found.

    Args:
        mnist: dataset object providing validation.images and
            validation.labels.
    """
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}

        # No regularizer is needed when only evaluating.
        logits = mnist_inference.inference(x, None)
        is_correct = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

        # Restore the variables from their moving-average values.
        averages = tf.train.ExponentialMovingAverage(mnist_train.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(averages.variables_to_restore())

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)
                if not (ckpt and ckpt.model_checkpoint_path):
                    print('No checkpoint file found')
                    return
                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step count is the suffix after the last '-' of the
                # checkpoint file name.
                checkpoint_name = ckpt.model_checkpoint_path.split('/')[-1]
                global_step = checkpoint_name.split('-')[-1]
                accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
            time.sleep(EVAL_INTERVAL_SECS)
'''
main主函数入口
'''
def main(argv=None):
    """Load the MNIST data set with one-hot labels and start the evaluation loop.

    Args:
        argv: unused.
    """
    dataset = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    evaluate(dataset)


if __name__ == '__main__':
    main()
深度卷积神经网络
卷积神经网络与全连接神经网络的整体架构是很相似的
卷积神经网络与全连接神经网络的唯一区别在于神经网络中相邻两层的连接方式
为什么使用卷积神经网络代替全连接神经网络?
使用全连接神经网络处理图像的最大问题在于全连接层的参数太多。对于MNIST数据,每一张图片的大小是28 × 28 × 1,其中28 × 28 为图片的大小,× 1 表示图像是黑白的,只有一个色彩通道。假设第一层隐藏层的节点数为500 个,那么一个全连接层的神经网络将有28 × 28 × 500+500 =392500 个参数。同理当图片的尺寸更大,通道数更多时,采用全连接的方式会导致参数过多,从而产生过拟合的情况,因此需要一个更合理的神经网络结构来有效地减少神经网络中参数个数。卷积神经网络就可以达到这个目的。
在卷积神经网络的前几层中,每一层的节点都被组织成一个三维矩阵。图2中虚线部分展示了卷积神经网络的一个连接示意图,从图中可以看出卷积神经网络中前几层中每一个节点只和上一层中部分的节点相连。
一个卷积神经网络主要由以下5种结构组成:输入层、卷积层、池化层、全连接层和Softmax层。
卷积层与池化层
import tensorflow as tf
import numpy as np
'''
定义输入矩阵
'''
# 3x3 input matrix with a trailing channel axis of size 1.
M = np.array([
    [1, -1, 0],
    [-1, 2, 1],
    [0, 2, -2],
]).reshape(3, 3, 1)
print("Matrix shape is: ", M.shape)
$$M=\left(\begin{array}{ccc}{1} & {-1} & {0} \\ {-1} & {2} & {1} \\ {0} & {2} & {-2}\end{array}\right)$$
'''
定义卷积过滤器, 深度为1
'''
# 2x2 convolution filter with one input channel and depth 1, filled with
# constant values, plus a single constant bias of 1.
filter_weight = tf.get_variable(
    'weights',
    [2, 2, 1, 1],
    initializer=tf.constant_initializer([[1, -1], [0, 2]]))
biases = tf.get_variable(
    'biases',
    [1],
    initializer=tf.constant_initializer(1))
$$W=\left(\begin{array}{cc}{1} & {-1} \\ {0} & {2}\end{array}\right)$$
'''
调整输入的格式符合TensorFlow的要求
'''
# Convert the matrix to float32 and give it the four-dimensional layout
# (1, 3, 3, 1) that the placeholder below expects.
M = np.asarray(M, dtype='float32').reshape(1, 3, 3, 1)

# Run the input through the convolution filter (stride 2, SAME padding, plus
# bias) and, independently, through a 2x2 average-pooling filter (stride 2).
x = tf.placeholder('float32', [1, None, None, 1])
conv = tf.nn.conv2d(x, filter_weight, strides=[1, 2, 2, 1], padding='SAME')
bias = tf.nn.bias_add(conv, biases)
pool = tf.nn.avg_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    feed = {x: M}
    convoluted_M = sess.run(bias, feed_dict=feed)
    pooled_M = sess.run(pool, feed_dict=feed)

    print("convoluted_M: \n", convoluted_M)
    print("pooled_M: \n", pooled_M)
LeNet-5模型
LeNet-5 模型是Yann LeCun 教授于1998 年在论文Gradient-Based Learning Applied to Document Recognition中提出的,LeNet-5 模型可以达到大约99.2% 的正确率。LeNet-5模型总共有7层,图3展示了LeNet-5 模型的架构。
第一层,卷积层
这一层的输入就是原始的图像像素, LeNet-5 模型接受的输入层大小为32 × 32 × 1 。第一个卷积层过滤器的尺寸为 5 × 5 ,深度为6 ,不使用全0填充,步长为1 。因为没有使用全0填充,所以这一层的输出的尺寸为32-5+1=28 , 深度为6 。这一个卷积层总共有5 × 5 × 1 × 6+6=156 个参数,其中6个为偏置项参数。因为下一层的节点矩阵有28 × 28 × 6=4704个节点,每个节点和5 × 5=25 个当前层节点相连,所以本层卷积层总共有4704 × (25 + 1) = 122304 个连接。
第二层,池化层
这一层的输入为第一层的输出, 是一个28x28x6 的节点矩阵。本层采用的过滤器大小为2 × 2 ,长和宽的步长均为2,所以本层的输出矩阵大小为14 × 14 × 6 。
第三层,卷积层
本层的输入矩阵大小为14 × 14 × 6 ,使用的过滤器大小为5x5,深度为16 。本层不使用全0填充, 步长为1。本层的输出矩阵大小为10 x 10 × 16 。按照标准的卷积层,本层应该有5x5x6x16+16=2416个参数,10×10x16x(25+1) =41600个连接。
第四层,池化层
本层的输入矩阵大小为10 x 10 x 16 ,采用的过滤器大小为2× 2 ,步长为2 。本层的输出矩阵大小为5×5x16 。
第五层,全连接层
本层的输入矩阵大小为5×5×16 ,在LeNet-5模型的论文中将这一层称为卷积层,但是因为过滤器的大小就是5×5,所以和全连接层没有区别,在之后的TensorFlow程序实现中也会将这一层看成全连接层。本层的输出节点个数为120 ,总共有5×5×16×120+120=48120 个参数。
第六层,全连接层
本层的输入节点个数为120个,输出节点个数为84 个,总共参数为120x84+84=10164个。
第七层,全连接层
本层的输入节点个数为84个,输出节点个数为10个,总共参数为84×10+10=850个。
推理阶段
'''
设定神经网络的参数
'''
# Input / output sizes.
INPUT_NODE = 784
OUTPUT_NODE = 10

# Image geometry and number of classes.
IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10

# First convolution layer: depth and filter size.
CONV1_DEEP = 32
CONV1_SIZE = 5

# Second convolution layer: depth and filter size.
CONV2_DEEP = 64
CONV2_SIZE = 5

# Number of nodes in the first fully connected layer.
FC_SIZE = 512
'''
定义前向传播的过程
'''
def inference(input_tensor, train, regularizer):
    """Build the forward pass of the LeNet-5 style convolutional network.

    Structure: conv (5x5, CONV1_DEEP) -> max-pool -> conv (5x5, CONV2_DEEP)
    -> max-pool -> fully connected (FC_SIZE, ReLU) -> fully connected
    (NUM_LABELS). Both convolutions use stride 1 and SAME padding; both
    pooling layers use a 2x2 window with stride 2 and SAME padding.

    Args:
        input_tensor: four-dimensional input whose last dimension is
            NUM_CHANNELS. The batch dimension may be unknown (None).
        train: when true, tf.nn.dropout with argument 0.5 is applied to the
            output of the first fully connected layer.
        regularizer: callable applied to the weights of the two fully
            connected layers, or None. Its results are added to the 'losses'
            collection.

    Returns:
        The logits produced by the last fully connected layer.
    """
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable(
            "bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(
            input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(
            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable(
            "bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(
            pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(
            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        # Flatten height * width * depth into one vector per example. The
        # batch dimension is left to tf.reshape (-1) so the function also
        # works when the batch size is not statically known.
        pool_shape = pool2.get_shape().as_list()
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        reshaped = tf.reshape(pool2, [-1, nodes])

    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        # Dropout is only wanted while training.
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
训练阶段
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import LeNet5_infernece
import os
import numpy as np
'''
定义神经网络相关的参数
'''
# Optimisation hyper-parameters for the convolutional network.
BATCH_SIZE = 100                # examples per training step
LEARNING_RATE_BASE = 0.01       # initial learning rate
LEARNING_RATE_DECAY = 0.99      # decay rate of the learning rate
REGULARIZATION_RATE = 0.0001    # scale passed to the L2 regularizer
TRAINING_STEPS = 6000           # number of training iterations
MOVING_AVERAGE_DECAY = 0.99     # decay of the exponential moving average
'''
定义训练过程
'''
def train(mnist):
    """Train the LeNet-5 style network on MNIST.

    Builds the graph (forward pass with dropout, regularized cross-entropy
    loss, exponentially decaying learning rate, moving averages of the
    trainable variables) and runs TRAINING_STEPS gradient-descent steps,
    printing the batch loss every 1000 steps. No checkpoint is written.

    Args:
        mnist: dataset object providing train.num_examples and
            train.next_batch(batch_size).
    """
    # The convolutional network consumes a four-dimensional input:
    # batch, image height, image width, channels.
    input_shape = [
        BATCH_SIZE,
        LeNet5_infernece.IMAGE_SIZE,
        LeNet5_infernece.IMAGE_SIZE,
        LeNet5_infernece.NUM_CHANNELS]
    x = tf.placeholder(tf.float32, input_shape, name='x-input')
    y_ = tf.placeholder(tf.float32, [None, LeNet5_infernece.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # This is the training graph, so the train flag must be True; passing
    # False would silently disable the dropout layer during training.
    y = LeNet5_infernece.inference(x, True, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Loss, learning rate, moving-average op and the training step.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # Labels are fed one-hot; argmax turns them into the class indices the
    # sparse cross-entropy op expects.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    # Group the gradient step and the moving-average update into one op.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            # next_batch returns flat image vectors; restore the image layout
            # expected by the placeholder.
            reshaped_xs = np.reshape(xs, input_shape)
            _, loss_value, step = sess.run(
                [train_op, loss, global_step], feed_dict={x: reshaped_xs, y_: ys})

            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
'''
主函数入口
'''
def main(argv=None):
    """Load the MNIST data set with one-hot labels and train the network.

    Args:
        argv: unused.
    """
    dataset = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(dataset)


if __name__ == '__main__':
    main()
用于图片分类问题的卷积神经网络架构:
输入层 —>(卷积层 + —> 池化层?)+ —> 全连接层+
其中“卷积层+”表示一层或者多层卷积层;“池化层?”表示没有或者只有一层池化层;在多轮卷积层和池化层之后,卷积神经网络在输出之前一般会经过1~2 个全连接层(比如 LeNet-5)。
TensorFlow实战 Google 深度学习框架
LeNet-5详解与实现
Gradient-Based Learning Applied to Document Recognition
李宏毅深度学习