# Reference code: 05_basic_convnet.py
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt
import numpy as np
# Read the MNIST data sets, using ./data/ as the data directory and
# one-hot encoded labels.
mnist = input_data.read_data_sets("./data/", one_hot=True)

# Graph inputs: flattened 784-value images and 10-way one-hot labels.
# The leading None leaves the batch dimension unspecified.
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])

# Reshape the flat vectors into 28x28 single-channel images for conv2d;
# -1 means that dimension (the batch size) is left for TensorFlow to infer.
x_tensor = tf.reshape(x, shape=[-1, 28, 28, 1])

# Reference: tf.random_normal(shape, mean=0.0, stddev=1.0,
#                             dtype=tf.float32, seed=None, name=None)
#   shape:  shape of the output tensor (required)
#   mean:   mean of the normal distribution, default 0
#   stddev: standard deviation of the normal distribution, default 1.0
#   dtype:  output type, default tf.float32
#   seed:   integer random seed; once set, the same numbers are generated
#           every time
#   name:   name of the operation

# Hyper-parameters of the first convolutional layer.
# Conv weights are laid out as
# [height x width x input_channels x output_channels].
n_filters_1 = 16
filter_size = 5
def weight_variable(shape):
    """Create a weight variable initialised from a normal distribution.

    The initial values are drawn from a normal distribution with mean 0.0
    and standard deviation 0.01.

    Parameters
    ----------
    shape : list
        Size of weight variable

    Returns
    -------
    tf.Variable
        Variable wrapping the randomly drawn initial tensor.
    """
    return tf.Variable(tf.random_normal(shape, mean=0.0, stddev=0.01))
def bias_variable(shape):
    """Create a bias variable initialised from a normal distribution.

    The initial values are drawn from a normal distribution with mean 0.0
    and standard deviation 0.01 (the biases are not set to a constant).

    Parameters
    ----------
    shape : list
        Size of bias variable

    Returns
    -------
    tf.Variable
        Variable wrapping the randomly drawn initial tensor.
    """
    return tf.Variable(tf.random_normal(shape, mean=0.0, stddev=0.01))
# Parameters of the first convolutional layer: filter_size x filter_size
# kernels, 1 input channel, n_filters_1 output channels.
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])
b_conv1 = bias_variable([n_filters_1])

# Build the graph for the first layer of convolution.
# Strides are given as batch x height x width x channels. Instead of
# pooling, we use strides of 2 and more layers with smaller filters.
#
# tf.nn.conv2d arguments:
#   input:   the images to convolve, a 4-D float32/float64 tensor of shape
#            [batch, in_height, in_width, in_channels].
#   filter:  the convolution kernels, a tensor of shape
#            [filter_height, filter_width, in_channels, out_channels] with
#            the same dtype as `input`; its third dimension (in_channels)
#            must equal the fourth dimension of `input`.
#   strides: a 1-D vector of length 4 giving the step along each input
#            dimension; for images this is usually [1, stride, stride, 1].
#   padding: either "SAME" or "VALID", selecting the convolution mode;
#            with 'SAME' the kernel may sit on the image border.
#   use_cudnn_on_gpu (not passed here): bool, whether to use cuDNN
#            acceleration; defaults to true.
conv1 = tf.nn.conv2d(
    input=x_tensor,
    filter=W_conv1,
    strides=[1, 2, 2, 1],
    padding='SAME')

# tf.nn.relu keeps positive values and sets negative values to 0,
# e.g. [-2, -1, 0, 2, 3] -> [0, 0, 0, 2, 3].
h_conv1 = tf.nn.relu(conv1 + b_conv1)
# Just like the first layer: add a second convolutional layer to create a
# deeper net. Its input channel count is the first layer's output count.
n_filters_2 = 16
W_conv2 = weight_variable([filter_size, filter_size, n_filters_1, n_filters_2])
b_conv2 = bias_variable([n_filters_2])

# Stride-2 convolution with 'SAME' padding, then bias and ReLU.
conv2 = tf.nn.conv2d(
    input=h_conv1,
    filter=W_conv2,
    strides=[1, 2, 2, 1],
    padding='SAME')
h_conv2 = tf.nn.relu(conv2 + b_conv2)
# Flatten the convolutional output so it can feed a fully connected layer.
# tf.reshape(tensor, shape, name=None) reshapes `tensor` to `shape`; a -1
# entry means that dimension is computed automatically, and at most one -1
# may appear in the list.
# Each flattened row holds 7 * 7 * n_filters_2 values.
n_flat = 7 * 7 * n_filters_2
h_conv2_flat = tf.reshape(h_conv2, [-1, n_flat])

# Fully connected layer with n_fc hidden units.
n_fc = 1024
W_fc1 = weight_variable([n_flat, n_fc])
b_fc1 = bias_variable([n_fc])
fc1_pre_activation = tf.matmul(h_conv2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)

# Dropout regularises the network and reduces overfitting; it is generally
# applied to fully connected layers.
# tf.nn.dropout(x, keep_prob, noise_shape=None, seed=None, name=None)
#   x:         the input tensor
#   keep_prob: probability that a unit is kept; a placeholder here so the
#              value can be supplied at run time
#   name:      name of the operation
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Final softmax (output) layer.
# tf.nn.softmax(logits, axis=None, name=None, dim=None) computes
#   softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)
#   logits: a non-empty tensor of type half, float32 or float64
#   axis:   the dimension softmax is performed on; the default -1 means
#           the last dimension
#   name:   optional name for the operation
#   dim:    deprecated alias for axis
W_fc2 = weight_variable([n_fc, 10])
# Use a distinct name for the output bias so the first fully connected
# layer's bias (b_fc1) is not shadowed.
b_fc2 = bias_variable([10])
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_pred = tf.nn.softmax(logits)

# Define the loss and training functions.
# The cross-entropy is computed from the raw logits with the fused op
# instead of -sum(y * log(softmax(logits))): the explicit form evaluates
# log(0) and produces NaN once a predicted probability underflows to 0.
# reduce_sum adds the per-example losses, so the loss is still the sum of
# the cross-entropy over the batch.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
optimizer = tf.train.AdamOptimizer().minimize(cross_entropy)
# Monitor accuracy.
# tf.argmax returns the index of the largest element along the given axis,
# e.g. [1, 3, 5, 6, 4] -> 3 and, along axis 1,
# [[1, 8, 5, 2], [4, 5, 7, 5]] -> [1, 2].
predicted_class = tf.argmax(y_pred, 1)
true_class = tf.argmax(y, 1)

# tf.equal(A, B) compares two tensors element-wise: True where the elements
# are equal, False otherwise. The result has the same shape as A.
correct_prediction = tf.equal(predicted_class, true_class)

# tf.cast(x, dtype, name=None) converts x to dtype; the mean of the
# resulting 0/1 values is the fraction of correct predictions.
correct_as_float = tf.cast(correct_prediction, 'float')
accuracy = tf.reduce_mean(correct_as_float)
# Create a new session and initialise the variables.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

batch_size = 100
n_epochs = 5
# '//' is integer (floor) division, so only whole batches are counted.
n_batches = mnist.train.num_examples // batch_size

# Train in mini-batches and report validation accuracy.
# sess.run(fetches, feed_dict=None, options=None, run_metadata=None) runs
# the graph pieces needed to evaluate `fetches`, substituting the values in
# `feed_dict` for the corresponding tensors.
# keep_prob is 0.5 while training so that dropout is active, and 1.0 when
# evaluating so that every unit takes part. Training with keep_prob = 1.0
# tends to memorise the training set and generalise poorly; a smaller value
# converges more slowly but generalises better.
# NOTE(review): the indentation of this loop was reconstructed; the accuracy
# print is assumed to run once per epoch, after all batches -- confirm.
for epoch_i in range(n_epochs):
    for batch_i in range(n_batches):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        train_feed = {x: batch_xs, y: batch_ys, keep_prob: 0.5}
        sess.run(optimizer, feed_dict=train_feed)
    validation_feed = {
        x: mnist.validation.images,
        y: mnist.validation.labels,
        keep_prob: 1.0,
    }
    print(sess.run(accuracy, feed_dict=validation_feed))
def montage(W):
    """Draws all filters (n_input * n_output filters) as a
    montage image separated by 1 pixel borders.

    The filters are laid out row by row on a square grid whose side is
    ceil(sqrt(number of filters)). Border pixels and unused grid cells
    are filled with 0.5.

    Parameters
    ----------
    W : numpy.ndarray
        Input array to create montage of, indexed as
        [height, width, n_input, n_output]. Any 4-D array-like is accepted.

    Returns
    -------
    m : numpy.ndarray
        Montage image.
    """
    W = np.asarray(W)
    height, width = W.shape[0], W.shape[1]
    n_filters = W.shape[2] * W.shape[3]
    # Merge the input/output channel axes so each filter is one 2-D slice.
    W = np.reshape(W, [height, width, n_filters])
    n_plots = int(np.ceil(np.sqrt(n_filters)))
    # Each grid cell holds one filter plus a 1-pixel border; one extra
    # border row/column closes the far edges.
    m = np.ones(
        (height * n_plots + n_plots + 1,
         width * n_plots + n_plots + 1)) * 0.5
    for this_filter in range(n_filters):
        i, j = divmod(this_filter, n_plots)
        top = 1 + i + i * height
        left = 1 + j + j * width
        # Index the filter explicitly rather than with np.squeeze, which
        # would also drop a height or width axis of size 1 and make the
        # assignment fail for k x 1 filters.
        m[top:top + height, left:left + width] = W[:, :, this_filter]
    return m
# Take a look at the kernels we've learned: fetch the first-layer weights,
# scale them by their maximum value and show them as a montage.
W = sess.run(W_conv1)
scaled_kernels = W / np.max(W)
plt.imshow(montage(scaled_kernels), cmap='coolwarm')
plt.show()