看了吴恩达的深度学习视频,跟着课后作业编写了一个卷积神经网络用来识别手势数字
关于卷积神经网络的概念以及算法就不多赘述了,这里主要梳理一下编程过程中的主要思路
X -> conv2d(卷积一次) ->Relu(非线性函数) ->Max_pool(最大池化) -> conv2d -> Relu -> Max_pool -> 一维化转换 -> FC(全连接层) -> softmax -> y_hat
从输入x到最终输出y_hat, 中间使用两次卷积、Relu以及最大池化,经过一维转换后进行全连接,因为是多分类,最终使用softmax输出
TensorFlow主要涉及到了几个步骤:
1. 读取数据集,并做相关预处理
2. 创建占位符,用于存储处理好的数据集
3. 根据网络结构实现向前传播,计算代价
4. 向后传播最小化代价
5. 创建神经网络模型
6. 多次迭代,计算训练集以及测试集的准确度
创建的神经网络模型主要分为以下几个步骤:
1. 初始化所有要用到的参数,包括学习率(learning_rate)、迭代次数(num_epochs)、每次迭代的代价列表(costs)、分块大小(minibach_size)、随机种子(seed)、分块数量(minibach_num,在迭代循环中计算)、训练集维度(m, n_H0, n_W0, n_C0,依次为样本数量、图片高度、图片宽度、通道数)、类别数量(n_y)。具体代码如下:
def model(X_train, Y_train, X_test, Y_test,
          learning_rate=0.009, num_epochs=100, minibach_size=64,
          print_cost=True, isPlot=True):
    """Train and evaluate the CNN (walkthrough excerpt: setup only).

    This excerpt covers step 1: resetting graph state and reading the
    data dimensions. The body continues in the excerpts that follow.
    """
    # Start from an empty default graph so the function can be called again.
    ops.reset_default_graph()
    # Graph-level random seed.
    tf.set_random_seed(1)

    # Starting seed; later passed to the mini-batch splitter.
    seed = 3
    # Receives one averaged cost value per epoch.
    costs = []

    # X_train.shape must unpack into exactly four values.
    m, n_H0, n_W0, n_C0 = X_train.shape
    # Second dimension of the label array sets the width of the Y placeholder.
    n_y = Y_train.shape[1]
2. 创建网络结构:创建占位符 -> 初始化参数 -> 向前传播 -> 计算代价 -> 向后传播最小化代价
# (Continues inside model().) Step 2: assemble the computation graph.

# Placeholders that are fed with image batches and their labels.
X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)

# Convolution filter variables.
parameters = initialize_parameters()

# Forward pass; Z3 is the output of the final fully connected layer.
Z3 = forward_propagation(X, parameters)

# Scalar cost computed from Z3 and the labels.
cost = compute_cost(Z3, Y)

# Adam optimisation step that minimises the cost.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialisation op that the session runs first. It must be created after
# the optimizer so that it also covers the variables the optimizer adds.
init = tf.global_variables_initializer()
3. 创建Session任务
初始化所有变量 -> 实现每次迭代 -> 计算总代价 -> 画图 -> 计算准确度
每次迭代:
1. 计算minibatch大小,随机划分minibatch, 计算minibatch_num
2. 逐个minibatch计算代价 累加所有的代价
# (Continues inside model().) Step 3: train in a session, then evaluate.
with tf.Session() as sess:
    # Run the variable initialisation op before any training step.
    sess.run(init)

    for epoch in range(num_epochs):
        # A different seed is passed to the splitter on every epoch.
        seed = seed + 1
        minibaches = cnn_utils.random_mini_batches(X_train, Y_train, minibach_size, seed)

        # Count the batches that are actually processed and average over
        # that count. Dividing by int(m / minibach_size) is wrong when a
        # trailing partial batch exists and divides by zero when the
        # training set is smaller than one batch.
        minibach_num = 0
        cost_sum = 0
        for minibach_X, minibach_Y in minibaches:
            # One optimisation step; also fetch the cost of this batch.
            _, temp_cost = sess.run([optimizer, cost], feed_dict={X: minibach_X, Y: minibach_Y})
            cost_sum += temp_cost
            minibach_num += 1
        minibach_cost = cost_sum / minibach_num if minibach_num else 0

        # Report every fifth epoch.
        if print_cost and epoch % 5 == 0:
            print("当前是第 " + str(epoch) + " 代,成本值为:" + str(minibach_cost))

        # Record the cost of every epoch for the plot below.
        costs.append(minibach_cost)

    if isPlot:
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        # One point is recorded per epoch, so the x axis counts epochs.
        plt.xlabel('epochs')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

    # Predicted class = index of the largest output; compare with the
    # index of the largest label entry.
    predict_op = tf.argmax(Z3, 1)
    correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    # NOTE: this prints the string form of the accuracy tensor object,
    # not a numeric value; the numeric values are evaluated below.
    print('current predict_accuracy =' + str(accuracy))

    # eval() relies on the session opened by the enclosing with-block.
    train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
    test_accuary = accuracy.eval({X: X_test, Y: Y_test})
    print("训练集准确度:" + str(train_accuracy))
    print("测试集准确度:" + str(test_accuary))
150次迭代结果如下:
源代码如下
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
import tf_utils
import cnn_utils
def create_placeholders(n_H0, n_W0, n_C0, n_y):
    """Create the float32 placeholders for the image batch and its labels.

    Args:
        n_H0: Second dimension of the image placeholder (image height).
        n_W0: Third dimension of the image placeholder (image width).
        n_C0: Fourth dimension of the image placeholder (channel count).
        n_y: Second dimension of the label placeholder.

    Returns:
        Tuple ``(X, Y)``: ``X`` has shape ``[None, n_H0, n_W0, n_C0]`` and
        ``Y`` has shape ``[None, n_y]``. The leading ``None`` leaves the
        batch dimension unspecified.
    """
    image_shape = [None, n_H0, n_W0, n_C0]
    label_shape = [None, n_y]

    # Create the image placeholder first, then the label placeholder.
    images = tf.placeholder(tf.float32, shape=image_shape)
    labels = tf.placeholder(tf.float32, shape=label_shape)
    return images, labels
def initialize_parameters():
    """Create the two convolution filter variables of the network.

    The graph-level random seed is set to 1 first. ``W1`` is created with
    shape ``[4, 4, 3, 8]`` and ``W2`` with shape ``[2, 2, 8, 16]``; each
    gets its own Xavier initializer built with ``seed=0``.

    Returns:
        dict mapping ``"W1"`` and ``"W2"`` to the created variables.
    """
    tf.set_random_seed(1)

    # (variable name, filter shape), listed in creation order.
    filter_specs = (
        ("W1", [4, 4, 3, 8]),
        ("W2", [2, 2, 8, 16]),
    )
    return {
        var_name: tf.get_variable(
            var_name,
            var_shape,
            initializer=tf.contrib.layers.xavier_initializer(seed=0),
        )
        for var_name, var_shape in filter_specs
    }
def forward_propagation(X, parameters):
    """Build the forward pass of the network.

    Pipeline: conv2d -> relu -> max_pool -> conv2d -> relu -> max_pool ->
    flatten -> fully connected layer without activation.

    Args:
        X: Input tensor fed to the first convolution.
        parameters: dict holding the filters under keys ``'W1'`` and ``'W2'``.

    Returns:
        Output of the fully connected layer (6 units, no activation
        function). The unit count is hard-coded here.
    """
    # Look up both filters up front so a missing key fails before any op is built.
    filters = (parameters['W1'], parameters['W2'])
    # Pooling window and stride of each stage: 8x8 first, then 4x4.
    pool_sizes = (8, 4)

    stage_output = X
    for kernel, pool in zip(filters, pool_sizes):
        window = [1, pool, pool, 1]
        convolved = tf.nn.conv2d(stage_output, kernel, strides=[1, 1, 1, 1], padding='SAME')
        rectified = tf.nn.relu(convolved)
        stage_output = tf.nn.max_pool(rectified, ksize=window, strides=window, padding='SAME')

    flattened = tf.contrib.layers.flatten(stage_output)
    # activation_fn=None keeps the output linear; softmax is applied in the cost.
    return tf.contrib.layers.fully_connected(flattened, 6, activation_fn=None)
def compute_cost(Z3, Y):
    """Return the mean softmax cross-entropy between ``Z3`` and ``Y``.

    Args:
        Z3: Tensor passed as the ``logits`` argument.
        Y: Tensor passed as the ``labels`` argument.

    Returns:
        Scalar tensor: the mean of the cross-entropy values.
    """
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=Z3)
    return tf.reduce_mean(cross_entropy)
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.009,
          num_epochs=100, minibach_size=64, print_cost=True, isPlot=True):
    """Build the CNN graph, train it with Adam and report accuracy.

    Args:
        X_train: Training images; ``X_train.shape`` must unpack into four
            values (sample count, height, width, channels).
        Y_train: Training labels; ``Y_train.shape[1]`` sets the width of
            the label placeholder.
        X_test: Test images, fed to the accuracy op after training.
        Y_test: Test labels, fed to the accuracy op after training.
        learning_rate: Learning rate given to ``tf.train.AdamOptimizer``.
        num_epochs: Number of passes of the training loop.
        minibach_size: Batch size passed to ``cnn_utils.random_mini_batches``.
        print_cost: If true, print the epoch cost on every fifth epoch.
        isPlot: If true, plot the per-epoch cost after training.

    Returns:
        Tuple ``(train_accuracy, test_accuracy, parameters)``.
        ``parameters`` is the dict returned by ``initialize_parameters()``,
        i.e. graph variables rather than evaluated arrays.
    """
    # Start from an empty default graph so the function can be called again.
    ops.reset_default_graph()
    tf.set_random_seed(1)
    seed = 3
    _, n_H0, n_W0, n_C0 = X_train.shape
    n_y = Y_train.shape[1]
    costs = []

    # Assemble the graph: placeholders -> parameters -> forward pass -> cost.
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Created after the optimizer so it also covers the optimizer's variables.
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(num_epochs):
            # A different seed is passed to the splitter on every epoch.
            seed = seed + 1
            minibaches = cnn_utils.random_mini_batches(X_train, Y_train, minibach_size, seed)

            # Count the batches that are actually processed and average
            # over that count. Dividing by int(m / minibach_size) is wrong
            # when a trailing partial batch exists and divides by zero when
            # the training set is smaller than one batch.
            minibach_num = 0
            cost_sum = 0
            for minibach_X, minibach_Y in minibaches:
                # One optimisation step; also fetch the cost of this batch.
                _, temp_cost = sess.run([optimizer, cost], feed_dict={X: minibach_X, Y: minibach_Y})
                cost_sum += temp_cost
                minibach_num += 1
            minibach_cost = cost_sum / minibach_num if minibach_num else 0

            # Report every fifth epoch.
            if print_cost and epoch % 5 == 0:
                print("当前是第 " + str(epoch) + " 代,成本值为:" + str(minibach_cost))

            # Record the cost of every epoch for the plot below.
            costs.append(minibach_cost)

        if isPlot:
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            # One point is recorded per epoch, so the x axis counts epochs.
            plt.xlabel('epochs')
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()

        # Predicted class = index of the largest output; compare with the
        # index of the largest label entry.
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

        # NOTE: this prints the string form of the accuracy tensor object,
        # not a numeric value; the numeric values are evaluated below.
        print('current predict_accuracy =' + str(accuracy))

        # eval() relies on the session opened by the enclosing with-block.
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuary = accuracy.eval({X: X_test, Y: Y_test})
        print("训练集准确度:" + str(train_accuracy))
        print("测试集准确度:" + str(test_accuary))

        return train_accuracy, test_accuary, parameters
def main():
    """Load the dataset, preprocess it and train the model for 150 epochs."""
    (train_images_raw, train_labels_raw,
     test_images_raw, test_labels_raw, _classes) = tf_utils.load_dataset()

    # Divide by 255 (presumably maps 8-bit pixel values into [0, 1]).
    train_images = train_images_raw / 255.
    test_images = test_images_raw / 255.

    # One-hot encode with 6 classes. The result is transposed, presumably
    # so that each row is one sample; confirm against cnn_utils.
    train_labels = cnn_utils.convert_to_one_hot(train_labels_raw, 6).T
    test_labels = cnn_utils.convert_to_one_hot(test_labels_raw, 6).T

    # The accuracies are printed inside model(); the returned values are unused here.
    _, _, _trained_parameters = model(
        train_images, train_labels, test_images, test_labels, num_epochs=150)
'''
tf.reset_default_graph()
np.random.seed(1)
with tf.Session() as sess:
X, Y = create_placeholders(64, 64, 3, 6)
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
cost = compute_cost(Z3, Y)
init = tf.global_variables_initializer()
sess.run(init)
a = sess.run(cost, {X: np.random.randn(2, 64, 64, 3), Y: np.random.randn(2, 6)})
print(str(a))
sess.close()
'''
if __name__ == "__main__":
    # Script entry point: seed NumPy's global random generator, then run
    # the training pipeline.
    np.random.seed(1)
    main()