'''Implement a neural network using the TensorFlow (1.x) framework.'''
import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
import tf_utils
import time
np.random.seed(1)
'''对于tensorflow的代码而言,实现代码的结构如下:①创建tensorflow变量(此时,尚未直接计算)②实现tensorflow变量之间的操作定义③初始化tensorflow变量init=tf.global_variables_initializer()④创建session进行评估损失的值并打印它的值⑤运行session,此时,之前编写的操作都会在这一步运行总之,我们需要初始化变量,并创建一个session来运行它使用session的两种方法:方法一:sess = tf.Session()result = sess.run(...,feed_dict={...})sess.close()方法二:with tf.Session as sess:result = sess.run(...,feed_dict={})'''
#计算损失函数
y_hat = tf.constant(36,name="y_hat") #定义y_hat为固定值36
y = tf.constant(39,name="y") #定义y为固定值39
loss = tf.Variable((y-y_hat)**2,name="loss")#为损失函数创建一个变量
init = tf.global_variables_initializer()
with tf.Session() as session:
session.run(init)
print(session.run(loss))
#输出:9
'''占位符(placeholders)占位符是一个对象,它的值只能在稍后指定,要指定占位符的值,可以使用一个feed字典(feed_dict变量)来传入接下来我们为x创建一个占位符'''
sess = tf.Session()
x = tf.placeholder(tf.int64,name="x")
print(sess.run(2*x,feed_dict={x:3}))
sess.close()
#输出:6
#定义一个线性函数
def linear_function():
#初始化W、X、b,类型都为tensor的随机变量,维度分别为(4,3)、(3,1)、(4,1)
np.random.seed(1)
X = np.random.randn(3,1)
W = np.random.randn(4,3)
b = np.random.randn(4,1)
Y = tf.add(tf.matmul(W,X),b) #Y = W*X+b
#tf.matmul为矩阵乘法,tf.add为加法
#创建一个session并运行它
sess = tf.Session()
result = sess.run(Y)
sess.close() #使用完毕,关闭它
return result
print("result = " + str(linear_function()))
'''输出:result = [[-2.15657382][ 2.95891446][-1.08926781][-0.84538042]]'''
#定义sigmoid函数
def sigmoid(z):
x = tf.placeholder(tf.float32,name="x") #使用占位符定义x
sigmoid = tf.sigmoid(x) #计算sigmoid函数
with tf.Session() as sess:
result = sess.run(sigmoid,feed_dict={x:z})
return result
print ("sigmoid(0) = " + str(sigmoid(0)))
print ("sigmoid(12) = " + str(sigmoid(12)))
'''输出:sigmoid(0) = 0.5sigmoid(12) = 0.999994'''
#去一个标签矢量和C类总数,返回一个独热编码
def one_hot_matrix(labels,C):
C = tf.constant(C,name="C")
one_hot_matrix = tf.one_hot(indices=labels,depth=C,axis=0)
sess = tf.Session()
one_hot = sess.run(one_hot_matrix)
sess.close()
return one_hot
labels = np.array([1,2,3,0,2,1])
one_hot = one_hot_matrix(labels,C=4)
print(str(one_hot))
'''输出:[[ 0. 0. 0. 1. 0. 0.][ 1. 0. 0. 0. 0. 1.][ 0. 1. 0. 0. 1. 0.][ 0. 0. 1. 0. 0. 0.]]'''
#初始化0和1
def ones(shape):
ones = tf.ones(shape)
sess = tf.Session()
ones = sess.run(ones)
sess.close()
return ones
print ("ones = " + str(ones([3])))
#输出:ones = [ 1. 1. 1.]
'''使用tensorflow来创建一个神经网络来识别手语训练集:从0到5的数字的1080张图片(64*64像素),每个数字有180张图片测试集:从0到5的数字的120张图片(64*64像素),每个数字有5张图片'''
#加载数据集
X_train_orig,Y_train_orig,X_test_orig,Y_test_orig,classes = tf_utils.load_dataset()
index = 66
plt.imshow(X_train_orig[index])
plt.show()
print("Y = "+str(np.squeeze(Y_train_orig[:,index])))
print(X_train_orig.shape)
print(X_test_orig.shape)
'''输出:Y = 0(1080, 64, 64, 3)(120, 64, 64, 3)'''
#数据预处理
X_train_flatten = X_train_orig.reshape(X_train_orig.shape[0],-1).T
X_test_flatten = X_test_orig.reshape(X_test_orig.shape[0],-1).T
X_train = X_train_flatten / 255
X_test = X_test_flatten / 255
#将Y转化为独热矩阵
Y_train = tf_utils.convert_to_one_hot(Y_train_orig,6)
Y_test = tf_utils.convert_to_one_hot(Y_test_orig,6)
print("训练集样本数 = " + str(X_train.shape[1]))
print("测试集样本数 = " + str(X_test.shape[1]))
print("X_train.shape: " + str(X_train.shape))
print("Y_train.shape: " + str(Y_train.shape))
print("X_test.shape: " + str(X_test.shape))
print("Y_test.shape: " + str(Y_test.shape))
'''输出:训练集样本数 = 1080测试集样本数 = 120X_train.shape: (12288, 1080)Y_train.shape: (6, 1080)X_test.shape: (12288, 120)Y_test.shape: (6, 120)'''
#创建placeholders
def create_placeholders(n_x,n_y):
X = tf.placeholder(tf.float32,[n_x,None],name="X")
Y = tf.placeholder(tf.float32,[n_y,None],name="Y")
return X,Y
X,Y = create_placeholders(12288,6)
print("X = " + str(X))
print("Y = " + str(Y))
'''输出:X = Tensor("X:0", shape=(12288, ?), dtype=float32)Y = Tensor("Y:0", shape=(6, ?), dtype=float32)'''
#初始化参数变量
def initialize_parameters():
tf.set_random_seed(1)
W1 = tf.get_variable("W1",[25,12288],initializer=tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable("b1",[25,1],initializer=tf.zeros_initializer())
W2 = tf.get_variable("W2",[12,25],initializer=tf.contrib.layers.xavier_initializer(seed=1))
b2 = tf.get_variable("b2",[12,1],initializer=tf.zeros_initializer())
W3 = tf.get_variable("W3",[6,12],initializer=tf.contrib.layers.xavier_initializer(seed=1))
b3 = tf.get_variable("b3",[6,1],initializer=tf.zeros_initializer())
parameters = {
"W1":W1,
"b1":b1,
"W2":W2,
"b2":b2,
"W3":W3,
"b3":b3
}
return parameters
tf.reset_default_graph() #用于清除默认图形堆栈并重置全局默认图形
with tf.Session() as sess:
parameters = initialize_parameters()
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))
'''输出:W1 = b1 = W2 = b2 = '''
#前向传播
def forward_propagation(X,parameters):
W1 = parameters["W1"]
b1 = parameters["b1"]
W2 = parameters["W2"]
b2 = parameters["b2"]
W3 = parameters["W3"]
b3 = parameters["b3"]
Z1 = tf.add(tf.matmul(W1,X),b1)
A1 = tf.nn.relu(Z1)
Z2 = tf.add(tf.matmul(W2,A1),b2)
A2 = tf.nn.relu(Z2)
Z3 = tf.add(tf.matmul(W3,A2),b3)
return Z3
tf.reset_default_graph() #用于清除默认图形堆栈并重置全局默认图形。
with tf.Session() as sess:
X,Y = create_placeholders(12288,6)
parameters = initialize_parameters()
Z3 = forward_propagation(X,parameters)
print("Z3 = " + str(Z3))
#输出:Z3 = Tensor("Add_2:0", shape=(6, ?), dtype=float32)
#计算成本函数
def compute_cost(Z3,Y):
logits = tf.transpose(Z3) #转置
labels = tf.transpose(Y)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels))
return cost
tf.reset_default_graph()
with tf.Session() as sess:
X,Y = create_placeholders(12288,6)
parameters = initialize_parameters()
Z3 = forward_propagation(X,parameters)
cost = compute_cost(Z3,Y)
print("cost = " + str(cost))
#输出:cost = Tensor("Mean:0", shape=(), dtype=float32)
#构建模型
def model(X_train,Y_train,X_test,Y_test,learning_rate=0.0001,
num_epochs=1500,minibatch_size=32,print_cost=True,is_plot=True):
ops.reset_default_graph() #能够重新运行模型而不覆盖tf变量
tf.set_random_seed(1)
seed = 3
(n_x,m) = X_train.shape
n_y = Y_train.shape[0]
costs = []
X,Y = create_placeholders(n_x,n_y)
parameters = initialize_parameters()
Z3 = forward_propagation(X,parameters)
cost = compute_cost(Z3,Y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for epoch in range(num_epochs):
epoch_cost = 0
num_minibatches = int(m / minibatch_size)
seed = seed + 1
minibatches = tf_utils.random_mini_batches(X_train,Y_train,minibatch_size,seed)
for minibatch in minibatches:
(minibatch_X,minibatch_Y) = minibatch
_,minibatch_cost = sess.run([optimizer,cost],feed_dict={X:minibatch_X,Y:minibatch_Y})
epoch_cost = epoch_cost + minibatch_cost / num_minibatches
if epoch % 5 == 0:
costs.append(epoch_cost)
if print_cost and epoch % 100 == 0:
print("epoch = " + str(epoch) + " epoch_cost = " + str(epoch_cost))
if is_plot:
plt.plot(np.squeeze(costs))
plt.ylabel("cost")
plt.xlabel('iterations(per tens)')
plt.title("learning_rate = " + str(learning_rate))
plt.show()
parameters = sess.run(parameters)
print("参数已保存到session")
correct_prediction = tf.equal(tf.argmax(Z3),tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,"float"))
print("训练集的准确率:",accuracy.eval({X:X_train,Y:Y_train}))
print("测试集的准确率:",accuracy.eval({X:X_test,Y:Y_test}))
return parameters
#开始时间
start_time = time.clock()
#开始训练
parameters = model(X_train, Y_train, X_test, Y_test)
#结束时间
end_time = time.clock()
#计算时差
print("CPU的执行时间 = " + str(end_time - start_time) + " 秒" )
'''输出:epoch = 0 epoch_cost = 1.85570189447epoch = 100 epoch_cost = 1.01645776539epoch = 200 epoch_cost = 0.733102379423epoch = 300 epoch_cost = 0.572938936226epoch = 400 epoch_cost = 0.468773578604epoch = 500 epoch_cost = 0.3810211113epoch = 600 epoch_cost = 0.313826778621epoch = 700 epoch_cost = 0.254280460603epoch = 800 epoch_cost = 0.203799342567epoch = 900 epoch_cost = 0.166511993291epoch = 1000 epoch_cost = 0.140936921718epoch = 1100 epoch_cost = 0.107750129745epoch = 1200 epoch_cost = 0.0862994250475epoch = 1300 epoch_cost = 0.0609485416137epoch = 1400 epoch_cost = 0.0509344103436参数已保存到session训练集的准确率: 0.999074测试集的准确率: 0.725CPU的执行时间 = 588.1220405000004 秒'''