import tensorflow as tf
import os
from tensorflow.keras import datasets
from tensorflow import keras
# Reduce TensorFlow's C++ log noise. Values of TF_CPP_MIN_LOG_LEVEL:
#   0: show all messages
#   1: hide INFO messages
#   2: hide INFO and WARNING messages (only errors are shown)
# The variable name was previously misspelled ("LECEL"), so the setting was
# silently ignored.
# NOTE(review): this statement runs after `import tensorflow`, so it can
# presumably only affect messages logged from this point on; move it above
# the import if import-time messages must be silenced too.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"

# Put every visible GPU into memory-growth mode so that memory is allocated
# on demand instead of being reserved up front. This must happen before the
# first tensor is created, i.e. before any GPU is initialized.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# Load MNIST and convert the NumPy arrays to tensors.
# No explicit `.gpu()` call is made: that Tensor method is deprecated (and
# absent from newer TF 2.x releases) and fails on CPU-only machines, while
# TensorFlow already places ops on an available GPU by default.
(x, y), (x_test, y_test) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32)
y = tf.convert_to_tensor(y, dtype=tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)

# Report shape and value range of the training images and labels.
print("x:", x.shape, tf.reduce_min(x).numpy(), tf.reduce_max(x).numpy())
print("y:", y.shape, tf.reduce_min(y).numpy(), tf.reduce_max(y).numpy())

# Pair images with labels, shuffle, and split into mini-batches.
# Training batches hold 128 samples each; test batches hold 100 samples each.
train_db = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(100000).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(100000).batch(100)

# Peek at one training batch to check the batched shapes.
train_iter = iter(train_db)
sample = next(train_iter)
print("sample:", sample[0].shape, sample[1].shape)
# Weights and biases are stored as tf.Variable objects.
# Data flow through the layers: [b, 784] => [b, 256] => [b, 128] => [b, 10]
def _dense_params(n_in, n_out):
    """Create the (weights, bias) pair of one fully connected layer.

    Weights are drawn from a truncated normal distribution with stddev 0.1;
    the bias starts at zero.
    """
    weights = tf.Variable(tf.random.truncated_normal([n_in, n_out], stddev=0.1))
    bias = tf.Variable(tf.zeros([n_out]))
    return weights, bias


w1, b1 = _dense_params(784, 256)
w2, b2 = _dense_params(256, 128)
w3, b3 = _dense_params(128, 10)

# Learning rate used for the manual gradient-descent updates.
lr = tf.constant(1e-3)
# Train the three-layer network with hand-written gradient descent and
# report test-set accuracy every 100 training steps.
def _flatten_images(images):
    """Scale pixels to [0, 1] and flatten each 28x28 image to 784 values.

    Assumes the raw MNIST pixel range 0-255 (the min/max printed after
    loading shows the actual range). The same scaling is applied to training
    and test batches so the network is evaluated on the input distribution
    it was trained on.
    """
    return tf.reshape(images / 255.0, [-1, 28 * 28])


def _forward(inputs):
    """Return the logits of shape [b, 10] for inputs of shape [b, 784]."""
    h1 = tf.nn.relu(inputs @ w1 + b1)
    h2 = tf.nn.relu(h1 @ w2 + b2)
    return h2 @ w3 + b3


def _evaluate(dataset):
    """Return the classification accuracy over all batches of `dataset`."""
    total_num, total_correct = 0, 0
    for images, labels in dataset:
        logits = _forward(_flatten_images(images))
        # argmax over logits equals argmax over softmax(logits), so the
        # softmax is skipped.
        pred = tf.argmax(logits, axis=1, output_type=tf.int32)
        hits = tf.reduce_sum(tf.cast(tf.equal(pred, labels), dtype=tf.int32))
        total_correct += int(hits)
        total_num += labels.shape[0]
    return total_correct / total_num if total_num else 0.0


params = [w1, b1, w2, b2, w3, b3]
for epoch in range(10):
    # Batch names differ from the module-level x / y so the full dataset
    # tensors are not overwritten by the loop.
    for step, (x_batch, y_batch) in enumerate(train_db):
        inputs = _flatten_images(x_batch)
        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            out = _forward(inputs)
            y_onehot = tf.one_hot(y_batch, depth=10)
            # Mean squared error between logits and one-hot labels.
            loss = tf.reduce_mean(tf.square(out - y_onehot))
        gradients = tape.gradient(loss, params)
        # In-place update: param <- param - lr * grad.
        for param, grad in zip(params, gradients):
            param.assign_sub(lr * grad)
        # A full pass over the test set is expensive, so it only runs on the
        # steps that are actually logged.
        if step % 100 == 0:
            acc = _evaluate(test_db)
            print(epoch, step, "ACC:", acc)
# 本文为参考龙龙老师的“深度学习与TensorFlow 2入门实战”课程书写的学习笔记
# by CyrusMay 2022 04 16