记录分别在TensorFlow1.x与TensorFlow2.x中使用单神经元完成MNIST手写数字识别的过程。
将回归值映射为各分类的概率
1.sigmoid函数:$y=\frac{1}{1+e^{-z}}$
将 $z\in(-\infty,+\infty)$ 映射到 $y\in[0,1]$,$z=0$ 时 $y=0.5$,连续可微
代入到平方损失函数后得到非凸函数,存在多个极小值,容易陷入局部最优
2.对数损失函数:$Loss=\sum\left[-y\log(\hat{y})-(1-y)\log(1-\hat{y})\right]$,为凸函数
1.softmax函数:$P_i=\frac{e^{y_i}}{\sum_k e^{y_k}}$
增大差距,映射到 $y\in[0,1]$,各分类概率和为1
2.交叉熵损失函数:$Loss=\sum -y\log(\hat{y})$
两个概率分布的距离
在tensorflow2.x中调用数据集;
训练集训练模型,验证集调整超参数,测试集测试模型效果
训练集60000个样本,取5000个样本作为验证集;测试集10000个样本
import tensorflow as tf2
import matplotlib.pyplot as plt
import numpy as np
# Fetch MNIST through the Keras dataset helper shipped with TensorFlow 2.x.
mnist = tf2.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image to a 784-element row, divide by 255 to normalise the
# grey levels, and store the result as float32 tensors.
x_train = tf2.cast(x_train.reshape((-1, 784)) / 255.0, tf2.float32)
x_test = tf2.cast(x_test.reshape((-1, 784)) / 255.0, tf2.float32)

# One-hot encode the labels over the 10 digit classes.
y_train = tf2.one_hot(y_train, depth=10)
y_test = tf2.one_hot(y_test, depth=10)

# The training set fits the model, the validation set is used to tune
# hyper-parameters and the test set measures the final model.
# Of the 60000 training samples the last 5000 are held out for validation;
# the test set has 10000 samples.
x_train, x_valid = x_train[:55000], x_train[55000:]
y_train, y_valid = y_train[:55000], y_train[55000:]
显示图片、标签与预测值
def show(images, labels, preds, num=16):
    """Plot the first ``num`` images with their true and predicted classes.

    Each image is drawn in its own subplot of figure 1, titled
    ``label:<true>,pred:<predicted>``, with axis ticks removed.

    Args:
        images: Indexable collection of flattened images that also supports
            ``len()``; every entry must support ``.reshape(28, 28)``
            (e.g. a NumPy array of shape ``(n, 784)``).
        labels: Per-image label vectors; ``np.argmax`` of entry ``i`` is
            displayed as the true class.
        preds: Per-image prediction vectors, decoded with ``np.argmax`` too.
        num: Maximum number of images to draw. Defaults to 16, which gives
            the original 4x4 grid. When fewer images are supplied only those
            are drawn instead of raising ``IndexError``.
    """
    cols = 4
    # Ceiling division so a partially filled last row still gets a slot.
    rows = max(1, -(-num // cols))
    # Never index past the end of the supplied data.
    count = min(num, len(images))

    fig1 = plt.figure(1, figsize=(12, 12))
    for i in range(count):
        ax = fig1.add_subplot(rows, cols, i + 1)
        ax.imshow(images[i].reshape(28, 28), cmap='binary')
        label = np.argmax(labels[i])
        pred = np.argmax(preds[i])
        ax.set_title(f'label:{label:d},pred:{pred:d}')
        # Pixel coordinates carry no information for a digit thumbnail.
        ax.set_xticks([])
        ax.set_yticks([])
import tensorflow.compat.v1 as tf
from sklearn.utils import shuffle
# Switch to TF1-style graph execution; the placeholders and session used
# below are graph-mode constructs.
tf.disable_eager_execution()

with tf.name_scope('Model'):
    # Graph inputs: a batch of flattened images and the matching one-hot
    # labels. The batch dimension is left open (None).
    x = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='X')
    y = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='Y')
    # Parameters of the single layer: normally distributed weights and a
    # zero-initialised bias, one column / entry per class.
    w = tf.Variable(tf.random_normal(shape=(784, 10)), name='W')
    b = tf.Variable(tf.zeros(shape=[10]), name='B')
def model(x, w, b):
    """Return the softmax class probabilities of the linear model.

    Args:
        x: Input batch that is matrix-multiplied with ``w``.
        w: Weight matrix.
        b: Bias added to the product ``x @ w``.

    Returns:
        ``tf.nn.softmax`` applied to ``tf.matmul(x, w) + b``.
    """
    # Forward pass: affine transform producing one score per class.
    logits = tf.add(tf.matmul(x, w), b)
    # Turn the scores into a probability distribution over the classes.
    return tf.nn.softmax(logits)
# Prediction op built from the placeholders and variables.
pred = model(x, w, b)

# Training hyper-parameters.
train_epoch = 100
learning_rate = 0.1
batch_size = 100
batch_num = x_train.shape[0] // batch_size

# Bookkeeping for the validation curves: metrics are recorded every
# `display_step` optimisation steps.
step = 0
display_step = 5
loss_list = []
acc_list = []

# Cross-entropy loss: -sum(y * log(pred)) over the class axis for each
# sample, then averaged over the batch. Averaging over every element
# instead would shrink the loss (and its gradient) by the number of classes.
# The probabilities are clipped away from 0 so that a saturated softmax
# cannot produce log(0) and turn the loss into NaN.
loss_function = tf.reduce_mean(
    -tf.reduce_sum(y * tf.log(tf.clip_by_value(pred, 1e-7, 1.0)), axis=1))

# Accuracy: fraction of samples whose arg-max prediction matches the label.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(y, axis=1), tf.argmax(pred, axis=1)),
            tf.float32))

# Optimizer: plain gradient descent minimising the loss above.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss_function)
变量初始化
# Op that initialises every variable in the graph.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Evaluate the dataset tensors so that feed_dict receives NumPy arrays.
    x_train, x_valid, x_test = sess.run([x_train, x_valid, x_test])
    y_train, y_valid, y_test = sess.run([y_train, y_valid, y_test])

    # Iterative training.
    for epoch in range(train_epoch):
        if epoch % 10 == 0:
            print('epoch:%d' % epoch)

        for start in range(0, batch_num * batch_size, batch_size):
            stop = start + batch_size
            xi, yi = x_train[start:stop], y_train[start:stop]
            sess.run(optimizer, feed_dict={x: xi, y: yi})
            step += 1
            if step % display_step != 0:
                continue
            # Record the validation loss and accuracy for the curves.
            loss, acc = sess.run([loss_function, accuracy],
                                 feed_dict={x: x_valid, y: y_valid})
            loss_list.append(loss)
            acc_list.append(acc)

        # Shuffle samples and labels together before the next epoch.
        x_train, y_train = shuffle(x_train, y_train)

    # Predictions and accuracy on the test set.
    y_pred, acc = sess.run([pred, accuracy],
                           feed_dict={x: x_test, y: y_test})

# Validation curves: loss on the left, accuracy on the right.
fig2 = plt.figure(2, figsize=(12, 6))
for position, (series, fmt, title) in enumerate(
        ((loss_list, 'r-', 'loss'), (acc_list, 'b-', 'acc')), start=1):
    ax = fig2.add_subplot(1, 2, position)
    ax.plot(series, fmt)
    ax.set_title(title)

print('Accuracy:{:.2%}'.format(acc))
show(x_test, y_test, y_pred)
测试集上的准确率
验证集上的损失值与准确率曲线
测试集图片标签与预测
import tensorflow as tf
from sklearn.utils import shuffle
# Trainable parameters of the single layer: normally distributed weights and
# a zero bias. The dtype is passed by keyword because the second positional
# parameter of tf.Variable is `trainable`, not `dtype`.
w = tf.Variable(tf.random.normal((784, 10)), dtype=tf.float32)
b = tf.Variable(tf.zeros(10), dtype=tf.float32)
def model(x, w, b):
    """Compute softmax probabilities for ``x`` under weights ``w`` and bias ``b``.

    Args:
        x: Input batch, matrix-multiplied with ``w``.
        w: Weight matrix.
        b: Bias added to ``tf.matmul(x, w)``.

    Returns:
        The result of ``tf.nn.softmax`` on the affine scores.
    """
    scores = tf.matmul(x, w) + b
    probabilities = tf.nn.softmax(scores)
    return probabilities
# Loss function
def loss_function(x, y, w, b):
    """Return the mean categorical cross-entropy of the model on ``(x, y)``.

    Args:
        x: Input batch passed to ``model``.
        y: Target labels, used as ``y_true``.
        w: Weight matrix passed to ``model``.
        b: Bias passed to ``model``.

    Returns:
        ``tf.reduce_mean`` of the per-sample
        ``tf.keras.losses.categorical_crossentropy`` values.
    """
    per_sample = tf.keras.losses.categorical_crossentropy(
        y_true=y, y_pred=model(x, w, b))
    return tf.reduce_mean(per_sample)
# Accuracy
def accuracy(x, y, w, b):
    """Return the fraction of samples whose arg-max prediction matches ``y``.

    Args:
        x: Input batch passed to ``model``.
        y: Target labels; the arg-max along axis 1 is the true class.
        w: Weight matrix passed to ``model``.
        b: Bias passed to ``model``.

    Returns:
        Mean of the float32-cast element-wise comparison between true and
        predicted class indices.
    """
    predicted = tf.argmax(model(x, w, b), axis=1)
    expected = tf.argmax(y, axis=1)
    hits = tf.cast(tf.equal(expected, predicted), tf.float32)
    return tf.reduce_mean(hits)
# Gradients
def grad(x, y, w, b):
    """Return the gradients of the loss with respect to ``w`` and ``b``.

    Args:
        x: Input batch.
        y: Target labels.
        w: Weight variable.
        b: Bias variable.

    Returns:
        The result of ``tape.gradient`` for the sources ``[w, b]``, in that
        order.
    """
    params = [w, b]
    # Record the forward pass so the tape can differentiate through it.
    with tf.GradientTape() as tape:
        batch_loss = loss_function(x, y, w, b)
    return tape.gradient(batch_loss, params)
# Training hyper-parameters.
train_epoch, learning_rate, batch_size = 10, 0.01, 100
batch_num = x_train.shape[0] // batch_size

# Display interval: validation metrics are recorded every `display_step`
# optimisation steps.
step, display_step = 0, 5
loss_list, acc_list = [], []

# Adam optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)

# Iterative training.
for epoch in range(train_epoch):
    print('epoch:%d' % epoch)

    for start in range(0, batch_num * batch_size, batch_size):
        stop = start + batch_size
        xi, yi = x_train[start:stop], y_train[start:stop]
        grads = grad(xi, yi, w, b)
        optimizer.apply_gradients(zip(grads, [w, b]))
        step += 1
        if step % display_step != 0:
            continue
        loss_list.append(loss_function(x_valid, y_valid, w, b))
        acc_list.append(accuracy(x_valid, y_valid, w, b))

    # Shuffle the order: sklearn's shuffle is given NumPy arrays here, so
    # convert, shuffle both arrays together, then cast back to float32
    # tensors.
    x_train, y_train = (
        tf.cast(array, tf.float32)
        for array in shuffle(x_train.numpy(), y_train.numpy())
    )
# Validation-set results: loss curve on the left, accuracy on the right.
fig2 = plt.figure(2, figsize=(12, 6))
for position, (series, fmt, title) in enumerate(
        ((loss_list, 'r-', 'loss'), (acc_list, 'b-', 'acc')), start=1):
    ax = fig2.add_subplot(1, 2, position)
    ax.plot(series, fmt)
    ax.set_title(title)

# Test-set results: overall accuracy plus a grid of sample predictions.
acc = accuracy(x_test, y_test, w, b)
print('Accuracy:{:.2%}'.format(acc))
y_pred = model(x_test, w, b)
show(x_test.numpy(), y_test, y_pred)
测试集上的准确率
验证集上的损失值与准确率曲线
测试集图片标签与预测
分类在回归的基础上通过softmax函数放大不同类之间的概率差异,损失函数改为凸的交叉熵损失函数。
在tf1.x中,feed_dict需要提交numpy数组,可通过sess.run(Tensor)将张量转换为数组;
sklearn.utils.shuffle不能打乱张量类型,在tf2.x中使用Tensor.numpy()将张量转换为数组。
使用Adam优化器,一轮的训练速度减慢,但收敛速度加快,模型准确率也提高。