[Tensorflow2.X][原创]入门基础之fashion_mnist分类
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import os
import sys
def plot_learning_curves(history):
    """Plot every per-epoch series stored in a training history.

    Args:
        history: Object whose ``history`` attribute can be turned into a
            DataFrame with one column per recorded series (the value
            returned by ``model.fit`` is passed in by this script).

    The y-axis is fixed to the range [0, 1], so any value above 1 falls
    outside the visible area.
    """
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 1)
    plt.show()
# Fashion-MNIST dataset shipped with Keras.
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 training samples for validation; train on the rest.
# NOTE(review): the images are fed to the network exactly as loaded, with no
# rescaling. Presumably they are raw 0-255 pixel values; scaling them to [0, 1]
# would likely help sigmoid + SGD converge, but it would also change the
# results recorded for this script -- confirm before changing.
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]

# Build the model one layer at a time.
model = tf.keras.models.Sequential()
# Flatten each 28x28 image into a 784-element vector.
model.add(tf.keras.layers.Flatten(input_shape=[28, 28]))
# Fully connected layers computing activation(WX + b); 300 and 100 are the
# number of output units of each layer (not a convolution kernel size).
model.add(tf.keras.layers.Dense(300, activation='sigmoid'))
model.add(tf.keras.layers.Dense(100, activation='sigmoid'))
# Output layer: softmax turns the 10 outputs into a probability distribution.
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Alternative way to write the model: pass the layers as a list. This sketch
# uses relu (max(0, x)) in the hidden layers; the Flatten layer is included so
# that it accepts the same 28x28 input as the model above.
# model = tf.keras.models.Sequential([
#     tf.keras.layers.Flatten(input_shape=[28, 28]),
#     tf.keras.layers.Dense(300, activation='relu'),
#     tf.keras.layers.Dense(100, activation='relu'),
#     tf.keras.layers.Dense(10, activation='softmax'),
# ])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_valid, y_valid))
plot_learning_curves(history)
model.summary()结构:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 784) 0
_________________________________________________________________
dense (Dense) (None, 300) 235500
_________________________________________________________________
dense_1 (Dense) (None, 100) 30100
_________________________________________________________________
dense_2 (Dense) (None, 10) 1010
=================================================================
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
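各层的参数量都由 WX+b 得出,即“权重个数 + 偏置个数”:
第2行(dense):输入形状为 [None,784],权重 W 的形状为 [784,300],偏置 b 有 300 个,参数量 = 784x300+300 = 235500
第3行(dense_1):输入形状为 [None,300],权重 W 的形状为 [300,100],偏置 b 有 100 个,参数量 = 300x100+100 = 30100
其他层依次类推,例如第4行(dense_2):100x10+10 = 1010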
利用 history 绘制训练过程中各项指标(loss、accuracy、val_loss、val_accuracy)随 epoch 变化的曲线:
最终结果:
loss: 0.5496 - accuracy: 0.8094 - val_loss: 0.5803 - val_accuracy: 0.7990