Updated: 2010-10-5
TensorFlow v2.x offers several ways to build a model, all through Keras: the Sequential (sequential) style, the subclassing style, and the functional style. They are introduced one by one below.
Whichever style you choose, the workflow of a deep-learning solution stays the same: data preparation -> model construction -> loss function -> optimizer selection -> model training -> model evaluation.
The sequential style is the easiest to understand: exactly like the usual diagram of a deep network, layers are stacked one after another. For example:
# Import libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Load the data
mnist = tf.keras.datasets.mnist
(train_imgs, train_labels), (test_imgs, test_labels) = mnist.load_data()
train_imgs, test_imgs = train_imgs / 255.0, test_imgs / 255.0
train_imgs = train_imgs.reshape(60000,28,28,1)
test_imgs = test_imgs.reshape(10000,28,28,1)
# Build the model
# The network contains all the usual pieces: convolution, pooling, batch-norm, fully connected, and dropout layers.
cnn_model = keras.Sequential()
cnn_model.add(layers.Conv2D(input_shape=(28,28,1),filters=32,kernel_size=(3,3),activation='relu'))
cnn_model.add(layers.BatchNormalization())
cnn_model.add(layers.Conv2D(filters=64,kernel_size=(3,3),activation='relu'))
cnn_model.add(layers.BatchNormalization())
cnn_model.add(layers.MaxPool2D((2,2)))
cnn_model.add(layers.Conv2D(filters=128,kernel_size=(3,3),activation='relu'))
cnn_model.add(layers.BatchNormalization())
cnn_model.add(layers.Conv2D(filters=256,kernel_size=(3,3),activation='relu'))
cnn_model.add(layers.BatchNormalization())
cnn_model.add(layers.MaxPool2D((2,2)))
cnn_model.add(layers.Conv2D(filters=512,kernel_size=(3,3),activation='relu'))
cnn_model.add(layers.BatchNormalization())
cnn_model.add(layers.Flatten())
cnn_model.add(layers.Dense(1000,activation='relu'))
cnn_model.add(layers.Dropout(0.2))
cnn_model.add(layers.Dense(100,activation='relu'))
cnn_model.add(layers.Dropout(0.2))
cnn_model.add(layers.Dense(10,activation='softmax'))
cnn_model.summary()
# Compile the model
# Loss function, optimizer choice, accuracy metric
cnn_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
# Train the model
cnn_model.fit(train_imgs,train_labels,batch_size=256,epochs=10)
# Evaluate the model
cnn_model.evaluate(test_imgs,test_labels)
As the code shows, the whole process is: create a Sequential() instance, add the various layers to it with model.add(), then compile, train, and evaluate.
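Incidentally, Keras also accepts the layer list directly in the constructor. A minimal sketch of this equivalent construction, truncated to a few layers just to show the shape of the API:
from tensorflow import keras
from tensorflow.keras import layers

# Equivalent construction: hand the layer list to Sequential() instead of
# calling add() repeatedly (truncated to a few layers for brevity).
small_model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.BatchNormalization(),
    layers.MaxPool2D((2, 2)),
    layers.Flatten(),
    layers.Dense(10, activation='softmax'),
])
small_model.summary()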
The second style, model subclassing, is better suited to programmers who already know TensorFlow fairly well; even so, it is much more convenient than v1.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Flatten, Dense
import numpy as np
# Prepare the data
(train_imgs, train_labels), (test_imgs, test_labels) = keras.datasets.mnist.load_data()
train_imgs, test_imgs = train_imgs / 255.0, test_imgs / 255.0
# Add a channel dimension, the numpy way:
# train_imgs = np.expand_dims(train_imgs, axis=-1)
# test_imgs = np.expand_dims(test_imgs, axis=-1)
# Add a channel dimension, the tf way:
# ... stands for all the dimensions before the index, so for a
# (60000, 28, 28) array, a[..., tf.newaxis] has shape (60000, 28, 28, 1).
train_imgs = train_imgs[..., tf.newaxis]
test_imgs = test_imgs[..., tf.newaxis]
train_ds = tf.data.Dataset.from_tensor_slices(
    (train_imgs, train_labels)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((test_imgs, test_labels)).batch(32)
print(type(train_ds))
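# Optional sanity check (an illustrative addition, not strictly needed):
# peek at one batch to confirm the shapes are what the model expects.
for batch_imgs, batch_labels in train_ds.take(1):
    print(batch_imgs.shape, batch_labels.shape)  # (32, 28, 28, 1) (32,)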
# Define the model
class MnistModel(keras.Model):
    def __init__(self):
        super(MnistModel, self).__init__()
        self.conv1 = Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1))
        self.bn1 = BatchNormalization()
        self.conv2 = Conv2D(64, (3, 3), activation='relu')
        self.bn2 = BatchNormalization()
        self.maxpool1 = MaxPool2D((2, 2))
        self.conv3 = Conv2D(128, (3, 3), activation='relu')
        self.bn3 = BatchNormalization()
        self.flatten = Flatten()
        self.dense1 = Dense(1000, activation='relu')
        self.dropout1 = keras.layers.Dropout(0.2)
        self.dense2 = Dense(100, activation='relu')
        self.output1 = Dense(10, activation='softmax')

    def call(self, x):
        cnn_block1 = self.bn1(self.conv1(x))
        cnn_block2 = self.maxpool1(self.bn2(self.conv2(cnn_block1)))
        cnn_block3 = self.bn3(self.conv3(cnn_block2))
        dense_block = self.dense2(
            self.dropout1(self.dense1(self.flatten(cnn_block3))))
        return self.output1(dense_block)
# Instantiate the model
model = MnistModel()
# Define the metrics: mean training loss and test accuracy
train_loss = keras.metrics.Mean()
test_acc = keras.metrics.SparseCategoricalAccuracy()
# Choose the optimizer
optimizer = keras.optimizers.Adam()
# Define the training step
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        y_ = model(x)
        loss = keras.losses.SparseCategoricalCrossentropy()(y, y_)
    # Gradient of the loss with respect to all trainable variables
    gradients = tape.gradient(loss, model.trainable_variables)
    # Apply a gradient-descent update to the parameters
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
# Define the test step
@tf.function
def test_step(x, y):
    y_ = model(x)
    test_acc(y, y_)
# Train the model
MAX_STEPS = 5000
# A tf.data.Dataset is essentially an iterable: iter() gets an iterator
# over it, and next() fetches the next batch.
iter_data = iter(train_ds)
for i in range(MAX_STEPS):
    try:
        img, label = next(iter_data)
    except StopIteration:
        # The dataset is exhausted after one pass (60000 / 32 = 1875
        # batches), so restart the iterator instead of silently skipping
        # the remaining steps with a bare except.
        iter_data = iter(train_ds)
        img, label = next(iter_data)
    if not (i + 1) % 100:
        # Every 100th batch is used for a quick accuracy check instead
        # of a training update.
        test_step(img, label)
        print(i, 'test acc:', test_acc.result())
    else:
        train_step(img, label)
        print(i, train_loss.result())
# Reset the accumulated state so the final number reflects only the test set
test_acc.reset_states()
for img, label in test_ds:
    test_step(img, label)
print(test_acc.result())
The code uses the @tf.function decorator, which traces the eager-mode (dynamic-graph) operations inside the function and compiles them into a static graph.
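A minimal toy sketch of what the decorator does (this function is illustrative only, not part of the article's model): the Python body is traced once into a graph, and later calls with a matching input signature reuse the compiled graph.
import tensorflow as tf

@tf.function
def scaled_sum(x):
    # Traced into a static graph on the first call; later calls with the
    # same input signature reuse the compiled graph instead of re-running
    # the Python code eagerly.
    return tf.reduce_sum(x) * 2.0

print(scaled_sum(tf.constant([1.0, 2.0])))  # tf.Tensor(6.0, shape=(), dtype=float32)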
Some details of the code above still deserve attention: batch-norm and dropout layers are supposed to behave differently at training time and at test time, yet the code seems to run anyway.
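For reference, this is how that distinction is normally handled in a subclassed model: Keras layers such as BatchNormalization and Dropout accept a training argument, and keras.Model forwards the training keyword from model(x, training=...) into call(). A minimal sketch under that assumption (the toy layer sizes are placeholders, not the article's network):
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, Flatten

class SmallModel(keras.Model):
    def __init__(self):
        super(SmallModel, self).__init__()
        self.flatten = Flatten()
        self.dense1 = Dense(64, activation='relu')
        self.bn1 = BatchNormalization()
        self.drop1 = Dropout(0.2)
        self.dense2 = Dense(10, activation='softmax')

    def call(self, x, training=False):
        x = self.dense1(self.flatten(x))
        # Batch norm uses per-batch statistics when training=True and the
        # moving averages when training=False.
        x = self.bn1(x, training=training)
        # Dropout zeroes activations only when training=True.
        x = self.drop1(x, training=training)
        return self.dense2(x)

model = SmallModel()
x = tf.random.normal((8, 28, 28, 1))
y_train_mode = model(x, training=True)   # what train_step would use
y_eval_mode = model(x, training=False)   # what test_step would use
Without the explicit flag the code still runs because the argument is optional, but passing training= makes the intended behavior in a custom loop unambiguous.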
To be updated: the functional style has not been covered yet.