The CIFAR-100 data is split into a training set and a test set of 50k and 10k images respectively; each image is 32×32×3, spread over 100 classes. It is exactly this combination of small images and many classes that makes the task fairly hard. The network here has 13 layers in total (2+2+2+2+2 convolutional layers plus 3 fully connected layers), so training will be on the slow side.
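As a quick sanity check on those numbers, the snippet below (a minimal sketch, assuming the standard tf.keras.datasets API; the shapes in the comments are what the CIFAR-100 loader is documented to return) loads the raw arrays and prints their shapes:

import tensorflow as tf

(x, y), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
print(x.shape, y.shape)             # (50000, 32, 32, 3) (50000, 1) -- training split
print(x_test.shape, y_test.shape)   # (10000, 32, 32, 3) (10000, 1) -- test split
print(len(set(int(c) for c in y.flatten())))  # 100 distinct class labels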
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.random.set_seed(2)
# set up the convolutional-layer pipeline
conv_layers = [
    # block 1: [b, 32, 32, 3] => [b, 16, 16, 64]
    layers.Conv2D(64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # block 2: => [b, 8, 8, 128]
    layers.Conv2D(128, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # block 3: => [b, 4, 4, 256]
    layers.Conv2D(256, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # block 4: => [b, 2, 2, 512]
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # block 5: => [b, 1, 1, 512]
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same')
]
# data preprocessing: normalize images to [0, 1], cast labels to int32
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x, y
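# For instance (a hypothetical toy call, not from the original notes):
#   preprocess(tf.constant([255], dtype=tf.uint8), tf.constant([7]))
# returns a float32 tensor [1.0] and an int32 tensor [7].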
batch_size = 128
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
# y and y_test are not vectors: they carry an extra dimension of size 1
# ([N, 1] instead of [N]), so squeeze them so that one_hot lines up later
y, y_test = tf.squeeze(y), tf.squeeze(y_test)
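# A toy check of why the squeeze matters (hypothetical values): tf.one_hot
# appends the depth dimension, so a [1, 1]-shaped label gives a [1, 1, 5]
# one-hot tensor, while a squeezed [1]-shaped label gives [1, 5]:
#   tf.one_hot([[3]], depth=5).shape  # TensorShape([1, 1, 5])
#   tf.one_hot([3], depth=5).shape    # TensorShape([1, 5])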
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(preprocess).shuffle(100000).batch(batch_size)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(preprocess).batch(batch_size)
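# Optional sanity check (uncomment to peek at one batch; with batch_size=128
# I would expect images of shape (128, 32, 32, 3) and labels of shape (128,)):
# sample_x, sample_y = next(iter(db))
# print('batch:', sample_x.shape, sample_y.shape)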
def main():
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)  # Sequential only takes a flat list of layers, not nested lists
    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(100, activation=None)  # raw logits: the loss below uses from_logits=True
    ])
    conv_net.build(input_shape=[None, 32, 32, 3])  # probably optional here
    fc_net.build(input_shape=[None, 512])
    # list concatenation in Python: [1, 2] + [3, 4] = [1, 2, 3, 4], so this
    # collects the trainable variables of both sub-networks for the optimizer
    variables = conv_net.trainable_variables + fc_net.trainable_variables
    # keep the learning rate small, otherwise gradients tend to vanish
    optimizer = optimizers.Adam(learning_rate=1e-4)
    # training loop
    for epoch in range(50):
        for step, (x, y) in enumerate(db):
            with tf.GradientTape() as tape:
                # conv + pooling: [b, 32, 32, 3] => [b, 1, 1, 512] => [b, 512]
                out = conv_net(x)
                out = tf.reshape(out, [-1, 512])
                # fully connected layers: [b, 512] => [b, 100]
                logits = fc_net(out)
                y_onehot = tf.one_hot(y, depth=100)
                # compute loss
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))
            if step % 100 == 0:
                print('epoch: %d, step: %d, loss: %.5f' % (epoch, step, float(loss)))
        # evaluation: after each epoch, measure accuracy on the test set
        correct_num, total_num = 0, 0
        for x, y in db_test:
            # [b, 32, 32, 3] => [b, 1, 1, 512] => [b, 512]
            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            # [b, 512] => [b, 100]
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            # [b, 100] => [b]; note that tf.argmax returns int64
            pred = tf.argmax(prob, axis=1)
            # int64 => int32
            pred = tf.cast(pred, dtype=tf.int32)
            correct = tf.cast(tf.equal(y, pred), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            correct_num += int(correct)
            total_num += x.shape[0]
        accuracy = (correct_num / total_num) * 100
        print('epoch: %d, acc: %.2f' % (epoch, accuracy))
    return None
if __name__ == '__main__':
main()
What exactly LeNet and VGG are, I still need to keep exploring. The more I learn, the more there seems to be, and I keep forgetting the earlier material. For now I'll try to commit as much as possible to memory in one pass, and reinforce it later when I actually need it in practice.