In TensorFlow, likewise, there is no need to download, parse, and load the CIFAR10 dataset by hand: the datasets.cifar10.load_data() function directly loads the already-split training and test sets. For example:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Sequential, losses, optimizers, datasets
import os
from Chapter10.CIFAR10 import load_data
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def preprocess(x, y):
    # Scale pixel values from [0, 255] to [0, 1] and cast labels to int
    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)
    return x, y
# Load the local CIFAR10 dataset (load_data is the custom offline loader shown below)
(x, y), (x_test, y_test) = load_data('/Users/xuruihang/.keras/datasets/cifar-10-batches-py')
# Remove the extra dimension of y, [b, 1] => [b]
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
# Print the shapes of the training and test sets
print(x.shape, y.shape, x_test.shape, y_test.shape)
# Build the training set pipeline: shuffle, preprocess, batch
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)
# Build the test set pipeline: preprocess, batch
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(128)
# Sample one batch from the training set and inspect it
sample = next(iter(train_db))
print('sample: ', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))
Running this prints the shapes of the training and test sets, followed by the shape and pixel value range of one sampled batch.
Note: load_data() here calls a small piece of custom code, because downloading the dataset directly raises an error:
import numpy as np
import os
def load_batch(file):
    import pickle
    # Read one CIFAR10 batch file and decode its byte keys
    with open(file, 'rb') as fo:
        d = pickle.load(fo, encoding='bytes')
        d_decoded = {}
        for k, v in d.items():
            d_decoded[k.decode('utf8')] = v
        d = d_decoded
    data = d['data']
    labels = d['labels']
    # Reshape the flat pixel rows into [b, 3, 32, 32]
    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels
def load_data(path='data/cifar-10-batches-py'):
    """Loads CIFAR10 dataset.
    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    from tensorflow.python.keras import backend as K
    num_train_samples = 50000
    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples,), dtype='uint8')
    # Concatenate the 5 training batch files into the full training set
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        (x_train[(i - 1) * 10000: i * 10000, :, :, :],
         y_train[(i - 1) * 10000: i * 10000]) = load_batch(fpath)
    # Load the test batch file
    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))
    # Move the channel axis to the end, [b, 3, 32, 32] => [b, 32, 32, 3]
    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)
    return (x_train, y_train), (x_test, y_test)
(x_train, y_train), (x_test, y_test) = load_data('/Users/xuruihang/.keras/datasets/cifar-10-batches-py')
For details, see "Keras CIFAR10 offline loading".
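If the direct download does work in your environment, the offline loader above is not needed and the standard Keras call mentioned at the start of this section can be used instead. A minimal sketch, assuming the default ~/.keras/datasets cache location:
import tensorflow as tf
from tensorflow.keras import datasets
# Download (or reuse the local Keras cache) and load CIFAR10
# y and y_test come back with shape [b, 1], just like the custom loader above,
# so the extra dimension still needs to be squeezed away
(x, y), (x_test, y_test) = datasets.cifar10.load_data()
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)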
As the output shows, the training set $\boldsymbol X$ and $\boldsymbol y$ have shapes $(50000, 32, 32, 3)$ and $(50000)$, and the test set $\boldsymbol X$ and $\boldsymbol y$ have shapes $(10000, 32, 32, 3)$ and $(10000)$: the images are $32\times 32$ color pictures, with 50000 training samples and 10000 test samples.
The CIFAR10 recognition task is not easy. This is mainly because the content of CIFAR10 pictures needs a fair amount of detail to come through, while the stored resolution is only $32\times 32$, so the subject of each picture is rather blurry and even humans find the classes hard to tell apart. Shallow neural networks have limited expressive power and are hard to train to a good level of performance, so this section builds on the more expressive VGG13 network, with parts of its structure modified to match our dataset, to carry out CIFAR10 image recognition.
The adjusted VGG13 network structure is shown in the figure below; we will refer to it simply as the VGG13 network model.
We implement the network as two sub-networks: a convolutional sub-network and a fully connected sub-network. The convolutional sub-network consists of 5 sub-modules, each containing a Conv-Conv-MaxPooling unit. The code is as follows:
conv_layers = [  # first build a list containing the network layers
    # Conv-Conv-Pooling unit 1
    # 64 3x3 kernels, output spatial size equal to the input
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    # halve the height and width
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # Conv-Conv-Pooling unit 2, output channels raised to 128, height and width halved
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # Conv-Conv-Pooling unit 3, output channels raised to 256, height and width halved
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # Conv-Conv-Pooling unit 4, output channels raised to 512, height and width halved
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # Conv-Conv-Pooling unit 5, output channels stay at 512, height and width halved
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
]
# Build the network container from the layer list above
conv_net = Sequential(conv_layers)
The fully connected sub-network contains 3 fully connected layers; every layer except the last one uses a ReLU activation. The code is as follows:
# Build the 3-layer fully connected sub-network
fc_net = Sequential([
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dense(128, activation=tf.nn.relu),
    layers.Dense(10, activation=None)
])
After the sub-networks have been created, the following code inspects their parameter counts:
conv_net.build(input_shape=[None, 32, 32, 3])
fc_net.build(input_shape=[None, 512])
conv_net.summary()
fc_net.summary()
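With both sub-networks built, a quick sanity check is to push a dummy batch through them and verify the shape flow; the sketch below uses a random tensor purely for shape checking (the batch size of 4 is arbitrary):
# A random "image" batch of shape [4, 32, 32, 3], used only to verify shapes
x_dummy = tf.random.normal([4, 32, 32, 3])
# Five rounds of halving reduce 32x32 to 1x1, giving [4, 1, 1, 512]
out = conv_net(x_dummy)
# Flatten to [4, 512] and feed the fully connected sub-network, giving [4, 10] logits
logits = fc_net(tf.reshape(out, [-1, 512]))
print(out.shape, logits.shape)  # (4, 1, 1, 512) (4, 10)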
The convolutional sub-network has about 9.4 million parameters and the fully connected sub-network about 0.17 million, giving roughly 9.5 million parameters in total, far fewer than the original VGG13.
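These totals can also be verified by hand by summing the element counts of all trainable tensors. A small sketch, where count_params is a hypothetical helper rather than part of the original code:
import numpy as np
def count_params(net):
    # Total number of elements across all trainable variables of a network
    return int(sum(np.prod(v.shape.as_list()) for v in net.trainable_variables))
print(count_params(conv_net))                         # convolutional sub-network
print(count_params(fc_net))                           # fully connected sub-network
print(count_params(conv_net) + count_params(fc_net))  # whole network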
Since the network is implemented as 2 sub-networks, the trainable parameter lists of both must be merged when performing the gradient update. The code is as follows:
# List concatenation: merge the parameters of the 2 sub-networks
variables = conv_net.trainable_variables + fc_net.trainable_variables
# Compute gradients with respect to all parameters
grads = tape.gradient(loss, variables)
# Apply the parameter update
optimizer.apply_gradients(zip(grads, variables))
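One detail the snippet above leaves implicit is that tape must record the forward pass inside a tf.GradientTape() context. A minimal sketch of one training step, assuming conv_net, fc_net, and train_db from above and an optimizer such as optimizers.Adam(learning_rate=1e-4):
# Draw a single batch from the training pipeline
x, y = next(iter(train_db))
with tf.GradientTape() as tape:
    # Forward pass: [b, 32, 32, 3] => [b, 512] => [b, 10]
    out = tf.reshape(conv_net(x), [-1, 512])
    logits = fc_net(out)
    # Cross-entropy against one-hot labels, averaged over the batch
    loss = tf.reduce_mean(tf.losses.categorical_crossentropy(
        tf.one_hot(y, depth=10), logits, from_logits=True))
# Merge the parameter lists of the 2 sub-networks and update them
variables = conv_net.trainable_variables + fc_net.trainable_variables
grads = tape.gradient(loss, variables)
optimizer.apply_gradients(zip(grads, variables))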
Running the code starts training the model; after 50 epochs of training, the network reaches a test accuracy of 77.5%.
Full code:
import tensorflow as tf
from tensorflow.keras import layers, optimizers, datasets, Sequential
import os
from Chapter10.CIFAR10 import load_data
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
tf.random.set_seed(2345)
conv_layers = [  # 5 units of conv + max pooling
    # unit 1
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 2
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 3
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 4
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    # unit 5
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same')
]
def preprocess(x, y):
    # Scale pixel values from [0, 255] to [-1, 1]
    x = 2 * tf.cast(x, dtype=tf.float32) / 255. - 1
    y = tf.cast(y, dtype=tf.int32)
    return x, y
# Load the local CIFAR10 dataset
(x, y), (x_test, y_test) = load_data('/Users/xuruihang/.keras/datasets/cifar-10-batches-py')
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)
train_db = tf.data.Dataset.from_tensor_slices((x,y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_db = test_db.map(preprocess).batch(64)
sample = next(iter(train_db))
print('sample:', sample[0].shape, sample[1].shape,
tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))
def main():
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    conv_net = Sequential(conv_layers)
    fc_net = Sequential([
        layers.Dense(256, activation=tf.nn.relu),
        layers.Dense(128, activation=tf.nn.relu),
        layers.Dense(10, activation=None),
    ])
    conv_net.build(input_shape=[None, 32, 32, 3])
    fc_net.build(input_shape=[None, 512])
    conv_net.summary()
    fc_net.summary()
    optimizer = optimizers.Adam(learning_rate=1e-4)
    # [1, 2] + [3, 4] => [1, 2, 3, 4]
    # List concatenation: merge the parameters of the 2 sub-networks
    variables = conv_net.trainable_variables + fc_net.trainable_variables
    for epoch in range(50):
        # Training phase
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                # [b, 32, 32, 3] => [b, 1, 1, 512]
                out = conv_net(x)
                # flatten, => [b, 512]
                out = tf.reshape(out, [-1, 512])
                # [b, 512] => [b, 10]
                logits = fc_net(out)
                # [b] => [b, 10]
                y_onehot = tf.one_hot(y, depth=10)
                # compute loss
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))
            if step % 100 == 0:
                print(epoch, step, 'loss:', float(loss))
        # Evaluation phase: accuracy over the whole test set
        total_num = 0
        total_correct = 0
        for x, y in test_db:
            out = conv_net(x)
            out = tf.reshape(out, [-1, 512])
            logits = fc_net(out)
            prob = tf.nn.softmax(logits, axis=1)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            total_num += x.shape[0]
            total_correct += int(correct)
        acc = total_correct / total_num
        print(epoch, 'acc:', acc)
if __name__ == '__main__':
    main()
As you can see, the accuracy reaches 77.41%. (This thing ran all night and my laptop practically took off.)