Hands-on CIFAR-100 with a Convolutional Neural Network

The CIFAR-100 data is split into a training set and a test set of 50k and 10k images respectively. Each image is 32×32×3, and there are 100 classes. The small images combined with the large number of classes are exactly what makes this task hard. The network here has 13 layers in total (five blocks of two convolutional layers each, plus three fully connected layers), so training will be fairly slow.
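
Before the script itself, a quick sanity check on the shape arithmetic: each of the five convolutional blocks ends in a stride-2 max-pool, so the 32×32 input is halved five times down to 1×1, leaving a [b, 1, 1, 512] feature map that flattens to [b, 512]. A minimal standalone sketch of that calculation:

side = 32
for _ in range(5):      # five stride-2 max-pool stages
    side //= 2          # 'same' padding with stride 2 halves each spatial side
print(side)             # 1 -> final feature map is [b, 1, 1, 512]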

import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow's C++ log output
tf.random.set_seed(2)  # fix the random seed for reproducibility

# Build the convolutional pipeline: five Conv-Conv-MaxPool blocks
conv_layers = [
	   layers.Conv2D(64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.Conv2D(64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
	   
	   layers.Conv2D(128, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.Conv2D(128, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
	   
	   layers.Conv2D(256, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.Conv2D(256, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
	   
	   layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
	   
	   layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.Conv2D(512, kernel_size=[3, 3], padding='same', activation=tf.nn.relu),
	   layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same')
	   ]
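# spatial size across the five blocks: 32 -> 16 -> 8 -> 4 -> 2 -> 1; channels: 3 -> 64 -> 128 -> 256 -> 512 -> 512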
	   
# Data preprocessing: scale pixels to [0, 1] and cast labels to int32
def preprocess(x, y):
	x = tf.cast(x, dtype=tf.float32) / 255.
	y = tf.cast(y, dtype=tf.int32)
	
	return x, y
	
batch_size = 128
(x, y), (x_test, y_test) = datasets.cifar100.load_data()
# y and y_test come back as [N, 1] rather than flat vectors; squeeze out the extra axis so tf.one_hot lines up later
y, y_test = tf.squeeze(y), tf.squeeze(y_test)
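# e.g. y: [50000, 1] -> [50000], y_test: [10000, 1] -> [10000]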

db = tf.data.Dataset.from_tensor_slices((x, y))
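# a shuffle buffer larger than the 50k training set shuffles over the whole dataset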
db = db.map(preprocess).shuffle(100000).batch(batch_size)

db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(preprocess).batch(batch_size)

def main():
	# [b, 32, 32, 3] => [b, 1, 1, 512]
	conv_net = Sequential(conv_layers)   # Sequential expects a list of layers

	fc_net = Sequential([
			     layers.Dense(256, activation=tf.nn.relu),
			     layers.Dense(128, activation=tf.nn.relu),
			     layers.Dense(100, activation=None)   # output raw logits; the loss applies softmax (from_logits=True)
			    ])
	conv_net.build(input_shape=[None, 32, 32, 3])  # optional here: the first forward pass would build the weights anyway
	fc_net.build(input_shape=[None, 512])

	# Python list concatenation: [1, 2] + [3, 4] == [1, 2, 3, 4]
	variables = conv_net.trainable_variables + fc_net.trainable_variables

	# Optimizer: keep the learning rate small, otherwise training is prone to destabilize
	optimizer = optimizers.Adam(learning_rate=1e-4)

	# Training loop
	for epoch in range(50):

		for step, (x, y) in enumerate(db):
			with tf.GradientTape() as tape:
				# convolution and pooling: [b, 32, 32, 3] => [b, 1, 1, 512], then flatten => [b, 512]
				out = conv_net(x)
				out = tf.reshape(out, [-1, 512])
				# fully connected layers: [b, 512] => [b, 100]
				logits = fc_net(out)

				y_onehot = tf.one_hot(y, depth=100)
				# cross-entropy computed straight from logits; from_logits=True applies softmax internally
				loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
				loss = tf.reduce_mean(loss)

			grads = tape.gradient(loss, variables)
			optimizer.apply_gradients(zip(grads, variables))

			if step % 100 == 0:
				print('epoch: %d, step: %d, loss: %.5f' % (epoch, step, loss))

		# Evaluation: measure accuracy on the test set after each epoch
		correct_num, total_num = 0, 0
		for x, y in db_test:
			# [b, 32, 32, 3] => [b, 1, 1, 512] => [b, 512]
			out = conv_net(x)
			out = tf.reshape(out, [-1, 512])
			# [b, 512] => [b, 100]
			logits = fc_net(out)
			prob = tf.nn.softmax(logits, axis=1)
			# [b, 100] => [b]; note that tf.argmax returns int64
			pred = tf.argmax(prob, axis=1)
			# int64 => int32
			pred = tf.cast(pred, dtype=tf.int32)
			
			correct = tf.cast(tf.equal(y, pred), dtype=tf.int32)
			correct = tf.reduce_sum(correct)
			
			correct_num += int(correct)
			total_num += x.shape[0]

		accuracy = (correct_num / total_num)*100
		print('epoch:%d, acc:%.2f' % (epoch, accuracy))

	return None

if __name__ == '__main__':
	main()
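
One design choice worth spelling out: the last Dense layer emits raw logits (no activation) because the loss is called with from_logits=True, which applies softmax internally and is numerically more stable than passing in probabilities. A minimal standalone sketch (arbitrary example values) showing the two formulations agree:

import tensorflow as tf

logits = tf.constant([[2.0, -1.0, 0.5]])     # raw network outputs
y_onehot = tf.constant([[1.0, 0.0, 0.0]])    # one-hot target

# softmax folded into the loss (what the training script does)
loss_a = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
# softmax applied by hand, loss consumes probabilities
loss_b = tf.losses.categorical_crossentropy(y_onehot, tf.nn.softmax(logits), from_logits=False)
print(loss_a.numpy(), loss_b.numpy())        # the two match up to floating-point error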

What exactly LeNet and VGG are is something I still need to explore. The more I learn, the more there seems to be, and I keep forgetting the older material. For now I'll go over as much of it as I can to get it into memory, then reinforce it once I actually need it in practice.
