有关公式、基本理论等大量内容摘自《动手学深度学习》(TF2.0版)。
前面我们需要简洁实现都是用的Sequential来实现的,我们可能发现简洁实现很简单,但是内部细节可能很难控制。而自己从零开始实现又过于麻烦,而tf.keras.Model则可以实现上述的均衡。(具体内容就不详细介绍了)
在进入正题之前先说一件事:前面我们训练时都是把图片展成一维向量再输入的,这样其实是不对的,因为这种方法破坏了像素之间的空间(纵向)数据关系。
可以通过如下程序进行验证:
import tensorflow as tf
import numpy as np

print(tf.__version__)


def corr2d(X, K):
    """Compute the 2-D cross-correlation of input X with kernel K."""
    h, w = K.shape
    # The output shrinks by (kernel size - 1) along each dimension.
    Y = tf.Variable(tf.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.cast(tf.reduce_sum(X[i:i + h, j:j + w] * K),
                                   dtype=tf.float32))
    return Y


X = tf.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = tf.constant([[0, 1], [2, 3]])
print(corr2d(X, K))
如下图所示:
结果如下图所示:
实现代码:
import tensorflow as tf
import numpy as np

print(tf.__version__)


# Convolution (cross-correlation) function.
def corr2d(X, K):
    """Compute the 2-D cross-correlation of input X with kernel K."""
    h, w = K.shape
    Y = tf.Variable(tf.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.cast(tf.reduce_sum(X[i:i + h, j:j + w] * K),
                                   dtype=tf.float32))
    return Y


# "Image" to detect: ones with a band of zeros in columns 2..5.
X = tf.Variable(tf.ones((6, 8)))
X[:, 2:6].assign(tf.zeros(X[:, 2:6].shape))
print(X)
# Kernel [1, -1] responds to horizontal 1->0 / 0->1 transitions (edges).
K = tf.constant([[1, -1]], dtype=tf.float32)
# Result of the cross-correlation.
Y = corr2d(X, K)
print(Y)
通过上述我们可以发现卷积核可以检测矩阵的边缘(这只是举个例子,真正的图像的边缘会比这个复杂许多)
实现代码如下(其实和之前的思想一样的):
import tensorflow as tf
import numpy as np

print(tf.__version__)


# Convolution (cross-correlation) function.
def corr2d(X, K):
    """Compute the 2-D cross-correlation of input X with kernel K."""
    h, w = K.shape
    Y = tf.Variable(tf.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.cast(tf.reduce_sum(X[i:i + h, j:j + w] * K),
                                   dtype=tf.float32))
    return Y


# "Image" to detect edges in.
X = tf.Variable(tf.ones((6, 8)))
X[:, 2:6].assign(tf.zeros(X[:, 2:6].shape))
print(X)
# Target edge-detection kernel.
K = tf.constant([[1, -1]], dtype=tf.float32)
# Target output produced by that kernel.
Y = corr2d(X, K)
print(Y)

# Conv2D expects NHWC tensors: add batch and channel dimensions.
X = tf.reshape(X, (1, 6, 8, 1))
Y = tf.reshape(Y, (1, 6, 7, 1))
print(Y)

conv2d = tf.keras.layers.Conv2D(1, (1, 2))
print(Y.shape)
# Prediction with the randomly initialized kernel.
Y_hat = conv2d(X)
for i in range(10):
    with tf.GradientTape(watch_accessed_variables=False) as g:
        # Only differentiate w.r.t. the kernel weights.
        g.watch(conv2d.weights[0])
        Y_hat = conv2d(X)
        # Squared-error loss.
        l = (abs(Y_hat - Y)) ** 2
        dl = g.gradient(l, conv2d.weights[0])
        # Learning rate.
        lr = 3e-2
        update = tf.multiply(lr, dl)
        # Manual gradient-descent update of the kernel.
        updated_weights = conv2d.get_weights()
        updated_weights[0] = conv2d.weights[0] - update
        conv2d.set_weights(updated_weights)
        if (i + 1) % 2 == 0:
            print('batch %d, loss %.3f' % (i + 1, tf.reduce_sum(l)))

# The learned kernel should approach [1, -1].
print(tf.reshape(conv2d.get_weights()[0], (1, 2)))
关键字:都是学出来的。如果不理解,可以把神经网络当成一个黑盒,里面互相关运算和卷积运算的参数都是模型学习出来的。
这是卷积神经网络里面两个非常重要的两个参数。
填充:
验证代码如下:
import tensorflow as tf
import numpy as np

print(tf.__version__)


def comp_conv2d(conv2d, X):
    """Apply conv2d to a single-channel 2-D X and return the spatial output."""
    # input_shape = (samples, rows, cols, channels): add batch/channel dims.
    X = tf.reshape(X, (1,) + X.shape + (1,))
    Y = conv2d(X)
    # Drop the batch and channel dimensions again.
    return tf.reshape(Y, Y.shape[1:3])


# 'same' padding keeps the 8x8 spatial size with a 3x3 kernel.
conv2d = tf.keras.layers.Conv2D(1, kernel_size=3, padding='same')
X = tf.random.uniform(shape=(8, 8))
print(comp_conv2d(conv2d, X).shape)
步幅:
模拟代码如下(令高和宽上的步幅均为2):
import tensorflow as tf
import numpy as np

print(tf.__version__)


# NOTE: the snippet as published relied on comp_conv2d and X from the previous
# example; they are included here so it runs standalone.
def comp_conv2d(conv2d, X):
    """Apply conv2d to a single-channel 2-D X and return the spatial output."""
    X = tf.reshape(X, (1,) + X.shape + (1,))
    Y = conv2d(X)
    return tf.reshape(Y, Y.shape[1:3])


X = tf.random.uniform(shape=(8, 8))
# Stride 2 in both height and width halves the output size.
conv2d = tf.keras.layers.Conv2D(1, kernel_size=3, padding='same', strides=2)
print(comp_conv2d(conv2d, X).shape)
另一个比较复杂的代码:
import tensorflow as tf
import numpy as np

print(tf.__version__)


# NOTE: the snippet as published relied on comp_conv2d and X from the previous
# example; they are included here so it runs standalone.
def comp_conv2d(conv2d, X):
    """Apply conv2d to a single-channel 2-D X and return the spatial output."""
    X = tf.reshape(X, (1,) + X.shape + (1,))
    Y = conv2d(X)
    return tf.reshape(Y, Y.shape[1:3])


X = tf.random.uniform(shape=(8, 8))
# Asymmetric kernel (3, 5), no padding, strides (3, 4).
conv2d = tf.keras.layers.Conv2D(1, kernel_size=(3, 5), padding='valid',
                                strides=(3, 4))
print(comp_conv2d(conv2d, X).shape)
代码如下:
import tensorflow as tf
import numpy as np

print(tf.__version__)


# Multi-channel input.
# Convolution (cross-correlation) function.
def corr2d(X, K):
    """2-D cross-correlation; promotes a 1-D input to a single column."""
    h, w = K.shape
    if len(X.shape) <= 1:
        X = tf.reshape(X, (X.shape[0], 1))
    Y = tf.Variable(tf.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.cast(tf.reduce_sum(X[i:i + h, j:j + w] * K),
                                   dtype=tf.float32))
    return Y


# Multi-input-channel cross-correlation: correlate per channel, then sum.
def corr2d_multi_in(X, K):
    return tf.reduce_sum([corr2d(X[i], K[i]) for i in range(X.shape[0])],
                         axis=0)


X = tf.constant([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                 [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = tf.constant([[[0, 1], [2, 3]],
                 [[1, 2], [3, 4]]])
print(corr2d_multi_in(X, K))
import tensorflow as tf
import numpy as np

print(tf.__version__)


# Multi-channel input.
# Convolution (cross-correlation) function.
def corr2d(X, K):
    """2-D cross-correlation; promotes a 1-D input to a single column."""
    h, w = K.shape
    if len(X.shape) <= 1:
        X = tf.reshape(X, (X.shape[0], 1))
    Y = tf.Variable(tf.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.cast(tf.reduce_sum(X[i:i + h, j:j + w] * K),
                                   dtype=tf.float32))
    return Y


# Multi-input-channel cross-correlation: correlate per channel, then sum.
def corr2d_multi_in(X, K):
    return tf.reduce_sum([corr2d(X[i], K[i]) for i in range(X.shape[0])],
                         axis=0)


X = tf.constant([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                 [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = tf.constant([[[0, 1], [2, 3]],
                 [[1, 2], [3, 4]]])
print(corr2d_multi_in(X, K))


# Multi-output-channel: apply corr2d_multi_in once per output kernel, stack.
def corr2d_multi_in_out(X, K):
    return tf.stack([corr2d_multi_in(X, k) for k in K], axis=0)


print("K:", K)
print("K+1:", K + 1)
print("K+2:", K + 2)
# BUG FIX: the original rebound K to the stacked kernel, so the "equivalent"
# calls below received a 4-D kernel and would crash. Keep the stacked kernel
# under a separate name.
K_stacked = tf.stack([K, K + 1, K + 2], axis=0)
print(K_stacked.shape)
print(corr2d_multi_in_out(X, K_stacked))
# Equivalent to the following per-kernel operations:
print(corr2d_multi_in(X, K))
print(corr2d_multi_in(X, K + 1))
print(corr2d_multi_in(X, K + 2))
这里其实就是输入是3*3*3(代表三个输入通道,每个通道是一个大小为3*3的矩阵),因为输入为3通道,所以单个卷积核由3个1*1的卷积核组成(例如浅蓝色为一个1*1卷积核,深蓝色代表另一个1*1的卷积核),单个卷积核采样时,对于该图生成一个3*3*3的矩阵,不过这三个矩阵会相加,所以结果是3*3的矩阵;由于有两个卷积核,所以输出2个3*3的矩阵。
实现代码如下:
import tensorflow as tf
import numpy as np

print(tf.__version__)


# Multi-channel input.
# Convolution (cross-correlation) function.
def corr2d(X, K):
    """2-D cross-correlation; promotes a 1-D input to a single column."""
    h, w = K.shape
    if len(X.shape) <= 1:
        X = tf.reshape(X, (X.shape[0], 1))
    Y = tf.Variable(tf.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1)))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j].assign(tf.cast(tf.reduce_sum(X[i:i + h, j:j + w] * K),
                                   dtype=tf.float32))
    return Y


# Multi-input-channel cross-correlation: correlate per channel, then sum.
def corr2d_multi_in(X, K):
    return tf.reduce_sum([corr2d(X[i], K[i]) for i in range(X.shape[0])],
                         axis=0)


X = tf.constant([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                 [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = tf.constant([[[0, 1], [2, 3]],
                 [[1, 2], [3, 4]]])


# Multi-output-channel cross-correlation.
def corr2d_multi_in_out(X, K):
    return tf.stack([corr2d_multi_in(X, k) for k in K], axis=0)


# A 1x1 convolution is just a matrix multiply over the channel dimension.
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = tf.reshape(X, (c_i, h * w))
    K = tf.reshape(K, (c_o, c_i))
    Y = tf.matmul(K, X)
    return tf.reshape(Y, (c_o, h, w))


X = tf.random.uniform((3, 3, 3))
K = tf.random.uniform((2, 3, 1, 1))
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)
# Both implementations should agree to numerical precision.
print(tf.norm(Y1 - Y2) < 1e-6)
小结:
1*1卷积层可以调整通道数,例如上面样例,将3通道3*3的矩阵变化为2通道3*3的矩阵;在某种意义上,将通道维当作特征维,高宽上的数据当作样本数据,1*1卷积层与全连接层等价。
验证代码:
import tensorflow as tf
import numpy as np

print(tf.__version__)


def pool2d(X, pool_size, mode='max'):
    """2-D pooling (stride 1, no padding); mode is 'max' or 'avg'."""
    p_h, p_w = pool_size
    Y = tf.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    Y = tf.Variable(Y)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Max pooling.
            if mode == 'max':
                Y[i, j].assign(tf.reduce_max(X[i:i + p_h, j:j + p_w]))
            # Average pooling.
            elif mode == 'avg':
                Y[i, j].assign(tf.reduce_mean(X[i:i + p_h, j:j + p_w]))
    return Y


# Verify against the textbook figure.
X = tf.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=tf.float32)
print(pool2d(X, (2, 2)))
# Object-edge detection input from the earlier example.
X = tf.Variable(tf.ones((6, 8)))
X[:, 2:6].assign(tf.zeros(X[:, 2:6].shape))
print(pool2d(X, (2, 2)))
结果:
填充和步幅:
tensorflow默认数据类型为'channels_last',所以这里使用(1,4,4,1)而不是(1,1,4,4)
验证代码:
import tensorflow as tf
import numpy as np

print(tf.__version__)


def pool2d(X, pool_size, mode='max'):
    """2-D pooling (stride 1, no padding); mode is 'max' or 'avg'."""
    p_h, p_w = pool_size
    Y = tf.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    Y = tf.Variable(Y)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Max pooling.
            if mode == 'max':
                Y[i, j].assign(tf.reduce_max(X[i:i + p_h, j:j + p_w]))
            # Average pooling.
            elif mode == 'avg':
                Y[i, j].assign(tf.reduce_mean(X[i:i + p_h, j:j + p_w]))
    return Y


# tensorflow default data_format == 'channels_last'
# so (1,4,4,1) instead of (1,1,4,4)
X = tf.reshape(tf.constant(range(16)), (1, 4, 4, 1))
print(X)
# By default a MaxPool2D instance uses a stride equal to its pool window.
pool2d = tf.keras.layers.MaxPool2D(pool_size=[3, 3])
print(pool2d(X))
# Stride 2 with 'same' padding.
pool2d = tf.keras.layers.MaxPool2D(pool_size=[3, 3], padding='same', strides=2)
print(pool2d(X))
多通道输入和输出
关键是记住多通道的池化,其实就是分别池化,没有卷积的求和,就行了。
2*4*4*1的池化过程中仅仅4*4的维度发生变化,变成n*n
验证代码:
import tensorflow as tf
import numpy as np

print(tf.__version__)


def pool2d(X, pool_size, mode='max'):
    """2-D pooling (stride 1, no padding); mode is 'max' or 'avg'."""
    p_h, p_w = pool_size
    Y = tf.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    Y = tf.Variable(Y)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # Max pooling.
            if mode == 'max':
                Y[i, j].assign(tf.reduce_max(X[i:i + p_h, j:j + p_w]))
            # Average pooling.
            elif mode == 'avg':
                Y[i, j].assign(tf.reduce_mean(X[i:i + p_h, j:j + p_w]))
    return Y


X = tf.reshape(tf.constant(range(16)), (1, 4, 4, 1))
# Build a multi-channel input: pooling acts on each channel independently.
X = tf.stack([X, X + 1], axis=3)
X = tf.reshape(X, (2, 4, 4, 1))
print(X.shape)
pool2d = tf.keras.layers.MaxPool2D(3, padding='same', strides=2)
print(pool2d(X))
上述验证程序,池化后维度为(2, 2, 2, 1)
小结:
上述将卷积神经网络的基础都介绍完了,下面开始针对具体的卷积神经网络来进行介绍了~
实现代码如下:
import tensorflow as tf
import numpy as np

print(tf.__version__)

# LeNet: two conv+pool stages followed by three dense layers.
net = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=6, kernel_size=5, activation='sigmoid',
                           input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(filters=16, kernel_size=5, activation='sigmoid'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(120, activation='sigmoid'),
    tf.keras.layers.Dense(84, activation='sigmoid'),
    tf.keras.layers.Dense(10, activation='sigmoid')
])

# One single-channel 28x28 sample; run it layer by layer to see the shapes.
X = tf.random.uniform((1, 28, 28, 1))
for layer in net.layers:
    X = layer(X)
    print(layer.name, 'output shape\t', X.shape)
# Height/width shrink through the conv block: each 5x5 conv removes 4,
# each pool halves, while channels grow from 1 to 16. The dense layers then
# reduce the flattened features down to the 10 classes.

# Load Fashion-MNIST.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
# Inspect dataset shapes.
print(train_images.shape)
# print(train_labels.shape)
print(test_images.shape)
# Add a trailing channel dimension so the images match the conv input.
train_images = tf.reshape(train_images, (train_images.shape[0], train_images.shape[1], train_images.shape[2], 1))
print(train_images.shape)
test_images = tf.reshape(test_images, (test_images.shape[0], test_images.shape[1], test_images.shape[2], 1))
print(test_images.shape)

# Loss, optimizer and metrics.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.9, momentum=0.0, nesterov=False)
net.compile(optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
# Train.
net.fit(train_images, train_labels, epochs=5, validation_split=0.1)
小结:
学习特征表示
下面我们实现稍微简化过的AlexNet
import tensorflow as tf
import numpy as np

print(tf.__version__)

# Enable GPU memory growth if desired:
# for gpu in tf.config.experimental.list_physical_devices('GPU'):
#     tf.config.experimental.set_memory_growth(gpu, True)

# Simplified AlexNet.
net = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=256, kernel_size=5, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='sigmoid')
])

# Random sample to inspect each layer's output shape.
X = tf.random.uniform((1, 224, 224, 1))
for layer in net.layers:
    X = layer(X)
    print(layer.name, 'output shape\t', X.shape)


# The paper uses ImageNet; Fashion-MNIST is used here for speed. Images are
# padded/resized to the 224x224 input AlexNet expects via
# tf.image.resize_with_pad.
class DataLoader():
    """Loads Fashion-MNIST and serves random batches resized to 224x224."""

    def __init__(self):
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = fashion_mnist.load_data()
        self.train_images = np.expand_dims(self.train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32) / 255.0, axis=-1)
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        self.num_train, self.num_test = self.train_images.shape[0], self.test_images.shape[0]

    def get_batch_train(self, batch_size):
        index = np.random.randint(0, np.shape(self.train_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.train_images[index], 224, 224)
        return resized_images.numpy(), self.train_labels[index]

    def get_batch_test(self, batch_size):
        index = np.random.randint(0, np.shape(self.test_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.test_images[index], 224, 224)
        return resized_images.numpy(), self.test_labels[index]


batch_size = 128
dataLoader = DataLoader()
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
print("x_batch shape:", x_batch.shape, "y_batch shape:", y_batch.shape)


# Training loop: random 128-sample batches, checkpointing every 20 steps.
def train_alexnet():
    epoch = 5
    num_iter = dataLoader.num_train // batch_size
    for e in range(epoch):
        for n in range(num_iter):
            print("轮:", e, " 第", n, "/", num_iter, "次")
            x_batch, y_batch = dataLoader.get_batch_train(batch_size)
            net.fit(x_batch, y_batch)
            if n % 20 == 0:
                net.save_weights("AlexNet.h5")


optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.0, nesterov=False)
net.compile(optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
# Single training step:
# net.fit(x_batch, y_batch)
print("---------------------")
# Train (weights saved periodically during training).
train_alexnet()
net.load_weights("AlexNet.h5")
x_test, y_test = dataLoader.get_batch_test(2000)
net.evaluate(x_test, y_test, verbose=2)
小结:
主要分为VGG块和VGG网络搭建。
VGG块
VGG网络
(除了VGG-11外还有许多其他VGG网络,如果有兴趣可以去研究一下)
具体代码实现如下:
import tensorflow as tf

print(tf.__version__)
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)


def vgg_block(num_convs, num_channels):
    """A VGG block: num_convs 3x3 same-padding convs, then a 2x2 max pool."""
    blk = tf.keras.models.Sequential()
    for _ in range(num_convs):
        blk.add(tf.keras.layers.Conv2D(num_channels, kernel_size=3,
                                       padding='same', activation='relu'))
    blk.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
    return blk


# (num_convs, num_channels) per stage for VGG-11.
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))


def vgg(conv_arch):
    """Assemble a VGG network from the given stage specification."""
    net = tf.keras.models.Sequential()
    for (num_convs, num_channels) in conv_arch:
        net.add(vgg_block(num_convs, num_channels))
    net.add(tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4096, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(4096, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='sigmoid')]))
    return net


net = vgg(conv_arch)

# VGG-11 is heavier than AlexNet; shrink the channels 4x for Fashion-MNIST.
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
print("small_conv_arch:", small_conv_arch)
# Reduced to [(1, 16), (1, 32), (2, 64), (2, 128), (2, 128)].
net = vgg(small_conv_arch)

import numpy as np


class DataLoader():
    """Loads Fashion-MNIST and serves random batches resized to 224x224."""

    def __init__(self):
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = fashion_mnist.load_data()
        self.train_images = np.expand_dims(self.train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32) / 255.0, axis=-1)
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        self.num_train, self.num_test = self.train_images.shape[0], self.test_images.shape[0]

    def get_batch_train(self, batch_size):
        index = np.random.randint(0, np.shape(self.train_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.train_images[index], 224, 224)
        return resized_images.numpy(), self.train_labels[index]

    def get_batch_test(self, batch_size):
        index = np.random.randint(0, np.shape(self.test_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.test_images[index], 224, 224)
        return resized_images.numpy(), self.test_labels[index]


batch_size = 128
dataLoader = DataLoader()
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
print("x_batch shape:", x_batch.shape, "y_batch shape:", y_batch.shape)


def train_vgg():
    """Train on random batches, checkpointing weights every 20 steps."""
    epoch = 5
    num_iter = dataLoader.num_train // batch_size
    for e in range(epoch):
        for n in range(num_iter):
            print("轮:", e, " 第", n, "/", num_iter, "次")
            x_batch, y_batch = dataLoader.get_batch_train(batch_size)
            net.fit(x_batch, y_batch)
            if n % 20 == 0:
                net.save_weights("VGG.h5")


optimizer = tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.0, nesterov=False)
net.compile(optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
# net.fit(x_batch, y_batch)
train_vgg()
# Reload saved weights and evaluate.
net.load_weights("VGG.h5")
x_test, y_test = dataLoader.get_batch_test(2000)
net.evaluate(x_test, y_test, verbose=2)
小结:
主要也是有两部分构成,NiN块和NiN网络。
NiN块
NiN网络
代码实现如下:
import tensorflow as tf

print(tf.__version__)
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)


def nin_block(num_channels, kernel_size, strides, padding):
    """NiN block: one regular conv followed by two 1x1 convs."""
    blk = tf.keras.models.Sequential()
    blk.add(tf.keras.layers.Conv2D(num_channels, kernel_size,
                                   strides=strides, padding=padding,
                                   activation='relu'))
    blk.add(tf.keras.layers.Conv2D(num_channels, kernel_size=1, activation='relu'))
    blk.add(tf.keras.layers.Conv2D(num_channels, kernel_size=1, activation='relu'))
    return blk


def NiN():
    """NiN model: stacked NiN blocks, global average pooling instead of dense."""
    net = tf.keras.models.Sequential()
    net.add(nin_block(96, kernel_size=11, strides=4, padding='valid'))
    net.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2))
    net.add(nin_block(256, kernel_size=5, strides=1, padding='same'))
    net.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2))
    net.add(nin_block(384, kernel_size=3, strides=1, padding='same'))
    net.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2))
    net.add(tf.keras.layers.Dropout(0.5))
    # Final NiN block outputs 10 channels — one per class.
    net.add(nin_block(10, kernel_size=3, strides=1, padding='same'))
    net.add(tf.keras.layers.GlobalAveragePooling2D())
    net.add(tf.keras.layers.Flatten())
    return net


net = NiN()

# Single-channel 224x224 sample to inspect each block's output shape.
X = tf.random.uniform((1, 224, 224, 1))
for blk in net.layers:
    X = blk(X)
    print(blk.name, 'output shape:\t', X.shape)

import numpy as np


class DataLoader():
    """Loads Fashion-MNIST and serves random batches resized to 224x224."""

    def __init__(self):
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = fashion_mnist.load_data()
        self.train_images = np.expand_dims(self.train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32) / 255.0, axis=-1)
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        self.num_train, self.num_test = self.train_images.shape[0], self.test_images.shape[0]

    def get_batch_train(self, batch_size):
        index = np.random.randint(0, np.shape(self.train_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.train_images[index], 224, 224)
        return resized_images.numpy(), self.train_labels[index]

    def get_batch_test(self, batch_size):
        index = np.random.randint(0, np.shape(self.test_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.test_images[index], 224, 224)
        return resized_images.numpy(), self.test_labels[index]


batch_size = 128
dataLoader = DataLoader()
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
print("x_batch shape:", x_batch.shape, "y_batch shape:", y_batch.shape)


def train_nin():
    """Train on random batches, checkpointing weights every 20 steps."""
    # net.load_weights("NiN.h5")
    epoch = 5
    num_iter = dataLoader.num_train // batch_size
    for e in range(epoch):
        for n in range(num_iter):
            print("轮:", e, " 第", n, "/", num_iter, "次")
            x_batch, y_batch = dataLoader.get_batch_train(batch_size)
            net.fit(x_batch, y_batch)
            if n % 20 == 0:
                net.save_weights("NiN.h5")


# optimizer = tf.keras.optimizers.SGD(learning_rate=0.06, momentum=0.3, nesterov=False)
# FIX: `lr` is a deprecated alias; use `learning_rate`.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-7)
net.compile(optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
# net.fit(x_batch, y_batch)
train_nin()
# Reload saved weights and evaluate.
net.load_weights("NiN.h5")
x_test, y_test = dataLoader.get_batch_test(2000)
net.evaluate(x_test, y_test, verbose=2)
其主要内容也是块和网络,不过与前面的几个模型的命名方式不太一样哟。
Inception 块
GoogLeNet模型
GoogLeNet模型的计算复杂,而且不如VGG那样便于修改通道数。
最终代码如下:
import tensorflow as tf

print(tf.__version__)
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)


class Inception(tf.keras.layers.Layer):
    """Inception block: four parallel paths concatenated on the channel axis."""

    def __init__(self, c1, c2, c3, c4):
        super().__init__()
        # Path 1: single 1x1 conv.
        self.p1_1 = tf.keras.layers.Conv2D(c1, kernel_size=1, activation='relu', padding='same')
        # Path 2: 1x1 conv followed by a 3x3 conv.
        self.p2_1 = tf.keras.layers.Conv2D(c2[0], kernel_size=1, padding='same', activation='relu')
        self.p2_2 = tf.keras.layers.Conv2D(c2[1], kernel_size=3, padding='same',
                                           activation='relu')
        # Path 3: 1x1 conv followed by a 5x5 conv.
        self.p3_1 = tf.keras.layers.Conv2D(c3[0], kernel_size=1, padding='same', activation='relu')
        self.p3_2 = tf.keras.layers.Conv2D(c3[1], kernel_size=5, padding='same',
                                           activation='relu')
        # Path 4: 3x3 max pool followed by a 1x1 conv.
        self.p4_1 = tf.keras.layers.MaxPool2D(pool_size=3, padding='same', strides=1)
        self.p4_2 = tf.keras.layers.Conv2D(c4, kernel_size=1, padding='same', activation='relu')

    def call(self, x):
        p1 = self.p1_1(x)
        p2 = self.p2_2(self.p2_1(x))
        p3 = self.p3_2(self.p3_1(x))
        p4 = self.p4_2(self.p4_1(x))
        # Concatenate the outputs along the channel dimension.
        return tf.concat([p1, p2, p3, p4], axis=-1)


# Instantiation check.
Inception(64, (96, 128), (16, 32), 32)

# Block 1.
b1 = tf.keras.models.Sequential()
b1.add(tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same', activation='relu'))
b1.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'))
# Block 2.
b2 = tf.keras.models.Sequential()
b2.add(tf.keras.layers.Conv2D(64, kernel_size=1, padding='same', activation='relu'))
b2.add(tf.keras.layers.Conv2D(192, kernel_size=3, padding='same', activation='relu'))
b2.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'))
# Block 3.
b3 = tf.keras.models.Sequential()
b3.add(Inception(64, (96, 128), (16, 32), 32))
b3.add(Inception(128, (128, 192), (32, 96), 64))
b3.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'))
# Block 4.
b4 = tf.keras.models.Sequential()
b4.add(Inception(192, (96, 208), (16, 48), 64))
b4.add(Inception(160, (112, 224), (24, 64), 64))
b4.add(Inception(128, (128, 256), (24, 64), 64))
b4.add(Inception(112, (144, 288), (32, 64), 64))
b4.add(Inception(256, (160, 320), (32, 128), 128))
b4.add(tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same'))
# Block 5.
b5 = tf.keras.models.Sequential()
b5.add(Inception(256, (160, 320), (32, 128), 128))
b5.add(Inception(384, (192, 384), (48, 128), 128))
b5.add(tf.keras.layers.GlobalAvgPool2D())
# Assemble the full network.
net = tf.keras.models.Sequential([b1, b2, b3, b4, b5, tf.keras.layers.Dense(10)])

# Demo input to inspect each block's output shape.
X = tf.random.uniform(shape=(1, 96, 96, 1))
for layer in net.layers:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

import numpy as np


class DataLoader():
    """Loads Fashion-MNIST and serves random batches resized to 224x224."""

    def __init__(self):
        fashion_mnist = tf.keras.datasets.fashion_mnist
        (self.train_images, self.train_labels), (self.test_images, self.test_labels) = fashion_mnist.load_data()
        self.train_images = np.expand_dims(self.train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_images = np.expand_dims(self.test_images.astype(np.float32) / 255.0, axis=-1)
        self.train_labels = self.train_labels.astype(np.int32)
        self.test_labels = self.test_labels.astype(np.int32)
        self.num_train, self.num_test = self.train_images.shape[0], self.test_images.shape[0]

    def get_batch_train(self, batch_size):
        index = np.random.randint(0, np.shape(self.train_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.train_images[index], 224, 224)
        return resized_images.numpy(), self.train_labels[index]

    def get_batch_test(self, batch_size):
        index = np.random.randint(0, np.shape(self.test_images)[0], batch_size)
        # need to resize images to (224,224)
        resized_images = tf.image.resize_with_pad(self.test_images[index], 224, 224)
        return resized_images.numpy(), self.test_labels[index]


batch_size = 128
dataLoader = DataLoader()
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
print("x_batch shape:", x_batch.shape, "y_batch shape:", y_batch.shape)


def train_googlenet():
    """Train on random batches, checkpointing weights every 20 steps."""
    # net.load_weights("GoogLeNet.h5")
    epoch = 5
    num_iter = dataLoader.num_train // batch_size
    for e in range(epoch):
        for n in range(num_iter):
            print("轮:", e, " 第", n, "/", num_iter, "次")
            x_batch, y_batch = dataLoader.get_batch_train(batch_size)
            net.fit(x_batch, y_batch)
            if n % 20 == 0:
                net.save_weights("GoogLeNet.h5")


# optimizer = tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.0, nesterov=False)
# FIX: `lr` is a deprecated alias; use `learning_rate`.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-7)
net.compile(optimizer=optimizer,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
x_batch, y_batch = dataLoader.get_batch_train(batch_size)
# net.fit(x_batch, y_batch)
train_googlenet()
# Reload saved weights and evaluate.
net.load_weights("GoogLeNet.h5")
x_test, y_test = dataLoader.get_batch_test(2000)
net.evaluate(x_test, y_test, verbose=2)
小结:
对全连接层和卷积层做批量归一化的方法稍有不同。下面我们将分别介绍这两种情况下的批量归一化。
对全连接层做批量归一化
对卷积层做批量归一化
预测时的批量归一化
从零实现归一化(LeNet)
import tensorflow as tf
import numpy as np
def batch_norm(is_training, X, gamma, beta, moving_mean, moving_var, eps, momentum):
    """Batch normalization in numpy semantics.

    In training mode, normalizes with the current batch statistics and
    updates the running (moving) mean/variance; in prediction mode,
    normalizes with the stored running statistics.
    Returns (Y, moving_mean, moving_var).
    """
    # Decide whether we are in training or prediction mode.
    if not is_training:
        # Prediction: use the moving-average statistics directly.
        X_hat = (X - moving_mean) / np.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected case: statistics per feature over the batch.
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # 2-D convolution case: statistics per channel (axis=1), keeping
            # dims so the result broadcasts against X.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        # Training: standardize with the current batch statistics.
        X_hat = (X - mean) / np.sqrt(var + eps)
        # Update the moving averages.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var
# Custom BatchNorm layer: holds trainable scale (gamma) and shift (beta), and
# non-trainable moving mean/variance used at prediction time. Statistics are
# computed over all axes except the last (channels/features).
class BatchNormalization(tf.keras.layers.Layer):
    """Batch-normalization layer with exponential moving-average statistics."""

    def __init__(self, decay=0.9, epsilon=1e-5, **kwargs):
        self.decay = decay
        self.epsilon = epsilon
        super(BatchNormalization, self).__init__(**kwargs)

    def build(self, input_shape):
        # gamma/beta are learned; the moving statistics are updated manually.
        self.gamma = self.add_weight(name='gamma',
                                     shape=[input_shape[-1], ],
                                     initializer=tf.initializers.ones,
                                     trainable=True)
        self.beta = self.add_weight(name='beta',
                                    shape=[input_shape[-1], ],
                                    initializer=tf.initializers.zeros,
                                    trainable=True)
        self.moving_mean = self.add_weight(name='moving_mean',
                                           shape=[input_shape[-1], ],
                                           initializer=tf.initializers.zeros,
                                           trainable=False)
        self.moving_variance = self.add_weight(name='moving_variance',
                                               shape=[input_shape[-1], ],
                                               initializer=tf.initializers.ones,
                                               trainable=False)
        super(BatchNormalization, self).build(input_shape)

    def assign_moving_average(self, variable, value):
        """
        variable = variable * decay + value * (1 - decay)
        """
        delta = variable * self.decay + value * (1 - self.decay)
        return variable.assign(delta)

    @tf.function
    def call(self, inputs, training=None):
        # FIX: `training` now defaults to None so the layer can be called
        # without an explicit flag; a falsy value selects inference mode.
        if training:
            # Batch statistics over all axes except the channel axis.
            batch_mean, batch_variance = tf.nn.moments(inputs, list(range(len(inputs.shape) - 1)))
            mean_update = self.assign_moving_average(self.moving_mean, batch_mean)
            variance_update = self.assign_moving_average(self.moving_variance, batch_variance)
            self.add_update(mean_update)
            self.add_update(variance_update)
            mean, variance = batch_mean, batch_variance
        else:
            # Inference: use the stored moving statistics.
            mean, variance = self.moving_mean, self.moving_variance
        output = tf.nn.batch_normalization(inputs,
                                           mean=mean,
                                           variance=variance,
                                           offset=self.beta,
                                           scale=self.gamma,
                                           variance_epsilon=self.epsilon)
        return output

    def compute_output_shape(self, input_shape):
        return input_shape
# LeNet with the custom BatchNormalization layer inserted before each
# activation.
net = tf.keras.models.Sequential(
    [tf.keras.layers.Conv2D(filters=6, kernel_size=5),
     BatchNormalization(),
     tf.keras.layers.Activation('sigmoid'),
     tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
     tf.keras.layers.Conv2D(filters=16, kernel_size=5),
     BatchNormalization(),
     tf.keras.layers.Activation('sigmoid'),
     tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
     tf.keras.layers.Flatten(),
     tf.keras.layers.Dense(120),
     BatchNormalization(),
     tf.keras.layers.Activation('sigmoid'),
     tf.keras.layers.Dense(84),
     BatchNormalization(),
     tf.keras.layers.Activation('sigmoid'),
     tf.keras.layers.Dense(10, activation='sigmoid')]
)

# Load MNIST, scale to [0, 1], add the channel dimension, and train.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape((60000, 28, 28, 1)).astype('float32') / 255
x_test = x_test.reshape((10000, 28, 28, 1)).astype('float32') / 255
net.compile(loss='sparse_categorical_crossentropy',
            optimizer=tf.keras.optimizers.RMSprop(),
            metrics=['accuracy'])
history = net.fit(x_train, y_train,
                  batch_size=64,
                  epochs=5,
                  validation_split=0.2)
test_scores = net.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])
# Inspect the learned scale (gamma) and shift (beta) of the first BN layer.
print(net.get_layer(index=1).gamma, net.get_layer(index=1).beta)
归一化的简单实现
import tensorflow as tf
import numpy as np

# LeNet using the built-in Keras BatchNormalization layer.
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Conv2D(filters=6, kernel_size=5))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
net.add(tf.keras.layers.Conv2D(filters=16, kernel_size=5))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(120))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.Dense(84))
net.add(tf.keras.layers.BatchNormalization())
net.add(tf.keras.layers.Activation('sigmoid'))
net.add(tf.keras.layers.Dense(10, activation='sigmoid'))

# Load MNIST, scale to [0, 1], add the channel dimension, and train.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape((60000, 28, 28, 1)).astype('float32') / 255
x_test = x_test.reshape((10000, 28, 28, 1)).astype('float32') / 255
net.compile(loss='sparse_categorical_crossentropy',
            optimizer=tf.keras.optimizers.RMSprop(),
            metrics=['accuracy'])
history = net.fit(x_train, y_train,
                  batch_size=64,
                  epochs=5,
                  validation_split=0.2)
test_scores = net.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])
小结:
残差神经网络也是由残差块和ResNet网络两部分构成的。
残差块
ResNet网络
具体实现代码:
import tensorflow as tf
from tensorflow.keras import layers,activations
#定义残差块
class Residual(tf.keras.Model):
def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
super(Residual, self).__init__(**kwargs)
self.conv1 = layers.Conv2D(num_channels,
padding='same',
kernel_size=3,
strides=strides)
self.conv2 = layers.Conv2D(num_channels, kernel_size=3,padding='same')
if use_1x1conv:
self.conv3 = layers.Conv2D(num_channels,
kernel_size=1,
strides=strides)
else:
self.conv3 = None
self.bn1 = layers.BatchNormalization()
self.bn2 = layers.BatchNormalization()
def call(self, X):
Y = activations.relu(self.bn1(self.conv1(X)))
Y = self.bn2(self.conv2(Y))
if self.conv3:
X = self.conv3(X)
return activations.relu(Y + X)
#查看输入和输出形状一致的情况
blk = Residual(3)
#tensorflow input shpe (n_images, x_shape, y_shape, channels).
#mxnet.gluon.nn.conv_layers (batch_size, in_channels, height, width)
X = tf.random.uniform((4, 6, 6 , 3))
blk(X).shape#TensorShape([4, 6, 6, 3])
#也可以在增加输出通道数的同时减半输出的高和宽
blk = Residual(6, use_1x1conv=True, strides=2)
blk(X).shape
#TensorShape([4, 3, 3, 6])
#ResNet模型
#ResNet的前两层跟之前介绍的GoogLeNet中的一样:在输出通道数为64、步幅为2的7×7卷积层后接步幅为2的3×3的最大池化层。
# 不同之处在于ResNet每个卷积层后增加的批量归一化层
net = tf.keras.models.Sequential(
[layers.Conv2D(64, kernel_size=7, strides=2, padding='same'),
layers.BatchNormalization(), layers.Activation('relu'),
layers.MaxPool2D(pool_size=3, strides=2, padding='same')])
# The first stage keeps the stem's channel count and (because the stem
# already max-pooled with stride 2) does not shrink height/width; every
# later stage doubles the channels and halves height/width in its first
# residual unit.
class ResnetBlock(tf.keras.layers.Layer):
    """A stage of `num_residuals` Residual units.

    Unless this is the first stage after the stem (`first_block=True`),
    the leading unit downsamples with stride 2 and projects the shortcut
    with a 1x1 conv so channel counts match.
    """

    def __init__(self, num_channels, num_residuals, first_block=False, **kwargs):
        super(ResnetBlock, self).__init__(**kwargs)
        self.listLayers = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                # Downsampling unit: stride 2 + projected shortcut.
                self.listLayers.append(Residual(num_channels, use_1x1conv=True, strides=2))
            else:
                self.listLayers.append(Residual(num_channels))

    def call(self, X):
        # Iterate the list directly. The original `self.listLayers.layers`
        # leaned on the `.layers` property of the ListWrapper that Keras
        # silently wraps list attributes in -- an implementation detail
        # that is not stable across TF versions.
        for layer in self.listLayers:
            X = layer(X)
        return X
# Assemble the full network: stem + four residual stages (two residual
# units each here) + global average pooling + a 10-way softmax head.
class ResNet(tf.keras.Model):
    """ResNet built from ResnetBlock stages; `num_blocks[i]` is the
    number of residual units in stage i."""

    def __init__(self, num_blocks, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        # Stem: 7x7/2 conv -> BN -> ReLU -> 3x3/2 max pool.
        self.conv = layers.Conv2D(64, kernel_size=7, strides=2, padding='same')
        self.bn = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        self.mp = layers.MaxPool2D(pool_size=3, strides=2, padding='same')
        # Four stages; channels double while the spatial size halves.
        self.resnet_block1 = ResnetBlock(64, num_blocks[0], first_block=True)
        self.resnet_block2 = ResnetBlock(128, num_blocks[1])
        self.resnet_block3 = ResnetBlock(256, num_blocks[2])
        self.resnet_block4 = ResnetBlock(512, num_blocks[3])
        # Head: global average pool + dense softmax over 10 classes.
        self.gap = layers.GlobalAvgPool2D()
        self.fc = layers.Dense(units=10, activation=tf.keras.activations.softmax)

    def call(self, x):
        # Apply every sub-module strictly in definition order.
        for module in (self.conv, self.bn, self.relu, self.mp,
                       self.resnet_block1, self.resnet_block2,
                       self.resnet_block3, self.resnet_block4,
                       self.gap, self.fc):
            x = module(x)
        return x
mynet = ResNet([2, 2, 2, 2])
# Two residual units per stage -> 4 convs per stage (1x1 shortcut convs
# not counted), plus the stem conv and the final dense layer: 18
# weighted layers in total, i.e. ResNet-18. Other channel/stage
# configurations give other depths (e.g. the 152-layer ResNet-152).
# ResNet's overall scheme resembles GoogLeNet's but is simpler and
# easier to modify, which helped its rapid adoption.
# Trace how the input shape changes through the top-level modules.
X = tf.random.uniform(shape=(1, 224, 224, 1))
for layer in mynet.layers:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)
# Train ResNet-18 on the Fashion-MNIST dataset.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.reshape((-1, 28, 28, 1)).astype('float32') / 255
x_test = x_test.reshape((-1, 28, 28, 1)).astype('float32') / 255
mynet.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history = mynet.fit(x_train, y_train, batch_size=64, epochs=5,
                    validation_split=0.2)
test_scores = mynet.evaluate(x_test, y_test, verbose=2)
小结:
稠密块
过渡层
DenseNet网络
代码实现如下:
import tensorflow as tf
class BottleNeck(tf.keras.layers.Layer):
    """DenseNet bottleneck unit.

    BN -> ReLU -> 1x1 conv (4*growth_rate channels) -> BN -> ReLU ->
    3x3 conv (growth_rate channels) -> Dropout; the result is
    concatenated onto the input along the channel axis, so each unit
    adds `growth_rate` channels.
    """

    def __init__(self, growth_rate, drop_rate):
        super(BottleNeck, self).__init__()
        self.bn1 = tf.keras.layers.BatchNormalization()
        # 1x1 "bottleneck" conv first widens to 4*growth_rate channels.
        self.conv1 = tf.keras.layers.Conv2D(filters=4 * growth_rate,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(filters=growth_rate,
                                            kernel_size=(3, 3),
                                            strides=1,
                                            padding="same")
        self.dropout = tf.keras.layers.Dropout(rate=drop_rate)
        self.listLayers = [self.bn1,
                           tf.keras.layers.Activation("relu"),
                           self.conv1,
                           self.bn2,
                           tf.keras.layers.Activation("relu"),
                           self.conv2,
                           self.dropout]

    def call(self, x):
        y = x
        # Iterate the list directly. The original `self.listLayers.layers`
        # leaned on the `.layers` property of Keras' internal ListWrapper,
        # an implementation detail that varies across TF versions.
        for layer in self.listLayers:
            y = layer(y)
        # Dense connectivity: stack the new feature maps onto the input.
        y = tf.keras.layers.concatenate([x, y], axis=-1)
        return y
class DenseBlock(tf.keras.layers.Layer):
    """A stack of `num_layers` BottleNeck units.

    Each unit concatenates `growth_rate` new channels, so the output has
    `num_layers * growth_rate` more channels than the input.
    """

    def __init__(self, num_layers, growth_rate, drop_rate=0.5):
        super(DenseBlock, self).__init__()
        self.num_layers = num_layers
        self.growth_rate = growth_rate
        self.drop_rate = drop_rate
        self.listLayers = []
        for _ in range(num_layers):
            self.listLayers.append(BottleNeck(growth_rate=self.growth_rate,
                                              drop_rate=self.drop_rate))

    def call(self, x):
        # Iterate the plain list directly instead of the original
        # `self.listLayers.layers`, which depended on the `.layers`
        # property of Keras' internal ListWrapper -- an implementation
        # detail that is not stable across TF versions.
        for layer in self.listLayers:
            x = layer(x)
        return x
# A dense block with 2 units of 10 output channels each: a 3-channel
# input yields 3 + 2*10 = 23 output channels. Because the per-unit
# channel count controls how fast the width grows relative to the
# input, it is called the growth rate.
blk = DenseBlock(2, 10)
X = tf.random.uniform((4, 8, 8, 3))
Y = blk(X)
print(Y.shape)
#过渡层
#由于每个稠密块都会带来通道数的增加,使用过多则会带来过于复杂的模型。过渡层用来控制模型复杂度。
#通过1×1卷积层来减小通道数,并使用步幅为2的平均池化层减半高和宽,从而进一步降低模型复杂度。
class TransitionLayer(tf.keras.layers.Layer):
    """DenseNet transition layer, used to rein in model complexity.

    BN -> ReLU -> 1x1 conv (shrinks the channel count to
    `out_channels`) -> stride-2 2x2 average pool (halves height and
    width) between consecutive dense blocks.
    """

    def __init__(self, out_channels):
        super(TransitionLayer, self).__init__()
        self.bn = tf.keras.layers.BatchNormalization()
        self.conv = tf.keras.layers.Conv2D(filters=out_channels,
                                           kernel_size=(1, 1),
                                           strides=1,
                                           padding="same")
        # The DenseNet design -- and the surrounding commentary in this
        # file -- call for *average* pooling in the transition layer; the
        # original code mistakenly used MaxPool2D. Output shape is
        # unchanged by this fix.
        self.pool = tf.keras.layers.AveragePooling2D(pool_size=(2, 2),
                                                     strides=2,
                                                     padding="same")

    def call(self, inputs):
        x = self.bn(inputs)
        x = tf.keras.activations.relu(x)
        x = self.conv(x)
        x = self.pool(x)
        return x
# Shrink the 23-channel dense-block output down to 10 channels;
# the stride-2 pool halves the 8x8 spatial size to 4x4.
blk = TransitionLayer(10)
print(blk(Y).shape)  # TensorShape([4, 4, 4, 10])
#DenseNet使用的是4个稠密块,我们可以设置每个稠密块使用多少个卷积层。
# 这里我们设成4,从而与上一节的ResNet-18保持一致。稠密块里的卷积层通道数(即增长率)设为32,所以每个稠密块将增加128个通道。
class DenseNet(tf.keras.Model):
    """DenseNet body: stem -> (dense block + transition) x 3 -> dense
    block -> global average pool -> 10-way softmax head.

    Channel bookkeeping: each dense block adds
    `num_layers * growth_rate` channels, and each transition layer then
    compresses the running count by `compression_rate`.
    """

    def __init__(self, num_init_features, growth_rate, block_layers, compression_rate, drop_rate):
        super(DenseNet, self).__init__()
        # Stem: 7x7/2 conv -> BN -> ReLU -> 3x3/2 max pool.
        self.conv = tf.keras.layers.Conv2D(filters=num_init_features,
                                           kernel_size=(7, 7),
                                           strides=2,
                                           padding="same")
        self.bn = tf.keras.layers.BatchNormalization()
        self.pool = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
                                              strides=2,
                                              padding="same")
        self.num_channels = num_init_features
        # Stage 1: dense block, then compress channels and halve H/W.
        self.dense_block_1 = DenseBlock(num_layers=block_layers[0],
                                        growth_rate=growth_rate,
                                        drop_rate=drop_rate)
        self.num_channels = compression_rate * (self.num_channels
                                                + growth_rate * block_layers[0])
        self.transition_1 = TransitionLayer(out_channels=int(self.num_channels))
        # Stage 2.
        self.dense_block_2 = DenseBlock(num_layers=block_layers[1],
                                        growth_rate=growth_rate,
                                        drop_rate=drop_rate)
        self.num_channels = compression_rate * (self.num_channels
                                                + growth_rate * block_layers[1])
        self.transition_2 = TransitionLayer(out_channels=int(self.num_channels))
        # Stage 3.
        self.dense_block_3 = DenseBlock(num_layers=block_layers[2],
                                        growth_rate=growth_rate,
                                        drop_rate=drop_rate)
        self.num_channels = compression_rate * (self.num_channels
                                                + growth_rate * block_layers[2])
        self.transition_3 = TransitionLayer(out_channels=int(self.num_channels))
        # Final dense block (no transition afterwards), then the head.
        self.dense_block_4 = DenseBlock(num_layers=block_layers[3],
                                        growth_rate=growth_rate,
                                        drop_rate=drop_rate)
        self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc = tf.keras.layers.Dense(units=10,
                                        activation=tf.keras.activations.softmax)

    def call(self, inputs):
        # Stem.
        x = tf.keras.activations.relu(self.bn(self.conv(inputs)))
        x = self.pool(x)
        # Alternating dense blocks and transitions, in definition order.
        for stage in (self.dense_block_1, self.transition_1,
                      self.dense_block_2, self.transition_2,
                      self.dense_block_3, self.transition_3,
                      self.dense_block_4):
            x = stage(x)
        # Classification head.
        return self.fc(self.avgpool(x))
def densenet():
    """Build the reference DenseNet: four 4-unit dense blocks, growth
    rate 32 (so each block adds 128 channels), with 0.5 compression and
    0.5 dropout."""
    config = dict(num_init_features=64,
                  growth_rate=32,
                  block_layers=[4, 4, 4, 4],
                  compression_rate=0.5,
                  drop_rate=0.5)
    return DenseNet(**config)
# Inspect the output shape after each top-level module on a dummy
# 96x96 single-channel input.
mynet = densenet()
X = tf.random.uniform(shape=(1, 96, 96, 1))
for layer in mynet.layers:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)
# Train DenseNet on Fashion-MNIST, then persist the learned weights.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.reshape((-1, 28, 28, 1)).astype('float32') / 255
x_test = x_test.reshape((-1, 28, 28, 1)).astype('float32') / 255
mynet.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history = mynet.fit(x_train, y_train, batch_size=64, epochs=5,
                    validation_split=0.2)
test_scores = mynet.evaluate(x_test, y_test, verbose=2)
mynet.save_weights("DenseNet.h5")
小结:
总结:累……大家跟着教程的代码亲手敲一敲可能更容易理解。用沐神的话说就是,这些结构都是试出来的,胜者为王——准确率高的结构自有其存在的道理。