本文基于Deep Residual Learning for Image Recognition的最小层次18层搭建
本模型由于引入了残差(Residual)的概念(我更愿意把他看作为shortcut)使得网络层数加深不会导致更坏的结果,使得网络可以多层堆叠而不会更坏
由于网络过深设备算不过来,故只搭建18层
注意:博主目前还不会任何数据优化手段,只是对元数据进行了-1,1映射,请自行叠加骚操作
python3.6
tensorflow-gpu
cuda kit
cudnn
cnn
bpnn
batchnormalization
padding pooling sample
可以看到shortcut的加入我们的网络最多只会退化到x输入时的layer depth。
假设网络输出h(x) 由图有 h(x)=f(x)+x ,则我们要的结果是f(x) ,所以f(x)==h(x)-x,所以我们对f(x)进行优化,使得f(x)与ground truth的loss收敛,这里我们用 cross entropy来评估loss。对loss求导获得更新值,剩下的其实就和cnn没啥区别了。
我们可以看到在最后还有一大坨full dense connect,那是用来分类的,cifar100不是一百分类问题嘛。不过由于参数量过大我们选择降低层数。
首先我先给大家展示一下参数量,否则各位gpu可能mmp
看到没有 可训练的参数高达1千万,当然也可以使用混合精度训练,或者直接fp16半精度训练,但本文不用。
模型关键是一个网络的basic block 多个相同的堆叠 会变成 resnet block,多个resnet block堆叠就差不多是一个resnet了。
注意,不同resnet block的kernel size相同,感知野也相同,但随着层次变深channel会增加,变成高维度数据
class BasicBlock(layers.Layer):
    """Residual basic block: conv(3x3)-BN-ReLU-conv(3x3)-BN plus a shortcut, then ReLU.

    The shortcut guarantees the network can at worst fall back to the identity
    mapping, so stacking blocks never makes the result worse.
    """

    def __init__(self, kernel_num, stride=1):
        """kernel_num: output channels of both convolutions.
        stride: stride of the first convolution; stride != 1 shrinks H/W,
        so the shortcut must downsample the input to match.
        """
        super(BasicBlock, self).__init__()
        self.conv1 = layers.Conv2D(kernel_num, (3, 3), strides=stride, padding='same')
        self.bin1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        self.conv2 = layers.Conv2D(kernel_num, (3, 3), strides=1, padding='same')
        self.bin2 = layers.BatchNormalization()
        # Shortcut path: when the main path downsamples (stride != 1), reshape
        # the input with a 1x1 strided convolution (not a pooling op) so it can
        # legitimately be added to the convolution output; otherwise use the
        # identity. Attribute name (incl. typo) kept for checkpoint compatibility.
        if stride != 1:
            self.downsameple = Sequential()
            self.downsameple.add(layers.Conv2D(kernel_num, (1, 1), strides=stride))
        else:
            self.downsameple = lambda x: x

    def call(self, inputs, training=None):
        """inputs: [b, h, w, c]. Returns the activated residual sum.

        `training` is forwarded to BatchNormalization so it uses batch
        statistics during training and moving averages at inference — the
        original dropped the flag, which skews evaluation results.
        """
        x = self.conv1(inputs)
        x = self.bin1(x, training=training)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bin2(x, training=training)
        residual = self.downsameple(inputs)
        output = layers.add([residual, x])
        output = tf.nn.relu(output)
        return output
class ResNet(keras.Model):
    """ResNet-style classifier built from stacked BasicBlocks.

    layers_dims: BasicBlocks per stage, e.g. [2, 2, 2, 2] gives ResNet-18.
    num_class: width of the final Dense layer (raw logits, no softmax).
    """

    def __init__(self, layers_dims, num_class=100):
        super(ResNet, self).__init__()
        # Stem: one conv + BN + ReLU + max-pool before the residual stages.
        self.stem = Sequential([
            layers.Conv2D(64, (3, 3), strides=(1, 1)),
            layers.BatchNormalization(),
            layers.Activation('relu'),
            layers.MaxPool2D(pool_size=(2, 2), strides=(1, 1), padding='same')
        ])
        # Four stages; kernel size stays the same across stages while the
        # channel count doubles and stride 2 halves the feature map.
        self.layer1 = self.build_resblock(64, layers_dims[0])
        self.layer2 = self.build_resblock(128, layers_dims[1], stride=2)
        self.layer3 = self.build_resblock(256, layers_dims[2], stride=2)
        self.layer4 = self.build_resblock(512, layers_dims[3], stride=2)
        # [b, h, w, 512] => average over H and W => [b, 512].
        self.avgpool = layers.GlobalAveragePooling2D()
        # Fully connected head producing one logit per class.
        self.fc = layers.Dense(num_class)

    def call(self, inputs, training=None):
        """Forward pass; `training` is propagated to every stage so the
        BatchNormalization layers inside behave correctly in both modes
        (the original dropped the flag)."""
        x = self.stem(inputs, training=training)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        x = self.avgpool(x)
        x = self.fc(x)
        return x

    def build_resblock(self, kernel_num, blocks, stride=1):
        """Stack `blocks` BasicBlocks; only the first may downsample."""
        res_blocks = Sequential()
        res_blocks.add(BasicBlock(kernel_num, stride))
        for _ in range(1, blocks):
            res_blocks.add(BasicBlock(kernel_num, stride=1))
        return res_blocks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,Sequential
class BasicBlock(layers.Layer):
    """Residual basic block: conv(3x3)-BN-ReLU-conv(3x3)-BN + shortcut, then ReLU."""

    def __init__(self, kernel_num, stride=1):
        """kernel_num: output channels of both convolutions.
        stride: stride of conv1; stride != 1 shrinks H/W, so the shortcut
        must downsample the input to match the main path's output.
        """
        super(BasicBlock, self).__init__()
        self.conv1 = layers.Conv2D(kernel_num, (3, 3), strides=stride, padding='same')
        self.bin1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        self.conv2 = layers.Conv2D(kernel_num, (3, 3), strides=1, padding='same')
        self.bin2 = layers.BatchNormalization()
        # Shortcut: a 1x1 strided convolution (not pooling) reshapes the input
        # so the residual add is legitimate when the main path downsamples;
        # otherwise the identity is used. Attribute name kept as-is so any
        # previously saved weights still load.
        if stride != 1:
            self.downsameple = Sequential()
            self.downsameple.add(layers.Conv2D(kernel_num, (1, 1), strides=stride))
        else:
            self.downsameple = lambda x: x

    def call(self, inputs, training=None):
        """inputs: [b, h, w, c]. Forwards `training` to both BN layers so they
        use batch statistics while training and moving averages at inference
        (the original silently ignored the flag)."""
        x = self.conv1(inputs)
        x = self.bin1(x, training=training)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bin2(x, training=training)
        residual = self.downsameple(inputs)
        output = layers.add([residual, x])
        output = tf.nn.relu(output)
        return output
class ResNet(keras.Model):
    """ResNet classifier assembled from BasicBlock stages.

    layers_dims: number of BasicBlocks in each of the four stages
    (e.g. [2, 2, 2, 2] -> ResNet-18).
    num_class: output width of the final Dense layer (logits).
    """

    def __init__(self, layers_dims, num_class=100):
        super(ResNet, self).__init__()
        # Stem before the residual stages.
        self.stem = Sequential([
            layers.Conv2D(64, (3, 3), strides=(1, 1)),
            layers.BatchNormalization(),
            layers.Activation('relu'),
            layers.MaxPool2D(pool_size=(2, 2), strides=(1, 1), padding='same')
        ])
        # Channels double per stage; stride 2 successively reduces H/W.
        self.layer1 = self.build_resblock(64, layers_dims[0])
        self.layer2 = self.build_resblock(128, layers_dims[1], stride=2)
        self.layer3 = self.build_resblock(256, layers_dims[2], stride=2)
        self.layer4 = self.build_resblock(512, layers_dims[3], stride=2)
        # [b, h, w, 512] => GlobalAveragePooling2D => [b, 512].
        self.avgpool = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_class)

    def call(self, inputs, training=None):
        """Forward pass; propagates `training` to all sub-layers so the BN
        layers inside switch modes correctly (the original dropped it)."""
        x = self.stem(inputs, training=training)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        x = self.avgpool(x)
        x = self.fc(x)
        return x

    def build_resblock(self, kernel_num, blocks, stride=1):
        """Stack `blocks` BasicBlocks; only the first block may downsample."""
        res_blocks = Sequential()
        res_blocks.add(BasicBlock(kernel_num, stride))
        for _ in range(1, blocks):
            res_blocks.add(BasicBlock(kernel_num, stride=1))
        return res_blocks
def ResNet18():
    """Build the 18-layer variant: four stages of two BasicBlocks each,
    with a 100-way classification head."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(stage_depths, num_class=100)
注意:千万不要把test数据给train了,这会污染数据,没有办法评估模型的泛化能力了,在比赛中这是作弊
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, optimizers, Sequential
import os
from ResNet import ResNet18_
# Data preprocessing for the tf.data pipeline.
def preprocess(x, y):
    """Scale uint8 pixels from [0, 255] into [-1, 1] and cast labels to int32."""
    scaled = tf.cast(x, dtype=tf.float32) / 255.
    x = 2 * scaled - 1
    y = tf.cast(y, dtype=tf.int32)
    return x, y
# Load CIFAR-10 (uint8 images; labels arrive shaped [N, 1] per the squeeze below).
(x, y), (x_test, y_test) = datasets.cifar10.load_data()
# The labels carry an extra trailing dimension by default — squeeze it away.
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
# Training pipeline: shuffle, normalize via preprocess, batch by 200.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(10000).map(preprocess).batch(200)
# Test pipeline: same preprocessing and batch size, no shuffling.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess).batch(200)
def main():
    """Train the ResNet on CIFAR-10 and report test accuracy each epoch."""
    # Input: [b, 32, 32, 3] color images => logits.
    # NOTE(review): this script imports `ResNet18_` while the post defines
    # `ResNet18` — verify the module actually exports this name.
    model = ResNet18_()
    model.build(input_shape=(None, 32, 32, 3))  # channel 3: RGB images
    model.summary()  # show the parameter count before committing the GPU
    # `learning_rate` replaces the deprecated `lr` keyword.
    optimizer = optimizers.Adam(learning_rate=1e-4)
    for epoch in range(30):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                # training=True so BatchNormalization uses batch statistics.
                logits = model(x, training=True)
                # NOTE(review): CIFAR-10 has 10 classes, so depth=10 is right
                # for the labels — but `ResNet18_` must be built with
                # num_class=10 to match (the in-post ResNet18 uses 100; confirm).
                y_onehot = tf.one_hot(y, depth=10)
                # Cross-entropy on raw logits as the training loss.
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)
            # Backpropagate and apply the gradient update.
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Evaluate on the held-out test set once per epoch.
        tot_num = 0
        tot_correct = 0
        for x, y in test_db:
            # training=False: BN uses its moving averages at evaluation time.
            logits = model(x, training=False)
            prob = tf.nn.softmax(logits)
            pred = tf.argmax(prob, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            correct = tf.cast(tf.equal(pred, y), dtype=tf.int32)
            correct = tf.reduce_sum(correct)
            tot_num += x.shape[0]
            tot_correct += int(correct)
        acc = tot_correct / tot_num
        print(epoch, "acc", acc)
        # Checkpoint after each epoch's evaluation.
        model.save_weights("network_check_point/ResNet.weight")


if __name__ == '__main__':
    main()
hzu