PaddlePaddle DL Camp Learning Summary

  • Gesture Recognition
    • Defining the Training and Test Sets
    • Building AlexNet
    • Training
    • Model Evaluation
  • License Plate Recognition
  • Classification Networks in Paddle 1.7.1
    • LeNet
    • AlexNet
    • VGG
    • GoogLeNet
    • ResNet

This camp mainly covered computer vision. I had crammed some deep learning beforehand, but as a beginner I still found it hard to map network structures to the corresponding code, and above all to reproduce a network myself from its architecture diagram and input/output shapes, which is exactly the practice that accelerates learning. Without further ado, on to the substance:

Gesture Recognition

Gesture recognition is much like digit recognition: there are 10 gestures in total, representing the digits 0-9. Assume we have already generated list files mapping image paths to labels, train_data.list and test_data.list, where each line holds a tab-separated image path and label, for example:
photos/P0000001.jpg	0
A gesture photo carries a lot of information, but the gesture remains clearly recognizable after the resolution is reduced to 100x100, so to save computation and storage we resize every image to (3, 100, 100).
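For completeness, the list files can be produced with a short script like the one below. This is only a sketch: it assumes a hypothetical photos/<label>/ folder-per-class layout, so adapt the paths and label extraction to your actual dataset.

# Minimal sketch for generating the list files (tab-separated
# "path<TAB>label" lines, matching the reader's split('\t') below).
# Assumes images are grouped as photos/<label>/xxx.jpg - adjust as needed.
import os
import random

samples = []
for label in sorted(os.listdir('photos')):
    for name in os.listdir(os.path.join('photos', label)):
        samples.append('%s\t%d\n' % (os.path.join('photos', label, name), int(label)))

random.shuffle(samples)
split = int(len(samples) * 0.9)  # 90% train / 10% test
with open('train_data.list', 'w') as f:
    f.writelines(samples[:split])
with open('test_data.list', 'w') as f:
    f.writelines(samples[split:])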

Defining the Training and Test Sets

# Readers for the training and test sets
import numpy as np
import paddle
import paddle.fluid as fluid
from PIL import Image
from multiprocessing import cpu_count

def data_mapper(sample):
    img, label = sample
    img = Image.open(img)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))      # HWC -> CHW
    img = img / 255.0                   # scale to [0, 1]
    img = img * 2 - 1                   # then to [-1, 1]
    return img, label

def data_reader(data_list_path):
    def reader():
        with open(data_list_path, 'r') as f:
            for line in f.readlines():
                img, label = line.split('\t')
                yield img, int(label)
    # map samples asynchronously across multiple CPU workers
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)

# Data provider for training
train_reader = paddle.batch(reader=paddle.reader.shuffle(reader=data_reader('./train_data.list'), buf_size=256), batch_size=32)
# Data provider for testing
test_reader = paddle.batch(reader=data_reader('./test_data.list'), batch_size=32)
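Before training it is worth pulling one batch to confirm the pipeline produces the expected shapes; this is a quick sanity check, not part of the original camp code.

# Fetch a single batch and inspect it.
batch = next(train_reader())
img0, label0 = batch[0]
print(len(batch), img0.shape, label0)  # expected: 32, (3, 100, 100), a digit in [0, 9]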

Building AlexNet

# Define the AlexNet network structure
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear

class AlexNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=10, is_train=True):
        super(AlexNet, self).__init__()
        name_scope = self.full_name()
        # Like LeNet, AlexNet extracts image features with alternating
        # convolution and pooling layers;
        # unlike LeNet, the activation function is 'relu'
        self.conv1 = Conv2D(num_channels=3, num_filters=32, filter_size=3, stride=1, padding=5, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=32, num_filters=64, filter_size=3, stride=1, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv3 = Conv2D(num_channels=64, num_filters=48, filter_size=2, stride=1, padding=1, act='relu')
        self.conv4 = Conv2D(num_channels=48, num_filters=48, filter_size=2, stride=1, padding=1, act='relu')
        self.conv5 = Conv2D(num_channels=48, num_filters=32, filter_size=2, stride=1, padding=1, act='relu')
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')

        self.fc1 = Linear(input_dim=32*15*15, output_dim=4096, act='relu')
        # dropout is only active at training time
        if is_train:
            self.drop_ratio1 = 0.5
            self.drop_ratio2 = 0.5
        else:
            self.drop_ratio1 = 0.0
            self.drop_ratio2 = 0.0
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        x = self.conv1(x)  # [5, 32, 108, 108]
        x = self.pool1(x)  # [5, 32, 54, 54]
        x = self.conv2(x)  # [5, 64, 56, 56]
        x = self.pool2(x)  # [5, 64, 28, 28]
        x = self.conv3(x)  # [5, 48, 29, 29]
        x = self.conv4(x)  # [5, 48, 30, 30]
        x = self.conv5(x)  # [5, 32, 31, 31]
        x = self.pool5(x)  # [5, 32, 15, 15]
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        # dropout after the fully connected layer to curb overfitting
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        x = fluid.layers.dropout(x, self.drop_ratio2)
        x = self.fc3(x)
        return x
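The per-layer shape comments in forward() (written for a batch of 5) can be verified by pushing a random batch through the network; this is a quick verification snippet, not part of the camp code.

# Verify the shape comments with a random batch of 5 images.
with fluid.dygraph.guard():
    net = AlexNet(num_classes=10)
    x = fluid.dygraph.to_variable(np.random.randn(5, 3, 100, 100).astype('float32'))
    print(net(x).shape)  # expected: [5, 10]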

Training

# Train with the dynamic graph (dygraph) API
with fluid.dygraph.guard():
    model = AlexNet(10, True)  # instantiate the model
    model.train()              # training mode
    boundaries = [20000, 30000]     # steps at which the learning rate decays
    values = [0.01, 0.001, 0.0001]  # learning rate for each stage
    # SGD with a piecewise-decayed learning rate
    opt = fluid.optimizer.SGDOptimizer(
        learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values),
        parameter_list=model.parameters())

    epochs_num = 300  # number of epochs

    for pass_num in range(epochs_num):
        for batch_id, data in enumerate(train_reader()):
            images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]

            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)
            predict = model(image)  # forward pass
            loss = fluid.layers.softmax_with_cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)           # mean loss over the batch
            acc = fluid.layers.accuracy(predict, label)  # batch accuracy

            if batch_id != 0 and batch_id % 25 == 0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(
                    pass_num, batch_id, avg_loss.numpy(), acc.numpy()))

            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
        if pass_num % 50 == 0:
            # checkpoint every 50 epochs
            fluid.save_dygraph(model.state_dict(), 'AlexNet' + str(pass_num))
    fluid.save_dygraph(model.state_dict(), 'AlexNet')  # save the final model

Model Evaluation

# Model validation
with fluid.dygraph.guard():
    accs = []
    model_dict, _ = fluid.load_dygraph('AlexNet200')  # load the epoch-200 checkpoint
    model = AlexNet(10, False)
    model.load_dict(model_dict)  # load the parameters
    model.eval()  # evaluation mode
    for batch_id, data in enumerate(test_reader()):  # iterate over the test set
        images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]

        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)

        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])
    avg_acc = np.mean(accs)  # average accuracy over all batches
    print(avg_acc)

After the training above, accuracy on the test set reaches 0.9776786.

License Plate Recognition

I have already covered this part in a separate blog post, https://blog.csdn.net/weixin_43938942/article/details/105271436 , so I won't repeat it here.

Classification Networks in Paddle 1.7.1

Here I reimplemented LeNet, AlexNet, VGG, GoogLeNet, and ResNet in Paddle 1.7.1. To make them easy to run directly in AI Studio, the project is at https://aistudio.baidu.com/aistudio/projectdetail/372503 ; the networks were tested mainly on an eye disease dataset.
The code below reproduces the network definitions.

LeNet

LeNet was not tested on the eye disease data: those images carry too much information for LeNet to reach a good accuracy, and this LeNet is designed for a 1x32x32 input.

# Define the LeNet network structure; input dim = 1x32x32
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

class LeNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=2):
        super(LeNet, self).__init__()
        name_scope = self.full_name()
        self.name = 'LeNet'
        # Conv/pool blocks: each conv uses a sigmoid activation,
        # followed by 2x2 max pooling
        self.conv1 = Conv2D(num_channels=1, num_filters=6, filter_size=2, act='sigmoid')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=6, num_filters=16, filter_size=2, act='sigmoid')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        # Third conv layer; its input is the [16, 7, 7] feature map, so
        # num_channels is 16, and filter_size=7 reduces it to 1x1 so that
        # fc1's input_dim of 120 matches
        self.conv3 = Conv2D(num_channels=16, num_filters=120, filter_size=7, act='sigmoid')
        # Fully connected layers: the first outputs 64 neurons, the second
        # outputs one neuron per class
        self.fc1 = Linear(input_dim=120, output_dim=64, act='sigmoid')
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=64, output_dim=num_classes)

    # Forward pass
    def forward(self, x):
        x = self.conv1(x)  # [5, 6, 31, 31]
        x = self.pool1(x)  # [5, 6, 15, 15]
        x = self.conv2(x)  # [5, 16, 14, 14]
        x = self.pool2(x)  # [5, 16, 7, 7]
        x = self.conv3(x)  # [5, 120, 1, 1]
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        return x
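A quick check that the 1x32x32 input indeed flows through to the classifier:

# Sanity check: LeNet expects a 1x32x32 input.
with fluid.dygraph.guard():
    net = LeNet(num_classes=2)
    x = to_variable(np.random.randn(5, 1, 32, 32).astype('float32'))
    print(net(x).shape)  # expected: [5, 2]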

AlexNet

AlexNet network structure. Some of the hyper-parameters here are not ideal; adjust them to your own needs.

# Define the AlexNet network structure
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

class AlexNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=2):
        super(AlexNet, self).__init__()
        name_scope = self.full_name()
        self.name = 'AlexNet'
        # Like LeNet, AlexNet extracts features with conv and pooling layers;
        # unlike LeNet, the activation function is 'relu'
        self.conv1 = Conv2D(num_channels=3, num_filters=96, filter_size=11, stride=4, padding=5, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=96, num_filters=256, filter_size=5, stride=1, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv3 = Conv2D(num_channels=256, num_filters=384, filter_size=3, stride=1, padding=1, act='relu')
        self.conv4 = Conv2D(num_channels=384, num_filters=384, filter_size=3, stride=1, padding=1, act='relu')
        self.conv5 = Conv2D(num_channels=384, num_filters=256, filter_size=3, stride=1, padding=1, act='relu')
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')

        self.fc1 = Linear(input_dim=256*7*7, output_dim=4096, act='relu')
        self.drop_ratio1 = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop_ratio2 = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        # the shape comments assume a [5, 3, 224, 224] input
        x = self.conv1(x)  # [5, 96, 56, 56]
        x = self.pool1(x)  # [5, 96, 28, 28]
        x = self.conv2(x)  # [5, 256, 28, 28]
        x = self.pool2(x)  # [5, 256, 14, 14]
        x = self.conv3(x)  # [5, 384, 14, 14]
        x = self.conv4(x)  # [5, 384, 14, 14]
        x = self.conv5(x)  # [5, 256, 14, 14]
        x = self.pool5(x)  # [5, 256, 7, 7]
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc1(x)
        # dropout after the fully connected layers to curb overfitting
        x = fluid.layers.dropout(x, self.drop_ratio1)
        x = self.fc2(x)
        x = fluid.layers.dropout(x, self.drop_ratio2)
        x = self.fc3(x)
        return x

VGG

VGG network structure. The network is assembled from five ConvPool blocks; ConvPool itself is defined in the AI Studio project rather than in this post, so a reconstruction is sketched below.
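A minimal ConvPool reconstruction, consistent with the parameters the VGG code passes, could look like this. This is my sketch under those assumptions, not the project's exact code; it runs `groups` consecutive convolutions and then one pooling layer.

# A group of `groups` conv layers followed by one pooling layer
# (reconstruction of the ConvPool block used by the VGG code below).
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D

class ConvPool(fluid.dygraph.Layer):
    def __init__(self, num_channels, num_filters, filter_size, pool_size,
                 pool_stride, groups, pool_padding=0, pool_type='max',
                 conv_stride=1, conv_padding=1, act=None):
        super(ConvPool, self).__init__()
        self._conv_layers = []
        for i in range(groups):
            conv = self.add_sublayer(
                'conv_%d' % i,
                Conv2D(num_channels=num_channels if i == 0 else num_filters,
                       num_filters=num_filters,
                       filter_size=filter_size,
                       stride=conv_stride,
                       padding=conv_padding,
                       act=act))  # activation applied after each conv
            self._conv_layers.append(conv)
        self._pool = Pool2D(pool_size=pool_size, pool_stride=pool_stride,
                            pool_padding=pool_padding, pool_type=pool_type)

    def forward(self, x):
        for conv in self._conv_layers:
            x = conv(x)
        return self._pool(x)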

import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

class VGG(fluid.dygraph.Layer):
    def __init__(self, num_classes=2, conv_arch=((2, 64),
                                (2, 128), (3, 256), (3, 512), (3, 512))):
        super(VGG, self).__init__()
        self.name = 'VGG'
        self.vgg_blocks = []
        iter_id = 0
        # Add the vgg_blocks: five in total; the number of conv layers and
        # the output channels of each block are given by conv_arch
        channels = 3
        for (num_convs, num_channels) in conv_arch:
            block = self.add_sublayer('block_' + str(iter_id),
                ConvPool(num_channels=channels,
                    num_filters=num_channels,
                    filter_size=3,
                    pool_size=2,
                    pool_stride=2,
                    groups=num_convs,
                    pool_padding=0,
                    pool_type='max',
                    conv_stride=1,
                    conv_padding=1,
                    act='relu'))  # relu after each conv in a VGG block
            channels = num_channels
            self.vgg_blocks.append(block)
            iter_id += 1
        self.fc1 = Linear(input_dim=512*7*7, output_dim=4096, act='relu')
        self.drop1_ratio = 0.5
        self.fc2 = Linear(input_dim=4096, output_dim=4096, act='relu')
        self.drop2_ratio = 0.5
        self.fc3 = Linear(input_dim=4096, output_dim=num_classes)

    def forward(self, x):
        for item in self.vgg_blocks:
            x = item(x)
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = fluid.layers.dropout(self.fc1(x), self.drop1_ratio)
        x = fluid.layers.dropout(self.fc2(x), self.drop2_ratio)
        x = self.fc3(x)
        return x
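As with the other networks, a quick shape check: a 3x224x224 input is halved by each of the five blocks (224 down to 7), which matches the 512*7*7 input that fc1 expects.

# Sanity check for VGG with a 224x224 input.
with fluid.dygraph.guard():
    net = VGG(num_classes=2)
    x = to_variable(np.random.randn(1, 3, 224, 224).astype('float32'))
    print(net(x).shape)  # expected: [1, 2]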

GoogLeNet

GoogLeNet network structure

# Building GoogLeNet
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

# GoogLeNet
# Define the Inception block
class Inception(fluid.dygraph.Layer):
    def __init__(self, num_channels, c1, c2, c3, c4):
        '''
        Inception block.
        c1: output channels of the 1x1 conv on branch 1 (int)
        c2: output channels on branch 2 (tuple or list);
            c2[0] is the 1x1 conv, c2[1] is the 3x3 conv
        c3: output channels on branch 3 (tuple or list);
            c3[0] is the 1x1 conv, c3[1] is the 5x5 conv
        c4: output channels of the 1x1 conv on branch 4 (int)
        '''
        super(Inception, self).__init__()
        # Create the operations used on each branch of the Inception block
        self.p1_1 = Conv2D(num_channels=num_channels, num_filters=c1,
                           filter_size=1, act='relu')
        self.p2_1 = Conv2D(num_channels=num_channels, num_filters=c2[0],
                           filter_size=1, act='relu')
        self.p2_2 = Conv2D(num_channels=c2[0], num_filters=c2[1],
                           filter_size=3, padding=1, act='relu')
        self.p3_1 = Conv2D(num_channels=num_channels, num_filters=c3[0],
                           filter_size=1, act='relu')
        self.p3_2 = Conv2D(num_channels=c3[0], num_filters=c3[1],
                           filter_size=5, padding=2, act='relu')
        self.p4_1 = Pool2D(pool_size=3, pool_stride=1, pool_padding=1, pool_type='max')
        self.p4_2 = Conv2D(num_channels=num_channels, num_filters=c4, filter_size=1, act='relu')

    def forward(self, x):
        # Branch 1: a single 1x1 conv, output c1 x w x h
        p1 = self.p1_1(x)
        # Branch 2: 1x1 conv + 3x3 conv, output c2[1] x w x h
        p2 = self.p2_2(self.p2_1(x))
        # Branch 3: 1x1 conv + 5x5 conv, output c3[1] x w x h
        p3 = self.p3_2(self.p3_1(x))
        # Branch 4: 3x3 max pooling + 1x1 conv, output c4 x w x h
        p4 = self.p4_2(self.p4_1(x))
        # Concatenate the branch outputs along the channel axis:
        # (c1 + c2[1] + c3[1] + c4) x w x h
        return fluid.layers.concat([p1, p2, p3, p4], axis=1)
    
class GoogLeNet(fluid.dygraph.Layer):
    def __init__(self, num_classes=2):
        super(GoogLeNet, self).__init__()
        # GoogLeNet consists of five stages, each followed by a pooling layer
        # Stage 1: one conv layer
        self.name = 'GoogLeNet'
        self.conv1 = Conv2D(num_channels=3, num_filters=64, filter_size=7,
                            padding=3, act='relu')
        # 3x3 max pooling
        self.pool1 = Pool2D(pool_size=3, pool_stride=2,
                            pool_padding=1, pool_type='max')
        # Stage 2: two conv layers
        self.conv2_1 = Conv2D(num_channels=64, num_filters=64,
                              filter_size=1, act='relu')
        self.conv2_2 = Conv2D(num_channels=64, num_filters=192,
                              filter_size=3, padding=1, act='relu')
        # 3x3 max pooling
        self.pool2 = Pool2D(pool_size=3, pool_stride=2,
                            pool_padding=1, pool_type='max')
        # Stage 3: two Inception blocks
        self.block3_1 = Inception(num_channels=192, c1=64, c2=(96, 128), c3=(16, 32), c4=32)
        self.block3_2 = Inception(num_channels=256, c1=128, c2=(128, 192), c3=(32, 96), c4=64)
        # 3x3 max pooling
        self.pool3 = Pool2D(pool_size=3, pool_stride=2,
                               pool_padding=1, pool_type='max')
        # Stage 4: five Inception blocks
        self.block4_1 = Inception(num_channels=480, c1=192, c2=(96, 208), c3=(16, 48), c4=64)
        self.block4_2 = Inception(num_channels=512, c1=160, c2=(112, 224), c3=(24, 64), c4=64)
        self.block4_3 = Inception(num_channels=512, c1=128, c2=(128, 256), c3=(24, 64), c4=64)
        self.block4_4 = Inception(num_channels=512, c1=112, c2=(144, 288), c3=(32, 64), c4=64)
        self.block4_5 = Inception(num_channels=528, c1=256, c2=(160, 320), c3=(32, 128), c4=128)
        # 3x3 max pooling
        self.pool4 = Pool2D(pool_size=3, pool_stride=2,
                               pool_padding=1, pool_type='max')
        # Stage 5: two Inception blocks
        self.block5_1 = Inception(num_channels=832, c1=256, c2=(160, 320), c3=(32, 128), c4=128)
        self.block5_2 = Inception(num_channels=832, c1=384, c2=(192, 384), c3=(48, 128), c4=128)
        # Global average pooling; with global_pooling=True the pool window
        # covers the whole feature map and pool_stride has no effect
        self.pool5 = Pool2D(pool_stride=1,
                               global_pooling=True, pool_type='avg')
        self.fc = Linear(input_dim=1024, output_dim=num_classes)

    def forward(self, x):
        x = self.pool1(self.conv1(x))
        x = self.pool2(self.conv2_2(self.conv2_1(x)))
        x = self.pool3(self.block3_2(self.block3_1(x)))
        x = self.block4_3(self.block4_2(self.block4_1(x)))
        x = self.pool4(self.block4_5(self.block4_4(x)))
        x = self.pool5(self.block5_2(self.block5_1(x)))
        x = fluid.layers.reshape(x, [x.shape[0], -1])
        x = self.fc(x)
        return x
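The channel bookkeeping in the Inception comments (output channels = c1 + c2[1] + c3[1] + c4) can be confirmed on a single block, here using block3_1's configuration:

# Confirm Inception's output channels: 64 + 128 + 32 + 32 = 256.
with fluid.dygraph.guard():
    block = Inception(num_channels=192, c1=64, c2=(96, 128), c3=(16, 32), c4=32)
    x = to_variable(np.random.randn(1, 192, 28, 28).astype('float32'))
    print(block(x).shape)  # expected: [1, 256, 28, 28]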

ResNet

ResNet network structure

# ResNet
# ResNet adds a BatchNorm layer after each conv layer to improve numerical stability
# Define the conv + batch-norm block
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable

class ConvBNLayer(fluid.dygraph.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        """
        num_channels: input channels of the conv layer
        num_filters: output channels of the conv layer
        stride: stride of the conv layer
        groups: number of groups for grouped convolution; groups=1 (default) disables it
        act: activation type; act=None (default) applies no activation
        """
        super(ConvBNLayer, self).__init__()

        # Create the conv layer
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False)

        # Create the BatchNorm layer (the activation is applied here)
        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y

# Define the residual block
# Each residual block applies three convolutions to its input, then adds the
# result back to the input through a shortcut connection
# If the third conv's output shape differs from the input's, a 1x1 conv is
# applied to the input to make the shapes match
class BottleneckBlock(fluid.dygraph.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True):
        super(BottleneckBlock, self).__init__()
        # First conv: 1x1
        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        # Second conv: 3x3
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        # Third conv: 1x1, with the output channels multiplied by 4
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None)

        # shortcut=True when conv2's output shape matches the block input;
        # otherwise a 1x1 conv reshapes the input to match conv2's output
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut

        self._num_channels_out = num_filters * 4

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        # If shortcut=True, add inputs directly to conv2's output;
        # otherwise run inputs through the 1x1 conv first
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        y = fluid.layers.elementwise_add(x=short, y=conv2)
        # Apply the final relu via a LayerHelper
        layer_helper = LayerHelper(self.full_name(), act='relu')
        return layer_helper.append_activation(y)
# Define the ResNet model
class ResNet(fluid.dygraph.Layer):
    def __init__(self, layers=50, class_dim=1):
        """
        layers: network depth; 50, 101, or 152
        class_dim: number of classes
        """
        super(ResNet, self).__init__()
        self.name = 'ResNet'
        self.layers = layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        if layers == 50:
            # ResNet50: stages 2-5 contain 3, 4, 6, and 3 residual blocks
            depth = [3, 4, 6, 3]
        elif layers == 101:
            # ResNet101: stages 2-5 contain 3, 4, 23, and 3 residual blocks
            depth = [3, 4, 23, 3]
        elif layers == 152:
            # ResNet152: stages 2-5 contain 3, 8, 36, and 3 residual blocks
            depth = [3, 8, 36, 3]

        # Output channels of the convs used in the residual blocks
        num_filters = [64, 128, 256, 512]

        # Stage 1: a 7x7 conv followed by a max pooling layer
        self.conv = ConvBNLayer(
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        # Stages 2-5: c2, c3, c4, c5
        self.bottleneck_block_list = []
        num_channels = 64
        for block in range(len(depth)):
            shortcut = False
            for i in range(depth[block]):
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i),
                    BottleneckBlock(
                        num_channels=num_channels,
                        num_filters=num_filters[block],
                        # c3, c4, c5 use stride=2 in their first residual
                        # block; every other residual block uses stride=1
                        stride=2 if i == 0 and block != 0 else 1,
                        shortcut=shortcut))
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True

        # Global average pooling over the c5 output feature map
        self.pool2d_avg = Pool2D(pool_size=7, pool_type='avg', global_pooling=True)

        # stdv bounds the uniform initializer of the fc layer parameters
        import math
        stdv = 1.0 / math.sqrt(2048 * 1.0)
        # Final fully connected layer; output size equals the number of classes
        self.out = Linear(input_dim=2048, output_dim=class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                          initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        y = self.conv(inputs)
        y = self.pool2d_max(y)
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
        y = self.pool2d_avg(y)
        y = fluid.layers.reshape(y, [y.shape[0], -1])
        y = self.out(y)
        return y
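Finally, a quick instantiation check for ResNet50; the 3x224x224 input assumed here is what reduces to the 7x7 map before the global pooling.

# Instantiate ResNet50 and check the output shape on a dummy batch.
with fluid.dygraph.guard():
    net = ResNet(layers=50, class_dim=1)
    x = to_variable(np.random.randn(2, 3, 224, 224).astype('float32'))
    print(net(x).shape)  # expected: [2, 1]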

Finally, China's home-grown deep learning frameworks will have to compete with PyTorch, TensorFlow, and the other established platforms, but given the quality of its Chinese API documentation I believe Paddle will catch on in China.
