Computer Vision Backbone Models in PyTorch: VGG16, MobileNetV2, ResNet50

Computer vision backbone models (PyTorch)

    • (1) VGG16 pytorch
    • (2) MobileNetV1_pytorch
    • (3) MobileNetV2 pytorch
    • (4) MobileNetV3 pytorch
    • (5) ResNet50 pytorch

(1) VGG16 pytorch

  1. VGG16 extracts features with 3*3 convolutions while expanding the channel count, and shrinks the feature maps with max pooling. After five such stages (224 / 2^5 = 7, so the final feature map is 7*7*512) the features are flattened and passed through two 4096-dim fully connected layers and a final fully connected classification layer.

VGG16 architecture


'''
img[h,w,3] --> resize[224,224,3] -->
cnv1*2(k= 3,f = 64)[224,224,64] +   maxPool(s = 2)[112,112,64]-->
cnv2*2(k= 3,f = 128)[112,112,128] + maxPool(s = 2)[56,56,128]--->
cnv3*2(k= 3,f = 256)[56,56,256] +   maxPool(s = 2)[28,28,256]--->
cnv4*2(k= 3,f = 512)[28,28,512] +   maxPool(s = 2)[14,14,512]--->
cnv5*2(k= 3,f = 512)[14,14,512] +   maxPool(s = 2)[7,7,512]----->
flatten--> fc*2(4096) --> fc(1000) -->
out

'''

2. VGG16 code

'''
VGG16 flow
cfgs['D'] (layer configuration) -->
make_layers(cfgs['D']) (build the convolutional backbone, i.e. features) -->
VGG(make_layers(cfgs['D'])) (add the fully connected head) -->
pretrained (whether to load pretrained weights for transfer learning) -->
num_classes != 1000 (redefine the classification layer)
'''

import torch
import torchvision
import torch.nn as nn
try:
    from torchvision.models.utils import load_state_dict_from_url
except ImportError:  # newer torchvision moved this helper to torch.hub
    from torch.hub import load_state_dict_from_url

model_urls = {'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'}

class VGG(nn.Module):
    def __init__(self,features,num_classes=1000,init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7,7))  # output size of (7,7)
        self.classifier = nn.Sequential(
            nn.Linear(512*7*7,4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096,4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096,num_classes))
        if init_weights:
            self._initialize_weights()

    def forward(self,x):
        x = self.features(x)   # [2, 512, 7, 7]
        x = self.avgpool(x)    # [2, 512, 7, 7]
        x = torch.flatten(x,1) # [2, 25088]  flatten everything from dim 1 onward
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m,nn.Conv2d):
                nn.init.normal_(m.weight)
                # nn.init.uniform(m.weight)
                #nn.init.kaiming_normal(m.weight,mode='fan_out',nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias,0)
            elif isinstance(m,nn.BatchNorm2d):
                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.bias,0)
            elif isinstance(m,nn.Linear):
                nn.init.constant_(m.weight,0.01)
                nn.init.constant_(m.bias,0)

def make_layers(cfg,batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size = 2,stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding=1)
            if batch_norm:
                layers += [conv2d,nn.BatchNorm2d(v),nn.ReLU(inplace=True)]
            else:
                layers += [conv2d,nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

cfgs = {
    'D':[64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']}
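
# In cfgs['D'] above, 'M' marks a MaxPool2d(kernel_size=2, stride=2) layer and each
# integer is the out_channels of a 3x3 convolution built by make_layers.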

def vgg16(pretrained=False,progress=True,num_classes=1000):
    model = VGG(make_layers(cfgs['D']))
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['vgg16'],
                                              model_dir = './model_data',
                                              progress=progress)
        model.load_state_dict(state_dict,strict=False)

    if num_classes != 1000:
        model.classifier = nn.Sequential(
            nn.Linear(512*7*7,4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096,4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096,num_classes))
    return model


if __name__ == '__main__':
    x = torch.randn([2,3,224,224])
    model = vgg16(num_classes=2)
    y = model(x)
    print(y.shape)

'''
torch.Size([2, 2])

Process finished with exit code 0
'''
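
For transfer learning, a minimal usage sketch (assuming the module above is in scope, an internet connection for the pretrained weights, and a writable ./model_data cache directory):

# Hypothetical fine-tuning setup: load ImageNet weights, freeze the convolutional
# backbone, and train only the new 2-class classifier head.
model = vgg16(pretrained=True, num_classes=2)
for p in model.features.parameters():
    p.requires_grad = False
optimizer = torch.optim.SGD(model.classifier.parameters(), lr=1e-3, momentum=0.9)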

(2) MobileNetV1_pytorch

MobileNetV1 uses depthwise separable convolutions to greatly reduce the number of parameters: a 3x3 kernel is first applied to each of the n input channels separately (depthwise), giving n feature maps, and then an ordinary 1x1 (pointwise) convolution mixes them into the output features. For example, to map 16 input features to 32 output features, a standard convolution needs 16x3x3x32 = 4608 parameters, while a depthwise separable convolution needs only 16x3x3x1 + 16x1x1x32 = 656. This is how depthwise separable convolutions shrink the model so much.
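
A minimal sketch to verify the parameter arithmetic above with PyTorch (assuming bias-free convolutions, as in the conv_bn/conv_dw helpers below):

import torch.nn as nn

standard  = nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False)
depthwise = nn.Conv2d(16, 16, kernel_size=3, padding=1, groups=16, bias=False)
pointwise = nn.Conv2d(16, 32, kernel_size=1, bias=False)

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(standard))                      # 16*3*3*32 = 4608
print(count(depthwise) + count(pointwise))  # 16*3*3*1 + 16*1*1*32 = 656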


'''
MobileNetV1
'''
import torch
import torch.nn as nn

def conv_bn(inp, oup, stride = 1):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )

def conv_dw(inp, oup, stride = 1):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU6(inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )

class MobileNetV1(nn.Module):
    def __init__(self):
        super(MobileNetV1, self).__init__()
        self.stage1 = nn.Sequential(
            # 640,640,3 -> 320,320,32
            conv_bn(3, 32, 2),
            # 320,320,32 -> 320,320,64
            conv_dw(32, 64, 1),

            # 320,320,64 -> 160,160,128
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),

            # 160,160,128 -> 80,80,256
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
        )
        # 80,80,256 -> 40,40,512
        self.stage2 = nn.Sequential(
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
        )
        # 40,40,512 -> 20,20,1024
        self.stage3 = nn.Sequential(
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
        )
        self.avg = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(1024, 1000)

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avg(x)
        # x = self.model(x)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x

def mobilenet_v1(pretrained=False, progress=True,num_classes= 1000):
    model = MobileNetV1()
    if pretrained:
        print("mobilenet_v1 has no pretrained model")
    if num_classes != 1000:
        model.fc = nn.Linear(1024, num_classes)
    return model

if __name__ == "__main__":
    input = torch.randn([8,3,416,416])
    model = mobilenet_v1(num_classes= 2)
    out = model(input)
    print(out.shape)
    
'''
torch.Size([8, 2])

Process finished with exit code 0
'''
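
The stage1/stage2/stage3 split (and the 640,640 shape comments) suggests using this backbone for detection. A hedged sketch of pulling multi-scale features, run in the same module so MobileNetV1 and torch are in scope, assuming a 640x640 input:

backbone = MobileNetV1()
x  = torch.randn([1, 3, 640, 640])
c3 = backbone.stage1(x)   # [1, 256, 80, 80]
c4 = backbone.stage2(c3)  # [1, 512, 40, 40]
c5 = backbone.stage3(c4)  # [1, 1024, 20, 20]
print(c3.shape, c4.shape, c5.shape)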



(3) MobileNetV2 pytorch

MobileNetV2 builds on MobileNetV1 by adding the inverted residual block (Inverted resblock). 'Inverted' means the input is first expanded with a 1x1 convolution, features are then extracted with a 3x3 depthwise convolution, and a final 1x1 convolution projects back down to fewer channels. 'resblock' means the block input is added to that output. As shown below:


'''
(1) Inverted resblock
    Inverted: input --> conv2d(1*1 expand)+BN+ReLU6 --> DepthwiseConv2D(3*3)+BN+ReLU6 --> conv2d(1*1 project)+BN --> output
    resblock: output + input (only when stride = 1 and the input/output channel counts match)

(2) MobileNetV2
                 input.shape   expand_ratio   channels   times   strides
     conv2d        224^2*3          -            32        1        2
     bottleneck    112^2*32         1            16        1        1
     bottleneck    112^2*16         6            24        2        2
     bottleneck    56^2*24          6            32        3        2
     bottleneck    28^2*32          6            64        4        2
     bottleneck    14^2*64          6            96        3        1
     bottleneck    14^2*96          6            160       3        2
     bottleneck    7^2*160          6            320       1        1
     conv2d 1*1    7^2*320          -            1280      1        1
     avgpool 7*7   7^2*1280         -            -         1        -
     conv2d 1*1    1*1*1280         -            k         -        -

'''
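
A standalone toy sketch of the inverted-residual pattern described above (expand_ratio=6, stride 1, 16 input and 16 output channels so the residual add applies; the InvertedResidual class below is the version actually used):

import torch
import torch.nn as nn

inp, oup, expand = 16, 16, 6
hidden = inp * expand
block = nn.Sequential(
    nn.Conv2d(inp, hidden, 1, bias=False), nn.BatchNorm2d(hidden), nn.ReLU6(inplace=True),                         # 1x1 expand
    nn.Conv2d(hidden, hidden, 3, 1, 1, groups=hidden, bias=False), nn.BatchNorm2d(hidden), nn.ReLU6(inplace=True), # 3x3 depthwise
    nn.Conv2d(hidden, oup, 1, bias=False), nn.BatchNorm2d(oup),                                                    # 1x1 linear project
)
x = torch.randn(1, inp, 56, 56)
y = x + block(x)   # residual add: shapes match because stride == 1 and inp == oup
print(y.shape)     # torch.Size([1, 16, 56, 56])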

'''
MobileNetV2
'''

import torch
import torch.nn as nn
try:
    from torchvision.models.utils import load_state_dict_from_url
except ImportError:  # newer torchvision moved this helper to torch.hub
    from torch.hub import load_state_dict_from_url


__all__ = ['MobileNetV2', 'mobilenet_v2']

model_urls = {
    'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'}


def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value,
                int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
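
# e.g. _make_divisible(32 * 0.75, 8) -> 24 and _make_divisible(33, 8) -> 32:
# the value is rounded to the nearest multiple of divisor without dropping below 90% of v.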


class ConvBNReLU(nn.Sequential):
    def __init__(self, in_planes, out_planes, kernel_size=3,
                 stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size,
                      stride, padding, groups=groups,
                      bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU6(inplace=True)
        )


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            layers += [ConvBNReLU(inp, hidden_dim, kernel_size=1)]
        layers += [
            ConvBNReLU(hidden_dim, hidden_dim,
                       stride=stride, groups=hidden_dim),
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup)
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, num_classes=1000, width_mult=1.0,
                 inverted_residual_setting=None,
                 round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280

        if inverted_residual_setting is None:
            inverted_residual_setting = [
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]
        if len(inverted_residual_setting) == 0 or len(
                inverted_residual_setting[0]) != 4:
            raise ValueError(
                "inverted_residual_setting should be non-empty "
                "or a 4-element list, got {}".format(
                    inverted_residual_setting))
        input_channel = _make_divisible(
            input_channel * width_mult, round_nearest)
        self.last_channel = _make_divisible(
            last_channel * max(1.0, width_mult),
            round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]
        for t, c, n, s in inverted_residual_setting:
            out_channel = _make_divisible(c * width_mult,
                                          round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features += [block(input_channel, out_channel,
                                   stride, expand_ratio=t)]
                input_channel = out_channel
        features += [ConvBNReLU(input_channel, self.last_channel,kernel_size=1)]
        self.features = nn.Sequential(*features)
        self.classifier = nn.Sequential(nn.Dropout(0.2),nn.Linear(self.last_channel,num_classes))

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight)
                # nn.init.uniform(m.weight)
                # nn.init.kaiming_normal(m.weight,mode='fan_out',nonlinearity='relu')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.weight, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.features(x)
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x


def mobilenet_v2(pretrained = False, progress = True, num_classes = 1000):
    model = MobileNetV2()
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
                                              model_dir = './model_data',
                                              progress = progress)
        model.load_state_dict(state_dict,strict=False)
    if num_classes != 1000:
        model.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(model.last_channel,num_classes))
    return model




if __name__ == '__main__':
    x = torch.randn([20, 3, 224, 224])
    # convBnReLU = ConvBNReLU(in_planes=3,out_planes=100)
    # convBnReLU(x).shape
    # output = InvertedResidual(3, 24, 2, 2)(x)
    # model = MobileNetV2()
    model = mobilenet_v2(num_classes = 20000)
    output = model(x)
    print(output.shape)
'''
torch.Size([20, 20000])

Process finished with exit code 0
'''
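
width_mult scales every channel count (rounded by _make_divisible). A hedged sketch comparing parameter counts, instantiating MobileNetV2 directly because the mobilenet_v2 helper above does not expose width_mult:

count = lambda m: sum(p.numel() for p in m.parameters())
full = MobileNetV2(width_mult=1.0)
slim = MobileNetV2(width_mult=0.5)
print(count(full), count(slim))  # roughly 3.5M vs 2.0M parameters (approximate figures)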

(4) MobileNetV3 pytorch

The highlights of MobileNetV3 are the squeeze-and-excitation (SE) attention added inside the InvertedResidual block and the use of the hard-swish (HS) activation.
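
The h_sigmoid and h_swish modules below compute ReLU6(x+3)/6 and x*ReLU6(x+3)/6 respectively. A quick sanity check against PyTorch's built-in nn.Hardswish (assuming torch >= 1.6, which provides it):

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1000)
hard_swish = x * F.relu6(x + 3) / 6                   # what h_swish below computes
print(torch.allclose(hard_swish, nn.Hardswish()(x)))  # expected: True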


import torch.nn as nn
import math
import torch

def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

class h_sigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        return self.relu(x + 3) / 6


class h_swish(nn.Module):
    def __init__(self, inplace=True):
        super(h_swish, self).__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        return x * self.sigmoid(x)


class SELayer(nn.Module):
    def __init__(self, channel, reduction=4):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, _make_divisible(channel // reduction, 8)),
            nn.ReLU(inplace=True),
            nn.Linear(_make_divisible(channel // reduction, 8), channel),
            h_sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)  # [b,c,h,w] --> [b,c,1,1] --> [b,c]
        y = self.fc(y).view(b, c, 1, 1)  # squeeze (reduce channels) --> excite (restore channels)
        return x * y


def conv_3x3_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        h_swish()
    )


def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        h_swish()
    )


class InvertedResidual(nn.Module):
    def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        self.identity = stride == 1 and inp == oup

        if inp == hidden_dim:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # Squeeze-and-Excite
                SELayer(hidden_dim) if use_se else nn.Identity(),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                # Squeeze-and-Excite
                SELayer(hidden_dim) if use_se else nn.Identity(),
                h_swish() if use_hs else nn.ReLU(inplace=True),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.identity:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV3(nn.Module):
    def __init__(self, num_classes=1000, width_mult=1.):
        self.num_classes = num_classes
        super(MobileNetV3, self).__init__()
        # setting of inverted residual blocks
        self.cfgs = [
            # k, t, c, SE, HS, s  (kernel_size, expand_ratio, out_channels, use_se, use_hs, stride)
            [3,   1,  16, 0, 0, 1],
            [3,   4,  24, 0, 0, 2],
            [3,   3,  24, 0, 0, 1],
            [5,   3,  40, 1, 0, 2],
            [5,   3,  40, 1, 0, 1],
            [5,   3,  40, 1, 0, 1],
            [3,   6,  80, 0, 1, 2],
            [3, 2.5,  80, 0, 1, 1],
            [3, 2.3,  80, 0, 1, 1],
            [3, 2.3,  80, 0, 1, 1],
            [3,   6, 112, 1, 1, 1],
            [3,   6, 112, 1, 1, 1],
            [5,   6, 160, 1, 1, 2],
            [5,   6, 160, 1, 1, 1],
            [5,   6, 160, 1, 1, 1]
        ]

        input_channel = _make_divisible(16 * width_mult, 8)  # 16
        layers = [conv_3x3_bn(3, input_channel, 2)]

        block = InvertedResidual
        for k, t, c, use_se, use_hs, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, 8)
            exp_size = _make_divisible(input_channel * t, 8)
            layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs))
            input_channel = output_channel
        self.features = nn.Sequential(*layers)

        self.conv = conv_1x1_bn(input_channel, exp_size)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        output_channel = _make_divisible(1280 * width_mult, 8) if width_mult > 1.0 else 1280
        self.classifier = nn.Sequential(
            nn.Linear(exp_size, output_channel),
            h_swish(),
            nn.Dropout(0.2),
            nn.Linear(output_channel, num_classes),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.conv(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

def mobilenet_v3(pretrained=False, **kwargs):
    model = MobileNetV3(**kwargs)
    if pretrained:
        state_dict = torch.load('./model_data/mobilenetv3-large-1cd25616.pth')
        model.load_state_dict(state_dict, strict=True)
    if model.num_classes != 1000:
        in_channel = next(model.classifier[-1].parameters()).shape[1]
        model.classifier[-1] = nn.Linear(in_channel, model.num_classes)
    return model

if __name__ == '__main__':
    x = torch.randn([8,3,416,416])
    model = mobilenet_v3(num_classes=2)
    y = model(x)
    print(y.shape)
'''
torch.Size([8, 2])

Process finished with exit code 0
'''

'''
features(x)
Sequential(
  (0): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): h_swish(
      (sigmoid): h_sigmoid(
        (relu): ReLU6(inplace=True)
      )
    )
  )
  (1): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Identity()
      (4): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (2): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Identity()
      (6): ReLU(inplace=True)
      (7): Conv2d(64, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (3): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(24, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(72, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=72, bias=False)
      (4): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Identity()
      (6): ReLU(inplace=True)
      (7): Conv2d(72, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (4): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(24, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(72, 72, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=72, bias=False)
      (4): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=72, out_features=24, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=24, out_features=72, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): ReLU(inplace=True)
      (7): Conv2d(72, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (5): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(40, 120, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(120, 120, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=120, bias=False)
      (4): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=120, out_features=32, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=32, out_features=120, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): ReLU(inplace=True)
      (7): Conv2d(120, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (6): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(40, 120, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(120, 120, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=120, bias=False)
      (4): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=120, out_features=32, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=32, out_features=120, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): ReLU(inplace=True)
      (7): Conv2d(120, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (7): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=240, bias=False)
      (4): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Identity()
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (8): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(80, 200, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(200, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=200, bias=False)
      (4): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Identity()
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(200, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (9): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(80, 184, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(184, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=184, bias=False)
      (4): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Identity()
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(184, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (10): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(80, 184, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(184, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=184, bias=False)
      (4): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Identity()
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(184, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (11): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
      (4): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=480, out_features=120, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=120, out_features=480, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (12): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False)
      (4): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=672, out_features=168, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=168, out_features=672, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (13): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(672, 672, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=672, bias=False)
      (4): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=672, out_features=168, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=168, out_features=672, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(672, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (14): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False)
      (4): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=960, out_features=240, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=240, out_features=960, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (15): InvertedResidual(
    (conv): Sequential(
      (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (3): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False)
      (4): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): SELayer(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=960, out_features=240, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=240, out_features=960, bias=True)
          (3): h_sigmoid(
            (relu): ReLU6(inplace=True)
          )
        )
      )
      (6): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
      (7): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
)




'''

'''
conv_1x1_bn(input_channel, exp_size):
(17) Sequential(
  (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): h_swish(
    (sigmoid): h_sigmoid(
      (relu): ReLU6(inplace=True)
    )
  )
)
'''


'''
nn.AdaptiveAvgPool2d((1, 1)):
(18) AdaptiveAvgPool2d(output_size=(1, 1))

'''

'''
classifier: 
(19) Sequential(
  (0): Linear(in_features=960, out_features=1280, bias=True)
  (1): h_swish(
    (sigmoid): h_sigmoid(
      (relu): ReLU6(inplace=True)
    )
  )
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=1280, out_features=1000, bias=True)
)


'''

(5) ResNet50 pytorch

ResNet50 is built from two kinds of modules: the Conv Block and the Identity Block. The Conv Block downsamples and changes the channel count (its shortcut branch also contains a convolution), while the Identity Block is the classic residual structure whose input and output shapes match, so it can be stacked to deepen the network and keep extracting features.

'''
Conv Block:
    input --> Conv2d + BatchNorm + ReLU --> Conv2d + BatchNorm + ReLU--> Conv2d + BatchNorm --> o1
    input --> Conv2d + BatchNorm -->o2
    o1 + o2 --> ReLU --> output

Identity Block:
    input --> Conv2d + BatchNorm + ReLU --> Conv2d + BatchNorm + ReLU--> Conv2d + BatchNorm --> o1
    input + o1 --> ReLU --> output
    
ResNet50:
    input
    Zeropad
    Conv2d  + BatchNorm + ReLU + MaxPool
    Conv Block + Identity Block*2
    Conv Block + Identity Block*3
    Conv Block + Identity Block*5
    Conv Block + Identity Block*2
    AveragePooling2D
    Flatten
    FC
    output
'''

'''
Bottleneck acts as a Conv Block or an Identity Block depending on whether a downsample module is passed in. downsample is created when the stride is not 1 or when the input channel count differs from the expanded output channel count (planes * expansion); it applies a 1*1 convolution (with the block's stride, 2 in the downsampling stages) plus BatchNorm to the shortcut branch.

The core of Bottleneck is (conv1*1 + conv3*3 + conv1*1) + residual.

_make_layer stacks one Conv Block followed by several Identity Blocks.

ResNet assembles these layers into the full network structure.

resnet50 instantiates ResNet, optionally loads pretrained weights, and replaces the classification head when the number of classes changes.
'''
import torch
import torch.nn as nn
try:
    from torchvision.models.utils import load_state_dict_from_url
except ImportError:  # newer torchvision moved this helper to torch.hub
    from torch.hub import load_state_dict_from_url

model_urls = {
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
}


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)

def conv1x1(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        self.block = block
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]

        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

def resnet50(pretrained=False, progress=True, num_classes=1000):
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['resnet50'], model_dir='./model_data',
                                              progress=progress)
        model.load_state_dict(state_dict)

    if num_classes != 1000:
        model.fc = nn.Linear(512 * model.block.expansion, num_classes)
    return model

if __name__ == '__main__':
    x = torch.randn([20,3,224,224])
    mod = resnet50(num_classes=2)
    output = mod(x)
    print(output.shape)
    
'''
torch.Size([20, 2])

Process finished with exit code 0
'''
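
A hedged usage sketch of turning the ResNet defined above into a feature extractor, run in the same module so torch, nn and resnet50 are in scope (assumes a 224x224 input):

model = resnet50(num_classes=2)
backbone = nn.Sequential(*list(model.children())[:-2])  # keep conv1 ... layer4, drop avgpool and fc
feat = backbone(torch.randn(1, 3, 224, 224))
print(feat.shape)  # torch.Size([1, 2048, 7, 7])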

Comparing the five models, VGG16 has by far the largest parameter footprint (about 574 MB), followed by ResNet50 (about 97 MB); MobileNetV3 (about 22.1 MB), MobileNetV1 (about 17 MB), and MobileNetV2 (about 14.2 MB) are progressively smaller.
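
These figures can be spot-checked against the definitions above; a minimal sketch, assuming the five constructor functions are importable into one script and counting float32 parameters at 4 bytes each:

size_mb = lambda m: sum(p.numel() for p in m.parameters()) * 4 / 1024 ** 2
for name, fn in [('vgg16', vgg16), ('resnet50', resnet50), ('mobilenet_v3', mobilenet_v3),
                 ('mobilenet_v1', mobilenet_v1), ('mobilenet_v2', mobilenet_v2)]:
    print(name, round(size_mb(fn()), 1), 'MB')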
