VGG16结构
'''
img[h,w,3] --> resize[224,224,3] -->
cnv1*2(k= 3,f = 64)[224,224,64] + maxPool(s = 2)[112,112,64]-->
cnv2*2(k= 3,f = 128)[112,112,128] + maxPool(s = 2)[56,56,128]--->
cnv3*3(k= 3,f = 256)[56,56,256] + maxPool(s = 2)[28,28,256]--->
cnv4*3(k= 3,f = 512)[28,28,512] + maxPool(s = 2)[14,14,512]--->
cnv5*3(k= 3,f = 512)[14,14,512] + maxPool(s = 2)[7,7,512]----->
flatten--> fc*2(4096) --> fc(1000) -->
out
'''
2 . VGG16代码
'''
VGG16 流程
cfgs['D'] (模型参数) -->
make_layers(cfgs['D']) (生成主干features)-->
VGG(make_layers(cfgs['D'])) (全连接 )-->
pretrained (是否迁移学习) -->
num_classes != 1000 (重新定义分类层)
'''
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
from torchvision.models.utils import load_state_dict_from_url
# Checkpoint URL looked up by vgg16(pretrained=True), keyed by architecture name.
model_urls = {'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'}
class VGG(nn.Module):
    """VGG classifier: conv backbone -> 7x7 adaptive average pool -> 3-layer MLP head.

    Args:
        features: backbone module; its output must have 512 channels because
            the head's first linear layer expects 512 * 7 * 7 inputs.
        num_classes: output size of the last linear layer.
        init_weights: when True, re-initialise every conv / batch-norm /
            linear layer through ``_initialize_weights``.

    Note:
        The head lives under the misspelled attribute ``clssifier``. The name
        is kept on purpose: code that swaps the head assigns to
        ``model.clssifier``, and renaming it here would silently detach that
        assignment. As a consequence the state-dict keys are ``clssifier.*``.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        # Forces a 7x7 spatial size so the head works for any input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.clssifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes))
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for an image batch."""
        x = self.features(x)
        x = self.avgpool(x)       # (N, 512, 7, 7)
        x = torch.flatten(x, 1)   # flatten everything after the batch axis -> (N, 25088)
        x = self.clssifier(x)
        return x

    def _initialize_weights(self):
        """Initialise all conv, batch-norm and linear layers in place.

        Conv weights use Kaiming-normal (fan_out, ReLU) so activations keep a
        sane scale through the deep, normalisation-free stack; an unscaled
        N(0, 1) would blow up. Linear weights are drawn from N(0, 0.01):
        a constant fill would make every unit of a layer identical, so they
        could never learn different features.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
def make_layers(cfg, batch_norm=False):
    """Turn a layer specification into the convolutional backbone.

    Args:
        cfg: iterable whose items are either the string ``'M'`` (insert a
            2x2, stride-2 max-pool) or an int (insert a 3x3, padding-1 conv
            with that many output channels, followed by ReLU).
        batch_norm: when True, put a BatchNorm2d between each conv and ReLU.

    Returns:
        nn.Sequential holding the layers in specification order. The first
        conv expects 3 input channels.
    """
    modules = []
    channels = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next conv consumes what this one produced.
        channels = spec
    return nn.Sequential(*modules)
# Layer specifications consumed by make_layers: an int is the output channel
# count of a 3x3 conv, 'M' marks a max-pool. 'D' is the layout used by vgg16().
cfgs = {
'D':[64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']}
def vgg16(pretrained=False, progress=True, num_classes=1000):
    """Build a VGG16 (configuration 'D', no batch norm).

    Args:
        pretrained: when True, download (into ``./model_data``) and load the
            checkpoint listed in ``model_urls['vgg16']``.
        progress: forwarded to the downloader to toggle its progress bar.
        num_classes: when different from 1000, the whole head is replaced by
            a freshly initialised one with that many outputs.

    Returns:
        The constructed ``VGG`` model.
    """
    model = VGG(make_layers(cfgs['D']))
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['vgg16'],
                                              model_dir='./model_data',
                                              progress=progress)
        # The checkpoint stores the head as "classifier.*", but VGG keeps it
        # under the misspelled attribute "clssifier". Without this rename the
        # head weights never match and were silently skipped by strict=False.
        prefix = 'classifier.'
        state_dict = {
            ('clssifier.' + key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }
        # Strict loading so any remaining key mismatch fails loudly.
        model.load_state_dict(state_dict, strict=True)
    if num_classes != 1000:
        model.clssifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes))
    return model
if __name__ == '__main__':
    # Smoke test: push two random 224x224 RGB images through a 2-class VGG16
    # and print the shape of the returned scores.
    batch = torch.randn(2, 3, 224, 224)
    net = vgg16(num_classes=2)
    print(net(batch).shape)
'''
torch.Size([2, 2])
Process finished with exit code 0
'''
MobileNetV1用深度可分离卷积大大减少了参数的数量:先用3x3的卷积核逐通道地与输入的n个特征分别卷积,得到n个输出特征(深度卷积),再用1x1卷积核进行普通卷积(逐点卷积)得到输出特征。比如输入16个特征、需要输出32个特征时,普通卷积的参数量为16x3x3x32=4608个,而深度可分离卷积的参数量为16x3x3x1+16x1x1x32=656个。由此可见,深度可分离卷积可以大大减少模型的参数。
'''
'''
import torch
import torch.nn as nn
def conv_bn(inp, oup, stride=1):
    """Return a 3x3 conv (padding 1, no bias) -> BatchNorm2d -> ReLU6 stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the convolution.
    """
    conv = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, act)
def conv_dw(inp, oup, stride=1):
    """Return a depthwise-separable convolution block.

    The block is a 3x3 depthwise conv (``groups=inp``, carries the stride)
    followed by a 1x1 pointwise conv that maps ``inp`` to ``oup`` channels;
    each conv is bias-free and followed by BatchNorm2d and ReLU6.
    """
    depthwise = [
        nn.Conv2d(inp, inp, kernel_size=3, stride=stride, padding=1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU6(inplace=True),
    ]
    pointwise = [
        nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*depthwise, *pointwise)
class MobileNetV1(nn.Module):
    """MobileNetV1 classifier built from depthwise-separable blocks.

    Args:
        num_classes: output size of the final linear layer. Defaults to 1000,
            so ``MobileNetV1()`` behaves as before.

    The three stages reduce the spatial size by 8x, 16x and 32x overall.
    The shape comments below use a 640x640 input purely as an example; any
    input size works because global average pooling precedes the classifier.
    """

    def __init__(self, num_classes=1000):
        super(MobileNetV1, self).__init__()
        self.stage1 = nn.Sequential(
            # e.g. 640,640,3 -> 320,320,32
            conv_bn(3, 32, 2),
            # 320,320,32 -> 320,320,64
            conv_dw(32, 64, 1),
            # 320,320,64 -> 160,160,128
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),
            # 160,160,128 -> 80,80,256
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
        )
        # 80,80,256 -> 40,40,512
        self.stage2 = nn.Sequential(
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
        )
        # 40,40,512 -> 20,20,1024
        self.stage3 = nn.Sequential(
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class scores of shape (N, num_classes)."""
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avg(x)       # (N, 1024, 1, 1)
        x = x.flatten(1)      # (N, 1024); keeps the batch axis explicit
        x = self.fc(x)
        return x
def mobilenet_v1(pretrained=False, progress=True, num_classes=1000):
    """Build a MobileNetV1.

    Args:
        pretrained: no checkpoint is available; a truthy value only prints a
            notice and the model keeps its random initialisation.
        progress: accepted for signature parity with the other factories;
            not used.
        num_classes: when different from 1000, the final linear layer is
            replaced by a new one with that many outputs.
    """
    net = MobileNetV1()
    if pretrained:
        print("mobilenet_v1 has no pretrained model")
    if num_classes == 1000:
        return net
    net.fc = nn.Linear(1024, num_classes)
    return net
if __name__ == "__main__":
    # Smoke test: eight random 416x416 RGB images through a 2-class MobileNetV1.
    # The batch is named `images` so the builtin `input` is not shadowed.
    images = torch.randn([8, 3, 416, 416])
    model = mobilenet_v1(num_classes=2)
    out = model(images)
    print(out.shape)
'''
torch.Size([8, 2])
Process finished with exit code 0
'''
MobileNetV2在MobileNetV1的基础上,增加Inverted resblock模块儿。Inverted体现在对输入首先利用1x1卷积进行升维,然后利用3x3深度可分离卷积进行特征提取,然后再利用1x1卷积降维。resblock体现在输入和上一步输出相加。如下所示:
'''
(1)Inverted resblock
Inverted:input--> conv2d(1*1 升维)+BN+ReLU6 --> DepthwiseConv2D(3*3)+BN+ReLU6--> conv2d(1*1 降维)+BN --> output
resblock: output + input
(2)MobileNetV2
input.shape expand_ratio channels times strides
conv2d 224^2*3 - 32 1 2
bottleneck 112^2*32 1 16 1 1
bottleneck 112^2*16 6 24 2 2
bottleneck 56^2*24 6 32 3 2
bottleneck 28^2*32 6 64 4 2
bottleneck 14^2*64 6 96 3 1
bottleneck 14^2*96 6 160 3 2
bottleneck 7^2*160 6 320 1 1
conv2d 1*1 7^2*320 - 1280 1 1
avgpool7*7 7^2*1280 - - 1 -
conv2d 1*1 1*1*k - k -
'''
'''
MobileNetV2
'''
import torch
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
# Names exported by `from <module> import *`.
__all__ = ['MobileNetV2', 'mobilenet_v2']
# Checkpoint URL looked up by mobilenet_v2(pretrained=True).
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'}
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value,
int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
    """Bias-free Conv2d -> BatchNorm2d -> ReLU6, packaged as a Sequential.

    Padding is ``(kernel_size - 1) // 2``, so with stride 1 and an odd kernel
    the spatial size is preserved. ``groups`` is passed to the convolution
    (set it to the channel count for a depthwise conv).
    """

    def __init__(self, in_planes, out_planes, kernel_size=3,
                 stride=1, groups=1):
        same_pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_planes, out_planes, kernel_size,
                         stride, same_pad, groups=groups, bias=False)
        norm = nn.BatchNorm2d(out_planes)
        act = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, act)
class InvertedResidual(nn.Module):
    """MobileNetV2 inverted residual block.

    Layout: optional 1x1 expansion (skipped when ``expand_ratio == 1``) ->
    3x3 depthwise conv carrying the stride -> linear 1x1 projection with
    BatchNorm and no activation. The input is added to the output only when
    ``stride == 1`` and ``inp == oup``.

    Args:
        inp: input channels.
        oup: output channels.
        stride: 1 or 2 (asserted).
        expand_ratio: width multiplier of the hidden (expanded) layer.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]
        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        # Expansion is pointless when it would not widen the tensor.
        stages = [] if expand_ratio == 1 else [ConvBNReLU(inp, hidden_dim, kernel_size=1)]
        stages.append(ConvBNReLU(hidden_dim, hidden_dim,
                                 stride=stride, groups=hidden_dim))
        stages.append(nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False))
        stages.append(nn.BatchNorm2d(oup))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the block, adding the skip connection when it is enabled."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
class MobileNetV2(nn.Module):
    """MobileNetV2 classifier.

    Args:
        num_classes: output size of the final linear layer.
        width_mult: scales the channel count of every layer.
        inverted_residual_setting: list of ``[t, c, n, s]`` rows, where t is
            the expansion ratio, c the output channels, n the number of
            repeated blocks and s the stride of the first block in the group.
            ``None`` selects the default table below.
        round_nearest: channel counts are rounded to a multiple of this.

    Raises:
        ValueError: if ``inverted_residual_setting`` is empty or its first
            row does not have four elements.

    Attributes:
        last_channel: channel count fed into the classifier.
    """

    def __init__(self, num_classes=1000, width_mult=1.0,
                 inverted_residual_setting=None,
                 round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        if inverted_residual_setting is None:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]
        if len(inverted_residual_setting) == 0 or len(
                inverted_residual_setting[0]) != 4:
            # The two fragments are concatenated, so the separating spaces
            # must be part of the literals.
            raise ValueError(
                "inverted_residual_setting should be non-empty "
                "or a 4-element list, got {}".format(
                    inverted_residual_setting))
        input_channel = _make_divisible(
            input_channel * width_mult, round_nearest)
        # The last layer is never made narrower than 1280 channels.
        self.last_channel = _make_divisible(
            last_channel * max(1.0, width_mult),
            round_nearest)
        features = [ConvBNReLU(3, input_channel, stride=2)]
        for t, c, n, s in inverted_residual_setting:
            out_channel = _make_divisible(c * width_mult,
                                          round_nearest)
            for i in range(n):
                # Only the first block of a group downsamples.
                stride = s if i == 0 else 1
                features += [block(input_channel, out_channel,
                                   stride, expand_ratio=t)]
                input_channel = out_channel
        features += [ConvBNReLU(input_channel, self.last_channel, kernel_size=1)]
        self.features = nn.Sequential(*features)
        self.classifier = nn.Sequential(nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes))
        # Previously defined but never invoked.
        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear layers in place.

        Conv weights use Kaiming-normal (fan_out). Linear weights are drawn
        from N(0, 0.01) rather than filled with a constant, which would make
        all output units identical.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """Return class scores of shape (N, num_classes)."""
        x = self.features(x)
        # Global average pool over the two spatial axes.
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x
def mobilenet_v2(pretrained=False, progress=True, num_classes=1000):
    """Build a MobileNetV2 with the default width and block table.

    Args:
        pretrained: when True, download (into ``./model_data``) the checkpoint
            in ``model_urls['mobilenet_v2']`` and load it non-strictly.
        progress: forwarded to the downloader to toggle its progress bar.
        num_classes: when different from 1000, the classifier is replaced by
            a new Dropout(0.2) + Linear head with that many outputs.
    """
    net = MobileNetV2()
    if pretrained:
        weights = load_state_dict_from_url(
            model_urls['mobilenet_v2'],
            model_dir='./model_data',
            progress=progress,
        )
        net.load_state_dict(weights, strict=False)
    if num_classes == 1000:
        return net
    head = [nn.Dropout(0.2), nn.Linear(net.last_channel, num_classes)]
    net.classifier = nn.Sequential(*head)
    return net
if __name__ == '__main__':
    # Smoke test: twenty random 224x224 RGB images through a MobileNetV2 with
    # a 20000-way head; prints the shape of the returned scores.
    batch = torch.randn(20, 3, 224, 224)
    net = mobilenet_v2(num_classes=20000)
    scores = net(batch)
    print(scores.shape)
'''
torch.Size([20, 20000])
Process finished with exit code 0
'''
MobileNetV3的亮点是在InvertedResidual模块儿中添加了注意力机制SE,使用HS激活函数。
import torch.nn as nn
import math
import torch
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class h_sigmoid(nn.Module):
    """Hard sigmoid: ``ReLU6(x + 3) / 6``, a piecewise-linear sigmoid.

    Args:
        inplace: passed to the internal ReLU6. It acts on the temporary
            ``x + 3``, so the caller's tensor is not modified.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        shifted = x + 3
        clipped = self.relu(shifted)
        return clipped / 6
class h_swish(nn.Module):
    """Hard swish: ``x * h_sigmoid(x)``.

    Args:
        inplace: forwarded to the internal ``h_sigmoid``.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        gate = self.sigmoid(x)
        return x * gate
class SELayer(nn.Module):
    """Squeeze-and-Excitation channel attention.

    Each channel is averaged to a single value, passed through a two-layer
    bottleneck MLP ending in a hard sigmoid, and the resulting per-channel
    weights rescale the input.

    Args:
        channel: number of channels of the input feature map.
        reduction: the bottleneck width is ``channel // reduction`` rounded
            to a multiple of 8.
    """

    def __init__(self, channel, reduction=4):
        super().__init__()
        squeezed = _make_divisible(channel // reduction, 8)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, squeezed),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed, channel),
            h_sigmoid(),
        )

    def forward(self, x):
        batch, channels, _height, _width = x.size()
        # Squeeze: (B, C, H, W) -> (B, C, 1, 1) -> (B, C)
        pooled = self.avg_pool(x).view(batch, channels)
        # Excite: bottleneck MLP, then reshape for broadcasting over H and W.
        weights = self.fc(pooled).view(batch, channels, 1, 1)
        return x * weights
def conv_3x3_bn(inp, oup, stride):
    """Return a 3x3 conv (padding 1, no bias) -> BatchNorm2d -> h_swish stack."""
    conv = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = h_swish()
    return nn.Sequential(conv, norm, act)
def conv_1x1_bn(inp, oup):
    """Return a 1x1 conv (stride 1, no bias) -> BatchNorm2d -> h_swish stack."""
    conv = nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = h_swish()
    return nn.Sequential(conv, norm, act)
class InvertedResidual(nn.Module):
    """MobileNetV3 inverted residual block with optional SE and hard-swish.

    Args:
        inp: input channels.
        hidden_dim: channels of the expanded (depthwise) layer. When equal to
            ``inp`` the 1x1 expansion conv is omitted.
        oup: output channels.
        kernel_size: kernel size of the depthwise conv.
        stride: 1 or 2 (asserted); applied by the depthwise conv.
        use_se: truthy to insert an SELayer, otherwise an Identity placeholder
            keeps the layer indices stable.
        use_hs: truthy to use h_swish, otherwise ReLU.

    The skip connection is used only when ``stride == 1`` and ``inp == oup``.
    """

    def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs):
        super().__init__()
        assert stride in [1, 2]
        self.identity = stride == 1 and inp == oup

        def activation():
            return h_swish() if use_hs else nn.ReLU(inplace=True)

        def attention():
            return SELayer(hidden_dim) if use_se else nn.Identity()

        def depthwise():
            return nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride,
                             (kernel_size - 1) // 2, groups=hidden_dim, bias=False)

        if inp == hidden_dim:
            # No expansion: depthwise -> BN -> act -> SE
            stages = [
                depthwise(),
                nn.BatchNorm2d(hidden_dim),
                activation(),
                attention(),
            ]
        else:
            # Pointwise expansion, then depthwise -> BN -> SE -> act
            stages = [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                activation(),
                depthwise(),
                nn.BatchNorm2d(hidden_dim),
                attention(),
                activation(),
            ]
        # Linear pointwise projection (no activation afterwards).
        stages += [
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the block, adding the skip connection when it is enabled."""
        if not self.identity:
            return self.conv(x)
        return x + self.conv(x)
class MobileNetV3(nn.Module):
    """MobileNetV3 classifier using the block table in ``self.cfgs``.

    Args:
        num_classes: output size of the final linear layer.
        width_mult: scales the channel count of every block.

    Attributes:
        num_classes: the value passed to the constructor.
        cfgs: one row per inverted residual block (see the column legend).
    """

    def __init__(self, num_classes=1000, width_mult=1.):
        # Initialise nn.Module before assigning any attribute.
        super(MobileNetV3, self).__init__()
        self.num_classes = num_classes
        # setting of inverted residual blocks
        self.cfgs = [
            # k: depthwise kernel size, t: expansion factor, c: output channels,
            # SE: use squeeze-excite, HS: use h_swish, s: stride
            # k, t, c, SE, HS, s
            [3, 1, 16, 0, 0, 1],
            [3, 4, 24, 0, 0, 2],
            [3, 3, 24, 0, 0, 1],
            [5, 3, 40, 1, 0, 2],
            [5, 3, 40, 1, 0, 1],
            [5, 3, 40, 1, 0, 1],
            [3, 6, 80, 0, 1, 2],
            [3, 2.5, 80, 0, 1, 1],
            [3, 2.3, 80, 0, 1, 1],
            [3, 2.3, 80, 0, 1, 1],
            [3, 6, 112, 1, 1, 1],
            [3, 6, 112, 1, 1, 1],
            [5, 6, 160, 1, 1, 2],
            [5, 6, 160, 1, 1, 1],
            [5, 6, 160, 1, 1, 1]
        ]
        input_channel = _make_divisible(16 * width_mult, 8)
        layers = [conv_3x3_bn(3, input_channel, 2)]
        block = InvertedResidual
        for k, t, c, use_se, use_hs, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, 8)
            exp_size = _make_divisible(input_channel * t, 8)
            layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs))
            input_channel = output_channel
        self.features = nn.Sequential(*layers)
        # exp_size deliberately carries over from the last block: the final
        # 1x1 conv widens to the same size as that block's hidden layer.
        self.conv = conv_1x1_bn(input_channel, exp_size)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        output_channel = _make_divisible(1280 * width_mult, 8) if width_mult > 1.0 else 1280
        self.classifier = nn.Sequential(
            nn.Linear(exp_size, output_channel),
            h_swish(),
            nn.Dropout(0.2),
            nn.Linear(output_channel, num_classes),
        )
        self._initialize_weights()

    def forward(self, x):
        """Return class scores of shape (N, num_classes)."""
        x = self.features(x)
        x = self.conv(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear layers in place.

        Conv weights are drawn from N(0, sqrt(2 / (kh * kw * out_channels))),
        linear weights from N(0, 0.01); biases are zeroed and batch-norm
        scales set to one.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0, math.sqrt(2. / fan_out))
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)
def mobilenet_v3(pretrained=False, **kwargs):
    """Build a MobileNetV3.

    Args:
        pretrained: when True, load
            ``./model_data/mobilenetv3-large-1cd25616.pth`` (must already
            exist on disk) with strict key matching.
        **kwargs: forwarded to ``MobileNetV3`` (``num_classes``,
            ``width_mult``).

    Returns:
        The model. When ``num_classes`` differs from 1000 its last linear
        layer is a freshly created one with that many outputs.
    """
    num_classes = kwargs.get('num_classes', 1000)
    if pretrained:
        # The checkpoint has a 1000-way head. Building the model with another
        # num_classes first would make the strict load fail on a shape
        # mismatch, so build the 1000-way model, load, then swap the head.
        kwargs = dict(kwargs, num_classes=1000)
    model = MobileNetV3(**kwargs)
    if pretrained:
        # map_location keeps loading working on machines without the device
        # the checkpoint was saved from.
        state_dict = torch.load('./model_data/mobilenetv3-large-1cd25616.pth',
                                map_location='cpu')
        model.load_state_dict(state_dict, strict=True)
    if num_classes != 1000:
        in_channel = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(in_channel, num_classes)
        model.num_classes = num_classes
    return model
if __name__ == '__main__':
    # Smoke test: eight random 416x416 RGB images through a 2-class MobileNetV3.
    batch = torch.randn(8, 3, 416, 416)
    net = mobilenet_v3(num_classes=2)
    print(net(batch).shape)
'''
torch.Size([8, 2])
Process finished with exit code 0
'''
'''
features(x)
Sequential(
(0): Sequential(
(0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(1): InvertedResidual(
(conv): Sequential(
(0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
(1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Identity()
(4): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(2): InvertedResidual(
(conv): Sequential(
(0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): Identity()
(6): ReLU(inplace=True)
(7): Conv2d(64, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(3): InvertedResidual(
(conv): Sequential(
(0): Conv2d(24, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(72, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=72, bias=False)
(4): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): Identity()
(6): ReLU(inplace=True)
(7): Conv2d(72, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(4): InvertedResidual(
(conv): Sequential(
(0): Conv2d(24, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(72, 72, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=72, bias=False)
(4): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=72, out_features=24, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=24, out_features=72, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): ReLU(inplace=True)
(7): Conv2d(72, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): InvertedResidual(
(conv): Sequential(
(0): Conv2d(40, 120, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(120, 120, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=120, bias=False)
(4): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=120, out_features=32, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=32, out_features=120, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): ReLU(inplace=True)
(7): Conv2d(120, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(6): InvertedResidual(
(conv): Sequential(
(0): Conv2d(40, 120, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(120, 120, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=120, bias=False)
(4): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=120, out_features=32, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=32, out_features=120, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): ReLU(inplace=True)
(7): Conv2d(120, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(7): InvertedResidual(
(conv): Sequential(
(0): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=240, bias=False)
(4): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): Identity()
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(8): InvertedResidual(
(conv): Sequential(
(0): Conv2d(80, 200, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(200, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=200, bias=False)
(4): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): Identity()
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(200, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(9): InvertedResidual(
(conv): Sequential(
(0): Conv2d(80, 184, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(184, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=184, bias=False)
(4): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): Identity()
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(184, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(10): InvertedResidual(
(conv): Sequential(
(0): Conv2d(80, 184, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(184, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=184, bias=False)
(4): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): Identity()
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(184, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(11): InvertedResidual(
(conv): Sequential(
(0): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
(4): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=480, out_features=120, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=120, out_features=480, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(12): InvertedResidual(
(conv): Sequential(
(0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False)
(4): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=672, out_features=168, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=168, out_features=672, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(13): InvertedResidual(
(conv): Sequential(
(0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(672, 672, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=672, bias=False)
(4): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=672, out_features=168, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=168, out_features=672, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(672, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(14): InvertedResidual(
(conv): Sequential(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False)
(4): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=960, out_features=240, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=240, out_features=960, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(15): InvertedResidual(
(conv): Sequential(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(3): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False)
(4): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SELayer(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc): Sequential(
(0): Linear(in_features=960, out_features=240, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=240, out_features=960, bias=True)
(3): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
(6): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(7): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
'''
'''
conv_1x1_bn(input_channel, exp_size):
(17) Sequential(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
)
'''
'''
nn.AdaptiveAvgPool2d((1, 1)):
(18) AdaptiveAvgPool2d(output_size=(1, 1))
'''
'''
classifier:
(19) Sequential(
(0): Linear(in_features=960, out_features=1280, bias=True)
(1): h_swish(
(sigmoid): h_sigmoid(
(relu): ReLU6(inplace=True)
)
)
(2): Dropout(p=0.2, inplace=False)
(3): Linear(in_features=1280, out_features=1000, bias=True)
)
'''
ResNet50模型主要有两个模块儿:Conv Block和Identity Block。Conv Block负责下采样和提取特征,Identity Block是传统的残差结构,负责提取特征的扩展通道数。
'''
Conv Block:
input --> Conv2d + BatchNorm + ReLU --> Conv2d + BatchNorm + ReLU--> Conv2d + BatchNorm --> o1
input --> Conv2d + BatchNorm -->o2
o1 + o2 --> ReLU --> output
Identity Block:
input --> Conv2d + BatchNorm + ReLU --> Conv2d + BatchNorm + ReLU--> Conv2d + BatchNorm --> o1
input + o1 --> ReLU --> output
ResNet50:
input
Zeropad
Conv2d + BatchNorm + ReLU + MaxPool
Conv Block + Identity Block*2
Conv Block + Identity Block*3
Conv Block + Identity Block*5
Conv Block + Identity Block*2
AveragePooling2D
Flatten
output
'''
'''
Bottleneck根据downsample的输入确定是Conv Block还是Identity Block。downsample是由strides或者扩展输入通道与设定通道是否一致确定,downsample的内核是对特征层进行1*1,s=2 的下采样和BN组成。
Bottleneck的内核是(conv1*1 + conv3*3 + conv1*1 ) + residual
_make_layer的内核是Conv Block和Identity Block的组合。
ResNet的内核是其网络结构。
resnet50是实例化ResNet,加载参数,改变模型输出的类别数。
'''
import torch
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
# Checkpoint URL looked up by resnet50(pretrained=True).
model_urls = {
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Return a bias-free 3x3 convolution.

    Padding is set equal to ``dilation`` so that, with stride 1, the spatial
    size is preserved.
    """
    options = dict(kernel_size=3, stride=stride, padding=dilation,
                   groups=groups, bias=False, dilation=dilation)
    return nn.Conv2d(in_planes, out_planes, **options)
def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 convolution with the given stride."""
    options = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **options)
class Bottleneck(nn.Module):
    """ResNet bottleneck: 1x1 reduce -> 3x3 -> 1x1 expand, plus a shortcut.

    The block outputs ``planes * expansion`` channels. When ``downsample`` is
    given it is applied to the input to form the shortcut; otherwise the
    input itself is added.

    Args:
        inplanes: input channels.
        planes: base width; the output has ``planes * expansion`` channels.
        stride: stride of the 3x3 conv.
        downsample: optional module applied to the shortcut branch.
        groups: groups of the 3x3 conv.
        base_width: width per group; scales the inner channel count.
        dilation: dilation of the 3x3 conv.
        norm_layer: normalisation layer factory; BatchNorm2d when None.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        out_planes = planes * self.expansion
        # Both self.conv2 and self.downsample reduce resolution when stride != 1.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, out_planes)
        self.bn3 = norm_layer(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """Generic ResNet: 7x7 stem -> four residual stages -> global pool -> fc.

    Args:
        block: residual block class; must expose ``expansion`` and accept the
            arguments passed in ``_make_layer``.
        layers: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        zero_init_residual: when True, the last BatchNorm scale of every
            ``Bottleneck`` is zeroed so each block starts as an identity.
        groups: groups of the 3x3 convs inside the blocks.
        width_per_group: base width passed to the blocks.
        replace_stride_with_dilation: three booleans, one per stage 2-4; True
            replaces that stage's stride-2 downsampling by dilation.
        norm_layer: normalisation layer factory; BatchNorm2d when None.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` is not None and does
            not have exactly three elements.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Running state mutated by _make_layer as stages are appended.
        self.inplanes = 64
        self.dilation = 1
        self.block = block
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        # Stem
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages
        dilate2, dilate3, dilate4 = (replace_stride_with_dilation[0],
                                     replace_stride_with_dilation[1],
                                     replace_stride_with_dilation[2])
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=dilate2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=dilate3)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=dilate4)

        # Head
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        if zero_init_residual:
            bottlenecks = [m for m in self.modules() if isinstance(m, Bottleneck)]
            for unit in bottlenecks:
                nn.init.constant_(unit.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block may change resolution or channel count; it gets
        a 1x1-conv + norm shortcut whenever the shapes would not match.
        Updates ``self.inplanes`` (and ``self.dilation`` when ``dilate``).
        """
        norm_layer = self._norm_layer
        previous_dilation = self.dilation
        if dilate:
            # Trade the stride for dilation so resolution is kept.
            self.dilation *= stride
            stride = 1

        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                norm_layer(out_planes),
            )

        # The first block still uses the dilation of the previous stage.
        head = block(self.inplanes, planes, stride, downsample, self.groups,
                     self.base_width, previous_dilation, norm_layer)
        self.inplanes = out_planes
        tail = [
            block(self.inplanes, planes, groups=self.groups,
                  base_width=self.base_width, dilation=self.dilation,
                  norm_layer=norm_layer)
            for _ in range(1, blocks)
        ]
        return nn.Sequential(head, *tail)

    def forward(self, x):
        """Return class scores of shape (N, num_classes)."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = self.avgpool(x)
        return self.fc(torch.flatten(x, 1))
def resnet50(pretrained=False, progress=True, num_classes=1000):
    """Build a ResNet-50 (Bottleneck blocks, stage depths 3-4-6-3).

    Args:
        pretrained: when True, download (into ``./model_data``) the checkpoint
            in ``model_urls['resnet50']`` and load it.
        progress: forwarded to the downloader to toggle its progress bar.
        num_classes: when different from 1000, the final linear layer is
            replaced by a new one with that many outputs.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        weights = load_state_dict_from_url(
            model_urls['resnet50'],
            model_dir='./model_data',
            progress=progress,
        )
        net.load_state_dict(weights)
    if num_classes == 1000:
        return net
    net.fc = nn.Linear(512 * net.block.expansion, num_classes)
    return net
if __name__ == '__main__':
    # Smoke test: twenty random 224x224 RGB images through a 2-class ResNet-50.
    batch = torch.randn(20, 3, 224, 224)
    net = resnet50(num_classes=2)
    scores = net(batch)
    print(scores.shape)
'''
torch.Size([20, 2])
Process finished with exit code 0
'''
对比发现VGG16(大约574MB)模型参数占内存最大,resnet50次之(97MB),mobilenetv3(22.1MB)、mobilenetv1(17MB)、mobilenetv2(14.2MB)模型参数量依次减少。