我们定义的网络如下所示
VGG(
(features): Sequential(
(conv0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu0): ReLU(inplace=True)
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU(inplace=True)
(conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu4): ReLU(inplace=True)
(pool5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu6): ReLU(inplace=True)
(conv7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu7): ReLU(inplace=True)
(conv8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm8): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu8): ReLU(inplace=True)
(pool9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv10): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm10): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu10): ReLU(inplace=True)
(conv11): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm11): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu11): ReLU(inplace=True)
(conv12): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm12): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu12): ReLU(inplace=True)
(pool13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv14): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm14): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu14): ReLU(inplace=True)
(conv15): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm15): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu15): ReLU(inplace=True)
(conv16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm16): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu16): ReLU(inplace=True)
)
(classifier): Sequential(
(linear1): Linear(in_features=512, out_features=512, bias=True)
(norm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(linear2): Linear(in_features=512, out_features=10, bias=True)
)
)
我们进行权重初始化
# Manually initialize the weights of the (externally defined) network `net`.
for module in net.modules():
    if isinstance(module, nn.Conv2d):
        # He/Kaiming-style init: std = sqrt(2 / fan_out), fan_out = k_h * k_w * out_channels.
        fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
        module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        if module.bias is not None:
            module.bias.data.zero_()
    elif isinstance(module, nn.BatchNorm2d):
        # NOTE(review): scale is filled with 0.5 rather than the conventional 1.0 —
        # looks deliberate, but confirm against the training recipe.
        module.weight.data.fill_(0.5)
        module.bias.data.zero_()
    elif isinstance(module, nn.Linear):
        module.weight.data.normal_(0, 0.01)
        module.bias.data.zero_()
下面是 VGG-16 (vgg_16_bn) 代码中的实现
import math
import torch.nn as nn
from collections import OrderedDict
# Mean/variance constants (kept for compatibility with the surrounding project).
norm_mean, norm_var = 0.0, 1.0

# conv3-64 x2 -> maxpool -> conv3-128 x2 -> maxpool -> conv3-256 x3 -> maxpool
# -> conv3-512 x3 -> maxpool -> conv3-512 x3; the trailing 512 sizes the classifier.
defaultcfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 512]
# Presumably module indices of the ReLU / Conv layers inside `features`,
# consumed by an external pruning pipeline — TODO confirm against the caller.
relucfg = [2, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39]
convcfg = [0, 3, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37]


class VGG(nn.Module):
    """Batch-norm VGG-16 for 32x32 inputs (e.g. CIFAR-10).

    Every Conv2d gets a ``cp_rate`` attribute (compress rate) that an
    external filter-pruning pipeline can read; 0.0 means "no compression".
    """

    def __init__(self, num_classes=10, init_weights=True, cfg=None, compress_rate=None):
        """
        Args:
            num_classes: size of the final classification layer.
            init_weights: run the custom weight initialization when True.
            cfg: layer configuration list; ``defaultcfg`` when None.
            compress_rate: optional per-conv-layer compress rates.
        """
        super(VGG, self).__init__()
        self.features = nn.Sequential()
        if cfg is None:
            cfg = defaultcfg
        self.relucfg = relucfg
        self.covcfg = convcfg
        self.compress_rate = compress_rate
        # cfg[:-1] describes the conv/pool stack; the last two entries size the classifier.
        self.features = self.make_layers(cfg[:-1], True, compress_rate)
        # Classifier head: (512) -> (512) -> num_classes.
        self.classifier = nn.Sequential(OrderedDict([
            ('linear1', nn.Linear(cfg[-2], cfg[-1])),
            ('norm1', nn.BatchNorm1d(cfg[-1])),
            ('relu1', nn.ReLU(inplace=True)),
            ('linear2', nn.Linear(cfg[-1], num_classes)),
        ]))
        if init_weights:
            self._initialize_weights()

    def make_layers(self, cfg, batch_norm=True, compress_rate=None):
        """Build the feature extractor described by ``cfg``.

        'M' entries become 2x2 max-pool layers; integer entries become 3x3
        convolutions (optionally followed by BatchNorm) plus an inplace ReLU.
        """
        layers = nn.Sequential()
        in_channels = 3
        cnt = 0  # index into compress_rate: one entry per conv layer
        for i, v in enumerate(cfg):
            if v == 'M':
                layers.add_module('pool%d' % i, nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
                # BUG FIX: the original indexed compress_rate unconditionally,
                # so VGG() with the default compress_rate=None raised TypeError.
                # Fall back to 0.0 (no compression) when no rates are given.
                conv2d.cp_rate = compress_rate[cnt] if compress_rate is not None else 0.0
                cnt += 1
                layers.add_module('conv%d' % i, conv2d)
                # FIX: honor the batch_norm flag (it was previously ignored;
                # the default True keeps the original behavior).
                if batch_norm:
                    layers.add_module('norm%d' % i, nn.BatchNorm2d(v))
                # inplace=True lets ReLU overwrite the conv output, saving memory.
                layers.add_module('relu%d' % i, nn.ReLU(inplace=True))
                # This layer's output channels feed the next conv.
                in_channels = v
        return layers

    def forward(self, x):
        # Conv stack: [N, 3, 32, 32] -> [N, 512, 2, 2].
        x = self.features(x)
        # 2x2 average pool: [N, 512, 2, 2] -> [N, 512, 1, 1].
        x = nn.AvgPool2d(2)(x)
        # Flatten: [N, 512, 1, 1] -> [N, 512].
        x = x.view(x.size(0), -1)
        # Fully connected classifier head.
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Custom weight init for conv, batch-norm and linear layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He/Kaiming-style init: std = sqrt(2 / fan_out).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                # NOTE(review): scale 0.5 instead of the conventional 1.0 —
                # appears deliberate; confirm against the training recipe.
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                # Robustness: guard against bias-less Linear layers.
                if m.bias is not None:
                    m.bias.data.zero_()
def vgg_16_bn(compress_rate=None):
    """Factory for the batch-norm VGG-16 model.

    Args:
        compress_rate: optional per-conv-layer compress rate list,
            forwarded unchanged to ``VGG``.
    """
    model = VGG(compress_rate=compress_rate)
    return model