PyTorch Weight Initialization

The network we define is shown below:

VGG(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu3): ReLU(inplace=True)
    (conv4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu4): ReLU(inplace=True)
    (pool5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu6): ReLU(inplace=True)
    (conv7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu7): ReLU(inplace=True)
    (conv8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm8): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu8): ReLU(inplace=True)
    (pool9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv10): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm10): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu10): ReLU(inplace=True)
    (conv11): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm11): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu11): ReLU(inplace=True)
    (conv12): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm12): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu12): ReLU(inplace=True)
    (pool13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv14): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm14): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu14): ReLU(inplace=True)
    (conv15): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm15): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu15): ReLU(inplace=True)
    (conv16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm16): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu16): ReLU(inplace=True)
  )
  (classifier): Sequential(
    (linear1): Linear(in_features=512, out_features=512, bias=True)
    (norm1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (linear2): Linear(in_features=512, out_features=10, bias=True)
  )
)
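
For reference, the printout above is simply what print(net) produces. A minimal sketch of how to reproduce it, assuming the vgg_16_bn factory defined at the end of this post (the 13 compress_rate values are placeholders, one per conv layer):

net = vgg_16_bn(compress_rate=[0.0] * 13)
print(net)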

We initialize the weights as follows:

for m in net.modules():
    # Conv layers: He (Kaiming) normal initialization, fan_out mode
    if isinstance(m, nn.Conv2d):
        # n = fan_out = kernel_height * kernel_width * out_channels
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
        if m.bias is not None:
            m.bias.data.zero_()
    # BatchNorm layers: constant weight, zero bias
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(0.5)
        m.bias.data.zero_()
    # Linear layers: small Gaussian weights, zero bias
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_(0, 0.01)
        m.bias.data.zero_()
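
The conv branch above is He (Kaiming) initialization in fan_out mode: for a k×k kernel with C_out output channels, n = k * k * C_out and the weights are drawn from N(0, 2/n), which keeps activation variance roughly stable through ReLU layers. (Filling the BatchNorm weights with 0.5 is a choice of this codebase; 1.0 is the more common default.) A minimal equivalent sketch using the torch.nn.init helpers and Module.apply, assuming net is the model above:

import torch.nn as nn

def init_weights(m):
    if isinstance(m, nn.Conv2d):
        # std = sqrt(2 / fan_out): the same He initialization as the loop above
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 0.5)
        nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.zeros_(m.bias)

net.apply(init_weights)  # Module.apply visits every submodule recursively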

Below is the implementation from the VGG-16 code:

import math
import torch.nn as nn
from collections import OrderedDict


norm_mean, norm_var = 0.0, 1.0

# Architecture: conv3-64 x2 --> maxpool --> conv3-128 x2 --> maxpool --> conv3-256 x3
# --> maxpool --> conv3-512 x3 --> maxpool --> conv3-512 x3
# (the trailing 512 is the hidden width of the classifier, not a conv layer)
defaultcfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 512]
# Module indices inside self.features, referenced by the pruning code
relucfg = [2, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39]
convcfg = [0, 3, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37]

# VGG model definition
class VGG(nn.Module):
    # Constructor arguments: num_classes=10, init_weights=True, cfg=None, compress_rate=None
    def __init__(self, num_classes=10, init_weights=True, cfg=None, compress_rate=None):
        super(VGG, self).__init__()
        self.features = nn.Sequential()
        # fall back to the default configuration when cfg is not given
        if cfg is None:
            cfg = defaultcfg

        self.relucfg = relucfg
        self.covcfg = convcfg
        self.compress_rate = compress_rate
        # cfg[:-1] = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512]
        # describes the conv and pooling layers; True enables BatchNorm;
        # compress_rate holds the per-layer compression ratios
        self.features = self.make_layers(cfg[:-1], True, compress_rate)
        # fully connected classifier: (512) --> (512) --> 10
        self.classifier = nn.Sequential(OrderedDict([
            ('linear1', nn.Linear(cfg[-2], cfg[-1])),
            ('norm1', nn.BatchNorm1d(cfg[-1])),
            ('relu1', nn.ReLU(inplace=True)),
            ('linear2', nn.Linear(cfg[-1], num_classes)),
        ]))

        if init_weights:
            # initialize the weights (see _initialize_weights below)
            self._initialize_weights()

    def make_layers(self, cfg, batch_norm=True, compress_rate=None):
        layers = nn.Sequential()
        # the network takes 3-channel RGB input
        in_channels = 3
        cnt = 0
        for i, v in enumerate(cfg):
            # 'M' marks a max-pooling layer: add nn.MaxPool2d(kernel_size=2, stride=2)
            if v == 'M':
                layers.add_module('pool%d' % i, nn.MaxPool2d(kernel_size=2, stride=2))
            # otherwise v is the output channel count of a conv layer
            else:
                # Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)

                # attach the compression rate for this conv layer
                # (assumes compress_rate has one entry per conv layer)
                conv2d.cp_rate = compress_rate[cnt]
                cnt += 1

                # add the conv layer
                layers.add_module('conv%d' % i, conv2d)
                # add the BN layer; v is the number of output channels
                layers.add_module('norm%d' % i, nn.BatchNorm2d(v))
                # add the ReLU; inplace=True modifies the tensor from the preceding
                # Conv2d in place, saving memory by avoiding an extra copy
                layers.add_module('relu%d' % i, nn.ReLU(inplace=True))
                # this layer's output channels become the next layer's input channels
                in_channels = v
        # all conv layers defined; return them
        return layers

    def forward(self, x):
        # convolutional features: [1, 3, 32, 32] --> [1, 512, 2, 2]
        x = self.features(x)
        # average pooling: [1, 512, 2, 2] --> [1, 512, 1, 1]
        x = nn.AvgPool2d(2)(x)
        # reshape: [1, 512, 1, 1] --> [1, 512]
        x = x.view(x.size(0), -1)
        # pass through the fully connected classifier
        x = self.classifier(x)
        # output logits
        return x

    def _initialize_weights(self):
        for m in self.modules():
            # conv layers: He (Kaiming) normal initialization
            if isinstance(m, nn.Conv2d):
                # n = kernel size times output channels (fan_out)
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # initialize the weights
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # initialize the bias to zero, if present
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


# Factory for VGG-16 with batch norm. Note that compress_rate must supply one
# entry per conv layer (13 here); passing None would fail in make_layers.
def vgg_16_bn(compress_rate=None):
    return VGG(compress_rate=compress_rate)
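
A quick usage sketch (the compress_rate values and the CIFAR-10-sized input are illustrative assumptions):

import torch

net = vgg_16_bn(compress_rate=[0.0] * 13)  # 13 placeholder rates, one per conv layer
x = torch.randn(4, 3, 32, 32)              # a dummy CIFAR-10-sized batch
logits = net(x)
print(logits.shape)                        # torch.Size([4, 10])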
