PyTorch model

Contents
  • Network definition
  • model.named_children() returns names and submodules
  • model.modules() can be used for parameter initialization
  • Other references
  • model.parameters() || torch.optim.SGD(params, lr=<required>, momentum=0, dampening=0, weight_decay=0, nesterov=False)

Network definition

import torch
import torch.nn as nn

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        layer1 = nn.Sequential()
        layer1.add_module('conv1', nn.Conv2d(1, 6, 5))
        layer1.add_module('pool1', nn.MaxPool2d(2, 2))
        self.layer1 = layer1

        layer2 = nn.Sequential()
        layer2.add_module('conv2', nn.Conv2d(6, 16, 5))
        layer2.add_module('pool2', nn.MaxPool2d(2, 2))
        self.layer2 = layer2

        layer3 = nn.Sequential()
        layer3.add_module('fc1', nn.Linear(16 * 5 * 5, 120))
        layer3.add_module('fc2', nn.Linear(120, 84))
        layer3.add_module('fc3', nn.Linear(84, 10))
        self.layer3 = layer3

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 16*5*5) before the fully connected layers
        x = self.layer3(x)
        return x

# sanity-check with dummy data
x = torch.randn(1, 1, 32, 32)
model = LeNet()
out = model(x)
print(model)
print(out)

The output is:

LeNet(
  (layer1): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (fc1): Linear(in_features=400, out_features=120, bias=True)
    (fc2): Linear(in_features=120, out_features=84, bias=True)
    (fc3): Linear(in_features=84, out_features=10, bias=True)
  )
)
tensor([[ 0.0211,  0.1407, -0.1831, -0.1182,  0.0221,  0.1467, -0.0523, -0.0663,
         -0.0351, -0.0434]], grad_fn=<AddmmBackward>)
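Where does the 16*5*5 in fc1 come from? Tracing the shapes for a 1×32×32 input: conv1 (5×5, no padding) gives 6×28×28, pool1 halves that to 6×14×14, conv2 gives 16×10×10, and pool2 gives 16×5×5, i.e. the 400 input features that the printout above shows for fc1.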

model.named_children() returns names and submodules

print("*"*50)
for name, module in model.named_children():
    print(name)
    print(module)

This prints:

layer1
Sequential(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
layer2
Sequential(
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
layer3
Sequential(
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

This can be used in forward() to apply each submodule to the input in turn:

def forward(self, x):
    for name, module in self.named_children():
        x = module(x)
    return x

model.modules() can be used for parameter initialization

print("#"*200)
cnt = 0
for name in model.modules():
    cnt += 1
    print('-------------------------------------------------------cnt=',cnt)
    print(name)

The output is:

########################################################################################################################################################################################################
-------------------------------------------------------cnt= 1
LeNet(
  (layer1): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (fc1): Linear(in_features=400, out_features=120, bias=True)
    (fc2): Linear(in_features=120, out_features=84, bias=True)
    (fc3): Linear(in_features=84, out_features=10, bias=True)
  )
)
-------------------------------------------------------cnt= 2
Sequential(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
-------------------------------------------------------cnt= 3
Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
-------------------------------------------------------cnt= 4
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
-------------------------------------------------------cnt= 5
Sequential(
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
-------------------------------------------------------cnt= 6
Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
-------------------------------------------------------cnt= 7
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
-------------------------------------------------------cnt= 8
Sequential(
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
-------------------------------------------------------cnt= 9
Linear(in_features=400, out_features=120, bias=True)
-------------------------------------------------------cnt= 10
Linear(in_features=120, out_features=84, bias=True)
-------------------------------------------------------cnt= 11
Linear(in_features=84, out_features=10, bias=True)

model.modules() is mainly used for parameter initialization:

cnt = 0
for m in model.modules():
    cnt += 1
    print('-------------------------------------------------------cnt=', cnt)
    print(m)
    if isinstance(m, nn.Conv2d):
        print('------------------isinstance(m, nn.Conv2d)------------------')
        print(m.weight)
        print(m.bias)
        print('--end----------------isinstance(m, nn.Conv2d)------------end------')

    # initialize weights according to module type
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight)
    elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)

The parameter portion of the output looks like:

------------------isinstance(m, nn.Conv2d)------------------
Parameter containing:
tensor([[[[-0.1561, -0.0194, -0.0260, -0.0042,  0.1716],
          [ 0.1181, -0.1380, -0.0448,  0.0674, -0.1972],
          [-0.0197,  0.0359,  0.1186,  0.0876, -0.0395],
          [-0.0619,  0.0095, -0.0702,  0.0122,  0.1573],
          [ 0.1170,  0.1758, -0.1655,  0.1489, -0.0956]]],
       ...
        [[[-0.1337, -0.0562, -0.0624,  0.0885, -0.0640],
          [-0.0302, -0.1192, -0.0637,  0.0083,  0.0181],
          [ 0.1388, -0.1690,  0.1132,  0.1686, -0.1189],
          [-0.0246, -0.1649, -0.1817, -0.0330, -0.0430],
          [ 0.0672, -0.0671,  0.0469,  0.1284,  0.1420]]]], requires_grad=True)
Parameter containing:
tensor([ 0.0548,  0.0547,  0.1328, -0.0452,  0.1668, -0.1915],
       requires_grad=True)
--end----------------isinstance(m, nn.Conv2d)------------end------
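An equivalent pattern uses nn.Module.apply(), which runs a function on every submodule recursively. A minimal sketch of the same initialization:

def init_weights(m):
    # called once per module, including the model itself
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight)
    elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)

model.apply(init_weights)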

Other references:

https://blog.csdn.net/MrR1ght/article/details/105246412
model.children(): returns an iterator over the model's direct child modules
model.modules(): returns an iterator over all modules (not just the children; it also includes the model itself)
model.named_children(): returns an iterator over the direct children as (name, module) pairs
model.named_modules(): returns an iterator over all modules, including the model itself, as (name, module) pairs; see the sketch below
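A quick way to see the difference, using the LeNet above:

for name, module in model.named_children():
    print(name, type(module).__name__)  # layer1 Sequential, layer2 Sequential, layer3 Sequential

for name, module in model.named_modules():
    print(name, type(module).__name__)  # also prints '' (the model itself), layer1.conv1, layer1.pool1, ...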

model.parameters() || torch.optim.SGD(params, lr=<required>, momentum=0, dampening=0, weight_decay=0, nesterov=False)

Parameters:

params (iterable) – an iterable of parameters to optimize, or a dict defining parameter groups
lr (float) – learning rate
momentum (float, optional) – momentum factor (default: 0)
weight_decay (float, optional) – weight decay (L2 penalty) (default: 0)
dampening (float, optional) – dampening for momentum (default: 0)
nesterov (bool, optional) – enables Nesterov momentum (default: False)

Example:

optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
optimizer.zero_grad()
loss_fn(model(input), target).backward()
optimizer.step()

I was curious about model.parameters(), so I printed it:

print(model.parameters())

which prints this:

<generator object Module.parameters at 0x...>

It looks like a pointer (really a generator), so I then printed it like this:

print(*model.parameters())

This time it outputs a long run of numbers; part of it:

Parameter containing:
tensor([[[[-0.1751,  0.1829,  0.1973,  0.0780,  0.1220],
          [-0.0497,  0.0943,  0.0827,  0.1829,  0.0239],
          [-0.1044,  0.1268,  0.0716, -0.0100,  0.1991],
          [-0.0730,  0.1762, -0.0787,  0.0686, -0.0069],
          [ 0.1316,  0.0897, -0.1068,  0.0744,  0.0524]]],

        [[[-0.1034, -0.1946, -0.1312,  0.1076,  0.0129],
          [ 0.0450,  0.0552,  0.1448, -0.1283, -0.1868],
          [-0.0260, -0.1928,  0.0519, -0.0493, -0.1028],
          [-0.0936,  0.1719, -0.0997,  0.0008,  0.0871],
          [ 0.0995, -0.1274,  0.0388,  0.0779,  0.0006]]],

        [[[ 0.1846, -0.0723,  0.0649, -0.0169, -0.1595],
          [ 0.0145, -0.1893,  0.0784, -0.0886, -0.0044],
          [ 0.1914, -0.1009, -0.0736, -0.0992, -0.1618],
          [-0.0291,  0.0997,  0.0549,  0.1267, -0.1661],
          [-0.1333,  0.0168,  0.0648,  0.1047, -0.1506]]],
            ...
     -4.0503e-03,  9.4014e-02, -8.5686e-02,  7.7082e-02]],
       requires_grad=True) Parameter containing:
tensor([-0.0106,  0.0448, -0.0001, -0.0914, -0.0310, -0.0628,  0.0899, -0.0047,
        -0.0390, -0.0291], requires_grad=True)
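A more readable way to inspect the parameters is named_parameters(), which yields (name, tensor) pairs:

for name, p in model.named_parameters():
    print(name, tuple(p.shape), p.requires_grad)
# layer1.conv1.weight (6, 1, 5, 5) True
# layer1.conv1.bias (6,) True
# ...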

Custom parameter groups

Instead of model.parameters(), you can pass a list of dicts, one per parameter group, each carrying its own options (this example assumes a model with backbone and classifier submodules and an opts config object):

optimizer = torch.optim.SGD(params=[
        {'params': model.backbone.parameters(), 'lr': 0.1*opts.lr},
        {'params': model.classifier.parameters(), 'lr': opts.lr},
    ], lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay)
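Applied to the LeNet above, a minimal sketch that gives the convolutional layers a smaller learning rate than the fully connected ones, then inspects the resulting groups:

optimizer = torch.optim.SGD([
    {'params': model.layer1.parameters(), 'lr': 0.01},
    {'params': model.layer2.parameters(), 'lr': 0.01},
    {'params': model.layer3.parameters()},  # no 'lr' here, so the default lr=0.1 below applies
], lr=0.1, momentum=0.9)

for group in optimizer.param_groups:
    print(group['lr'], sum(p.numel() for p in group['params']))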

Another style I have seen (generator methods that yield the parameters for each learning-rate group):

def get_1x_lr_params(self):
    # yield the backbone parameters (trained at the base learning rate)
    modules = [self.backbone]
    for i in range(len(modules)):
        for m in modules[i].named_modules():
            if self.freeze_bn:
                if isinstance(m[1], nn.Conv2d):
                    for p in m[1].parameters():
                        if p.requires_grad:
                            yield p
            else:
                if isinstance(m[1], (nn.Conv2d, SynchronizedBatchNorm2d, nn.BatchNorm2d)):
                    for p in m[1].parameters():
                        if p.requires_grad:
                            yield p

def get_10x_lr_params(self):
    # yield the head parameters (trained at 10x the base learning rate)
    modules = [self.aspp, self.decoder]
    for i in range(len(modules)):
        for m in modules[i].named_modules():
            if self.freeze_bn:
                if isinstance(m[1], nn.Conv2d):
                    for p in m[1].parameters():
                        if p.requires_grad:
                            yield p
            else:
                if isinstance(m[1], (nn.Conv2d, SynchronizedBatchNorm2d, nn.BatchNorm2d)):
                    for p in m[1].parameters():
                        if p.requires_grad:
                            yield p

train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

# Define Optimizer
optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                            weight_decay=args.weight_decay, nesterov=args.nesterov)
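Note that lr is omitted in this SGD call, which works because every group in train_params specifies its own 'lr'; if a group left it out, the optimizer would raise an error about the missing required learning rate.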
