[ pytorch ] —— Basic usage: (8) Using the optimizer

 

##############
#  Model definition  #
import torch
import torch.nn as nn
from torchvision import models

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()

        # a small conv "base", followed by global average pooling and a linear classifier
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.gap = nn.AdaptiveAvgPool2d(1)      # 2d pooling for NCHW feature maps

        self.classifier = nn.Linear(64, 512)    # in_features must match conv1's 64 output channels

    def forward(self, input):                   # input: (N, 3, H, W) image batch

        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.gap(x)                         # (N, 64, 1, 1)
        x = x.flatten(1)                        # (N, 64) before the linear layer
        x = self.classifier(x)

        return x
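Before moving on to the optimizer, a quick sanity check (my own addition, assuming a 224x224 RGB input) confirms that the model runs end to end:

# sanity check: a dummy batch of two 224x224 RGB images -> (2, 512) output
dummy = torch.randn(2, 3, 224, 224)
print(MyModel()(dummy).shape)   # torch.Size([2, 512])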

 

1. Sometimes you want to give different layers different learning rates during training; this can be done through the optimizer's param_groups.

  • Method 1
############
#  Prepare the model  #
model = MyModel()

# split the parameters into the "base" layers and the newly added classifier
base_params = [p for name, p in model.named_parameters() if "classifier" not in name]
new_params = model.classifier.parameters()

param_groups = [
            {'params': base_params, 'lr': 0.1, 'lr_mult': 0.1},   # 'lr' is the learning rate; 'lr_mult' is a multiplier: final lr = 'lr' * 'lr_mult'
            {'params': new_params, 'lr': 0.1, 'lr_mult': 1.0}]

# concrete hyperparameter values are used here in place of argparse-style args.*
optimizer = torch.optim.SGD(param_groups, lr=0.1,
                                    momentum=0.9,
                                    weight_decay=5e-4,
                                    nesterov=True)

Some of the keys in param_groups are not self-explanatory:

'lr': the learning rate for that parameter group.

'lr_mult': a learning-rate multiplier, i.e. final learning rate = 'lr' * 'lr_mult'. Note that the optimizer itself does not interpret extra keys such as 'lr_mult'; it simply stores them in optimizer.param_groups, so your own learning-rate adjustment code has to read them and rescale 'lr', as shown in the sketch below.
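For completeness, here is a minimal sketch of how 'lr_mult' is typically consumed (not from the original post; the step_size/gamma decay schedule is only an assumed example): a helper called once per epoch that decays the base learning rate and then rescales each group's 'lr' by its own multiplier.

######################################
#  Example: how 'lr_mult' is applied #
def adjust_lr(optimizer, base_lr, epoch, step_size=40, gamma=0.1):
    # hypothetical step decay of the base learning rate
    lr = base_lr * (gamma ** (epoch // step_size))
    for g in optimizer.param_groups:
        # final lr for each group = decayed base lr * that group's own 'lr_mult'
        g['lr'] = lr * g.get('lr_mult', 1.0)

# call once at the start of every epoch, e.g.:
# adjust_lr(optimizer, base_lr=0.1, epoch=epoch)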

  • Method 2
############
#  Prepare the model  #
model = MyModel()

BASE_LR = 0.01
BIAS_LR_FACTOR = 0.1
WEIGHT_DECAY = 0.0005
WEIGHT_DECAY_BIAS = 0.0005

params = []
for key, w_variable in model.named_parameters():
    if not w_variable.requires_grad:
        continue
    lr = BASE_LR
    weight_decay = WEIGHT_DECAY
    if "classifier" in key:   # 替换你想特殊设置 lr 的层的权重
        print(key)
        lr = BASE_LR * BIAS_LR_FACTOR
        weight_decay = WEIGHT_DECAY_BIAS
    params += [{"params": [w_variable], "lr": lr, "weight_decay": weight_decay}]

opt_fns = torch.optim.Adam(params)
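As a quick check (my own addition), you can print what each parameter group ended up with; the classifier parameters should show the reduced learning rate:

# inspect the lr / weight_decay actually assigned to each parameter group
for i, g in enumerate(opt_fns.param_groups):
    print(i, g['lr'], g['weight_decay'], sum(p.numel() for p in g['params']))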
