官方文档https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
一般优化器建立:
# Basic optimizer construction. Both statements bind the same name, so the
# second assignment replaces the first; they are two alternative examples.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)
optimizer = optim.Adam(
    [var1, var2],
    lr=0.0001,
)
其中第一个参数除了直接传入model.parameters()之外,也可以传入由dict组成的参数组列表,从而为不同的层分别设置学习率、weight_decay等超参数,如下示例
# Example 1: per-parameter-group options.
# The first group only lists its parameters; the second group additionally
# carries its own 'lr' of 1e-3. lr=1e-2 and momentum=0.9 are passed as the
# optimizer-level keyword arguments.
optim.SGD(
    [
        dict(params=model.base.parameters()),
        dict(params=model.classifier.parameters(), lr=1e-3),
    ],
    lr=1e-2,
    momentum=0.9,
)
# Example 2 (every hyperparameter value below is an arbitrary placeholder).
# Split the model's parameters into two lists according to whether the
# parameter name contains the substring "bn".
bn_params = []
non_bn_parameters = []
for name, p in model.named_parameters():
    (bn_params if "bn" in name else non_bn_parameters).append(p)

# One option dict per group; only the "bn" group sets its own "lr".
optim_params = [
    dict(params=bn_params, weight_decay=1e-5, lr=0.01),
    dict(params=non_bn_parameters, weight_decay=1e-7),
]
optimizer = torch.optim.SGD(
    optim_params, lr=0.1, momentum=0.9, weight_decay=cfg.SOLVER.WEIGHT_DECAY
)
一般学习率设置:
可参考https://blog.csdn.net/junqing_wu/article/details/93248190
# MultiStepLR: milestones=[10, 20] with gamma=0.1.
# `last_epoch=-1` is the default and `verbose` is deprecated in recent PyTorch
# releases, so neither is passed explicitly; read the current learning rate
# with scheduler.get_last_lr() instead of relying on verbose output.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20], gamma=0.1)
其中milestones指定在哪些epoch进行学习率下降,每次下降时学习率乘以gamma;
还有一种根据指标动态调整学习率的调度器ReduceLROnPlateau:调用step()时需要传入验证集的loss或准确率等作为参考指标。例如以验证集loss为指标(mode='min'),若连续10个epoch不再下降(patience=10),则将学习率乘以factor=0.1:
torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08, verbose=False)
举例如下:
# ReduceLROnPlateau usage; `model`, `train` and `validate` are placeholders.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
for epoch in range(10):
    train(...)
    val_loss = validate(...)
    # step() takes the validation metric, so it must be called after validate().
    scheduler.step(val_loss)
这里还有个目前很常用的余弦学习率,官方给出了两个实现
torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max, eta_min=0, last_epoch=-1, verbose=False)
torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0, T_mult=1, eta_min=0, last_epoch=-1, verbose=False)
对于基础的CosineAnnealingLR,参数含义:
optimizer (Optimizer) – Wrapped optimizer.
T_max (int) – Maximum number of iterations.
eta_min (float) – Minimum learning rate. Default: 0.
last_epoch (int) – The index of last epoch. Default: -1.
verbose (bool) – If True, prints a message to stdout for each update. Default: False.
对于第二个带热重启(Warm Restarts)的版本,T_0是第一个余弦周期的长度,此后每次重启后的周期长度是上一个周期的T_mult倍(即第i个周期的长度为T_0 * T_mult^i)。
import torch
import torch.nn as nn
class simpleNet(nn.Module):
    """Minimal two-layer network used to demonstrate the schedulers.

    Two ``nn.Linear`` layers are applied back to back with no activation
    in between.

    Args:
        in_dim: Number of input features of the first linear layer.
        n_hidden: Output width of the first layer / input width of the second.
        out_dim: Number of output features of the second linear layer.
    """

    def __init__(self, in_dim, n_hidden, out_dim):
        super().__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden)
        self.layer2 = nn.Linear(n_hidden, out_dim)

    def forward(self, x):
        """Return ``layer2(layer1(x))``."""
        return self.layer2(self.layer1(x))
net = simpleNet(5, 10, 8)
optimizer_CosineLR = torch.optim.SGD(net.parameters(), lr=0.1)
CosineLR = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_CosineLR, T_max=10, eta_min=0)
# NOTE: `dataset`, `loss_fn` and `validate` are not defined here; they are
# placeholders that only illustrate where each call goes.
for epoch in range(10):
    for inputs, target in dataset:
        optimizer_CosineLR.zero_grad()
        output = net(inputs)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer_CosineLR.step()
    validate(...)
    # Report the learning rate that was used during this epoch.
    print(epoch, optimizer_CosineLR.param_groups[-1]['lr'])
    # Advance the schedule once per epoch, after the optimizer updates.
    CosineLR.step()
注意:PyTorch 1.1.0及之后的版本中,scheduler.step()要放在optimizer.step()之后调用,否则会跳过学习率调度的第一个值。
当然也可以自己设置学习率函数,在每次训练之前进行调用,以及设置warmup等,示例如下:
def adjust_learning_rate(optimizer, epoch, lr_type, lr_steps, warmup_eopch=10, warmup_factor=0.1,
                         base_lr=None, weight_decay=None, total_epochs=None):
    """Set the learning rate and weight decay of every param group for ``epoch``.

    Supported schedules (``lr_type``):

    * ``'step'``: ``base_lr`` multiplied by 0.1 for every entry of
      ``lr_steps`` that ``epoch`` has reached.
    * ``'cos'``: ``0.5 * base_lr * (1 + cos(pi * epoch / total_epochs))``.
    * ``'warmup_step'``: while ``epoch <= warmup_eopch`` the multiplier is
      interpolated linearly from ``warmup_factor`` (epoch 0) to 1
      (epoch ``warmup_eopch``); afterwards it behaves like ``'step'``.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts. A group
            may carry optional ``'lr_mult'`` / ``'decay_mult'`` multipliers;
            both default to 1 when absent.
        epoch: Current epoch index.
        lr_type: One of ``'step'``, ``'cos'``, ``'warmup_step'``.
        lr_steps: Epoch (scalar) or iterable of epochs at which the step
            schedule decays. Unused for ``'cos'``.
        warmup_eopch: Number of warm-up epochs (name kept as-is for
            backward compatibility).
        warmup_factor: Learning-rate multiplier at the start of warm-up.
        base_lr: Initial learning rate; falls back to ``args.lr``.
        weight_decay: Base weight decay; falls back to ``args.weight_decay``.
        total_epochs: Length of the cosine schedule; falls back to
            ``args.epochs`` (only read for ``'cos'``).

    Raises:
        NotImplementedError: If ``lr_type`` is not a supported schedule.
    """
    import math

    # Validate first so an unknown schedule fails before touching `args`.
    if lr_type not in ('step', 'cos', 'warmup_step'):
        raise NotImplementedError(f"unsupported lr_type: {lr_type!r}")

    # `args` is the module-level configuration object used as a fallback.
    if base_lr is None:
        base_lr = args.lr
    if weight_decay is None:
        weight_decay = args.weight_decay

    def _step_lr():
        # Accept both a single milestone and an iterable of milestones.
        try:
            milestones = list(lr_steps)
        except TypeError:
            milestones = [lr_steps]
        reached = sum(1 for milestone in milestones if epoch >= milestone)
        return base_lr * 0.1 ** reached

    if lr_type == 'step':
        lr = _step_lr()
    elif lr_type == 'cos':
        if total_epochs is None:
            total_epochs = args.epochs
        lr = 0.5 * base_lr * (1 + math.cos(math.pi * epoch / total_epochs))
    elif warmup_eopch > 0 and epoch <= warmup_eopch:
        # Linear warm-up: multiplier goes from warmup_factor to 1.
        alpha = epoch / warmup_eopch
        lr = base_lr * (warmup_factor * (1 - alpha) + alpha)
    else:
        lr = _step_lr()

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr * param_group.get('lr_mult', 1.0)
        param_group['weight_decay'] = weight_decay * param_group.get('decay_mult', 1.0)
# Usage: set the learning rate at the start of every epoch, before training.
# `optimizer`, `model`, `dataset`, `loss_fn` and `validate` are placeholders.
for epoch in range(10):
    # The keyword is `lr_steps`; it is unused by the 'cos' schedule.
    adjust_learning_rate(optimizer, epoch, lr_type='cos', lr_steps=[30])
    for inputs, target in dataset:
        optimizer.zero_grad()
        output = model(inputs)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
    validate(...)
    # Report the learning rate of the optimizer that was just adjusted.
    print(epoch, optimizer.param_groups[-1]['lr'])