深度学习中,学习率调整非常重要。
学习率大的优点:1、加快学习速度;2、有助于跳出局部最优值。
学习率大的缺点:1、可能导致模型训练不收敛;2、只使用大学习率容易导致模型精度不足。
学习率小的优点:1、帮助模型收敛,有助于模型细化;2、提高模型精度。
学习率小的缺点:1、难以跳出局部最优值;2、收敛缓慢。
以下是两种调整学习率的方法:
StepLR方法:
import torch.optim as optim
from torch.optim import lr_scheduler
# Example of step-wise learning-rate decay.
# `model` and `num_epochs` are placeholders supplied by the surrounding
# training script.

# Create the optimizer with the initial learning rate.
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Create the scheduler: every `step_size` epochs the learning rate is
# multiplied by `gamma`.
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
# Drive the scheduler from the training loop.
for epoch in range(num_epochs):
    # Train the model for one epoch here (forward pass, backward pass,
    # optimizer.step()).
    ...
    # Update the learning rate once per epoch, after the optimizer has stepped.
    scheduler.step()
StepLR方法是最简单常用的学习率调整方法之一。它会在每过step_size轮时,将此前的学习率乘以gamma。通过调整step_size和gamma的值,可以灵活地控制学习率的变化。
CosineAnnealingLR方法:
import torch.optim as optim
from torch.optim import lr_scheduler
# Example of cosine-annealed learning-rate decay.
# `model` and `num_epochs` are placeholders supplied by the surrounding
# training script.

# Create the optimizer with the initial learning rate.
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Create the scheduler: the learning rate follows a cosine curve and reaches
# its minimum (eta_min, 0 by default) after `T_max` scheduler steps.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
# Drive the scheduler from the training loop.
for epoch in range(num_epochs):
    # Train the model for one epoch here (forward pass, backward pass,
    # optimizer.step()).
    ...
    # Update the learning rate once per epoch, after the optimizer has stepped.
    scheduler.step()
CosineAnnealingLR方法会按照余弦函数的曲线调整学习率。T_max参数指定学习率从初始值下降到最小值(eta_min,默认为0)所需的轮数,即半个余弦周期;如果训练轮数超过T_max,学习率会沿余弦曲线再回升到初始值。通过调整T_max的值,可以控制学习率变化的周期长度。
第一步导包:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch import nn, optim
from lenet5 import Lenet5
第二步定义主函数并加载数据集
def main():
    """Load CIFAR-10 and set up the model, loss function and optimizer.

    This is the first part of the walkthrough; the training and evaluation
    loop shown in the following step continues this function body.
    """
    batchsz = 128

    # Both splits share the same preprocessing pipeline.
    # NOTE(review): the mean/std values look like the standard ImageNet
    # channel statistics rather than CIFAR-10-specific ones.
    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Load the CIFAR-10 training set (downloaded into 'cifar' if missing).
    cifar_train = datasets.CIFAR10('cifar', True, transform=transform, download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

    # Load the CIFAR-10 test set.
    cifar_test = datasets.CIFAR10('cifar', False, transform=transform, download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)

    # Peek at one batch to check the tensor shapes. Use the public iterator
    # protocol instead of the private `_next_data()` method.
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    # Run everything on the CPU.
    device = torch.device('cpu')
    # Use the LeNet-5 network.
    model = Lenet5().to(device)
    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)
第三步训练模型并评估
# NOTE: this fragment continues the body of main(); indent it one level when
# assembling the full function.
for epoch in range(10):
    # ---- training ----
    model.train()
    for x, label in cifar_train:
        # x: [b, 3, 32, 32]
        # label: [b]
        x, label = x.to(device), label.to(device)
        logits = model(x)
        # logits: [b, 10]
        # label: [b]
        # loss: tensor scalar
        loss = criteon(logits, label)
        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Report the loss of the last training batch of this epoch.
    print(epoch, 'loss:', loss.item())

    # ---- evaluation ----
    model.eval()
    with torch.no_grad():  # no gradients are needed while testing
        total_correct = 0
        total_num = 0
        for x, label in cifar_test:
            x, label = x.to(device), label.to(device)
            logits = model(x)
            # The predicted class is the index of the largest logit.
            pred = logits.argmax(dim=1)
            correct = torch.eq(pred, label).float().sum().item()
            total_correct += correct
            total_num += x.size(0)
        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)
第四步主函数调用实现
# Run the training script only when this file is executed directly.
if __name__ == '__main__':
    main()
完整代码如下:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch import nn, optim
from lenet5 import Lenet5
def main():
    """Train LeNet-5 on CIFAR-10 for 10 epochs and report test accuracy.

    After every epoch the loss of the last training batch and the accuracy
    on the test set are printed.
    """
    batchsz = 128

    # Both splits share the same preprocessing pipeline.
    # NOTE(review): the mean/std values look like the standard ImageNet
    # channel statistics rather than CIFAR-10-specific ones.
    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Load the CIFAR-10 training set (downloaded into 'cifar' if missing).
    cifar_train = datasets.CIFAR10('cifar', True, transform=transform, download=True)
    cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

    # Load the CIFAR-10 test set.
    cifar_test = datasets.CIFAR10('cifar', False, transform=transform, download=True)
    cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)

    # Peek at one batch to check the tensor shapes. Use the public iterator
    # protocol instead of the private `_next_data()` method.
    x, label = next(iter(cifar_train))
    print('x:', x.shape, 'label:', label.shape)

    device = torch.device('cpu')
    model = Lenet5().to(device)
    criteon = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    print(model)

    for epoch in range(10):
        # ---- training ----
        model.train()
        for x, label in cifar_train:
            # x: [b, 3, 32, 32]
            # label: [b]
            x, label = x.to(device), label.to(device)
            logits = model(x)
            # logits: [b, 10]
            # label: [b]
            # loss: tensor scalar
            loss = criteon(logits, label)
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Report the loss of the last training batch of this epoch.
        print(epoch, 'loss:', loss.item())

        # ---- evaluation ----
        model.eval()
        with torch.no_grad():  # no gradients are needed while testing
            total_correct = 0
            total_num = 0
            for x, label in cifar_test:
                x, label = x.to(device), label.to(device)
                logits = model(x)
                # The predicted class is the index of the largest logit.
                pred = logits.argmax(dim=1)
                correct = torch.eq(pred, label).float().sum().item()
                total_correct += correct
                total_num += x.size(0)
            acc = total_correct / total_num
            print(epoch, 'test acc:', acc)
# Run the training script only when this file is executed directly.
if __name__ == '__main__':
    main()