SGD
is the plain baseline optimizer (and honestly, plain SGD works quite well on its own).
Momentum
is an upgraded SGD; as the code below shows, it is created with the same torch.optim.SGD call, just with a momentum argument added.
RMSprop
is the next step up: instead of a velocity term, it adapts the step size per parameter using a running average of squared gradients.
Adam
is an upgrade of RMSprop that also keeps a momentum-style running mean of the gradient, effectively combining the two ideas.
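To make this lineage concrete, here is a minimal single-parameter sketch of the four update rules. This is a hand-rolled illustration, not the torch.optim implementations; the names v, s, m, u are illustrative, and the hyperparameter values mirror the ones used later in this post.

import torch

lr, mu, alpha, b1, b2, eps = 0.01, 0.8, 0.9, 0.9, 0.99, 1e-8

w = torch.tensor(1.0)  # current parameter value
g = 2 * w              # its gradient, using loss = w**2 as a toy example

# SGD: step straight down the gradient.
w_sgd = w - lr * g

# Momentum: accumulate a velocity v and step along it
# (this is what the momentum argument of torch.optim.SGD adds).
v = torch.tensor(0.0)
v = mu * v + g
w_mom = w - lr * v

# RMSprop: track a running average s of squared gradients and
# divide the step by its square root, giving per-parameter step sizes.
s = torch.tensor(0.0)
s = alpha * s + (1 - alpha) * g ** 2
w_rms = w - lr * g / (s.sqrt() + eps)

# Adam: combine both ideas via bias-corrected first and second moments.
m, u, t = torch.tensor(0.0), torch.tensor(0.0), 1
m = b1 * m + (1 - b1) * g
u = b2 * u + (1 - b2) * g ** 2
m_hat = m / (1 - b1 ** t)
u_hat = u / (1 - b2 ** t)
w_adam = w - lr * m_hat / (u_hat.sqrt() + eps)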
Set the hyperparameters and prepare the data. We reuse the regression task from earlier.
import torch
import torch.utils.data as Data

LR = 0.01
batch_size = 10
epoches = 3

torch.manual_seed(15)  # fix the seed so runs are reproducible
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)  # 1000 inputs, shape [1000, 1]
y = x.pow(2)                                             # targets: y = x^2

dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2)
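As a quick sanity check (an optional snippet, not part of the original script), pull one batch from the loader and confirm the shapes. Because num_workers=2 spawns worker processes, this should run under an if __name__ == "__main__": guard on Windows and macOS.

if __name__ == "__main__":
    batch_x, batch_y = next(iter(loader))
    print(batch_x.shape, batch_y.shape)  # torch.Size([10, 1]) torch.Size([10, 1])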
Build the neural network
class Net(torch.nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden_layer = torch.nn.Linear(n_input, n_hidden)   # input -> hidden
        self.output_layer = torch.nn.Linear(n_hidden, n_output)  # hidden -> output

    def forward(self, input):
        x = torch.relu(self.hidden_layer(input))  # ReLU on the hidden layer
        output = self.output_layer(x)             # linear output (regression)
        return output
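Before training, the network can be smoke-tested (another optional snippet) by instantiating it with the same 1/20/1 sizes used below and running a forward pass on a few of the prepared samples:

net = Net(n_input=1, n_hidden=20, n_output=1)
print(net)               # lists the two Linear layers
print(net(x[:5]).shape)  # forward pass on five samples -> torch.Size([5, 1])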
Train and plot the results. Create a separate network instance for each optimizer, then iterate over the networks and optimizers with a for loop.
def train():
    # One independent network per optimizer, so the comparison is fair.
    net_SGD = Net(1, 20, 1)
    net_Momentum = Net(1, 20, 1)
    net_RMSprop = Net(1, 20, 1)
    net_Adam = Net(1, 20, 1)
    nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]

    # Note that Momentum really is SGD with a momentum argument added.
    optimizer_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    optimizer_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
    optimizer_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
    optimizer_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [optimizer_SGD, optimizer_Momentum, optimizer_RMSprop, optimizer_Adam]

    loss_function = torch.nn.MSELoss()
    losses = [[], [], [], []]  # one loss history per optimizer

    for epoch in range(epoches):
        for step, (batch_x, batch_y) in enumerate(loader):
            for net, optimizer, loss_list in zip(nets, optimizers, losses):
                pred_y = net(batch_x)
                loss = loss_function(pred_y, batch_y)
                optimizer.zero_grad()  # clear old gradients
                loss.backward()        # backpropagate
                optimizer.step()       # update parameters
                loss_list.append(loss.item())  # .item() extracts the scalar loss

    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
    for i, loss in enumerate(losses):
        plt.plot(loss, label=labels[i])
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()
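The losses are logged once per mini-batch, so the raw curves are quite noisy. If you want smoother curves, one option is a simple moving average applied before plotting; smooth below is a hypothetical helper, not part of the original script.

import numpy as np

def smooth(values, k=20):
    # Moving average over a k-step window; the output is k - 1 steps shorter.
    kernel = np.ones(k) / k
    return np.convolve(np.asarray(values, dtype=float), kernel, mode='valid')

# Usage inside the plotting loop: plt.plot(smooth(loss), label=labels[i])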
Complete code
import torch
import torch.nn
import torch.utils.data as Data
import matplotlib.pyplot as plt


class Net(torch.nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden_layer = torch.nn.Linear(n_input, n_hidden)
        self.output_layer = torch.nn.Linear(n_hidden, n_output)

    def forward(self, input):
        x = torch.relu(self.hidden_layer(input))
        output = self.output_layer(x)
        return output


LR = 0.01
batch_size = 10
epoches = 3

torch.manual_seed(15)
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2)

dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2)


def train():
    net_SGD = Net(1, 20, 1)
    net_Momentum = Net(1, 20, 1)
    net_RMSprop = Net(1, 20, 1)
    net_Adam = Net(1, 20, 1)
    nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]

    optimizer_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    optimizer_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
    optimizer_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
    optimizer_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [optimizer_SGD, optimizer_Momentum, optimizer_RMSprop, optimizer_Adam]

    loss_function = torch.nn.MSELoss()
    losses = [[], [], [], []]

    for epoch in range(epoches):
        for step, (batch_x, batch_y) in enumerate(loader):
            for net, optimizer, loss_list in zip(nets, optimizers, losses):
                pred_y = net(batch_x)
                loss = loss_function(pred_y, batch_y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss_list.append(loss.item())

    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
    for i, loss in enumerate(losses):
        plt.plot(loss, label=labels[i])
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()


# num_workers=2 spawns worker processes, so the entry point must be guarded.
if __name__ == "__main__":
    train()
Result plot: