import random

import matplotlib.pyplot as plt
import torch
import torch.optim as optim


def grad_down(optimizer, x, w, y):
    """Run one optimization step and return the scalar loss."""
    optimizer.zero_grad()
    output = torch.mv(x, w)  # linear model without bias: y_hat = x @ w
    loss = MSELoss(output, y)
    loss.backward()
    optimizer.step()
    return loss.item()


def MSELoss(output, target):
    # Flatten both tensors before subtracting: the original broadcast a
    # (50,) output against a (50, 1) target, silently producing a 50x50
    # difference matrix instead of element-wise residuals.
    return ((output.view(-1) - target.view(-1)) ** 2).mean()


# Toy dataset: y is roughly 2x plus uniform noise in [0, 1).
x = torch.tensor([[float(i)] for i in range(50)])
y = torch.tensor([[i * 2 + random.random()] for i in range(50)])

# One independent weight per optimizer so the runs do not interfere.
w1 = torch.zeros(1, requires_grad=True)
w2 = torch.zeros(1, requires_grad=True)
w3 = torch.zeros(1, requires_grad=True)
w4 = torch.zeros(1, requires_grad=True)
w5 = torch.zeros(1, requires_grad=True)
w7 = torch.zeros(1, requires_grad=True)
w8 = torch.zeros(1, requires_grad=True)

learning_rate = 0.01

optimizer1 = optim.SGD([w1], learning_rate)
optimizer2 = optim.Adam([w2], learning_rate)
optimizer3 = optim.Adadelta([w3], learning_rate)
optimizer4 = optim.Adagrad([w4], learning_rate)
optimizer5 = optim.Adamax([w5], learning_rate)
# LBFGS is left out of the loop below: its step() requires a closure that
# re-evaluates the loss, so it does not fit grad_down(). See the sketch at
# the end of the script.
optimizer7 = optim.RMSprop([w7], learning_rate)
# Note: optimizer8 repeats the RMSprop configuration of optimizer7
# (the original labeled this run 'SGRMSprop').
optimizer8 = optim.RMSprop([w8], learning_rate)

loss1 = []
loss2 = []
loss3 = []
loss4 = []
loss5 = []
loss7 = []
loss8 = []

for step in range(10000):
    loss1.append(grad_down(optimizer1, x, w1, y))
    loss2.append(grad_down(optimizer2, x, w2, y))
    loss3.append(grad_down(optimizer3, x, w3, y))
    loss4.append(grad_down(optimizer4, x, w4, y))
    loss5.append(grad_down(optimizer5, x, w5, y))
    loss7.append(grad_down(optimizer7, x, w7, y))
    loss8.append(grad_down(optimizer8, x, w8, y))
    if step % 1000 == 0:  # print progress occasionally instead of every step
        print('step={}'.format(step))

plt.plot(loss2, label='Adam')
plt.plot(loss3, label='Adadelta')
plt.plot(loss4, label='Adagrad')
plt.plot(loss5, label='Adamax')
plt.plot(loss7, label='RMSprop')
plt.plot(loss8, label='RMSprop (repeat)')
plt.plot(loss1, label='SGD')
plt.legend(loc=0)
plt.show()
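
# --- LBFGS sketch ------------------------------------------------------------
# torch.optim.LBFGS cannot reuse grad_down() because LBFGS.step() must be
# given a closure that re-evaluates the loss (it may probe several points per
# step). Below is a minimal sketch of how it could be added to the comparison,
# reusing the x, y, MSELoss, and learning_rate defined above; the closure name
# and step count are illustrative choices, not part of the original script.

w6 = torch.zeros(1, requires_grad=True)
optimizer6 = optim.LBFGS([w6], lr=learning_rate)
loss6 = []


def lbfgs_closure():
    """Zero the grads, recompute the loss, backprop, and return the loss."""
    optimizer6.zero_grad()
    loss = MSELoss(torch.mv(x, w6), y)
    loss.backward()
    return loss


for step in range(100):  # a quasi-Newton method; far fewer steps suffice here
    # step(closure) returns the loss computed by the closure
    loss6.append(optimizer6.step(lbfgs_closure).item())

plt.plot(loss6, label='LBFGS')
plt.legend(loc=0)
plt.show()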