1 Implementing Gradient Descent (Linear Regression) in NumPy and PyTorch
The general steps of gradient descent are:
(1) Choose an initial value for the parameters
(2) Compute the gradient of the loss with respect to the parameters
(3) Update the parameters by stepping along the negative gradient direction
A minimal numeric sketch of these three steps is given right after this list.
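The sketch below is a hypothetical example, not from the original text; it applies steps (1)-(3) to the one-dimensional function f(w) = (w - 3)**2, whose gradient is 2*(w - 3):
# Hypothetical example: gradient descent on f(w) = (w - 3)**2
w = 0.0                      # (1) initial value
lr = 0.1                     # step size (learning rate)
for _ in range(100):
    grad = 2 * (w - 3)       # (2) gradient of f at the current w
    w -= lr * grad           # (3) step against the gradient
print(w)                     # converges towards the minimizer w = 3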
1.1 NumPy Version
Suppose the target function to be fitted is y = 2*x1 - 4*x2, a linear function of a 2-dimensional input x = (x1, x2). The gradient used by the code is derived next, and the full NumPy implementation for finding the optimal parameters by gradient descent follows.
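With the N samples stacked into a matrix X of shape (N, 2), the mean-squared-error loss and its gradient with respect to w are

L(w) = \frac{1}{N}\sum_{i=1}^{N}\left(x_i^\top w - y_i\right)^2, \qquad \nabla_w L(w) = \frac{2}{N}X^\top(Xw - y),

which is exactly the quantity computed by the line dw = np.mean(2*(y_predict-y) * x.T, axis=1) in the code below.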
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
from matplotlib import style
# Create the data
N = 100
x1 = np.linspace(-10, 10, N)
x2 = np.linspace(-15, 5, N)
x = np.concatenate(([x1], [x2]), axis=0).T
w = np.array([2, -4])
y = np.dot(x, w)
fig = plt.figure()
ax1 = fig.add_subplot(111, projection='3d')
ax1.plot_wireframe(np.array([x1]),np.array([x2]),np.array([y]), rstride=5, cstride=5)
ax1.set_xlabel("x1")
ax1.set_ylabel("x2")
ax1.set_zlabel("y")
# Gradient descent
EPOCHS = 50 # total number of iterations
LOSS_MIN = 0.0001 # target minimum loss; intended as an early-stopping threshold, although the loop below simply runs for all EPOCHS iterations
lr = 0.01
# w_GD = np.random.rand(2) # w_GD stores the current value of w during gradient descent (GD)
w_GD = np.zeros(2)
cost = [] # stores the loss values during gradient descent (GD)
w_all = []
for i in range(EPOCHS):
    w_all.append(w_GD.copy())
    y_predict = np.dot(x, w_GD) # prediction of y using the current w_GD
    loss = np.mean((y_predict-y)**2) # compute the loss
    cost.append(loss)
    dw = np.mean(2*(y_predict-y) * x.T, axis=1) # compute the gradient
    w_GD -= lr*dw # gradient-descent update
print("loss:",loss)
print("w1:",w_GD[0],"w2",w_GD[1])
# Plot the gradient-descent trajectory
w_all = np.array(w_all)
fig = plt.figure()
ax2 = fig.add_subplot(111, projection='3d')
ax2.plot_wireframe(np.array([w_all[:,0]]),np.array([w_all[:,1]]),np.array([cost]))
ax2.set_xlabel("w1")
ax2.set_ylabel("w2")
ax2.set_zlabel("loss")
fig = plt.figure()
# Plot the loss-iteration curve
plt.plot(range(len(cost)),cost)
plt.title('loss')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()
Output:
loss: 2.565443781623136e-08
w1: 1.9999674457769208 w2 -3.999977280651687
Figure 1: Graph of y = 2*x1 - 4*x2
Figure 1 shows the graph of the target function: because x1 and x2 are sampled in lockstep along the same index, the sampled points form a straight line in 3-dimensional space.
Figure 2: Gradient-descent trajectory of the loss over (w1, w2)
Figure 3: Loss vs. iteration curve
As Figures 2 and 3 show, gradient descent finds values of w1 and w2 that give an excellent fit to the target function. A quick sanity check of the learned parameters is sketched below.
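The snippet below is a sanity check that is not part of the original code; x_test is a hypothetical test set, and the snippet assumes the NumPy script above has just been run, so that np, w and w_GD are in scope.
# Hypothetical sanity check: evaluate the learned parameters on new points
x_test = np.array([[1.0, 1.0],
                   [3.0, -2.0],
                   [-5.0, 4.0]])
y_true = np.dot(x_test, w)       # ground-truth values of 2*x1 - 4*x2
y_fit = np.dot(x_test, w_GD)     # predictions with the learned parameters
print(np.max(np.abs(y_fit - y_true)))   # should be very close to 0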
1.2 PyTorch Version
import torch
from torch.autograd import Variable
import numpy as np
N = 100
x = Variable(torch.randn(N,2))
w = Variable(torch.FloatTensor([2, -4]))
y = torch.mv(x, w) # matrix-vector product: y = 2*x1 - 4*x2 for every sample
EPOCHS = 5000
lr = 0.01
w_GD = Variable(torch.FloatTensor([0, 0]), requires_grad=True)
cost = []
w_all = []
for i in range(EPOCHS):
    w_all.append(w_GD.data.clone()) # clone, otherwise every list entry refers to the same tensor
    y_predict = torch.mv(x, w_GD)
    loss = torch.mean((y_predict-y)**2)
    cost.append(loss.data.numpy())
    loss.backward()
    # parameter update
    w_GD.data -= lr*w_GD.grad.data
    w_GD.grad.data.zero_()
print("loss:",loss)
print("w_GD:",w_GD)
Output:
loss: tensor(8.8394e-11, grad_fn=<MeanBackward0>)
w_GD: tensor([ 2.0000, -4.0000], requires_grad=True)
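For comparison, the same regression can be fitted with torch.optim.SGD performing the parameter update instead of modifying w_GD.data by hand. This is a minimal sketch under the same data-generating assumptions as above, not code from the original text:
import torch
import torch.optim as optim

N = 100
x = torch.randn(N, 2)
w_true = torch.tensor([2.0, -4.0])
y = torch.mv(x, w_true)                  # targets y = 2*x1 - 4*x2

w_GD = torch.zeros(2, requires_grad=True)
optimizer = optim.SGD([w_GD], lr=0.01)

for i in range(5000):
    optimizer.zero_grad()                # clear gradients from the previous step
    y_predict = torch.mv(x, w_GD)        # forward pass
    loss = torch.mean((y_predict - y)**2)
    loss.backward()                      # compute d(loss)/d(w_GD)
    optimizer.step()                     # w_GD <- w_GD - lr * grad

print(loss.item(), w_GD.detach())        # loss close to 0, w_GD close to [2, -4]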
2 Implementing a Simple Neural Network in PyTorch
Here, following the LeNet-5 example from the official PyTorch tutorial, we build a simple convolutional neural network for recognizing handwritten digits. A rough sketch of a training loop on MNIST is given after the example output at the end of this section.
import torch as t
import torchvision as tv
import torch.nn as nn
import torch.nn.functional as F
# Build the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
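# Shape bookkeeping (why fc1 takes 16*5*5 = 400 inputs): a 1x32x32 input goes
# conv1 (5x5) -> 6x28x28, max-pool 2x2 -> 6x14x14,
# conv2 (5x5) -> 16x10x10, max-pool 2x2 -> 16x5x5,
# and num_flat_features() flattens those 16*5*5 values for each sample.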
net = Net()
print(net)
# Forward pass
x = t.randn(1,1,32,32)
out=net(x)
print("out:",out)
# Loss function
target = t.randn(10) # dummy target (torch was imported as t above)
target = target.view(1,-1)
criterion = nn.MSELoss()
loss = criterion(out, target)
print("loss:", loss)
# Backward pass
net.zero_grad()
loss.backward()
# Update the parameters
lr = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * lr)
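# The loop above is plain gradient descent: every parameter p is updated as
# p <- p - lr * p.grad, which is exactly what optim.SGD does below.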
# Using an optimizer
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
output = net(x)
loss = criterion(output, target)
loss.backward()
optimizer.step()
Output:
Net(
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120, bias=True)
(fc2): Linear(in_features=120, out_features=84, bias=True)
(fc3): Linear(in_features=84, out_features=10, bias=True)
)
out: tensor([[ 0.0253, -0.0078, 0.0713, 0.1756, 0.0836, 0.1335, -0.1235, 0.0425,
0.0714, 0.1090]], grad_fn=<AddmmBackward0>)
loss: tensor(1.0700, grad_fn=<MseLossBackward0>)
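The listing above never touches real data; the following is a rough sketch, not from the original text, of how the same Net could be trained on the MNIST handwritten-digit dataset using the torchvision package already imported as tv. The data directory ./data, the hyperparameters, and the switch from MSELoss to CrossEntropyLoss for classification are all assumptions made for illustration.
import torch as t
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
from torch.utils.data import DataLoader

# Assumes the Net class defined in the listing above is available in this script.
transform = tv.transforms.Compose([
    tv.transforms.Resize(32),      # LeNet-5 expects 32x32 inputs; MNIST images are 28x28
    tv.transforms.ToTensor(),
])
train_set = tv.datasets.MNIST(root="./data", train=True, download=True, transform=transform)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

net = Net()
criterion = nn.CrossEntropyLoss()  # classification loss (the demo above used MSELoss on a random target)
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

for epoch in range(2):             # a couple of epochs is enough to see the loss drop
    for images, labels in train_loader:
        optimizer.zero_grad()      # clear accumulated gradients
        outputs = net(images)      # forward pass: (batch, 10) class scores
        loss = criterion(outputs, labels)
        loss.backward()            # backward pass
        optimizer.step()           # parameter update
    print("epoch:", epoch, "loss:", loss.item())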