Backpropagation breaks down into three steps, which the examples below walk through:
(1) Forward pass: compute the loss.
(2) Compute the local gradient at each node.
(3) Backward pass: combine the local gradients via the chain rule to get the full gradient.
First, a single forward/backward step on one sample:

import torch

x = torch.tensor(1.0)                      # input x
y = torch.tensor(2.0)                      # target y
w = torch.tensor(1.0, requires_grad=True)  # weight to learn; requires_grad=True enables autograd

y_predicted = w * x          # forward pass: prediction
loss = (y - y_predicted)**2  # squared error between target and prediction
print(loss)

loss.backward()              # backward pass: compute dloss/dw via the chain rule
print(w.grad)                # gradient of the loss w.r.t. w

with torch.no_grad():        # the update itself must not be tracked by autograd
    w -= 0.01 * w.grad       # update the weight
print(w)                     # print the updated weight
w.grad.zero_()               # reset the gradient so it does not accumulate across steps
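As a sanity check, the chain rule can be applied by hand: with loss = (y - w*x)**2, dloss/dw = 2*x*(w*x - y) = 2*1*(1 - 2) = -2, which is exactly the value w.grad printed above. A minimal sketch of that check (w0 is an illustrative copy of the pre-update weight, not part of the original code):

# Chain rule by hand at the original w = 1.0:
# dloss/dy_pred = -2*(y - y_pred), dy_pred/dw = x  =>  dloss/dw = 2*x*(w*x - y)
w0 = torch.tensor(1.0)              # weight value before the update (illustrative)
manual_grad = 2 * x * (w0 * x - y)  # local gradients combined via the chain rule
print(manual_grad)                  # tensor(-2.), matching w.grad above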
Next, the same idea on a small linear-regression problem (f(x) = 2x), with the gradient computed by hand in NumPy:

import numpy as np

X = np.array([1, 2, 3, 4], dtype=np.float32)  # inputs X
Y = np.array([2, 4, 6, 8], dtype=np.float32)  # targets Y

w = 0.0  # initialize the weight

def forward(x):
    return w * x  # prediction

def loss(y, y_pred):
    return ((y_pred - y)**2).mean()  # mean squared error

# Loss     = 1/N * sum((w*x - y)**2)
# dLoss/dw = 1/N * sum(2*x*(w*x - y))
def gradient(x, y, y_pred):
    return (2 * x * (y_pred - y)).mean()  # mean of 2*x*(y_pred - y)
print(f'Prediction before training: f(5) = {forward(5):.3f}')  # with w = 0 the prediction is 0

learning_rate = 0.01  # learning rate
n_iters = 20          # number of iterations

for epoch in range(n_iters):
    y_pred = forward(X)          # forward pass: predictions
    l = loss(Y, y_pred)          # loss between targets and predictions
    dw = gradient(X, Y, y_pred)  # gradient of the loss w.r.t. w
    w -= learning_rate * dw      # update the weight
    if epoch % 2 == 0:           # print progress every two epochs
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')  # f(5) with the trained weight
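The analytic formula in gradient can be confirmed numerically with a central finite difference. A minimal sketch, reusing the X, Y, and gradient defined above (eps and loss_at are illustrative names, not part of the original code):

eps = 1e-4
def loss_at(w_val):  # illustrative helper: the MSE as a function of w
    return ((w_val * X - Y)**2).mean()

numeric  = (loss_at(0.5 + eps) - loss_at(0.5 - eps)) / (2 * eps)  # central difference at w = 0.5
analytic = gradient(X, Y, 0.5 * X)                                # the formula derived above
print(numeric, analytic)  # the two values should closely agree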
Finally, the same training loop, but with the gradient computed by autograd instead of by hand:

import torch

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)  # weight, tracked by autograd

def forward(x):
    return w * x  # prediction

def loss(y, y_pred):
    return ((y_pred - y)**2).mean()  # mean squared error

print(f'Prediction before training: f(5) = {forward(5).item():.3f}')  # prediction with the initial weight

learning_rate = 0.1
n_iters = 20

for epoch in range(n_iters):
    y_pred = forward(X)              # forward pass
    l = loss(Y, y_pred)              # compute the loss
    l.backward()                     # backward pass: populates w.grad
    with torch.no_grad():            # the update must not be tracked by autograd
        w -= learning_rate * w.grad  # update the weight
        w.grad.zero_()               # reset the gradient so it does not accumulate
    if epoch % 2 == 0:               # print progress every two epochs
        print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {l.item():.8f}')

print(f'Prediction after training: f(5) = {forward(5).item():.3f}')
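In practice the manual update and the gradient zeroing are usually delegated to PyTorch's built-ins. A sketch of the same loop using torch.nn.MSELoss and torch.optim.SGD (same data and hyperparameters as above):

import torch

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

loss_fn = torch.nn.MSELoss()              # same mean-squared-error loss as above
optimizer = torch.optim.SGD([w], lr=0.1)  # wraps the update and the zeroing

for epoch in range(20):
    l = loss_fn(w * X, Y)  # forward pass and loss
    l.backward()           # backward pass
    optimizer.step()       # w -= lr * w.grad
    optimizer.zero_grad()  # reset gradients for the next iteration

print(f'f(5) = {(w * 5).item():.3f}')  # should be close to 10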