Learning video
This article covers some implementation details of how backpropagation and parameter updates are written in code.
import torch
# Initialize the data we need
x = torch.tensor(1.0)
y = torch.tensor(2.0)
# w needs a gradient -> requires_grad=True
w = torch.tensor(1.0, requires_grad=True)
# Forward pass and loss; we skip defining s = y_hat - y separately and write the loss in one line
y_hat = w * x
loss = (y_hat - y) ** 2
print(loss)
# Backward pass
loss.backward()  # PyTorch automatically computes the gradient of every parameter that requires one
print(w.grad)
tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)
From the above it is not hard to see, first, that the tensor data type already carries the bookkeeping a neural network needs during training, such as the gradient stored in .grad; this is one of the main reasons we use tensors in the first place. In this example w.grad comes out as -2 because d(loss)/dw = 2*x*(w*x - y) = 2*1*(1*1 - 2) = -2.
Second, PyTorch provides built-in methods that compute these gradients and update the corresponding parameters for us automatically, so a simple call is all we need.
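To make that concrete, here is a minimal sketch (an addition, not from the original notes) that checks the autograd result against the hand-derived gradient 2*x*(w*x - y) for the same toy model:
import torch

x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

loss = (w * x - y) ** 2
loss.backward()

# Hand-derived gradient of (w*x - y)**2 with respect to w
manual_grad = 2 * x * (w.detach() * x - y)
print(w.grad, manual_grad)                  # tensor(-2.) tensor(-2.)
print(torch.allclose(w.grad, manual_grad))  # True
The rest of the post solves the same fitting problem f = w * x twice: first with a hand-written gradient in NumPy, then with autograd in PyTorch.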
import numpy as np
# Known model form: f = w * x; the value of the parameter w is unknown
# Target: w = 2
# Initialize the data
# Training inputs X
X = np.array([1, 2, 3, 4], dtype=np.float32)
# Target outputs Y
Y = np.array([2, 4, 6, 8], dtype=np.float32)
# Initial w
w = 0.0
# Forward pass
def forward(x):
    return w * x
# Loss
# The loss function is the mean squared error (MSE)
def loss(y, y_predicted):
    return ((y_predicted - y) ** 2).mean()
# Gradient of the loss
# MSE: J = 1/N * sum((w*x - y)**2)
# dJ/dw = 1/N * sum(2*x * (w*x - y)), i.e. differentiate the formula above with respect to w,
# treating x, y and N as constants
# Note: np.dot already sums over the samples (and .mean() of a scalar is a no-op), so this
# returns the summed gradient, N times the mean; training still converges, just with a
# larger effective step size
def gradient(x, y, y_predicted):
    return np.dot(2 * x, y_predicted - y).mean()
print(f"Prediction before training: f(5) = {forward(5):.3f}")
# Training
learning_rate = 1e-2
n_iters = 20
for epoch in range(n_iters):
    # Prediction
    y_pred = forward(X)
    # Loss
    l = loss(Y, y_pred)
    # Gradient
    dw = gradient(X, Y, y_pred)
    # Update w
    w -= learning_rate * dw
    if epoch % 2 == 0:
        print(f"epoch{epoch+1}: w = {w:.3f}, loss = {l:.8f}")
print(f"Prediction after training: f(5) = {forward(5):.3f}")
Prediction before training: f(5) = 0.000
epoch1: w = 1.200, loss = 30.00000000
epoch3: w = 1.872, loss = 0.76800019
epoch5: w = 1.980, loss = 0.01966083
epoch7: w = 1.997, loss = 0.00050331
epoch9: w = 1.999, loss = 0.00001288
epoch11: w = 2.000, loss = 0.00000033
epoch13: w = 2.000, loss = 0.00000001
epoch15: w = 2.000, loss = 0.00000000
epoch17: w = 2.000, loss = 0.00000000
epoch19: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000
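A side note that goes slightly beyond the original walkthrough: the gradient function above uses np.dot, which sums over the samples, so each step is N times larger than the 1/N mean-gradient formula in the comment. If you want the code to match the formula exactly, average element-wise instead; a minimal sketch of that variant (its per-epoch numbers differ from the table above because each step is smaller, so more iterations are used):
import numpy as np

X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)
w = 0.0

def gradient_mean(x, y, y_predicted):
    # dJ/dw = 1/N * sum(2*x * (w*x - y)): element-wise product, then mean
    return (2 * x * (y_predicted - y)).mean()

learning_rate = 1e-2
for epoch in range(100):  # more iterations, since each step is smaller
    y_pred = w * X
    w -= learning_rate * gradient_mean(X, Y, y_pred)
print(f"w = {w:.3f}")  # also converges to about 2.000
The PyTorch version below drops the hand-written gradient entirely and lets autograd compute it.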
import torch
# Known model form: f = w * x; the value of the parameter w is unknown
# Target: w = 2
# Initialize the data
# Training inputs X
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
# Target outputs Y
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)
# Initial w; it has to be trained, so requires_grad=True
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
# Forward pass
def forward(x):
    return w * x
# Loss
# The loss function is the mean squared error (MSE)
def loss(y, y_predicted):
    return ((y_predicted - y) ** 2).mean()
print(f"Prediction before training: f(5) = {forward(5).item():.3f}")
# Training
learning_rate = 1e-2
n_iters = 100
for epoch in range(n_iters):
    # Prediction
    y_pred = forward(X)
    # Loss
    l = loss(Y, y_pred)
    # Gradient: backward() fills in w.grad for us
    l.backward()
    # Update w; the update itself must not be tracked by autograd
    with torch.no_grad():
        w -= learning_rate * w.grad
    # Zero the gradient so it does not accumulate across iterations
    w.grad.zero_()
    if epoch % 10 == 0:
        print(f"epoch{epoch+1}: w = {w:.3f}, loss = {l:.8f}")
print(f"Prediction after training: f(5) = {forward(5).item():.3f}")
Prediction before training: f(5) = 0.000
epoch1: w = 0.300, loss = 30.00000000
epoch11: w = 1.665, loss = 1.16278565
epoch21: w = 1.934, loss = 0.04506890
epoch31: w = 1.987, loss = 0.00174685
epoch41: w = 1.997, loss = 0.00006770
epoch51: w = 1.999, loss = 0.00000262
epoch61: w = 2.000, loss = 0.00000010
epoch71: w = 2.000, loss = 0.00000000
epoch81: w = 2.000, loss = 0.00000000
epoch91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000
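Why must the update sit inside torch.no_grad()? Because w is a leaf tensor with requires_grad=True, an in-place update on it outside of no_grad is rejected by autograd, and even an out-of-place update would be recorded in the computation graph. A tiny sketch (an illustration, not from the original notes) of what happens otherwise:
import torch

w = torch.tensor(0.0, requires_grad=True)
try:
    w -= 0.1  # in-place update of a leaf that requires grad -> RuntimeError
except RuntimeError as e:
    print("update outside no_grad failed:", e)

with torch.no_grad():
    w -= 0.1  # fine: the update is not tracked by autograd
print(w)      # tensor(-0.1000, requires_grad=True)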
PS: the reason for calling .grad.zero_(): backward() accumulates gradients into .grad instead of overwriting it, so if we never zero it, each iteration's gradient would be added on top of the previous ones and the update direction would be wrong.
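The manual update and zeroing above map directly onto PyTorch's optimizer API: optimizer.step() plays the role of the no_grad update and optimizer.zero_grad() plays the role of w.grad.zero_(). A minimal sketch of the same loop with torch.optim.SGD (same data and model as above; an addition, not part of the original notes):
import torch

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)
w = torch.tensor(0.0, requires_grad=True)

optimizer = torch.optim.SGD([w], lr=1e-2)

for epoch in range(100):
    y_pred = w * X
    l = ((y_pred - Y) ** 2).mean()
    l.backward()
    optimizer.step()       # replaces: with torch.no_grad(): w -= learning_rate * w.grad
    optimizer.zero_grad()  # replaces: w.grad.zero_()

print(f"w = {w.item():.3f}")  # about 2.000, matching the manual loop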