A typical neural-network training process breaks down into the following steps:
1. Define the network architecture, including the shape and initial values of each layer's weight parameters.
2. Split the input data into batches to feed into the network.
3. Propagate the input through the whole network to compute the output.
4. In each iteration, compute the loss from the difference between the computed output and the ground truth.
5. Back-propagate the loss to obtain the gradient of each weight parameter.
6. Update the weights using the rule below (a minimal sketch of this single update step follows the list):
weight = weight - learning_rate * gradient
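As a minimal sketch of step 6 in isolation, the snippet below applies the update rule once to a single weight tensor; the shapes, toy loss, and learning rate here are made up purely for illustration and are not part of the example that follows.

import torch

weight = torch.randn(3, 2, requires_grad=True)   # one weight parameter (toy shape)
x = torch.randn(4, 3)                            # a toy input batch
target = torch.randn(4, 2)                       # the matching ground truth
learning_rate = 1e-2

loss = (x.mm(weight) - target).pow(2).sum()      # step 4: compute the loss
loss.backward()                                  # step 5: back-propagate the loss
with torch.no_grad():                            # step 6: weight = weight - learning_rate * gradient
    weight -= learning_rate * weight.grad
    weight.grad.zero_()                          # clear the gradient for the next iteration

The full listing below builds a two-layer network and carries out all six steps by hand.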
import torch
from torch.autograd import Variable

batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()

    def forward(self, input, w1, w2):
        x = torch.mm(input, w1)
        x = torch.clamp(x, min=0)  # equivalent to ReLU
        x = torch.mm(x, w2)
        return x

    def backward(self):
        pass  # placeholder: autograd computes the backward pass automatically

model = Model()
x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)
w1 = Variable(torch.randn(input_data, hidden_layer), requires_grad=True)
w2 = Variable(torch.randn(hidden_layer, output_data), requires_grad=True)

# Define the number of training epochs and the learning rate
epoch_n = 30
learning_rate = 1e-6

# Model training and parameter optimization
for epoch in range(epoch_n):
    y_pred = model(x, w1, w2)
    loss = (y_pred - y).pow(2).sum()  # the resulting loss is a 0-dimensional tensor
    print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.data))
    loss.backward()  # back-propagate the error
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data
    w1.grad.data.zero_()
    w2.grad.data.zero_()
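Note that torch.autograd.Variable is kept above for fidelity to the original listing, but it has been deprecated since PyTorch 0.4: tensors now carry requires_grad themselves. A sketch of the same manual training loop in the modern style (same shapes and hyperparameters as above) could look like this:

import torch

batch_n, hidden_layer, input_data, output_data = 100, 100, 1000, 10

# Plain tensors replace Variable; requires_grad is set directly on the tensor
x = torch.randn(batch_n, input_data)
y = torch.randn(batch_n, output_data)
w1 = torch.randn(input_data, hidden_layer, requires_grad=True)
w2 = torch.randn(hidden_layer, output_data, requires_grad=True)

learning_rate = 1e-6
for epoch in range(30):
    y_pred = torch.clamp(x.mm(w1), min=0).mm(w2)  # forward pass; clamp acts as ReLU
    loss = (y_pred - y).pow(2).sum()
    loss.backward()
    with torch.no_grad():                         # update the weights outside the autograd graph
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()

The next listing rebuilds the same network with torch.nn.Sequential, so the layers and their weights no longer have to be managed by hand.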
import torch
from torch.autograd import Variable

batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)

"""
# Nest the layers directly
model = torch.nn.Sequential(
    torch.nn.Linear(input_data, hidden_layer),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_layer, output_data)
)
print(model)
"""

# Pass the layers in as an OrderedDict so that each module gets a name
from collections import OrderedDict
models = torch.nn.Sequential(OrderedDict([
    ("Liner1", torch.nn.Linear(input_data, hidden_layer)),
    ("Relu1", torch.nn.ReLU()),
    ("Liner2", torch.nn.Linear(hidden_layer, output_data))
]))
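# A quick aside (not part of the original listing): with the OrderedDict form,
# every submodule is registered under the name given in the dict, so it can be
# retrieved by that name or by position.
print(models)                       # the printed structure shows Liner1, Relu1, Liner2
print(models.Liner1.weight.shape)   # access a layer by its OrderedDict name
print(models[0] is models.Liner1)   # Sequential also supports positional indexing -> True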
epoch_n = 10000
learning_rate = 1e-6
loss_fn = torch.nn.MSELoss()

for epoch in range(epoch_n):
    y_pred = models(x)
    loss = loss_fn(y_pred, y)
    if epoch % 1000 == 0:
        print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.data))
    # Zero the gradient of every parameter node before back-propagation; if the
    # gradients are not zeroed, the values computed here keep accumulating and
    # distort all subsequent updates.
    models.zero_grad()
    # loss.backward() lets autograd walk the computation graph, compute the
    # gradient of every node, and retain it where required.
    loss.backward()
    # All of the model's parameters are reached by iterating over
    # models.parameters(); each parameter is then updated with its gradient.
    for param in models.parameters():
        param.data -= param.grad.data * learning_rate  # plain gradient-descent update
The torch.optim package in PyTorch provides many classes that perform parameter optimization automatically, such as SGD, AdaGrad, RMSProp, and Adam.
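Before the full Adam example, here is a minimal sketch of how these optimizer classes are constructed; it assumes a models object and learning_rate like those in the surrounding listings, and only the class being instantiated changes:

import torch

# Each optimizer takes the iterable of parameters to optimize plus its own options
optimizer_sgd = torch.optim.SGD(models.parameters(), lr=learning_rate, momentum=0.9)
optimizer_adagrad = torch.optim.Adagrad(models.parameters(), lr=learning_rate)
optimizer_rmsprop = torch.optim.RMSprop(models.parameters(), lr=learning_rate)
optimizer_adam = torch.optim.Adam(models.parameters(), lr=learning_rate)

The listing below plugs torch.optim.Adam into the same Sequential model and lets it handle the parameter updates.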
import torch
from torch.autograd import Variable

batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10

x = Variable(torch.randn(batch_n, input_data), requires_grad=False)
y = Variable(torch.randn(batch_n, output_data), requires_grad=False)

# Pass the layers in as an OrderedDict so that each module gets a name
from collections import OrderedDict
models = torch.nn.Sequential(OrderedDict([
    ("Liner1", torch.nn.Linear(input_data, hidden_layer)),
    ("Relu1", torch.nn.ReLU()),
    ("Liner2", torch.nn.Linear(hidden_layer, output_data))
]))

epoch_n = 20
learning_rate = 1e-4
loss_fn = torch.nn.MSELoss()

# Use the torch.optim.Adam class from the torch.optim package as the optimization
# function for the model parameters. All of the model's parameters need to be
# optimized, so models.parameters() is what gets passed to torch.optim.Adam.
optimizer = torch.optim.Adam(models.parameters(), lr=learning_rate)

for epoch in range(epoch_n):
    y_pred = models(x)
    loss = loss_fn(y_pred, y)
    print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.data))
    optimizer.zero_grad()  # zero the gradients of all model parameters
    loss.backward()        # autograd computes the gradient of every node in the computation graph
    optimizer.step()       # update each parameter with its computed gradient
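Step 2 of the opening list mentions splitting the input data into batches, which none of the listings above actually do (each feeds all 100 samples at once). As a sketch of how the Adam loop could be adapted, assuming the same x, y, models, loss_fn, epoch_n, and optimizer objects defined above, torch.utils.data can take over the batching:

from torch.utils.data import DataLoader, TensorDataset

# Wrap the existing tensors in a dataset and iterate over it in batches of 20
dataset = TensorDataset(x, y)
loader = DataLoader(dataset, batch_size=20, shuffle=True)

for epoch in range(epoch_n):
    for batch_x, batch_y in loader:
        y_pred = models(batch_x)
        loss = loss_fn(y_pred, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.data))  # loss of the last batch in the epoch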