I have recently been reading the book Dive-into-DL-PyTorch (Dive into Deep Learning with PyTorch), link: https://github.com/newmonkey/Dive-into-DL-PyTorch. I have found it very rewarding, so I am recording my learning notes here.
This post covers linear regression. The book uses a two-variable linear regression; here I rewrite it with minor changes as a one-variable linear regression, which I find easier to understand.
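For reference (this formula is implied by the data-generating code below rather than written out in the original), the model used throughout is the one-variable linear model
y = true_a * x + true_b + ε,  where ε ~ N(0, 0.01²), true_a = 2 and true_b = 4,
and training tries to recover the weight a and bias b from the noisy (x, y) pairs.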
# Generate the dataset
import torch
import numpy as np
import random
num_inputs=1        # number of input features
num_examples=1000   # number of samples
true_a=2            # true weight
true_b=4            # true bias
features=torch.tensor(np.random.normal(0, 1, (num_examples,num_inputs)), dtype=torch.float)
labels=true_a*features[:,0]+true_b
labels+=torch.tensor(np.random.normal(0, 0.01,size=labels.size()), dtype=torch.float)  # add Gaussian noise
Here labels correspond to y in the formula and features correspond to x.
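As a quick sanity check (not part of the original code), the first sample can be printed; its label should be roughly twice the feature value plus 4:
print(features[0], labels[0])  # one feature value and its (noisy) label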
batch_size=10  # minibatch size
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # samples are read in random order
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])  # the last batch may be smaller
        yield features.index_select(0, j), labels.index_select(0, j)
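A minimal usage sketch of data_iter (added here for illustration): fetch a single minibatch, inspect its shapes, then stop.
for x, y in data_iter(batch_size, features, labels):
    print(x.shape, y.shape)  # expected: torch.Size([10, 1]) torch.Size([10])
    break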
a=torch.tensor(np.random.normal(0,0.01,(num_inputs,1)),dtype=torch.float32)  # weight, initialized to small random values
b=torch.zeros(1,dtype=torch.float32)  # bias, initialized to zero
# Enable gradient tracking for the parameters
a.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
Next comes the implementation of the vectorized expression of linear regression. We use torch.mm to perform the matrix multiplication.
def linreg(x,a,b):
    return torch.mm(x,a)+b  # matrix product of the features and the weight, plus the bias
def squared_loss(y_hat,y):
    return (y_hat-y.view(y_hat.size()))**2/2  # squared loss per example (not yet averaged)
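A small shape check (added here for illustration, using the a and b defined above): linreg maps a (batch, 1) feature matrix to a (batch, 1) prediction, and squared_loss returns one loss value per example rather than an averaged scalar.
x_sample = features[:3]                      # shape (3, 1)
y_sample = labels[:3]                        # shape (3,)
y_hat = linreg(x_sample, a, b)               # shape (3, 1)
print(squared_loss(y_hat, y_sample).shape)   # torch.Size([3, 1]), one loss value per example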
Here batch_size is the minibatch size and lr is the learning rate (a positive number). Both values are set by hand rather than learned through model training, so they are called hyperparameters. Repeatedly tuning them to find suitable values can improve the model's accuracy.
def sgd(params,lr,batch_size):
    for param in params:
        param.data-=lr*param.grad/batch_size  # the loss is summed over the batch, so divide the gradient by batch_size; updating via .data keeps the step out of autograd
lr=0.03        # learning rate
num_epochs=5   # number of training epochs
net=linreg
loss=squared_loss
for epoch in range(num_epochs):
    # In each epoch, every sample in the training set is used once
    # (assuming the number of samples is divisible by the batch size)
    for x, y in data_iter(batch_size, features, labels):
        l = loss(net(x, a, b), y).sum()  # l is the loss on the minibatch x and y
        l.backward()                     # compute gradients of the minibatch loss w.r.t. the model parameters
        sgd([a, b], lr, batch_size)      # update the parameters with minibatch stochastic gradient descent
        # do not forget to reset the gradients to zero
        a.grad.data.zero_()
        b.grad.data.zero_()
    train_l = loss(net(features, a, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))
print(a)
print(b)
#Result
#epoch 1, loss 0.000052
#epoch 2, loss 0.000051
#epoch 3, loss 0.000052
#epoch 4, loss 0.000052
#epoch 5, loss 0.000052
#tensor([[2.0006]], requires_grad=True)
#tensor([4.0006], requires_grad=True)
As we can see, after 5 epochs of training the learned parameters are a=2.0006 and b=4.0006, both very close to the true values true_a=2 and true_b=4.
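With the trained a and b, predictions on new inputs can be made without tracking gradients (a minimal sketch; x_new is a name introduced here for illustration):
with torch.no_grad():
    x_new = torch.tensor([[1.5]])  # a single new input
    print(linreg(x_new, a, b))     # expected to be close to 2 * 1.5 + 4 = 7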
The second half below re-implements the same model concisely with PyTorch's built-in utilities (torch.utils.data, torch.nn and torch.optim); the dataset is generated exactly as before.
# Generate the dataset
import torch
import numpy as np
import random
num_inputs=1
num_examples=1000
true_a=2
true_b=4
features=torch.tensor(np.random.normal(0, 1, (num_examples,num_inputs)), dtype=torch.float)
labels=true_a*features[:,0]+true_b
labels+=torch.tensor(np.random.normal(0, 0.01,size=labels.size()), dtype=torch.float)
import torch.utils.data as Data
batch_size = 10
# Combine the features and labels of the training data
dataset = Data.TensorDataset(features, labels)
# Read random minibatches
data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)
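As with the hand-written data_iter, one can check that the DataLoader yields minibatches of the expected shape (a minimal sketch, not part of the original code):
for x, y in data_iter:
    print(x.shape, y.shape)  # expected: torch.Size([10, 1]) torch.Size([10])
    break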
import torch.nn as nn
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1)
    # forward defines the forward pass
    def forward(self, x):
        y = self.linear(x)
        return y
net = LinearNet(num_inputs)
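Printing the network shows the layer it contains (a quick check added here; the exact output format may vary across PyTorch versions):
print(net)
# LinearNet(
#   (linear): Linear(in_features=1, out_features=1, bias=True)
# )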
from torch.nn import init
init.normal_(net.linear.weight, mean=0, std=0.01)  # initialize the weight from N(0, 0.01^2)
init.constant_(net.linear.bias, val=0)             # initialize the bias to zero
loss = nn.MSELoss()  # mean squared error loss
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)
#Result
#SGD (
#Parameter Group 0
# dampening: 0
# lr: 0.03
# momentum: 0
# nesterov: False
# weight_decay: 0
#)
num_epochs = 5
for epoch in range(1, num_epochs + 1):
    for x, y in data_iter:
        output = net(x)
        l = loss(output, y.view(-1, 1))  # reshape y to match the shape of output
        optimizer.zero_grad()            # reset gradients, equivalent to net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss:%f' % (epoch,l.item()))
print(net.linear.weight)  # the learned weight lives inside the linear layer
print(net.linear.bias)    # the learned bias lives inside the linear layer
#Result
#epoch 1, loss:0.000197
#epoch 2, loss:0.000067
#epoch 3, loss:0.000173
#epoch 4, loss:0.000168
#epoch 5, loss:0.000099
#Parameter containing:
#tensor([[2.0006]], requires_grad=True)
#Parameter containing:
#tensor([4.0006], requires_grad=True)
As we can see, after 5 epochs of training the learned weight is 2.0006 and the learned bias is 4.0006, again very close to the true values true_a=2 and true_b=4.
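To put the learned parameters and the true values side by side (a minimal sketch, added here for illustration), the weight and bias can be read directly from the linear layer:
print(true_a, net.linear.weight.data)
print(true_b, net.linear.bias.data)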