本周首先学的是《动手学深度学习(pytorch版)》里面的线性回归的实现,但是有些东西看不懂,所以先在b站上找了两个简单的视频跟着学了一遍。
说是学了一遍,其实就是跟着视频敲了一遍代码。
课程来自:
PyTorch 从入门到精通(5)—用 Pytorch 实现一个简单的线性回归
先导入包和模块
import torch
import numpy as np
from tqdm import tqdm
我们给定样本集inputs和实际值targets
# Sample inputs: 5 rows with 3 values each, stored as float32.
inputs = np.array( [[73,67,43],
[91,88,64],
[87,134,58],
[102,43,37],
[69,96,70]],dtype='float32')
# Ground-truth targets: 5 rows with 2 values each, stored as float32.
targets = np.array([[56,70],
[81,101],
[119,133],
[22,37],
[103,119]],dtype='float32')
# Convert the NumPy arrays to tensors for the PyTorch code that follows.
inputs_tensor = torch.from_numpy(inputs)
targets_tensor = torch.from_numpy(targets)
随机生成参数:权重和偏差
# Randomly initialised parameters: w has shape (2, 3), b has shape (2,).
# requires_grad=True makes autograd record operations on them so that
# loss.backward() can fill in w.grad and b.grad.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
# print(w)
# print(b)
看一下生成的随机参数
tensor([[-2.2122, 0.4708, -0.9458],
[ 1.6726, 1.2096, -0.2875]], requires_grad=True)
tensor([-0.1555, -0.4671], requires_grad=True)
建立线性回归的函数
def model(x):
    """Return the linear prediction ``x @ w.T + b``.

    Uses the module-level parameters ``w`` and ``b``. With ``w`` of shape
    (2, 3) and ``b`` of shape (2,), an input of shape (batch, 3) gives an
    output of shape (batch, 2).
    """
    return x @ w.t() + b


pred = model(inputs_tensor)  # predictions from the current parameters
# print(pred)
预测结果pred的值为
tensor([[ 116.6629, -163.4659],
[ 148.9410, -221.4479],
[ 375.6723, -230.6700],
[ -30.1504, -168.7918],
[ 211.2514, -214.7471]], grad_fn=<AddBackward0>)
显然pred和targets差得很远。
建立损失函数
# metric
def mse(pred, ground_truth):
    """Return the mean squared error between ``pred`` and ``ground_truth``.

    The squared element-wise differences are summed and divided by the
    number of elements, giving a scalar tensor.
    """
    diff = pred - ground_truth
    return torch.sum(diff * diff) / diff.numel()
loss = mse(pred, targets_tensor) # loss of the current predictions against the targets
# print(loss)
求梯度,迭代一次
loss.backward()  # back-propagate: fills w.grad and b.grad
# print(w)
# print(w.grad)
# The gradients must NOT be zeroed before the update below; zeroing them
# first would make the step subtract zero and leave w and b unchanged.
with torch.no_grad():  # the parameter update itself must not be tracked by autograd
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    # Reset the gradients after they have been applied, because backward()
    # accumulates into .grad instead of overwriting it.
    w.grad.zero_()
    b.grad.zero_()
# print(w)
# print(b)
迭代一次以后的结果为
tensor([[ 0.6583, 1.5759, 0.2232],
[ 1.3011, -0.0160, -0.3618]], requires_grad=True)
tensor([-0.6590, -1.1407], requires_grad=True)
(第一次参数没保存,重新随机了参数)
迭代5000次
for epoch in tqdm(range(5000)):  # repeat the gradient-descent step
    preds = model(inputs_tensor)
    loss = mse(preds, targets_tensor)
    loss.backward()
    with torch.no_grad():  # update parameters without recording the operation
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        # Clear the gradients so the next backward() starts from zero.
        w.grad.zero_()
        b.grad.zero_()
# print(w)
# print(b)
迭代结果是:
100%|██████████| 5000/5000 [00:00<00:00, 5326.52it/s]tensor([[-0.3965, 0.8490, 0.6874],
[-0.2831, 0.8030, 0.8995]], requires_grad=True)
tensor([-0.3978, -1.7858], requires_grad=True)
使用迭代过后的参数求预测值
# Re-evaluate the model with the parameters left by the training loop.
preds = model(inputs_tensor)
loss = mse(preds, targets_tensor)
print(loss) # loss value
print(preds) # predicted values
print(targets) # ground-truth values
输出结果:
100%|██████████| 5000/5000 [00:00<00:00, 5600.33it/s]tensor(0.6086, grad_fn=<DivBackward0>)
tensor([[ 57.4097, 70.6969],
[ 82.0064, 100.3976],
[118.7603, 132.9620],
[ 21.0812, 36.9854],
[101.8528, 119.1335]], grad_fn=<AddBackward0>)
[[ 56. 70.]
[ 81. 101.]
[119. 133.]
[ 22. 37.]
[103. 119.]]
对比pred和targets,已经非常接近了,这是一次成功的简单线性回归。
全部代码:
import torch
import numpy as np
from tqdm import tqdm
#temp
#linear
# Training data: 5 samples with 3 input values and 2 target values each.
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')
inputs_tensor = torch.from_numpy(inputs)
targets_tensor = torch.from_numpy(targets)

# y = x @ w^T + b
# y:   (batch, 2)
# x:   (batch, 3)
# w^T: (3, 2), i.e. w: (2, 3)
# b:   (2,)
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
# print(w)
# print(b)


def model(x):
    """Return the linear prediction ``x @ w.T + b`` using the global ``w`` and ``b``."""
    return x @ w.t() + b


pred = model(inputs_tensor)  # predictions from the random initial parameters
# print(pred)


# metric
def mse(pred, ground_truth):
    """Return the mean squared error: sum of squared differences / element count."""
    diff = pred - ground_truth
    return torch.sum(diff * diff) / diff.numel()


loss = mse(pred, targets_tensor)  # loss of the initial predictions
# print(loss)

# One manual gradient-descent step.
loss.backward()
# print(w)
# print(w.grad)
# The gradients are applied first and only then reset; zeroing them before
# the update would turn this step into a no-op.
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()
# print(w)
# print(b)

for epoch in tqdm(range(5000)):  # repeat the gradient-descent step
    preds = model(inputs_tensor)
    loss = mse(preds, targets_tensor)
    loss.backward()
    with torch.no_grad():  # update parameters without recording the operation
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        # backward() accumulates into .grad, so clear it for the next iteration.
        w.grad.zero_()
        b.grad.zero_()
# print(w)
# print(b)

# Evaluate with the trained parameters.
preds = model(inputs_tensor)
loss = mse(preds, targets_tensor)
print(loss)  # loss value
print(preds)  # predicted values
print(targets)  # ground-truth values
教材来自:
线性回归的从零开始实现
导入包和模块
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random
解释:matplotlib包设置为嵌入显示
构造一个简单的人工训练数据集
num_inputs = 2 # number of features per sample
num_examples = 1000 # number of samples
true_w = [2, -3.4] # true weights
true_b = 4.2 # true bias
# Features are drawn from a standard normal distribution,
# shape (num_examples, num_inputs).
features = torch.randn(num_examples, num_inputs,
dtype=torch.float32)
# print(features)
# Labels follow the linear model true_w[0]*x0 + true_w[1]*x1 + true_b ...
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# ... plus Gaussian noise with mean 0 and standard deviation 0.01.
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
dtype=torch.float32)
# print(labels)
注意,features的每一行是一个长度为2的向量,而labels的每一行是一个长度为1的向量(标量)
# Show the first sample's feature vector together with its label.
print(features[0], labels[0])
结果为:
tensor([ 0.9750, -1.1741]) tensor(10.1327)
我们将样本数设为10看一下创建的labels
# Use only 10 samples here so the generated labels can be inspected by eye.
# NOTE: this overwrites `features`/`labels`; regenerate the full-size data set
# before running the training code further down.
num_examples = 10  # number of samples
features = torch.randn(num_examples, num_inputs,
                       dtype=torch.float32)
# print(features)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
print(labels)
输出结果为:
tensor([ 7.4487, 4.6391, 0.6693, -2.4641, 7.1097, -0.6602, 8.2879, 4.1757, 6.6086, 3.5450])
这里也可以直接创建真实权重w和偏置b的tensor,然后可以直接用矩阵乘法计算
num_inputs = 2 # number of features per sample
num_examples = 1000 # number of samples
# Both weight rows and both bias entries are identical, so the matrix product
# below produces two identical label columns.
true_w = torch.tensor([[2, -3.4],
[2, -3.4]])
true_b = torch.tensor([4.2, 4.2])
# `features` is not regenerated here; whatever tensor is currently in scope is used.
labels = features @ true_w.t() + true_b
# print(labels)
输出结果为:
tensor([[ 7.4487, 7.4487],
[ 4.6391, 4.6391],
[ 0.6693, 0.6693],
[-2.4641, -2.4641],
[ 7.1097, 7.1097],
[-0.6602, -0.6602],
[ 8.2879, 8.2879],
[ 4.1757, 4.1757],
[ 6.6086, 6.6086],
[ 3.5450, 3.5450]])
通过生成第二个特征features[:, 1]和标签 labels 的散点图,可以更直观地观察两者间的线性关系
def use_svg_display():
    """Ask IPython to render matplotlib figures as SVG (vector graphics)."""
    # NOTE(review): newer IPython releases reportedly deprecate this helper in
    # favour of the matplotlib_inline package — confirm for the version in use.
    display.set_matplotlib_formats('svg')


def set_figsize(figsize=(3.5, 2.5)):
    """Switch to SVG output and set matplotlib's default figure size.

    Args:
        figsize: value stored in ``plt.rcParams['figure.figsize']``.
    """
    use_svg_display()  # render as vector graphics
    plt.rcParams['figure.figsize'] = figsize  # set the figure size


set_figsize()
# Scatter the second feature against the labels (third argument 1 = marker size).
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1);  # note the trailing ';'
定义一个函数:它每次返回batch_size(批量大小)个随机样本的特征和标签
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    Args:
        batch_size: number of samples per batch.
        features: tensor indexed by sample along dimension 0.
        labels: tensor indexed by sample along dimension 0.

    Yields:
        Tuples ``(X, y)`` holding the rows of ``features`` and ``labels`` for
        one batch. Every sample appears exactly once per pass; the last batch
        may contain fewer than ``batch_size`` samples.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # samples are read in random order
    for start in range(0, num_examples, batch_size):
        # Slicing past the end of the list simply truncates, so the final
        # batch is allowed to be short without an explicit min().
        batch = torch.tensor(indices[start:start + batch_size], dtype=torch.long)
        yield features.index_select(0, batch), labels.index_select(0, batch)
读取第一个小批量数据样本并打印。每个批量的特征形状为(10, 2),分别对应批量大小和输入个数;标签形状为批量大小。
batch_size = 10
# Print only the first mini-batch, then stop iterating.
for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break
输出结果:
tensor([[-0.0078, -1.3340],
[ 2.1067, -1.0562],
[-0.4234, -0.9623],
[-0.0717, 1.2881],
[ 0.0954, -0.0594],
[ 1.1012, 0.6528],
[ 0.3226, -0.2530],
[ 1.5390, 0.7010],
[ 0.5721, -1.0648],
[ 0.6747, -0.4751]])
tensor([ 8.6953, 12.0257, 6.6350, -0.3204, 4.5889, 4.1836, 5.7033, 4.8894,
8.9712, 7.1663])
初始化模型参数
我们将权重初始化成均值为0、标准差为0.01的正态随机数,偏差则初始化成0。
# Weights: shape (num_inputs, 1), drawn from a normal distribution with
# mean 0 and standard deviation 0.01.
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)
# Bias: a single zero.
b = torch.zeros(1, dtype=torch.float32)
# Enable gradient tracking in place so backward() populates w.grad and b.grad.
w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
定义模型:线性回归矢量计算表达式
def linreg(X, w, b):
    """Return the linear-regression prediction ``X @ w + b``.

    Args:
        X: 2-D feature matrix (``torch.mm`` requires 2-D operands).
        w: 2-D weight matrix whose row count matches ``X``'s column count.
        b: bias, broadcast-added to the matrix product.
    """
    return torch.mm(X, w) + b
定义损失函数
def squared_loss(y_hat, y):
    """Return the element-wise halved squared error ``(y_hat - y)**2 / 2``.

    ``y`` is reshaped to ``y_hat``'s shape before subtracting. The result is a
    tensor of per-element losses, not a scalar; callers reduce it themselves.
    Note that PyTorch's built-in MSELoss does not divide by 2.
    """
    return (y_hat - y.view(y_hat.size())) ** 2 / 2
定义优化算法:小批量随机梯度下降算法
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic-gradient-descent step in place.

    Args:
        params: iterable of tensors whose ``.grad`` has already been populated.
        lr: learning rate.
        batch_size: divisor applied to the gradient (the caller sums the loss
            over the batch, so this turns the step into an average).

    Gradients are not cleared here; the caller must zero them afterwards.
    """
    for param in params:
        # Update through param.data so the step itself is not tracked by autograd.
        param.data -= lr * param.grad / batch_size
训练模型
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

# Training runs for num_epochs epochs.
for epoch in range(num_epochs):
    # Each epoch passes over every training sample once.
    # X and y are the features and labels of one mini-batch.
    for X, y in data_iter(batch_size, features, labels):
        # l is the summed loss over the mini-batch X, y.
        l = loss(net(X, w, b), y).sum()
        # Gradients of the mini-batch loss with respect to the parameters.
        l.backward()
        # Update the parameters with mini-batch stochastic gradient descent.
        sgd([w, b], lr, batch_size)
        # Do not forget to reset the gradients: backward() accumulates.
        w.grad.data.zero_()
        b.grad.data.zero_()
    # Report the mean loss over the full data set after each epoch.
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))
输出结果:
epoch 1, loss 0.028127
epoch 2, loss 0.000095
epoch 3, loss 0.000050
训练结果和真实参数的比较
# Print the true parameters next to the learned ones for comparison.
print(true_w, '\n', w)
print(true_b, '\n', b)
结果:
[2, -3.4]
tensor([[ 1.9998],
[-3.3998]], requires_grad=True)
4.2
tensor([4.2001], requires_grad=True)