Notes from Li Mu's "Dive into Deep Learning" course
Quick study notes; for the full material see https://zh-v2.d2l.ai/
1. A smaller batch size actually tends to help training converge: a small batch brings relatively large gradient noise, but for complex neural networks that noise is a good thing, acting like regularization and making the trained model more robust.
%matplotlib inline
import torch
import random
from d2l import torch as d2l

# Construct a synthetic dataset: a linear model with added noise
def synthetic_data(w, b, num_examples):
    '''Generate y = Xw + b + noise'''
    X = torch.normal(0, 1, (num_examples, len(w)))  # sample from a normal distribution: (mean, std, size)
    # X: [num_examples, num_features]; w: [num_features] is a 1-D weight vector, effectively [num_features, 1]
    y = torch.matmul(X, w) + b  # [num_examples]
    y += torch.normal(0, 0.01, y.shape)  # add noise
    return X, y.reshape((-1, 1))  # y: [num_examples, 1]

true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
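Since the comments above track tensor shapes, a quick check (my addition, not part of the original notes) confirms them:

# Shape check: X is [1000, 2]; y becomes [1000, 1] after the reshape
print(features.shape, labels.shape)  # torch.Size([1000, 2]) torch.Size([1000, 1])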
print('features:', features[0], '\nlabel:', labels[0])

d2l.set_figsize()
d2l.plt.scatter(features[:, 1].detach().numpy(),  # the feature and the label are linearly related
                labels.detach().numpy(), 1)
features: tensor([-1.3025, -0.2801])
label: tensor([2.5508])
(scatter plot of features[:, 1] against labels showing the linear relationship; image not available)
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # read the examples in random order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i : min(i + batch_size, num_examples)])
        # yield turns the function into a generator; iteration only starts once next() is called on it. Similar to return, but the function can resume.
        yield features[batch_indices], labels[batch_indices]

batch_size = 10

for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
tensor([[-1.8272, 0.1264],
[ 0.5635, 1.9841],
[ 0.1902, -0.8521],
[ 0.0249, -1.8746],
[ 1.9976, -1.7883],
[-1.0550, -0.5962],
[ 1.3451, -0.5567],
[ 0.0129, -1.0751],
[-0.3702, 0.2639],
[ 0.8936, -0.6200]])
tensor([[ 0.1098],
[-1.4159],
[ 7.4907],
[10.6099],
[14.2886],
[ 4.1167],
[ 8.7922],
[ 7.8881],
[ 2.5766],
[ 8.0954]])
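The hand-written data_iter above is exactly what the framework's data-loading utilities do for us. As a side note (my addition, not from the lecture), a rough equivalent using torch.utils.data looks like this:

# Side note: a rough equivalent of data_iter using PyTorch's built-in utilities
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(features, labels)                   # pairs up features[i] with labels[i]
loader = DataLoader(dataset, batch_size=10, shuffle=True)   # shuffles and batches, like data_iter
X, y = next(iter(loader))                                   # one mini-batch: X is [10, 2], y is [10, 1]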
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # initialize weights with small random values
b = torch.zeros(1, requires_grad=True)                      # initialize the bias at zero

def linreg(X, w, b):
    '''The linear regression model'''
    return torch.matmul(X, w) + b  # matmul gives [batch_size, 1]; adding the scalar b broadcasts over the batch
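As a quick shape check on linreg (my addition): with a dummy batch of two examples, matmul produces a [2, 1] column and adding b broadcasts across it.

# Broadcasting check: X_demo is [2, 2], w is [2, 1], so the output is [2, 1]
X_demo = torch.ones(2, 2)
print(linreg(X_demo, w, b).shape)  # torch.Size([2, 1])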
def squared_loss(y_hat, y):
    '''Squared loss, computed per example (no reduction); the reshape guards against shape mismatches between y_hat and y'''
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
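For comparison (my addition, not in the original notes), the built-in torch.nn.MSELoss computes the same squared error but without the 1/2 factor, so its gradients are twice as large:

# Side note: per-example comparison with the built-in MSE loss (note the missing 1/2 factor)
import torch.nn as nn
mse = nn.MSELoss(reduction='none')       # per-example squared error, no averaging
y_hat_demo = torch.tensor([[2.0], [4.0]])
y_demo = torch.tensor([[1.0], [5.0]])
print(squared_loss(y_hat_demo, y_demo))  # both entries 0.5
print(mse(y_hat_demo, y_demo))           # both entries 1.0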
def sgd(params, lr, batch_size):
    '''
    Minibatch stochastic gradient descent
    params: [w, b]
    '''
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size  # the gradient comes from a summed loss, so divide by batch_size
            param.grad.zero_()
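The hand-written sgd divides by batch_size because the loss is summed (not averaged) over the mini-batch before backward(). For reference (my addition, not from the lecture), the same update with the built-in optimizer would average the loss instead and skip the manual division:

# Side note: the equivalent update using torch.optim, with an averaged loss
optimizer = torch.optim.SGD([w, b], lr=0.01)
# inside the training loop:
#   optimizer.zero_grad()
#   loss(net(X, w, b), y).mean().backward()
#   optimizer.step()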
lr = 0.01
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # [batch_size, 1]
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # note: passing batch_size here is a simplification; the last mini-batch may hold fewer than batch_size examples
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)  # [num_examples, 1]
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
epoch 1, loss 2.259320
epoch 2, loss 0.314628
epoch 3, loss 0.044144
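A quick follow-up check (done this way in the d2l book's version of this section) is to compare the learned parameters against the ones used to generate the data:

# Compare the learned parameters with the true parameters used to generate the data
print(f'error in estimating w: {true_w - w.reshape(true_w.shape)}')
print(f'error in estimating b: {true_b - b}')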