def init_params(n_inputs=None):
    """Create the (w, b) parameters for a single-output linear model.

    Args:
        n_inputs: number of input features. Defaults to the module-level
            ``num_inputs`` global (the original hard-coded behavior).

    Returns:
        Tuple ``(w, b)``: weight tensor of shape ``(n_inputs, 1)`` drawn from
        a standard normal, and a scalar zero bias — both with
        ``requires_grad=True``.
    """
    if n_inputs is None:
        n_inputs = num_inputs  # fall back to the notebook-level global
    # Fixed: the original line was missing the closing ')' (SyntaxError).
    w = torch.randn((n_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    # Fixed: the original created the tensors and then discarded them.
    return w, b
# Gaussian-initialized weight matrix (std 0.01) and zero bias for a
# single linear layer mapping num_inputs features to num_outputs outputs.
W = torch.tensor(
    np.random.normal(0, 0.01, (num_inputs, num_outputs)),
    dtype=torch.float,
)
b = torch.zeros(num_outputs, dtype=torch.float)
# Turn on autograd tracking for both parameters, in place.
for _param in (W, b):
    _param.requires_grad_(requires_grad=True)
# Initialize the nn.Module version of the model in place:
# Gaussian weights (std 0.01) and a zero bias on the `linear` submodule.
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
# NOTE(review): this immediately rebinds `net` to d2l's hand-written linreg
# function, discarding the nn module initialized above — these look like
# pasted alternatives from different notebook versions; confirm which one
# is actually intended before running.
net = d2l.linreg
# Approach 1: pass the layers positionally.
net = nn.Sequential(
    nn.Linear(num_inputs, 1),
    # further layers could be appended here
)

# Approach 2: start empty and register named layers one at a time.
net = nn.Sequential()
net.add_module('linear', nn.Linear(num_inputs, 1))
# net.add_module ......

# Approach 3: an ordered mapping of names to layers.
from collections import OrderedDict

net = nn.Sequential(OrderedDict([
    ('linear', nn.Linear(num_inputs, 1)),
    # ......
]))

# Show the whole container and its first (only) layer.
print(net)
print(net[0])
# Output:
# Sequential(
#   (linear): Linear(in_features=2, out_features=1, bias=True)
# )
# Linear(in_features=2, out_features=1, bias=True)
# You can view all of the model's learnable parameters with net.parameters();
# this function returns a generator.
# Print every learnable parameter tensor registered on the model.
for parameter in net.parameters():
    print(parameter)
# Output:
# Parameter containing:
# tensor([[-0.0277,  0.2771]], requires_grad=True)
# Parameter containing:
# tensor([0.3395], requires_grad=True)
# Fixed typo: the d2l helper is `squared_loss`, not `squard_loss`.
loss = d2l.squared_loss
# NOTE(review): `loss` is immediately rebound to the built-in MSE criterion
# below; nn.MSELoss() averages over elements while d2l's squared_loss keeps
# per-example values — confirm which one the training loop expects.
loss = nn.MSELoss()
def accuracy(y_hat, y):
    """Fraction of rows in `y_hat` whose argmax class equals the label in `y`.

    `y_hat` is a (batch, classes) score tensor, `y` a (batch,) label tensor;
    returns a Python float in [0, 1].
    """
    predictions = y_hat.argmax(dim=1)
    hits = (predictions == y).float()
    return hits.mean().item()
# One learning rate shared by every parameter.
optimizer = optim.SGD(net.parameters(), lr=0.03)

# Fixed: the line below was bare (non-comment) prose — a SyntaxError.
# Set different learning rates for different sub-networks:
optimizer = optim.SGD([
    # A parameter group without an explicit lr uses the outer default lr.
    {'params': net.subnet1.parameters()},  # lr=0.03
    {'params': net.subnet2.parameters(), 'lr': 0.01},
], lr=0.03)
# Per-epoch training / test loss histories.
train_ls = []
test_ls = []

# Wrap the feature/label tensors and draw shuffled mini-batches.
dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_iter = torch.utils.data.DataLoader(
    dataset, batch_size=batch_size, shuffle=True
)
for _ in range(num_epochs):
    # Each epoch makes one full pass over the training set.
    for X, y in train_iter:
        # NOTE(review): earlier fragments called net(X, w, b) (the
        # d2l.linreg signature); an nn.Module net takes only X — confirm
        # which model is in use here.
        l = loss(net(X), y)
        l = l.sum()
        # Clear stale gradients BEFORE backward, exactly once: via the
        # optimizer when one exists, otherwise manually on the raw parameter
        # tensors. (The original pasted three overlapping zeroing variants.)
        if optimizer is not None:
            optimizer.zero_grad()  # equivalent to net.zero_grad()
        elif params is not None and params[0].grad is not None:
            for param in params:
                param.grad.data.zero_()
        l.backward()
        # Update parameters exactly once per batch. Fixed two defects from
        # the pasted original: d2l.sgd ran BEFORE l.backward() (so it used
        # stale/zeroed gradients), and optimizer.step() ALSO ran afterwards
        # (a double update).
        if optimizer is None:
            d2l.sgd(params, lr, batch_size)
        else:
            optimizer.step()
# Saved in the d2lzh_pytorch package for later reuse. This function will be
# improved step by step; its full implementation is described in the
# "Image Augmentation" section.
def evaluate_accuracy(data_iter, net):
    """Mean classification accuracy of `net` over all batches in `data_iter`."""
    correct = 0.0
    total = 0
    for X, y in data_iter:
        predictions = net(X).argmax(dim=1)
        correct += (predictions == y).float().sum().item()
        total += y.shape[0]
    return correct / total