import torch
from torch import nn
from d2l import torch as d2l
# Multilayer perceptron: flatten the input, map 784 features to 256 hidden
# units with a ReLU activation, then map to 10 outputs.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=256),
    nn.ReLU(),  # one more layer compared with Section 3.7
    nn.Linear(in_features=256, out_features=10),
)
def init_weights(m):
    """Re-initialize the weight of a linear layer from a normal distribution.

    Intended to be passed to ``nn.Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched, and the bias of
    a linear layer is not modified.

    Args:
        m: The module to (possibly) initialize.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        # Draw the weights from a normal distribution with std 0.01
        # (the mean is left at nn.init.normal_'s default).
        nn.init.normal_(m.weight, std=0.01)
# Run init_weights over net (nn.Module.apply visits the module and its
# submodules). The call is the last expression of the cell, so the module
# it returns is what the notebook echoes right after it.
net.apply(init_weights)
Sequential(
(0): Flatten(start_dim=1, end_dim=-1)
(1): Linear(in_features=784, out_features=256, bias=True)
(2): ReLU()
(3): Linear(in_features=256, out_features=10, bias=True)
)
# Hyperparameters for the baseline one-hidden-layer network.
batch_size = 256
lr = 0.1
num_epochs = 10

# Fashion-MNIST iterators for training and evaluation.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# reduction='none' keeps one loss value per example (PyTorch semantics);
# presumably d2l.train_ch3 does the reduction itself -- confirm against d2l.
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(), lr=lr)

d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
(1)尝试添加不同数量的隐藏层(也可以修改学习率),怎样设置效果最好?
# Deeper variant with two hidden layers: 784 -> 256 -> 128 -> 10,
# with a ReLU after each hidden layer.
net2 = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=10),
)
def init_weights(m):
    """Re-initialize the weight of a linear layer from a normal distribution.

    Intended to be passed to ``nn.Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched, and the bias of
    a linear layer is not modified.

    Args:
        m: The module to (possibly) initialize.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        # Draw the weights from a normal distribution with std 0.01
        # (the mean is left at nn.init.normal_'s default).
        nn.init.normal_(m.weight, std=0.01)
# Initialize the linear layers of the deeper network.
net2.apply(init_weights)

# Hyperparameters for this experiment: same batch size and epoch count as
# the baseline, with a larger learning rate.
batch_size2 = 256
lr2 = 0.3
num_epochs2 = 10

train_iter2, test_iter2 = d2l.load_data_fashion_mnist(batch_size2)
loss2 = nn.CrossEntropyLoss(reduction='none')
trainer2 = torch.optim.SGD(net2.parameters(), lr=lr2)

d2l.train_ch3(net2, train_iter2, test_iter2, loss2, num_epochs2, trainer2)
(2)尝试不同的激活函数,哪个激活函数效果最好?
# Same 784 -> 256 -> 10 architecture as the baseline, with the hidden
# activation swapped out: net3 uses Sigmoid, net4 uses Tanh.
net3 = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=256),
    nn.Sigmoid(),
    nn.Linear(in_features=256, out_features=10),
)
net4 = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=256),
    nn.Tanh(),
    nn.Linear(in_features=256, out_features=10),
)
def init_weights(m):
    """Re-initialize the weight of a linear layer from a normal distribution.

    Intended to be passed to ``nn.Module.apply``. Modules that are not
    ``nn.Linear`` (or a subclass of it) are left untouched, and the bias of
    a linear layer is not modified.

    Args:
        m: The module to (possibly) initialize.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        # Draw the weights from a normal distribution with std 0.01
        # (the mean is left at nn.init.normal_'s default).
        nn.init.normal_(m.weight, std=0.01)
# Initialize the linear layers of both activation-function variants.
net3.apply(init_weights)
net4.apply(init_weights)

# Define the hyperparameters before their first use, so the data loader
# below does not depend on a batch_size left over from an earlier cell.
batch_size, lr, num_epochs = 256, 0.1, 10
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net3.parameters(), lr=lr)

# Train the Sigmoid network. NOTE: in the recorded run this call ended with
# an AssertionError raised by d2l.train_ch3's own `assert train_loss < 0.5`
# check (final train loss was about 0.5017).
d2l.train_ch3(net3, train_iter, test_iter, loss, num_epochs, trainer)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
Cell In[5], line 4
2 loss = nn.CrossEntropyLoss(reduction='none')
3 trainer = torch.optim.SGD(net3.parameters(), lr=lr)
----> 4 d2l.train_ch3(net3, train_iter, test_iter, loss, num_epochs, trainer)
File c:\Software\Miniconda3\envs\d2l\lib\site-packages\d2l\torch.py:340, in train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
338 animator.add(epoch + 1, train_metrics + (test_acc,))
339 train_loss, train_acc = train_metrics
--> 340 assert train_loss < 0.5, train_loss
341 assert train_acc <= 1 and train_acc > 0.7, train_acc
342 assert test_acc <= 1 and test_acc > 0.7, test_acc
AssertionError: 0.5017133202234904
# Train the Tanh network with the same hyperparameters as the Sigmoid run.
# train_iter / test_iter are not rebuilt here; the ones already in scope
# are reused.
batch_size = 256
lr = 0.1
num_epochs = 10

loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net4.parameters(), lr=lr)

d2l.train_ch3(net4, train_iter, test_iter, loss, num_epochs, trainer)
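对比来看,还是 ReLU 的效果更好:在相同超参数(batch_size=256、lr=0.1、num_epochs=10)下,Sigmoid 网络训练结束时的训练损失约为 0.50,未能通过 d2l.train_ch3 中 train_loss < 0.5 的断言。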
(3)尝试不同的方案来初始化权重,什么方案效果最好?
累了,不想试试了。略…