# 一、从零实现
import torch
from torch import nn
from d2l import torch as d2l
# 1. 加载数据集
# Number of examples per minibatch.
batch_size = 256
# d2l helper: returns the Fashion-MNIST training and test data iterators.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# 2. 初始化模型参数（输入是向量）
num_inputs = 784   # each 28 x 28 grayscale image has 28 * 28 = 784 pixels
num_outputs = 10   # 10 classes
num_hiddens = 256  # 256 hidden units

# nn.Parameter marks a tensor as trainable (requires_grad=True by default),
# so the flag does not need to be passed to randn/zeros.

# Input layer -> hidden layer: small Gaussian weights (scaled by 0.01), zero bias.
W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens))

# Hidden layer -> output layer: same initialization scheme.
W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs))

# Everything the optimizer has to update.
params = [W1, b1, W2, b2]
# 3. 实现 ReLU 激活函数
def relu(x):
    """Apply the ReLU activation: the element-wise maximum of ``x`` and 0.

    Args:
        x: Input tensor.

    Returns:
        A tensor shaped like ``x`` in which every negative entry is
        replaced by zero.
    """
    a = torch.zeros_like(x)  # all-zero tensor matching x
    return torch.max(x, a)
# 4. 实现模型
def net(x):
    """Forward pass of the one-hidden-layer MLP.

    Args:
        x: Batch of inputs; it is reshaped to ``(-1, num_inputs)``, so the
            total element count must be a multiple of ``num_inputs``.

    Returns:
        The output-layer values ``h @ W2 + b2`` (no softmax is applied here).
    """
    x = x.reshape((-1, num_inputs))  # flatten each image into a num_inputs vector
    h = relu(x @ W1 + b1)            # hidden layer: affine map (@ is matmul) + ReLU
    return h @ W2 + b2               # output layer: affine map only
# 5. 损失函数
# Cross-entropy computed on the raw network outputs; reduction="none" keeps
# one loss value per example instead of averaging over the batch.
loss = nn.CrossEntropyLoss(
    reduction="none",
)
# 6. 训练
num_epochs = 10
lr = 0.1
# Optimizer: minibatch SGD over the hand-built parameter list.
updater = torch.optim.SGD(params, lr=lr)
# d2l helper that runs the training loop with the given model, data, loss and optimizer.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
# 二、简单实现
# 1. Build the network
# 256 hidden units: flatten -> linear (784 -> 256) -> ReLU -> linear (256 -> 10).
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)


def init_weight(m):
    """Re-initialize the weight of a linear layer from N(0, 0.01^2).

    Intended to be passed to ``Module.apply``, which calls it on every
    submodule; anything that is not an ``nn.Linear`` is left untouched.
    Biases are not modified.

    Args:
        m: The submodule currently being visited.
    """
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)


net.apply(init_weight)
# 2. 训练
batch_size, lr, num_epochs = 256, 0.1, 10
# Per-example cross-entropy (no reduction over the batch).
loss = nn.CrossEntropyLoss(reduction='none')
# Minibatch SGD over every parameter registered in the Sequential model.
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)