使用具有两个隐藏层的多层感知机和暂退法，拟合Fashion-MNIST数据集。
# Multilayer perceptron with two hidden layers. Each hidden layer is
# Linear -> ReLU -> Dropout; the final Linear layer produces 10 outputs.
net = nn.Sequential(
    nn.Flatten(),
    # First hidden layer: 784 flattened features -> 256 units.
    nn.Linear(in_features=784, out_features=256),
    nn.ReLU(),
    nn.Dropout(p=0.2),  # dropout probability 0.2
    # Second hidden layer: 256 -> 256 units.
    nn.Linear(in_features=256, out_features=256),
    nn.ReLU(),
    nn.Dropout(p=0.5),  # dropout probability 0.5
    # Output layer.
    nn.Linear(in_features=256, out_features=10),
)
# Module.to() moves the parameters in place and returns the same module.
net = net.to(device)
import os
from tensorboardX import SummaryWriter
from rich.progress import track
from torchvision.transforms import Compose, ToTensor
from torchvision.datasets import FashionMNIST
import torch
from torch.utils.data import DataLoader
from torch import nn, optim
def load_dataset():
    """Build the Fashion-MNIST training and test data loaders.

    The datasets are stored under ``./dataset`` and requested with
    ``download=True``; images are converted with ``ToTensor``.

    Note:
        This function reads the module-level names ``batch_size`` and
        ``num_workers``. In this file they are assigned inside the
        ``__main__`` guard, so they must exist before this is called.

    Returns:
        A ``(dataloader_train, dataloader_test)`` tuple. The training
        loader shuffles its samples; the test loader does not.
    """
    data_root = "./dataset"
    to_tensor = Compose([ToTensor()])

    def make_loader(train):
        # One split of the dataset: train=True -> training set (shuffled),
        # train=False -> test set (fixed order).
        split = FashionMNIST(
            data_root, train=train, transform=to_tensor, download=True
        )
        return DataLoader(
            split,
            batch_size=batch_size,
            shuffle=train,
            num_workers=num_workers,
        )

    train_loader = make_loader(True)
    test_loader = make_loader(False)
    return train_loader, test_loader
if __name__ == '__main__':
    # Global settings. batch_size and num_workers must remain module-level
    # names because load_dataset() reads them as globals.
    num_epochs = 10
    batch_size = 256
    num_workers = 3
    # Use the first GPU when CUDA is usable; otherwise fall back to the CPU
    # instead of failing when tensors are moved to the device.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    lr = 0.5

    # Create the TensorBoard recorder.
    def log_dir():
        """Return an unused run directory path of the form ``runs/expN``.

        The ``runs`` directory is created when missing. N starts at the
        number of existing entries plus one and is advanced until the path
        does not exist, so a new run never reuses an earlier run's
        directory (which a plain entry count can do after runs are deleted).
        """
        root = "runs"
        os.makedirs(root, exist_ok=True)
        order = len(os.listdir(root)) + 1
        while os.path.exists(f'{root}/exp{order}'):
            order += 1
        return f'{root}/exp{order}'

    writer = SummaryWriter(log_dir=log_dir())

    # Dataset configuration.
    dataloader_train, dataloader_test = load_dataset()

    # Model definition: two hidden layers, each followed by ReLU and dropout.
    net = nn.Sequential(
        nn.Flatten(),
        nn.Linear(784, 256),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(256, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, 10)
    ).to(device)

    def init_weights(m):
        """Initialise the weights of every Linear layer from N(0, 0.01^2)."""
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, std=0.01)

    net.apply(init_weights)

    # reduction='none' keeps per-sample losses so they can be averaged for
    # the backward pass and summed for the epoch metrics.
    criterion = nn.CrossEntropyLoss(reduction='none')
    optimizer = optim.SGD(net.parameters(), lr=lr)

    def evaluate(dataloader):
        """Return ``(mean loss, accuracy)`` of ``net`` over ``dataloader``.

        The network is switched to evaluation mode first so that dropout is
        disabled while measuring, and gradients are not tracked. Both values
        are returned as Python floats.
        """
        net.eval()
        total_loss, num_correct, num_samples = 0.0, 0, 0
        with torch.no_grad():
            for X, y in dataloader:
                X, y = X.to(device), y.to(device)
                y_hat = net(X)
                total_loss += criterion(y_hat, y).sum().item()
                num_correct += (y_hat.argmax(dim=1) == y).sum().item()
                num_samples += y.numel()
        return total_loss / num_samples, num_correct / num_samples

    # Training loop. The writer is closed even if training is interrupted.
    try:
        for epoch in track(range(num_epochs), description='dropout'):
            # evaluate() leaves the network in eval mode, so re-enable
            # dropout at the start of every optimisation pass.
            net.train()
            for X, y in dataloader_train:
                X, y = X.to(device), y.to(device)
                loss = criterion(net(X), y)
                optimizer.zero_grad()
                loss.mean().backward()
                optimizer.step()

            train_loss, train_acc = evaluate(dataloader_train)
            _, test_acc = evaluate(dataloader_test)
            writer.add_scalars('metrics', {
                'train_loss': train_loss,
                'train_acc': train_acc,
                'test_acc': test_acc
            }, epoch)
    finally:
        writer.close()