train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
We will build our convolutional layers using PyTorch's predefined Conv2d class. We define a model with 3 convolutional layers, each followed by a ReLU, and finish with average pooling. (PyTorch's view is similar to numpy's reshape.)
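As a quick, self-contained illustration of that last remark (not part of the training script below), view and reshape produce the same result for a contiguous tensor:

import numpy as np
import torch

a = np.arange(6).reshape(2, 3)       # numpy: reshape
t = torch.arange(6).view(2, 3)       # PyTorch: view (requires contiguous storage)
print(a.shape, tuple(t.shape))       # (2, 3) (2, 3)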
import pickle
import gzip
import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from pathlib import Path
import requests
import numpy as np
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
FILENAME = "mnist.pkl.gz"
# Load the data into x_train, y_train and x_valid, y_valid
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid),
     _) = pickle.load(f, encoding="latin-1")
# Convert the numpy arrays to tensors
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid))
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        # Three strided convolutions: each halves the spatial size (28 -> 14 -> 7 -> 4)
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = x.view(-1, 1, 28, 28)       # flattened vectors -> 1-channel 28x28 images
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.avg_pool2d(x, 4)          # average over the final 4x4 grid
        return x.view(-1, x.size(1))    # (batch, 10) class scores
# Loss function
loss_func = F.cross_entropy
def accuracy(out, yb):
    """Fraction of predictions whose argmax matches the target class."""
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()
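For instance, on a hypothetical batch of three two-class predictions (the values here are made up for illustration), two of the three argmaxes match the targets:

out = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
yb = torch.tensor([1, 0, 0])
print(accuracy(out, yb))  # tensor(0.6667)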
def loss_batch(model, loss_func, xb, yb, opt=None):
    # Compute the loss for one batch; if an optimizer is given, also take a training step.
    loss = loss_func(model(xb), yb)
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
    return loss.item(), len(xb)
# Train the model
lr = 0.1      # learning rate
epochs = 5    # how many epochs to train for
bs = 64       # batch size
# Datasets
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)
def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        print(epoch, val_loss)
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
torch.nn has another handy class we can use to simplify our code: Sequential. A Sequential object runs each of the modules contained within it, in order. This is a simpler way of writing our neural network. To take advantage of it, we need to be able to easily define a custom layer from a given function. For instance, PyTorch doesn't have a view layer, so we need to create one for our network; the Lambda class below creates such a layer, which we can then use inside the network defined with Sequential. The model built with nn.Sequential looks like this:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x):
    return x.view(-1, 1, 28, 28)
model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)
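As a quick sanity check (using the MNIST tensors loaded above), the Sequential model maps a batch of flattened 28*28 images to one score per class; the shapes follow from the strided convolutions (28 -> 14 -> 7 -> 4) and the 4x4 average pool:

xb = x_train[:bs]          # a batch of flattened images, shape (64, 784)
print(model(xb).shape)     # torch.Size([64, 10])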
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
Our CNN is fairly concise, but it only works with MNIST, because it assumes the input is a 28*28 long vector, and it assumes the final CNN grid size is 4*4 (that is the kernel size we used for average pooling). Let's get rid of these two assumptions, so our model works with any 2d single-channel image. First, we can remove the initial Lambda layer by moving the data preprocessing into a generator. Next, we can replace nn.AvgPool2d with nn.AdaptiveAvgPool2d, which lets us define the size of the output tensor we want, rather than the size of the input tensor we have. As a result, our model will work with inputs of any size.
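A small, self-contained sketch of what nn.AdaptiveAvgPool2d buys us: the output spatial size is fixed regardless of the input's spatial size (the 4x4 and 7x7 inputs here are arbitrary examples):

import torch
from torch import nn

pool = nn.AdaptiveAvgPool2d(1)                 # always pool down to a 1x1 spatial grid
print(pool(torch.randn(8, 10, 4, 4)).shape)    # torch.Size([8, 10, 1, 1])
print(pool(torch.randn(8, 10, 7, 7)).shape)    # torch.Size([8, 10, 1, 1])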
The full code is as follows:
import pickle
import gzip
import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from pathlib import Path
import requests
import numpy as np
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
FILENAME = "mnist.pkl.gz"
# Load the data into x_train, y_train and x_valid, y_valid
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid),
     _) = pickle.load(f, encoding="latin-1")
# Convert the numpy arrays to tensors
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid))
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x, y):
    return x.view(-1, 1, 28, 28), y
class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        batches = iter(self.dl)
        for b in batches:
            yield (self.func(*b))
def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )
lr = 0.1      # learning rate
epochs = 5    # how many epochs to train for
bs = 64       # batch size
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)
loss_func = F.cross_entropy
model = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
)
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
    return loss.item(), len(xb)
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        print(epoch, val_loss)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
0 0.38525849063396456
1 0.2657669427156448
2 0.24119357098937036
3 0.1926776697874069
4 0.1697926277399063
We now have a general data pipeline and training loop that you can use to train many kinds of models with PyTorch. Of course, there are still many things you may want to add, such as data augmentation, hyperparameter tuning, training monitoring, transfer learning, and so on.
To summarize the building blocks we used (see "What is torch.nn really?" in the PyTorch Tutorials 1.13.1+cu117 documentation):
Module: creates a callable that behaves like a function but can also contain state (such as neural network layer weights). It knows which Parameters it contains, and can zero their gradients, loop through them for weight updates, and so on.
Parameter: a wrapper around a tensor that tells a Module it has weights that need updating during backprop. Only tensors with the requires_grad attribute set are updated.
functional: a module (usually imported into the F namespace by convention) that contains activation functions, loss functions, and stateless versions of layers such as convolutional and linear layers.
torch.optim: contains optimizers such as SGD, which update the Parameter weights during the backward step.
Dataset: an abstract interface for objects with __len__ and __getitem__ (e.g. TensorDataset).
DataLoader: takes any Dataset and creates an iterator that returns batches of data.
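To see how these pieces fit together in one place, here is a minimal, self-contained sketch (the TinyLinear module, its sizes, and the random data are made up for illustration and are not part of the tutorial's script):

import torch
import torch.nn.functional as F
from torch import nn, optim

class TinyLinear(nn.Module):
    # A Module holds state; nn.Parameter marks the tensors the optimizer should update.
    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_features, out_features) * 0.01)
        self.bias = nn.Parameter(torch.zeros(out_features))

    def forward(self, x):
        return x @ self.weight + self.bias

model = TinyLinear(4, 2)
opt = optim.SGD(model.parameters(), lr=0.1)    # torch.optim updates the Parameters
x = torch.randn(8, 4)                          # a toy batch of 8 examples
y = torch.randint(0, 2, (8,))                  # toy class labels
loss = F.cross_entropy(model(x), y)            # functional supplies the stateless loss
loss.backward()                                # gradients land in each Parameter's .grad
opt.step()
opt.zero_grad()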