[PyTorch] Tutorial: torch.nn (3)

Understanding torch.nn

Picking up where the previous part left off, the data pipeline and training entry point look like this:

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

Switching to a CNN

We will now build the network with PyTorch's predefined Conv2d class. We define three convolutional layers, each followed by a ReLU, and finish with average pooling (view in PyTorch works like numpy's reshape).
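As a quick aside, here is a minimal sketch of the view / reshape analogy (the shapes are purely illustrative):

import numpy as np
import torch

a = np.arange(12).reshape(3, 4)   # numpy: reshape the 12 values into a 3x4 array
t = torch.arange(12).view(3, 4)   # torch: view does the same for tensors
print(a.shape, t.shape)           # (3, 4) torch.Size([3, 4])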

import pickle
import gzip
import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from pathlib import Path
import requests
import numpy as np

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
FILENAME = "mnist.pkl.gz"

# Load the data into x_train, y_train and x_valid, y_valid
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid),
     _) = pickle.load(f, encoding="latin-1")

# Convert the numpy arrays to torch tensors
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid))


class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        # Three stride-2 convolutions (kernel 3, padding 1): each halves the
        # spatial size, so 28x28 -> 14x14 -> 7x7 -> 4x4
        self.conv1 = nn.Conv2d(1, 16, 3, 2, 1)
        self.conv2 = nn.Conv2d(16, 16, 3, 2, 1)
        self.conv3 = nn.Conv2d(16, 10, 3, 2, 1)

    def forward(self, x):
        x = x.view(-1, 1, 28, 28)     # flat 784-vector -> 1x28x28 image
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.avg_pool2d(x, 4)        # 4x4 -> 1x1
        return x.view(-1, x.size(1))  # flatten to (batch, 10)
      


# Loss function
loss_func = F.cross_entropy


def accuracy(out, yb):
    """Fraction of predictions that match the labels."""
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()


def loss_batch(model, loss_func, xb, yb, opt=None):
    # Compute the loss for one batch; if an optimizer is given, also take a step
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)


# Training hyperparameters
lr = 0.1  # learning rate
epochs = 5  # number of training epochs
bs = 64  # batch size

# Wrap the tensors in datasets
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)


def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )


def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()   # training mode (affects layers such as dropout/batchnorm)
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()    # evaluation mode for the validation pass
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        # Weighted average of the per-batch losses over the validation set
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)


train_dl, valid_dl = get_data(train_ds, valid_ds, bs)

model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
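The accuracy helper defined above is never called in the training loop. A minimal sketch of how it could be used after training (reusing the model and validation tensors from above):

# Hypothetical post-training check, reusing accuracy(), model, x_valid, y_valid
model.eval()
with torch.no_grad():
    print("validation accuracy:", accuracy(model(x_valid), y_valid).item())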

nn.Sequential

torch.nn has another handy class we can use to simplify our code: Sequential. A Sequential object runs each of the modules contained within it, in order. This is a simpler way of writing a neural network.

To take advantage of it, we need to be able to easily define a custom layer from a given function. For instance, PyTorch doesn't have a view layer, so we need to create one for our network; the Lambda class below creates such a layer, which we can then use inside a network defined with Sequential.

nn.Sequential is used as follows:

class Lambda(nn.Module):
    """Wrap an arbitrary function as an nn.Module."""
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)


def preprocess(x):
    return x.view(-1, 1, 28, 28)

model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, 3, 2, 1),
    nn.ReLU(),
    nn.Conv2d(16, 16, 3, 2, 1),
    nn.ReLU(),
    nn.Conv2d(16, 10, 3, 2, 1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

Wrapping the DataLoader

Our CNN is fairly concise, but it only works with MNIST, because:

  • it assumes the input is a 28*28 image;
  • it assumes the final CNN grid size is 4*4.

Let's get rid of these two assumptions, so our model works with any 2d single-channel image. First, we can remove the initial Lambda layer by moving the data preprocessing into a generator. Then, we can replace nn.AvgPool2d with nn.AdaptiveAvgPool2d, which lets us define the size of the output tensor we want, rather than the size of the input tensor we have. As a result, our model will work with inputs of any size; a small sketch of this is shown below.
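Here is a minimal sketch of why AdaptiveAvgPool2d removes the size assumption (the input shapes are purely illustrative):

import torch
from torch import nn

pool = nn.AdaptiveAvgPool2d(1)                 # target output size 1x1, whatever the input size
print(pool(torch.randn(8, 10, 4, 4)).shape)    # torch.Size([8, 10, 1, 1])
print(pool(torch.randn(8, 10, 12, 12)).shape)  # torch.Size([8, 10, 1, 1])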

The complete code is as follows:

import pickle
import gzip
import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from pathlib import Path
import requests
import numpy as np

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
FILENAME = "mnist.pkl.gz"

# Load the data into x_train, y_train and x_valid, y_valid
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid),
     _) = pickle.load(f, encoding="latin-1")

# Convert the numpy arrays to torch tensors
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid))


class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x, y):
    # Reshape each flat 784-vector into a 1x28x28 image; applied per batch
    return x.view(-1, 1, 28, 28), y


class WrappedDataLoader:
    """Wrap a DataLoader and apply func to every batch it yields."""

    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        for b in self.dl:
            yield self.func(*b)


def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )


lr = 0.1  # learning rate
epochs = 5  # number of training epochs
bs = 64  # batch size

train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)

loss_func = F.cross_entropy

model = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
)


def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)


def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

0 0.38525849063396456
1 0.2657669427156448
2 0.24119357098937036
3 0.1926776697874069
4 0.1697926277399063

Closing remarks

We now have a general data pipeline and training loop that you can use to train many kinds of models with PyTorch. Of course, there are plenty of things left to add, such as data augmentation, hyperparameter tuning, training monitoring, transfer learning, and so on.

  • torch.nn
    • Module: creates a callable which behaves like a function, but can also contain state (such as neural-network layer weights). It knows which Parameters it contains and can zero their gradients, loop through them for weight updates, and so on (see the sketch after this list).
    • Parameter: a wrapper around a tensor that tells a Module it has weights that need updating during backprop; only tensors with the requires_grad attribute set are updated.
    • functional: a module (usually imported into the F namespace by convention) which contains activation functions, loss functions, as well as stateless versions of layers such as convolutional and linear layers.
  • torch.optim: contains optimizers such as SGD, which update the weights of Parameters during the backward step.
  • Dataset: an abstract interface for objects with __len__ and __getitem__;
  • DataLoader: takes any Dataset and creates an iterator that returns batches of data.
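A minimal sketch of the Module/Parameter relationship described above (the Scale module is a made-up example, not part of PyTorch):

import torch
from torch import nn

class Scale(nn.Module):
    """Hypothetical module: multiplies its input by a learnable scalar."""
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(1))  # automatically registered with the Module

    def forward(self, x):
        return x * self.weight

m = Scale()
print(list(m.parameters()))  # the Parameter defined above shows up automatically
m.zero_grad()                # the Module can zero its parameters' gradients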

[Reference]

What is torch.nn really? — PyTorch Tutorials 1.13.1+cu117 documentation
