PyTorch: WHAT IS TORCH.NN REALLY?

Building a neural network by hand

import torch
import torch.nn as nn

from pathlib import Path
import requests
import pickle
import gzip
from matplotlib import pyplot
import numpy as np
import math

# Download the MNIST data
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
URL = "http://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"
content = requests.get(URL + FILENAME).content
(PATH / FILENAME).open("wb").write(content)
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
print(x_train.shape)
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())


# Build the model
weights = torch.randn(784, 10) / math.sqrt(784)  # weight matrix, scaled (Xavier-style) initialisation
weights.requires_grad_()  # track gradients for this tensor
bias = torch.zeros(10, requires_grad=True)  # bias vector

def log_softmax(x):  # log-softmax activation
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):  # the model: a linear layer followed by log-softmax
    return log_softmax(xb @ weights + bias)  # "@" is matrix multiplication

bs = 64  # batch size
xb = x_train[0:bs]  # a mini-batch from x
preds = model(xb)  # predictions
print(preds[0], preds.shape)

def nll(input, target):  # negative log-likelihood loss
    return -input[range(target.shape[0]), target].mean()

loss_func = nll

# Check the loss on the first mini-batch
yb = y_train[0:bs]
print(loss_func(preds, yb))


def accuracy(out, yb):  # accuracy: fraction of correct predictions
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

print(accuracy(preds, yb))


# Training loop
lr = 0.5  # learning rate
epochs = 2  # how many epochs to train for

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

print(loss_func(model(xb), yb), accuracy(model(xb), yb))

Using torch.nn.functional

torch.nn.functional contains many loss and activation functions. For example, F.cross_entropy below is the cross-entropy loss, which combines log-softmax and negative log-likelihood in a single function:

H(p, q) = -\sum_{x} p(x) \log q(x)

import torch.nn.functional as F

loss_func = F.cross_entropy

def model(xb):
    return xb @ weights + bias
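
Since F.cross_entropy applies log-softmax internally, the model now returns only the raw linear output. As a quick check (mirroring the tutorial), the loss and accuracy should match the manual version:

print(loss_func(model(xb), yb), accuracy(model(xb), yb))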

Using nn.Module

from torch import nn

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias
model = Mnist_Logistic()
print(loss_func(model(xb), yb))

# The manual update step can now iterate over model.parameters():
with torch.no_grad():
    for p in model.parameters(): p -= p.grad * lr
    model.zero_grad()

def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()

fit()
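
After fitting, the loss should have dropped compared with the value printed before training:

print(loss_func(model(xb), yb))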

Using nn.Linear

nn.Linear replaces self.weights, self.bias, and xb @ self.weights + self.bias:

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        return self.lin(xb)

model = Mnist_Logistic()
print(loss_func(model(xb), yb))
fit()

print(loss_func(model(xb), yb))

Using optim

torch.optim provides optimizers that perform the parameter update for us. The manual update step:

with torch.no_grad():
    for p in model.parameters(): p -= p.grad * lr
    model.zero_grad()

is replaced with:

opt.step()
opt.zero_grad()
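
Later sections call get_model(), which this post never defines; following the linked tutorial, it simply builds the model together with an SGD optimizer:

from torch import optim

def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=lr)  # lr is the learning rate defined earlier

model, opt = get_model()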

Using Dataset

A Dataset is any object that implements __len__ and __getitem__ so it can be indexed like a sequence. TensorDataset wraps tensors into such a dataset:

from torch.utils.data import TensorDataset
train_ds = TensorDataset(x_train, y_train)
# replaces the two separate slices
# xb = x_train[start_i:end_i]
# yb = y_train[start_i:end_i]
xb, yb = train_ds[i * bs: i * bs + bs]  # fetches x and y for the batch in one step
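
With the dataset and the optimizer in place, the training loop from the tutorial becomes:

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        xb, yb = train_ds[i * bs: i * bs + bs]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()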

Using DataLoader

A DataLoader manages batches for you: given a Dataset, it yields mini-batches automatically, replacing the manual slicing train_ds[i*bs : i*bs+bs].

from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)

# replaces the manual batching:
# for i in range((n-1)//bs + 1):
#     xb,yb = train_ds[i*bs : i*bs+bs]
#     pred = model(xb)
for xb, yb in train_dl:
    pred = model(xb)
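
The full loop then reads (as in the tutorial):

for epoch in range(epochs):
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))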

Shuffling and validation

You should always have a validation set so you can tell whether the model is overfitting. The training data is shuffled so that batches are not correlated with each other, which also helps prevent overfitting; the validation data does not need shuffling. The validation DataLoader uses a batch size twice as large as the training one, because validation requires no backpropagation and therefore uses less memory, so larger batches fit.

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

Call model.train() before training and model.eval() before evaluation, because layers such as nn.BatchNorm2d and nn.Dropout behave differently in the two modes:

model, opt = get_model()
for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)

    print(epoch, valid_loss / len(valid_dl))

CNN

class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)
        return xb.view(-1, xb.size(1))

lr = 0.1
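
To train the CNN, the tutorial uses SGD with momentum; a minimal sketch that reuses the training/validation loop from the previous section:

model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        loss = loss_func(model(xb), yb)
        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    print(epoch, valid_loss / len(valid_dl))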

nn.Sequential

nn.Sequential runs each of the modules it contains, in order. To use it here we first need a custom layer: PyTorch has no view layer, so we define a Lambda module that wraps an arbitrary function.

class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)


def preprocess(x):
    return x.view(-1, 1, 28, 28)

The network can then be created with Sequential:

model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

Wrapping DataLoader

Our CNN is fairly concise, but it only works with MNIST, because:
1. it assumes the input is a 28*28 long vector;
2. it assumes the final CNN grid size is 4*4 (since that is the average-pooling kernel size we used).
Let's remove these two assumptions so the model works with any size of (single-channel) image.

def preprocess(x, y):
    return x.view(-1, 1, 28, 28), y

class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        batches = iter(self.dl)
        for b in batches:
            yield (self.func(*b))

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)
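
get_data is not defined in this post; in the linked tutorial it simply builds the two DataLoaders from the validation section:

def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )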

Then replace nn.AvgPool2d with nn.AdaptiveAvgPool2d, which lets us define the size of the output tensor we want rather than the input tensor we have:

model = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

Reference:
https://pytorch.org/tutorials/beginner/nn_tutorial.html
