PyTorch Study Notes 6: Dissecting the nn Module Step by Step

Source: https://pytorch.org/tutorials/beginner/nn_tutorial.html#refactor-using-optim

Dataset

The dataset is MNIST (handwritten digits). It can be obtained in several ways, including the requests-based download used in the tutorial; that part is not repeated in detail here, but converting the raw arrays into images is shown below.
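For completeness, a minimal loading sketch along the lines of the tutorial (the download URL and file name below are assumptions taken from the tutorial and may change):

from pathlib import Path
import pickle
import gzip
import requests

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)

# URL and file name follow the official tutorial; adjust if the mirror moves
URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
    content = requests.get(URL + FILENAME).content
    (PATH / FILENAME).open("wb").write(content)

# the pickle contains (train, valid, test) splits of flattened 28x28 images
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")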

Display an image with matplotlib
from matplotlib import pyplot

pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
print(x_train.shape)
Convert the data to torch.tensor
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
Use automatic gradient computation
import math

weights = torch.randn(784, 10) / math.sqrt(784)
weights.requires_grad_()
bias = torch.zeros(10, requires_grad=True)
Define your own loss function

Although PyTorch ships with many ready-made loss functions and activation functions, we can implement our own. PyTorch will automatically make such code use fast GPU or vectorized CPU operations.

def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    return log_softmax(xb @ weights + bias)

Here @ denotes matrix multiplication.
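As an illustrative shape check (dummy_xb is a made-up batch, not from the tutorial):

dummy_xb = torch.randn(64, 784)           # a batch of 64 flattened 28x28 images
print((dummy_xb @ weights + bias).shape)  # torch.Size([64, 10]) -- one score per class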

Batching
bs = 64  # batch size

xb = x_train[0:bs]  # a mini-batch from x
preds = model(xb)  # predictions
preds[0], preds.shape
print(preds[0], preds.shape)

Out:
tensor([-2.9153, -2.6091, -2.9060, -2.2049, -2.4748, -2.4453, -2.1476, -2.1597,
        -2.7930, -1.4146], grad_fn=<SelectBackward>) torch.Size([64, 10])
Define the negative log-likelihood loss function
def nll(input, target):
    return -input[range(target.shape[0]), target].mean()

loss_func = nll
Test it
yb = y_train[0:bs]
print(loss_func(preds, yb))

Out:
tensor(2.3504, grad_fn=<NegBackward>)
Define an accuracy function
def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

print(accuracy(preds, yb))

Out:
tensor(0.1562)
Update the weights and bias inside torch.no_grad()

Calling weights.grad.zero_() and bias.grad.zero_() prepares for the next gradient computation and avoids gradient accumulation: each backward() adds to the existing gradients rather than overwriting them.
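A minimal sketch of that accumulation behaviour (illustrative, not from the tutorial):

w = torch.tensor(1.0, requires_grad=True)
(w * 2).backward()
print(w.grad)   # tensor(2.)
(w * 2).backward()
print(w.grad)   # tensor(4.) -- the second backward() added to the first
w.grad.zero_()  # reset before computing fresh gradients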

set_trace() lets us pause the loop and inspect the value of each variable step by step.

from IPython.core.debugger import set_trace

n = x_train.shape[0]  # number of training examples (the loop below needs n)
lr = 0.5  # learning rate
epochs = 2  # how many epochs to train for

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        # set_trace()
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

# check the loss and accuracy again
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

Out:
tensor(0.0829, grad_fn=<NegBackward>) tensor(1.)
Refactor with torch.nn.functional
import torch.nn.functional as F

loss_func = F.cross_entropy

def model(xb):
    return xb @ weights + bias
	
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

Out:
tensor(0.0829, grad_fn=<NllLossBackward>) tensor(1.)
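F.cross_entropy combines log_softmax and the negative log-likelihood in a single call, so it should agree with the hand-written version; a quick consistency check (illustrative):

logits = xb @ weights + bias
print(torch.allclose(F.cross_entropy(logits, yb), nll(log_softmax(logits), yb)))  # True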
Refactor with nn.Module

Note the capital M in Module here; a lowercase (Python) module is an entirely different concept, namely a file of Python code that can be imported.

from torch import nn

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias
		
model = Mnist_Logistic()

print(loss_func(model(xb), yb))

Out:
tensor(2.3437, grad_fn=<NllLossBackward>)
Refactor with model.parameters() and model.zero_grad()

Original version:

with torch.no_grad():
    weights -= weights.grad * lr
    bias -= bias.grad * lr
    weights.grad.zero_()
    bias.grad.zero_()

Refactored version:

with torch.no_grad():
    for p in model.parameters(): p -= p.grad * lr
    model.zero_grad()

Wrapped into fit():

def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()

fit()

print(loss_func(model(xb), yb))

Out:
tensor(0.0812, grad_fn=<NllLossBackward>)
Refactor with nn.Linear

nn.Linear handles both the parameter initialization and the forward computation for us.
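For reference, nn.Linear stores the weight as (out_features, in_features) and computes xb @ weight.t() + bias; a quick shape check (illustrative):

lin = nn.Linear(784, 10)
print(lin.weight.shape, lin.bias.shape)  # torch.Size([10, 784]) torch.Size([10])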

Original version:

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias

Refactored version:

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        return self.lin(xb)

model = Mnist_Logistic()
print(loss_func(model(xb), yb))

Out:
tensor(2.3360, grad_fn=<NllLossBackward>)

fit()
print(loss_func(model(xb), yb))

Out:
tensor(0.0798, grad_fn=<NllLossBackward>)
Refactor with torch.optim

Original version:

with torch.no_grad():
    for p in model.parameters(): p -= p.grad * lr
    model.zero_grad()

Refactored version:

from torch import optim

# the optimizer now performs the update for us:
#   opt.step()       applies the parameter update
#   opt.zero_grad()  resets the gradients for the next batch


def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=lr)

model, opt = get_model()
print(loss_func(model(xb), yb))

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))

Out:
tensor(2.3593, grad_fn=<NllLossBackward>)
tensor(0.0821, grad_fn=<NllLossBackward>)
Refactor with fit()

If no opt is passed in (as for the validation set), loss_batch does not run backpropagation or update any parameters.

def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)
	
import numpy as np

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)
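The fit() above expects DataLoaders (train_dl and valid_dl), which these notes skip over; following the tutorial, they can be built roughly like this:

from torch.utils.data import TensorDataset, DataLoader

train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

def get_data(train_ds, valid_ds, bs):
    # shuffle the training set every epoch; the validation batch can be larger
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)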
Define a CNN of our own
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)
        return xb.view(-1, xb.size(1))

lr = 0.1

model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)

Out:
0 0.346197331571579
1 0.26924657912254335
Using nn.Sequential

This is simply a more concise way of defining a network.

Combined with Lambda: Lambda wraps a function as a layer that we can then use when defining a network with Sequential. Its definition:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)


def preprocess(x):
    return x.view(-1, 1, 28, 28)

model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)

Out:
0 0.38947487244606016
1 0.279226434135437
Using the GPU

If a CUDA-capable GPU is available, it can be used to speed up the code.

  • Check whether a GPU is available:
print(torch.cuda.is_available())

Out:
True
  • Then create a device object:
dev = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")

  • Move the data onto the device (the WrappedDataLoader helper used below is sketched after these steps):

def preprocess(x, y):
    return x.view(-1, 1, 28, 28).to(dev), y.to(dev)

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)
  • Move the model onto the GPU
model.to(dev)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
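The WrappedDataLoader used in the data-moving step above wraps a DataLoader and applies a preprocessing function to every batch; in the tutorial it is defined roughly as follows:

class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        for b in self.dl:
            yield (self.func(*b))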
