import torch
import torch.nn as nn
from pathlib import Path
import requests
import pickle
import gzip
from matplotlib import pyplot
import numpy as np
import math
# Download the data
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
URL = "http://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"
if not (PATH / FILENAME).exists():
    content = requests.get(URL + FILENAME).content
    (PATH / FILENAME).open("wb").write(content)
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
print(x_train.shape)
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
# Build the model
weights = torch.randn(784, 10) / math.sqrt(784)  # weight matrix, scaled by 1/sqrt(784) (Xavier initialisation)
weights.requires_grad_()  # add to the autograd graph so gradients are computed
bias = torch.zeros(10, requires_grad=True)  # bias vector
def log_softmax(x):  # log-softmax activation
    return x - x.exp().sum(-1).log().unsqueeze(-1)
def model(xb):  # build the model
    return log_softmax(xb @ weights + bias)  # "@" is the dot product (matrix multiplication)
bs = 64 # batch size
xb = x_train[0:bs] # a mini-batch from x
preds = model(xb) # predictions
print(preds[0], preds.shape)
def nll(input, target):  # negative log-likelihood loss
    return -input[range(target.shape[0]), target].mean()
loss_func = nll
# Check the loss value
yb = y_train[0:bs]
print(loss_func(preds, yb))
def accuracy(out, yb):  # compute accuracy
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()
print(accuracy(preds, yb))
# Start training
lr = 0.5 # learning rate
epochs = 2 # how many epochs to train for
for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)
        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()
print(loss_func(model(xb), yb), accuracy(model(xb), yb))
torch.nn.functional
torch.nn.functional contains many loss and activation functions. For example, the code below switches to the cross-entropy loss:
H(p, q) = -\sum_{x} p(x) \log q(x)
import torch.nn.functional as F
loss_func = F.cross_entropy
def model(xb):
    return xb @ weights + bias
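Since cross_entropy combines log_softmax and negative log-likelihood in one call, the loss and accuracy should match the hand-written version above. A quick check, reusing the mini-batch xb, yb defined earlier:
print(loss_func(model(xb), yb), accuracy(model(xb), yb))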
nn.Module
from torch import nn
class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))
    def forward(self, xb):
        return xb @ self.weights + self.bias
model = Mnist_Logistic()
print(loss_func(model(xb), yb))
# With model.parameters() and model.zero_grad(), the manual update step becomes:
with torch.no_grad():
    for p in model.parameters():
        p -= p.grad * lr
    model.zero_grad()
def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)
            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()
fit()
using nn.Linear
Use nn.Linear to replace self.weights, self.bias, and xb @ self.weights + self.bias.
class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(784, 10)
    def forward(self, xb):
        return self.lin(xb)
model = Mnist_Logistic()
print(loss_func(model(xb), yb))
fit()
print(loss_func(model(xb), yb))
optim
Replace
with torch.no_grad():
    for p in model.parameters(): p -= p.grad * lr
    model.zero_grad()
with:
opt.step()
opt.zero_grad()
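Here opt is an optimizer from torch.optim. Following the original tutorial, a small helper (the get_model() used later in this post) builds the model together with an SGD optimizer:
from torch import optim
def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=lr)
model, opt = get_model()
print(loss_func(model(xb), yb))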
dataset
A Dataset implements the __len__ and __getitem__ methods, so it can be indexed.
from torch.utils.data import TensorDataset
train_ds = TensorDataset(x_train, y_train)
# Replaces the following two steps:
# xb = x_train[start_i:end_i]
# yb = y_train[start_i:end_i]
xb, yb = train_ds[i * bs: i * bs + bs]  # fetches x and y in one indexing operation
DataLoader
DataLoader manages batches. It replaces the manual slicing train_ds[i*bs : i*bs+bs] and yields batches automatically.
from torch.utils.data import DataLoader
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)
# Replaces:
# for i in range((n-1)//bs + 1):
#     xb, yb = train_ds[i*bs : i*bs+bs]
#     pred = model(xb)
for xb, yb in train_dl:
    pred = model(xb)
Each epoch should also evaluate a validation set, to check whether the model is overfitting. Shuffle the training set to break correlations between batches and reduce overfitting.
The validation batch size is twice the training batch size, because validation needs no backpropagation and therefore uses less memory.
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)
Call model.train() before training and model.eval() before evaluation, because layers such as nn.BatchNorm2d and nn.Dropout behave differently in the two modes.
model, opt = get_model()
for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()
    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    print(epoch, valid_loss / len(valid_dl))
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)
    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)
        return xb.view(-1, xb.size(1))
lr = 0.1
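To train this CNN, create the model and an SGD optimizer with momentum, as in the tutorial; the training loop with model.train()/model.eval() shown above can then be reused unchanged:
model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)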
Sequential
Sequential runs the modules it contains in sequence. Before using Sequential, we first need to define a custom layer: PyTorch has no view layer, so we write one ourselves.
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func
    def forward(self, x):
        return self.func(x)
def preprocess(x):
    return x.view(-1, 1, 28, 28)
Create the network with Sequential:
model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
This CNN is concise, but it only works for MNIST, because:
1. it assumes the input is 28×28;
2. it assumes the final CNN grid is 4×4 (because of the average pooling with kernel size 4).
So we modify it so that it works on images of any size.
def preprocess(x, y):
    return x.view(-1, 1, 28, 28), y
class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func
    def __len__(self):
        return len(self.dl)
    def __iter__(self):
        batches = iter(self.dl)
        for b in batches:
            yield (self.func(*b))
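The call below uses get_data, which is not defined in this post; in the original tutorial it is a small helper that builds both DataLoaders (validation with twice the batch size and no shuffling), roughly:
def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )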
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)
Then use nn.AdaptiveAvgPool2d instead of nn.AvgPool2d, so that we specify the size of the output tensor we want rather than the size of the input tensor:
model = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
Reference:
https://pytorch.org/tutorials/beginner/nn_tutorial.html