### PyTorch installation methods
- CPU: pip install torch -f https://download.pytorch.org/whl/torch_stable.html
- GPU: install the CUDA toolkit first, then the torch build that matches your CUDA version
- http://download.pytorch.org/whl/torch_stable.html
- https://developer.nvidia.com/cuda-toolkit-archive
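Once installed, a quick check from Python confirms which build you have and whether CUDA is visible (uses only standard torch attributes):

```python
import torch

print(torch.__version__)           # installed torch version
print(torch.cuda.is_available())   # True only if a CUDA-enabled build sees a usable GPU
print(torch.version.cuda)          # CUDA version torch was built against (None for CPU-only builds)
```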
### MNIST classification task:
- Basic network construction and training methods
- torch.nn.functional module
- nn.Module module
### Reading the MNIST dataset
Download the dataset automatically if it is not already present locally:
import torch
print(torch.__version__)
%matplotlib inline
from pathlib import Path
import requests
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)
URL = "https://github.com/mnielsen/neural-networks-and-deep-learning/tree/master/data"
FILENAME = "mnist.pkl.gz"
if not (PATH / FILENAME).exists():
content = requests.get(URL + FILENAME).content
(PATH / FILENAME).open("wb").write(content)
import pickle
import gzip
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
x_train.shape
Each sample in the MNIST dataset is a flattened 28×28 image, so it has 784 pixels.
from matplotlib import pyplot
import numpy as np
pyplot.imshow(x_train[0].reshape(28,28),cmap="gray")
print(x_train.shape)
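The matching label lives in y_train (still a NumPy array at this point); printing it shows which digit the image above is:

```python
print(y_train[0])  # class label (0-9) of the digit displayed above
```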
Note that the data must be converted to torch tensors before it can be used in the modeling and training below.
import torch
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
x_train, x_train.shape, y_train.min(), y_train.max()
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
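It is also worth checking the dtypes here, since F.cross_entropy used below expects floating-point inputs and integer (long) class targets; for this dataset they are typically float32 and int64:

```python
print(x_train.dtype, y_train.dtype)  # typically torch.float32 and torch.int64 for this data
```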
### torch.nn.functional: many layers and functions come from here
torch.nn.functional provides many functions we will use. When should you use nn.Module and when nn.functional? As a rule of thumb: if the layer has learnable parameters, use nn.Module; if it does not, nn.functional is usually simpler.
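A small illustration of that rule: nn.Linear carries a learnable weight and bias, so it is a Module, while relu has no parameters and can simply be taken from nn.functional (the shapes below are just for demonstration):

```python
import torch
from torch import nn
import torch.nn.functional as F

layer = nn.Linear(784, 10)      # has a learnable weight and bias -> defined as a Module
x = torch.randn(2, 784)         # a fake batch of two flattened images
h = F.relu(layer(x))            # relu has no parameters -> the functional form is enough
print(layer.weight.shape)       # torch.Size([10, 784]); the parameters live inside the Module
print(h.shape)                  # torch.Size([2, 10])
```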
# nn.functional is handy for quick experiments, but actual models are better built from nn.Module layers
import torch.nn.functional as F

loss_func = F.cross_entropy

def model(xb):
    return xb.mm(weights) + bias  # mm is matrix multiplication

bs = 64                 # batch size
xb = x_train[0:bs]      # a mini-batch of inputs
yb = y_train[0:bs]      # the corresponding labels
weights = torch.randn([784, 10], dtype=torch.float, requires_grad=True)
bias = torch.zeros(10, requires_grad=True)
print(loss_func(model(xb), yb))
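To see what the optimizer will automate for us later, here is one manual gradient-descent step on this bare model (a sketch; the learning rate 0.5 is arbitrary):

```python
lr = 0.5                             # arbitrary learning rate for this illustration
loss = loss_func(model(xb), yb)
loss.backward()                      # autograd fills weights.grad and bias.grad
with torch.no_grad():                # update parameters without tracking gradients
    weights -= lr * weights.grad
    bias -= lr * bias.grad
    weights.grad.zero_()             # clear gradients for the next step
    bias.grad.zero_()
print(loss_func(model(xb), yb))      # loss on the same batch should usually drop
```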
### Create a model class to simplify the code
- You must inherit from nn.Module and call nn.Module's constructor in your own constructor
- There is no need to write a backward function; nn.Module uses autograd to implement backpropagation automatically
- The learnable parameters of a module can be returned as an iterator by either named_parameters() or parameters()
from torch import nn

class Mnist_NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 256)
        # self.hidden3 = nn.Linear(256, 512)
        self.out = nn.Linear(256, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = self.dropout(x)
        x = F.relu(self.hidden2(x))
        x = self.dropout(x)
        # x = F.relu(self.hidden3(x))
        x = self.out(x)
        return x
The same network with the third hidden layer enabled (see the practice section below on changing the number of layers and neurons):

from torch import nn

class Mnist_NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 256)
        self.hidden3 = nn.Linear(256, 512)
        self.out = nn.Linear(512, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = self.dropout(x)
        x = F.relu(self.hidden2(x))
        x = self.dropout(x)
        x = F.relu(self.hidden3(x))
        x = self.dropout(x)
        x = self.out(x)
        return x
net = Mnist_NN()
print(net)
We can print the weights and biases of the layers we defined, by name:
for name, parameter in net.named_parameters():
    print(name, parameter, parameter.size())
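parameters() is also convenient for counting how many learnable values the network contains (a small illustrative snippet):

```python
total = sum(p.numel() for p in net.parameters())
print("total learnable parameters:", total)
```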
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
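Note that device is defined here but not actually used by the training code below; to train on the GPU you would have to move the model and every batch to it, roughly as in this sketch:

```python
net = net.to(device)        # move the model's parameters to the chosen device
xb_dev = xb.to(device)      # every batch has to be moved as well
yb_dev = yb.to(device)
print(net(xb_dev).device)   # inputs and parameters must live on the same device
```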
### Use TensorDataset and DataLoader to simplify the construction of datasets
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )
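A DataLoader is simply an iterable over batches; pulling one batch shows the shapes that will be fed to the model:

```python
xb, yb = next(iter(train_dl))   # one shuffled mini-batch
print(xb.shape, yb.shape)       # torch.Size([64, 784]) torch.Size([64])
```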
- It is common to call model.train() during training, so that Batch Normalization and Dropout behave in their training mode
- model.eval() is used during evaluation/testing, so that Dropout is disabled and Batch Normalization uses its running statistics (a short check follows this list)
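A quick way to see the difference, using the net instance defined above: in train mode Dropout randomly zeroes activations (and rescales the rest), in eval mode it is the identity:

```python
x = torch.ones(1, 8)
net.train()
print(net.dropout(x))   # roughly half the entries zeroed, the rest scaled by 2
net.eval()
print(net.dropout(x))   # unchanged: dropout is disabled in eval mode
```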
import numpy as np

def fit(steps, model, loss_func, opt, train_dl, valid_dl):
    for step in range(steps):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print("Current step: " + str(step), "Validation set loss: " + str(val_loss))
### zip usage
a = [1, 2, 3]
b = [4, 5, 6]
zipped = zip(a, b)
print(list(zipped))        # [(1, 4), (2, 5), (3, 6)]

a2, b2 = zip(*zip(a, b))   # zip(*...) "unzips" the pairs back into two tuples
print(a2)                  # (1, 2, 3)
print(b2)                  # (4, 5, 6)
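This is exactly the pattern used in fit above: loss_batch (defined below) returns a (loss, batch_size) pair per batch, and zip(*) separates them into two tuples so the validation loss can be averaged with batch-size weights (the numbers here are made up for illustration):

```python
results = [(0.52, 64), (0.47, 64), (0.55, 32)]           # (loss, batch size) per batch
losses, nums = zip(*results)
val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
print(val_loss)                                          # batch-size-weighted average loss
```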
from torch import optim

def get_model():
    model = Mnist_NN()
    return model, optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)
    if opt is not None:
        loss.backward()   # backpropagation
        opt.step()        # update parameters
        opt.zero_grad()   # clear the gradients
    return loss.item(), len(xb)
### Three lines and training is complete!
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(100, model, loss_func, opt, train_dl, valid_dl)
### Practice
- Change the optimizer from SGD to Adam
- Change the number of layers and the number of neurons to observe the effect
- Calculate the accuracy of the current model
correct = 0
total = 0
model.eval()                  # evaluation mode: disable dropout
with torch.no_grad():         # no gradients needed for evaluation
    for xb, yb in valid_dl:
        outputs = model(xb)
        _, predicted = torch.max(outputs.data, 1)  # max value and its index (the predicted class)
        total += yb.size(0)
        correct += (predicted == yb).sum().item()  # .item() converts a one-element tensor to a Python number
print("Accuracy of the network on the 10000 validation images: %d %%" % (100 * correct / total))