import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
Basic configuration
# Batch size
batch_size = 16
# Learning rate
lr = 1e-4
# Maximum number of epochs
max_epochs = 100
# GPU setup
# Option 1: restrict which GPUs are visible to the process
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
# Option 2: create a device object, then call .to(device) on anything that should run on the GPU
device = torch.device('cuda:1' if torch.cuda.is_available() else "cpu")
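As a quick illustration of option 2, anything created on the CPU can be moved with .to(device); the small model below is just a placeholder for illustration:

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Linear(10, 2).to(device)  # moves the model's parameters to the device
x = torch.randn(4, 10).to(device)    # moves the input batch to the same device
y = model(x)                         # the computation now runs on that device
print(y.device)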
Data loading
This is done with the Dataset + DataLoader combination.
A Dataset subclass mainly implements three methods:
- __init__: receives external arguments and sets up the dataset
- __getitem__: reads one sample at a time
- __len__: returns the number of samples
import torch
from torchvision import datasets
# Datasets (train_path, val_path and data_transform are assumed to be defined)
train_data = datasets.ImageFolder(train_path, transform=data_transform)
val_data = datasets.ImageFolder(val_path, transform=data_transform)
# Load the data in batches
from torch.utils.data import DataLoader
train_loader = DataLoader(train_data, batch_size=batch_size, num_workers=4, shuffle=True, drop_last=True)
val_loader = DataLoader(val_data, batch_size=batch_size, num_workers=4, shuffle=False)
# Visualize an image from the next batch
import matplotlib.pyplot as plt
images, labels = next(iter(val_loader))
print(images.shape)
plt.imshow(images[0].permute(1, 2, 0))  # tensors use permute; transpose(1,2,0) would raise a TypeError
plt.show()
import pandas as pd
from PIL import Image

class MyDataset(Dataset):
def __init__(self, data_dir, info_csv, image_list, transform=None):
"""
Args:
data_dir: path to image directory
info_csv: path to labels
image_list: path to txt file contains image names
transform: optional transform to be applied on a sample
"""
label_info = pd.read_csv(info_csv)
image_file = open(image_list).readlines()
self.data_dir = data_dir
self.image_file = image_file
self.label_info = label_info
self.transform = transform
def __getitem__(self, index):
image_name = self.image_file[index].strip('\n')
raw_label = self.label_info.loc[self.label_info['Image_index'] == image_name]
label = raw_label.iloc[:,0]
image_name = os.path.join(self.data_dir, image_name)
image = Image.open(image_name).convert('RGB')
if self.transform is not None:
image = self.transform(image)
return image, label
def __len__(self):
return len(self.image_file)
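A usage sketch for MyDataset (the paths and the transform below are made-up placeholders; the CSV is assumed to have an Image_index column, as referenced in __getitem__):

from torchvision import transforms

data_transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
dataset = MyDataset(data_dir='./images', info_csv='./labels.csv',
                    image_list='./image_list.txt', transform=data_transform)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)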
Model construction
Constructing a neural network
import torch
from torch import nn
class MLP(nn.Module):
    # Declare the layers that carry model parameters
    def __init__(self, **kwargs):
        # Call the parent class constructor for the necessary initialization
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Linear(784, 256)  # input has 784 features, output is n x 256
        self.act = nn.ReLU()               # ReLU activation
        self.output = nn.Linear(256, 10)   # output is n x 10
    # Define the forward computation: take input x and return the model output
    def forward(self, x):
        o = self.act(self.hidden(x))
        return self.output(o)
X = torch.rand(2, 784)
net = MLP()
print(net)
net(X)
MLP(
(hidden): Linear(in_features=784, out_features=256, bias=True)
(act): ReLU()
(output): Linear(in_features=256, out_features=10, bias=True)
)
tensor([[-0.1357, -0.1145, 0.0626, 0.0197, 0.0813, 0.0714, -0.0941, -0.0946,
-0.2517, 0.0006],
[ 0.0474, -0.1632, 0.1313, -0.0293, 0.1087, 0.0678, -0.0179, -0.2159,
-0.1594, -0.1324]], grad_fn=<AddmmBackward0>)
Common layers in neural networks
Custom layers
# A layer without parameters
import torch
from torch import nn
class MyLayer(nn.Module):
def __init__(self, **kwargs):
super(MyLayer, self).__init__(**kwargs)
def forward(self, x):
return x - x.mean()
layer = MyLayer()
layer(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float))
tensor([-2., -1., 0., 1., 2.])
# A layer with model parameters
class MyListDense(nn.Module):
def __init__(self):
super(MyListDense, self).__init__()
        # nn.ParameterDict can be used as well
self.params = nn.ParameterList([nn.Parameter(torch.randn(4, 4)) for i in range(3)])
self.params.append(nn.Parameter(torch.randn(4, 1)))
def forward(self, x):
for i in range(len(self.params)):
x = torch.mm(x, self.params[i])
return x
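A quick sanity check of MyListDense (a minimal sketch; the only constraint is that the input width matches the first 4x4 parameter matrix):

net = MyListDense()
x = torch.randn(2, 4)  # batch of 2, feature dimension 4
print(net(x).shape)    # torch.Size([2, 1]): three 4x4 matrices, then the final 4x1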
2D convolutional layers
import torch
from torch import nn
def corr2d(X, K):
h, w = K.shape
X, K = X.float(), K.float()
Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
for i in range(Y.shape[0]):
for j in range(Y.shape[1]):
Y[i, j] = (X[i:i+h, j:j+w] * K).sum()
return Y
# A 2D convolution layer built on corr2d
class Conv2D(nn.Module):
def __init__(self, kernel_size):
super(Conv2D, self).__init__()
self.weight = nn.Parameter(torch.randn(kernel_size))
self.bias = nn.Parameter(torch.randn(1))
def forward(self, x):
return corr2d(x, self.weight) + self.bias
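To see corr2d and Conv2D at work, the classic edge-detection check can be used (a sketch added here for illustration, not part of the original text):

X = torch.ones(6, 8)
X[:, 2:6] = 0                      # a vertical dark band in a bright image
K = torch.tensor([[1., -1.]])      # kernel that responds to horizontal changes
print(corr2d(X, K))                # nonzero only at the 1->0 and 0->1 transitions

conv = Conv2D(kernel_size=(1, 2))  # randomly initialized layer with the same kernel shape
print(conv(X).shape)               # torch.Size([6, 7])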
import torch
from torch import nn
# Define a helper that applies a conv layer, adding and then removing the batch and channel dimensions
def comp_conv2d(conv2d, X):
    # (1, 1) are the batch size and the number of channels
    X = X.view((1, 1) + X.shape)  # reshape into a 4-D tensor
    Y = conv2d(X)
    return Y.view(Y.shape[2:])    # drop the first two dimensions (batch and channel)
# Note: padding=1 pads one row/column on each side, i.e. two rows/columns in total
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3,padding=1)
X = torch.rand(8, 8)
comp_conv2d(conv2d, X).shape
torch.Size([8, 8])
Pooling layers
import torch
from torch import nn
def pool2d(X, pool_size, mode='max'):
p_h, p_w = pool_size
Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
for i in range(Y.shape[0]):
for j in range(Y.shape[1]):
if mode == 'max':
Y[i, j] = X[i: i + p_h, j: j + p_w].max()
elif mode == 'avg':
Y[i, j] = X[i: i + p_h, j: j + p_w].mean()
return Y
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=torch.float)
pool2d(X, (2, 2))
tensor([[4., 5.],
[7., 8.]])
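The built-in nn.MaxPool2d reproduces this result; note that its default stride equals the kernel size, so stride=1 is set explicitly to match pool2d above (a small sketch):

pool = nn.MaxPool2d(2, stride=1)
print(pool(X.view(1, 1, 3, 3)).view(2, 2))  # tensor([[4., 5.], [7., 8.]])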
AlexNet example
class AlexNet(nn.Module):
def __init__(self):
super(AlexNet, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(1, 96, 11, 4), # in_channels, out_channels, kernel_size, stride, padding
nn.ReLU(),
nn.MaxPool2d(3, 2), # kernel_size, stride
            # Use a smaller conv window, padding of 2 to keep height/width unchanged, and more output channels
nn.Conv2d(96, 256, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(3, 2),
            # Three consecutive conv layers with even smaller windows; the output channel count keeps growing except in the last one.
            # The first two of these conv layers are not followed by pooling, so height and width are preserved
nn.Conv2d(256, 384, 3, 1, 1),
nn.ReLU(),
nn.Conv2d(384, 384, 3, 1, 1),
nn.ReLU(),
nn.Conv2d(384, 256, 3, 1, 1),
nn.ReLU(),
nn.MaxPool2d(3, 2)
)
        # The fully connected layers here have several times more outputs than in LeNet; dropout mitigates overfitting
self.fc = nn.Sequential(
nn.Linear(256*5*5, 4096),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(4096, 4096),
nn.ReLU(),
nn.Dropout(0.5),
            # Output layer. Fashion-MNIST is used here, so there are 10 classes rather than the paper's 1000
nn.Linear(4096, 10),
)
def forward(self, img):
feature = self.conv(img)
output = self.fc(feature.view(img.shape[0], -1))
return output
net = AlexNet()
print(net)
AlexNet(
(conv): Sequential(
(0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
(1): ReLU()
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU()
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU()
(8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU()
(10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU()
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(fc): Sequential(
(0): Linear(in_features=6400, out_features=4096, bias=True)
(1): ReLU()
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU()
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=4096, out_features=10, bias=True)
)
)
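To confirm the 256*5*5 = 6400 input size of the first Linear layer, a dummy forward pass can be run with the 224x224 single-channel input this network expects (a sanity check added for illustration):

X = torch.rand(1, 1, 224, 224)
print(net(X).shape)  # torch.Size([1, 10])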
Model initialization
torch.nn.init
import torch
import torch.nn as nn
conv = nn.Conv2d(1,3,3)
linear = nn.Linear(10,1)
# Inspect the randomly initialized conv parameters
print(conv.weight.data)
# Inspect the linear layer's parameters
print(linear.weight.data)
tensor([[[[ 0.1268, -0.0083, -0.1545],
[ 0.2284, 0.0016, -0.1577],
[ 0.0579, -0.0849, -0.2911]]],
[[[ 0.0605, 0.1680, 0.1811],
[-0.1920, -0.1370, 0.2808],
[ 0.0944, 0.1277, -0.1246]]],
[[[ 0.2819, -0.2207, 0.0406],
[ 0.2280, -0.0377, 0.2743],
[ 0.3203, -0.0189, -0.0889]]]])
tensor([[-0.1381, -0.1385, 0.1530, 0.0018, -0.0307, -0.0804, -0.2288, 0.1726,
0.0550, 0.1364]])
# Apply Kaiming initialization to the conv layer
torch.nn.init.kaiming_normal_(conv.weight.data)
print(conv.weight.data)
# Apply constant initialization to the linear layer
torch.nn.init.constant_(linear.weight.data,0.3)
print(linear.weight.data)
tensor([[[[-0.0482, 0.4568, -0.0451],
[ 0.5877, 0.1045, -0.0692],
[ 0.6888, 0.0516, 0.1558]]],
[[[ 0.2490, -0.1846, -0.2468],
[-0.1517, -1.1788, 0.1218],
[ 0.3340, -0.4299, 0.1132]]],
[[[ 0.1487, -0.4090, 0.6382],
[ 0.4713, -0.3765, 0.4769],
[ 0.0353, 0.2126, -0.0899]]]])
tensor([[0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000,
0.3000]])
Wrapping initialization in a function
# Written like a method (first argument self), but callable as a plain function too, as below
def initialize_weights(self):
    for m in self.modules():
        # Is this module a Conv2d?
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_normal_(m.weight.data)
            # Does it have a bias?
            if m.bias is not None:
                torch.nn.init.constant_(m.bias.data, 0.3)
        elif isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight.data, 0.1)
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias.data)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()  # the in-place method is zero_(), not zeros_()
# Define the model
class MLP(nn.Module):
    # Declare the layers carrying model parameters: here a conv layer and a fully connected layer
    def __init__(self, **kwargs):
        # Call the parent class constructor for the necessary initialization
super(MLP, self).__init__(**kwargs)
self.hidden = nn.Conv2d(1,1,3)
self.act = nn.ReLU()
self.output = nn.Linear(10,1)
    # Define the forward computation: how the output is produced from input x
def forward(self, x):
o = self.act(self.hidden(x))
return self.output(o)
mlp = MLP()
print(list(mlp.parameters()))
print("-------初始化-------")
initialize_weights(mlp)
print(list(mlp.parameters()))
[Parameter containing:
tensor([[[[-0.0073, 0.1146, -0.0839],
[ 0.2442, -0.1130, 0.0334],
[-0.2217, 0.1572, 0.1452]]]], requires_grad=True), Parameter containing:
tensor([0.1457], requires_grad=True), Parameter containing:
tensor([[ 0.2057, -0.1126, -0.2133, -0.1641, 0.0404, -0.0776, -0.2792, -0.2329,
0.0734, -0.0483]], requires_grad=True), Parameter containing:
tensor([-0.2791], requires_grad=True)]
-------After initialization-------
[Parameter containing:
tensor([[[[ 0.2118, -0.0682, 0.1048],
[ 0.1412, 0.4079, 0.0789],
[ 0.2269, -0.1477, -0.2181]]]], requires_grad=True), Parameter containing:
tensor([0.3000], requires_grad=True), Parameter containing:
tensor([[-0.1874, 0.3172, -0.6525, -1.3967, 0.7327, 0.5498, -0.3125, 0.5182,
0.2366, 0.0422]], requires_grad=True), Parameter containing:
tensor([0.], requires_grad=True)]
Loss functions
Binary cross-entropy loss: an example
import torch
import torch.nn as nn
m = nn.Sigmoid()
loss = nn.BCELoss()
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)
output = loss(m(input), target)
output.backward()
print('BCELoss result:', output)
BCELoss result: tensor(1.3916, grad_fn=<BinaryCrossEntropyBackward0>)
Commonly used loss functions
# Binary cross-entropy loss
torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')
# Cross-entropy loss
torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
# L1 loss
torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')
# MSE loss
torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')
# Smooth L1 loss
torch.nn.SmoothL1Loss(size_average=None, reduce=None, reduction='mean', beta=1.0)
# Negative log-likelihood loss with a Poisson target distribution
torch.nn.PoissonNLLLoss(log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean')
# KL divergence
torch.nn.KLDivLoss(size_average=None, reduce=None, reduction='mean', log_target=False)
# MarginRankingLoss: compares two sets of scores; used for ranking tasks
torch.nn.MarginRankingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')
# Multi-label margin loss
torch.nn.MultiLabelMarginLoss(size_average=None, reduce=None, reduction='mean')
# Binary classification (soft margin) loss
torch.nn.SoftMarginLoss(size_average=None, reduce=None, reduction='mean')
# Multi-class hinge (margin) loss
torch.nn.MultiMarginLoss(p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean')
# Triplet loss
torch.nn.TripletMarginLoss(margin=1.0, p=2.0, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')
# HingeEmbeddingLoss: hinge loss computed on embedding outputs
torch.nn.HingeEmbeddingLoss(margin=1.0, size_average=None, reduce=None, reduction='mean')
# Cosine embedding loss
torch.nn.CosineEmbeddingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean')
# CTC loss
torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False)
CTCLoss()
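As a usage sketch for one of these, CrossEntropyLoss takes raw logits and integer class indices (the shapes below are illustrative):

loss_fn = torch.nn.CrossEntropyLoss()
logits = torch.randn(3, 5, requires_grad=True)  # batch of 3, 5 classes
targets = torch.tensor([1, 0, 4])               # one class index per sample
loss = loss_fn(logits, targets)
loss.backward()
print(loss)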
Training and evaluation
def train(epoch):
model.train()
train_loss = 0
for data, label in train_loader:
data, label = data.cuda(), label.cuda()
optimizer.zero_grad()
output = model(data)
        loss = criterion(output, label)  # note the (output, target) argument order
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))
def val(epoch):
    model.eval()
    val_loss = 0
    running_accu = 0  # count of correct predictions
    with torch.no_grad():
        for data, label in val_loader:
            data, label = data.cuda(), label.cuda()
            output = model(data)
            preds = torch.argmax(output, 1)
            loss = criterion(output, label)
            val_loss += loss.item()*data.size(0)
            running_accu += torch.sum(preds == label.data)
    val_loss = val_loss/len(val_loader.dataset)
    print('Epoch: {} \tValidation Loss: {:.6f}'.format(epoch, val_loss))
PyTorch optimizers
- torch.optim.ASGD
- torch.optim.Adadelta
- torch.optim.Adagrad
- torch.optim.Adam
- torch.optim.AdamW
- torch.optim.Adamax
- torch.optim.LBFGS
- torch.optim.RMSprop
- torch.optim.Rprop
- torch.optim.SGD
- torch.optim.SparseAdam
An Optimizer has three attributes:
- defaults: the optimizer's hyperparameters
- state: per-parameter caches (e.g. momentum buffers)
- param_groups: the managed parameter groups; a list in which each element is a dict with the keys params, lr, momentum, dampening, weight_decay and nesterov (see the sketch after the code below)
It also provides the following methods:
- zero_grad(): clears the gradients of all managed parameters. PyTorch does not zero gradients automatically, so they must be cleared after every backward pass.
- step(): performs one optimization step, i.e. updates the parameters
- load_state_dict(): loads a state dict, e.g. to resume training from a checkpoint
- state_dict(): returns the optimizer's current state as a dict
Every optimizer is a class and must be instantiated before use.
Within each training epoch the optimizer has to carry out two steps:
zeroing the gradients
updating the parameters
for epoch in range(EPOCH):
    ...
    optimizer.zero_grad()  # zero the gradients
    loss = ...             # compute the loss
    loss.backward()        # backpropagate
    optimizer.step()       # update the parameters
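To make param_groups concrete, an optimizer can be built from a list of dicts so that different layers get different learning rates (a sketch; the model and rates are made up for illustration):

import torch
from torch import nn

net = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 2))
optimizer = torch.optim.SGD([
    {'params': net[0].parameters()},              # uses the default lr below
    {'params': net[2].parameters(), 'lr': 1e-2},  # overrides lr for this group
], lr=1e-3, momentum=0.9)
print(len(optimizer.param_groups))  # 2
print(optimizer.defaults)           # the default hyperparameters, including lr=0.001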
Hands-on basics: FashionMNIST clothing classification
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
Configuring the training environment and hyperparameters
# Configure the GPU; there are two options here
## Option 1: use os.environ
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Option 2: create a device object, then call .to(device) on anything that should run on the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # "cuda:1" would fail here, since only one GPU is visible
## Other hyperparameters: batch_size, num_workers, learning rate and the total number of epochs
batch_size = 4
num_workers = 0   # on Windows this should be 0, otherwise multiprocessing errors occur
lr = 1e-4
epochs = 2
Reading and loading the data
# First set up the data transforms
from torchvision import transforms
image_size = 28
data_transform = transforms.Compose([
    transforms.ToPILImage(),
    # This step depends on how the data is read later; it is unnecessary with the built-in dataset
    transforms.Resize(image_size),
    transforms.ToTensor()
])
## Option 1: use the torchvision built-in dataset; downloading may take a while
from torchvision import datasets
train_data = datasets.FashionMNIST(root='./', train=True, download=True, transform=data_transform)
test_data = datasets.FashionMNIST(root='./', train=False, download=True, transform=data_transform)
## Option 2: read the csv data and build a custom Dataset class
# csv download link: https://www.kaggle.com/zalando-research/fashionmnist
class FMDataset(Dataset):
def __init__(self, df, transform=None):
self.df = df
self.transform = transform
self.images = df.iloc[:,1:].values.astype(np.uint8)
self.labels = df.iloc[:, 0].values
def __len__(self):
return len(self.images)
def __getitem__(self, idx):
image = self.images[idx].reshape(28,28,1)
label = int(self.labels[idx])
if self.transform is not None:
image = self.transform(image)
else:
image = torch.tensor(image/255., dtype=torch.float)
label = torch.tensor(label, dtype=torch.long)
return image, label
train_df = pd.read_csv("../data/FashionMNIST/fashion-mnist_train.csv")
test_df = pd.read_csv("../data/FashionMNIST/fashion-mnist_test.csv")
train_data = FMDataset(train_df, data_transform)
test_data = FMDataset(test_df, data_transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
# Not sure why the server keeps crashing here; to be revisited
import matplotlib.pyplot as plt
image, label = next(iter(train_loader))
print(image.shape, label.shape)
plt.imshow(image[0][0], cmap="gray")
torch.Size([4, 1, 28, 28]) torch.Size([4])
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(1, 32, 5),
nn.ReLU(),
nn.MaxPool2d(2, stride=2),
nn.Dropout(0.3),
nn.Conv2d(32, 64, 5),
nn.ReLU(),
nn.MaxPool2d(2, stride=2),
nn.Dropout(0.3)
)
self.fc = nn.Sequential(
nn.Linear(64*4*4, 512),
nn.ReLU(),
nn.Linear(512, 10)
)
def forward(self, x):
x = self.conv(x)
x = x.view(-1, 64*4*4)
x = self.fc(x)
# x = nn.functional.normalize(x)
return x
model = Net()
model = model.cuda()
# model = nn.DataParallel(model).cuda()  # multi-GPU variant, covered further in a later lesson
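A quick shape check before training (added here as a sketch): each 5x5 conv removes 4 pixels and each 2x2 pooling halves the size, so 28 -> 24 -> 12 -> 8 -> 4, which gives the 64*4*4 input of the fully connected part.

with torch.no_grad():
    dummy = torch.zeros(1, 1, 28, 28).cuda()
    print(model(dummy).shape)  # torch.Size([1, 10])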
Setting the loss function
criterion = nn.CrossEntropyLoss()
Setting the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
Training and validation
def train(epoch):
model.train()
train_loss = 0
for data, label in train_loader:
data, label = data.cuda(), label.cuda()
optimizer.zero_grad()
output = model(data)
loss = criterion(output, label)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))
def val(epoch):
model.eval()
val_loss = 0
gt_labels = []
pred_labels = []
with torch.no_grad():
for data, label in test_loader:
data, label = data.cuda(), label.cuda()
output = model(data)
preds = torch.argmax(output, 1)
gt_labels.append(label.cpu().data.numpy())
pred_labels.append(preds.cpu().data.numpy())
loss = criterion(output, label)
val_loss += loss.item()*data.size(0)
val_loss = val_loss/len(test_loader.dataset)
gt_labels, pred_labels = np.concatenate(gt_labels), np.concatenate(pred_labels)
acc = np.sum(gt_labels==pred_labels)/len(pred_labels)
print('Epoch: {} \tValidation Loss: {:.6f}, Accuracy: {:6f}'.format(epoch, val_loss, acc))
for epoch in range(1, epochs+1):
train(epoch)
val(epoch)
Epoch: 1 Training Loss: 0.505714
Epoch: 1 Validation Loss: 0.371850, Accuracy: 0.864200
Epoch: 2 Training Loss: 0.393027
Epoch: 2 Validation Loss: 0.329903, Accuracy: 0.878400
Saving the model
save_path = "./FashionModel.pkl"
torch.save(model, save_path)
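torch.save used this way pickles the whole model object, architecture included. A common alternative (a sketch, not from the original text) is to save only the state dict and rebuild the model class when loading:

# Save only the parameters
torch.save(model.state_dict(), "./FashionModel_state_dict.pkl")

# To load: re-create the architecture first, then restore the weights
model2 = Net().cuda()
model2.load_state_dict(torch.load("./FashionModel_state_dict.pkl"))
model2.eval()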