The basic machine learning workflow is: prepare the data, build a model, define a loss function, and optimize.
The deep learning workflow follows the same pattern: set the hyperparameters, load the data, build the network, choose a loss function and optimizer, then train and validate the model.
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
# Hyperparameters
batch_size = 16   # batch size
lr = 1e-4         # initial learning rate
max_epochs = 100  # maximum number of training epochs
# GPU setup
# Option 1: use os.environ; with this approach nothing else needs to be set to use the GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
# Option 2: use "device"; then call .to(device) on every tensor/model that should run on the GPU
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
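A minimal sketch of how the `device` handle is used afterwards (the input tensor below is hypothetical; any tensor or model is moved the same way):

```python
x = torch.randn(batch_size, 3, 224, 224)  # hypothetical input batch
x = x.to(device)                          # move the data to the chosen device
# model = model.to(device)                # a model is moved to the GPU the same way
```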
A custom Dataset class must inherit from PyTorch's own Dataset class and implement three main methods: `__init__`, `__getitem__`, and `__len__`.
For example:
import pandas as pd
from PIL import Image

class MyDataset(Dataset):
    def __init__(self, data_dir, info_csv, image_list, transform=None):
        """
        Args:
            data_dir: path to the image directory.
            info_csv: path to the csv file containing image indexes
                with corresponding labels.
            image_list: path to the txt file containing the image names of the training/validation set.
            transform: optional transform to be applied on a sample.
        """
        label_info = pd.read_csv(info_csv)
        image_file = open(image_list).readlines()
        self.data_dir = data_dir
        self.image_file = image_file
        self.label_info = label_info
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index: the index of the item
        Returns:
            the image and its label
        """
        image_name = self.image_file[index].strip('\n')
        raw_label = self.label_info.loc[self.label_info['Image_index'] == image_name]
        label = raw_label.iloc[:, 0]
        image_name = os.path.join(self.data_dir, image_name)
        image = Image.open(image_name).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        return len(self.image_file)
from torchvision import datasets
# train_path = ''  # path to the training set
# val_path = ''    # path to the validation set
train_data = datasets.ImageFolder(train_path, transform=data_transform)
val_data = datasets.ImageFolder(val_path, transform=data_transform)
# or, using the custom Dataset (pass whatever arguments its __init__ expects):
train_data = MyDataset(train_path, transform=data_transform)
val_data = MyDataset(val_path, transform=data_transform)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=4, shuffle=True, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, num_workers=4, shuffle=False)
# batch_size: number of samples read in per batch
# num_workers: number of worker processes used to load data
# shuffle: whether to shuffle the data when reading it
# drop_last: drop the last incomplete batch so it does not take part in training
import matplotlib.pyplot as plt
images, labels = next(iter(val_loader))
print(images.shape)
plt.imshow(images[0].permute(1, 2, 0))
plt.show()
Deep neural networks are built mainly with the torch.nn module. torch.nn contains the following parts:
| Category | Module | Submodule | Description |
|---|---|---|---|
| Parameters | parameter | Parameter | Model parameter, a Tensor |
| | | UninitializedParameter | Parameter that is not yet initialized |
| | | UninitializedBuffer | Tensor (buffer) that is not yet initialized |
| Basic building blocks | Containers | Module | Base class for all neural network modules |
| | | Sequential | Chains modules together into a network |
| | | ModuleList/Dict | List/Dict of Modules, with no implied connection order |
| | | ParameterList/Dict | List/Dict of parameters |
| Basic layers | Convolution Layers | nn.Conv1d nn.Conv2d nn.Conv3d | 1-, 2-, 3-D convolution |
| | | nn.ConvTranspose1d nn.ConvTranspose2d nn.ConvTranspose3d | 1-, 2-, 3-D transposed convolution |
| | | nn.LazyConv1d nn.LazyConv2d nn.LazyConv3d | 1-, 2-, 3-D convolution whose parameters are initialized from the first input |
| | | nn.LazyConvTranspose1d nn.LazyConvTranspose2d nn.LazyConvTranspose3d | 1-, 2-, 3-D transposed convolution whose parameters are initialized from the first input |
| | | nn.Unfold | Extracts sliding local blocks from a tensor |
| | | nn.Fold | Combines sliding local blocks back into a tensor |
| | Pooling Layers | nn.MaxPool1d nn.MaxPool2d nn.MaxPool3d | 1-, 2-, 3-D max pooling |
| | | nn.MaxUnpool1d nn.MaxUnpool2d nn.MaxUnpool3d | Partial inverse of max pooling (non-maximal positions filled with zeros) |
| | | nn.AvgPool1d nn.AvgPool2d nn.AvgPool3d | 1-, 2-, 3-D average pooling |
| | | nn.FractionalMaxPool2d nn.FractionalMaxPool3d | 2-, 3-D fractional max pooling |
| | | nn.LPPool1d nn.LPPool2d | 1-, 2-D power-average pooling |
| | | nn.AdaptiveMaxPool1d/2d/3d nn.AdaptiveAvgPool1d/2d/3d | 1-, 2-, 3-D adaptive max/average pooling |
| | Padding Layers | nn.ReflectionPad1d nn.ReflectionPad2d nn.ReflectionPad3d | Pads the input tensor using the reflection of its boundary |
| | | nn.ReplicationPad1d nn.ReplicationPad2d nn.ReplicationPad3d | Pads the input tensor by replicating its boundary elements |
| | | nn.ZeroPad2d | Pads the input tensor with zeros |
| | | nn.ConstantPad1d nn.ConstantPad2d nn.ConstantPad3d | Pads the input tensor with a given constant |
| | Non-linear Activations | nn.Softmax nn.Sigmoid nn.ReLU nn.Tanh, etc. | See the official documentation for details |
| | Linear Layers | nn.Identity nn.Linear nn.Bilinear nn.LazyLinear | Linear transformations |
| | Normalization Layers | nn.BatchNorm1d/2d/3d nn.LazyBatchNorm1d/2d/3d | Normalization over a data batch; see the original paper for details |
| | | nn.InstanceNorm1d/2d/3d nn.LazyInstanceNorm1d/2d/3d | Normalization within each channel |
| | | nn.LayerNorm | Normalization within a layer |
| | | nn.GroupNorm | Normalization within groups of channels |
| | | nn.SyncBatchNorm | Batch normalization synchronized across devices/processes |
| | | nn.LocalResponseNorm | Local response normalization over neighboring channels |
| | Recurrent Layers | nn.RNNBase nn.RNN nn.LSTM nn.GRU nn.RNNCell nn.LSTMCell nn.GRUCell | Recurrent neural network layers |
| | Transformer Layers | nn.Transformer | The Transformer model |
| | | nn.TransformerEncoder nn.TransformerDecoder | Encoder (decoder) made of a stack of encoder (decoder) layers |
| | | nn.TransformerEncoderLayer | Self-attention plus a feed-forward network |
| | | nn.TransformerDecoderLayer | Self-attention, multi-head (cross) attention, and a feed-forward network |
| | Dropout Layers | nn.Dropout nn.Dropout2d nn.Dropout3d | During training, randomly zeroes elements with probability p (Bernoulli) to prevent overfitting |
| | | nn.AlphaDropout nn.FeatureAlphaDropout | Dropout that keeps the mean and standard deviation of the input |
| | Sparse Layers | nn.Embedding | Embedding vectors |
| | | nn.EmbeddingBag | Computes sums or means over bags of embeddings |
| Functions | Distance functions | nn.CosineSimilarity | Cosine similarity |
| | | nn.PairwiseDistance | Pairwise p-norm distance |
| | Loss functions | nn.L1Loss nn.MSELoss nn.CrossEntropyLoss nn.KLDivLoss, etc. | See the official documentation for details |
| Others | Vision Layers | nn.PixelShuffle nn.PixelUnshuffle | Pixel shuffle / unshuffle |
| | | nn.Upsample nn.UpsamplingNearest2d nn.UpsamplingBilinear2d | Upsampling |
| | Shuffle Layers | nn.ChannelShuffle | Shuffles data across channels |
| | DataParallel Layers | nn.DataParallel nn.parallel.DistributedDataParallel | Multi-GPU parallel computation |
| | Utilities | from torch.nn.utils import ... | See the official documentation for details |
As an example, a multilayer perceptron (MLP):
import torch
from torch import nn

# Define an MLP class
class MLP(nn.Module):
    # Declare the layers that hold model parameters; here, two fully connected layers
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)  # call the constructor of the parent class nn.Module for the necessary initialization
        self.hidden = nn.Linear(784, 256)
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)

    # Define the forward computation: how to compute the model output from the input x
    def forward(self, x):
        o = self.act(self.hidden(x))
        return self.output(o)

# Instantiate and run
X = torch.rand(2, 784)
net = MLP()
net(X)
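The same MLP can also be expressed with the `nn.Sequential` container from the table above; a minimal sketch:

```python
# Equivalent model built by chaining layers with nn.Sequential
net_seq = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)
print(net_seq(X).shape)  # torch.Size([2, 10])
```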
# A custom layer without parameters
class MyLayer(nn.Module):
    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)

    def forward(self, x):
        return x - x.mean()

# A custom layer whose parameters are stored in an nn.ParameterList
class MyListDense(nn.Module):
    def __init__(self):
        super(MyListDense, self).__init__()
        self.params = nn.ParameterList([nn.Parameter(torch.randn(4, 4)) for i in range(3)])
        self.params.append(nn.Parameter(torch.randn(4, 1)))

    def forward(self, x):
        for i in range(len(self.params)):
            x = torch.mm(x, self.params[i])
        return x

# A custom layer whose parameters are stored in an nn.ParameterDict
class MyDictDense(nn.Module):
    def __init__(self):
        super(MyDictDense, self).__init__()
        self.params = nn.ParameterDict({
            'linear1': nn.Parameter(torch.randn(4, 4)),
            'linear2': nn.Parameter(torch.randn(4, 1))
        })
        self.params.update({'linear3': nn.Parameter(torch.randn(4, 2))})  # add a new entry

    def forward(self, x, choice='linear1'):
        return torch.mm(x, self.params[choice])
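A quick usage sketch of the ParameterDict layer; the output shapes follow the 4×n parameters defined above:

```python
net = MyDictDense()
x = torch.ones(1, 4)
print(net(x, 'linear1').shape)  # torch.Size([1, 4])
print(net(x, 'linear2').shape)  # torch.Size([1, 1])
print(net(x, 'linear3').shape)  # torch.Size([1, 2])
```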
# 2-D cross-correlation (the core computation of a convolutional layer)
def corr2d(X, K):
    h, w = K.shape
    X, K = X.float(), K.float()
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y

# A 2-D convolutional layer
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        # random initialization
        self.weight = nn.Parameter(torch.randn(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        return corr2d(x, self.weight) + self.bias
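A small check of `corr2d` on a toy input (values chosen arbitrarily for illustration):

```python
X = torch.tensor([[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]])
K = torch.tensor([[0., 1.], [2., 3.]])
print(corr2d(X, K))  # tensor([[19., 25.], [37., 43.]])
```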
# 2-D pooling (max or average) with a stride of 1
def pool2d(X, pool_size, mode='max'):
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i: i + p_h, j: j + p_w].max()
            elif mode == 'avg':
                Y[i, j] = X[i: i + p_h, j: j + p_w].mean()
    return Y
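And a quick check of `pool2d` on the same toy input:

```python
X = torch.tensor([[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]])
print(pool2d(X, (2, 2)))              # tensor([[4., 5.], [7., 8.]])
print(pool2d(X, (2, 2), mode='avg'))  # tensor([[2., 3.], [5., 6.]])
```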
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # input image channels: 1; output channels: 6; 5x5 convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # if the window is square, a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
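With two 5×5 convolutions and two 2×2 poolings in front of the 16·5·5 fully connected layer, this network expects 32×32 single-channel inputs; a quick shape check:

```python
net = Net()
x = torch.randn(1, 1, 32, 32)  # a batch of one 32x32 grayscale image
print(net(x).shape)            # torch.Size([1, 10])
```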
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 96, 11, 4),  # in_channels, out_channels, kernel_size, stride, padding
            nn.ReLU(),
            nn.MaxPool2d(3, 2),  # kernel_size, stride
            # smaller convolution window; padding of 2 keeps the input and output height/width equal; more output channels
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            # three consecutive convolutions with an even smaller window; except for the last one,
            # the number of output channels keeps growing.
            # no pooling after the first two of them, so the height and width are preserved
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2)
        )
        # the fully connected layers here are several times larger than in LeNet; dropout mitigates overfitting
        self.fc = nn.Sequential(
            nn.Linear(256*5*5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            # output layer: 10 classes because Fashion-MNIST is used here, instead of the 1000 classes in the paper
            nn.Linear(4096, 10),
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output
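Given the 256·5·5 input to the first fully connected layer, this version of AlexNet assumes single-channel 224×224 inputs (e.g. Fashion-MNIST resized to 224); a shape check:

```python
net = AlexNet()
x = torch.randn(1, 1, 224, 224)  # one 224x224 grayscale image
print(net(x).shape)              # torch.Size([1, 10])
```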
Different structures call for different initialization methods. PyTorch's initialization functions live in torch.nn.init; see the official documentation for details.
Iterate over all modules of the model and initialize their parameters:
def initialize_weights(self):
    for m in self.modules():
        # check whether the module is a Conv2d
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_normal_(m.weight.data)
            # check whether it has a bias
            if m.bias is not None:
                torch.nn.init.constant_(m.bias.data, 0.3)
        elif isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight.data, 0.1)
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias.data)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
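`initialize_weights` is written as a method, so it would normally live inside the model class and be called on an instance after construction; a minimal sketch using the `Net` class defined earlier (attaching the function from outside is only for illustration):

```python
# Attach the function above as a method of Net and run it
Net.initialize_weights = initialize_weights
net = Net()
net.initialize_weights()
print(net.conv1.bias[:3])  # biases of the first conv layer are now 0.3
```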
PyTorch provides the following commonly used loss functions in torch.nn:

| Name | Function | Formula | Use |
|---|---|---|---|
| Binary cross-entropy loss | torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean') | $\ell(x, y)=\begin{cases}\operatorname{mean}(L)& \text{reduction='mean'}\\\operatorname{sum}(L)& \text{reduction='sum'}\end{cases}$ with $l_n=-w_n\left[y_n\log x_n+(1-y_n)\log(1-x_n)\right]$ | Cross-entropy for binary classification |
| Cross-entropy loss | torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean') | $\operatorname{loss}(x,\text{class})=-\log\left(\frac{\exp(x[\text{class}])}{\sum_{j}\exp(x[j])}\right)=-x[\text{class}]+\log\left(\sum_{j}\exp(x[j])\right)$ | Multi-class classification |
| L1 loss | torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean') | $L_{n}=\lvert x_{n}-y_{n}\rvert$ | Regression; absolute error |
| MSE loss | torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean') | $l_{n}=\left(x_{n}-y_{n}\right)^{2}$ | Regression; squared error |
| Smooth L1 loss | torch.nn.SmoothL1Loss(size_average=None, reduce=None, reduction='mean', beta=1.0) | $\operatorname{loss}(x,y)=\frac{1}{n}\sum_{i=1}^{n}z_{i},\ z_{i}=\begin{cases}0.5\left(x_{i}-y_{i}\right)^{2}& \lvert x_{i}-y_{i}\rvert<1\\\lvert x_{i}-y_{i}\rvert-0.5& \text{otherwise}\end{cases}$ | Smoothed L1; less sensitive to outliers |
| Poisson negative log-likelihood loss | torch.nn.PoissonNLLLoss(log_input=True, full=False, size_average=None, eps=1e-08, reduce=None, reduction='mean') | $\operatorname{loss}(x,y)=\begin{cases}e^{x_{n}}-x_{n}y_{n}& \text{log\_input=True}\\x_{n}-y_{n}\cdot\log\left(x_{n}+\text{eps}\right)& \text{log\_input=False}\end{cases}$ | Negative log-likelihood for a Poisson-distributed target |
| KL divergence | torch.nn.KLDivLoss(size_average=None, reduce=None, reduction='mean', log_target=False) | $D_{\mathrm{KL}}(P,Q)=\sum_{i=1}^{n}P\left(x_{i}\right)\left(\log P\left(x_{i}\right)-\log Q\left(x_{i}\right)\right)$ | Distance between continuous distributions; useful for regression over a continuous output space |
| MarginRankingLoss | torch.nn.MarginRankingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean') | $\operatorname{loss}(x_1,x_2,y)=\max\left(0,-y\cdot(x_1-x_2)+\text{margin}\right)$ | Ranking tasks |
| Multi-label margin loss | torch.nn.MultiLabelMarginLoss(size_average=None, reduce=None, reduction='mean') | $\operatorname{loss}(x,y)=\sum_{ij}\frac{\max\left(0,1-(x[y[j]]-x[i])\right)}{x.\operatorname{size}(0)}$ | Multi-label classification |
| Soft margin loss | torch.nn.SoftMarginLoss(size_average=None, reduce=None, reduction='mean') | $\operatorname{loss}(x,y)=\sum_{i}\frac{\log\left(1+\exp(-y[i]\cdot x[i])\right)}{x.\operatorname{nelement}()}$ | Two-class logistic loss |
| Multi-class margin (hinge) loss | torch.nn.MultiMarginLoss(p=1, margin=1.0, weight=None, size_average=None, reduce=None, reduction='mean') | $\operatorname{loss}(x,y)=\frac{\sum_{i}\max\left(0,\text{margin}-x[y]+x[i]\right)^{p}}{x.\operatorname{size}(0)}$ | Multi-class classification |
| Triplet loss | torch.nn.TripletMarginLoss(margin=1.0, p=2.0, eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean') | $L(a,p,n)=\max\left\{d\left(a_{i},p_{i}\right)-d\left(a_{i},n_{i}\right)+\text{margin},\,0\right\}$ | Triplet similarity learning |
| HingeEmbeddingLoss | torch.nn.HingeEmbeddingLoss(margin=1.0, size_average=None, reduce=None, reduction='mean') | $\ell_n=\begin{cases}x_n& y_n=1\\\max\left\{0,\Delta-x_{n}\right\}& y_n=-1\end{cases}$, where $x$ is the absolute difference between the two inputs | Measures similarity between two inputs |
| Cosine embedding loss | torch.nn.CosineEmbeddingLoss(margin=0.0, size_average=None, reduce=None, reduction='mean') | $\ell_n=\begin{cases}1-\cos\left(x_{1},x_{2}\right)& y_n=1\\\max\left\{0,\cos\left(x_{1},x_{2}\right)-\text{margin}\right\}& y_n=-1\end{cases}$ | Cosine similarity between two vectors |
| CTC loss | torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False) | / | Sequence (temporal) classification |
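As a concrete example of the table above, a short sketch computing the cross-entropy loss on random logits and labels:

```python
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)          # a batch of 4 samples, 10 classes (raw scores)
target = torch.randint(0, 10, (4,))  # ground-truth class indices
loss = criterion(logits, target)
print(loss.item())                   # mean cross-entropy over the batch
```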
During training, backpropagation updates the model parameters; during validation/testing, the parameters stay fixed.
Training workflow: set the model to train mode, read a batch and move it to the GPU, zero the optimizer's gradients, run the forward pass, compute the loss, backpropagate, and update the parameters with the optimizer.
Code:
def train(epoch):
    model.train()
    train_loss = 0
    for data, label in train_loader:
        data, label = data.cuda(), label.cuda()  # move to the GPU
        optimizer.zero_grad()            # zero the optimizer's gradients
        output = model(data)             # feed the data through the model
        loss = criterion(output, label)  # compute the loss
        loss.backward()                  # backpropagate the loss through the network
        optimizer.step()                 # update the model parameters with the optimizer
        train_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(train_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))
Validation workflow: set the model to eval mode, disable gradient tracking with torch.no_grad(), run the forward pass, compute the loss and predictions, and accumulate the metrics.
Code:
def val(epoch):
    model.eval()
    val_loss = 0
    running_accu = 0
    with torch.no_grad():
        for data, label in val_loader:
            data, label = data.cuda(), label.cuda()
            output = model(data)
            preds = torch.argmax(output, 1)
            loss = criterion(output, label)
            val_loss += loss.item()*data.size(0)
            running_accu += torch.sum(preds == label.data)
    val_loss = val_loss/len(val_loader.dataset)
    print('Epoch: {} \tValidation Loss: {:.6f}'.format(epoch, val_loss))
PyTorch provides the following optimizers in torch.optim:

Optimizer | Description |
---|---|
LBFGS | Quasi-Newton method |
SGD | Stochastic gradient descent |
ASGD | Averaged stochastic gradient descent |
Adagrad | Adaptive learning rate; adds second-order momentum |
Adadelta | Extension of Adagrad that does not rely on a global learning rate |
Rprop | Resilient backpropagation |
RMSprop | Special case of Adadelta; works well for RNNs |
Adam | First-order momentum + second-order momentum |
Adamax | Variant of Adam with a simpler bound on the learning rate |
NAdam | Adam with a Nesterov momentum term |
SparseAdam | Adam for sparse tensors |
RAdam | Automatic variance rectification; removes the manual tuning involved in learning-rate warmup |
AdamW | Adam + L2 regularization (decoupled weight decay) |
from torch import optim
from torchvision.models import resnet18

# model
net = resnet18()
# different parameter groups can use different optimizer settings
optimizer = optim.SGD([{'params': net.fc.parameters()},
                       {'params': net.layer4[0].conv1.parameters(), 'lr': 1e-2}],
                      lr=1e-5)
for epoch in range(EPOCH):
    ...
    optimizer.zero_grad()  # zero the gradients
    loss = ...             # compute the loss
    loss.backward()        # backpropagation
    optimizer.step()       # update the parameters
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
batch_size = 256
num_workers = 4
lr = 1e-4
epochs = 20
from torchvision import transforms
image_size = 28
data_transform = transforms.Compose([
    transforms.ToPILImage(),  # depends on how the data are read later; not needed when using the built-in dataset
    transforms.Resize(image_size),
    transforms.ToTensor()
])
from torchvision import datasets
train_data = datasets.FashionMNIST(root='./', train=True, download=True, transform=data_transform)
test_data = datasets.FashionMNIST(root='./', train=False, download=True, transform=data_transform)
# CSV data download link: https://www.kaggle.com/zalando-research/fashionmnist
class FMDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        self.images = df.iloc[:, 1:].values.astype(np.uint8)
        self.labels = df.iloc[:, 0].values

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx].reshape(28, 28, 1)
        label = int(self.labels[idx])
        if self.transform is not None:
            image = self.transform(image)
        else:
            image = torch.tensor(image/255., dtype=torch.float)
        label = torch.tensor(label, dtype=torch.long)
        return image, label

train_df = pd.read_csv("./FashionMNIST/fashion-mnist_train.csv")
test_df = pd.read_csv("./FashionMNIST/fashion-mnist_test.csv")
train_data = FMDataset(train_df, data_transform)
test_data = FMDataset(test_df, data_transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
import matplotlib.pyplot as plt
image, label = next(iter(train_loader))
print(image.shape, label.shape)
plt.imshow(image[0][0], cmap="gray")
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Dropout(0.3),
            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, stride=2),
            nn.Dropout(0.3)
        )
        self.fc = nn.Sequential(
            nn.Linear(64*4*4, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(-1, 64*4*4)
        x = self.fc(x)
        # x = nn.functional.normalize(x)
        return x

model = Net()
model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
def train(epoch):
    model.train()
    train_loss = 0
    for data, label in train_loader:
        data, label = data.cuda(), label.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(train_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))

def val(epoch):
    model.eval()
    val_loss = 0
    gt_labels = []
    pred_labels = []
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.cuda(), label.cuda()
            output = model(data)
            preds = torch.argmax(output, 1)
            gt_labels.append(label.cpu().data.numpy())
            pred_labels.append(preds.cpu().data.numpy())
            loss = criterion(output, label)
            val_loss += loss.item()*data.size(0)
    val_loss = val_loss/len(test_loader.dataset)
    gt_labels, pred_labels = np.concatenate(gt_labels), np.concatenate(pred_labels)
    acc = np.sum(gt_labels==pred_labels)/len(pred_labels)
    print('Epoch: {} \tValidation Loss: {:.6f}, Accuracy: {:6f}'.format(epoch, val_loss, acc))
for epoch in range(1, epochs+1):
    train(epoch)
    val(epoch)

save_path = "./FashionModel.pkl"
torch.save(model, save_path)
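Since `torch.save(model, save_path)` stores the whole model object, it can later be reloaded directly; a brief sketch (depending on the PyTorch version, `weights_only=False` may need to be passed, and the class definition must be importable):

```python
# Reload the saved model object for inference
model = torch.load(save_path)
model.eval()
```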