The CIFAR-10 dataset consists of 60,000 32x32 color images in 10 classes, with 6,000 images per class. There are 50,000 training images and 10,000 test images.
The dataset is divided into five training batches and one test batch, each containing 10,000 images.
Below are the classes in the dataset, along with 10 random images from each class.
Compared with the MNIST dataset, CIFAR-10 has the following characteristic: its images are 32 × 32, while MNIST images are 28 × 28, so CIFAR-10 images are slightly larger. The code is as follows:
import torch
import torchvision
import torchvision.transforms as transforms
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
shuffle=False, num_workers=2)
classes = ('plane', 'car', 'bird', 'cat',
'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
This part of the code mainly uses the torchvision library. For an introduction to this library, see my other blog post, torchvision库及其常用的函数, or go directly to torchvision - PyTorch中文文档.
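As a quick sanity check (a minimal sketch that assumes the loading code above has already been run), you can pull one batch from trainloader and inspect it:
images, labels = next(iter(trainloader))
print(images.shape)    # torch.Size([4, 3, 32, 32]) with batch_size=4
print(labels.shape)    # torch.Size([4])
print(' '.join(classes[labels[j]] for j in range(4)))   # e.g. "dog ship cat plane"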
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as Data
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import time
import os
# Preset parameters
CLASS_NUM = 10
BATCH_SIZE = 128
EPOCH = 15
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
CLASS_NUM = 10 means the data is divided into 10 classes.
BATCH_SIZE = 128 means each batch contains 128 samples.
EPOCH = 15 means the entire training set is traversed 15 times.
device is also a manually chosen setting: it is cuda when a GPU is available and cpu otherwise.
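For intuition only (a small illustrative calculation, not part of the original script), these settings imply 391 mini-batches per epoch:
import math
iters_per_epoch = math.ceil(50000 / BATCH_SIZE)   # 50,000 training images / 128 -> 391 batches
total_updates = iters_per_epoch * EPOCH           # 391 * 15 = 5865 optimizer steps in total
print(iters_per_epoch, total_updates)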
class Net(nn.Module):
def __init__(self):
super(Net,self).__init__()
# input shape 3*32*32
self.conv1 = nn.Conv2d(3,64,3,padding=1) # 64*32*32
self.conv2 = nn.Conv2d(64,64,3,padding=1) # 64*32*32
self.pool1 = nn.MaxPool2d(2, 2) # 64*16*16
self.bn1 = nn.BatchNorm2d(64) # 64*16*16
self.relu1 = nn.ReLU() # 64*16*16
self.conv3 = nn.Conv2d(64,128,3,padding=1) # 128*16*16
self.conv4 = nn.Conv2d(128, 128, 3,padding=1) # 128*16*16
self.pool2 = nn.MaxPool2d(2, 2, padding=1) # 128*9*9
self.bn2 = nn.BatchNorm2d(128) # 128*9*9
self.relu2 = nn.ReLU() # 128*9*9
self.conv5 = nn.Conv2d(128,128, 3,padding=1) # 128*9*9
self.conv6 = nn.Conv2d(128, 128, 3,padding=1) # 128*9*9
self.conv7 = nn.Conv2d(128, 128, 1,padding=1) # 128*11*11
self.pool3 = nn.MaxPool2d(2, 2, padding=1) # 128*6*6
self.bn3 = nn.BatchNorm2d(128) # 128*6*6
self.relu3 = nn.ReLU() # 128*6*6
self.conv8 = nn.Conv2d(128, 256, 3,padding=1) # 256*6*6
self.conv9 = nn.Conv2d(256, 256, 3, padding=1) # 256*6*6
self.conv10 = nn.Conv2d(256, 256, 1, padding=1) # 256*8*8
self.pool4 = nn.MaxPool2d(2, 2, padding=1) # 256*5*5
self.bn4 = nn.BatchNorm2d(256) # 256*5*5
self.relu4 = nn.ReLU() # 256*5*5
self.conv11 = nn.Conv2d(256, 512, 3, padding=1) # 512*5*5
self.conv12 = nn.Conv2d(512, 512, 3, padding=1) # 512*5*5
self.conv13 = nn.Conv2d(512, 512, 1, padding=1) # 512*7*7
self.pool5 = nn.MaxPool2d(2, 2, padding=1) # 512*4*4
self.bn5 = nn.BatchNorm2d(512) # 512*4*4
self.relu5 = nn.ReLU() # 512*4*4
self.fc14 = nn.Linear(512*4*4,1024) # 1*1024
self.drop1 = nn.Dropout2d() # 1*1024
self.fc15 = nn.Linear(1024,1024) # 1*1024
self.drop2 = nn.Dropout2d() # 1*1024
self.fc16 = nn.Linear(1024,10) # 1*10
def forward(self,x):
x = x.to(device) # added by the author: move the input to the chosen device
x = self.conv1(x)
x = self.conv2(x)
x = self.pool1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.pool2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.conv5(x)
x = self.conv6(x)
x = self.conv7(x)
x = self.pool3(x)
x = self.bn3(x)
x = self.relu3(x)
x = self.conv8(x)
x = self.conv9(x)
x = self.conv10(x)
x = self.pool4(x)
x = self.bn4(x)
x = self.relu4(x)
x = self.conv11(x)
x = self.conv12(x)
x = self.conv13(x)
x = self.pool5(x)
x = self.bn5(x)
x = self.relu5(x)
# print(" x shape ",x.size())
x = x.view(-1,512*4*4)
x = F.relu(self.fc14(x))
x = self.drop1(x)
x = F.relu(self.fc15(x))
x = self.drop2(x)
x = self.fc16(x)
return x
# ----------------------------------------------------------------------------------------------------------------------
The meaning of the parameters of the convolution layer nn.Conv2d() is explained in detail in my other blog post, 卷积函数 and 解卷积函数, which covers in_channel, out_channel, in_size, and out_size in depth.
For an introduction to nn.BatchNorm2d(), I recommend these two posts: BatchNormalization、LayerNormalization、InstanceNorm、GroupNorm、SwitchableNorm总结 and PyTorch学习之归一化层(BatchNorm、LayerNorm、InstanceNorm、GroupNorm).
In short, nn.BatchNorm2d() normalizes per batch: for each channel, it normalizes over the N, H, and W dimensions.
A schematic comparison of the different normalization methods is shown below.
nn.Dropout2d() zeroes out entire channels, each with probability 0.5, and is used to prevent overfitting. See the blog post PyTorch笔记8-Dropout for details.
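A tiny illustrative check of both behaviors (a sketch for intuition only; the tensor size is arbitrary):
import torch
import torch.nn as nn

x = torch.randn(8, 64, 16, 16)                     # N=8, C=64, H=W=16
bn = nn.BatchNorm2d(64)
y = bn(x)
# each channel is normalized over the N, H and W dimensions
print(y.mean(dim=(0, 2, 3))[:3])                   # ~0 for every channel
print(y.var(dim=(0, 2, 3), unbiased=False)[:3])    # ~1 for every channel

drop = nn.Dropout2d(p=0.5)                         # zeroes whole channels in training mode
z = drop(x)
zeroed = (z.abs().sum(dim=(2, 3)) == 0).float().mean()
print(zeroed)                                      # roughly 0.5 of the feature maps are zeroed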
The figure below illustrates the meaning of H (height), W (width), C (channel), and N (sample number) within a batch.
Next, we walk through the entire forward pass module by module. The layer-by-layer summary that follows was produced with torchsummary, using the same summary() call that appears in the full script further down.
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 32, 32] 1,792
Conv2d-2 [-1, 64, 32, 32] 36,928
MaxPool2d-3 [-1, 64, 16, 16] 0
BatchNorm2d-4 [-1, 64, 16, 16] 128
ReLU-5 [-1, 64, 16, 16] 0
Conv2d-6 [-1, 128, 16, 16] 73,856
Conv2d-7 [-1, 128, 16, 16] 147,584
MaxPool2d-8 [-1, 128, 9, 9] 0
BatchNorm2d-9 [-1, 128, 9, 9] 256
ReLU-10 [-1, 128, 9, 9] 0
Conv2d-11 [-1, 128, 9, 9] 147,584
Conv2d-12 [-1, 128, 9, 9] 147,584
Conv2d-13 [-1, 128, 11, 11] 16,512
MaxPool2d-14 [-1, 128, 6, 6] 0
BatchNorm2d-15 [-1, 128, 6, 6] 256
ReLU-16 [-1, 128, 6, 6] 0
Conv2d-17 [-1, 256, 6, 6] 295,168
Conv2d-18 [-1, 256, 6, 6] 590,080
Conv2d-19 [-1, 256, 8, 8] 65,792
MaxPool2d-20 [-1, 256, 5, 5] 0
BatchNorm2d-21 [-1, 256, 5, 5] 512
ReLU-22 [-1, 256, 5, 5] 0
Conv2d-23 [-1, 512, 5, 5] 1,180,160
Conv2d-24 [-1, 512, 5, 5] 2,359,808
Conv2d-25 [-1, 512, 7, 7] 262,656
MaxPool2d-26 [-1, 512, 4, 4] 0
BatchNorm2d-27 [-1, 512, 4, 4] 1,024
ReLU-28 [-1, 512, 4, 4] 0
Linear-29 [-1, 1024] 8,389,632
Dropout2d-30 [-1, 1024] 0
Linear-31 [-1, 1024] 1,049,600
Dropout2d-32 [-1, 1024] 0
Linear-33 [-1, 10] 10,250
================================================================
Total params: 14,777,162
Trainable params: 14,777,162
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 3.51
Params size (MB): 56.37
Estimated Total Size (MB): 59.89
----------------------------------------------------------------
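To regenerate this table yourself, something like the following should work (torchsummary assumes a CUDA device by default; this mirrors the summary() call in the full script):
from torchsummary import summary

net = Net().to(device)
summary(net, (3, 32, 32))   # (channels, height, width) of a CIFAR-10 image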
Module 1 begins
Network structure | Function |
---|---|
Convolution | nn.Conv2d(3,64,3,padding=1) |
Convolution | nn.Conv2d(64,64,3,padding=1) |
Max pooling | nn.MaxPool2d(2, 2) |
Batch normalization | nn.BatchNorm2d(64) |
Activation | nn.ReLU() |
First convolution layer: nn.Conv2d(3,64,3,padding=1)
Its parameters mean:
in_channels = 3, out_channels = 64
kernel_size = 3×3, padding = 1
output size: 32 = 32 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 64×32×32
Second convolution layer: nn.Conv2d(64,64,3,padding=1)
Its parameters mean:
in_channels = 64, out_channels = 64
kernel_size = 3×3, padding = 1
output size: 32 = 32 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 64×32×32
First max pooling layer: nn.MaxPool2d(2, 2)
This layer downsamples over a 2x2 window with stride 2
output size: 16 = 32 (input) / 2
Output shape: 64×16×16
First BN: nn.BatchNorm2d(64)
The argument 64 is the number of input channels
The output shape is unchanged: 64×16×16
First activation: nn.ReLU()
The output shape is unchanged: 64×16×16
(A quick shape check of this module follows below.)
Module 1 ends
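Before moving on to module 2, here is a quick shape check of module 1 with a dummy input (an illustrative sketch only; the layers are copied from the Net class above):
import torch
import torch.nn as nn

x = torch.randn(1, 3, 32, 32)             # one CIFAR-10-sized image
block1 = nn.Sequential(
    nn.Conv2d(3, 64, 3, padding=1),       # 64*32*32
    nn.Conv2d(64, 64, 3, padding=1),      # 64*32*32
    nn.MaxPool2d(2, 2),                   # 64*16*16
    nn.BatchNorm2d(64),
    nn.ReLU(),
)
print(block1(x).shape)                    # torch.Size([1, 64, 16, 16])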
Module 2 begins
Network structure | Function |
---|---|
Convolution | nn.Conv2d(64,128,3,padding=1) |
Convolution | nn.Conv2d(128, 128, 3,padding=1) |
Max pooling | nn.MaxPool2d(2, 2, padding=1) |
Batch normalization | nn.BatchNorm2d(128) |
Activation | nn.ReLU() |
Third convolution layer: nn.Conv2d(64,128,3,padding=1)
Its parameters mean:
in_channels = 64, out_channels = 128
kernel_size = 3×3, padding = 1
output size: 16 = 16 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 128×16×16
Fourth convolution layer: nn.Conv2d(128, 128, 3,padding=1)
Its parameters mean:
in_channels = 128, out_channels = 128
kernel_size = 3×3, padding = 1
output size: 16 = 16 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 128×16×16
Second max pooling layer: nn.MaxPool2d(2, 2, padding=1)
This layer downsamples over a 2x2 window with stride 2 and padding 1, rounding down
output size: 9 = floor((16 (input) + 2×1 (padding) - 2 (kernel)) / 2) + 1
Output shape: 128×9×9
Second BN: nn.BatchNorm2d(128)
The argument 128 is the number of input channels
The output shape is unchanged: 128×9×9
Second activation: nn.ReLU()
The output shape is unchanged: 128×9×9
Module 2 ends
Module 3 begins
Network structure | Function |
---|---|
Convolution | nn.Conv2d(128,128, 3,padding=1) |
Convolution | nn.Conv2d(128, 128, 3,padding=1) |
Convolution | nn.Conv2d(128, 128, 1,padding=1) |
Max pooling | nn.MaxPool2d(2, 2, padding=1) |
Batch normalization | nn.BatchNorm2d(128) |
Activation | nn.ReLU() |
Fifth convolution layer: nn.Conv2d(128,128, 3,padding=1)
Its parameters mean:
in_channels = 128, out_channels = 128
kernel_size = 3×3, padding = 1
output size: 9 = 9 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 128×9×9
Sixth convolution layer: nn.Conv2d(128, 128, 3,padding=1)
Its parameters mean:
in_channels = 128, out_channels = 128
kernel_size = 3×3, padding = 1
output size: 9 = 9 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 128×9×9
Seventh convolution layer: nn.Conv2d(128, 128, 1,padding=1)
Its parameters mean:
in_channels = 128, out_channels = 128
kernel_size = 1×1, padding = 1
output size: 11 = 9 (input) + 2×1 (padding) - 1 (kernel) + 1
Output shape: 128×11×11 (note that a 1×1 convolution with padding=1 enlarges the feature map; a short check follows this module)
Third max pooling layer: nn.MaxPool2d(2, 2, padding=1)
This layer downsamples over a 2x2 window with stride 2 and padding 1, rounding down
output size: 6 = floor((11 (input) + 2×1 (padding) - 2 (kernel)) / 2) + 1
Output shape: 128×6×6
Third BN: nn.BatchNorm2d(128)
The argument 128 is the number of input channels
The output shape is unchanged: 128×6×6
Third activation: nn.ReLU()
The output shape is unchanged: 128×6×6
Module 3 ends
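The 1×1 convolution with padding=1 and the padded pooling are the least obvious steps, so here is a short check (illustrative only; the layers are copied from the Net class):
import torch
import torch.nn as nn

x = torch.randn(1, 128, 9, 9)
conv7 = nn.Conv2d(128, 128, 1, padding=1)   # 1x1 kernel, but padding adds a border of width 1
pool3 = nn.MaxPool2d(2, 2, padding=1)
y = conv7(x)
print(y.shape)                              # torch.Size([1, 128, 11, 11])
print(pool3(y).shape)                       # torch.Size([1, 128, 6, 6])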
Module 4 begins
Network structure | Function |
---|---|
Convolution | nn.Conv2d(128, 256, 3,padding=1) |
Convolution | nn.Conv2d(256, 256, 3, padding=1) |
Convolution | nn.Conv2d(256, 256, 1, padding=1) |
Max pooling | nn.MaxPool2d(2, 2, padding=1) |
Batch normalization | nn.BatchNorm2d(256) |
Activation | nn.ReLU() |
Eighth convolution layer: nn.Conv2d(128, 256, 3,padding=1)
Its parameters mean:
in_channels = 128, out_channels = 256
kernel_size = 3×3, padding = 1
output size: 6 = 6 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 256×6×6
Ninth convolution layer: nn.Conv2d(256, 256, 3, padding=1)
Its parameters mean:
in_channels = 256, out_channels = 256
kernel_size = 3×3, padding = 1
output size: 6 = 6 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 256×6×6
Tenth convolution layer: nn.Conv2d(256, 256, 1, padding=1)
Its parameters mean:
in_channels = 256, out_channels = 256
kernel_size = 1×1, padding = 1
output size: 8 = 6 (input) + 2×1 (padding) - 1 (kernel) + 1
Output shape: 256×8×8
Fourth max pooling layer: nn.MaxPool2d(2, 2, padding=1)
This layer downsamples over a 2x2 window with stride 2 and padding 1, rounding down
output size: 5 = floor((8 (input) + 2×1 (padding) - 2 (kernel)) / 2) + 1
Output shape: 256×5×5
Fourth BN: nn.BatchNorm2d(256)
The argument 256 is the number of input channels
The output shape is unchanged: 256×5×5
Fourth activation: nn.ReLU()
The output shape is unchanged: 256×5×5
Module 4 ends
Module 5 begins
Network structure | Function |
---|---|
Convolution | nn.Conv2d(256, 512, 3, padding=1) |
Convolution | nn.Conv2d(512, 512, 3, padding=1) |
Convolution | nn.Conv2d(512, 512, 1, padding=1) |
Max pooling | nn.MaxPool2d(2, 2, padding=1) |
Batch normalization | nn.BatchNorm2d(512) |
Activation | nn.ReLU() |
Eleventh convolution layer: nn.Conv2d(256, 512, 3, padding=1)
Its parameters mean:
in_channels = 256, out_channels = 512
kernel_size = 3×3, padding = 1
output size: 5 = 5 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 512×5×5
Twelfth convolution layer: nn.Conv2d(512, 512, 3, padding=1)
Its parameters mean:
in_channels = 512, out_channels = 512
kernel_size = 3×3, padding = 1
output size: 5 = 5 (input) + 2×1 (padding) - 3 (kernel) + 1
Output shape: 512×5×5
Thirteenth convolution layer: nn.Conv2d(512, 512, 1, padding=1)
Its parameters mean:
in_channels = 512, out_channels = 512
kernel_size = 1×1, padding = 1
output size: 7 = 5 (input) + 2×1 (padding) - 1 (kernel) + 1
Output shape: 512×7×7
Fifth max pooling layer: nn.MaxPool2d(2, 2, padding=1)
This layer downsamples over a 2x2 window with stride 2 and padding 1, rounding down
output size: 4 = floor((7 (input) + 2×1 (padding) - 2 (kernel)) / 2) + 1
Output shape: 512×4×4
Fifth BN: nn.BatchNorm2d(512)
The argument 512 is the number of input channels
The output shape is unchanged: 512×4×4
Fifth activation: nn.ReLU()
The output shape is unchanged: 512×4×4
Module 5 ends
Reshaping before the output layers begins
x.view(-1,512*4*4)
flattens each of the batch_size samples in x into a one-dimensional vector of length 512*4*4. The -1 lets the number of rows adapt automatically (it becomes the batch size).
Reshaping before the output layers ends
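A one-line check of the flattening step (illustrative only):
import torch

x = torch.randn(128, 512, 4, 4)        # feature maps coming out of module 5 for one batch
flat = x.view(-1, 512 * 4 * 4)
print(flat.shape)                      # torch.Size([128, 8192])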
Output (fully connected) layers begin
Network structure | Function |
---|---|
Fully connected | self.fc14 = nn.Linear(512* 4* 4,1024) |
Dropout | nn.Dropout2d() |
Fully connected | nn.Linear(1024,1024) |
Dropout | nn.Dropout2d() |
Fully connected | nn.Linear(1024,10) |
First fully connected layer: nn.Linear(512* 4* 4,1024)
Input dimension: 512 * 4 * 4
Output dimension: 1×1024
First Dropout layer: nn.Dropout2d()
The data dimension is unchanged: 1×1024
Second fully connected layer: nn.Linear(1024,1024)
Input dimension: 1×1024
Output dimension: 1×1024
Second Dropout layer: nn.Dropout2d()
The data dimension is unchanged: 1×1024
Third fully connected layer: nn.Linear(1024,10)
Input dimension: 1×1024
Output dimension: 1×10, corresponding to CLASS_NUM = 10 classes
Output (fully connected) layers end
def unpickle(file):
import pickle
with open(file, 'rb') as fo:
dict = pickle.load(fo, encoding='bytes')
return dict
# Read data from the original CIFAR-10 batch files
# Returns train_data[50000,3072] and labels[50000]
# or test_data[10000,3072] and labels[10000]
def get_data(train=False):
data = None
labels = None
if train == True:
for i in range(1, 6):
batch = unpickle('data/cifar-10-batches-py/data_batch_' + str(i))
if i == 1:
data = batch[b'data']
else:
data = np.concatenate([data, batch[b'data']])
if i == 1:
labels = batch[b'labels']
else:
labels = np.concatenate([labels, batch[b'labels']])
else:
batch = unpickle('data/cifar-10-batches-py/test_batch')
data = batch[b'data']
labels = batch[b'labels']
return data, labels
Personally, I think of this as a module for efficient data access, built on pickle.
I don't fully understand the details yet, so I'm leaving it as an open question for now.
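To make the pickle part less mysterious, here is what one unpickled training batch looks like (a sketch that assumes the dataset has already been downloaded to data/cifar-10-batches-py):
batch = unpickle('data/cifar-10-batches-py/data_batch_1')
print(batch.keys())          # dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
print(batch[b'data'].shape)  # (10000, 3072): each row is a 32*32*3 image flattened to 3072 bytes
print(batch[b'labels'][:10]) # a list of class indices in the range 0-9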
# Image preprocessing: Compose chains multiple transform operations together
# For color images, the color channels are not statistically stationary
transform = transforms.Compose([
# ToTensor converts a PIL.Image (RGB) or numpy.ndarray (H * W * C)
# with values 0-255 into a Tensor with values in the range 0 to 1.
transforms.ToTensor(),
# Normalize maps the image data to [-1, 1]
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
]
)
# Convert labels to torch.LongTensor
def target_transform(label):
label = np.array(label) # convert to an ndarray
target = torch.from_numpy(label).long() # convert to torch.LongTensor
return target
transforms.Compose(transforms) combines multiple transforms for sequential use; its argument is a list of transform objects. Here, transforms.Compose() chains .ToTensor() and .Normalize() so that they are executed one after the other.
Converting the labels to torch.LongTensor is just a simple type conversion.
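A small end-to-end check of the two preprocessing paths (illustrative only; it reuses get_data, transform and target_transform defined above, plus the imports from the top of the script, and assumes the dataset files are in place):
data, labels = get_data(train=False)
img = Image.fromarray(data[0].reshape(3, 32, 32).transpose(1, 2, 0))  # back to a HWC uint8 image
x = transform(img)
print(x.shape, x.min().item(), x.max().item())   # torch.Size([3, 32, 32]), values roughly in [-1, 1]
print(target_transform(labels[0]))               # a LongTensor class index, e.g. tensor(3)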
Once the download is complete, run the classification code:
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as Data
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import time
import os
from torchsummary import summary
# Preset parameters
CLASS_NUM = 10
BATCH_SIZE = 128
EPOCH = 15
# Check whether a GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# ----------------------------------------------------------------------------------------------------------------------
class Net(nn.Module):
def __init__(self):
super(Net,self).__init__()
# input shape 3*32*32
self.conv1 = nn.Conv2d(3,64,3,padding=1) # 64*32*32
self.conv2 = nn.Conv2d(64,64,3,padding=1) # 64*32*32
self.pool1 = nn.MaxPool2d(2, 2) # 64*16*16
self.bn1 = nn.BatchNorm2d(64) # 64*16*16
self.relu1 = nn.ReLU() # 64*16*16
self.conv3 = nn.Conv2d(64,128,3,padding=1) # 128*16*16
self.conv4 = nn.Conv2d(128, 128, 3,padding=1) # 128*16*16
self.pool2 = nn.MaxPool2d(2, 2, padding=1) # 128*9*9
self.bn2 = nn.BatchNorm2d(128) # 128*9*9
self.relu2 = nn.ReLU() # 128*9*9
self.conv5 = nn.Conv2d(128,128, 3,padding=1) # 128*9*9
self.conv6 = nn.Conv2d(128, 128, 3,padding=1) # 128*9*9
self.conv7 = nn.Conv2d(128, 128, 1,padding=1) # 128*11*11
self.pool3 = nn.MaxPool2d(2, 2, padding=1) # 128*6*6
self.bn3 = nn.BatchNorm2d(128) # 128*6*6
self.relu3 = nn.ReLU() # 128*6*6
self.conv8 = nn.Conv2d(128, 256, 3,padding=1) # 256*6*6
self.conv9 = nn.Conv2d(256, 256, 3, padding=1) # 256*6*6
self.conv10 = nn.Conv2d(256, 256, 1, padding=1) # 256*8*8
self.pool4 = nn.MaxPool2d(2, 2, padding=1) # 256*5*5
self.bn4 = nn.BatchNorm2d(256) # 256*5*5
self.relu4 = nn.ReLU() # 256*5*5
self.conv11 = nn.Conv2d(256, 512, 3, padding=1) # 512*5*5
self.conv12 = nn.Conv2d(512, 512, 3, padding=1) # 512*5*5
self.conv13 = nn.Conv2d(512, 512, 1, padding=1) # 512*7*7
self.pool5 = nn.MaxPool2d(2, 2, padding=1) # 512*4*4
self.bn5 = nn.BatchNorm2d(512) # 512*4*4
self.relu5 = nn.ReLU() # 512*4*4
self.fc14 = nn.Linear(512*4*4,1024) # 1*1024
self.drop1 = nn.Dropout2d() # 1*1024
self.fc15 = nn.Linear(1024,1024) # 1*1024
self.drop2 = nn.Dropout2d() # 1*1024
self.fc16 = nn.Linear(1024,10) # 1*10
def forward(self,x):
x = x.to(device) # added by the author: move the input to the chosen device
x = self.conv1(x)
x = self.conv2(x)
x = self.pool1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.pool2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.conv5(x)
x = self.conv6(x)
x = self.conv7(x)
x = self.pool3(x)
x = self.bn3(x)
x = self.relu3(x)
x = self.conv8(x)
x = self.conv9(x)
x = self.conv10(x)
x = self.pool4(x)
x = self.bn4(x)
x = self.relu4(x)
x = self.conv11(x)
x = self.conv12(x)
x = self.conv13(x)
x = self.pool5(x)
x = self.bn5(x)
x = self.relu5(x)
# print(" x shape ",x.size())
x = x.view(-1,512*4*4)
x = F.relu(self.fc14(x))
x = self.drop1(x)
x = F.relu(self.fc15(x))
x = self.drop2(x)
x = self.fc16(x)
return x
# ----------------------------------------------------------------------------------------------------------------------
def unpickle(file):
import pickle
with open(file, 'rb') as fo:
dict = pickle.load(fo, encoding='bytes')
return dict
# Read data from the original CIFAR-10 batch files
# Returns train_data[50000,3072] and labels[50000]
# or test_data[10000,3072] and labels[10000]
def get_data(train=False):
data = None
labels = None
if train == True:
for i in range(1, 6):
batch = unpickle('data/cifar-10-batches-py/data_batch_' + str(i))
if i == 1:
data = batch[b'data']
else:
data = np.concatenate([data, batch[b'data']])
if i == 1:
labels = batch[b'labels']
else:
labels = np.concatenate([labels, batch[b'labels']])
else:
batch = unpickle('data/cifar-10-batches-py/test_batch')
data = batch[b'data']
labels = batch[b'labels']
return data, labels
# Image preprocessing: Compose chains multiple transform operations together
# For color images, the color channels are not statistically stationary
transform = transforms.Compose([
# ToTensor converts a PIL.Image (RGB) or numpy.ndarray (H * W * C)
# with values 0-255 into a Tensor with values in the range 0 to 1.
transforms.ToTensor(),
# Normalize maps the image data to [-1, 1]
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
]
)
# Convert labels to torch.LongTensor
def target_transform(label):
label = np.array(label) # convert to an ndarray
target = torch.from_numpy(label).long() # convert to torch.LongTensor
return target
'''
A custom dataset wrapper for loading the cifar10 dataset.
It must inherit from data.Dataset.
'''
# Dataset class
class Cifar10_Dataset(Data.Dataset):
def __init__(self, train=True, transform=None, target_transform=None):
# Initialize file paths and settings
self.transform = transform
self.target_transform = target_transform
self.train = train
# Load the training set
if self.train:
self.train_data, self.train_labels = get_data(train)
self.train_data = self.train_data.reshape((50000, 3, 32, 32))
# Convert the image data to [height, width, channels] for easier preprocessing
self.train_data = self.train_data.transpose((0, 2, 3, 1))
# Load the test set
else:
self.test_data, self.test_labels = get_data()
self.test_data = self.test_data.reshape((10000, 3, 32, 32))
self.test_data = self.test_data.transpose((0, 2, 3, 1))
pass
# Read one sample from the dataset, preprocess it, and return a (data, label) pair
def __getitem__(self, index):
if self.train:
img, label = self.train_data[index], self.train_labels[index]
else:
img, label = self.test_data[index], self.test_labels[index]
img = Image.fromarray(img)
# Image preprocessing
if self.transform is not None:
img = self.transform(img)
# Label preprocessing
if self.target_transform is not None:
target = self.target_transform(label)
return img, target
def __len__(self):
# Return the size of the dataset
if self.train:
return len(self.train_data)
else:
return len(self.test_data)
if __name__ == '__main__':
# Load the training and test sets
train_data = Cifar10_Dataset(True, transform, target_transform)
print('size of train_data:{}'.format(train_data.__len__()))
test_data = Cifar10_Dataset(False, transform, target_transform)
print('size of test_data:{}'.format(test_data.__len__()))
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
net = Net()
net.to(device)
# ------------------------------------------------
with torch.no_grad():
for input_data, _ in train_loader:
break
# summary(model.to(hyperparams['device']), input.size()[1:], device=hyperparams['device'])
# print(input_data.size())
summary(net, input_data.size()[1:])
os.system('pause')
# ------------------------------------------------
# Define the optimizer
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9,
weight_decay=5e-4) # mini-batch momentum SGD with L2 regularization (weight decay)
# Define the loss function
# With CrossEntropyLoss the target is a class index, not a one-hot vector
loss_fn = nn.CrossEntropyLoss()
loss_list = []
Accuracy = []
for epoch in range(1, EPOCH + 1):
# Training
timestart = time.time() # timing added by the author
for step, (x, y) in enumerate(train_loader):
b_x = Variable(x)
b_y = Variable(y)
output = net(b_x)
b_x, b_y = b_x.to(device), b_y.to(device) # move from CPU to GPU
loss = loss_fn(output, b_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Record the loss
if step % 50 == 0:
loss_list.append(loss.item()) # store a plain float so the list can be plotted later
# After each epoch, run a pass over the test set to check accuracy
pre_correct = 0.0
test_loader = Data.DataLoader(dataset=test_data, batch_size=100, shuffle=True)
for (x, y) in (test_loader):
b_x = Variable(x)
b_y = Variable(y)
b_x, b_y = b_x.to(device), b_y.to(device) # added by the author
output = net(b_x)
pre = torch.max(output, 1)[1]
# pre_correct = pre_correct.to(device) # added by the author
pre_correct = pre_correct + float(torch.sum(pre == b_y))
print('EPOCH:{epoch},ACC:{acc}%'.format(epoch=epoch, acc=(pre_correct / float(10000)) * 100))
Accuracy.append(pre_correct / float(10000) * 100)
# timing added by the author
print('epoch %d cost %3f sec' % (epoch, time.time() - timestart))
# Save the network model
torch.save(net, 'lenet_cifar_10.model')
# Plot the loss and accuracy curves
plt.figure()
plt.plot(loss_list)
plt.figure()
plt.plot(Accuracy)
plt.show()
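After training, the saved model could be reloaded for inference roughly like this (a sketch, not part of the original script; it reuses the classes and transforms defined above and the file name from torch.save):
# Reload the whole pickled model (newer PyTorch versions may require weights_only=False).
net = torch.load('lenet_cifar_10.model')
net.eval()
test_data = Cifar10_Dataset(False, transform, target_transform)
img, label = test_data[0]
with torch.no_grad():
    pred = net(img.unsqueeze(0)).argmax(dim=1).item()
print('predicted class:', pred, ' ground truth:', int(label))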