MNIST数据集介绍见上篇文章:在MNIST手写数字数据集上实现Logistic regression 逻辑回归
# 包
import torch
import torch.nn as nn
import torch.nn.functional as F
# torchvision 包收录了若干重要的公开数据集、网络模型和计算机视觉中的常用图像变换
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Device configuration (currently disabled; everything runs on the default device)
#torch.cuda.set_device(1) # selects which GPU PyTorch runs on
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyperparameters
num_epochs = 5
num_classes = 10
batch_size = 64 # number of samples in one batch
image_size = 28 # side length of one image in pixels (images are 28*28)
learning_rate = 0.001
所有的 torchvision.datasets 数据集都是 torch.utils.data.Dataset 的子类,即它们都实现了 __getitem__ 和 __len__ 方法。因此,它们都可以传递给 torch.utils.data.DataLoader,进而通过 torch.multiprocessing 实现批数据的并行化加载。
torchvision 包中各数据集的接口基本相近,它们至少包括两个公共参数 transform 和 target_transform,分别用于对输入和目标做变换。
torchvision.datasets.MNIST(root, train=True, transform=None, target_transform=None, download=False)
torch.utils.data.Dataset 是数据集的抽象类,所有用到的数据集都必须是其子类。这些子类都必须重写以下方法:__len__ 定义了数据集的规模;__getitem__ 支持 0 到 len(self)-1 范围内的整数索引。
torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, num_workers=0, collate_fn=<function default_collate>, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None)
组合数据集和采样器,并在数据集上提供单进程或多进程迭代器。
torch.utils.data.SubsetRandomSampler(indices)
从给定的索引列表中随机采样,不放回(每个索引只会被取到一次)。
# transforms.ToTensor() turns every image into a Tensor at load time, so the
# preprocessing happens as part of reading a sample from the dataset.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Loader for the training data: splits it into batches and shuffles the order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# The official test data is split in two: its first 5000 samples serve as the
# validation set and the remaining samples serve as the test set.
indices = range(len(test_dataset))
indices_val = indices[:5000]
indices_test = indices[5000:]

# Samplers that draw only from the index subset they were given.
sampler_val = torch.utils.data.SubsetRandomSampler(indices_val)
sampler_test = torch.utils.data.SubsetRandomSampler(indices_test)

# Both loaders read from test_dataset; the sampler decides which part is used.
validation_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    sampler=sampler_val,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    sampler=sampler_test,
)
# Read a single image from the training data and draw it.
idx = 0
# Indexing the dataset yields a (features, target) pair; element [0] is the
# image tensor and element [1] is its label.
features = train_dataset[idx][0]
muteimg = features.numpy()
# MNIST images are grayscale, so there is only one channel. That channel axis
# is dropped here and the picture is treated as a plain 2-D intensity matrix.
# imshow renders such a matrix with its colormap, so intensities show up as
# colors rather than shades of gray.
plt.imshow(muteimg[0])
print('标签是:', train_dataset[idx][1])
# Number of feature maps (output channels) of the two convolution layers.
depth = [4, 8]


class ConvNet(nn.Module):
    """Small two-convolution classifier for single-channel square images.

    Layer order: conv 5x5 -> ReLU -> 2x2 max-pool -> conv 5x5 -> ReLU ->
    2x2 max-pool -> flatten -> linear(512) -> ReLU -> dropout -> linear ->
    log-softmax over the class dimension.

    Args:
        input_size: Side length of the square input image. ``None`` (the
            default) falls back to the module-level ``image_size``.
        n_classes: Number of output classes. ``None`` (the default) falls
            back to the module-level ``num_classes``.
    """

    def __init__(self, input_size=None, n_classes=None):
        super().__init__()
        side = image_size if input_size is None else input_size
        classes = num_classes if n_classes is None else n_classes

        # 1 input channel -> depth[0] feature maps; padding=2 with a 5x5
        # kernel keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(1, depth[0], 5, padding=2)
        # 2x2 max-pooling, shared by both convolution stages.
        self.pool = nn.MaxPool2d(2, 2)
        # depth[0] feature maps -> depth[1] feature maps, same kernel/padding.
        self.conv2 = nn.Conv2d(depth[0], depth[1], 5, padding=2)
        # Two pooling steps shrink each side by a factor of 4, so the
        # flattened feature vector has depth[1] * (side // 4) ** 2 entries.
        pooled_side = side // 4
        self.fc1 = nn.Linear(depth[1] * pooled_side * pooled_side, 512)
        # Final linear classifier: 512 hidden units -> one score per class.
        self.fc2 = nn.Linear(512, classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Tensor of size (batch_size, 1, height, width).

        Returns:
            Tensor of size (batch_size, number of classes) in which every
            row is a log-softmax distribution over the classes.
        """
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        # size: (batch_size, depth[0], height / 2, width / 2)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # size: (batch_size, depth[1], height / 4, width / 4)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        # size: (batch_size, 512)
        # Dropout (default probability 0.5) is only active in training mode.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # size: (batch_size, number of classes)
        # Normalise over dim=1 (the classes of one sample). Normalising over
        # dim=0 would mix the samples of a batch with each other.
        return F.log_softmax(x, dim=1)

    def retrieve_features(self, x):
        """Return the activations of both convolution layers.

        Args:
            x: Tensor of size (batch_size, 1, height, width).

        Returns:
            A ``(feature_map1, feature_map2)`` tuple holding the ReLU output
            of the first and of the second convolution layer.
        """
        feature_map1 = F.relu(self.conv1(x))
        x = self.pool(feature_map1)
        feature_map2 = F.relu(self.conv2(x))
        return (feature_map1, feature_map2)
"""计算预测正确率的函数,其中predictions是模型给出的一组预测结果,batch_size行num_classes列的矩阵,labels是数据之中的正确答案"""
def accuracy(predictions, labels):
    """Count how many rows of a prediction matrix match their labels.

    Args:
        predictions: Score matrix with one row per sample and one column
            per class.
        labels: Correct class index of every sample.

    Returns:
        A ``(right_num, total)`` pair: a tensor holding the number of rows
        whose highest-scoring column equals the label, and ``len(labels)``.
    """
    # max over dim 1 yields (values, indices); only the indices matter here.
    _, predicted_classes = predictions.data.max(dim=1)
    expected_classes = labels.data.view_as(predicted_classes)
    hits = predicted_classes == expected_classes
    return hits.sum(), len(labels)
net = ConvNet()
# Cross-entropy loss for the multi-class classification task.
criterion = nn.CrossEntropyLoss()
# SGD with momentum. The step size is read from the learning_rate
# hyperparameter rather than a duplicated literal, so changing that
# hyperparameter actually affects training.
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
record = []  # list for recording accuracy and similar values
weights = []  # list meant for convolution kernels recorded every few steps
for epoch in range(num_epochs):
    # (correct, total) pair of every training batch seen so far in this epoch.
    train_accuracy = []
    # Every iteration handles one batch of inputs and targets.
    for batch_id, (data, target) in enumerate(train_loader):
        # Training mode enables dropout. It is set on every batch because the
        # validation pass below switches the network to eval mode.
        net.train()
        output = net(data)  # forward pass
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_accuracy.append(accuracy(output, target))

        if batch_id % 100 == 0:  # report once every 100 batches
            net.eval()  # eval mode switches dropout off
            # (correct, total) pair of every validation batch.
            val_accuracy = []
            # Validation never calls backward(), so autograd bookkeeping is
            # disabled for these forward passes. Separate variable names keep
            # the current training batch from being overwritten.
            with torch.no_grad():
                for val_data, val_target in validation_loader:
                    val_output = net(val_data)
                    val_accuracy.append(accuracy(val_output, val_target))

            # train_r: correct / total over all training batches of this
            # epoch so far; val_r: correct / total over the validation set.
            train_r = (sum([tup[0] for tup in train_accuracy]), sum([tup[1] for tup in train_accuracy]))
            val_r = (sum([tup[0] for tup in val_accuracy]), sum([tup[1] for tup in val_accuracy]))
            # The reported training accuracy is the average since the start
            # of the current epoch, not the accuracy of the latest batch.
            print('Epoch [{}/{}] [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t训练正确率: {:.2f}%\t校验正确率: {:.2f}%'.format(
                epoch+1, num_epochs, batch_id * batch_size, len(train_loader.dataset),
                100. * batch_id / len(train_loader), loss.item(),
                100. * train_r[0] / train_r[1],
                100. * val_r[0] / val_r[1]))
Epoch [1/5] [0/60000 (0%)] Loss: 2.304904 训练正确率: 6.00% 校验正确率: 6.00%
Epoch [1/5] [6400/60000 (11%)] Loss: 2.298373 训练正确率: 10.00% 校验正确率: 20.00%
Epoch [1/5] [12800/60000 (21%)] Loss: 2.294849 训练正确率: 12.00% 校验正确率: 33.00%
Epoch [1/5] [19200/60000 (32%)] Loss: 2.288789 训练正确率: 15.00% 校验正确率: 48.00%
Epoch [1/5] [25600/60000 (43%)] Loss: 2.259372 训练正确率: 19.00% 校验正确率: 59.00%
Epoch [1/5] [32000/60000 (53%)] Loss: 2.204686 训练正确率: 23.00% 校验正确率: 64.00%
Epoch [1/5] [38400/60000 (64%)] Loss: 1.863103 训练正确率: 27.00% 校验正确率: 65.00%
Epoch [1/5] [44800/60000 (75%)] Loss: 1.104945 训练正确率: 32.00% 校验正确率: 75.00%
Epoch [1/5] [51200/60000 (85%)] Loss: 0.926101 训练正确率: 37.00% 校验正确率: 80.00%
Epoch [1/5] [57600/60000 (96%)] Loss: 0.544159 训练正确率: 41.00% 校验正确率: 82.00%
Epoch [2/5] [0/60000 (0%)] Loss: 0.606964 训练正确率: 81.00% 校验正确率: 83.00%
Epoch [2/5] [6400/60000 (11%)] Loss: 0.657308 训练正确率: 81.00% 校验正确率: 84.00%
Epoch [2/5] [12800/60000 (21%)] Loss: 0.463484 训练正确率: 82.00% 校验正确率: 86.00%
Epoch [2/5] [19200/60000 (32%)] Loss: 0.511119 训练正确率: 82.00% 校验正确率: 86.00%
Epoch [2/5] [25600/60000 (43%)] Loss: 0.336797 训练正确率: 83.00% 校验正确率: 87.00%
Epoch [2/5] [32000/60000 (53%)] Loss: 0.440034 训练正确率: 83.00% 校验正确率: 88.00%
Epoch [2/5] [38400/60000 (64%)] Loss: 0.227374 训练正确率: 84.00% 校验正确率: 89.00%
Epoch [2/5] [44800/60000 (75%)] Loss: 0.193706 训练正确率: 84.00% 校验正确率: 89.00%
Epoch [2/5] [51200/60000 (85%)] Loss: 0.415834 训练正确率: 85.00% 校验正确率: 90.00%
Epoch [2/5] [57600/60000 (96%)] Loss: 0.400346 训练正确率: 85.00% 校验正确率: 90.00%
Epoch [3/5] [0/60000 (0%)] Loss: 0.324649 训练正确率: 89.00% 校验正确率: 91.00%
Epoch [3/5] [6400/60000 (11%)] Loss: 0.275483 训练正确率: 89.00% 校验正确率: 90.00%
Epoch [3/5] [12800/60000 (21%)] Loss: 0.170515 训练正确率: 90.00% 校验正确率: 91.00%
Epoch [3/5] [19200/60000 (32%)] Loss: 0.128764 训练正确率: 90.00% 校验正确率: 91.00%
Epoch [3/5] [25600/60000 (43%)] Loss: 0.222985 训练正确率: 90.00% 校验正确率: 92.00%
Epoch [3/5] [32000/60000 (53%)] Loss: 0.130712 训练正确率: 90.00% 校验正确率: 92.00%
Epoch [3/5] [38400/60000 (64%)] Loss: 0.164324 训练正确率: 90.00% 校验正确率: 92.00%
Epoch [3/5] [44800/60000 (75%)] Loss: 0.157739 训练正确率: 90.00% 校验正确率: 92.00%
Epoch [3/5] [51200/60000 (85%)] Loss: 0.184269 训练正确率: 91.00% 校验正确率: 93.00%
Epoch [3/5] [57600/60000 (96%)] Loss: 0.190089 训练正确率: 91.00% 校验正确率: 93.00%
Epoch [4/5] [0/60000 (0%)] Loss: 0.236670 训练正确率: 93.00% 校验正确率: 93.00%
Epoch [4/5] [6400/60000 (11%)] Loss: 0.180199 训练正确率: 92.00% 校验正确率: 93.00%
Epoch [4/5] [12800/60000 (21%)] Loss: 0.153781 训练正确率: 92.00% 校验正确率: 93.00%
Epoch [4/5] [19200/60000 (32%)] Loss: 0.374177 训练正确率: 92.00% 校验正确率: 93.00%
Epoch [4/5] [25600/60000 (43%)] Loss: 0.255291 训练正确率: 92.00% 校验正确率: 94.00%
Epoch [4/5] [32000/60000 (53%)] Loss: 0.221844 训练正确率: 92.00% 校验正确率: 94.00%
Epoch [4/5] [38400/60000 (64%)] Loss: 0.269937 训练正确率: 92.00% 校验正确率: 94.00%
Epoch [4/5] [44800/60000 (75%)] Loss: 0.130776 训练正确率: 93.00% 校验正确率: 94.00%
Epoch [4/5] [51200/60000 (85%)] Loss: 0.063669 训练正确率: 93.00% 校验正确率: 94.00%
Epoch [4/5] [57600/60000 (96%)] Loss: 0.255013 训练正确率: 93.00% 校验正确率: 94.00%
Epoch [5/5] [0/60000 (0%)] Loss: 0.203829 训练正确率: 93.00% 校验正确率: 94.00%
Epoch [5/5] [6400/60000 (11%)] Loss: 0.096614 训练正确率: 94.00% 校验正确率: 94.00%
Epoch [5/5] [12800/60000 (21%)] Loss: 0.192363 训练正确率: 94.00% 校验正确率: 94.00%
Epoch [5/5] [19200/60000 (32%)] Loss: 0.184329 训练正确率: 94.00% 校验正确率: 94.00%
Epoch [5/5] [25600/60000 (43%)] Loss: 0.177337 训练正确率: 94.00% 校验正确率: 94.00%
Epoch [5/5] [32000/60000 (53%)] Loss: 0.173036 训练正确率: 94.00% 校验正确率: 94.00%
Epoch [5/5] [38400/60000 (64%)] Loss: 0.143616 训练正确率: 94.00% 校验正确率: 95.00%
Epoch [5/5] [44800/60000 (75%)] Loss: 0.278670 训练正确率: 94.00% 校验正确率: 94.00%
Epoch [5/5] [51200/60000 (85%)] Loss: 0.204592 训练正确率: 94.00% 校验正确率: 95.00%
Epoch [5/5] [57600/60000 (96%)] Loss: 0.153006 训练正确率: 94.00% 校验正确率: 95.00%
# Evaluate the trained network on the test split.
net.eval()  # eval mode switches dropout off
vals = []  # (correct, total) pair of every test batch
with torch.no_grad():
    for data, target in test_loader:
        output = net(data)
        val = accuracy(output, target)
        vals.append(val)
# Aggregate the per-batch counts into one overall accuracy.
rights = (sum([tup[0] for tup in vals]), sum([tup[1] for tup in vals]))
# .item() extracts the count as a plain Python number and, unlike
# .data.numpy(), does not require the tensor to live on the CPU.
right_rate = rights[0].item() / rights[1]
print("accuracy:", right_rate)
accuracy: 0.9786