Preface
1. Handwritten-Digit Recognition Experiment Based on a Residual Network
5.4.1 Model Construction
5.4.1.1 The Residual Unit
5.4.1.2 Overall Structure of the Residual Network
5.4.2 ResNet18 without Residual Connections
5.4.2.1 Model Training
5.4.2.2 Model Evaluation
5.4.3 ResNet18 with Residual Connections
5.4.3.1 Model Training
5.4.3.2 Model Evaluation
5.4.4 Comparison with the High-Level API Implementation
Problems Encountered
Summary
I have again written this up in quite a lot of detail, and this time I ended up reusing a lot from the previous experiment. I ran into one problem that took a long time to work through, and I don't feel my fix is very good, so I'd appreciate it if the teacher and the more experienced readers could give me some pointers.
First, what is a residual network?
A residual network (Residual Network, ResNet) adds direct (skip) connections around the nonlinear layers of a neural network to alleviate the vanishing-gradient problem, which makes training deep networks much easier.
The basic building block of a residual network is the residual unit.
In this section we first build the residual unit used by ResNet18, and then assemble the complete network.
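To make the idea concrete, here is a tiny sketch (my own toy example, not from the textbook): the output of a residual unit is the transformed input plus the input itself, which is why the two must have the same shape.
import torch
import torch.nn as nn

f = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 4))  # some nonlinear transform f(x)
x = torch.randn(2, 4)
out = f(x) + x        # residual connection: add the input back onto f(x)
print(out.shape)      # torch.Size([2, 4]); the shapes must match for the addition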
First, the libraries that will be used:
# coding=gbk
import json
import gzip
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.transforms import Compose, Resize, Normalize,ToTensor
import random
import torch.utils.data as data
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.nn.init import constant_, normal_, uniform_
import time
from torchsummary import summary
from thop import profile
import torch.optim as opt
from nndl import RunnerV3
import metric
from nndl import plot
from torchvision.models import resnet18
These are almost the same libraries as in the previous experiment, but as before you need to pay attention to the encoding declaration (hence the # coding=gbk line).
The nonlinear layers wrapped by a residual unit must produce an output with the same shape as their input. If a convolutional layer's input and output feature maps have different channel counts, the output cannot be added directly to the input. To get around this, a 1×1 convolution can be used to map the input feature map to the same number of channels as the output of the stacked convolutions.
1×1 convolution: it works exactly like a standard convolution; the only special thing is that the kernel size is 1×1, so it ignores the relationships between neighbouring positions in the input and focuses on the relationships between channels. It can therefore be used to adjust the number of channels (so that feature maps can be added), to fuse information across channels, and to cut down parameters and computation.
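As a quick illustration (the sizes here are made up by me), a 1×1 convolution changes only the channel dimension and leaves the spatial size untouched:
x = torch.randn(1, 64, 8, 8)                  # [batch, channels, height, width]
conv1x1 = nn.Conv2d(64, 128, kernel_size=1)   # map 64 channels to 128 channels
print(conv1x1(x).shape)                       # torch.Size([1, 128, 8, 8])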
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, use_residual=True):
        """
        Residual unit
        Inputs:
            - in_channels: number of input channels
            - out_channels: number of output channels
            - stride: stride of the residual unit, controlled via the first convolutional layer
            - use_residual: whether to use the residual connection
        """
        super(ResBlock, self).__init__()
        self.stride = stride
        self.use_residual = use_residual
        # First convolutional layer: 3x3 kernel, configurable output channels and stride
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, padding=1, stride=self.stride)
        # Second convolutional layer: 3x3 kernel, stride 1, keeps the feature-map shape
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
        # If the output of conv2 and the input of this block have different shapes, use_1x1conv = True
        # and a 1x1 convolution is applied to the input so its shape matches the output of conv2
        if in_channels != out_channels or stride != 1:
            self.use_1x1conv = True
        else:
            self.use_1x1conv = False
        # When the wrapped layers change the channel count, a 1x1 convolution adjusts the channels before the addition
        if self.use_1x1conv:
            self.shortcut = nn.Conv2d(in_channels, out_channels, 1, stride=self.stride)
        # Each convolutional layer is followed by batch normalization (covered in detail in 7.5.1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        if self.use_1x1conv:
            self.bn3 = nn.BatchNorm2d(out_channels)

    def forward(self, inputs):
        y = F.relu(self.bn1(self.conv1(inputs)))
        y = self.bn2(self.conv2(y))
        if self.use_residual:
            if self.use_1x1conv:  # apply the 1x1 convolution so inputs match the shape of conv2's output y
                shortcut = self.shortcut(inputs)
                shortcut = self.bn3(shortcut)
            else:  # otherwise add inputs and the output y of conv2 directly
                shortcut = inputs
            y = torch.add(shortcut, y)
        out = F.relu(y)
        return out
That is one small residual unit, the blue part of the figure above.
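A quick sanity check of the unit (the values are my own, not from the original): with a channel change and stride 2, the 1×1 shortcut branch kicks in and the spatial size is halved.
block = ResBlock(in_channels=64, out_channels=128, stride=2, use_residual=True)
x = torch.randn(1, 64, 8, 8)
print(block(x).shape)   # expected: torch.Size([1, 128, 4, 4])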
Now for the overall structure of the residual network; the way it is defined is also a bit different from what we have seen before.
A residual network is simply many residual units chained together into a very deep network. The structure of ResNet18 is shown in Figure 5.16.
To make it easier to understand, ResNet18 can be divided into six modules:
Module 1: a 7×7 convolution, batch normalization and max pooling;
Modules 2 to 5: each contains two residual units; module 2 keeps 64 channels and the feature-map size, while modules 3, 4 and 5 raise the channels to 128, 256 and 512 respectively and halve the feature-map size;
Module 6: global average pooling followed by a fully connected layer.
The code for the ResNet18 model is as follows.
Define module 1.
def make_first_module(in_channels):
    # Module 1: 7x7 convolution, batch normalization, pooling
    m1 = nn.Sequential(nn.Conv2d(in_channels, 64, 7, stride=2, padding=3),
                       nn.BatchNorm2d(64), nn.ReLU(),
                       nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    return m1
Define modules 2 to 5.
def resnet_module(input_channels, out_channels, num_res_blocks, stride=1, use_residual=True):
    blk = []
    # Generate num_res_blocks residual units in a loop
    for i in range(num_res_blocks):
        if i == 0:  # the first residual unit of the module
            blk.append(ResBlock(input_channels, out_channels,
                                stride=stride, use_residual=use_residual))
        else:  # the remaining residual units of the module
            blk.append(ResBlock(out_channels, out_channels, use_residual=use_residual))
    return blk
Wrap modules 2 to 5.
def make_modules(use_residual):
    # Module 2: two residual units, 64 -> 64 channels, stride 1, feature-map size unchanged
    m2 = nn.Sequential(*resnet_module(64, 64, 2, stride=1, use_residual=use_residual))
    # Module 3: two residual units, 64 -> 128 channels, stride 2, feature-map size halved
    m3 = nn.Sequential(*resnet_module(64, 128, 2, stride=2, use_residual=use_residual))
    # Module 4: two residual units, 128 -> 256 channels, stride 2, feature-map size halved
    m4 = nn.Sequential(*resnet_module(128, 256, 2, stride=2, use_residual=use_residual))
    # Module 5: two residual units, 256 -> 512 channels, stride 2, feature-map size halved
    m5 = nn.Sequential(*resnet_module(256, 512, 2, stride=2, use_residual=use_residual))
    return m2, m3, m4, m5
Define the complete network.
# Define the complete network
class Model_ResNet18(nn.Module):
    def __init__(self, in_channels=3, num_classes=10, use_residual=True):
        super(Model_ResNet18, self).__init__()
        m1 = make_first_module(in_channels)
        m2, m3, m4, m5 = make_modules(use_residual)
        # Wrap modules 1 to 6
        self.net = nn.Sequential(m1, m2, m3, m4, m5,
                                 # Module 6: pooling layer and fully connected layer
                                 nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(512, num_classes))

    def forward(self, x):
        return self.net(x)
Here torchsummary can again be used to count the model's parameters.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0
model = Model_ResNet18(in_channels=1, num_classes=10, use_residual=True).to(device)
summary(model, ( 1, 32, 32))
The output is:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 16, 16] 3,200
BatchNorm2d-2 [-1, 64, 16, 16] 128
ReLU-3 [-1, 64, 16, 16] 0
MaxPool2d-4 [-1, 64, 8, 8] 0
Conv2d-5 [-1, 64, 8, 8] 36,928
BatchNorm2d-6 [-1, 64, 8, 8] 128
Conv2d-7 [-1, 64, 8, 8] 36,928
BatchNorm2d-8 [-1, 64, 8, 8] 128
ResBlock-9 [-1, 64, 8, 8] 0
Conv2d-10 [-1, 64, 8, 8] 36,928
BatchNorm2d-11 [-1, 64, 8, 8] 128
Conv2d-12 [-1, 64, 8, 8] 36,928
BatchNorm2d-13 [-1, 64, 8, 8] 128
ResBlock-14 [-1, 64, 8, 8] 0
Conv2d-15 [-1, 128, 4, 4] 73,856
BatchNorm2d-16 [-1, 128, 4, 4] 256
Conv2d-17 [-1, 128, 4, 4] 147,584
BatchNorm2d-18 [-1, 128, 4, 4] 256
Conv2d-19 [-1, 128, 4, 4] 8,320
BatchNorm2d-20 [-1, 128, 4, 4] 256
ResBlock-21 [-1, 128, 4, 4] 0
Conv2d-22 [-1, 128, 4, 4] 147,584
BatchNorm2d-23 [-1, 128, 4, 4] 256
Conv2d-24 [-1, 128, 4, 4] 147,584
BatchNorm2d-25 [-1, 128, 4, 4] 256
ResBlock-26 [-1, 128, 4, 4] 0
Conv2d-27 [-1, 256, 2, 2] 295,168
BatchNorm2d-28 [-1, 256, 2, 2] 512
Conv2d-29 [-1, 256, 2, 2] 590,080
BatchNorm2d-30 [-1, 256, 2, 2] 512
Conv2d-31 [-1, 256, 2, 2] 33,024
BatchNorm2d-32 [-1, 256, 2, 2] 512
ResBlock-33 [-1, 256, 2, 2] 0
Conv2d-34 [-1, 256, 2, 2] 590,080
BatchNorm2d-35 [-1, 256, 2, 2] 512
Conv2d-36 [-1, 256, 2, 2] 590,080
BatchNorm2d-37 [-1, 256, 2, 2] 512
ResBlock-38 [-1, 256, 2, 2] 0
Conv2d-39 [-1, 512, 1, 1] 1,180,160
BatchNorm2d-40 [-1, 512, 1, 1] 1,024
Conv2d-41 [-1, 512, 1, 1] 2,359,808
BatchNorm2d-42 [-1, 512, 1, 1] 1,024
Conv2d-43 [-1, 512, 1, 1] 131,584
BatchNorm2d-44 [-1, 512, 1, 1] 1,024
ResBlock-45 [-1, 512, 1, 1] 0
Conv2d-46 [-1, 512, 1, 1] 2,359,808
BatchNorm2d-47 [-1, 512, 1, 1] 1,024
Conv2d-48 [-1, 512, 1, 1] 2,359,808
BatchNorm2d-49 [-1, 512, 1, 1] 1,024
ResBlock-50 [-1, 512, 1, 1] 0
AdaptiveAvgPool2d-51 [-1, 512, 1, 1] 0
Flatten-52 [-1, 512] 0
Linear-53 [-1, 10] 5,130
================================================================
Total params: 11,180,170
Trainable params: 11,180,170
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.05
Params size (MB): 42.65
Estimated Total Size (MB): 43.71
----------------------------------------------------------------
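Since thop is imported above, the FLOPs can be estimated as well; a minimal sketch (my own addition, assuming the same 1×32×32 input on the same device):
dummy = torch.randn(1, 1, 32, 32).to(device)
# thop.profile returns the estimated multiply-add count and the parameter count
flops, params = profile(model, inputs=(dummy,))
print("FLOPs: {}, params: {}".format(flops, params))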
First, run the experiment with the ResNet18 that has no residual connections.
Before training the model, here is everything defined in the previous experiment that gets reused.
# Print and inspect the dataset split sizes
train_set, dev_set, test_set = json.load(gzip.open('mnist.json.gz'))
train_images, train_labels = train_set[0][:2000], train_set[1][:2000]
dev_images, dev_labels = dev_set[0][:200], dev_set[1][:200]
test_images, test_labels = test_set[0][:200], test_set[1][:200]
train_set, dev_set, test_set = [train_images, train_labels], [dev_images, dev_labels], [test_images, test_labels]
print('Length of train/dev/test set:{}/{}/{}'.format(len(train_set[0]), len(dev_set[0]), len(test_set[0])))
image, label = train_set[0][0], train_set[1][0]
image, label = np.array(image).astype('float32'), int(label)
# The raw image is a length-784 row vector and has to be reshaped into a [28, 28] image
image = np.reshape(image, [28, 28])
image = Image.fromarray(image.astype('uint8'), mode='L')
print("The number in the picture is {}".format(label))
plt.figure(figsize=(5, 5))
plt.imshow(image)
plt.savefig('conv-number5.pdf')
# Data preprocessing
transforms = Compose([Resize(32), ToTensor(), Normalize(mean=[1], std=[1])])
class MNIST_dataset(data.Dataset):
    def __init__(self, dataset, transforms, mode='train'):
        self.mode = mode
        self.transforms = transforms
        self.dataset = dataset

    def __getitem__(self, idx):
        # Get the image and label
        image, label = self.dataset[0][idx], self.dataset[1][idx]
        image, label = np.array(image).astype('float32'), int(label)
        image = np.reshape(image, [28, 28])
        image = Image.fromarray(image.astype('uint8'), mode='L')
        image = self.transforms(image)
        return image, label

    def __len__(self):
        return len(self.dataset[0])
# Fix the random seed
random.seed(0)
# Load the MNIST dataset
train_dataset = MNIST_dataset(dataset=train_set, transforms=transforms, mode='train')
test_dataset = MNIST_dataset(dataset=test_set, transforms=transforms, mode='test')
dev_dataset = MNIST_dataset(dataset=dev_set, transforms=transforms, mode='dev')
# Define the loss function
loss_fn = F.cross_entropy
# Define the evaluation metric
metric = metric.Accuracy(is_logist=True)
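As a quick sanity check of the preprocessing (my own snippet, not from the original post), every item should come back as a 1×32×32 tensor together with an integer label:
img, lbl = train_dataset[0]
print(img.shape, lbl)   # expected: torch.Size([1, 32, 32]) and the digit label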
class RunnerV3(object):
    def __init__(self, model, optimizer, loss_fn, metric, **kwargs):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.metric = metric  # only used to compute the evaluation metric
        # Record how the evaluation metric changes during training
        self.dev_scores = []
        # Record how the loss changes during training
        self.train_epoch_losses = []  # one loss value per epoch
        self.train_step_losses = []   # one loss value per step
        self.dev_losses = []
        # Record the best metric seen so far
        self.best_score = 0

    def train(self, train_loader, dev_loader=None, **kwargs):
        # Switch the model to training mode
        self.model.train()
        # Number of training epochs; defaults to 0 if not given
        num_epochs = kwargs.get("num_epochs", 0)
        # Logging frequency; defaults to 100 if not given
        log_steps = kwargs.get("log_steps", 100)
        # Evaluation frequency
        eval_steps = kwargs.get("eval_steps", 0)
        # Path for saving the model; defaults to "best_model.pdparams"
        save_path = kwargs.get("save_path", "best_model.pdparams")
        custom_print_log = kwargs.get("custom_print_log", None)
        # Total number of training steps
        num_training_steps = num_epochs * len(train_loader)
        if eval_steps:
            if self.metric is None:
                raise RuntimeError('Error: Metric can not be None!')
            if dev_loader is None:
                raise RuntimeError('Error: dev_loader can not be None!')
        # Number of steps run so far
        global_step = 0
        # Train for num_epochs epochs
        for epoch in range(num_epochs):
            # Accumulate the training loss
            total_loss = 0
            for step, data in enumerate(train_loader):
                X, y = data
                # Get the model predictions
                logits = self.model(X)
                loss = self.loss_fn(logits, y)  # reduces to the mean by default
                total_loss += loss
                # Save the loss of every step during training
                self.train_step_losses.append((global_step, loss.item()))
                if log_steps and global_step % log_steps == 0:
                    print(
                        f"[Train] epoch: {epoch}/{num_epochs}, step: {global_step}/{num_training_steps}, loss: {loss.item():.5f}")
                # Backpropagate to compute the gradient of every parameter
                loss.backward()
                if custom_print_log:
                    custom_print_log(self)
                # Update the parameters with mini-batch gradient descent
                self.optimizer.step()
                # Reset the gradients
                self.optimizer.zero_grad()
                # Check whether evaluation is needed
                if eval_steps > 0 and global_step > 0 and \
                        (global_step % eval_steps == 0 or global_step == (num_training_steps - 1)):
                    dev_score, dev_loss = self.evaluate(dev_loader, global_step=global_step)
                    print(f"[Evaluate] dev score: {dev_score:.5f}, dev loss: {dev_loss:.5f}")
                    # Switch the model back to training mode
                    self.model.train()
                    # If the current metric is the best so far, save the model
                    if dev_score > self.best_score:
                        self.save_model(save_path)
                        print(
                            f"[Evaluate] best accuracy performence has been updated: {self.best_score:.5f} --> {dev_score:.5f}")
                        self.best_score = dev_score
                global_step += 1
            # Accumulated training loss of the current epoch
            trn_loss = (total_loss / len(train_loader)).item()
            # Save the epoch-level training loss
            self.train_epoch_losses.append(trn_loss)
        print("[Train] Training done!")

    # During evaluation, torch.no_grad() keeps gradients from being computed and stored
    @torch.no_grad()
    def evaluate(self, dev_loader, **kwargs):
        assert self.metric is not None
        # Switch the model to evaluation mode
        self.model.eval()
        global_step = kwargs.get("global_step", -1)
        # Accumulate the loss on the dev set
        total_loss = 0
        # Reset the metric
        self.metric.reset()
        # Iterate over every batch of the dev set
        for batch_id, data in enumerate(dev_loader):
            X, y = data
            # Compute the model output
            logits = self.model(X)
            # Compute the loss
            loss = self.loss_fn(logits, y).item()
            # Accumulate the loss
            total_loss += loss
            # Accumulate the metric
            self.metric.update(logits, y)
        dev_loss = (total_loss / len(dev_loader))
        dev_score = self.metric.accumulate()
        # Record the dev-set loss
        if global_step != -1:
            self.dev_losses.append((global_step, dev_loss))
            self.dev_scores.append(dev_score)
        return dev_score, dev_loss

    # During prediction, torch.no_grad() keeps gradients from being computed and stored
    @torch.no_grad()
    def predict(self, x, **kwargs):
        # Switch the model to evaluation mode
        self.model.eval()
        # Run the forward pass to get the predictions
        logits = self.model(x)
        return logits

    def save_model(self, save_path):
        torch.save(self.model.state_dict(), save_path)

    def load_model(self, model_path):
        state_dict = torch.load(model_path)
        self.model.load_state_dict(state_dict)
class Accuracy():
    def __init__(self, is_logist=True):
        """
        Inputs:
            - is_logist: whether outputs are logits or already-activated values
        """
        # Number of correctly predicted samples
        self.num_correct = 0
        # Total number of samples
        self.num_count = 0
        self.is_logist = is_logist

    def update(self, outputs, labels):
        """
        Inputs:
            - outputs: predictions, shape=[N, class_num]
            - labels: labels, shape=[N, 1]
        """
        # Binary classification if shape[1] == 1, multi-class if shape[1] > 1
        if outputs.shape[1] == 1:  # binary classification
            outputs = torch.squeeze(outputs, dim=-1)
            if self.is_logist:
                # For logits, check whether the value is >= 0
                preds = torch.tensor((outputs >= 0), dtype=torch.float32)
            else:
                # Otherwise treat the value as a probability and threshold at 0.5
                preds = torch.tensor((outputs >= 0.5), dtype=torch.float32)
        else:
            # For multi-class, take the index of the largest value with torch.argmax
            preds = torch.argmax(outputs, dim=1)
        # Count how many predictions in this batch are correct
        labels = torch.squeeze(labels, dim=-1)
        batch_correct = torch.sum(torch.tensor(preds == labels, dtype=torch.float32)).numpy()
        batch_count = len(labels)
        # Update num_correct and num_count
        self.num_correct += batch_correct
        self.num_count += batch_count

    def accumulate(self):
        # Compute the overall metric from the accumulated counts
        if self.num_count == 0:
            return 0
        return self.num_correct / self.num_count

    def reset(self):
        # Reset the counts
        self.num_correct = 0
        self.num_count = 0

    def name(self):
        return "Accuracy"
# Visualization
def plot(runner, fig_name):
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    train_items = runner.train_step_losses[::30]
    train_steps = [x[0] for x in train_items]
    train_losses = [x[1] for x in train_items]
    plt.plot(train_steps, train_losses, color='#8E004D', label="Train loss")
    if runner.dev_losses[0][0] != -1:
        dev_steps = [x[0] for x in runner.dev_losses]
        dev_losses = [x[1] for x in runner.dev_losses]
        plt.plot(dev_steps, dev_losses, color='#E20079', linestyle='--', label="Dev loss")
    # Axis labels and legend
    plt.ylabel("loss", fontsize='x-large')
    plt.xlabel("step", fontsize='x-large')
    plt.legend(loc='upper right', fontsize='x-large')
    plt.subplot(1, 2, 2)
    # Plot the accuracy curve on the dev set
    if runner.dev_losses[0][0] != -1:
        plt.plot(dev_steps, runner.dev_scores,
                 color='#E20079', linestyle="--", label="Dev accuracy")
    else:
        plt.plot(list(range(len(runner.dev_scores))), runner.dev_scores,
                 color='#E20079', linestyle="--", label="Dev accuracy")
    # Axis labels and legend
    plt.ylabel("score", fontsize='x-large')
    plt.xlabel("step", fontsize='x-large')
    plt.legend(loc='lower right', fontsize='x-large')
    plt.savefig(fig_name)
    plt.show()
That is everything carried over from the previous experiment; if you don't have it, you can find it right here.
Train the model on the training and validation sets for 5 epochs, saving the model with the highest validation accuracy as the best model. The code is as follows:
torch.manual_seed(100)
# Learning rate
lr = 0.005
# Batch size
batch_size = 64
# Load the data
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = data.DataLoader(dev_dataset, batch_size=batch_size)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size)
# Define the network: the deep network without the residual structure
model = Model_ResNet18(in_channels=1, num_classes=10, use_residual=False)
# Define the optimizer
optimizer = opt.SGD(lr=lr, params=model.parameters())
# Define the loss function
loss_fn = F.cross_entropy
# Define the evaluation metric
metric = metric.Accuracy(is_logist=True)
# Instantiate RunnerV3
runner = RunnerV3(model, optimizer, loss_fn, metric)
# Start training
log_steps = 15
eval_steps = 15
runner.train(train_loader, dev_loader, num_epochs=5, log_steps=log_steps,
             eval_steps=eval_steps, save_path="best_model.pdparams")
# Visualize the loss on the training and dev sets
plot(runner, 'cnn-loss2.pdf')
The output is:
[Train] epoch: 0/5, step: 0/160, loss: 2.34224
[Train] epoch: 0/5, step: 15/160, loss: 1.31700
C:\Users\LENOVO\PycharmProjects\pythonProject\深度学习\metric.py:62: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
batch_correct = torch.sum(torch.tensor(preds == labels, dtype=torch.float32)).numpy()
[Evaluate] dev score: 0.09500, dev loss: 2.30257
[Evaluate] best accuracy performence has been updated: 0.00000 --> 0.09500
[Train] epoch: 0/5, step: 30/160, loss: 0.66286
[Evaluate] dev score: 0.11500, dev loss: 2.29716
[Evaluate] best accuracy performence has been updated: 0.09500 --> 0.11500
[Train] epoch: 1/5, step: 45/160, loss: 0.39897
[Evaluate] dev score: 0.55000, dev loss: 1.51973
[Evaluate] best accuracy performence has been updated: 0.11500 --> 0.55000
[Train] epoch: 1/5, step: 60/160, loss: 0.31825
[Evaluate] dev score: 0.92500, dev loss: 0.40170
[Evaluate] best accuracy performence has been updated: 0.55000 --> 0.92500
[Train] epoch: 2/5, step: 75/160, loss: 0.12904
[Evaluate] dev score: 0.93500, dev loss: 0.22754
[Evaluate] best accuracy performence has been updated: 0.92500 --> 0.93500
[Train] epoch: 2/5, step: 90/160, loss: 0.13832
[Evaluate] dev score: 0.90500, dev loss: 0.27319
[Train] epoch: 3/5, step: 105/160, loss: 0.10297
[Evaluate] dev score: 0.93000, dev loss: 0.24763
[Train] epoch: 3/5, step: 120/160, loss: 0.12388
[Evaluate] dev score: 0.93000, dev loss: 0.18329
[Train] epoch: 4/5, step: 135/160, loss: 0.20459
[Evaluate] dev score: 0.91000, dev loss: 0.25516
[Train] epoch: 4/5, step: 150/160, loss: 0.09298
[Evaluate] dev score: 0.93500, dev loss: 0.18513
[Evaluate] dev score: 0.89500, dev loss: 0.30162
[Train] Training done!
Evaluate the best model saved during training on the test data and check the accuracy and loss on the test set. The code is as follows:
# Load the best model
runner.load_model('best_model.pdparams')
# Evaluate the model
score, loss = runner.evaluate(test_loader)
print("[Test] accuracy/loss: {:.4f}/{:.4f}".format(score, loss))
The output is:
[Test] accuracy/loss: 0.9550/0.2424
From the output, compared with the LeNet-5 evaluation results, simply making the network deeper does not improve training here; it actually gets worse.
Now repeat the experiment with the ResNet18 that uses residual connections. The code is as follows:
# Learning rate
lr = 0.01
# Batch size
batch_size = 64
# Load the data
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader = data.DataLoader(dev_dataset, batch_size=batch_size)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size)
# Define the network: set use_residual=True to use the residual structure
model = Model_ResNet18(in_channels=1, num_classes=10, use_residual=True)
# Define the optimizer
optimizer = opt.SGD(lr=lr, params=model.parameters())
# Instantiate RunnerV3
runner = RunnerV3(model, optimizer, loss_fn, metric)
# Start training
log_steps = 15
eval_steps = 15
runner.train(train_loader, dev_loader, num_epochs=5, log_steps=log_steps,
             eval_steps=eval_steps, save_path="best_model.pdparams")
# Visualize the loss on the training and dev sets
plot(runner, 'cnn-loss3.pdf')
The output is:
[Train] epoch: 0/5, step: 0/160, loss: 2.46978
[Train] epoch: 0/5, step: 15/160, loss: 0.52145
[Evaluate] dev score: 0.19000, dev loss: 2.29718
[Evaluate] best accuracy performence has been updated: 0.00000 --> 0.19000
[Train] epoch: 0/5, step: 30/160, loss: 0.22503
[Evaluate] dev score: 0.39500, dev loss: 1.75715
[Evaluate] best accuracy performence has been updated: 0.19000 --> 0.39500
[Train] epoch: 1/5, step: 45/160, loss: 0.13266
[Evaluate] dev score: 0.90000, dev loss: 0.37835
[Evaluate] best accuracy performence has been updated: 0.39500 --> 0.90000
[Train] epoch: 1/5, step: 60/160, loss: 0.07993
[Evaluate] dev score: 0.90500, dev loss: 0.23769
[Evaluate] best accuracy performence has been updated: 0.90000 --> 0.90500
[Train] epoch: 2/5, step: 75/160, loss: 0.03920
[Evaluate] dev score: 0.94500, dev loss: 0.13020
[Evaluate] best accuracy performence has been updated: 0.90500 --> 0.94500
[Train] epoch: 2/5, step: 90/160, loss: 0.04129
[Evaluate] dev score: 0.95500, dev loss: 0.11184
[Evaluate] best accuracy performence has been updated: 0.94500 --> 0.95500
[Train] epoch: 3/5, step: 105/160, loss: 0.01144
[Evaluate] dev score: 0.95500, dev loss: 0.10348
[Train] epoch: 3/5, step: 120/160, loss: 0.00599
[Evaluate] dev score: 0.96500, dev loss: 0.09905
[Evaluate] best accuracy performence has been updated: 0.95500 --> 0.96500
[Train] epoch: 4/5, step: 135/160, loss: 0.00453
[Evaluate] dev score: 0.95500, dev loss: 0.09177
[Train] epoch: 4/5, step: 150/160, loss: 0.00663
[Evaluate] dev score: 0.95500, dev loss: 0.08366
[Evaluate] dev score: 0.84000, dev loss: 0.53131
[Train] Training done!
Evaluate the best model saved during training on the test data and check the accuracy and loss on the test set.
# Load the best model
runner.load_model('best_model.pdparams')
# Evaluate the model
score, loss = runner.evaluate(test_loader)
print("[Test] accuracy/loss: {:.4f}/{:.4f}".format(score, loss))
The output is:
[Test] accuracy/loss: 0.9800/0.0517
With the residual connections added, the convergence curve is noticeably smoother.
From the output, compared with the ResNet without residual connections, adding the residual connections clearly improves the model.
For a classic image-classification network like ResNet18, the high-level API (torchvision here) already provides a ready-made implementation, so you do not have to build it from scratch. Below, the torchvision resnet18 model and the custom resnet18 model are given the same weights and fed the same input, to check whether their outputs agree.
import warnings
# warnings.filterwarnings("ignore")
# Use the resnet18 implemented in torchvision; by default it expects 3 input channels and outputs 1000 classes
hapi_model = resnet18(pretrained=True)
# The custom resnet18 model
model = Model_ResNet18(in_channels=3, num_classes=1000, use_residual=True)
# Get the weights of the torchvision network
params = hapi_model.state_dict()
# Dictionary for the weights after renaming the parameters
new_params = {}
# Map the parameter names
for key in params:
    if 'layer' in key:
        if 'downsample.0' in key:
            new_params['net.' + key[5:8] + '.shortcut' + key[-7:]] = params[key]
        elif 'downsample.1' in key:
            new_params['net.' + key[5:8] + '.shorcutt' + key[23:]] = params[key]
        else:
            new_params['net.' + key[5:]] = params[key]
    elif 'conv1.weight' == key:
        new_params['net.0.0.weight'] = params[key]
    elif 'bn1' in key:
        new_params['net.0.1' + key[3:]] = params[key]
    elif 'fc' in key:
        new_params['net.7' + key[2:]] = params[key]
# Assign the torchvision resnet18 weights to the custom resnet18 model so the two stay consistent
del new_params["net.2.0.shorcutteight"]
del new_params["net.2.0.shorcuttias"]
del new_params["net.2.0.shorcuttunning_mean"]
del new_params["net.2.0.shorcuttunning_var"]
del new_params["net.2.0.shorcuttum_batches_tracked"]
del new_params["net.3.0.shorcutteight"]
del new_params["net.3.0.shorcuttias"]
del new_params["net.3.0.shorcuttunning_mean"]
del new_params["net.3.0.shorcuttunning_var"]
del new_params["net.3.0.shorcuttum_batches_tracked"]
del new_params["net.4.0.shorcutteight"]
del new_params["net.4.0.shorcuttias"]
del new_params["net.4.0.shorcuttunning_mean"]
del new_params["net.4.0.shorcuttunning_var"]
del new_params["net.4.0.shorcuttum_batches_tracked"]
# model.load_state_dict(torch.load("best_model.pdparams"))
# model.load_state_dict(new_params)
# Use np.random to create a random array as test data
inputs = np.random.randn(*[3, 3, 32, 32])
inputs = inputs.astype('float32')
x = torch.tensor(inputs)
output = hapi_model(x)
hapi_out = hapi_model(x)
# Compute the difference between the two outputs
diff = output - hapi_out
# Take the largest difference
max_diff = torch.max(diff)
print(max_diff)
The output is:
tensor(0., grad_fn=<MaxBackward1>)
There is a small problem here, which I will get to below. First, a few words about torchvision.models: this module is genuinely convenient; using it feels a lot like using sklearn, although personally I find sklearn a bit more convenient for training and more general-purpose.
The torchvision.models module contains implementations of the following model architectures. They can be created with randomly initialized weights, or with pretrained=True to get weights pre-trained on ImageNet; the list below is condensed from the official PyTorch documentation.
A quick summary of how they are used:
torchvision.models.alexnet(pretrained=False, **kwargs): AlexNet; pretrained (bool): if True, returns a model pre-trained on ImageNet.
torchvision.models.resnet18(pretrained=False, **kwargs): ResNet-18; pretrained (bool): if True, returns a model pre-trained on ImageNet.
torchvision.models.resnet34(pretrained=False, **kwargs): ResNet-34; same pretrained parameter.
torchvision.models.resnet50(pretrained=False, **kwargs): ResNet-50; same pretrained parameter.
torchvision.models.resnet101(pretrained=False, **kwargs): ResNet-101; same pretrained parameter.
torchvision.models.resnet152(pretrained=False, **kwargs): ResNet-152; same pretrained parameter.
torchvision.models.vgg11(pretrained=False, **kwargs): VGG 11-layer model (configuration "A"); same pretrained parameter.
torchvision.models.vgg11_bn(**kwargs): VGG 11-layer model (configuration "A") with batch normalization.
torchvision.models.vgg13(pretrained=False, **kwargs): VGG 13-layer model (configuration "B"); same pretrained parameter.
torchvision.models.vgg13_bn(**kwargs): VGG 13-layer model (configuration "B") with batch normalization.
torchvision.models.vgg16(pretrained=False, **kwargs): VGG 16-layer model (configuration "D"); pretrained (bool): if True, returns a model pre-trained on ImageNet.
torchvision.models.vgg16_bn(**kwargs): VGG 16-layer model (configuration "D") with batch normalization.
torchvision.models.vgg19(pretrained=False, **kwargs): VGG 19-layer model (configuration "E"); same pretrained parameter.
torchvision.models.vgg19_bn(**kwargs): VGG 19-layer model (configuration "E") with batch normalization.
This is how resnet18 is used; the parameter that really matters is that pretrained flag at the end, i.e. whether to load the weights pre-trained on ImageNet. It is very important and will come up again in the next experiment.
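As a small example of that flag (my own sketch, not part of the original experiment): load the ImageNet-pretrained resnet18 and swap its final fully connected layer for a 10-class head before fine-tuning.
from torchvision.models import resnet18
import torch.nn as nn

# Load weights pre-trained on ImageNet
pretrained_model = resnet18(pretrained=True)
# Replace the 1000-class head with a 10-class one before fine-tuning on a 10-class task
pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)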
Now the problem I ran into. It showed up during the weight copying in 5.4.4: because the parameter names do not line up, some parameters cannot be assigned, some entries have nothing to map onto, and the following error appears.
RuntimeError: Error(s) in loading state_dict for Model_ResNet18:
Missing key(s) in state_dict: "net.0.0.bias", "net.1.0.conv1.bias", "net.1.0.conv2.bias", "net.1.1.conv1.bias", "net.1.1.conv2.bias", "net.2.0.conv1.bias", "net.2.0.conv2.bias", "net.2.0.shortcut.bias", "net.2.0.bn3.weight", "net.2.0.bn3.bias", "net.2.0.bn3.running_mean", "net.2.0.bn3.running_var", "net.2.1.conv1.bias", "net.2.1.conv2.bias", "net.3.0.conv1.bias", "net.3.0.conv2.bias", "net.3.0.shortcut.bias", "net.3.0.bn3.weight", "net.3.0.bn3.bias", "net.3.0.bn3.running_mean", "net.3.0.bn3.running_var", "net.3.1.conv1.bias", "net.3.1.conv2.bias", "net.4.0.conv1.bias", "net.4.0.conv2.bias", "net.4.0.shortcut.bias", "net.4.0.bn3.weight", "net.4.0.bn3.bias", "net.4.0.bn3.running_mean", "net.4.0.bn3.running_var", "net.4.1.conv1.bias", "net.4.1.conv2.bias".
Unexpected key(s) in state_dict: "net.2.0.shorcutteight", "net.2.0.shorcuttias", "net.2.0.shorcuttunning_mean", "net.2.0.shorcuttunning_var", "net.2.0.shorcuttum_batches_tracked", "net.3.0.shorcutteight", "net.3.0.shorcuttias", "net.3.0.shorcuttunning_mean", "net.3.0.shorcuttunning_var", "net.3.0.shorcuttum_batches_tracked", "net.4.0.shorcutteight", "net.4.0.shorcuttias", "net.4.0.shorcuttunning_mean", "net.4.0.shorcuttunning_var", "net.4.0.shorcuttum_batches_tracked".
The error means there are extra entries on one side and missing ones on the other. I tried all sorts of things and could not get them to line up, so in the end I fell back on the most direct approach and simply deleted the extra entries, which is what the del statements in the listing above do.
That gets it running, but I am still a beginner, so I hope the teacher and the more experienced readers can show me a better way.
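For what it's worth, after comparing the key names, the mismatch seems to have two causes: the branch for downsample.1 writes 'shorcutt' (a typo) and slices the key so that the generated names ('net.2.0.shorcutteight' and so on) match nothing, even though those BatchNorm statistics really belong to bn3 in the custom ResBlock; and the missing *.bias keys appear because the custom Conv2d layers keep the default bias=True while torchvision's convolutions are created with bias=False. Under those assumptions, a minimal sketch of a cleaner mapping (my own attempt, not verified against the original run) could look like this:
# Map torchvision resnet18 parameter names onto the custom Model_ResNet18 names.
# Assumes the custom model's Conv2d layers are built with bias=False, or that
# load_state_dict is called with strict=False so the unmatched bias keys are skipped.
new_params = {}
for key, value in hapi_model.state_dict().items():
    if key.startswith('layer'):
        # e.g. 'layer2.0.downsample.0.weight' -> block prefix 'net.2.0'
        prefix = 'net.' + key[5:8]
        if 'downsample.0' in key:                       # 1x1 conv on the shortcut branch
            new_params[prefix + '.shortcut.' + key.split('.')[-1]] = value
        elif 'downsample.1' in key:                     # BN after the 1x1 conv -> bn3
            new_params[prefix + '.bn3.' + key.split('.')[-1]] = value
        else:
            new_params['net.' + key[5:]] = value
    elif key == 'conv1.weight':
        new_params['net.0.0.weight'] = value
    elif key.startswith('bn1'):
        new_params['net.0.1' + key[3:]] = value
    elif key.startswith('fc'):
        new_params['net.7' + key[2:]] = value

# strict=False ignores keys that exist on only one side (e.g. the conv biases)
model.load_state_dict(new_params, strict=False)
With the weights actually loaded, the comparison should then be between the two different models, e.g. torch.max(torch.abs(model(x) - hapi_model(x))), and both models should be switched to eval() first so that batch normalization uses its running statistics; for the outputs to match exactly, the custom convolutions would also need bias=False.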
First, I again wrote this up in a lot of detail, but the way I solved that parameter-name problem does not feel great, so I hope the teacher and the more experienced readers can give me some pointers.
Second, I learned a lot, above all the idea of a residual connection. I had run such models before without really thinking about what the residual part does or what difference it makes; after this experiment I finally understand it.
Also, because I worked through the previous experiment carefully, a lot of it was reused in this assignment, so the earlier effort clearly paid off (haha).
Finally, of course, thanks to the teacher for all the care in both study and life (haha).