Below is the ResNeXt code:
import torch
from torch import nn
import torch
import torch.nn as nn
class Block(nn.Module):
    """ResNeXt bottleneck block: 1x1 reduce -> 3x3 grouped conv (cardinality 32) -> 1x1 expand.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels of the block output (internal width is out_channels // 2).
        stride: stride of the first 1x1 conv (2 downsamples at a stage boundary).
        is_shortcut: True for the first block of a stage, which needs a projection
            shortcut to match the residual's channels/resolution.
    """

    def __init__(self, in_channels, out_channels, stride=1, is_shortcut=False):
        super(Block, self).__init__()
        # inplace=True overwrites the input tensor, saving a copy.
        self.relu = nn.ReLU(inplace=True)
        self.is_shortcut = is_shortcut
        # 1x1 conv reduces the width; BatchNorm keeps activations stable before ReLU.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels // 2, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        # 3x3 grouped convolution; groups=32 is the ResNeXt cardinality.
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels // 2, kernel_size=3, stride=1, padding=1,
                      groups=32, bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        # 1x1 conv restores the width; no ReLU here (applied after the residual add).
        self.conv3 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
        )
        if is_shortcut:
            # Projection shortcut. BUG FIX: was bias=1 (a truthy int, i.e. bias=True);
            # a conv bias is redundant before BatchNorm and inconsistent with every
            # other conv in this block, so it is now bias=False.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        """Residual forward pass: relu(F(x) + shortcut(x))."""
        x_shortcut = x
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        if self.is_shortcut:
            x_shortcut = self.shortcut(x_shortcut)
        x = x + x_shortcut
        x = self.relu(x)
        return x
class Resnext(nn.Module):
    """ResNeXt-50-style classifier: stem conv, four residual stages, global pool, linear head.

    Args:
        num_classes: size of the output logit vector.
        layer: number of Blocks per stage (default [3, 4, 6, 3], the ResNeXt-50 layout).
    """

    def __init__(self, num_classes, layer=[3, 4, 6, 3]):
        super(Resnext, self).__init__()
        # Stem: 7x7/2 conv + BN + ReLU + 3x3/2 max-pool (4x spatial reduction total).
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        # Four stages; each after the first halves resolution and doubles width.
        self.conv2 = self._make_layer(64, 256, 1, num=layer[0])
        self.conv3 = self._make_layer(256, 512, 2, num=layer[1])
        self.conv4 = self._make_layer(512, 1024, 2, num=layer[2])
        self.conv5 = self._make_layer(1024, 2048, 2, num=layer[3])
        # Pool each 2048-channel map down to 1x1 before the classifier.
        self.global_average_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        # Run the stem and the four stages in order.
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        x = self.global_average_pool(x)
        x = torch.flatten(x, 1)  # (N, 2048, 1, 1) -> (N, 2048)
        return self.fc(x)

    def _make_layer(self, in_channels, out_channels, stride, num):
        """Build one stage: a projection Block followed by num-1 identity Blocks."""
        blocks = [Block(in_channels, out_channels, stride=stride, is_shortcut=True)]
        blocks.extend(
            Block(out_channels, out_channels, stride=1, is_shortcut=False)
            for _ in range(num - 1)
        )
        return nn.Sequential(*blocks)
import time
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
def load_dataset(batch_size):
    """Download CIFAR-10 under data/cifar-10 (if absent) and return (train_loader, test_loader)."""
    # ToTensor converts PIL images / ndarrays to float tensors scaled into [0, 1].
    to_tensor = transforms.ToTensor()
    train_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=True, download=True, transform=to_tensor
    )
    test_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=False, download=True, transform=to_tensor
    )

    def make_loader(dataset):
        # shuffle=True randomizes batch order each epoch;
        # num_workers=0 loads data in the main process (no worker subprocesses).
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=True, num_workers=0
        )

    return make_loader(train_set), make_loader(test_set)
# 训练模型
def train(net, train_iter, criterion, optimizer, num_epochs, device, num_print, lr_scheduler=None, test_iter=None):
    """Train `net` for num_epochs, optionally evaluating on test_iter after each epoch.

    Args:
        net: model to train (moved to `device` by the caller).
        train_iter: iterable of (X, y) batches.
        criterion: loss function.
        optimizer: optimizer over net.parameters().
        num_epochs: number of passes over train_iter.
        device: device string/object batches are moved to.
        num_print: print running stats every num_print steps.
        lr_scheduler: optional scheduler, stepped once per epoch.
        test_iter: optional eval loader; when given, test() runs after each epoch.

    Returns:
        (record_train, record_test): per-epoch train/test accuracy in percent.

    BUG FIX: the inner loop used to recompute `device` on every batch
    ("cuda:0" if available else "cpu"), silently overriding the `device`
    argument; it now honors the caller's choice.
    """
    net.train()
    record_train = list()  # per-epoch training accuracy
    record_test = list()   # per-epoch test accuracy (only filled when test_iter is given)
    for epoch in range(num_epochs):
        print("========== epoch: [{}/{}] ==========".format(epoch + 1, num_epochs))
        total, correct, train_loss = 0, 0, 0
        start = time.time()
        for i, (X, y) in enumerate(train_iter):
            X, y = X.to(device), y.to(device)
            output = net(X)
            loss = criterion(output, y)
            optimizer.zero_grad()  # clear gradients from the previous step
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            total += y.size(0)
            # argmax over dim=1 gives the predicted class index per sample.
            correct += (output.argmax(dim=1) == y).sum().item()
            train_acc = 100.0 * correct / total
            if (i + 1) % num_print == 0:
                print("step: [{}/{}], train_loss: {:.3f} | train_acc: {:6.3f}% | lr: {:.6f}" \
                    .format(i + 1, len(train_iter), train_loss / (i + 1), \
                    train_acc, get_cur_lr(optimizer)))
        if lr_scheduler is not None:
            lr_scheduler.step()  # step the learning-rate schedule once per epoch
        print("--- cost time: {:.4f}s ---".format(time.time() - start))
        if test_iter is not None:
            # test() switches the net back to training mode before returning.
            record_test.append(test(net, test_iter, criterion, device))
        record_train.append(train_acc)
    return record_train, record_test
# 验证模型
def test(net, test_iter, criterion, device):
total, correct = 0, 0
net.eval()# 测试模式
with torch.no_grad():
print("*************** test ***************")
for X, y in test_iter:
X, y = X.to(device), y.to(device)# CPU or GPU运行
output = net(X) # 计算输出
loss = criterion(output, y) # 计算损失
total += y.size(0) # 计算测试集总样本数
correct += (output.argmax(dim=1) == y).sum().item() # 计算测试集预测准确的样本数
test_acc = 100.0 * correct / total # 测试集准确率
# 输出测试集的损失
print("test_loss: {:.3f} | test_acc: {:6.3f}%" \
.format(loss.item(), test_acc))
print("************************************\n")
# 训练模式 (因为这里是因为每经过一个Epoch就使用测试集一次,使用测试集后,进入下一个Epoch前将模型重新置于训练模式)
net.train()
return test_acc
# 返回学习率lr的函数
def get_cur_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group (None if there are none)."""
    groups = optimizer.param_groups
    return groups[0]['lr'] if groups else None
# 画出每一个Epoch下测试集和训练集的准确率
def learning_curve(record_train, record_test=None):
    """Plot per-epoch train (and optionally test) accuracy curves and show the figure."""
    plt.style.use("ggplot")
    epochs = len(record_train)
    plt.plot(range(1, epochs + 1), record_train, label="train acc")
    if record_test is not None:
        plt.plot(range(1, len(record_test) + 1), record_test, label="test acc")
    plt.legend(loc=4)  # legend in the lower-right corner
    plt.title("learning curve")
    plt.xticks(range(0, epochs + 1, 5))
    plt.yticks(range(0, 101, 5))
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.show()
import torch.optim as optim
BATCH_SIZE = 128 # mini-batch size
NUM_EPOCHS = 12 # number of training epochs
NUM_CLASSES = 10 # number of output classes
LEARNING_RATE = 0.01 # SGD learning rate
MOMENTUM = 0.9 # SGD momentum
WEIGHT_DECAY = 0.0005 # weight-decay (L2 regularization) coefficient
NUM_PRINT = 100 # log training stats every NUM_PRINT steps
DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # run on GPU when available
def main():
    """End-to-end ResNeXt training: build the model, load CIFAR-10, train, plot accuracy."""
    net = Resnext(NUM_CLASSES)
    net = net.to(DEVICE)
    train_iter, test_iter = load_dataset(BATCH_SIZE)
    criterion = nn.CrossEntropyLoss()
    # SGD with Nesterov momentum over all model parameters.
    optimizer = optim.SGD(
        net.parameters(),
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY,
        nesterov=True
    )
    # Multiply the LR by gamma every step_size epochs.
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    record_train, record_test = train(net, train_iter, criterion, optimizer,
                                      NUM_EPOCHS, DEVICE, NUM_PRINT, lr_scheduler, test_iter)
    learning_curve(record_train, record_test)


# Guarded entry point so importing this file does not launch a training run.
if __name__ == "__main__":
    main()
Below is the SE-ResNet code:
## Import third-party libraries
from torch import nn
import time
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.optim as optim
# 搭建基于SENet的Conv Block和Identity Block的网络结构
class Block(nn.Module):
    """SE-ResNet bottleneck: 1x1 -> 3x3 -> 1x1 convs gated by a squeeze-and-excitation branch.

    Args:
        in_channels: channels of the input feature map.
        filters: (filter1, filter2, filter3) output channels of the three convs
            (filter1 == filter2 == filter3 / 4 in the standard configuration).
        stride: stride of the first conv (2 at a stage boundary, except stage 1).
        is_1x1conv: True for a Conv Block, which projects the shortcut; False for
            an Identity Block, whose shortcut is the input itself.
    """

    def __init__(self, in_channels, filters, stride=1, is_1x1conv=False):
        super(Block, self).__init__()
        filter1, filter2, filter3 = filters
        self.is_1x1conv = is_1x1conv
        self.relu = nn.ReLU(inplace=True)

        def conv_bn_relu(cin, cout, k, s, pad):
            # Shared Conv + BN + ReLU building pattern for the first two sub-blocks.
            return nn.Sequential(
                nn.Conv2d(cin, cout, kernel_size=k, stride=s, padding=pad, bias=False),
                nn.BatchNorm2d(cout),
                nn.ReLU()
            )

        self.conv1 = conv_bn_relu(in_channels, filter1, 1, stride, 0)
        self.conv2 = conv_bn_relu(filter1, filter2, 3, 1, 1)
        # Final 1x1 conv: no ReLU, since activation happens after the residual add.
        self.conv3 = nn.Sequential(
            nn.Conv2d(filter2, filter3, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(filter3),
        )
        if is_1x1conv:
            # Conv Block: project the shortcut to match channels/resolution.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, filter3, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(filter3)
            )
        # Squeeze-and-excitation: global pool, channel bottleneck (reduction r=16,
        # 1x1 convs standing in for fully-connected layers), sigmoid gate.
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Conv2d(filter3, filter3 // 16, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(filter3 // 16, filter3, kernel_size=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        residual = x
        out = self.conv3(self.conv2(self.conv1(x)))
        out = out * self.se(out)  # per-channel reweighting by the SE gate
        if self.is_1x1conv:
            residual = self.shortcut(residual)
        return self.relu(out + residual)
# 搭建SEResNet50
class SEResnet(nn.Module):
    """SE-ResNet-50-style classifier assembled from `Block`s.

    Args:
        cfg: dict with 'num' (blocks per stage, e.g. (3, 4, 6, 3)) and
             'classes' (number of output classes).
    """

    def __init__(self, cfg):
        super(SEResnet, self).__init__()
        classes = cfg['classes']
        num = cfg['num']
        # Stem: 7x7/2 conv + BN + ReLU + 3x3/2 max-pool.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        # Four stages; each after the first halves resolution and doubles width.
        self.Stage1 = self._make_layer(in_channels=64, filters=(64, 64, 256), num=num[0], stride=1)
        self.Stage2 = self._make_layer(in_channels=256, filters=(128, 128, 512), num=num[1], stride=2)
        self.Stage3 = self._make_layer(in_channels=512, filters=(256, 256, 1024), num=num[2], stride=2)
        self.Stage4 = self._make_layer(in_channels=1024, filters=(512, 512, 2048), num=num[3], stride=2)
        # Pool each 2048-channel map to 1x1 ahead of the classifier head.
        self.global_average_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Linear(2048, classes)
        )

    def _make_layer(self, in_channels, filters, num, stride=1):
        """Build one stage: a Conv Block followed by num-1 Identity Blocks."""
        layers = [Block(in_channels, filters, stride=stride, is_1x1conv=True)]
        layers += [Block(filters[2], filters, stride=1, is_1x1conv=False) for _ in range(1, num)]
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)  # stem
        for stage in (self.Stage1, self.Stage2, self.Stage3, self.Stage4):
            x = stage(x)
        x = self.global_average_pool(x)
        x = torch.flatten(x, 1)  # (N, 2048, 1, 1) -> (N, 2048)
        return self.fc(x)
# SeResNet50的参数 (注意调用这个函数将间接调用SEResnet,这里单独编写一个函数是为了方便修改成其它ResNet网络的结构)
def SeResNet50(num_classes=10):
    """Build an SE-ResNet-50: (3, 4, 6, 3) Blocks per stage.

    Generalized: `num_classes` is now a parameter (default 10, matching the
    CIFAR-10 setup used by this script), so other datasets can reuse the builder.
    Also fixes the misleading `'classes': (10)` — the parentheses were a no-op,
    not a tuple.
    """
    cfg = {
        'num': (3, 4, 6, 3),  # blocks per stage: 1 Conv Block + the rest Identity Blocks
        'classes': num_classes,
    }
    return SEResnet(cfg)
## Load the dataset
def load_dataset(batch_size):
    """Download CIFAR-10 under data/cifar-10 (if absent) and return (train_loader, test_loader)."""
    transform = transforms.ToTensor()  # PIL image / ndarray -> float tensor in [0, 1]
    train_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=True, download=True, transform=transform
    )
    test_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=False, download=True, transform=transform
    )

    def make_loader(dataset):
        # Four worker processes feed batches; order is reshuffled every epoch.
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=True, num_workers=4
        )

    return make_loader(train_set), make_loader(test_set)
# 训练模型
def train(net, train_iter, criterion, optimizer, num_epochs, device, num_print, lr_scheduler=None, test_iter=None):
    """Train `net` for `num_epochs`; return per-epoch (train_acc, test_acc) lists in percent.

    Stats are printed every `num_print` steps; when `test_iter` is given, the model
    is evaluated after every epoch (test() restores training mode itself).
    """
    net.train()  # training mode
    record_train, record_test = [], []
    for epoch in range(num_epochs):
        print("========== epoch: [{}/{}] ==========".format(epoch + 1, num_epochs))
        seen, hits, running_loss = 0, 0, 0
        start = time.time()
        for step, (X, y) in enumerate(train_iter, start=1):
            X, y = X.to(device), y.to(device)
            output = net(X)
            loss = criterion(output, y)
            optimizer.zero_grad()  # clear stale gradients
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            seen += y.size(0)
            # argmax over dim=1 is the predicted class per sample.
            hits += (output.argmax(dim=1) == y).sum().item()
            train_acc = 100.0 * hits / seen
            if step % num_print == 0:
                print("step: [{}/{}], train_loss: {:.3f} | train_acc: {:6.3f}% | lr: {:.6f}" \
                    .format(step, len(train_iter), running_loss / step, \
                    train_acc, get_cur_lr(optimizer)))
        if lr_scheduler is not None:
            lr_scheduler.step()  # one scheduler step per epoch
        print("--- cost time: {:.4f}s ---".format(time.time() - start))
        if test_iter is not None:
            record_test.append(test(net, test_iter, criterion, device))
        record_train.append(train_acc)
    return record_train, record_test
# 验证模型
def test(net, test_iter, criterion, device):
total, correct = 0, 0
net.eval() # 测试模式
with torch.no_grad(): # 不计算梯度
print("*************** test ***************")
for X, y in test_iter:
X, y = X.to(device), y.to(device) # CPU or GPU运行
output = net(X) # 计算输出
loss = criterion(output, y) # 计算损失
total += y.size(0) # 计算测试集总样本数
correct += (output.argmax(dim=1) == y).sum().item() # 计算测试集预测准确的样本数
test_acc = 100.0 * correct / total # 测试集准确率
# 输出测试集的损失
print("test_loss: {:.3f} | test_acc: {:6.3f}%" \
.format(loss.item(), test_acc))
print("************************************\n")
# 训练模式 (因为这里是因为每经过一个Epoch就使用测试集一次,使用测试集后,进入下一个Epoch前将模型重新置于训练模式)
net.train()
return test_acc
# 返回学习率lr的函数
def get_cur_lr(optimizer):
    """Return the 'lr' of the optimizer's first parameter group (None if there are none)."""
    return next((group['lr'] for group in optimizer.param_groups), None)
# 画出每一个Epoch下测试集和训练集的准确率
def learning_curve(record_train, record_test=None):
    """Plot accuracy-vs-epoch curves for the training (and optionally test) records."""
    plt.style.use("ggplot")
    n_epochs = len(record_train)
    plt.plot(range(1, n_epochs + 1), record_train, label="train acc")
    if record_test is not None:
        plt.plot(range(1, len(record_test) + 1), record_test, label="test acc")
    plt.legend(loc=4)  # lower-right corner
    plt.title("learning curve")
    plt.xticks(range(0, n_epochs + 1, 5))
    plt.yticks(range(0, 101, 5))
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.show()
BATCH_SIZE = 128 # mini-batch size
NUM_EPOCHS = 12 # number of training epochs
NUM_CLASSES = 10 # number of output classes
LEARNING_RATE = 0.01 # SGD learning rate
MOMENTUM = 0.9 # SGD momentum
WEIGHT_DECAY = 0.0005 # weight-decay (L2 regularization) coefficient
NUM_PRINT = 100 # log training stats every NUM_PRINT steps
DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # run on GPU when available
def main():
    """End-to-end SE-ResNet training: build the model, load CIFAR-10, train, plot accuracy."""
    net = SeResNet50()
    net = net.to(DEVICE)
    train_iter, test_iter = load_dataset(BATCH_SIZE)
    criterion = nn.CrossEntropyLoss()
    # SGD with Nesterov momentum over all model parameters.
    optimizer = optim.SGD(
        net.parameters(),
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY,
        nesterov=True
    )
    # Multiply the LR by gamma every step_size epochs.
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    record_train, record_test = train(net, train_iter, criterion, optimizer, NUM_EPOCHS, DEVICE, NUM_PRINT,
                                      lr_scheduler, test_iter)
    learning_curve(record_train, record_test)


# Guarded entry point so importing this file does not launch a training run.
if __name__ == "__main__":
    main()