A detailed walkthrough of a PyTorch ResNeXt implementation

ResNeXt is a typical hybrid model: it is built by combining the basic ideas of Inception and ResNet. Once you have studied it, you can combine the models you have already learned in the same way; the whole point of mastering the essence of each model is to be able to fuse them into new ones.
Step one: understand what the figure below shows.
[Figure 1: the three equivalent forms of the ResNeXt block, (a), (b) and (c)]
These are the three forms of the ResNeXt block. They are equivalent, but form (c) is the easiest to build in code, so the code below follows (c). The essence of ResNeXt is grouped convolution (the groups argument). I already explained how grouping works in my earlier MobileNet post, so I won't cover groups again here. In short, form (a) splits the convolution into 32 parallel paths and sums the results; the groups parameter of nn.Conv2d does this splitting for us automatically, which makes the code much simpler to write.
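To get a feel for what groups does, here is a minimal sketch (independent of the model code below; 128 channels and 32 groups are illustrative values that match the grouped 3x3 inside the first stage's blocks). A grouped convolution produces the same output shape as a full convolution, but with only 1/32 of the weights:

import torch
import torch.nn as nn

# a full 3x3 convolution vs. the same convolution split into 32 groups
full = nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False)
grouped = nn.Conv2d(128, 128, kernel_size=3, padding=1, groups=32, bias=False)

x = torch.rand(1, 128, 56, 56)
print(full(x).shape, grouped(x).shape)              # both: torch.Size([1, 128, 56, 56])
print(full.weight.numel(), grouped.weight.numel())  # 147456 vs. 4608 parameters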
[Figure 2: stage-by-stage table comparing ResNet and ResNeXt]
Look at the table carefully: apart from the channel widths, ResNet and ResNeXt use exactly the same parameters, so my earlier detailed ResNet walkthrough applies here as well. Briefly, the overall flow is this: the image first passes through conv1, then pool1, and then the first conv2 block. Notice that the output size in the table does not change at this point, so we will set stride=1 there, and the conv2 block is then repeated two more times. When we move to conv3 the output size does change, so the first conv3 block uses stride=2 and the feature map shrinks to half its size; the following three repeats use stride=1 and leave the feature map unchanged. The later stages work the same way.

        self.conv2 = self._make_layer(64,256,1,num=layer[0])
        self.conv3 = self._make_layer(256,512,2,num=layer[1])
        self.conv4 = self._make_layer(512,1024,2,num=layer[2])
        self.conv5 = self._make_layer(1024,2048,2,num=layer[3])

So conv2 here uses stride=1, while conv3, conv4 and conv5 use stride=2 so that each of them halves the feature map. The channel counts in the table are also very regular: the output of each stage is basically double that of the previous one; only conv2 is different, since its input is 64 channels but its output is 256.
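To make the sizes concrete, here is a rough bookkeeping sketch for a 224x224 input (only the strides are tracked; the paddings used in the code keep the halving exact):

size = 224
for name, stride in [("conv1 (7x7, s=2)", 2), ("maxpool (3x3, s=2)", 2),
                     ("conv2", 1), ("conv3", 2), ("conv4", 2), ("conv5", 2)]:
    size //= stride
    print(name, "->", size)
# conv1 -> 112, maxpool -> 56, conv2 -> 56, conv3 -> 28, conv4 -> 14, conv5 -> 7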

[Figure 3]
The code below corresponds to the structure shown in the figure above.

Block code

class Block(nn.Module):
    # ResNeXt bottleneck block: 1x1 reduce -> 3x3 grouped conv (32 groups) -> 1x1 expand
    def __init__(self, in_channels, out_channels, stride=1, is_shortcut=False):
        super(Block, self).__init__()
        self.relu = nn.ReLU(inplace=True)
        self.is_shortcut = is_shortcut
        # 1x1 conv: reduce the channels to out_channels // 2 (stride also does the downsampling)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels // 2, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        # 3x3 grouped convolution with groups=32: the core of the ResNeXt block
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels // 2, kernel_size=3, stride=1, padding=1,
                      groups=32, bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        # 1x1 conv: expand back to the full number of output channels
        self.conv3 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
        )
        # projection shortcut: only used when the input and output shapes differ
        if is_shortcut:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
    def forward(self, x):
        x_shortcut = x
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        if self.is_shortcut:
            x_shortcut = self.shortcut(x_shortcut)
        x = x + x_shortcut
        x = self.relu(x)
        return x

There is only one point that needs attention: the projection shortcut, self.shortcut (a 1x1 convolution followed by BatchNorm). It is used exactly once per stage, in the first block; the repeated blocks that follow do not use it. Taking conv2 as the example: the first block changes the channel count from 64 to 256, so the input cannot be added to the output directly and has to be projected first, and this is where the shallow features are brought in. The two repeated blocks after it map 256 to 256, the input and output feature maps have the same shape, the identity can be added directly, and a projection shortcut would not add anything.
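A quick way to see this is to build the first block and one of the repeated blocks of conv2 and push a dummy tensor through them (a minimal sketch using the Block class above; the shapes in the comments follow from the strides and channel widths):

import torch

# first block of conv2: 64 -> 256 channels, so a 1x1 projection shortcut is required
first = Block(64, 256, stride=1, is_shortcut=True)
# remaining blocks of conv2: 256 -> 256, the identity can be added directly
rest = Block(256, 256, stride=1, is_shortcut=False)

x = torch.rand(1, 64, 56, 56)
y = first(x)   # torch.Size([1, 256, 56, 56])
z = rest(y)    # torch.Size([1, 256, 56, 56])

The complete code for the model is given below.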

import torch
import torch.nn as nn

class Block(nn.Module):
    def __init__(self,in_channels, out_channels, stride=1, is_shortcut=False):
        super(Block,self).__init__()
        self.relu = nn.ReLU(inplace=True)
        self.is_shortcut = is_shortcut
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels // 2, kernel_size=1,stride=stride,bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels // 2, kernel_size=3, stride=1, padding=1, groups=32,
                                   bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels, kernel_size=1,stride=1,bias=False),
            nn.BatchNorm2d(out_channels),
        )
        if is_shortcut:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
    def forward(self, x):
        x_shortcut = x
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        if self.is_shortcut:
            x_shortcut = self.shortcut(x_shortcut)
        x = x + x_shortcut
        x = self.relu(x)
        return x

class Resnext(nn.Module):
    def __init__(self,num_classes,layer=[3,4,6,3]):
        super(Resnext,self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.conv2 = self._make_layer(64,256,1,num=layer[0])
        self.conv3 = self._make_layer(256,512,2,num=layer[1])
        self.conv4 = self._make_layer(512,1024,2,num=layer[2])
        self.conv5 = self._make_layer(1024,2048,2,num=layer[3])
        self.global_average_pool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.fc = nn.Linear(2048,num_classes)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.global_average_pool(x)
        x = torch.flatten(x,1)
        x = self.fc(x)
        return x
    def _make_layer(self, in_channels, out_channels, stride, num):
        layers = []
        # the first block of a stage changes the channels (and possibly the size),
        # so it is the only one that needs the projection shortcut
        block_1 = Block(in_channels, out_channels, stride=stride, is_shortcut=True)
        layers.append(block_1)
        for i in range(1, num):
            layers.append(Block(out_channels, out_channels, stride=1, is_shortcut=False))
        return nn.Sequential(*layers)


net = Resnext(10)
x = torch.rand((10, 3, 224, 224))
for name,layer in net.named_children():
    if name != "fc":
        x = layer(x)
        print(name, 'output shape:', x.shape)
    else:
        x = x.view(x.size(0), -1)
        x = layer(x)
        print(name, 'output shape:', x.shape)
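For reference, with the 10x3x224x224 input above, the loop should print shapes along the following lines (worked out from the strides and channel widths, not copied from an actual run):

conv1 output shape: torch.Size([10, 64, 56, 56])
conv2 output shape: torch.Size([10, 256, 56, 56])
conv3 output shape: torch.Size([10, 512, 28, 28])
conv4 output shape: torch.Size([10, 1024, 14, 14])
conv5 output shape: torch.Size([10, 2048, 7, 7])
global_average_pool output shape: torch.Size([10, 2048, 1, 1])
fc output shape: torch.Size([10, 10])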


The code is basically the same as the ResNet code; reading my earlier ResNet and MobileNet walkthroughs first should make it easier to follow.
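Since the depth is controlled entirely by the layer argument, deeper variants can be built the same way; for example, [3, 4, 23, 3] is the standard 101-layer stage configuration (a sketch; the default [3, 4, 6, 3] above corresponds to the 50-layer depth):

net50 = Resnext(10)                         # layer=[3, 4, 6, 3], 50-layer depth
net101 = Resnext(10, layer=[3, 4, 23, 3])   # 101-layer depth, same 2048-d head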

Training results

[Figure 4: training results]

Training code

import torch
import torch.nn as nn

class Block(nn.Module):
    def __init__(self,in_channels, out_channels, stride=1, is_shortcut=False):
        super(Block,self).__init__()
        self.relu = nn.ReLU(inplace=True)
        self.is_shortcut = is_shortcut
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels // 2, kernel_size=1,stride=stride,bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels // 2, kernel_size=3, stride=1, padding=1, groups=32,
                                   bias=False),
            nn.BatchNorm2d(out_channels // 2),
            nn.ReLU()
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(out_channels // 2, out_channels, kernel_size=1,stride=1,bias=False),
            nn.BatchNorm2d(out_channels),
        )
        if is_shortcut:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
    def forward(self, x):
        x_shortcut = x
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        if self.is_shortcut:
            x_shortcut = self.shortcut(x_shortcut)
        x = x + x_shortcut
        x = self.relu(x)
        return x

class Resnext(nn.Module):
    def __init__(self,num_classes,layer=[3,4,6,3]):
        super(Resnext,self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.conv2 = self._make_layer(64,256,1,num=layer[0])
        self.conv3 = self._make_layer(256,512,2,num=layer[1])
        self.conv4 = self._make_layer(512,1024,2,num=layer[2])
        self.conv5 = self._make_layer(1024,2048,2,num=layer[3])
        # adaptive pooling so the same model also works on small inputs such as CIFAR-10 (32x32)
        self.global_average_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(2048,num_classes)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.global_average_pool(x)
        x = torch.flatten(x,1)
        x = self.fc(x)
        return x
    def _make_layer(self,in_channels,out_channels,stride,num):
        layers = []
        block_1=Block(in_channels, out_channels,stride=stride,is_shortcut=True)
        layers.append(block_1)
        for i in range(1, num):
            layers.append(Block(out_channels,out_channels,stride=1,is_shortcut=False))
        return nn.Sequential(*layers)



import time
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt


def load_dataset(batch_size):
    train_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=True,
        download=True, transform=transforms.ToTensor()
    )
    test_set = torchvision.datasets.CIFAR10(
        root="data/cifar-10", train=False,
        download=True, transform=transforms.ToTensor()
    )
    train_iter = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=4
    )
    test_iter = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=True, num_workers=4
    )
    return train_iter, test_iter


def train(net, train_iter, criterion, optimizer, num_epochs, device, num_print, lr_scheduler=None, test_iter=None):
    net.train()
    record_train = list()
    record_test = list()

    for epoch in range(num_epochs):
        print("========== epoch: [{}/{}] ==========".format(epoch + 1, num_epochs))
        total, correct, train_loss = 0, 0, 0
        start = time.time()

        for i, (X, y) in enumerate(train_iter):
            X, y = X.to(device), y.to(device)
            output = net(X)
            loss = criterion(output, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            total += y.size(0)
            correct += (output.argmax(dim=1) == y).sum().item()
            train_acc = 100.0 * correct / total

            if (i + 1) % num_print == 0:
                print("step: [{}/{}], train_loss: {:.3f} | train_acc: {:6.3f}% | lr: {:.6f}" \
                    .format(i + 1, len(train_iter), train_loss / (i + 1), \
                            train_acc, get_cur_lr(optimizer)))


        if lr_scheduler is not None:
            lr_scheduler.step()

        print("--- cost time: {:.4f}s ---".format(time.time() - start))

        if test_iter is not None:
            record_test.append(test(net, test_iter, criterion, device))
        record_train.append(train_acc)

    return record_train, record_test


def test(net, test_iter, criterion, device):
    total, correct = 0, 0
    net.eval()

    with torch.no_grad():
        print("*************** test ***************")
        for X, y in test_iter:
            X, y = X.to(device), y.to(device)

            output = net(X)
            loss = criterion(output, y)

            total += y.size(0)
            correct += (output.argmax(dim=1) == y).sum().item()

    test_acc = 100.0 * correct / total

    print("test_loss: {:.3f} | test_acc: {:6.3f}%"\
          .format(loss.item(), test_acc))
    print("************************************\n")
    net.train()

    return test_acc


def get_cur_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']


def learning_curve(record_train, record_test=None):
    plt.style.use("ggplot")

    plt.plot(range(1, len(record_train) + 1), record_train, label="train acc")
    if record_test is not None:
        plt.plot(range(1, len(record_test) + 1), record_test, label="test acc")

    plt.legend(loc=4)
    plt.title("learning curve")
    plt.xticks(range(0, len(record_train) + 1, 5))
    plt.yticks(range(0, 101, 5))
    plt.xlabel("epoch")
    plt.ylabel("accuracy")

    plt.show()


import torch.optim as optim


BATCH_SIZE = 128
NUM_EPOCHS = 12
NUM_CLASSES = 10
LEARNING_RATE = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
NUM_PRINT = 100
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def main():
    net = Resnext(NUM_CLASSES)
    net = net.to(DEVICE)

    train_iter, test_iter = load_dataset(BATCH_SIZE)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        net.parameters(),
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY,
        nesterov=True
    )
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    record_train, record_test = train(net, train_iter, criterion, optimizer, \
          NUM_EPOCHS, DEVICE, NUM_PRINT, lr_scheduler, test_iter)

    learning_curve(record_train, record_test)


if __name__ == "__main__":
    main()
