MobileNet

MobileNet comes in three versions, v1, v2, and v3. The highlights of each version are summarized below.

Highlights of MobileNet v1:

1. Depthwise convolution (DW convolution). This is essentially a grouped convolution in which the number of groups equals the number of channels: instead of needing a kernel that spans all input channels for every output channel, each input channel is convolved with its own single-channel kernel to produce one output channel. A pointwise (PW, 1×1) convolution then mixes the channels, so the DW + PW pair together replaces a standard convolution and greatly reduces the amount of computation (see the sketch after this list).

2. Two hyperparameters are added: a width multiplier that scales the number of kernels (output channels) of every convolutional layer, and a resolution multiplier that scales the size of the input image.
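
A minimal sketch of the first idea (the channel counts and input size here are hypothetical, chosen only for illustration): a standard 3×3 convolution is replaced by a 3×3 depthwise convolution with groups equal to the input channels, followed by a 1×1 pointwise convolution, and the parameter counts can be compared directly.

import torch
import torch.nn as nn

in_c, out_c = 32, 64  # hypothetical channel counts

# standard 3x3 convolution: every output channel is computed from all input channels
standard = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1, bias=False)

# depthwise separable convolution: 3x3 depthwise (groups=in_c) + 1x1 pointwise
separable = nn.Sequential(
    nn.Conv2d(in_c, in_c, kernel_size=3, padding=1, groups=in_c, bias=False),
    nn.Conv2d(in_c, out_c, kernel_size=1, bias=False),
)

x = torch.randn(1, in_c, 56, 56)
print(standard(x).shape, separable(x).shape)              # same output shape
print(sum(p.numel() for p in standard.parameters()))      # 3*3*32*64 = 18432
print(sum(p.numel() for p in separable.parameters()))     # 3*3*32 + 32*64 = 2336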

Highlights of MobileNet v2:

1. Inverted residual blocks. A classic residual block first reduces the channel dimension and then expands it back; the inverted residual block does the opposite, first expanding the channels and then projecting them back down (a rough shape trace of one block follows this list).

2. A linear activation replaces ReLU on the final 1×1 projection of each block, because ReLU discards information when the features live in a low-dimensional space.
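
A rough shape trace of a single inverted residual block (hypothetical sizes with expansion factor t = 6; the real implementation is the InvertedResidual class in the MobileNetV2 code further down):

import torch
import torch.nn as nn

in_c, out_c, t = 24, 24, 6           # hypothetical sizes; stride 1 and in_c == out_c, so the shortcut applies
hidden = in_c * t                    # 1x1 conv expands 24 -> 144
block = nn.Sequential(
    nn.Conv2d(in_c, hidden, 1, bias=False), nn.BatchNorm2d(hidden), nn.ReLU6(inplace=True),
    nn.Conv2d(hidden, hidden, 3, padding=1, groups=hidden, bias=False),   # 3x3 depthwise
    nn.BatchNorm2d(hidden), nn.ReLU6(inplace=True),
    nn.Conv2d(hidden, out_c, 1, bias=False), nn.BatchNorm2d(out_c),       # linear 1x1 projection (no ReLU)
)
x = torch.randn(1, in_c, 28, 28)
print((x + block(x)).shape)          # torch.Size([1, 24, 28, 28])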

Highlights of MobileNet v3:

1. The bneck block is updated to include an SE (Squeeze-and-Excitation) channel-attention module (see the SqueezeExcitation class in the MobileNetV3 code below).


The activation functions are also redesigned: the cheap-to-compute h-swish and h-sigmoid are used instead of swish and sigmoid (their definitions are sketched below, and the code further down uses them via nn.Hardswish and F.hardsigmoid).

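For reference, a quick sketch of the hard activations; these match PyTorch's built-in F.hardswish / F.hardsigmoid:

import torch
import torch.nn.functional as F

def h_sigmoid(x):        # h-sigmoid(x) = ReLU6(x + 3) / 6
    return F.relu6(x + 3.0) / 6.0

def h_swish(x):          # h-swish(x) = x * ReLU6(x + 3) / 6
    return x * h_sigmoid(x)

x = torch.linspace(-4, 4, 9)
print(torch.allclose(h_swish(x), F.hardswish(x)))        # True
print(torch.allclose(h_sigmoid(x), F.hardsigmoid(x)))    # True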

2. The time-consuming layers are redesigned: the number of kernels in the first convolution is reduced from 32 to 16, and the last stage is simplified.


Below is the main training script for the MobileNetV2 version.


import torch

import torch.nn as nn
import creatdataset

from accurary import learning_curve
from model_v2 import MobileNetV2
from test import test
from train import train


def load_dataset(batch_size):


    root = r"C:\Users\Jia\PycharmProjects\pythonProject\resnet_dataset"
    train_set = creatdataset.MyDataset(root, mode="train")
    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

    val_set = creatdataset.MyDataset(root, mode="val")
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=0)

    test_set = creatdataset.MyDataset(root, mode="test")
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0)
    return train_iter, val_iter, test_iter


import torch.optim as optim
BATCH_SIZE = 128  # batch size
NUM_EPOCHS = 12  # number of epochs
NUM_CLASSES = 3  # number of classes
LEARNING_RATE = 0.01  # learning rate for gradient descent
MOMENTUM = 0.9  # momentum
WEIGHT_DECAY = 0.0005  # weight-decay coefficient
NUM_PRINT = 1
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # run on GPU if available, otherwise CPU


def main():
    net = MobileNetV2(num_classes=3)
    net = net.to(DEVICE)

    train_iter, val_iter, test_iter = load_dataset(BATCH_SIZE)  # load the training, validation and test sets

    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    # optimizer
    optimizer = optim.SGD(
        net.parameters(),
        # after the network is built, all its parameters are accessible via parameters()
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY,
        nesterov=True
        # use Nesterov momentum
    )
    # learning-rate schedule (step_size: update the lr every step_size epochs; gamma: multiplicative factor applied to the lr)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
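    # Illustrative schedule with these settings: epochs 1-5 run at lr 0.01, epochs 6-10 at 0.001, epochs 11-12 at 0.0001.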

    record_train, record_val = train(net, train_iter, criterion, optimizer, \
          NUM_EPOCHS, DEVICE, NUM_PRINT, lr_scheduler, val_iter)

    learning_curve(record_train, record_val)  # plot the accuracy curves


    if test_iter is not None:  # make sure the test set is not empty (this calls the test function)
        test(net, test_iter, criterion, DEVICE)



main()

Below is the MobileNetV2 model code (model_v2.py).

from torch import nn
import torch


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not drop the channel count by more than 10%;
    # together with the line above this behaves like round-half-up (try ch=11 or ch=9 to see it).
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch
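# Illustrative values (divisor=8):
#   _make_divisible(32) -> 32   (already a multiple of 8)
#   _make_divisible(11) -> 16   (rounds down to 8 first, but 8 < 0.9 * 11, so it is bumped up to 16)
#   _make_divisible(9)  -> 16   (same: 8 < 0.9 * 9 = 8.1, so it is bumped up to 16)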


class ConvBNReLU(nn.Sequential):
    # Note: this inherits from nn.Sequential, not nn.Module.
    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU6(inplace=True)
            # ReLU6 clips the activation to the range [0, 6].
            # inplace=True modifies the input tensor directly instead of allocating a new output.
            # The super().__init__(...) call simply hands the three layers to nn.Sequential's constructor.
        )


class InvertedResidual(nn.Module):
    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        # expand_ratio is the expansion factor, i.e. t in the paper
        super(InvertedResidual, self).__init__()
        hidden_channel = in_channel * expand_ratio
        self.use_shortcut = stride == 1 and in_channel == out_channel
        # the shortcut is used only when stride == 1 and the input and output channels are equal

        layers = []
        if expand_ratio != 1:
            # 1x1 pointwise conv
            layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1))
        layers.extend([
            # 3x3 depthwise conv
            # extend inserts several elements at once (used just like append, but for a batch of items)
            ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel),
            # 1x1 pointwise conv (linear): a 1x1 convolution with no activation afterwards acts as the linear projection (y = x)
            nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channel),
        ])

        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_shortcut:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        # alpha is a hyperparameter: the width multiplier that scales the number of kernels used by every conv layer
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = _make_divisible(32 * alpha, round_nearest)
        # _make_divisible rounds 32 * alpha to a multiple of round_nearest, which maps better onto the hardware (e.g. multi-GPU parallel execution)
        last_channel = _make_divisible(1280 * alpha, round_nearest)

        inverted_residual_setting = [
            # t = expansion factor, c = output channels, n = number of repeats, s = stride (applied only to the first layer of each block)
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        features = []
        # add the conv1 layer; the very first layer has no 1x1 expansion, so it is added separately
        features.append(ConvBNReLU(3, input_channel, stride=2))
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                # s applies only to the first layer inside each block, which is implemented here
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, last_channel, 1))
        # combine feature layers
        self.features = nn.Sequential(*features)

        # building classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes)
        )

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
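
A quick sanity check of the model (illustrative, assuming a 224×224 RGB input):

net = MobileNetV2(num_classes=3)
x = torch.randn(1, 3, 224, 224)
print(net(x).shape)  # torch.Size([1, 3])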

Below is the MobileNetV2 training code (train.py).

# train the model
import time

import torch

def train(net, train_iter, criterion, optimizer, num_epochs, device, num_print, lr_scheduler=None, val_iter=None):
    net.train()
    record_train = list()
    # record the training-set accuracy of every epoch (list() creates an empty list)
    record_val = list()
    # record the validation-set accuracy of every epoch

    for epoch in range(num_epochs):
        print("========== epoch: [{}/{}] ==========".format(epoch + 1, num_epochs))
        total, correct, train_loss = 0, 0, 0
        start = time.time()

        for i, (X, y) in enumerate(train_iter):
            # enumerate yields each batch together with its index i
            X, y = X.to(device), y.to(device)
            output = net(X)
            loss = criterion(output, y)  # compute the loss

            optimizer.zero_grad()  # zero the gradients
            loss.backward()  # backpropagate
            optimizer.step()  # update the parameters

            train_loss += loss.item()
            total += y.size(0)
            correct += (output.argmax(dim=1) == y).sum().item()  # accumulate the number of correct predictions
            # output.argmax(dim=1) returns the index of the maximum value along dim 1,
            # i.e. the predicted class of each sample (dim=0 would take the argmax of each column instead)
            train_acc = 100.0 * correct / total

            if (i + 1) % num_print == 0:
                print("step: [{}/{}], train_loss: {:.3f} | train_acc: {:6.3f}% | lr: {:.6f}" \
                    .format(i + 1, len(train_iter), train_loss / (i + 1), \
                            train_acc, get_cur_lr(optimizer)))


        if lr_scheduler is not None:
            # step the learning-rate schedule
            lr_scheduler.step()
        # report how long the epoch took
        print("--- cost time: {:.4f}s ---".format(time.time() - start))

        if val_iter is not None:  # if a validation set was provided (this calls the val function)
            record_val.append(val(net, val_iter, criterion, device))  # validate the model after every epoch
        record_train.append(train_acc)
        # append() adds the value to the end of the list
    # return the training and validation accuracy of every epoch
    torch.save(net.state_dict(),"MobileNetV2.pth")
    return record_train, record_val

# validate the model
def val(net, val_iter, criterion, device):
    total, correct = 0, 0

    net.eval()  # evaluation mode

    with torch.no_grad():
        print("*************** val ***************")
        for X, y in val_iter:
            X, y = X.to(device), y.to(device)  # move the batch to the CPU or GPU

            output = net(X)  # forward pass
            val_loss = criterion(output, y)  # loss of the current batch (only the last batch's loss is printed below)
            total += y.size(0)  # running count of validation samples
            correct += (output.argmax(dim=1) == y).sum().item()

            val_acc = 100.0 * correct / total  # validation accuracy

    # print the validation loss (last batch) and the validation accuracy
    print("val_loss: {:.3f} | val_acc: {:6.3f}%" \
              .format(val_loss.item(), val_acc))
    print("************************************\n")

    # back to training mode (the model is validated after every epoch, so switch back to train() before the next epoch)
    net.train()

    return val_acc

# helper that returns the current learning rate
def get_cur_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

Below is the main training script for the MobileNetV3 version.


import torch

import torch.nn as nn
import creatdataset

from accurary import learning_curve
from model_v3 import mobilenet_v3_large
from test import test
from train import train


def load_dataset(batch_size):


    root = r"C:\Users\Jia\PycharmProjects\pythonProject\resnet_dataset"
    train_set = creatdataset.MyDataset(root, mode="train")
    train_iter = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

    val_set = creatdataset.MyDataset(root, mode="val")
    val_iter = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=0)

    test_set = creatdataset.MyDataset(root, mode="test")
    test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0)
    return train_iter, val_iter, test_iter


import torch.optim as optim
BATCH_SIZE = 128  # batch size
NUM_EPOCHS = 12  # number of epochs
NUM_CLASSES = 3  # number of classes
LEARNING_RATE = 0.01  # learning rate for gradient descent
MOMENTUM = 0.9  # momentum
WEIGHT_DECAY = 0.0005  # weight-decay coefficient
NUM_PRINT = 1
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # run on GPU if available, otherwise CPU


def main():
    net = mobilenet_v3_large(num_classes=3)
    net = net.to(DEVICE)

    train_iter, val_iter, test_iter = load_dataset(BATCH_SIZE)  # load the training, validation and test sets

    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    # optimizer
    optimizer = optim.SGD(
        net.parameters(),
        # after the network is built, all its parameters are accessible via parameters()
        lr=LEARNING_RATE,
        momentum=MOMENTUM,
        weight_decay=WEIGHT_DECAY,
        nesterov=True
        # use Nesterov momentum
    )
    # learning-rate schedule (step_size: update the lr every step_size epochs; gamma: multiplicative factor applied to the lr)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
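    # Illustrative schedule with these settings: epochs 1-5 run at lr 0.01, epochs 6-10 at 0.001, epochs 11-12 at 0.0001.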

    record_train, record_val = train(net, train_iter, criterion, optimizer, \
          NUM_EPOCHS, DEVICE, NUM_PRINT, lr_scheduler, val_iter)

    learning_curve(record_train, record_val)  # plot the accuracy curves


    if test_iter is not None:  # make sure the test set is not empty (this calls the test function)
        test(net, test_iter, criterion, DEVICE)



main()

Below is the MobileNetV3 training code.

# train the model
import time

import torch

def train(net, train_iter, criterion, optimizer, num_epochs, device, num_print, lr_scheduler=None, val_iter=None):
    net.train()
    record_train = list()
    # record the training-set accuracy of every epoch (list() creates an empty list)
    record_val = list()
    # record the validation-set accuracy of every epoch

    for epoch in range(num_epochs):
        print("========== epoch: [{}/{}] ==========".format(epoch + 1, num_epochs))
        total, correct, train_loss = 0, 0, 0
        start = time.time()

        for i, (X, y) in enumerate(train_iter):
            # enumerate yields each batch together with its index i
            X, y = X.to(device), y.to(device)
            output = net(X)
            loss = criterion(output, y)  # compute the loss

            optimizer.zero_grad()  # zero the gradients
            loss.backward()  # backpropagate
            optimizer.step()  # update the parameters

            train_loss += loss.item()
            total += y.size(0)
            correct += (output.argmax(dim=1) == y).sum().item()  # accumulate the number of correct predictions
            # output.argmax(dim=1) returns the index of the maximum value along dim 1,
            # i.e. the predicted class of each sample (dim=0 would take the argmax of each column instead)
            train_acc = 100.0 * correct / total

            if (i + 1) % num_print == 0:
                print("step: [{}/{}], train_loss: {:.3f} | train_acc: {:6.3f}% | lr: {:.6f}" \
                    .format(i + 1, len(train_iter), train_loss / (i + 1), \
                            train_acc, get_cur_lr(optimizer)))


        if lr_scheduler is not None:
            # step the learning-rate schedule
            lr_scheduler.step()
        # report how long the epoch took
        print("--- cost time: {:.4f}s ---".format(time.time() - start))

        if val_iter is not None:  # if a validation set was provided (this calls the val function)
            record_val.append(val(net, val_iter, criterion, device))  # validate the model after every epoch
        record_train.append(train_acc)
        # append() adds the value to the end of the list
    # return the training and validation accuracy of every epoch
    torch.save(net.state_dict(),"mobilenet_v3_large.pth")
    return record_train, record_val

# validate the model
def val(net, val_iter, criterion, device):
    total, correct = 0, 0

    net.eval()  # evaluation mode

    with torch.no_grad():
        print("*************** val ***************")
        for X, y in val_iter:
            X, y = X.to(device), y.to(device)  # move the batch to the CPU or GPU

            output = net(X)  # forward pass
            val_loss = criterion(output, y)  # loss of the current batch (only the last batch's loss is printed below)
            total += y.size(0)  # running count of validation samples
            correct += (output.argmax(dim=1) == y).sum().item()

            val_acc = 100.0 * correct / total  # validation accuracy

    # print the validation loss (last batch) and the validation accuracy
    print("val_loss: {:.3f} | val_acc: {:6.3f}%" \
              .format(val_loss.item(), val_acc))
    print("************************************\n")

    # back to training mode (the model is validated after every epoch, so switch back to train() before the next epoch)
    net.train()

    return val_acc

# helper that returns the current learning rate
def get_cur_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

Below is the MobileNetV3 model code (model_v3.py).

from typing import Callable, List, Optional

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNActivation(nn.Sequential):
    def __init__(self,
                 in_planes: int,
                 out_planes: int,
                 kernel_size: int = 3,
                 stride: int = 1,
                 groups: int = 1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        padding = (kernel_size - 1) // 2
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if activation_layer is None:
            activation_layer = nn.ReLU6
        super(ConvBNActivation, self).__init__(nn.Conv2d(in_channels=in_planes,
                                                         out_channels=out_planes,
                                                         kernel_size=kernel_size,
                                                         stride=stride,
                                                         padding=padding,
                                                         groups=groups,
                                                         bias=False),
                                               norm_layer(out_planes),
                                               activation_layer(inplace=True))


class SqueezeExcitation(nn.Module):
    def __init__(self, input_c: int, squeeze_factor: int = 4):
        # squeeze_factor: the first 1x1 "fully-connected" layer has 1/4 as many channels as the input
        super(SqueezeExcitation, self).__init__()
        squeeze_c = _make_divisible(input_c // squeeze_factor, 8)
        self.fc1 = nn.Conv2d(input_c, squeeze_c, 1)
        self.fc2 = nn.Conv2d(squeeze_c, input_c, 1)

    def forward(self, x: Tensor) -> Tensor:
        scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))  # squeeze: global average pool to 1x1 per channel
        scale = self.fc1(scale)
        scale = F.relu(scale, inplace=True)
        scale = self.fc2(scale)
        scale = F.hardsigmoid(scale, inplace=True)  # per-channel attention weights in [0, 1]
        return scale * x  # excite: rescale each input channel by its weight


class InvertedResidualConfig:
    # configuration of a single bneck layer in MobileNetV3
    def __init__(self,
                 input_c: int,
                 kernel: int,
                 expanded_c: int,
                 out_c: int,
                 use_se: bool,
                 activation: str,
                 stride: int,
                 width_multi: float):
        # width_multi is the width-multiplier hyperparameter that scales the channel counts
        self.input_c = self.adjust_channels(input_c, width_multi)
        self.kernel = kernel
        self.expanded_c = self.adjust_channels(expanded_c, width_multi)
        self.out_c = self.adjust_channels(out_c, width_multi)
        self.use_se = use_se
        self.use_hs = activation == "HS"  # whether using h-swish activation
        self.stride = stride

    @staticmethod
    def adjust_channels(channels: int, width_multi: float):
        return _make_divisible(channels * width_multi, 8)


class InvertedResidual(nn.Module):
    def __init__(self,
                 cnf: InvertedResidualConfig,
                 norm_layer: Callable[..., nn.Module]):
        super(InvertedResidual, self).__init__()

        if cnf.stride not in [1, 2]:
            raise ValueError("illegal stride value.")

        self.use_res_connect = (cnf.stride == 1 and cnf.input_c == cnf.out_c)

        layers: List[nn.Module] = []
        activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU

        # expand
        if cnf.expanded_c != cnf.input_c:
            layers.append(ConvBNActivation(cnf.input_c,
                                           cnf.expanded_c,
                                           kernel_size=1,
                                           norm_layer=norm_layer,
                                           activation_layer=activation_layer))

        # depthwise
        layers.append(ConvBNActivation(cnf.expanded_c,
                                       cnf.expanded_c,
                                       kernel_size=cnf.kernel,
                                       stride=cnf.stride,
                                       groups=cnf.expanded_c,
                                       norm_layer=norm_layer,
                                       activation_layer=activation_layer))

        if cnf.use_se:
            layers.append(SqueezeExcitation(cnf.expanded_c))

        # project
        layers.append(ConvBNActivation(cnf.expanded_c,
                                       cnf.out_c,
                                       kernel_size=1,
                                       norm_layer=norm_layer,
                                       activation_layer=nn.Identity))

        self.block = nn.Sequential(*layers)
        self.out_channels = cnf.out_c
        self.is_strided = cnf.stride > 1

    def forward(self, x: Tensor) -> Tensor:
        result = self.block(x)
        if self.use_res_connect:
            result += x

        return result


class MobileNetV3(nn.Module):
    def __init__(self,
                 inverted_residual_setting: List[InvertedResidualConfig],
                 last_channel: int,
                 num_classes: int = 1000,
                 block: Optional[Callable[..., nn.Module]] = None,
                 norm_layer: Optional[Callable[..., nn.Module]] = None):
        super(MobileNetV3, self).__init__()

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty.")
        elif not (isinstance(inverted_residual_setting, List) and
                  all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])):
            raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")

        if block is None:
            block = InvertedResidual

        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        layers: List[nn.Module] = []

        # building first layer
        firstconv_output_c = inverted_residual_setting[0].input_c
        layers.append(ConvBNActivation(3,
                                       firstconv_output_c,
                                       kernel_size=3,
                                       stride=2,
                                       norm_layer=norm_layer,
                                       activation_layer=nn.Hardswish))
        # building inverted residual blocks
        for cnf in inverted_residual_setting:
            layers.append(block(cnf, norm_layer))

        # building last several layers
        lastconv_input_c = inverted_residual_setting[-1].out_c
        lastconv_output_c = 6 * lastconv_input_c
        layers.append(ConvBNActivation(lastconv_input_c,
                                       lastconv_output_c,
                                       kernel_size=1,
                                       norm_layer=norm_layer,
                                       activation_layer=nn.Hardswish))
        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(nn.Linear(lastconv_output_c, last_channel),
                                        nn.Hardswish(inplace=True),
                                        nn.Dropout(p=0.2, inplace=True),
                                        nn.Linear(last_channel, num_classes))

        # initial weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)


def mobilenet_v3_large(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Constructs a large MobileNetV3 architecture from
    "Searching for MobileNetV3" .

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, reduces the channel counts of all feature layers
            between C4 and C5 by 2. It is used to reduce the channel redundancy in the
            backbone for Detection and Segmentation.
    """
    width_multi = 1.0
    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    reduce_divider = 2 if reduced_tail else 1

    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_conf(16, 3, 16, 16, False, "RE", 1),
        bneck_conf(16, 3, 64, 24, False, "RE", 2),  # C1
        bneck_conf(24, 3, 72, 24, False, "RE", 1),
        bneck_conf(24, 5, 72, 40, True, "RE", 2),  # C2
        bneck_conf(40, 5, 120, 40, True, "RE", 1),
        bneck_conf(40, 5, 120, 40, True, "RE", 1),
        bneck_conf(40, 3, 240, 80, False, "HS", 2),  # C3
        bneck_conf(80, 3, 200, 80, False, "HS", 1),
        bneck_conf(80, 3, 184, 80, False, "HS", 1),
        bneck_conf(80, 3, 184, 80, False, "HS", 1),
        bneck_conf(80, 3, 480, 112, True, "HS", 1),
        bneck_conf(112, 3, 672, 112, True, "HS", 1),
        bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2),  # C4
        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1),
        bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1),
    ]
    last_channel = adjust_channels(1280 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)


def mobilenet_v3_small(num_classes: int = 1000,
                       reduced_tail: bool = False) -> MobileNetV3:
    """
    Constructs a small MobileNetV3 architecture from
    "Searching for MobileNetV3" .

    weights_link:
    https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth

    Args:
        num_classes (int): number of classes
        reduced_tail (bool): If True, reduces the channel counts of all feature layers
            between C4 and C5 by 2. It is used to reduce the channel redundancy in the
            backbone for Detection and Segmentation.
    """
    width_multi = 1.0
    bneck_conf = partial(InvertedResidualConfig, width_multi=width_multi)
    adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_multi=width_multi)

    reduce_divider = 2 if reduced_tail else 1

    inverted_residual_setting = [
        # input_c, kernel, expanded_c, out_c, use_se, activation, stride
        bneck_conf(16, 3, 16, 16, True, "RE", 2),  # C1
        bneck_conf(16, 3, 72, 24, False, "RE", 2),  # C2
        bneck_conf(24, 3, 88, 24, False, "RE", 1),
        bneck_conf(24, 5, 96, 40, True, "HS", 2),  # C3
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 240, 40, True, "HS", 1),
        bneck_conf(40, 5, 120, 48, True, "HS", 1),
        bneck_conf(48, 5, 144, 48, True, "HS", 1),
        bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2),  # C4
        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1),
        bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1)
    ]
    last_channel = adjust_channels(1024 // reduce_divider)  # C5

    return MobileNetV3(inverted_residual_setting=inverted_residual_setting,
                       last_channel=last_channel,
                       num_classes=num_classes)
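
A quick sanity check, mirroring the one for MobileNetV2 (illustrative, assuming a 224×224 RGB input):

net = mobilenet_v3_large(num_classes=3)
x = torch.randn(1, 3, 224, 224)
print(net(x).shape)  # torch.Size([1, 3])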
