下图展示了传统卷积与DW卷积的差异,在传统卷积中,每个卷积核的channel与输入特征矩阵的channel相等(每个卷积核都会与输入特征矩阵的每一个维度进行卷积运算)。而在DW卷积中,每个卷积核的channel都是等于1的(每个卷积核只负责输入特征矩阵的一个channel,故卷积核的个数必须等于输入特征矩阵的channel数,从而使得输出特征矩阵的channel数也等于输入特征矩阵的channel数)
如果想改变输出特征矩阵的channel,只需要在DW卷积后接上一个PW卷积即可,如下图所示,其实PW卷积就是普通的卷积而已(只不过卷积核大小为1)。通常DW卷积和PW卷积是放在一起使用的,一起叫做Depthwise Separable Convolution(深度可分卷积)
左侧是ResNet网络中的残差结构,右侧就是MobileNet v2中的倒残差结构。在残差结构中是1x1卷积降维->3x3卷积->1x1卷积升维,在倒残差结构中是1x1卷积升维->3x3DW卷积->1x1卷积降维。(注意倒残差结构中基本使用的都是ReLU6激活函数,但是最后一个1x1的卷积层使用的是线性激活函数)
输入特征矩阵为h*w*k,经过1*1conv(卷积核个数为tk)后为h*w*tk,【t为一个扩展因子,对应倒残差结构中第一层1*1conv卷积核的扩展倍率】,再经过一个3*3步距为s的DW卷积后为h/s*w/s*tk,再经过1*1conv(卷积核个数为k')后为h/s*w/s*k'
t为一个扩展因子,对应倒残差结构中第一层1*1conv卷积核的扩展倍率,
c代表输出特征矩阵的channel,
n代表bottlenect(倒残差结构)重复的次数,
s代表每一个block中第一层bottlenect(倒残差结构)所对应的步距,该block中其它层bottlenect所对应的步距都为1,步距指的是DW卷积的步距
model_v2.py
from torch import nn
import torch
def _make_divisible(ch, divisor=8, min_ch=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
"""
if min_ch is None:
min_ch = divisor
new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor) # 保证ch是divisor的整数倍
# Make sure that round down does not go down by more than 10%.
if new_ch < 0.9 * ch:
new_ch += divisor
return new_ch
class ConvBNReLU(nn.Sequential): # 定义普通卷积
def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1): # groups=1为普通卷积,groups=in_channel为depthwise卷积
padding = (kernel_size - 1) // 2 # kernel_size=3则padding=1;kernel_size=1则padding=0
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False), # 如果要使用BN层,就不用使用偏置了
nn.BatchNorm2d(out_channel),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module): # 定义倒残差结构
def __init__(self, in_channel, out_channel, stride, expand_ratio): # expand_ratio为扩展因子,就是表格中的t
super(InvertedResidual, self).__init__()
hidden_channel = in_channel * expand_ratio # 第一层卷积层的卷积核的个数
self.use_shortcut = stride == 1 and in_channel == out_channel # 用于判断在正向传播过程中是否使用shortcut
layers = []
if expand_ratio != 1:
# 倒残差结构的第一层 1x1 pointwise conv
layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1)) # 扩展因子等于1,这个卷积层可以省略
layers.extend([ # .extend批量插入很多函数
# 倒残差结构的第二层 3x3 depthwise conv
ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel),
# 倒残差结构的第三层 1x1 pointwise conv(linear) 线性激活函数就是不用添加激活函数(y=x)
nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channel),
])
self.conv = nn.Sequential(*layers) # 将一系列层结构打包成一个整体
def forward(self, x): # 定义正向传播过程
if self.use_shortcut: # 使用shortcut
return x + self.conv(x)
else: # 不使用shortcut
return self.conv(x)
class MobileNetV2(nn.Module): # 定义MobileNetV2结构
def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8): # alpha为一个超参数,卷积核的倍率
super(MobileNetV2, self).__init__()
block = InvertedResidual # 倒残差结构传给block
input_channel = _make_divisible(32 * alpha, round_nearest) # 将卷积核的个数调整到8的整数倍
last_channel = _make_divisible(1280 * alpha, round_nearest)
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
features = []
# 定义第一层卷积层,输入为RGB三通道,输出为input_channel conv1 layer
features.append(ConvBNReLU(3, input_channel, stride=2))
# 定义一系列bottleneck层 building inverted residual residual blockes
for t, c, n, s in inverted_residual_setting:
output_channel = _make_divisible(c * alpha, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
# 定义倒数第三层的卷积层 building last several layers
features.append(ConvBNReLU(input_channel, last_channel, kernel_size=1))
# combine feature layers
self.features = nn.Sequential(*features) # 将一系列层结构打包成一个整体
#-----------------------以上是特征提取部分-------------------------
# 定义分类器部分(表格中的最后两层) building classifier
self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # 自适应的平均池化下采样操作
self.classifier = nn.Sequential(
nn.Dropout(0.2),
nn.Linear(last_channel, num_classes)
)
# ------------------------以上是分类器部分-----------------------
# 权重初始化 weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def forward(self, x): # 前向传播过程
x = self.features(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.classifier(x)
return x
train.py
import os
import sys
import json
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder
from tqdm import tqdm
from model_v2 import MobileNetV2
# 下载预训练权重
import torchvision.models.mobilenet
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
# 解决中文显示问题
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
ROOT_TRAIN = r'D:/cnn/All Classfication/ResNet/data/train'
ROOT_TEST = r'D:/cnn/All Classfication/ResNet/data/val'
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))
data_transform = {
"train": transforms.Compose([transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]), # 这里的标准化参数是官网提供的,不做修改
"val": transforms.Compose([transforms.Resize(256), # 将原图像长宽比固定,再将其最小边缩放到256
transforms.CenterCrop(224), # 在使用中心裁剪到224 * 224大小
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}
train_dataset = ImageFolder(ROOT_TRAIN, transform=data_transform["train"]) # 加载训练集
train_num = len(train_dataset) # 打印训练集有多少张图片
animal_list = train_dataset.class_to_idx # 获取类别名称以及对应的索引
cla_dict = dict((val, key) for key, val in animal_list.items()) # 将上面的键值对位置对调一下
json_str = json.dumps(cla_dict, indent=4) # 把类别和对应的索引写入根目录下class_indices.json文件中
with open('class_indices.json', 'w') as json_file:
json_file.write(json_str)
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size, shuffle=True,
num_workers=0)
validate_dataset = ImageFolder(ROOT_TEST, transform=data_transform["val"]) # 载入测试集
val_num = len(validate_dataset) # 打印测试集有多少张图片
validate_loader = torch.utils.data.DataLoader(validate_dataset,
batch_size=16, shuffle=False,
num_workers=0)
print("using {} images for training, {} images for validation.".format(train_num, val_num))
# create model
net = MobileNetV2(num_classes=2) # 实例化模型,定义类别个数
# load pretrain weights
model_weight_path = "./mobilenet_v2.pth"
assert os.path.exists(model_weight_path), "file {} dose not exist.".format(model_weight_path)
pre_weights = torch.load(model_weight_path, map_location='cpu') # 通过torch.load载入预训练模型参数
# delete classifier weights 便利权重字典,去除含classifier的层
pre_dict = {k: v for k, v in pre_weights.items() if net.state_dict()[k].numel() == v.numel()}
missing_keys, unexpected_keys = net.load_state_dict(pre_dict, strict=False)
# freeze features weights 冻结特征提取部分的权重
for param in net.features.parameters():
param.requires_grad = False
net.to(device)
# define loss function
loss_function = nn.CrossEntropyLoss()
# construct an optimizer
params = [p for p in net.parameters() if p.requires_grad]
optimizer = optim.Adam(params, lr=0.0001)
epochs = 10
best_acc = 0.0
save_path = './MobileNetV2.pth'
train_steps = len(train_loader)
for epoch in range(epochs):
# train
net.train()
running_loss = 0.0
train_bar = tqdm(train_loader, file=sys.stdout)
for step, data in enumerate(train_bar):
images, labels = data
optimizer.zero_grad()
logits = net(images.to(device))
loss = loss_function(logits, labels.to(device))
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
epochs,
loss)
# validate
net.eval()
acc = 0.0 # accumulate accurate number / epoch
with torch.no_grad():
val_bar = tqdm(validate_loader, file=sys.stdout)
for val_data in val_bar:
val_images, val_labels = val_data
outputs = net(val_images.to(device))
# loss = loss_function(outputs, test_labels)
predict_y = torch.max(outputs, dim=1)[1]
acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
epochs)
val_accurate = acc / val_num
print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
(epoch + 1, running_loss / train_steps, val_accurate))
if val_accurate > best_acc:
best_acc = val_accurate
torch.save(net.state_dict(), save_path)
print('Finished Training')
if __name__ == '__main__':
main()
MobileNet(v1、v2)网络详解与模型的搭建_太阳花的小绿豆的博客-CSDN博客
7.1 MobileNet网络详解_哔哩哔哩_bilibili