Preface
Lightweight network design is an approach to building deep learning models for resource-constrained environments such as mobile devices. Common lightweight design techniques include lightweight convolution blocks (depthwise separable and group convolutions with channel shuffle), network pruning, quantization, knowledge distillation, and neural architecture search; the ShuffleNetV2 backbone used below belongs to the first category.
In the standard YOLOv8 family, the Backbone uses the relatively complex C2f structure. This sharply increases the amount of computation and slows down detection, which limits where the model can be used: in real-world scenarios such as mobile or embedded devices, such a large and complex model is hard to deploy. To address this, this chapter adopts the lightweight ShuffleNetV2 network as the basic structure of the Backbone, trimming the network as far as possible while maintaining detection performance and greatly reducing the model's parameter count and computational cost.
Paper (2018): ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design
PyTorch code: ShuffleNet-Series
The paper distills four practical guidelines for lightweight model design:
G1: Equal input and output channel widths minimize memory access cost (MAC).
G2: Excessive group convolution increases MAC.
G3: Network fragmentation (many small branches and paths) reduces the degree of parallelism.
G4: Element-wise operations (ReLU, Add, etc.) are not negligible.
In short: keep channel widths balanced, use group convolution in moderation, avoid overly fragmented topologies, and minimize element-wise operations.
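To make guideline G1 concrete, here is a minimal back-of-the-envelope sketch (an illustration, not code from the paper) comparing the memory access cost (MAC) of a 1x1 convolution with balanced versus skewed channel widths at identical FLOPs:

def mac_1x1(h, w, c_in, c_out):
    # MAC of a 1x1 conv: feature-map reads/writes plus weight accesses
    return h * w * (c_in + c_out) + c_in * c_out

h = w = 56
balanced = mac_1x1(h, w, 128, 128)    # c_in == c_out
unbalanced = mac_1x1(h, w, 64, 256)   # same FLOPs (h*w*c_in*c_out), skewed channels
print(balanced, unbalanced)           # 819200 vs 1019904: the skewed split needs ~24% more memory traffic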
Add the following code to the ultralytics/nn/modules/block.py file:
# TODO: build shuffle block
# -------------------------------------------------------------------------
def channel_shuffle(x, groups):
    # Rearrange channels across groups so information mixes between the two branches
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups
    # reshape
    x = x.view(batchsize, groups,
               channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()
    # flatten
    x = x.view(batchsize, -1, height, width)
    return x


class conv_bn_relu_maxpool(nn.Module):
    # Stem: 3x3 stride-2 Conv-BN-ReLU followed by 3x3 stride-2 max pooling (overall stride 4)
    def __init__(self, c1, c2):  # ch_in, ch_out
        super(conv_bn_relu_maxpool, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(c1, c2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c2),
            nn.ReLU(inplace=True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

    def forward(self, x):
        return self.maxpool(self.conv(x))


class ShuffleV2Block(nn.Module):
    # ShuffleNetV2 unit: stride 1 keeps resolution (channel split + one branch),
    # stride 2 halves resolution (two down-sampling branches); both end with a channel shuffle
    def __init__(self, inp, oup, stride):
        super(ShuffleV2Block, self).__init__()
        if not (1 <= stride <= 3):
            raise ValueError('illegal stride value')
        self.stride = stride

        branch_features = oup // 2
        assert (self.stride != 1) or (inp == branch_features << 1)

        if self.stride > 1:
            self.branch1 = nn.Sequential(
                self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(inp),
                nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True),
            )
        else:
            self.branch1 = nn.Sequential()

        self.branch2 = nn.Sequential(
            nn.Conv2d(inp if (self.stride > 1) else branch_features,
                      branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

    def forward(self, x):
        if self.stride == 1:
            x1, x2 = x.chunk(2, dim=1)
            out = torch.cat((x1, self.branch2(x2)), dim=1)
        else:
            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
        out = channel_shuffle(out, 2)
        return out
# -------------------------------------------------------------------------
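Before wiring the new modules into the config system, it is worth a quick shape check to confirm they behave as expected. The snippet below is a minimal sketch; the input size and channel numbers are just illustrative choices, not values taken from the yaml scaling:

import torch
from ultralytics.nn.modules.block import ShuffleV2Block, conv_bn_relu_maxpool

x = torch.randn(1, 3, 640, 640)      # dummy input image
stem = conv_bn_relu_maxpool(3, 32)   # stride-4 stem
down = ShuffleV2Block(32, 116, 2)    # down-sampling unit (stride 2)
keep = ShuffleV2Block(116, 116, 1)   # resolution-preserving unit (stride 1)
y = keep(down(stem(x)))
print(y.shape)                       # expected: torch.Size([1, 116, 80, 80]), i.e. a /8 feature map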
Modify the ultralytics/nn/modules/__init__.py file:
from .block import (ASFF2, ASFF3, C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x,
                    GhostBottleneck, HGBlock, HGStem, Proto, RepC3, conv_bn_relu_maxpool, ShuffleV2Block)

__all__ = ('Conv', 'Conv2', 'LightConv', 'RepConv', 'DWConv', 'DWConvTranspose2d', 'ConvTranspose', 'Focus',
           'GhostConv', 'ChannelAttention', 'SpatialAttention', 'CBAM', 'Concat', 'TransformerLayer',
           'TransformerBlock', 'MLPBlock', 'LayerNorm2d', 'DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3',
           'C2f', 'C3x', 'C3TR', 'C3Ghost', 'GhostBottleneck', 'Bottleneck', 'BottleneckCSP', 'Proto', 'Detect',
           'Segment', 'Pose', 'Classify', 'TransformerEncoderLayer', 'RepC3', 'RTDETRDecoder', 'AIFI',
           'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP', 'ASFF2', 'ASFF3',
           'conv_bn_relu_maxpool', 'ShuffleV2Block')
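If the registration above is correct, both new modules can now be imported from the package root; a quick check like the following catches typos before moving on to tasks.py:

from ultralytics.nn.modules import conv_bn_relu_maxpool, ShuffleV2Block
print(conv_bn_relu_maxpool, ShuffleV2Block)  # should print both classes without an ImportError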
Modify the parse_model function in the ultralytics/nn/tasks.py file:
from ultralytics.nn.modules import (AIFI, ASFF2, ASFF3, C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f,
                                    C3Ghost, C3x, Classify, Concat, Conv, Conv2, ConvTranspose, Detect, DWConv,
                                    DWConvTranspose2d, Focus, GhostBottleneck, GhostConv, HGBlock, HGStem, Pose, RepC3,
                                    RepConv, RTDETRDecoder, Segment, conv_bn_relu_maxpool, ShuffleV2Block)

if m in (Classify, Conv, ConvTranspose, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, Focus,
         BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, RepC3,
         conv_bn_relu_maxpool, ShuffleV2Block):
    c1, c2 = ch[f], args[0]
    if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)
        c2 = make_divisible(min(c2, max_channels) * width, 8)
    args = [c1, c2, *args[1:]]
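To see what this branch does to the backbone entries used later, the following standalone sketch reproduces the channel arithmetic for the n scale (width 0.25, max_channels 1024); make_divisible is re-implemented locally here instead of being imported:

import math

def make_divisible(x, divisor):
    # round the channel count up to the nearest multiple of divisor (local re-implementation)
    return math.ceil(x / divisor) * divisor

width, max_channels = 0.25, 1024
# stem entry: [-1, 1, conv_bn_relu_maxpool, [32]]
stem_out = make_divisible(min(32, max_channels) * width, 8)   # -> 8 channels
# first down-sampling entry: [-1, 1, ShuffleV2Block, [116, 2]]
c1, c2 = stem_out, 116
c2 = make_divisible(min(c2, max_channels) * width, 8)         # -> 32 channels
print([c1, c2, 2])  # [8, 32, 2]: ShuffleV2Block(8, 32, 2) is what actually gets built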
Here we replace all of the Conv and C2f modules in the Backbone. You could also extend the replacement further, or keep more of the original modules; which choice works better is something you will need to test for yourself.
Version 1:
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, conv_bn_relu_maxpool, [32]] # 0-P2/4
- [-1, 1, ShuffleV2Block, [116, 2]] # 1-P3/8
- [-1, 9, ShuffleV2Block, [116, 1]] # 2
- [-1, 1, ShuffleV2Block, [232, 2]] # 3-P4/16
- [-1, 21, ShuffleV2Block, [232, 1]] # 4
- [-1, 1, ShuffleV2Block, [464, 2]] # 5-P5/32
- [-1, 9, ShuffleV2Block, [464, 1]] # 6
- [-1, 1, SPPF, [1024, 5]] # 7
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 4], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 10
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 2], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 13 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 16 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 7], 1, Concat, [1]] # cat head P5
- [-1, 3, C2f, [1024]] # 19 (P5/32-large)
- [[13, 16, 19], 1, Detect, [nc]] # Detect(P3, P4, P5)
YOLOv8n-shufflenetv2 summary: 336 layers, 2005904 parameters, 2005888 gradients, 5.9 GFLOPs
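To reproduce this summary, build the model directly from the yaml above (the same path is used in the training example at the end of this post):

from ultralytics import YOLO

model = YOLO("backbone/yolov8n-shufflenetv2.yaml")  # build the model from the yaml above
model.info()  # prints the layer count, parameters and GFLOPs of the rebuilt model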
Recommended: Version 2 (borrowing ideas from YOLOv5-Lite):
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, conv_bn_relu_maxpool, [32]] # 0-P2/4
- [-1, 1, ShuffleV2Block, [116, 2]] # 1-P3/8
- [-1, 9, ShuffleV2Block, [116, 1]] # 2
- [-1, 1, ShuffleV2Block, [232, 2]] # 3-P4/16
- [-1, 21, ShuffleV2Block, [232, 1]] # 4
- [-1, 1, ShuffleV2Block, [464, 2]] # 5-P5/32
- [-1, 3, ShuffleV2Block, [464, 1]] # 6
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 4], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 9
- [-1, 1, nn.Upsample, [None, 2, 'nearest']]
- [[-1, 2], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 12 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 15 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 6], 1, Concat, [1]] # cat head P5
- [-1, 3, C2f, [1024]] # 18 (P5/32-large)
- [[12, 15, 18], 1, Detect, [nc]] # Detect(P3, P4, P5)
YOLOv8n-shufflenetv2 summary: 231 layers, 1975560 parameters, 1975544 gradients, 5.8 GFLOPs
from ultralytics import YOLO
# Load a model
model = YOLO("backbone/yolov8n-shufflenetv2.yaml") # build a new model from scratch
# Use the model
model.train(
    data="./mydata/data.yaml",
    epochs=300,
    batch=48)  # train the model
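Since the whole point of the ShuffleNetV2 backbone is deployment on mobile or embedded devices, exporting the trained weights is the natural next step. The sketch below uses the standard ultralytics export API; the weights path is an assumption based on the default runs/ directory layout:

from ultralytics import YOLO

# weights path is an assumption: ultralytics saves best.pt under runs/detect/<run_name>/weights/ by default
model = YOLO("runs/detect/train/weights/best.pt")
model.export(format="onnx")  # ONNX is a common interchange format for mobile/embedded runtimes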