2018 论文链接:ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design
PyTorch code: ShuffleNet-Series
# TODO:build shuffle block
# -------------------------------------------------------------------------
def channel_shuffle(x, groups):
    """Interleave the channels of a 4-D tensor across ``groups`` groups.

    The channel axis is split into ``groups`` contiguous chunks and the
    chunks are interleaved, so that output channel ``k * groups + g`` is
    input channel ``g * channels_per_group + k``.

    Args:
        x: Tensor that unpacks into (batch, channels, height, width).
        groups: Number of groups to split the channel axis into.

    Returns:
        A tensor of the same shape as ``x`` with its channels permuted.

    Raises:
        RuntimeError: If the channel count cannot be viewed as
            ``groups * (channels // groups)`` (raised by ``Tensor.view``).
    """
    # Read the shape from the tensor itself rather than through ``.data``.
    batchsize, num_channels, height, width = x.shape
    channels_per_group = num_channels // groups

    # reshape: expose (groups, channels_per_group) as two separate axes
    x = x.view(batchsize, groups, channels_per_group, height, width)

    # swap the two axes; ``contiguous`` is required before the next ``view``
    x = torch.transpose(x, 1, 2).contiguous()

    # flatten back to (batch, channels, height, width)
    return x.view(batchsize, -1, height, width)
class conv_bn_relu_maxpool(nn.Module):
    """Stem block: 3x3 stride-2 conv -> BatchNorm -> ReLU -> 3x3 stride-2 max-pool.

    The conv and the max-pool each halve the spatial size, so the output
    is 1/4 of the input resolution.

    Args:
        c1: Number of input channels.
        c2: Number of output channels.
    """

    def __init__(self, c1, c2):  # ch_in, ch_out
        super(conv_bn_relu_maxpool, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(c1, c2, kernel_size=3, stride=2, padding=1, bias=False),
            # The conv has no bias because the following BatchNorm supplies one.
            nn.BatchNorm2d(c2),
            nn.ReLU(inplace=True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

    def forward(self, x):
        """Apply the conv stem, then the max-pool, to ``x``."""
        return self.maxpool(self.conv(x))
class ShuffleV2Block(nn.Module):
    """ShuffleNetV2 unit with two branches followed by a channel shuffle.

    With ``stride == 1`` the input is split in half along the channel axis:
    one half passes through unchanged and the other goes through
    ``branch2``. With ``stride > 1`` the full input feeds both ``branch1``
    and ``branch2``, each of which downsamples spatially. In both cases the
    two results are concatenated and their channels shuffled in 2 groups.

    Args:
        inp: Number of input channels.
        oup: Number of output channels; each branch produces ``oup // 2``.
        stride: Stride of the depthwise convolutions, from 1 to 3.

    Raises:
        ValueError: If ``stride`` is outside ``1..3``.
        AssertionError: If ``stride == 1`` and ``inp != 2 * (oup // 2)``.
    """

    def __init__(self, inp, oup, stride):
        super(ShuffleV2Block, self).__init__()

        if not (1 <= stride <= 3):
            raise ValueError('illegal stride value')
        self.stride = stride

        branch_features = oup // 2
        # With stride 1 half of the input is passed through untouched, so the
        # input width must already equal the (even) output width.
        assert (self.stride != 1) or (inp == branch_features << 1)

        if self.stride > 1:
            # Downsampling shortcut: depthwise 3x3 followed by pointwise 1x1.
            self.branch1 = nn.Sequential(
                self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(inp),
                nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True),
            )
        else:
            # Identity placeholder; forward() does not use it when stride == 1.
            self.branch1 = nn.Sequential()

        # Main branch: pointwise 1x1 -> depthwise 3x3 -> pointwise 1x1.
        self.branch2 = nn.Sequential(
            nn.Conv2d(inp if (self.stride > 1) else branch_features,
                      branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        """Return a depthwise ``nn.Conv2d`` (``groups`` equal to ``i``)."""
        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

    def forward(self, x):
        """Run both branches, concatenate on the channel axis and shuffle."""
        if self.stride == 1:
            # Split channels: x1 is passed through, x2 is transformed.
            x1, x2 = x.chunk(2, dim=1)
            out = torch.cat((x1, self.branch2(x2)), dim=1)
        else:
            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)

        out = channel_shuffle(out, 2)

        return out
# -------------------------------------------------------------------------
from .block import (ASFF2, ASFF3, C1, C2, C3, C3TR, DFL, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x,
GhostBottleneck, HGBlock, HGStem, Proto, RepC3, conv_bn_relu_maxpool, ShuffleV2Block)
# Public names exported by this package. The last two entries,
# 'conv_bn_relu_maxpool' and 'ShuffleV2Block', are the ShuffleNetV2
# building blocks imported from .block.
__all__ = ('Conv', 'Conv2', 'LightConv', 'RepConv', 'DWConv', 'DWConvTranspose2d', 'ConvTranspose', 'Focus',
'GhostConv', 'ChannelAttention', 'SpatialAttention', 'CBAM', 'Concat', 'TransformerLayer',
'TransformerBlock', 'MLPBlock', 'LayerNorm2d', 'DFL', 'HGBlock', 'HGStem', 'SPP', 'SPPF', 'C1', 'C2', 'C3',
'C2f', 'C3x', 'C3TR', 'C3Ghost', 'GhostBottleneck', 'Bottleneck', 'BottleneckCSP', 'Proto', 'Detect',
'Segment', 'Pose', 'Classify', 'TransformerEncoderLayer', 'RepC3', 'RTDETRDecoder', 'AIFI',
'DeformableTransformerDecoder', 'DeformableTransformerDecoderLayer', 'MSDeformAttn', 'MLP', 'ASFF2', 'ASFF3',
'conv_bn_relu_maxpool', 'ShuffleV2Block')
from ultralytics.nn.modules import (AIFI, ASFF2, ASFF3, C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f,
C3Ghost, C3x, Classify, Concat, Conv, Conv2, ConvTranspose, Detect, DWConv,
DWConvTranspose2d, Focus, GhostBottleneck, GhostConv, HGBlock, HGStem, Pose, RepC3,
RepConv, RTDETRDecoder, Segment, conv_bn_relu_maxpool, ShuffleV2Block)
if m in (Classify, Conv, ConvTranspose, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, Focus,
BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, RepC3,
conv_bn_relu_maxpool, ShuffleV2Block):
c1, c2 = ch[f], args[0]
if c2 != nc: # if c2 not equal to number of classes (i.e. for Classify() output)
c2 = make_divisible(min(c2, max_channels) * width, 8)
args = [c1, c2, *args[1:]]
# Ultralytics YOLO, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# This variant uses a ShuffleNetV2 backbone followed by an SPPF layer.

# Parameters
nc: 80  # number of classes
scales:  # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768]  # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512]  # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512]  # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# ShuffleNetV2 backbone (replaces the YOLOv8.0n backbone)
backbone:
  # [from, repeats, module, args]
  - [-1, 1, conv_bn_relu_maxpool, [32]]  # 0-P2/4 (stride-2 conv followed by stride-2 max-pool)
  - [-1, 1, ShuffleV2Block, [116, 2]]  # 1-P3/8
  - [-1, 9, ShuffleV2Block, [116, 1]]  # 2
  - [-1, 1, ShuffleV2Block, [232, 2]]  # 3-P4/16
  - [-1, 21, ShuffleV2Block, [232, 1]]  # 4
  - [-1, 1, ShuffleV2Block, [464, 2]]  # 5-P5/32
  - [-1, 9, ShuffleV2Block, [464, 1]]  # 6
  - [-1, 1, SPPF, [1024, 5]]  # 7

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 10

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 2], 1, Concat, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 13 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 10], 1, Concat, [1]]  # cat head P4
  - [-1, 3, C2f, [512]]  # 16 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 7], 1, Concat, [1]]  # cat head P5
  - [-1, 3, C2f, [1024]]  # 19 (P5/32-large)

  - [[13, 16, 19], 1, Detect, [nc]]  # Detect(P3, P4, P5)
YOLOv8n-shufflenetv2 summary: 336 layers, 2005904 parameters, 2005888 gradients, 5.9 GFLOPs
# Ultralytics YOLO, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# This variant uses a ShuffleNetV2 backbone without an SPPF layer.

# Parameters
nc: 80  # number of classes
scales:  # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768]  # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512]  # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512]  # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# ShuffleNetV2 backbone (replaces the YOLOv8.0n backbone)
backbone:
  # [from, repeats, module, args]
  - [-1, 1, conv_bn_relu_maxpool, [32]]  # 0-P2/4 (stride-2 conv followed by stride-2 max-pool)
  - [-1, 1, ShuffleV2Block, [116, 2]]  # 1-P3/8
  - [-1, 9, ShuffleV2Block, [116, 1]]  # 2
  - [-1, 1, ShuffleV2Block, [232, 2]]  # 3-P4/16
  - [-1, 21, ShuffleV2Block, [232, 1]]  # 4
  - [-1, 1, ShuffleV2Block, [464, 2]]  # 5-P5/32
  - [-1, 3, ShuffleV2Block, [464, 1]]  # 6

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]]  # cat backbone P4
  - [-1, 3, C2f, [512]]  # 9

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 2], 1, Concat, [1]]  # cat backbone P3
  - [-1, 3, C2f, [256]]  # 12 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 9], 1, Concat, [1]]  # cat head P4
  - [-1, 3, C2f, [512]]  # 15 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 6], 1, Concat, [1]]  # cat head P5
  - [-1, 3, C2f, [1024]]  # 18 (P5/32-large)

  - [[12, 15, 18], 1, Detect, [nc]]  # Detect(P3, P4, P5)
YOLOv8n-shufflenetv2 summary: 231 layers, 1975560 parameters, 1975544 gradients, 5.8 GFLOPs
from ultralytics import YOLO

# Build a new model from scratch using the ShuffleNetV2-backbone YAML config.
model = YOLO(model="backbone/yolov8n-shufflenetv2.yaml")
from ultralytics import YOLO
# Load a model
model = YOLO("backbone/yolov8n-shufflenetv2.yaml") # build a new model from scratch
# Use the model
batch=48) # train the model