2021SC@SDUSC
本周分析PaddleDetection-YOLOv3的模型结构
整体结构图:
对应PaddleDetection的流程图
modeling/architecture/yolo.py
在yaml的配置文件:/configs/_base_/models/yolov3_darknet53.yml
'''
YOLOv3:
backbone: DarkNet #主干网络类名
neck: YOLOv3FPN #neck FPN类名
yolo_head: YOLOv3Head #Head类名
post_process: BBoxPostProcess #BBox后处理类名
'''
YOLOv3整体构建类:
class YOLOv3(BaseArch):
    """YOLOv3 detector composed of a backbone, a neck, a head and a post-processor.

    ``model_arch`` runs backbone -> neck -> head and caches the head outputs
    on the instance. ``get_loss`` and ``get_pred`` both read that cached
    value, so ``model_arch`` has to run before either of them.

    ``self.inputs`` is not assigned in this class; presumably it is provided
    by ``BaseArch`` -- confirm against the base class.

    Args:
        backbone: Backbone component, called with ``self.inputs``.
        neck: Neck component, called with the backbone output.
        yolo_head: Head component; also provides ``get_loss`` and
            ``mask_anchors``.
        post_process: Callable returning ``(bbox, bbox_num)``.
    """

    __category__ = 'architecture'
    __inject__ = [
        'backbone',  # backbone member
        'neck',  # neck member
        'yolo_head',  # yolo_head member
        'post_process',  # post-processing member
    ]

    def __init__(self,
                 backbone='DarkNet',
                 neck='YOLOv3FPN',
                 yolo_head='YOLOv3Head',
                 post_process='BBoxPostProcess'):
        super(YOLOv3, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process

    def model_arch(self):
        """Run the forward pipeline and store the head outputs on the instance."""
        backbone_feats = self.backbone(self.inputs)
        neck_feats = self.neck(backbone_feats)
        self.yolo_head_outs = self.yolo_head(neck_feats)

    def get_loss(self):
        """Return the loss computed by the head from its cached outputs and the inputs."""
        return self.yolo_head.get_loss(self.yolo_head_outs, self.inputs)

    def get_pred(self):
        """Return a dict with the predicted boxes and the box count.

        Returns:
            dict: ``{"bbox": ..., "bbox_num": ...}`` as produced by
            ``post_process``.
        """
        head_outs = self.yolo_head_outs
        anchors = self.yolo_head.mask_anchors
        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']
        bbox, bbox_num = self.post_process(head_outs, anchors, im_shape,
                                           scale_factor)
        return {"bbox": bbox, "bbox_num": bbox_num}
该类为YOLOv3模型类,定义了一些基础参数;模型结构拆分为Backbone、Neck、Head、Post_Process四部分。
Backbone部分:
结构如下所示
在yaml的配置文件:/configs/_base_/models/yolov3_darknet53.yml
'''
###########*********在yaml的配置*************###########
# 主干网络
DarkNet:#初始化
depth: 53 #网络层数
return_idx: [2, 3, 4] # 主干网络返回的主要阶段特征用于FPN作进一步的特征融合
norm_type: sync_bn #同步BN
'''
相关引用库:
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
from ppdet.modeling.ops import batch_norm
__all__ = ['DarkNet', 'ConvBNLayer']
卷积+BN模块:
class ConvBNLayer(nn.Layer):
    """Bias-free 2-D convolution, then normalization, then an optional leaky ReLU.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels.
        filter_size: Convolution kernel size.
        stride: Convolution stride.
        groups: Number of convolution groups.
        padding: Convolution padding.
        norm_type: Forwarded to ``batch_norm`` to choose the normalization.
        act: Activation selector; only ``'leaky'`` applies an activation.
        name: Parameter-name prefix. The default is ``None``, but a string is
            required in practice: it is concatenated with ``'.conv.weights'``
            during construction, which raises ``TypeError`` for ``None``.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=1,
                 groups=1,
                 padding=0,
                 norm_type='bn',
                 act="leaky",
                 name=None):
        super().__init__()
        conv_weight_attr = ParamAttr(name=name + '.conv.weights')
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=conv_weight_attr,
            bias_attr=False)
        self.batch_norm = batch_norm(ch_out, norm_type=norm_type, name=name)
        self.act = act

    def forward(self, inputs):
        """Apply convolution and normalization, plus leaky ReLU (slope 0.1) if enabled."""
        normed = self.batch_norm(self.conv(inputs))
        if self.act != 'leaky':
            return normed
        return F.leaky_relu(normed, 0.1)
下采样模块:
class DownSample(nn.Layer):
    """Thin wrapper around one ConvBNLayer, defaulting to a 3x3 stride-2 convolution.

    Args:
        ch_in: Number of input channels.
        ch_out: Number of output channels (also kept as ``self.ch_out``).
        filter_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        norm_type: Normalization type forwarded to ConvBNLayer.
        name: Parameter-name prefix forwarded to ConvBNLayer.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size=3,
                 stride=2,
                 padding=1,
                 norm_type='bn',
                 name=None):
        super().__init__()
        conv_kwargs = dict(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            norm_type=norm_type,
            name=name)
        self.conv_bn_layer = ConvBNLayer(**conv_kwargs)
        self.ch_out = ch_out

    def forward(self, inputs):
        """Return the wrapped ConvBNLayer applied to ``inputs``."""
        return self.conv_bn_layer(inputs)
基础残差模块:
class BasicBlock(nn.Layer):
    """Residual unit: a 1x1 convolution, a 3x3 convolution, then a skip addition.

    The 1x1 convolution maps ``ch_in`` to ``ch_out`` channels and the 3x3
    convolution expands to ``ch_out * 2``. The result is added to the block
    input, so the input is expected to carry ``ch_out * 2`` channels.

    Args:
        ch_in: Number of input channels.
        ch_out: Width of the 1x1 bottleneck; the block outputs ``ch_out * 2``.
        norm_type: Normalization type forwarded to both ConvBNLayers.
        name: Parameter-name prefix; ``'.0'`` and ``'.1'`` are appended for
            the two convolutions, so it must be a string.
    """

    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
        super().__init__()
        # 1x1 bottleneck
        self.conv1 = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            padding=0,
            norm_type=norm_type,
            name=name + '.0')
        # 3x3 expansion back to twice the bottleneck width
        self.conv2 = ConvBNLayer(
            ch_in=ch_out,
            ch_out=ch_out * 2,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            name=name + '.1')

    def forward(self, inputs):
        """Return ``inputs`` plus the output of the two stacked convolutions."""
        residual = self.conv2(self.conv1(inputs))
        return paddle.add(x=inputs, y=residual)
由相同的基础模块组成主干网络中的大模块(darknet中有5个)
class Blocks(nn.Layer):
    """A stage made of ``count`` BasicBlocks applied one after another.

    The first block maps ``ch_in`` to ``ch_out * 2`` channels; each later
    block takes ``ch_out * 2`` channels in and produces ``ch_out * 2`` out.

    Args:
        ch_in: Number of channels entering the stage.
        ch_out: Bottleneck width handed to every BasicBlock.
        count: Total number of BasicBlocks in the stage.
        norm_type: Normalization type forwarded to every BasicBlock.
        name: Name prefix; block ``i`` is named ``'<name>.<i>'``, so it must
            be a string.
    """

    def __init__(self, ch_in, ch_out, count, norm_type='bn', name=None):
        super().__init__()
        self.basicblock0 = BasicBlock(
            ch_in, ch_out, norm_type=norm_type, name=name + '.0')
        # Blocks 1..count-1 are registered as sublayers and also kept in a
        # plain list so forward() can walk them in order.
        self.res_out_list = []
        for idx in range(1, count):
            layer_name = '{}.{}'.format(name, idx)
            block = BasicBlock(
                ch_out * 2, ch_out, norm_type=norm_type, name=layer_name)
            self.res_out_list.append(self.add_sublayer(layer_name, block))
        self.ch_out = ch_out

    def forward(self, inputs):
        """Pass ``inputs`` through every BasicBlock of the stage in order."""
        out = self.basicblock0(inputs)
        for block in self.res_out_list:
            out = block(out)
        return out
darknet53网络配置(各阶段中基础残差模块的重复次数):
DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
darknet主干网络(将上面所有模块,按照darknet53的结构组网)
@register
@serializable
class DarkNet(nn.Layer):
    """DarkNet backbone: an input convolution, a downsample, then residual stages.

    Each stage is a ``Blocks`` layer whose block count comes from
    ``DarkNet_cfg[depth]``. A ``DownSample`` layer follows every stage except
    the last one.

    Args:
        depth: Key into ``DarkNet_cfg`` selecting the per-stage block counts.
        freeze_at: Index of the stage whose output gets ``stop_gradient``
            set. The default ``-1`` matches no stage.
        return_idx: Indices of the stages whose outputs ``forward`` returns.
        num_stages: Number of stages to build. The channel table below has
            five entries.
        norm_type: Normalization type forwarded to every sublayer.
    """

    __shared__ = ['norm_type']

    def __init__(self,
                 depth=53,
                 freeze_at=-1,
                 return_idx=[2, 3, 4],
                 num_stages=5,
                 norm_type='bn'):
        super(DarkNet, self).__init__()
        self.depth = depth
        self.freeze_at = freeze_at
        # Store a copy: the signature default is one list object shared by
        # every call, so keeping a reference would let a mutation on one
        # instance leak into all later default-constructed instances.
        self.return_idx = list(return_idx)
        self.num_stages = num_stages
        self.stages = DarkNet_cfg[self.depth][0:num_stages]

        # Input convolution: 3 -> 32 channels, stride 1
        self.conv0 = ConvBNLayer(
            ch_in=3,
            ch_out=32,
            filter_size=3,
            stride=1,
            padding=1,
            norm_type=norm_type,
            name='yolo_input')

        # First downsample: 32 -> 64 channels
        self.downsample0 = DownSample(
            ch_in=32,
            ch_out=32 * 2,
            norm_type=norm_type,
            name='yolo_input.downsample')

        self.darknet_conv_block_list = []
        self.downsample_list = []
        # Input channel count of each stage
        ch_in = [64, 128, 256, 512, 1024]

        # Stages are registered before the downsample layers; the
        # registration order is kept as in the original.
        for i, stage in enumerate(self.stages):
            name = 'stage.{}'.format(i)
            conv_block = self.add_sublayer(
                name,
                Blocks(
                    int(ch_in[i]),
                    32 * (2**i),
                    stage,
                    norm_type=norm_type,
                    name=name))
            self.darknet_conv_block_list.append(conv_block)

        # One channel-doubling downsample between consecutive stages
        for i in range(num_stages - 1):
            down_name = 'stage.{}.downsample'.format(i)
            downsample = self.add_sublayer(
                down_name,
                DownSample(
                    ch_in=32 * (2**(i + 1)),
                    ch_out=32 * (2**(i + 2)),
                    norm_type=norm_type,
                    name=down_name))
            self.downsample_list.append(downsample)

    def forward(self, inputs):
        """Run the backbone and collect the outputs of the selected stages.

        Args:
            inputs: Mapping that provides the input tensor under ``'image'``.

        Returns:
            list: Outputs of the stages listed in ``return_idx``, in stage
            order.
        """
        x = inputs['image']

        out = self.conv0(x)
        out = self.downsample0(out)
        blocks = []
        for i, conv_block_i in enumerate(self.darknet_conv_block_list):
            out = conv_block_i(out)
            if i == self.freeze_at:
                out.stop_gradient = True
            if i in self.return_idx:
                blocks.append(out)
            # No downsample after the last stage
            if i < self.num_stages - 1:
                out = self.downsample_list[i](out)
        return blocks
Neck部分:算法结构图:
包含两个模块:YoloDetBlock(yolo检测模块)和YOLOv3FPN(特征金字塔模块)
在yaml的配置文件:/configs/_base_/models/yolov3_darknet53.yml
'''
YOLOv3FPN:#初始化
feat_channels: [1024, 768, 384] #FPN融合后的特征通道数
# 根据特征图尺寸,在特征图的每个位置生成N个大小、长宽比各不同anchor
# N = anchor_sizes * aspect_ratios
'''
相关引用库:
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from ppdet.core.workspace import register, serializable
from ..backbone.darknet import ConvBNLayer
yolo检测模块(yolov3检测算法提出的一种特征融合方法)
class YoloDetBlock(nn.Layer):
    """Five alternating 1x1 / 3x3 ConvBNLayers plus a separate 3x3 "tip" convolution.

    The five-layer stack ends with ``channel`` output channels and produces
    ``route``. The tip convolution expands ``route`` to ``channel * 2``
    channels.

    Args:
        ch_in: Number of channels entering the block.
        channel: Base width of the block; must be even.
        norm_type: Normalization type forwarded to every ConvBNLayer.
        name: Parameter-name prefix; per-layer suffixes are appended to it.
    """

    def __init__(self, ch_in, channel, norm_type, name):
        super().__init__()
        self.ch_in = ch_in
        self.channel = channel
        assert channel % 2 == 0, \
            "channel {} cannot be divided by 2".format(channel)

        # (sublayer name, in channels, out channels, kernel size, name suffix)
        conv_specs = [
            ['conv0', ch_in, channel, 1, '.0.0'],
            ['conv1', channel, channel * 2, 3, '.0.1'],
            ['conv2', channel * 2, channel, 1, '.1.0'],
            ['conv3', channel, channel * 2, 3, '.1.1'],
            ['route', channel * 2, channel, 1, '.2'],
        ]

        self.conv_module = nn.Sequential()
        for layer_name, in_ch, out_ch, kernel, suffix in conv_specs:
            # (kernel - 1) // 2 gives padding 0 for 1x1 and 1 for 3x3
            layer = ConvBNLayer(
                ch_in=in_ch,
                ch_out=out_ch,
                filter_size=kernel,
                padding=(kernel - 1) // 2,
                norm_type=norm_type,
                name=name + suffix)
            self.conv_module.add_sublayer(layer_name, layer)

        self.tip = ConvBNLayer(
            ch_in=channel,
            ch_out=channel * 2,
            filter_size=3,
            padding=1,
            norm_type=norm_type,
            name=name + '.tip')

    def forward(self, inputs):
        """Return ``(route, tip)``: the stack output and its tip-convolved version."""
        route = self.conv_module(inputs)
        return route, self.tip(route)
yolov3 特征金字塔模块:
@register
@serializable
class YOLOv3FPN(nn.Layer):
    """YOLOv3 feature pyramid: one YoloDetBlock per level, linked by upsampled routes.

    Level ``i`` uses a YoloDetBlock with base width ``512 // 2**i``. Every
    level except the last also owns a 1x1 transition convolution. Its output
    is upsampled by 2 and concatenated with the input of the next level.

    Args:
        feat_channels: Input channel count of each level's YoloDetBlock;
            must be non-empty.
        norm_type: Normalization type forwarded to every sublayer.
    """

    __shared__ = ['norm_type']

    def __init__(self, feat_channels=[1024, 768, 384], norm_type='bn'):
        super(YOLOv3FPN, self).__init__()
        assert len(feat_channels) > 0, "feat_channels length should > 0"
        # Store a copy: the signature default is one list object shared by
        # every call, so keeping a reference would let a mutation on one
        # instance leak into all later default-constructed instances.
        self.feat_channels = list(feat_channels)
        self.num_blocks = len(self.feat_channels)
        self.yolo_blocks = []
        self.routes = []
        for i in range(self.num_blocks):
            block_name = 'yolo_block.{}'.format(i)
            yolo_block = self.add_sublayer(
                block_name,
                YoloDetBlock(
                    self.feat_channels[i],
                    channel=512 // (2**i),
                    norm_type=norm_type,
                    name=block_name))
            self.yolo_blocks.append(yolo_block)

            # The last level has no following level to route into
            if i < self.num_blocks - 1:
                transition_name = 'yolo_transition.{}'.format(i)
                route = self.add_sublayer(
                    transition_name,
                    ConvBNLayer(
                        ch_in=512 // (2**i),
                        ch_out=256 // (2**i),
                        filter_size=1,
                        stride=1,
                        padding=0,
                        norm_type=norm_type,
                        name=transition_name))
                self.routes.append(route)

    def forward(self, blocks):
        """Fuse the input feature maps and return one output per level.

        Args:
            blocks: Sequence of feature maps whose length equals the number
                of levels. It is processed in reverse order.

        Returns:
            list: The ``tip`` output of each YoloDetBlock, in processing
            order (the reverse of the input order).
        """
        assert len(blocks) == self.num_blocks
        blocks = blocks[::-1]
        yolo_feats = []
        route = None  # set at the end of each non-final iteration
        for i, block in enumerate(blocks):
            if i > 0:
                # Join the upsampled route from the previous level with the
                # current feature map along the channel axis.
                block = paddle.concat([route, block], axis=1)
            route, tip = self.yolo_blocks[i](block)
            yolo_feats.append(tip)

            if i < self.num_blocks - 1:
                route = self.routes[i](route)
                route = F.interpolate(route, scale_factor=2.)

        return yolo_feats