引用内容
网站上相关内容很多就不赘述了
在大型网络中,如果直接编写模型更改的时候会比较麻烦。因此很多github上的开源项目都会有一个配置文件,在其中简单修改一些参数就能更改整个模型的架构。
残差结构:将其之前的第三个网络的输出与其之前的第一个网络输出相加得到这一层的输出
路由层:在此设置为-4即输出路由层之前第四层的特征图,如果有两个值就输出两层的特征图按深度拼接的特征图(主要用于不同尺寸的检测)
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[upsample]
stride=2
[route]
layers=-4
[route]
layers=-1,64
完整文件yolov3.cfg
[yolo]
mask=0,1,2
anchors=10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=3
num=9
jitter=.3
ignore_thresh=.5
truth_thresh=1
random=1
定义九组锚点,每个检测尺度通过 mask 从中选出三组,因此每个网格单元预测三个框,检测层规模为3。
[net]
#Testing
batch=1
subdivisions=1
#Training
#batch=64
#subdivisions=16
width=320
height=320
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation=1.5
exposure=1.5
hue=.1
包括了训练参数与模型输入等相关信息
思路是将每个块存储为字典,由函数读入并返回包含键的列表
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
def parse_cfg(cfgfile):
    """Parse a darknet .cfg file into a list of blocks.

    Each "[section]" header starts a new dict with its name stored under
    "type"; every following "key=value" line becomes a string entry in
    that dict. Blocks are returned in file order, so blocks[0] is
    normally the [net] block holding the training hyper-parameters.

    :param cfgfile: path to the .cfg file
    :return: list of dicts, one per cfg section
    """
    # with-statement guarantees the handle is closed (original leaked it)
    with open(cfgfile, 'r') as f:
        lines = f.read().split('\n')
    # strip surrounding whitespace FIRST so whitespace-only lines are
    # dropped too (the original kept " " lines and later crashed on line[0])
    lines = [x.strip() for x in lines]
    # drop empty lines and full-line comments
    lines = [x for x in lines if x and not x.startswith('#')]

    block = {}
    blocks = []
    for line in lines:
        if line[0] == '[':
            # a new section header: flush the block collected so far
            if block:
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].strip()
        else:
            # maxsplit=1 keeps values that themselves contain '=' intact
            key, value = line.split("=", 1)
            block[key.rstrip()] = value.lstrip()
    # the original returned here without appending, silently dropping
    # the last section of every cfg file
    if block:
        blocks.append(block)
    return blocks
if __name__ == "__main__":
    # Manual smoke test: parse a config file and dump the block list.
    parsed = parse_cfg("yolo.cfg")
    print(parsed)
网络构建函数
按照不同的类别构造不同的层,因为yolo一共有106层,如果均为卷积层的话利用循环就可以轻松搞定,但是由于路由层以及上采样等等特殊结构,所以利用别人写好的cfg加上构造函数相对而言比较简便。
def create_modules(blocks):
    """Build an nn.ModuleList from the block dicts produced by parse_cfg.

    blocks[0] must be the [net] block; it carries hyper-parameters and is
    returned untouched as net_info. Every following block becomes one
    nn.Sequential appended to the module list, so module index i matches
    cfg layer index i (which route/shortcut offsets rely on).

    :param blocks: list of dicts from parse_cfg (values are strings)
    :return: (net_info, nn.ModuleList)
    """
    net_info = blocks[0]
    module_list = nn.ModuleList()
    prev_filters = 3        # network input is an RGB image
    output_filters = []     # out-channels of every layer, for route/shortcut lookups

    for index, block in enumerate(blocks[1:]):
        module = nn.Sequential()

        if block["type"] == "convolutional":
            activation = block["activation"]
            # a conv followed by BN needs no bias (BN supplies the shift);
            # catch KeyError specifically instead of the original bare except
            try:
                batch_normalize = int(block["batch_normalize"])
                bias = False
            except KeyError:
                batch_normalize = 0
                bias = True
            filters = int(block["filters"])
            padding = int(block["pad"])
            kernel_size = int(block["size"])
            stride = int(block["stride"])
            # darknet's "pad=1" means "same" padding for odd kernels
            pad = (kernel_size - 1) // 2 if padding else 0
            conv = nn.Conv2d(prev_filters, filters, kernel_size=kernel_size,
                             stride=stride, padding=pad, bias=bias)
            module.add_module("conv_{0}".format(index), conv)
            if batch_normalize:
                module.add_module("batch_norm_{0}".format(index),
                                  nn.BatchNorm2d(filters))
            if activation == "leaky":
                module.add_module("leaky_{0}".format(index),
                                  nn.LeakyReLU(0.1, inplace=True))

        elif block["type"] == "upsample":
            stride = int(block["stride"])
            # use the cfg stride rather than a hard-coded 2 (identical for
            # the standard yolov3 cfg, where stride is always 2)
            upsample = nn.Upsample(scale_factor=stride, mode="bilinear")
            module.add_module("upsample_{0}".format(index), upsample)

        elif block["type"] == "route":
            block["layers"] = block["layers"].split(',')
            start = int(block["layers"][0])
            # a one-value route has no second entry; catch IndexError
            # specifically instead of the original bare except
            try:
                end = int(block["layers"][1])
            except IndexError:
                end = 0
            # convert absolute layer indices into offsets relative to here
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index
            # EmptyLayer is a placeholder; the forward pass does the
            # concatenation itself using these offsets
            module.add_module("route_{0}".format(index), EmptyLayer())
            if end < 0:
                # two feature maps concatenated along the depth axis
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]

        elif block["type"] == "shortcut":
            # residual add: output depth equals the previous layer's, so
            # `filters` is intentionally left at its previous value
            module.add_module("shortcut_{0}".format(index), EmptyLayer())

        elif block["type"] == "yolo":
            # use distinct comprehension variables (the original reused `x`,
            # which clobbers the loop variable under Python 2 semantics)
            mask = [int(m) for m in block["mask"].split(",")]
            anchor_vals = [int(a) for a in block["anchors"].split(",")]
            anchors = [(anchor_vals[i], anchor_vals[i + 1])
                       for i in range(0, len(anchor_vals), 2)]
            # keep only the three anchors this scale's mask selects
            anchors = [anchors[i] for i in mask]
            module.add_module("Detection_{0}".format(index),
                              DetectionLayer(anchors))

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)

    return net_info, module_list