After working through the theory behind YOLOv3, I felt there was still a gap between understanding the principles and actually implementing them, so I looked for a reference implementation on GitHub. This article annotates that code in order to understand the implementation in more depth.
The GitHub repository is
https://github.com/ayooshkathuria/pytorch-yolo-v3
The purpose of this code is to run detection on new data under the PyTorch framework, using the original Darknet configuration and weight files.
The original author provides a tutorial, linked below:
https://blog.paperspace.com/how-to-implement-a-yolo-object-detector-in-pytorch/
The corresponding Chinese tutorial comes in two parts:
https://www.jiqizhixin.com/articles/2018-04-23-3
https://www.jiqizhixin.com/articles/042602?from=synced&keyword=%E4%BB%8E%E9%9B%B6%E5%BC%80%E5%A7%8BPyTorch%E9%A1%B9%E7%9B%AE%EF%BC%9AYOLO%20v3%E7%9B%AE%E6%A0%87%E6%A3%80%E6%B5%8B%E5%AE%9E%E7%8E%B0
The annotated walkthrough follows.
First, the configuration file is parsed:
def parse_cfg(cfgfile):
    #Parse the configuration file.
    #Returns a list of blocks; each block (a dict) describes one block of the network to be built.
    file = open(cfgfile, 'r')                        #open the cfg file
    lines = file.read().split('\n')                  #read it in line by line
    lines = [x for x in lines if len(x) > 0]         #drop empty lines
    lines = [x for x in lines if x[0] != '#']        #drop comment lines
    lines = [x.rstrip().lstrip() for x in lines]     #strip whitespace from both ends

    block = {}
    blocks = []

    for line in lines:
        if line[0] == "[":               #"[" marks the start of a new block
            if len(block) != 0:          #if block already holds the previous block's data, store it and start a new one
                blocks.append(block)
                block = {}
            block["type"] = line[1:-1].rstrip()       #strip the surrounding brackets
        else:
            key,value = line.split("=")               #split each line at the equals sign
            block[key.rstrip()] = value.lstrip()      #store it in the dict
    blocks.append(block)                 #append the last block to the list
    return blocks
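As a quick check of what parse_cfg returns: every block is a plain Python dict whose values are all strings, which is why create_modules below casts them with int(). A minimal sketch, assuming the standard yolov3.cfg is stored under cfg/ (the path is an assumption):

blocks = parse_cfg("cfg/yolov3.cfg")    #hypothetical path

print(blocks[0]["type"])            # 'net' -> network-level settings such as 'height', 'width', 'channels'
print(blocks[1]["type"])            # 'convolutional' -> keys like 'filters', 'size', 'stride', 'pad', 'activation'
print(type(blocks[1]["filters"]))   # <class 'str'>, so the values still need to be converted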
When creating the model, the information in the configuration file has to be converted into module objects and network-level information.
For layers whose forward pass is already fully determined here, such as a "convolutional" block, which may contain "Conv2d", "BatchNorm2d" and "LeakyReLU", nn.Sequential can be used as a wrapper; once wrapped in nn.Sequential, the forward() of each contained layer is called automatically. For layers whose forward behaviour cannot be decided at this point, such as "route" and "shortcut", an empty layer is explicitly used as a placeholder, and the concrete implementation is left to class Darknet. A minimal sketch of these placeholder classes is shown below.
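For reference, the placeholder modules referenced in create_modules are essentially empty containers; the sketch below is consistent with how they are used later (EmptyLayer holds no logic at all, DetectionLayer only stores the anchors selected by the mask):

import torch
import torch.nn as nn

class EmptyLayer(nn.Module):
    #placeholder for "route" and "shortcut"; the real logic lives in Darknet.forward()
    def __init__(self):
        super(EmptyLayer, self).__init__()

class DetectionLayer(nn.Module):
    #holds the anchors selected by the "mask" entry of a yolo block
    def __init__(self, anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors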
def create_modules(blocks):
    net_info = blocks[0]            #the [net] block holds the network-level configuration
    module_list = nn.ModuleList()   #container for all generated modules
    index = 0                       #block index, needed to resolve route/shortcut offsets
    prev_filters = 3                #number of output channels of the previous layer; the input image has 3 channels
    output_filters = []             #number of output filters of every block, in order

    for x in blocks:                #build a module for each block
        module = nn.Sequential()    #wrap every block in an nn.Sequential

        if (x["type"] == "net"):    #skip the [net] block
            continue

        #If it's a convolutional layer
        if (x["type"] == "convolutional"):
            #Get the info about the layer
            activation = x["activation"]
            try:
                batch_normalize = int(x["batch_normalize"])   #check whether a BN layer is present; BN and bias are mutually exclusive
                bias = False
            except:
                batch_normalize = 0
                bias = True

            filters = int(x["filters"])      #number of convolution kernels
            padding = int(x["pad"])          #whether padding is used
            kernel_size = int(x["size"])     #kernel size
            stride = int(x["stride"])        #stride

            if padding:                      #if padding is used, compute the amount of padding
                pad = (kernel_size - 1) // 2
            else:
                pad = 0

            #Add the convolutional layer
            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias = bias)
            module.add_module("conv_{0}".format(index), conv)   #add the conv layer to the module

            #Add the Batch Norm Layer
            if batch_normalize:   #if batch normalization is used, add a BN layer; its argument is the number of input channels, i.e. the number of kernels of the preceding conv layer
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index), bn)

            #Check the activation.
            #It is either Linear or a Leaky ReLU for YOLO
            if activation == "leaky":        #YOLO uses LeakyReLU
                activn = nn.LeakyReLU(0.1, inplace = True)
                module.add_module("leaky_{0}".format(index), activn)

        elif (x["type"] == "upsample"):      #upsampling layer
            stride = int(x["stride"])
            # upsample = Upsample(stride)
            upsample = nn.Upsample(scale_factor = 2, mode = "nearest")
            module.add_module("upsample_{}".format(index), upsample)

        #If it is a route layer
        elif (x["type"] == "route"):         #route layer: concatenates one or two earlier feature maps along the channel dimension
            x["layers"] = x["layers"].split(',')

            #Start of a route
            start = int(x["layers"][0])
            #end, if there exists one.
            try:
                end = int(x["layers"][1])
            except:
                end = 0

            #Positive annotation
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index

            route = EmptyLayer()
            module.add_module("route_{0}".format(index), route)

            if end < 0:
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]

        #shortcut corresponds to skip connection
        elif x["type"] == "shortcut":        #shortcut adds two feature maps element-wise
            from_ = int(x["from"])
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)

        elif x["type"] == "maxpool":         #pooling layer
            stride = int(x["stride"])
            size = int(x["size"])
            if stride != 1:
                maxpool = nn.MaxPool2d(size, stride)
            else:
                maxpool = MaxPoolStride1(size)
            module.add_module("maxpool_{}".format(index), maxpool)

        #Yolo is the detection layer
        elif x["type"] == "yolo":            #detection layer
            mask = x["mask"].split(",")
            mask = [int(m) for m in mask]

            anchors = x["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in mask]   #keep only the anchors selected by the mask

            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)

        else:
            print("Something I dunno")
            assert False

        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
        index += 1

    return (net_info, module_list)
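Chaining the two functions gives the complete layer list; a minimal sketch (the path and the printed indices are only for illustration):

blocks = parse_cfg("cfg/yolov3.cfg")
net_info, module_list = create_modules(blocks)

print(net_info["height"])   #input resolution, read again later in forward()
print(module_list[0])       #Sequential(Conv2d, BatchNorm2d, LeakyReLU) built from the first [convolutional] block
print(len(module_list))     #one entry per block after [net], including the EmptyLayer placeholders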
The Darknet class mainly implements three functions: forward(), load_weights() and save_weights().
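The forward() shown below reads self.blocks, self.net_info and self.module_list, so the constructor of the Darknet class essentially just chains the two functions above; a minimal sketch consistent with that usage:

class Darknet(nn.Module):
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)    #raw cfg blocks; blocks[0] is the [net] block
        self.net_info, self.module_list = create_modules(self.blocks)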
In the create_modules function above, the "route" and "shortcut" blocks were only given empty placeholder layers; their actual behaviour, together with the concatenation of the detection scales, is implemented in forward():
def forward(self, x, CUDA):
    detections = []
    modules = self.blocks[1:]
    outputs = {}   #We cache the outputs for the route layer

    write = 0
    for i in range(len(modules)):
        module_type = (modules[i]["type"])
        if module_type == "convolutional" or module_type == "upsample" or module_type == "maxpool":
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":   #route layer: concatenate the two specified layers along the channel dimension, or simply pass through a single specified layer
            layers = modules[i]["layers"]
            layers = [int(a) for a in layers]

            if (layers[0]) > 0:
                layers[0] = layers[0] - i

            if len(layers) == 1:
                x = outputs[i + (layers[0])]
            else:
                if (layers[1]) > 0:
                    layers[1] = layers[1] - i

                map1 = outputs[i + layers[0]]
                map2 = outputs[i + layers[1]]

                x = torch.cat((map1, map2), 1)
            outputs[i] = x

        elif module_type == "shortcut":   #shortcut layer: add the specified layer to the previous layer element-wise
            from_ = int(modules[i]["from"])
            x = outputs[i-1] + outputs[i+from_]
            outputs[i] = x

        elif module_type == 'yolo':   #detection layer: the actual transform is done by predict_transform, and the outputs of the detection scales are concatenated
            anchors = self.module_list[i][0].anchors
            #Get the input dimensions
            inp_dim = int(self.net_info["height"])
            #Get the number of classes
            num_classes = int(modules[i]["classes"])

            #Output the result
            x = x.data
            x = predict_transform(x, inp_dim, anchors, num_classes, CUDA)

            if type(x) == int:
                continue

            if not write:   #if no detections have been collected yet, initialise detections with the first scale
                detections = x
                write = 1
            else:
                detections = torch.cat((detections, x), 1)

            outputs[i] = outputs[i-1]

    try:
        return detections
    except:
        return 0
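To make the concatenation in the yolo branch concrete: predict_transform reshapes each detection feature map to (batch, grid*grid*num_anchors, 5 + num_classes). For a 416x416 input and the 80 COCO classes (values below assume the standard yolov3.cfg), the three scales contribute:

boxes_per_scale = [13*13*3, 26*26*3, 52*52*3]   # = [507, 2028, 8112]
total = sum(boxes_per_scale)                    # = 10647
# after the torch.cat calls, detections has shape (batch, 10647, 5 + 80) = (batch, 10647, 85)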
load_weights() and save_weights() are not covered in detail here.
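For completeness, a typical inference call with this class looks roughly as follows; the file names and the 416 resolution are assumptions, and the random tensor stands in for a properly preprocessed image batch:

model = Darknet("cfg/yolov3.cfg")
model.load_weights("yolov3.weights")      #load the original Darknet binary weights
model.net_info["height"] = 416            #the input fed to forward() must match this resolution
model.eval()

img = torch.randn(1, 3, 416, 416)         #placeholder for a preprocessed image batch
with torch.no_grad():
    detections = model(img, False)        #CUDA=False, i.e. run on the CPU
print(detections.shape)                   #torch.Size([1, 10647, 85]) for the standard cfg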