FCOS: Fully Convolutional One-Stage Object Detection (ICCV 2019) Code Reproduction

Recapping the paper-reading (translation) post, the overall modeling is as follows:

1. Backbone: use an off-the-shelf model such as ResNet-50 and extract 5 blocks: >>> [C1, C2, C3, C4, C5]

2. FPN module: follows the standard FPN. In general the backbone extracts increasingly abstract features, which then pass through a fusion stage. [https://zhuanlan.zhihu.com/p/148738276]

The current plan is to first strengthen channel interaction with an SE block, pass each level through a 1x1 convolution, and then run the RetinaNet-style FPN/BiFPN fusion (see the sketch after this list).

>>> [P1, P2, P3, P4, P5] = F[C1, C2, C3, C4, C5]

3. Detection head: [pre_conf_i, pre_reg_i, pre_ctn_i] = detect_head[Pi]  {Pi = [P1, P2, P3, P4, P5]}

4. Build the offset targets: lrtb

5. Build the class targets: cls

6. Build the centerness targets: ctn

7. Compute cls_loss_i(pre_conf_i, cls)

8. Compute reg_loss_i(pre_reg_i, lrtb)

9. Compute ctn_loss_i(pre_ctn_i, ctn)
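For step 2 above, here is a minimal sketch of the SE (squeeze-and-excitation) channel-attention block mentioned there; se_block is a hypothetical helper written with plain TF1 layers and is not code from this repo.

import tensorflow as tf

def se_block(feature, ratio=16, scope='se_block'):
    # Squeeze-and-Excitation: re-weight channels before the 1x1 lateral convolutions.
    with tf.variable_scope(scope):
        channels = feature.get_shape().as_list()[-1]
        squeeze = tf.reduce_mean(feature, axis=[1, 2])               # global average pool -> (N, C)
        excite = tf.layers.dense(squeeze, channels // ratio, activation=tf.nn.relu)
        excite = tf.layers.dense(excite, channels, activation=tf.nn.sigmoid)
        return feature * tf.reshape(excite, [-1, 1, 1, channels])    # channel-wise rescaling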

Now let's implement it step by step.

import tensorflow as tf
from utils.data_aug_1 import file_reader, data_process # data loading / augmentation
from models.Resnet_50 import resnet_v2_50
from models.fpn import fpn_block
from models.fcos_head import fcos_head
from models.get_target import compute_targets_for_locations, compute_centerness_targets
from models.loss import focal_loss, diou_loss
from utils import args
from models.config_optimizer import config_learning_rate, config_optimizer

# helper functions for validation
from tqdm import trange
import numpy as np
import timeit
import random
from utils.pred_box import get_box_
from utils.eval import parse_gt_rec, voc_eval, get_preds_label, make_summary, plot, layer_plot

num_classes = 20
train_lines = file_reader(file_path=r"./dataset/train.txt", num_box_itr=60) # train_lines = a list whose entries all have the same length

inputs = tf.placeholder(tf.float32, [None, 1024, 1024, 3], name='inputs')
labels = tf.placeholder(tf.float32, [None,None,5], name='labels')
is_training = tf.placeholder(tf.bool, name="phase_train") 


def Backbone(image, num_classes=None, is_training=is_training):
    _, end_points = resnet_v2_50(image, num_classes, is_training=is_training,
                               global_pool=False, output_stride=None, reuse=None, scope='resnet_v2_50')
    C3, C4, C5 = end_points['resnet_v2_50/block1'], end_points['resnet_v2_50/block2'], end_points['resnet_v2_50/block3']
    return [C3, C4, C5]

[C3, C4, C5] = Backbone(inputs, num_classes=num_classes, is_training=is_training)   # [C3, C4, C5]    

[P3, P4, P5, P6, P7] = fpn_block([C3, C4, C5], is_training = is_training)           # [P3, P4, P5, P6, P7] --> in the standard setup these levels serve directly as the detection layers
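# For reference, a minimal sketch of what fpn_block is assumed to do here (the actual
# models/fpn.py is not shown): 1x1 lateral convs on C3-C5, top-down nearest-neighbour
# upsampling with addition, 3x3 smoothing convs, and P6/P7 from extra stride-2 convs as
# in RetinaNet. fpn_block_sketch is a hypothetical name, not the repo's function.
def fpn_block_sketch(feats, conv_dim=256):
    C3, C4, C5 = feats
    P5 = tf.layers.conv2d(C5, conv_dim, 1)                                # lateral 1x1
    P4 = tf.layers.conv2d(C4, conv_dim, 1) + tf.image.resize_nearest_neighbor(P5, tf.shape(C4)[1:3])
    P3 = tf.layers.conv2d(C3, conv_dim, 1) + tf.image.resize_nearest_neighbor(P4, tf.shape(C3)[1:3])
    P3, P4, P5 = [tf.layers.conv2d(P, conv_dim, 3, padding='same') for P in (P3, P4, P5)]  # 3x3 smoothing
    P6 = tf.layers.conv2d(P5, conv_dim, 3, strides=2, padding='same')     # extra pyramid levels
    P7 = tf.layers.conv2d(tf.nn.relu(P6), conv_dim, 3, strides=2, padding='same')
    return [P3, P4, P5, P6, P7]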

# predicted values: lrtb regression, class scores, and centerness (pred)
[pconf_3, preg_3, pcenter_ness_3] = fcos_head(P3, num_classes, conv_dim=256)        # [pconf, preg, pcenter_ness]
[pconf_4, preg_4, pcenter_ness_4] = fcos_head(P4, num_classes, conv_dim=256)        
[pconf_5, preg_5, pcenter_ness_5] = fcos_head(P5, num_classes, conv_dim=256)        
[pconf_6, preg_6, pcenter_ness_6] = fcos_head(P6, num_classes, conv_dim=256)        
[pconf_7, preg_7, pcenter_ness_7] = fcos_head(P7, num_classes, conv_dim=256)        

print(pconf_3, preg_3, pcenter_ness_3) 

p_conf   =  [pconf_3, pconf_4, pconf_5, pconf_6, pconf_7]
p_reg    =  [preg_3, preg_4, preg_5, preg_6, preg_7]
p_center =  [pcenter_ness_3, pcenter_ness_4, pcenter_ness_5, pcenter_ness_6, pcenter_ness_7]
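# For reference, a minimal sketch of what fcos_head is assumed to produce per level
# (models/fcos_head.py is not shown): a classification tower and a regression tower of
# stacked 3x3 convs, followed by three output convs that give the class logits, the
# l/t/r/b distances and the centerness logit. fcos_head_sketch is a hypothetical name.
def fcos_head_sketch(P, num_classes, conv_dim=256, num_convs=4):
    cls_tower, reg_tower = P, P
    for _ in range(num_convs):
        cls_tower = tf.layers.conv2d(cls_tower, conv_dim, 3, padding='same', activation=tf.nn.relu)
        reg_tower = tf.layers.conv2d(reg_tower, conv_dim, 3, padding='same', activation=tf.nn.relu)
    pconf = tf.layers.conv2d(cls_tower, num_classes, 3, padding='same')    # class logits
    pcenter_ness = tf.layers.conv2d(cls_tower, 1, 3, padding='same')       # centerness logit
    preg = tf.exp(tf.layers.conv2d(reg_tower, 4, 3, padding='same'))       # l, t, r, b distances (>= 0)
    return [pconf, preg, pcenter_ness]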

# target values: lrtb regression targets, class targets, and centerness targets (target)
object_sizes_of_interest = [[0,64],[64,128],[128,256],[256,512],[512,1e8]]
object_sizes_of_interest_recale = []
for i in range(len(object_sizes_of_interest)):
    object_sizes_of_interest_recale.append([object_sizes_of_interest[i][0]//(1330/416),object_sizes_of_interest[i][1]//(1330/416)])   # [[0.0, 20.0], [20.0, 40.0], [40.0, 80.0], [80.0, 160.0], [160.0, 31278195.0]]
strides = [8, 16, 32, 64, 128]
cls_targets, reg_targets, centerness_targets = [], [], []
for i in range(len(p_conf)):
    cls_target_i, reg_target_i = compute_targets_for_locations(p_conf[i], labels, object_sizes_of_interest_recale[i], stride_1v1=strides[i])
    # centerness_target_i = compute_centerness_targets(reg_target_i)
    print("reg_target_i", reg_target_i)
    cls_targets.append(cls_target_i)
    reg_targets.append(reg_target_i)
    # centerness_targets.append(centerness_target_i)
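# For reference, the assignment rule compute_targets_for_locations is expected to follow
# (a hedged summary, not the repo's code): a feature-map location mapped back to image
# coordinates (x, y) gets, for a ground-truth box (x0, y0, x1, y1), the regression target
#     l = x - x0,  t = y - y0,  r = x1 - x,  b = y1 - y;
# the location is a positive sample for this level only if all four values are positive
# and max(l, t, r, b) lies in the level's size-of-interest range; if several boxes still
# match, the one with the smallest area supplies the class and regression target.
# Worked example with the unscaled ranges: a location at (x, y) = (100, 120) inside a box
# (60, 80, 220, 260) gives l = 40, t = 40, r = 120, b = 140, so max = 140, which falls in
# [128, 256] and is therefore assigned to the stride-32 level.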

# loss functions
cls_loss, reg_loss, center_loss, total_loss = [], [], [], []
for i in range(len(p_conf)):
    onehot_cls_target_i = tf.equal(
        tf.range(1, num_classes + 1, dtype=tf.int32),              # shape (num_classes,)
        tf.cast(cls_targets[i][:,:,:,tf.newaxis], dtype = tf.int32)) # shape(batch_size,h,w,1)
    onehot_cls_target_i = tf.cast(onehot_cls_target_i, dtype = tf.float32)        
    npos = tf.where(cls_targets[i]>0)
    num_pos = tf.shape(npos)[0]  # number of positive samples
    cls_loss_i = tf.cond(num_pos > 0,
                            lambda: focal_loss(p_conf[i], onehot_cls_target_i, gamma=2.0, alpha=0.25, weight=None),
                            lambda: tf.constant(0.0))
    
    reg_targets_flatten = tf.gather_nd(reg_targets[i], npos) # shape=(?, 4)
    
    p_center_flatten = tf.gather_nd(p_center[i], npos)  # shape=(?, 1)
    p_center_flatten = tf.reshape(p_center_flatten, [-1])

    print("reg_targets_flatten",reg_targets_flatten)
    print("p_center_flatten",p_center_flatten)

    center_targets_flatten = compute_centerness_targets(reg_targets_flatten)
    center_loss_i = tf.cond(num_pos > 0,
                            lambda: tf.nn.sigmoid_cross_entropy_with_logits(
                                        logits = p_center_flatten,        # shape (?,) after the reshape above
                                        labels = center_targets_flatten), # logits and labels must have the same shape
                            lambda: tf.constant(0.0))
    center_loss_i = tf.reduce_mean(center_loss_i)

    p_reg_flatten = tf.gather_nd(p_reg[i], npos)
    print("p_reg_flatten",p_reg_flatten)
    reg_loss_i = diou_loss(p_reg_flatten, reg_targets_flatten, weight=center_targets_flatten)
    
    cls_loss.append(cls_loss_i)
    center_loss.append(center_loss_i)
    reg_loss.append(reg_loss_i)
    total_loss.append(cls_loss_i + center_loss_i + reg_loss_i)
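# For reference, a hedged sketch of what compute_centerness_targets is expected to compute
# from the flattened (?, 4) regression targets (the centerness definition from the FCOS
# paper), assuming the channel order [l, t, r, b]:
def compute_centerness_targets_sketch(reg_targets_flat):
    l, t, r, b = tf.unstack(reg_targets_flat, axis=-1)
    centerness = (tf.minimum(l, r) / tf.maximum(l, r)) * (tf.minimum(t, b) / tf.maximum(t, b))
    return tf.sqrt(centerness)    # in [0, 1]; equals 1 only at the box centre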

The training part just adds the learning-rate schedule and the choice of optimizer.
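A minimal sketch of that step, assuming a plain exponential-decay schedule with Adam instead of the repo's config_learning_rate / config_optimizer helpers (whose exact signatures are not shown here):

final_loss = tf.add_n(total_loss)                          # sum the per-level losses
global_step = tf.train.get_or_create_global_step()
learning_rate = tf.train.exponential_decay(1e-4, global_step, 10000, 0.9, staircase=True)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)    # keep batch-norm statistics updated
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(final_loss, global_step=global_step)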

The validation part adds the decoding (inverse encoding) of the regression coordinates back into boxes.
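For one pyramid level, that decoding could look like the hedged sketch below; decode_boxes_sketch is a hypothetical helper (not the repo's get_box_), assuming preg holds the predicted l/t/r/b distances per location.

def decode_boxes_sketch(preg, stride):
    # Map per-location l/t/r/b predictions back to (x1, y1, x2, y2) in input-image coordinates.
    shape = tf.shape(preg)                                                   # (N, H, W, 4)
    ys, xs = tf.meshgrid(tf.range(shape[1]), tf.range(shape[2]), indexing='ij')
    xs = (tf.cast(xs, tf.float32) + 0.5) * stride                            # cell centres on the input image
    ys = (tf.cast(ys, tf.float32) + 0.5) * stride
    l, t, r, b = tf.unstack(preg, axis=-1)
    return tf.stack([xs - l, ys - t, xs + r, ys + b], axis=-1)               # (N, H, W, 4)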

References:

https://blog.csdn.net/WZZ18191171661/article/details/89258086

https://zhuanlan.zhihu.com/p/65459972

API:https://github.com/Stick-To/Object-Detection-API-Tensorflow/blob/master/FCOS.py
