回顾论文阅读:论文翻译篇,主要建模如下
1、Backbone :采用现有模型,如resnet50,提取5个block:>>> [C1、C2、C3、C4、C5]
2、FPN模块:参考标准FPN。一般是Backbone参数抽象特征,再经过融合阶段。[https://zhuanlan.zhihu.com/p/148738276]
当前考虑,先通过seblock增加通道联系,分别经过1x1卷积,再经过RetinaNet中的FPN-BiFPN
>>> [P1、P2、P3、P4、P5] = F[C1、C2、C3、C4、C5]
3、检测头head:[pre_conf_i、pre_reg_i、pre_ctn_i] = detect_head[Pi] {Pi=[P1、P2、P3、P4、P5]}
4、制作偏移标签 lrtb
5、制作类别标签 cls
6、制作中心度标签 ctn
7、计算cls_loss_i(pre_conf_i, cls)
8、计算reg_loss_i(pre_reg_i, lrtb)
9、计算ctn_loss_i(pre_ctn_i, ctn)
接下来一步步实现
import tensorflow as tf
from utils.data_aug_1 import file_reader, data_process # 数据
from models.Resnet_50 import resnet_v2_50
from models.fpn import fpn_block
from models.fcos_head import fcos_head
from models.get_target import compute_targets_for_locations, compute_centerness_targets
from models.loss import focal_loss, diou_loss
from utils import args
from models.config_optimizer import config_learning_rate,config_optimizer
# 验证子函数
from tqdm import trange
import numpy as np
import timeit
import random
from utils.pred_box import get_box_
from utils.eval import parse_gt_rec, voc_eval, get_preds_label, make_summary, plot, layer_plot
num_classes = 20  # VOC-style 20-class setup
# Each entry of train_lines packs one sample padded to num_box_itr boxes so
# every list element has the same length (per the original note).
train_lines = file_reader(file_path=r"./dataset/train.txt", num_box_itr=60) # train_lines = list of equally-sized entries
# NHWC image batch fed to the backbone.
inputs = tf.placeholder(tf.float32, [None, 1024, 1024, 3], name='inputs')
# Ground-truth boxes: (batch, num_boxes, 5) — presumably 4 coords + class id; TODO confirm layout.
labels = tf.placeholder(tf.float32, [None,None,5], name='labels')
# Train/inference switch (batch-norm phase) threaded through backbone/FPN/head.
is_training = tf.placeholder(tf.bool, name="phase_train")
def Backbone(image, num_classes=None, is_training=is_training):
    """Run ResNet-50 v2 on `image` and return its [C3, C4, C5] feature maps.

    global_pool is disabled so spatial feature maps (not a pooled vector)
    are available in the end-points dict for the FPN stage.
    """
    _, feature_dict = resnet_v2_50(
        image, num_classes, is_training=is_training,
        global_pool=False, output_stride=None, reuse=None,
        scope='resnet_v2_50')
    block_keys = ['resnet_v2_50/block1', 'resnet_v2_50/block2', 'resnet_v2_50/block3']
    return [feature_dict[key] for key in block_keys]
# Backbone features -> FPN pyramid levels (strides below suggest P3..P7).
[C3, C4, C5] = Backbone(inputs, num_classes=num_classes, is_training=is_training) # [C3, C4, C5]
[P3, P4, P5, P6, P7] = fpn_block([C3, C4, C5], is_training = is_training) # [P3, P4, P5, P6, P7] --> in the standard design these layers already serve as detection layers
# Predicted lrtb regression offsets, class scores, and center-ness per level -- pred
[pconf_3, preg_3, pcenter_ness_3] = fcos_head(P3, num_classes, conv_dim=256) # [pconf, preg, pcenter_ness]
[pconf_4, preg_4, pcenter_ness_4] = fcos_head(P4, num_classes, conv_dim=256)
[pconf_5, preg_5, pcenter_ness_5] = fcos_head(P5, num_classes, conv_dim=256)
[pconf_6, preg_6, pcenter_ness_6] = fcos_head(P6, num_classes, conv_dim=256)
[pconf_7, preg_7, pcenter_ness_7] = fcos_head(P7, num_classes, conv_dim=256)
# Debug print: inspect the head output tensors for one level.
print(pconf_3, preg_3, pcenter_ness_3)
# Group per-level outputs so the target/loss loops below can index by level.
p_conf = [pconf_3, pconf_4, pconf_5, pconf_6, pconf_7]
p_reg = [preg_3, preg_4, preg_5, preg_6, preg_7]
p_center = [pcenter_ness_3, pcenter_ness_4, pcenter_ness_5, pcenter_ness_6, pcenter_ness_7]
# Target side: lrtb regression targets, class targets, center-ness targets -- target
# FCOS size-of-interest ranges: a ground-truth box is assigned to the pyramid
# level whose range contains its max lrtb extent.
object_sizes_of_interest = [[0, 64], [64, 128], [128, 256], [256, 512], [512, 1e8]]
# The ranges are shrunk by 1330/416 — presumably the dataset's native
# resolution vs. the working resolution; TODO confirm against the data pipeline.
_rescale_factor = 1330 / 416
# Replaced the manual range(len(...)) + append loop with a comprehension.
object_sizes_of_interest_recale = [
    [low // _rescale_factor, high // _rescale_factor]
    for low, high in object_sizes_of_interest
]  # [[0.0, 20.0], [20.0, 40.0], [40.0, 80.0], [80.0, 160.0], [160.0, 31278195.0]]
strides = [8, 16, 32, 64, 128]  # downsampling stride of each pyramid level w.r.t. the input
cls_targets, reg_targets, centerness_targets = [], [], []
# Build per-level classification / regression targets.
# BUG FIX: the loop variable is `index`, but the body indexed with `i`
# (left over from the rescale loop above), so every level silently used
# the last value `i` happened to hold instead of its own range/stride.
for index in range(len(p_conf)):
    cls_target_i, reg_target_i = compute_targets_for_locations(
        p_conf[index], labels,
        object_sizes_of_interest_recale[index],
        stride_1v1=strides[index])
    # centerness_target_i = compute_centerness_targets(reg_target_i)
    print("reg_target_i", reg_target_i)  # debug: inspect target tensor
    cls_targets.append(cls_target_i)
    reg_targets.append(reg_target_i)
    # centerness_targets.append(centerness_target_i)
# Loss functions
cls_loss, reg_loss, center_loss, total_loss = [], [], [], []
# BUG FIX: as in the target loop, the body referenced `i` while the loop
# variable is `index`; all uses are corrected below.
for index in range(len(p_conf)):
    # One-hot class targets: class ids appear to be 1-based (0 = background),
    # so compare against range(1, num_classes + 1).
    # BUG FIX: the hard-coded 20 is replaced with num_classes for consistency.
    onehot_cls_target_i = tf.equal(
        tf.range(1, num_classes + 1, dtype=tf.int32),                      # shape (num_classes,)
        tf.cast(cls_targets[index][:, :, :, tf.newaxis], dtype=tf.int32))  # shape (batch, h, w, 1)
    onehot_cls_target_i = tf.cast(onehot_cls_target_i, dtype=tf.float32)
    # Indices of positive locations (class id > 0), shape (num_pos, rank).
    npos = tf.where(cls_targets[index] > 0)
    # BUG FIX (flagged by the original "需要修改一下" note): the positive count
    # is the number of rows tf.where returns, not the sum of the index values.
    num_pos = tf.shape(npos)[0]
    # Focal classification loss, skipped when the level has no positives.
    cls_loss_i = tf.cond(
        num_pos > 0,
        lambda: focal_loss(p_conf[index], onehot_cls_target_i, gamma=2.0, alpha=0.25, weight=None),
        lambda: 0.0)
    # Gather targets/predictions at positive locations only.
    reg_targets_flatten = tf.gather_nd(reg_targets[index], npos)  # shape (num_pos, 4)
    p_center_flatten = tf.gather_nd(p_center[index], npos)        # shape (num_pos, 1)
    p_center_flatten = tf.reshape(p_center_flatten, [-1])         # flatten so logits/labels shapes match
    print("reg_targets_flatten", reg_targets_flatten)
    print("p_center_flatten", p_center_flatten)
    center_targets_flatten = compute_centerness_targets(reg_targets_flatten)
    # Center-ness is trained with a per-location sigmoid cross-entropy.
    center_loss_i = tf.cond(
        num_pos > 0,
        lambda: tf.nn.sigmoid_cross_entropy_with_logits(
            logits=p_center_flatten,          # (num_pos,)
            labels=center_targets_flatten),   # logits and labels must have the same shape
        lambda: 0.0)
    center_loss_i = tf.reduce_mean(center_loss_i)
    p_reg_flatten = tf.gather_nd(p_reg[index], npos)
    print("p_reg_flatten", p_reg_flatten)
    # DIoU regression loss over positives, weighted by the center-ness target.
    reg_loss_i = diou_loss(p_reg_flatten, reg_targets_flatten, weight=center_targets_flatten)
    cls_loss.append(cls_loss_i)
    center_loss.append(center_loss_i)
    reg_loss.append(reg_loss_i)
    total_loss.append(cls_loss_i + center_loss_i + reg_loss_i)
训练部分就是加入学习率和优化器选择。
验证部分就是加入回归坐标的反向编码。
参考链接:
https://blog.csdn.net/WZZ18191171661/article/details/89258086
https://zhuanlan.zhihu.com/p/65459972
API:https://github.com/Stick-To/Object-Detection-API-Tensorflow/blob/master/FCOS.py