在detectron训练网络的过程中,给网络送的blob在下面的函数中生成:(位于minibatch.py)
def get_minibatch(roidb):
    """Given a roidb, construct a minibatch sampled from it."""
    # We collect blobs from each image onto a list and then concat them into a
    # single tensor, hence we initialize each blob to an empty list
    blobs = {k: [] for k in get_minibatch_blob_names()}
    # Get the input image blob, formatted for caffe2
    # The input images are converted into the layout the network expects,
    # (batch, channel, height, width); im_scales holds the scale factors that
    # were applied to the images.
    im_blob, im_scales = _get_image_blob(roidb)
    blobs['data'] = im_blob
    if cfg.RPN.RPN_ON:
        # RPN-only or end-to-end Faster/Mask R-CNN
        valid = rpn_roi_data.add_rpn_blobs(blobs, im_scales, roidb)
    elif cfg.RETINANET.RETINANET_ON:
        im_width, im_height = im_blob.shape[3], im_blob.shape[2]
        # im_width, im_height corresponds to the network input: padded image
        # (if needed) width and height. We pass it as input and slice the data
        # accordingly so that we don't need to use SampleAsOp
        valid = retinanet_roi_data.add_retinanet_blobs(
            blobs, im_scales, roidb, im_width, im_height
        )
    else:
        # Fast R-CNN like models trained on precomputed proposals
        valid = fast_rcnn_roi_data.add_fast_rcnn_blobs(blobs, im_scales, roidb)
    return blobs, valid
其中给FPN网络输送blob的是这一句调用:valid = rpn_roi_data.add_rpn_blobs(blobs, im_scales, roidb),下面具体来分析add_rpn_blobs这个函数。
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        # One field of anchors per FPN level
        for lvl in range(k_min, k_max + 1):
            # Stride of this level, used as the base size of its cell anchors:
            # 4, 8, 16, 32, 64 for levels 2..6
            field_stride = 2.**lvl
            # Anchor size for this level: 32, 64, 128, 256, 512 with the
            # default settings (per the article)
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            # (0.5, 1, 2) with the default settings (per the article)
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        # Merge the anchors generated for every FPN level into one array
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors
这里以P2-P6的FPN网络为例(训练图片的大小为1024×1024,其余参数皆为默认):
| 层数 | field_stride | anchor_sizes | anchor_aspect_ratios | 生成的anchor个数(乘以3是因为3种比例) |
| --- | --- | --- | --- | --- |
| P2 | 4 (2^2) | 32 | 0.5, 1, 2 | (1024/4)^2×3 = 196608 |
| P3 | 8 (2^3) | 64 | 0.5, 1, 2 | (1024/8)^2×3 = 49152 |
| P4 | 16 (2^4) | 128 | 0.5, 1, 2 | (1024/16)^2×3 = 12288 |
| P5 | 32 (2^5) | 256 | 0.5, 1, 2 | (1024/32)^2×3 = 3072 |
| P6 | 64 (2^6) | 512 | 0.5, 1, 2 | (1024/64)^2×3 = 768 |
foas是一个列表,其中每一个foa都代表着对应FPN层产生的anchor以及相关的参数;下右二图展示的就是foas,以及其中P2层产生的anchor和相关的参数。
最后将foas中所有的anchor concatenate到一起,形成下面的形式,记为all_anchors,如下。为什么要这样的形式呢?是为了方便计算每一个anchor与gt的重叠度,进而进行fg与bg的标记。
对一张图产生anchor之后,就要构建blob
# Excerpt from the body of add_rpn_blobs (shown dedented): build the RPN
# blobs for every image in the minibatch.
for im_i, entry in enumerate(roidb):
    scale = im_scales[im_i]
    # Image size after rescaling
    im_height = np.round(entry['height'] * scale)
    im_width = np.round(entry['width'] * scale)
    # Indices of gt boxes with a class > 0 that are not flagged as crowd
    gt_inds = np.where(
        (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
    )[0]
    # Selected gt boxes, rescaled by the same factor as the image
    gt_rois = entry['boxes'][gt_inds, :] * scale
    im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
    blobs['im_info'].append(im_info)
    # Add RPN targets
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        rpn_blobs = _get_rpn_blobs(
            im_height, im_width, foas, all_anchors, gt_rois
        )
        # rpn_blobs is indexed per level here; each entry's values are
        # appended under the key suffixed with '_fpn<lvl>'
        for i, lvl in enumerate(range(k_min, k_max + 1)):
            for k, v in rpn_blobs[i].items():
                blobs[k + '_fpn' + str(lvl)].append(v)
    else:
        # Classical RPN, applied to a single feature level
        rpn_blobs = _get_rpn_blobs(
            im_height, im_width, [foa], all_anchors, gt_rois
        )
        for k, v in rpn_blobs.items():
            blobs[k].append(v)
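这段代码是针对每一张送入的样本图片,获取其gt信息,主要包括:缩放后的图片高和宽(im_height、im_width)、类别大于0且不是crowd的gt框的索引(gt_inds)、按scale缩放后的gt框(gt_rois),以及由高、宽、缩放比例组成的im_info。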
获取到了上述的信息后,再调用_get_rpn_blobs(见下面),获取针对该样本图片的blob
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    """Label the anchors of one image and build its RPN target blobs.

    Anchors are labelled fg (1), bg (0) or ignore (-1) from their overlap with
    gt_boxes; bbox regression targets and inside/outside weights are computed
    for them, and the results are then split per field of anchors in foas.

    Returns a single dict when foas has exactly one entry, otherwise a list
    with one dict per entry of foas. Each dict has the keys
    rpn_labels_int32_wide, rpn_bbox_targets_wide,
    rpn_bbox_inside_weights_wide and rpn_bbox_outside_weights_wide.
    """
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH
    # Keep the anchors that lie inside the image
    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh)
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)
    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))
    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    # np.empty allocates an uninitialised array ...
    labels = np.empty((num_inside, ), dtype=np.int32)
    # ... which is then filled entirely with -1
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        # (overlap of every anchor with every gt box;
        # anchor_by_gt_overlap.shape = [anchors_num, gt_num])
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        # (one gt index per anchor)
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        # (one overlap value per anchor)
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]
        # Map from gt box to an anchor that has highest overlap
        # (axis=0 takes the maximum down each column, and each column holds
        # the overlaps of all anchors with one gt box, so the result has one
        # anchor index per gt box, e.g. shape (3,) for three gt boxes)
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])
        ]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        # NOTE(review): if a gt box overlaps no anchor its max is 0, and every
        # anchor with zero overlap against it would match here — confirm
        # whether that case can occur.
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max
        )[0]
        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        # Step 1: anchors with the highest overlap get label 1
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        # Step 2: anchors whose overlap reaches the positive threshold
        # (0.7 per the article) also get label 1
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
    # subsample positive labels if we have too many
    # Target number of foreground samples (default: 256 x 0.5 per the article)
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        # Randomly pick the surplus (len(fg_inds) - num_fg) foreground anchors
        # without replacement and reset them to -1 (ignore)
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False
        )
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]
    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    # Number of background samples wanted
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    # Background candidates actually available
    # NOTE(review): indentation here is reconstructed to match upstream
    # Detectron; anchor_to_gt_max is only assigned inside the
    # `if len(gt_boxes) > 0` branch above, so this line presumably relies on
    # gt_boxes being non-empty — confirm.
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
    else:
        enable_inds = bg_inds
    labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]
    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    # For the foreground anchors, look up the index of their best-matching gt
    # box (anchor_to_gt_argmax[fg_inds]) and from it the gt box itself
    # (gt_boxes[anchor_to_gt_argmax[fg_inds], :])
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]
    )
    # Bbox regression loss has the form:
    # loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)
    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    # Number of anchors labelled fg or bg; per the article this is in effect
    # the per-image batch size (256)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples
    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0
    )
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0
    )
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0
    )
    # The labels, bbox_targets, bbox_inside_weights and bbox_outside_weights
    # above were produced for the merged all_anchors, but the entries of foas
    # carry none of them, so:
    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        # H * W * A is the number of anchors in this field
        end_idx = start_idx + H * W * A
        # Per the article, labels are ordered level by level (P2 to P6), so
        # each field takes the next consecutive slice
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx
        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(
                rpn_labels_int32_wide=_labels,
                rpn_bbox_targets_wide=_bbox_targets,
                rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                rpn_bbox_outside_weights_wide=_bbox_outside_weights
            )
        )
    # A single field of anchors yields a plain dict instead of a list
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
上述代码完成的内容相当于anchor_target_layer,可见博客。最后一个for循环完成的任务是将与all_anchors同维度(行数一致)的labels,bbox_targets,bbox_inside_weights,bbox_outside_weights重新分配成foas的形式,见下图。
最后该函数返回的rpn_blobs形式如下,右图表示P2层。
由于blob是如下形式,所以还要将rpn_blobs中每一层的值对应地赋给