PadMaskBatch 与基类 BaseOperator 的关系不大,仅在基类的 __init__ 中初始化了变量 self._id。
PadMaskBatch 的作用是给同一 batch 中的图片补 0(pad),使该 batch 内所有图片的 shape 一致;输入图片的布局必须是 CHW。
class PadMaskBatch(BaseOperator):
    """
    Pad a batch of samples so they can be divisible by a stride.
    The layout of each image should be 'CHW'.

    Args:
        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
            height and width is divisible by `pad_to_stride`.
        return_pad_mask (bool): If `return_pad_mask = True`, return
            `pad_mask` for transformer.
    """

    def __init__(self, pad_to_stride=0, return_pad_mask=False):
        super(PadMaskBatch, self).__init__()
        self.pad_to_stride = pad_to_stride  # stride the padded H/W must be divisible by
        self.return_pad_mask = return_pad_mask  # whether to emit a valid-pixel mask

    def __call__(self, samples, context=None):
        """
        Zero-pad every sample in the batch to a common (H, W).

        Args:
            samples (list): a batch of samples, each a dict holding at least
                'image' as a CHW ndarray; 'semantic' and 'gt_segm' entries,
                when present and not None, are padded alongside the image.
            context: unused; kept for operator-interface compatibility.

        Returns:
            list: the same sample dicts, with padded arrays written back.
        """
        # Guard against an empty batch: np.max over an empty array raises.
        if not samples:
            return samples
        coarsest_stride = self.pad_to_stride
        # Largest (C, H, W) across the batch; every image is padded up to it.
        max_shape = np.array([data['image'].shape for data in samples]).max(
            axis=0)
        if coarsest_stride > 0:
            # Round H and W up to the next multiple of the stride
            # (np.ceil rounds toward +inf, so the result stays divisible).
            max_shape[1] = int(
                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
            max_shape[2] = int(
                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
        for data in samples:
            # ---------- zero-pad the image itself ----------
            im = data['image']
            im_c, im_h, im_w = im.shape
            padding_im = np.zeros(
                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
            padding_im[:, :im_h, :im_w] = im
            data['image'] = padding_im
            # ---------- pad segmentation-related annotations ----------
            # NOTE(review): assumes 'semantic' / 'gt_segm' share the image's
            # spatial size (im_h, im_w) — confirm against the dataset.
            if 'semantic' in data and data['semantic'] is not None:
                semantic = data['semantic']
                padding_sem = np.zeros(
                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
                padding_sem[:, :im_h, :im_w] = semantic
                data['semantic'] = padding_sem
            if 'gt_segm' in data and data['gt_segm'] is not None:
                gt_segm = data['gt_segm']
                padding_segm = np.zeros(
                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
                    dtype=np.uint8)
                padding_segm[:, :im_h, :im_w] = gt_segm
                data['gt_segm'] = padding_segm
            # ---------- optional mask: 1.0 on real pixels, 0.0 on padding ----------
            if self.return_pad_mask:
                padding_mask = np.zeros(
                    (max_shape[1], max_shape[2]), dtype=np.float32)
                padding_mask[:im_h, :im_w] = 1.
                data['pad_mask'] = padding_mask
        return samples
注意 PaddleDetection 中的 mask 是:
>>> padding_mask
array([[1., 1., 1., ..., 0., 0., 0.],
[1., 1., 1., ..., 0., 0., 0.],
[1., 1., 1., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)
而 Torch 中 NestedTensor 的 mask 语义正好相反(True 标记的是 padding 位置,False 标记有效像素):
>>> samples.mask
tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True]],
[[False, False, False, ..., True, True, True],
[False, False, False, ..., True, True, True],
[False, False, False, ..., True, True, True],
...,
[False, False, False, ..., True, True, True],
[False, False, False, ..., True, True, True],
[False, False, False, ..., True, True, True]]], device='cuda:0')