视频数据增强的方法

以下两种方法在CTCN中使用到:
论文:待发表
1. temporal random move(时序随机移动):把每个动作片段从原位置取出,随机插入到背景帧序列中的新位置。

def random_move(img, o_boxes, labels):
        """Relocate every action segment to a random spot among the background frames.

        Frames not covered by any box are treated as background. The background
        keeps its original order; each action segment is cut out of ``img`` and
        re-inserted at a randomly sampled background position. Boxes are
        rewritten to the segments' new locations.

        Args:
            img: numpy array indexed by frame along axis 0.
            o_boxes: sequence of ``[start, end)`` frame intervals, one per action.
            labels: sequence of labels, parallel to ``o_boxes``.

        Returns:
            ``(new_img, new_boxes, new_labels)``. ``new_boxes`` is a list of
            ``[start, end]`` pairs ordered by new position; each box keeps the
            length ``end - start`` of its original box. ``new_labels`` is
            reordered to match ``new_boxes``.

            When the input cannot be augmented (no boxes, fewer than 5
            background frames, or more boxes than background frames), the
            input is returned unchanged as ``(img, np.array(o_boxes), labels)``.
        """
        boxes = np.array(o_boxes)
        num_frames = img.shape[0]
        num_boxes = boxes.shape[0]

        # Nothing to move: sampling zero insertion points would leave no first
        # cut to index below.
        if num_boxes == 0:
            return img, boxes, labels

        # Integer frame span of each box, clamped to the clip so a negative
        # start cannot wrap around to the end of the array.
        spans = [(max(int(b[0]), 0), min(int(b[1]), num_frames)) for b in boxes]

        is_action = np.zeros(num_frames, dtype=bool)
        for start, end in spans:
            is_action[start:end] = True

        bg = img[~is_action]
        bg_len = bg.shape[0]
        # random.sample needs at least as many background frames as boxes.
        if bg_len < 5 or bg_len < num_boxes:
            return img, boxes, labels

        insert_place = random.sample(range(bg_len), num_boxes)
        order = np.argsort(insert_place)

        pieces = []
        new_boxes = []
        new_labels = []
        cursor = 0  # number of frames emitted so far
        prev_cut = 0
        for k in order:
            cut = insert_place[k]
            background = bg[prev_cut:cut]
            pieces.append(background)
            cursor += background.shape[0]

            new_boxes.append([cursor, cursor + boxes[k][1] - boxes[k][0]])
            new_labels.append(labels[k])

            start, end = spans[k]
            segment = img[start:end]
            pieces.append(segment)
            cursor += segment.shape[0]
            prev_cut = cut
        pieces.append(bg[prev_cut:])

        # Single concatenate instead of growing the array piece by piece.
        new_img = np.concatenate(pieces)

        # Kept from the original implementation: drop the large intermediates
        # and force a collection (presumably to limit memory growth in
        # long-running data loaders).
        del img, boxes, is_action, bg, labels, pieces
        gc.collect()
        return new_img, new_boxes, new_labels

2. random crop(时序随机裁剪):沿时间轴随机裁剪出一段,只保留中心落在裁剪区间内的动作框。

 def random_crop(img, boxes, labels, min_scale=0.3):
        """Randomly crop a window along axis 0 and keep the boxes centred in it.

        For each IoU threshold in ``(0, 0.1, 0.3, 0.5, 0.7, 0.9)`` up to 100
        random windows are tried; the first window whose minimum IoU against
        all boxes reaches the threshold becomes a candidate. The uncropped clip
        ``(0, imh)`` is always a candidate. One candidate is then picked
        uniformly at random.

        Relies on ``box_iou1D`` and ``box_clamp1D``, which are defined outside
        this block. Presumably ``box_iou1D(boxes, roi)`` returns an array of
        IoU values and ``box_clamp1D(boxes, lo, hi)`` clips coordinates to
        ``[lo, hi]`` -- confirm against their definitions.

        Args:
            img: numpy array indexed by frame along axis 0.
            boxes: sequence of ``[start, end]`` intervals.
            labels: sequence of labels, parallel to ``boxes``.
            min_scale: smallest window length as a fraction of the clip length.

        Returns:
            ``(img, boxes, labels)`` for the cropped window, with box
            coordinates shifted to be relative to the window start. If no box
            centre falls inside the window, ``boxes`` is ``[[0, 0]]`` and
            ``labels`` is ``[0]``.
        """
        boxes = np.array(boxes)
        labels = np.array(labels)
        imh = img.shape[0]

        # Candidate (offset, length) windows; the full clip is always allowed.
        params = [(0, imh)]
        for min_iou in (0, 0.1, 0.3, 0.5, 0.7, 0.9):
            for _ in range(100):
                scale = random.uniform(min_scale, 1)
                h = int(imh * scale)
                # Offsets 0..imh-h inclusive: lets the window end on the last
                # frame and keeps h == imh valid (randrange(0) would raise).
                y = random.randrange(imh - h + 1)
                roi = [[y, y + h]]
                ious = box_iou1D(boxes, roi)
                if ious.min() >= min_iou:
                    params.append((y, h))
                    break

        y, h = random.choice(params)
        img = img[y:y + h]

        # Keep only the boxes whose centre lies inside the chosen window.
        center = (boxes[:, 0] + boxes[:, 1]) / 2
        mask = (center >= y) & (center <= y + h)
        if mask.any():
            boxes = boxes[mask] - np.array([[y, y]])
            boxes = box_clamp1D(boxes, 0, h)
            labels = labels[mask]
        else:
            # No action survives the crop: return a single placeholder box.
            boxes = [[0, 0]]
            labels = [0]
        return img, boxes, labels

你可能感兴趣的:(深度学习)