数据增强Mosaic

数据增强Mosaic_第1张图片

丰富检测物体的背景,且在BN计算的时候一下子会计算四张图片的数据,使得mini-batch大小不需要很大,那么一个GPU就可以达到比较好的效果。

实现思路
1、每次读取四张图片。

2、分别对四张图片进行翻转、缩放、色域变化等,并且按照四个方向位置摆好。

3、进行图片的组合和框的组合

主要代码:

    def load_mosaic(self, index):
        # loads images in a mosaic

        s = self.img_size // 2
        xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]  # mosaic center x, y

        cls = int(self.images_path[index].split('/')[-2])
        indices = []
        for index_aother in range(index, len(self.images_path)):
            if int(self.images_path[index_aother].split('/')[-2]) == cls:
                indices.append(index_aother)
            if len(indices) == 4:
                break
        if len(indices) < 4:
            for index_aother in range(index):
                if int(self.images_path[index_aother].split('/')[-2]) == cls:
                    indices.append(index_aother)
                if len(indices) == 4:
                    break

        for i, index in enumerate(indices):
            # Load image
            img, h, w = self.load_image(index)
            # place img in img4
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]

        # Augment
        # img4 = img4[s // 2: int(s * 1.5), s // 2:int(s * 1.5)]  # center crop (WARNING, requires box pruning)
        img4 = random_affine(img4)  # border to remove
        label4 = [0] * 2
        label4[cls] = 1

        return img4, label4
    def load_image(self, index):
        # loads 1 image from dataset, returns img, original hw, resized hw
        path = self.images_path[index]
        img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        print("r:", r)
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, img.shape[0], img.shape[1]  # img, hw_original, hw_resized
def random_affine(img, degrees=10, translate=.1, scale=.1, shear=10, border=0):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4


    height = img.shape[0] + border * 2
    width = img.shape[1] + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border  # x translation (pixels)
    T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Combined rotation matrix
    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
    if (border != 0) or (M != np.eye(3)).any():  # image changed
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))

    return img

 github:

https://github.com/hobart07/trick/blob/master/%E6%95%B0%E6%8D%AE%E5%A2%9E%E5%BC%BA/mosaic.py

 

 

你可能感兴趣的:(计算机视觉,图像增强,算法,深度学习,pytorch)