1. 简介


在进行接下来的内容前,我们首先介绍图像的色彩空间。这里我们只介绍本文所涉及的两种色彩空间:${\rm RGB}$ 和 ${\rm HSV}$。${\rm RGB}$ 是我们最熟悉的一种表示图像色彩的方式,三个字母分别代表红、绿、蓝。

上图正方体上的每一个点在空间中都对应一个三维坐标,坐标的三个值分别表示 ${\rm R}$、${\rm G}$、${\rm B}$ 的值,该位置的颜色等于三者的叠加。使用 ${\rm RGB}$ 色彩空间有利于对图像色彩的定量分析,另一种更直观的图像色彩描述方法是使用 ${\rm HSV}$ 色彩空间。其中,三个字母分别表示色调(Hue)、饱和度(Saturation)和亮度(Value),这种表示方法便于我们直观地分析图像的色彩特征。

(二)深度学习实战 | 基于PyTorch的目标检测数据增强(上)_第2张图片


与上述 ${\rm RGB}$ 色彩空间的表示方法类似,${\rm HSV}$ 色彩空间中也是使用三个值的组合得到最后的颜色。最后,这里只对二者作了简要的介绍,具体内容可参考相关资料。

2. 针对像素的数据增强


首先,图像对比度的定义是一幅图像中明暗区域最亮的白和最暗的黑之间不同亮度层级的测量,视觉上就是整幅图像的反差。数据增强中的随机对比度的思想是给图像中的每个像素值乘以一个随机因子,当该因子的值小于 $1$ 时,图像整体的对比度会减小;当该因子的值大于 $1$ 时,图像整体的对比度会增大。

class RandomContrast:
    """Randomly scale all pixel values of an image to change its contrast.

    Each call flips a coin with ``random.randint(2)``; on a non-zero draw
    the image is multiplied in place by one factor drawn with
    ``random.uniform(lower, upper)``.

    NOTE(review): ``random`` is expected to be ``numpy.random`` (the
    single-argument ``randint(2)`` call is not valid for the stdlib module).
    """

    def __init__(self, lower=0.5, upper=1.5):
        """Store the bounds of the random contrast factor."""
        self.lower, self.upper = lower, upper

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)``; only ``image`` may be modified.

        ``boxes`` and ``labels`` are passed through untouched.
        """
        if not random.randint(2):
            return image, boxes, labels
        # A factor below 1 reduces contrast, a factor above 1 increases it.
        # The multiplication is in place, so the caller's array is changed.
        factor = random.uniform(self.lower, self.upper)
        image *= factor
        return image, boxes, labels

其次,图像饱和度是指色彩纯度,纯度越高,则看起来更加鲜艳;纯度越低,则看起来较黯淡。如我们常说的红色比淡红色更加“红”,就是说红色的饱和度比淡红色的饱和度更大。数据增强中的随机饱和度的思想是在 ${\rm HSV}$ 空间内对饱和度这一维的值进行缩放。所以,我们首先需要将图像从 ${\rm RGB}$ 空间(下面的代码使用 OpenCV,其通道顺序为 ${\rm BGR}$)转换到 ${\rm HSV}$ 空间。然后,我们将饱和度乘上一个随机因子,当该因子的值小于 $1$ 时,图像的饱和度会减小;当该因子的值大于 $1$ 时,图像的饱和度会变大。

# Convert an image between colour spaces.
class ConvertColor:
    """Convert an image between the BGR and HSV colour spaces with OpenCV.

    Only the pairs ``BGR -> HSV`` and ``HSV -> BGR`` are supported.
    """

    def __init__(self, current='BGR', transform='HSV'):
        """Remember the source (``current``) and target (``transform``) spaces."""
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(converted_image, boxes, labels)``.

        ``boxes`` and ``labels`` are passed through untouched.

        Raises:
            NotImplementedError: if the ``current``/``transform`` pair is not
                one of the two supported conversions.
        """
        if self.current == 'BGR' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'HSV' and self.transform == 'BGR':
            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        else:
            # Only unsupported pairs may raise; a successful HSV -> BGR
            # conversion must fall through to the return below.
            raise NotImplementedError(
                'unsupported colour conversion: %r -> %r'
                % (self.current, self.transform))
        return image, boxes, labels

class RandomSaturation:
    """Randomly scale channel 1 of an image (the S channel of an HSV image).

    NOTE(review): the transform itself does not check the colour space; it
    presumably runs after a BGR -> HSV conversion - confirm against caller.
    """

    def __init__(self, lower=0.5, upper=1.5):
        """Store the bounds of the random saturation factor."""
        self.lower, self.upper = lower, upper

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)``; channel 1 may be scaled in place."""
        if not random.randint(2):
            return image, boxes, labels
        # ``saturation`` is a view, so the in-place multiply updates ``image``.
        saturation = image[:, :, 1]
        saturation *= random.uniform(self.lower, self.upper)
        return image, boxes, labels

同理,图像色调的变化与上面类似,是在 ${\rm HSV}$ 空间内对色调这一维的值进行加减。

class RandomHue:
    """Randomly shift channel 0 of an image (the H channel of an HSV image).

    NOTE(review): the wrap-around constants assume hue is expressed in
    degrees on a 0-360 scale - confirm for the image dtype in use.
    """

    def __init__(self, delta=18.0):
        """Store the maximum absolute hue shift."""
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)``; channel 0 may be shifted in place."""
        if not random.randint(2):
            return image, boxes, labels
        # ``hue`` is a view of channel 0, so every update lands in ``image``.
        hue = image[:, :, 0]
        hue += random.uniform(-self.delta, self.delta)
        # Wrap values that left the valid range back by one full turn.
        hue[hue > 360.0] -= 360.0
        hue[hue < 0.0] += 360.0
        return image, boxes, labels

其次,将 ${\rm RGB}$ 空间内的像素值均加上或减去一个值,就可以改变图像整体的亮度。

class RandomBrightness:
    """Randomly add one constant offset to every pixel of an image."""

    def __init__(self, delta=32):
        """Store the maximum absolute brightness offset."""
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)``; ``image`` may be shifted in place."""
        if not random.randint(2):
            return image, boxes, labels
        # The same random offset is added to every pixel, in place.
        offset = random.uniform(-self.delta, self.delta)
        image += offset
        return image, boxes, labels

最后一种变换是在 ${\rm RGB}$ 空间内随机交换通道的值,这样不同值的叠加最后也会得到不同的颜色。

class SwapChannels(object):
    """Reorder the third axis of an image according to a fixed permutation."""

    def __init__(self, swaps):
        """Store ``swaps``, the index sequence applied to the third axis."""
        self.swaps = swaps

    def __call__(self, image):
        """Return ``image`` with its third axis indexed by ``self.swaps``."""
        return image[:, :, self.swaps]

class RandomLightingNoise:
    """Randomly permute the three channels of an image."""

    def __init__(self):
        """Enumerate all six orderings of the channel indices 0, 1, 2."""
        self.perms = (
            (0, 1, 2),
            (0, 2, 1),
            (1, 0, 2),
            (1, 2, 0),
            (2, 0, 1),
            (2, 1, 0),
        )

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)`` with channels possibly reordered."""
        if not random.randint(2):
            return image, boxes, labels
        # Pick one ordering by index; the identity ordering is a valid draw.
        order = self.perms[random.randint(len(self.perms))]
        return SwapChannels(order)(image), boxes, labels


class PhotometricDistort:
    """Apply the pixel-level augmentations as one randomised pipeline.

    Brightness is applied first, then a colour pipeline in which contrast is
    applied either before or after the HSV saturation/hue steps, and finally
    a random channel permutation.
    """

    def __init__(self):
        """Build the colour pipeline and the two stand-alone transforms."""
        self.pd = [
            RandomContrast(),  # random contrast (used when applied first)
            ConvertColor(transform='HSV'),  # BGR -> HSV
            RandomSaturation(),  # random saturation
            RandomHue(),  # random hue
            ConvertColor(current='HSV', transform='BGR'),  # HSV -> BGR
            RandomContrast(),  # random contrast (used when applied last)
        ]
        self.rand_brightness = RandomBrightness()  # random brightness
        self.rand_light_noise = RandomLightingNoise()  # random channel swap

    def __call__(self, image, boxes, labels):
        """Return the distorted ``(image, boxes, labels)``.

        The transforms work on a copy, so the caller's ``image`` is left
        unmodified even though several steps operate in place.

        NOTE(review): ``Compose`` is defined outside this block; it is
        assumed to chain callables taking ``(image, boxes, labels)``.
        """
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        if random.randint(2):
            # Drop the trailing contrast step: contrast runs first.
            distort = Compose(self.pd[:-1])
        else:
            # Drop the leading contrast step: contrast runs last.
            distort = Compose(self.pd[1:])
        im, boxes, labels = distort(im, boxes, labels)
        return self.rand_light_noise(im, boxes, labels)

3. 针对图像的数据增强


3.1 随机镜像


class RandomMirror:
    """Randomly flip an image left-to-right together with its boxes.

    NOTE(review): boxes are assumed to be rows of ``[x1, y1, x2, y2]`` in
    pixel coordinates - confirm against the dataset loader.
    """

    def __call__(self, image, boxes, classes=None):
        """Return ``(image, boxes, classes)``, mirrored on a non-zero draw.

        When the flip happens, the image is returned as a reversed view and
        the boxes as a modified copy; the caller's ``boxes`` are untouched.
        """
        _, img_width, _ = image.shape
        if not random.randint(2):
            return image, boxes, classes
        mirrored = image[:, ::-1]
        flipped = boxes.copy()
        # New (x1, x2) = width - old (x2, x1): mirroring swaps the two edges.
        flipped[:, 0::2] = img_width - flipped[:, 2::-2]
        return mirrored, flipped, classes


3.2 随机缩放


class Expand:
    """Randomly place the image on a larger canvas filled with a mean colour.

    The objects become relatively smaller in the output, which is how this
    transform simulates zooming out.
    """

    def __init__(self, mean):
        """Store ``mean``, the value broadcast over the canvas as padding."""
        self.mean = mean

    def __call__(self, image, boxes, labels):
        """Return ``(image, boxes, labels)``, expanded on a zero draw.

        When expansion happens a new canvas and a shifted copy of ``boxes``
        are returned; the inputs are not modified.
        """
        if random.randint(2):
            return image, boxes, labels
        height, width, depth = image.shape
        # Canvas is 1x-4x the original size; the image lands at a random
        # offset that keeps it fully inside the canvas.
        ratio = random.uniform(1, 4)
        # Truncate each offset once so the paste window below is exactly
        # ``height`` x ``width`` regardless of floating-point rounding.
        left = int(random.uniform(0, width * ratio - width))
        top = int(random.uniform(0, height * ratio - height))
        # Keep the input dtype so later transforms see the same kind of array.
        expand_image = np.zeros(
            (int(height * ratio), int(width * ratio), depth),
            dtype=image.dtype)
        expand_image[:, :, :] = self.mean
        expand_image[top:top + height, left:left + width] = image
        # Shift both box corners by the paste offset (x first, then y).
        boxes = boxes.copy()
        boxes[:, :2] += (left, top)
        boxes[:, 2:] += (left, top)
        return expand_image, boxes, labels

3.3 随机裁剪


class RandomSampleCrop:
    """Randomly crop an image subject to an IoU constraint with its boxes.

    Each option in ``sample_options`` is either ``None`` (return the input
    unchanged) or a ``(min_iou, max_iou)`` pair in which ``None`` means the
    bound is open.

    NOTE(review): ``iou`` is defined outside this block; it is assumed to
    return one IoU value per box for ``iou(boxes, rect)``. Boxes are assumed
    to be rows of ``[x1, y1, x2, y2]`` in pixel coordinates.
    """

    def __init__(self):
        """Set up the table of cropping modes."""
        self.sample_options = (
            # Keep the whole original image.
            None,
            # Require a minimum IoU with the objects.
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # Unconstrained random patch.
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(image, boxes, labels)`` for the sampled crop.

        Only boxes whose centre falls inside the crop are kept; they are
        clipped to the crop and moved into its coordinate frame. With no
        boxes (``None`` or empty) the inputs are returned unchanged.
        """
        # Without boxes there is nothing to constrain the crop against.
        if boxes is None or len(boxes) == 0:
            return image, boxes, labels
        height, width, _ = image.shape
        while True:
            # Pick a mode by index: the option table mixes ``None`` with
            # pairs, which ``numpy.random.choice`` does not accept.
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels
            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')
            # Try up to 50 patches for this mode before drawing a new mode.
            for _ in range(50):
                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)
                # Reject patches whose aspect ratio is outside [0.5, 2].
                if h / w < 0.5 or h / w > 2:
                    continue
                # Both bounds are explicit: a single-argument call would
                # sample between ``width - w`` and the default upper bound 1.
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)
                # Patch as x1, y1, x2, y2.
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])
                overlap = iou(boxes, rect)
                # Keep the patch only if at least one box can satisfy the
                # IoU bounds; with an open upper bound the lower bound must
                # still be enforced.
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue
                current_image = image[rect[1]:rect[3], rect[0]:rect[2], :]
                # A box survives only if its centre lies strictly inside.
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                mask = m1 * m2
                if not mask.any():
                    continue
                current_boxes = boxes[mask, :].copy()
                current_labels = labels[mask]
                # Clip each box to the patch, then shift to patch coordinates.
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
                current_boxes[:, 2:] -= rect[:2]
                return current_image, current_boxes, current_labels


4. 总结



