基于pytorch的目标检测数据增强(tensor数据流版本)

示例代码的 dataloader 传入的是一个 target 字典,其中包含 boxes 和 labels;如果你们直接传入 boxes 和 labels,修改函数参数即可。由于传入的 image 和 target 都已经过 torch 转换,数据格式为 tensor,所以代码中包含一些 tensor 与 numpy 之间的格式转换。图片的 shape 为 (c,h,w),每种增强的触发概率设为 0.3,均可按需修改。运行以下代码前需要先导入 random、math、numpy(记作 np)、cv2 和 torch。

随机缩放

class randomScale(object):
    """Randomly stretch or squeeze an image horizontally, keeping its height.

    With probability 0.3 the width is resized by a factor drawn uniformly
    from [0.8, 1.2], and columns 0 and 2 of ``target["boxes"]`` (treated as
    x-coordinates) are scaled to match. Otherwise the inputs are returned
    untouched.
    """

    def __call__(self, image, target):
        """Apply the augmentation.

        Args:
            image: Image tensor laid out as (C, H, W).
            target: Dict holding a "boxes" tensor with four columns.

        Returns:
            Tuple ``(image, target)``; ``target["boxes"]`` is replaced in
            place when the augmentation fires.
        """
        if random.random() < 0.3:
            # cv2 expects (H, W, C) numpy arrays.
            pixels = np.transpose(np.array(image), (1, 2, 0))
            height, width, _ = pixels.shape
            scale = random.uniform(0.8, 1.2)
            # The new width is truncated to whole pixels, so scale the boxes
            # by the ratio that was really applied rather than by the sampled
            # float; otherwise the boxes drift slightly from the pixels.
            new_width = max(1, int(width * scale))
            ratio = new_width / width
            pixels = cv2.resize(pixels, (new_width, height))
            factors = torch.tensor([ratio, 1.0, ratio, 1.0], dtype=torch.float32)
            target["boxes"] = target["boxes"] * factors
            image = torch.from_numpy(np.transpose(pixels, (2, 0, 1)))
        return image, target

随机模糊

class randomBlur(object):
    """Blur the image with a 5x5 ``cv2.blur`` kernel 30% of the time."""

    def __call__(self, image, target):
        """Return ``(image, target)``; only the image may change.

        ``image`` is a (C, H, W) tensor. It is converted to an (H, W, C)
        numpy array for OpenCV and converted back afterwards.
        """
        if random.random() >= 0.3:
            return image, target

        hwc = np.array(image).transpose(1, 2, 0)
        blurred = cv2.blur(hwc, (5, 5))
        return torch.from_numpy(blurred.transpose(2, 0, 1)), target

随机擦除(遮挡)
随机擦除可以提高模型的鲁棒性,这里提供两个经典算法:Cutout 和 Random Erasing。

class Cutout(object):
    """Zero out square patches of an image (Cutout augmentation).

    Args:
        n_holes (int): How many patches are cut per image.
        length (int): Side length, in pixels, of each square patch.
    """

    def __init__(self, n_holes=6, length=50):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, image, target):
        """Cut the patches with probability 0.3.

        Args:
            image (Tensor): Image of size (C, H, W).
            target: Passed through unchanged.

        Returns:
            Tuple ``(image, target)`` where the image has ``n_holes``
            patches (clipped at the borders) multiplied by zero.
        """
        if random.random() >= 0.3:
            return image, target

        rows = image.shape[1]
        cols = image.shape[2]
        half = self.length // 2

        # 1 keeps a pixel, 0 erases it.
        keep = np.ones((rows, cols), np.float32)
        for _ in range(self.n_holes):
            # Patch centres are drawn from numpy's global RNG: row, then column.
            cy = np.random.randint(rows)
            cx = np.random.randint(cols)
            top, bottom = np.clip((cy - half, cy + half), 0, rows)
            left, right = np.clip((cx - half, cx + half), 0, cols)
            keep[top:bottom, left:right] = 0.

        # The (H, W) mask is broadcast over every channel.
        return image * torch.from_numpy(keep).expand_as(image), target
class RandomErasing(object):
    """Random Erasing augmentation (Zhong et al.) for (C, H, W) tensors.

    With probability 0.3 one rectangular patch of the image is overwritten
    with ``mean``.

    Args:
        sl: Minimum patch area, as a fraction of the image area.
        sh: Maximum patch area, as a fraction of the image area.
        r1: Minimum aspect ratio; the maximum is ``1 / r1``.
        mean: Per-channel fill value. Only ``mean[0]`` is used when the image
            does not have exactly three channels.
    """

    def __init__(self, sl=0.01, sh=0.25, r1=0.3, mean=(0.4914, 0.4822, 0.4465)):
        # Tuple default: a list default would be shared between instances.
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1

    def __call__(self, image, target):
        """Erase at most one patch.

        Args:
            image: Image tensor laid out as (C, H, W).
            target: Dict with a "boxes" tensor indexed as (x1, y1, x2, y2);
                it is read but never modified.

        Returns:
            Tuple ``(image, target)``. The image is a new tensor when the
            augmentation fires; the input tensor is left untouched.
        """
        if random.random() >= 0.3:
            return image, target

        pixels = np.array(image)  # copy, so the caller's tensor is not altered
        channels, height, width = pixels.shape[0], pixels.shape[1], pixels.shape[2]
        image_area = height * width

        boxes = target["boxes"].numpy()
        box_areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        for _ in range(100):
            patch_area = random.uniform(self.sl, self.sh) * image_area
            aspect_ratio = random.uniform(self.r1, 1 / self.r1)

            # NOTE(review): the original tested `target_area > area_box.all() * 3`,
            # which collapses to a comparison with the constant 3 (or 0) and
            # aborted the loop on the first attempt, so nothing was ever erased.
            # The apparent intent -- reject a patch more than 3x larger than
            # every box -- is implemented here, and a rejected candidate is
            # resampled instead of ending the search. Confirm the intended cap.
            if box_areas.size and np.all(patch_area > box_areas * 3):
                continue

            patch_h = int(round(math.sqrt(patch_area * aspect_ratio)))
            patch_w = int(round(math.sqrt(patch_area / aspect_ratio)))
            if patch_w < width and patch_h < height:
                top = random.randint(0, height - patch_h)
                left = random.randint(0, width - patch_w)
                rows = slice(top, top + patch_h)
                cols = slice(left, left + patch_w)
                if channels == 3:
                    for channel in range(3):
                        pixels[channel, rows, cols] = self.mean[channel]
                else:
                    pixels[0, rows, cols] = self.mean[0]
                # One patch per call; without this the loop would keep
                # painting patches for all remaining attempts.
                break

        return torch.from_numpy(pixels), target

随机裁剪

class Random_crop(object):
    """Crop a random window covering 60-100% of each image side.

    Fires with probability 0.3. Boxes whose centre falls strictly inside the
    window are kept, shifted into window coordinates and clamped to it; the
    matching entries of ``target["labels"]`` are kept alongside.
    """

    def __call__(self, image, target):
        """Apply the crop.

        Args:
            image: Image tensor laid out as (C, H, W).
            target: Dict with "boxes" (N, 4), indexed as (x1, y1, x2, y2),
                and "labels" (N,).

        Returns:
            Tuple ``(image, target)``. When no box centre survives the crop
            the inputs are returned unchanged, so a sample never loses all
            of its annotations.
        """
        if random.random() >= 0.3:
            return image, target

        boxes = target["boxes"]
        labels = target["labels"]
        pixels = np.transpose(np.array(image), (1, 2, 0))
        height, width, _ = pixels.shape

        # Draw order (h, w, x, y) is kept from the original implementation.
        crop_h = random.uniform(0.6 * height, height)
        crop_w = random.uniform(0.6 * width, width)
        left = random.uniform(0, width - crop_w)
        top = random.uniform(0, height - crop_h)
        left, top, crop_h, crop_w = int(left), int(top), int(crop_h), int(crop_w)

        centers = (boxes[:, 2:] + boxes[:, :2]) / 2
        centers = centers - torch.tensor([[left, top]], dtype=torch.float32)
        keep = (
            (centers[:, 0] > 0) & (centers[:, 0] < crop_w)
            & (centers[:, 1] > 0) & (centers[:, 1] < crop_h)
        )
        # The original had this guard commented out (with a stale return
        # signature); without it the crop could emit an empty target.
        if not bool(keep.any()):
            return image, target

        offset = torch.tensor([[left, top, left, top]], dtype=torch.float32)
        kept = boxes[keep] - offset
        kept[:, 0::2].clamp_(min=0, max=crop_w)  # x1, x2
        kept[:, 1::2].clamp_(min=0, max=crop_h)  # y1, y2

        window = pixels[top:top + crop_h, left:left + crop_w, :]
        image = torch.from_numpy(np.ascontiguousarray(np.transpose(window, (2, 0, 1))))
        target["labels"] = labels[keep]
        target["boxes"] = kept
        return image, target

随机平移

class randomShift(object):
    """Translate the image by up to 1% of its size and move the boxes along.

    Fires with probability 0.3. Uncovered pixels are filled with
    (104, 117, 123), which the original comment labels as BGR values.
    """

    def __call__(self, image, target):
        """Apply the shift.

        Args:
            image: Image tensor laid out as (C, H, W); the fill colour
                requires C == 3.
            target: Dict with "boxes" (N, 4), indexed as (x1, y1, x2, y2),
                and "labels" (N,).

        Returns:
            Tuple ``(image, target)``. Boxes whose centre leaves the image
            are dropped and the rest are clamped to the image bounds. If no
            box would remain, the inputs are returned unchanged.
        """
        if random.random() >= 0.3:
            return image, target

        boxes = target["boxes"]
        labels = target["labels"]
        pixels = np.transpose(np.array(image), (1, 2, 0))
        height, width, channels = pixels.shape

        shifted = np.zeros((height, width, channels), dtype=pixels.dtype)
        shifted[:, :, :] = (104, 117, 123)  # bgr

        # Whole-pixel offsets; x is drawn before y as in the original.
        dx = int(random.uniform(-width * 0.01, width * 0.01))
        dy = int(random.uniform(-height * 0.01, height * 0.01))

        # A positive offset moves content right/down, a negative one left/up.
        dst_rows = slice(max(dy, 0), height + min(dy, 0))
        src_rows = slice(max(-dy, 0), height - max(dy, 0))
        dst_cols = slice(max(dx, 0), width + min(dx, 0))
        src_cols = slice(max(-dx, 0), width - max(dx, 0))
        shifted[dst_rows, dst_cols, :] = pixels[src_rows, src_cols, :]

        centers = (boxes[:, 2:] + boxes[:, :2]) / 2
        centers = centers + torch.tensor([[dx, dy]], dtype=torch.float32)
        keep = (
            (centers[:, 0] > 0) & (centers[:, 0] < width)
            & (centers[:, 1] > 0) & (centers[:, 1] < height)
        )
        # The original had this guard commented out (with a stale return
        # signature); without it the shift could emit an empty target.
        if not bool(keep.any()):
            return image, target

        offset = torch.tensor([[dx, dy, dx, dy]], dtype=torch.float32)
        kept = boxes[keep] + offset
        # Keep shifted boxes inside the image.
        kept[:, 0::2].clamp_(min=0, max=width)
        kept[:, 1::2].clamp_(min=0, max=height)

        image = torch.from_numpy(np.transpose(shifted, (2, 0, 1)))
        target["labels"] = labels[keep]
        target["boxes"] = kept
        return image, target

随机变换通道

class Random_swap(object):
    """Reorder the three colour channels at random, 30% of the time."""

    def __call__(self, image, target):
        """Return ``(image, target)``; the image is always a fresh tensor.

        ``image`` is a (C, H, W) tensor. It is copied through numpy on every
        call, including calls where no swap is made.
        """
        hwc = np.array(image).transpose(1, 2, 0)

        if random.random() < 0.3:
            orderings = (
                (0, 1, 2),
                (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0),
            )
            # Index 0 is the identity ordering, so start drawing from 1.
            choice = random.randrange(1, len(orderings))
            hwc = hwc[:, :, orderings[choice]]

        return torch.from_numpy(hwc.transpose(2, 0, 1)), target

随机变换对比度

class Random_contrast(object):
    """Scale pixel intensities by a random factor, 30% of the time.

    Args:
        lower: Smallest multiplier that can be drawn.
        upper: Largest multiplier that can be drawn.
    """

    def __init__(self, lower=0.7, upper=1.3):
        self.lower = lower
        self.upper = upper

    def __call__(self, image, target):
        """Return ``(image, target)`` with the image clipped to [0, 255].

        The result is a new object. The original multiplied in place, which
        altered the caller's tensor and raised on integer tensors.
        """
        if random.random() < 0.3:
            alpha = random.uniform(self.lower, self.upper)
            image = (image * alpha).clip(min=0, max=255)
        return image, target

随机变换饱和度

class Random_saturation(object):
    """Multiply channel 1 of the image by a random gain, 30% of the time.

    Args:
        lower: Smallest gain that can be drawn.
        upper: Largest gain that can be drawn.
    """

    def __init__(self, lower=0.7, upper=1.3):
        self.lower = lower
        self.upper = upper

    def __call__(self, image, target):
        """Return ``(image, target)``; ``image`` is a (C, H, W) tensor.

        Presumably the image is in HSV at this point, making channel 1 the
        saturation -- confirm against the transform pipeline.
        """
        if random.random() >= 0.3:
            return image, target

        hwc = np.array(image).transpose(1, 2, 0)
        gain = random.uniform(self.lower, self.upper)
        hwc[..., 1] *= gain
        return torch.from_numpy(hwc.transpose(2, 0, 1)), target

随机变换色度(HSV空间下(-180,180))

class Random_hue(object):
    """Add a random offset to channel 0 and wrap it back into [0, 360].

    Args:
        delta: The offset is drawn uniformly from [-delta, delta].
    """

    def __init__(self, delta=18.0):
        self.delta = delta

    def __call__(self, image, target):
        """Return ``(image, target)``; fires with probability 0.3.

        Presumably the image is in HSV at this point, making channel 0 the
        hue in degrees -- confirm against the transform pipeline.
        """
        if random.random() >= 0.3:
            return image, target

        hwc = np.array(image).transpose(1, 2, 0)
        hue = hwc[:, :, 0]  # a view: edits below write through to hwc
        hue += random.uniform(-self.delta, self.delta)
        hue[hue > 360.0] -= 360.0
        hue[hue < 0.0] += 360.0
        return torch.from_numpy(hwc.transpose(2, 0, 1)), target

转换图像的色彩空间

class ConvertColor(object):
    """Convert a (C, H, W) image tensor between BGR and HSV with OpenCV.

    Args:
        current: Colour space of the incoming image, 'BGR' or 'HSV'.
        transform: Colour space to convert to, 'HSV' or 'BGR'.
    """

    def __init__(self, current='BGR', transform='HSV'):
        self.current = current
        self.transform = transform

    def __call__(self, image, target):
        """Return ``(converted_image, target)``; always applied.

        Raises:
            NotImplementedError: For any pair other than BGR->HSV or HSV->BGR.
        """
        hwc = np.array(image).transpose(1, 2, 0)
        route = (self.current, self.transform)

        if route == ('BGR', 'HSV'):
            converted = cv2.cvtColor(hwc, cv2.COLOR_BGR2HSV)
        elif route == ('HSV', 'BGR'):
            converted = cv2.cvtColor(hwc, cv2.COLOR_HSV2BGR)
        else:
            raise NotImplementedError

        return torch.from_numpy(converted.transpose(2, 0, 1)), target

以上代码包含部分引用的开源代码,如有侵权,请作者联系我。

你可能感兴趣的:(目标检测,pytorch,数据增强,pytorch)