yolov7中的mosaic增强实现原理

yolov7源码链接:https://github.com/WongKinYiu/yolov7 (GitHub - WongKinYiu/yolov7: Implementation of paper - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors)

一、mosaic增强的总体思想

1、构建一个画布

# Side length of one input image; the mosaic canvas is twice this in each direction.
s = self.img_size
# Canvas of shape (2s, 2s, channels of img), uint8, pre-filled with the value 114.
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)

2、为该画布生成随机中心点

随机中心点的取值范围为:[img_size // 2, int(1.5 * img_size)],即 0.5*img_size 到 1.5*img_size

# Each entry is (-img_size) // 2, i.e. minus half the image size when img_size is even.
self.mosaic_border = [-img_size // 2, -img_size // 2]
# For each (negative) border x, draw uniformly between -x and 2 * s + x and truncate with int().
# The first draw becomes yc, the second xc.
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]

3、将4张图片依次填入画布

左上

x1a 的取值有两种情况(纵坐标 y1a 同理):

1、xc-w<=0,则x1a=0,x1b=w-xc,padw=xc-w(padw用于修正label)

2、xc-w>0,则x1a=xc-w,x1b=0,padw=xc-w(这种情况意味着小图片可以完全放入画布的左上方区域)

# Destination rectangle on the canvas: ends at the centre (xc, yc), clamped to 0 on the left/top.
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
# Source rectangle in the image: same size as the destination, ending at the image's bottom-right corner (w, h).
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h

# Paste the crop onto the canvas.
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]
# Offset between canvas coordinates and image coordinates (canvas = image + pad).
padw = x1a - x1b
padh = y1a - y1b

右上

# Destination rectangle: starts at xc, clamped to the canvas width s * 2 on the right and to 0 at the top.
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
# Source rectangle: starts at the image's left edge (x = 0) and ends at its bottom edge (y = h).
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h

# Paste the crop onto the canvas.
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]
# Offset between canvas coordinates and image coordinates (canvas = image + pad).
padw = x1a - x1b
padh = y1a - y1b

左下

# Destination rectangle: ends at xc (clamped to 0 on the left), starts at yc, clamped to the canvas height s * 2.
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
# Source rectangle: ends at the image's right edge (x = w) and starts at its top edge (y = 0).
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)

# Paste the crop onto the canvas.
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]
# Offset between canvas coordinates and image coordinates (canvas = image + pad).
padw = x1a - x1b
padh = y1a - y1b

右下

# Destination rectangle: starts at the centre (xc, yc), clamped to the canvas size s * 2 on the right/bottom.
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
# Source rectangle: starts at the image origin (0, 0) and has the same size as the destination.
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

# Paste the crop onto the canvas.
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]
# Offset between canvas coordinates and image coordinates (canvas = image + pad).
padw = x1a - x1b
padh = y1a - y1b

4、修正label

# Copy first so the in-place assignment below does not write into self.labels[index].
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
    # Column 0 is left untouched; columns 1: are converted and shifted by (padw, padh).
    labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
    # Each segment is scaled by (w, h) and shifted by the same offsets.
    segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
# Accumulate this image's labels and segments for the whole mosaic.
labels4.append(labels)
segments4.extend(segments)

labels[:, 1:]表示目标的坐标,格式是xywh,且是归一化后的坐标

利用xywhn2xyxy将以上坐标转换至w、h尺度上的坐标,并修正了坐标(根据padw和padh),坐标格式变成x1y1x2y2

def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized centre-format boxes to pixel corner-format boxes.

    Args:
        x: n x 4 torch tensor or NumPy array of normalized
            ``[x_center, y_center, width, height]`` rows.
        w: Pixel width used to scale the x coordinates.
        h: Pixel height used to scale the y coordinates.
        padw: Offset added to both x coordinates after scaling.
        padh: Offset added to both y coordinates after scaling.

    Returns:
        A copy of ``x`` whose columns 0-3 hold ``[x1, y1, x2, y2]``
        (top-left and bottom-right corners). ``x`` itself is not modified.
    """
    boxes = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    boxes[:, 0] = w * (x[:, 0] - half_w) + padw  # left edge
    boxes[:, 1] = h * (x[:, 1] - half_h) + padh  # top edge
    boxes[:, 2] = w * (x[:, 0] + half_w) + padw  # right edge
    boxes[:, 3] = h * (x[:, 1] + half_h) + padh  # bottom edge
    return boxes

segments同理

def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized segment points to pixel coordinates.

    Args:
        x: (n, 2) torch tensor or NumPy array of normalized ``[x, y]`` points.
        w: Pixel width used to scale the x coordinates.
        h: Pixel height used to scale the y coordinates.
        padw: Offset added to the x coordinates after scaling.
        padh: Offset added to the y coordinates after scaling.

    Returns:
        A copy of ``x`` with column 0 replaced by ``w * x + padw`` and
        column 1 by ``h * y + padh``. ``x`` itself is not modified.
    """
    points = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    points[:, 0] = w * x[:, 0] + padw  # pixel x
    points[:, 1] = h * x[:, 1] + padh  # pixel y
    return points

二、mosaic增强函数具体实现

yolov7的mosaic增强函数如下,

def load_mosaic(self, index):
    """Assemble a 4-image mosaic around a random centre and return it with its labels.

    The image at ``index`` plus three images drawn with ``random.choices``
    from ``self.indices`` are pasted into the top-left, top-right,
    bottom-left and bottom-right regions of a ``2s x 2s`` canvas
    (``s = self.img_size``). Labels and segments are converted to canvas
    pixel coordinates, clipped to ``[0, 2s]``, and the result is passed
    through ``copy_paste`` and ``random_perspective``.

    Args:
        index: Dataset index of the image placed in the top-left region.

    Returns:
        The ``(image, labels)`` pair returned by ``random_perspective``.
    """
    size = self.img_size
    mosaic_labels, mosaic_segments = [], []

    # Random mosaic centre; the y coordinate is drawn first, then x.
    yc, xc = (int(random.uniform(-b, 2 * size + b)) for b in self.mosaic_border)

    # The requested image first, followed by three randomly chosen ones.
    picks = [index, *random.choices(self.indices, k=3)]

    for slot, img_index in enumerate(picks):
        img, _, (h, w) = load_image(self, img_index)

        if slot == 0:
            # The canvas needs the channel count, so it is created once the first image is loaded.
            canvas = np.full((size * 2, size * 2, img.shape[2]), 114, dtype=np.uint8)

        # Columns: slots 0 and 2 sit left of the centre, slots 1 and 3 right of it.
        if slot % 2 == 0:
            x1a, x2a = max(xc - w, 0), xc
            x1b, x2b = w - (x2a - x1a), w
        else:
            x1a, x2a = xc, min(xc + w, size * 2)
            x1b, x2b = 0, min(w, x2a - x1a)

        # Rows: slots 0 and 1 sit above the centre, slots 2 and 3 below it.
        if slot < 2:
            y1a, y2a = max(yc - h, 0), yc
            y1b, y2b = h - (y2a - y1a), h
        else:
            y1a, y2a = yc, min(size * 2, yc + h)
            y1b, y2b = 0, min(y2a - y1a, h)

        # canvas[ymin:ymax, xmin:xmax] <- matching crop of the image
        canvas[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]

        # Offset from image coordinates to canvas coordinates.
        padw, padh = x1a - x1b, y1a - y1b

        # Copy so the stored annotations are not overwritten by the conversion.
        labels = self.labels[img_index].copy()
        segments = self.segments[img_index].copy()
        if labels.size:
            # normalized xywh -> pixel xyxy on the canvas; column 0 is left as is
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)
            segments = [xyn2xy(seg, w, h, padw, padh) for seg in segments]
        mosaic_labels.append(labels)
        mosaic_segments.extend(segments)

    # Merge the per-image labels and clip every coordinate array to the canvas, in place.
    labels4 = np.concatenate(mosaic_labels, 0)
    for coords in (labels4[:, 1:], *mosaic_segments):
        np.clip(coords, 0, 2 * size, out=coords)  # clip when using random_perspective()

    # Augment
    canvas, labels4, mosaic_segments = copy_paste(
        canvas, labels4, mosaic_segments, probability=self.hyp['copy_paste']
    )
    canvas, labels4 = random_perspective(
        canvas,
        labels4,
        mosaic_segments,
        degrees=self.hyp['degrees'],
        translate=self.hyp['translate'],
        scale=self.hyp['scale'],
        shear=self.hyp['shear'],
        perspective=self.hyp['perspective'],
        border=self.mosaic_border,  # border to remove
    )

    return canvas, labels4

三、效果展示

演示代码如下,

import random
import numpy as np
import cv2


def load_mosaic(imgs, s=320, save_path="/home/projects/yolov7/debug/img_concat.jpg"):
    """Paste four images around a random centre on a 2s x 2s canvas (demo version).

    Args:
        imgs: Sequence whose first four items are H x W x 3 uint8 images;
            they go to the top-left, top-right, bottom-left and bottom-right
            regions, in that order. Fewer than four items raises IndexError.
        s: Nominal side length of one tile; the canvas is ``2 * s`` square and
            the centre is drawn between ``s / 2`` and ``1.5 * s``.
        save_path: File the mosaic is written to with ``cv2.imwrite``.
            Pass ``None`` to skip writing.

    Returns:
        The mosaic canvas as a ``(2 * s, 2 * s, 3)`` uint8 array.
    """
    # Minus half the tile size; with the defaults this is [-160, -160].
    border = -s // 2
    mosaic_border = [border, border]

    # Random mosaic centre; y is drawn first, then x.
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in mosaic_border]

    # Canvas pre-filled with the value 114.
    img4 = np.full((s * 2, s * 2, 3), 114, dtype=np.uint8)

    for i in range(4):
        img = imgs[i]
        h, w = img.shape[0:2]

        # "a" coordinates address the canvas, "b" coordinates address the source image.
        if i == 0:  # top left
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        else:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]

    if save_path is not None:
        cv2.imwrite(save_path, img4)

    return img4


# Read the four demo images in order and resize each one to 320 x 320.
imgs = [
    cv2.resize(cv2.imread(path), [320, 320])
    for path in (
        "/home/projects/yolov7/debug/img1.jpg",
        "/home/projects/yolov7/debug/img2.jpg",
        "/home/projects/yolov7/debug/img3.jpg",
        "/home/projects/yolov7/debug/img4.jpg",
    )
]
# Keep the individual names available as well.
img1, img2, img3, img4 = imgs

# Build the mosaic from the four images.
load_mosaic(imgs=imgs)

mosaic增强前的图片

yolov7中的mosaic增强实现原理_第1张图片yolov7中的mosaic增强实现原理_第2张图片

yolov7中的mosaic增强实现原理_第3张图片

yolov7中的mosaic增强实现原理_第4张图片

mosaic增强后的图片

yolov7中的mosaic增强实现原理_第5张图片

你可能感兴趣的:(目标检测,YOLO,目标检测,深度学习,python)