# YOLOv5 dataset.py: annotated walkthrough of the data-loading code used for training and testing
# (original article tags: matlab, ud.save)

import glob

import math

import os

import random

import shutil

import time

from pathlib import Path

from threading import Thread

import cv2

import numpy as np

import torch

from PIL import Image, ExifTags

from torch.utils.data import Dataset

from tqdm import tqdm

from utils.utils import xyxy2xywh, xywh2xyxy

# Link printed in error messages raised while loading the dataset.
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'

# File extensions (lower-case, with leading dot) accepted as images / videos.
img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.dng']
vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']

# Get orientation exif tag: leave `orientation` bound to the numeric EXIF tag
# id whose name is 'Orientation'. exif_size() reads this module-level name.
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break

def exif_size(img):
    """Return the EXIF-corrected ``(width, height)`` of a PIL image.

    Starts from ``img.size`` and swaps width and height when the EXIF
    orientation value is 6 or 8 (annotated upstream as 270 and 90 degree
    rotations). Reading EXIF data is best-effort: if the image has no EXIF
    block, lacks the orientation tag, or the metadata cannot be read, the
    uncorrected size is returned.

    Args:
        img: an object exposing ``size`` and (optionally) ``_getexif()``.

    Returns:
        The ``(width, height)`` pair, swapped if the orientation requires it.
    """
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation in (6, 8):  # image is stored rotated by a quarter turn
            s = (s[1], s[0])
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt /
        # SystemExit the way a bare ``except:`` would.
        pass
    return s

def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False):
    """Build a ``LoadImagesAndLabels`` dataset and a DataLoader over it.

    Args:
        path: a txt file listing image paths, or a folder containing images.
        imgsz: network input image size.
        batch_size: requested batch size (capped at the dataset length).
        stride: maximum total down-sampling stride of the network.
        opt: options object; only ``opt.single_cls`` is read here.
        hyp: hyper-parameter dict forwarded to the dataset (augmentation gains).
        augment: whether to apply data augmentation.
        cache: whether to pre-load images into memory.
        pad: padding term used when computing rectangular batch shapes.
        rect: whether to use rectangular training.

    Returns:
        ``(dataloader, dataset)``.
    """
    dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                  augment=augment,  # augment images
                                  hyp=hyp,  # augmentation hyperparameters
                                  rect=rect,  # rectangular training
                                  cache_images=cache,
                                  single_cls=opt.single_cls,
                                  stride=int(stride),
                                  pad=pad)

    batch_size = min(batch_size, len(dataset))
    # os.cpu_count() returns None when the count cannot be determined; fall
    # back to 1 so min() never compares None with an int.
    cpu_count = os.cpu_count() or 1
    nw = min([cpu_count, batch_size if batch_size > 1 else 0, 8])  # number of workers
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=LoadImagesAndLabels.collate_fn)
    return dataloader, dataset

class LoadImagesAndLabels(Dataset):  # for training/testing
    """Dataset yielding ``(image, labels, path, shapes)`` for training/testing.

    Image paths come from a txt listing or a folder; the label file for each
    image is derived by replacing ``images`` with ``labels`` in the path and
    the extension with ``.txt``. Label rows are ``class x y w h`` with the
    coordinates normalised to 0-1.
    """

    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0):
        """Index images, read/cache image shapes and labels.

        Args:
            path: txt file of image paths, an image folder, or a list of either.
            img_size: network input resolution.
            batch_size: batch size, used to assign each image a batch index.
            augment: enable augmentation (mosaic is used when not ``rect``).
            hyp: hyper-parameter dict read by the augmentation code.
            rect: rectangular training (forced off when ``image_weights``).
            image_weights: sample through ``self.indices`` (set by the caller).
            cache_images: load every image into memory up front.
            single_cls: force every class id to 0.
            stride: total down-sampling stride of the model.
            pad: padding term for the rectangular batch shapes.

        Raises:
            Exception: if a path does not exist or the listing cannot be read.
            AssertionError: if no images/labels are found or a label file is
                malformed.
        """
        try:
            f = []
            for p in path if isinstance(path, list) else [path]:
                p = str(Path(p))  # os-agnostic
                parent = str(Path(p).parent) + os.sep
                if os.path.isfile(p):  # txt file listing image paths
                    with open(p, 'r') as t:
                        lines = t.read().splitlines()
                    # local to global path: only the leading './' is rewritten
                    f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in lines]
                elif os.path.isdir(p):  # folder of images
                    f += glob.iglob(p + os.sep + '*.*')
                else:
                    raise Exception('%s does not exist' % p)
            path = p  # *.npy dir
            self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats]
        except Exception as e:
            raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) from e

        n = len(self.img_files)  # number of images
        assert n > 0, 'No images found in %s. See %s' % (path, help_url)
        # ``np.int`` was removed from NumPy; the builtin ``int`` is equivalent.
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        self.n = n  # number of images
        self.batch = bi  # batch index of image
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # mosaic augmentation
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride

        # Label file path for every image.
        self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
                            for x in self.img_files]

        # Image shapes are cached next to the dataset in a '.shapes' file.
        sp = path.replace('.txt', '') + '.shapes'  # shapefile path
        try:
            with open(sp, 'r') as sf:  # read existing shapefile
                s = [x.split() for x in sf.read().splitlines()]
            if len(s) != n:
                raise ValueError('Shapefile out of sync')
        except (OSError, ValueError):
            # Missing, unreadable or stale shapefile: read shapes from the images.
            s = [exif_size(Image.open(im)) for im in tqdm(self.img_files, desc='Reading image shapes')]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

        self.shapes = np.array(s, dtype=np.float64)

        # Rectangular Training https://github.com/ultralytics/yolov3/issues/232
        if self.rect:
            # Sort images, labels and shapes by aspect ratio.
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # h/w
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # One [h, w] ratio pair per batch.
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:  # every image in the batch is wider than tall
                    shapes[i] = [maxi, 1]
                elif mini > 1:  # every image in the batch is taller than wide
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache labels
        self.imgs = [None] * n
        self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
        # Optional side tasks, all disabled by default.
        create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        np_labels_path = str(Path(self.label_files[0]).parent) + '.npy'  # saved labels in *.npy file
        if os.path.isfile(np_labels_path):
            desc_src = np_labels_path  # print string
            # NOTE(review): allow_pickle=True unpickles the file; only load
            # label caches from a trusted location.
            x = np.load(np_labels_path, allow_pickle=True)
            if len(x) == n:
                self.labels = x
                labels_loaded = True
        else:
            desc_src = path.replace('images', 'labels')

        pbar = tqdm(self.label_files)
        for i, file in enumerate(pbar):
            if labels_loaded:
                lbl = self.labels[i]
            else:
                try:
                    with open(file, 'r') as lf:
                        lbl = np.array([x.split() for x in lf.read().splitlines()], dtype=np.float32)
                except Exception:
                    # Missing or unparsable label file counts as "missing".
                    nm += 1
                    continue

            if lbl.shape[0]:
                assert lbl.shape[1] == 5, '> 5 label columns: %s' % file
                assert (lbl >= 0).all(), 'negative labels: %s' % file
                assert (lbl[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(lbl, axis=0).shape[0] < lbl.shape[0]:  # duplicate rows
                    nd += 1
                if single_cls:
                    lbl[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = lbl
                nf += 1  # file found

                # Create subdataset (a smaller dataset); disabled by default.
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in lbl[:, 0]:
                        ns += 1
                        with open('./datasubset/images.txt', 'a') as out:
                            out.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier;
                # disabled by default.
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(lbl):
                        crop_path = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(crop_path).parent):
                            os.makedirs(Path(crop_path).parent)  # make new output folder

                        b = x[1:] * [w, h, w, h]  # normalised box -> pixels
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(crop_path, img[b[1]:b[3], b[0]:b[2]]), \
                            'Failure extracting classifier boxes'
            else:
                ne += 1  # label file exists but is empty

            pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                desc_src, nf, nm, ne, nd, n)
        assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
        if not labels_loaded and n > 1000:
            print('Saving labels to %s for faster future loading' % np_labels_path)
            # Label arrays have different row counts; build an explicit object
            # array because NumPy no longer converts ragged lists implicitly.
            labels_arr = np.empty(n, dtype=object)
            for k, lab in enumerate(self.labels):
                labels_arr[k] = lab
            np.save(np_labels_path, labels_arr)  # save for next time

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        if cache_images:
            gb = 0  # bytes of cached images so far (reported in GB)
            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes
                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

        # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
        detect_corrupted_images = False
        if detect_corrupted_images:
            from skimage import io  # conda install -c conda-forge scikit-image
            for file in tqdm(self.img_files, desc='Detecting corrupted images'):
                try:
                    _ = io.imread(file)
                except Exception:
                    print('Corrupted image detected: %s' % file)

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_files)

    def __getitem__(self, index):
        """Load, augment and format one sample.

        Returns:
            ``(img, labels_out, path, shapes)`` where ``img`` is a CHW tensor
            with the channel order reversed, ``labels_out`` is an ``(nL, 6)``
            tensor whose column 0 is left at zero for ``collate_fn`` to fill
            and whose remaining columns are ``class, x, y, w, h`` (normalised),
            and ``shapes`` is ``None`` for mosaic samples, otherwise
            ``((h0, w0), ((h / h0, w / w0), pad))``.
        """
        if self.image_weights:
            # self.indices is not set in this class; the training script is
            # expected to assign it (weighted random indices) before use, e.g.
            #   dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)
            index = self.indices[index]

        hyp = self.hyp
        if self.mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None
        else:
            # Load image (resized by load_image, not yet padded)
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox to the batch shape (rect) or to a square img_size
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format, shifted by the padding
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace (mosaic samples were already warped in load_mosaic)
            if not self.mosaic:
                img, labels = random_affine(img, labels,
                                            degrees=hyp['degrees'],
                                            translate=hyp['translate'],
                                            scale=hyp['scale'],
                                            shear=hyp['shear'])

            # Augment colorspace (modifies img in place)
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled)
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 is reserved for the image index within the batch.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        """Merge per-sample outputs of ``__getitem__`` into one batch.

        Column 0 of every label tensor is set to the position of its image in
        the batch, so that after concatenation each row still identifies its
        image, e.g.::

            [[0, 6, 0.5, 0.5, 0.26, 0.35],
             [0, 6, 0.5, 0.5, 0.26, 0.35],
             [1, 6, 0.5, 0.5, 0.26, 0.35],
             [2, 6, 0.5, 0.5, 0.26, 0.35]]

        Here the first two rows belong to the first image and the third row to
        the second image.

        Returns:
            ``(stacked images, concatenated labels, paths tuple, shapes tuple)``.
        """
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

def load_image(self, index):
    """Load one image of the dataset, using the in-memory cache when filled.

    Args:
        self: dataset object providing ``imgs``, ``img_files``, ``img_size``
            and ``augment`` (plus ``img_hw0`` / ``img_hw`` for cached images).
        index: position of the image in the dataset.

    Returns:
        ``(img, (h0, w0), (h, w))``: the image, its original height/width and
        its height/width after resizing.

    Raises:
        AssertionError: if the image file cannot be read.
    """
    img = self.imgs[index]
    if img is not None:  # cached
        return img, self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized

    path = self.img_files[index]
    img = cv2.imread(path)  # BGR
    assert img is not None, 'Image Not Found ' + path
    h0, w0 = img.shape[:2]  # orig hw
    r = self.img_size / max(h0, w0)  # scale so the longer side equals img_size
    if r != 1:
        # The image is resized whenever r != 1 (up or down); INTER_AREA is
        # only chosen when shrinking without augmentation.
        interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
        img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
    return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized

def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly scale hue, saturation and value of a BGR image in place.

    Three gains are drawn uniformly from ``1 +/- hgain``, ``1 +/- sgain`` and
    ``1 +/- vgain`` and applied through lookup tables. The result is written
    back into ``img``; nothing is returned.

    Args:
        img: BGR image; modified in place.
        hgain: maximum relative change of the hue channel.
        sgain: maximum relative change of the saturation channel.
        vgain: maximum relative change of the value channel.
    """
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    # One lookup table per channel; hue wraps modulo 180, the others clip.
    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed

def load_mosaic(self, index):
    """Build a 4-image mosaic around a random centre and warp it.

    The image at ``index`` plus three randomly chosen images are pasted into
    the four quadrants of a ``2*img_size`` square canvas filled with 114.
    Labels are converted from normalised xywh to pixel xyxy on the canvas and
    clipped to it, then image and labels go through ``random_affine`` with
    ``border=self.mosaic_border``.

    Args:
        self: dataset object providing ``img_size``, ``mosaic_border``,
            ``labels`` and ``hyp``.
        index: index of the first (top-left) image.

    Returns:
        ``(img4, labels4)`` as returned by ``random_affine``.
    """
    labels4 = []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center
    indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]  # 3 additional image indices
    for i, idx in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, idx)

        # place img in img4: (x1a..y2a) is the target region on the canvas,
        # (x1b..y2b) the matching region cut from the small image
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            # The source x-range ends at the image's right edge; the earlier
            # max(xc, w) could exceed w and only worked through slice clamping.
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        # Offset of the small image's origin on the canvas, used for the labels.
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[idx]
        labels = x.copy()
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)
        np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])  # use with random_affine

    # Augment: warp the 2*img_size canvas; the border argument shrinks the output
    img4, labels4 = random_affine(img4, labels4,
                                  degrees=self.hyp['degrees'],
                                  translate=self.hyp['translate'],
                                  scale=self.hyp['scale'],
                                  shear=self.hyp['shear'],
                                  border=self.mosaic_border)  # border to remove

    return img4, labels4

def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    """Resize an image to fit ``new_shape`` and pad the remainder.

    See https://github.com/ultralytics/yolov3/issues/232

    Args:
        img: image array whose first two dimensions are height and width.
        new_shape: target ``(height, width)``, or an int for a square.
        color: border colour used for the padding.
        auto: pad only to the remainder modulo 64 instead of the full
            ``new_shape`` (minimum rectangle).
        scaleFill: when ``auto`` is false, stretch to ``new_shape`` without
            padding.
        scaleup: allow enlarging the image; when false the scale ratio is
            capped at 1.0 (only scale down, for better test mAP).

    Returns:
        ``(img, ratio, (dw, dh))``: the padded image, the ``(width, height)``
        scale ratios, and the per-side padding in x and y (may be fractional).
    """
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -/+ 0.1 sends an odd total padding to bottom/right rather than
    # rounding both sides the same way.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)

def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, border=(0, 0)):
    """Apply a random rotation/scale/translation/shear to an image and boxes.

    Roughly torchvision.transforms.RandomAffine(degrees=(-10, 10),
    translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)); see also
    https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    Args:
        img: image array of shape (h, w, c).
        targets: rows of ``[cls, x1, y1, x2, y2]`` in pixels; may be empty.
        degrees: rotation is drawn from ``[-degrees, degrees]``.
        translate: translation is drawn from ``[-translate, translate]`` as a
            fraction of the image size.
        scale: scale is drawn from ``[1 - scale, 1 + scale]``.
        shear: shear angles (degrees) are drawn from ``[-shear, shear]``.
        border: ``(y, x)`` amounts added twice to the output height/width and
            once to the translation; negative values shrink the output.

    Returns:
        ``(img, targets)``: the warped image and the boxes that survive the
        size / area / aspect-ratio filter, with updated coordinates.
    """
    # targets = [cls, xyxy]
    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    s = random.uniform(1 - scale, 1 + scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border[1]  # x translation (pixels)
    T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border[0]  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Combined rotation matrix
    M = S @ T @ R  # ORDER IS IMPORTANT HERE!!
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp the four corners of every box
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # create new axis-aligned boxes enclosing the warped corners
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # clip warped boxes to the output image
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # keep boxes that are still large enough and not too elongated
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2])
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # aspect ratio
        i = (w > 2) & (h > 2) & (area / (area0 * s + 1e-16) > 0.2) & (ar < 20)

        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets

def create_folder(path='./new_folder'):
    """Create an empty folder at ``path``, deleting any existing one first.

    Args:
        path: directory to (re)create. An existing directory at this path is
            removed together with everything inside it.
    """
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder

# Related topics from the original article: matlab, ud.save