图像增强库官方英文介绍
pip install albumentations
pascal_voc:[x_min, y_min, x_max, y_max],坐标是非归一化的(像素值)
albumentations:[x_min, y_min, x_max, y_max],坐标是归一化的,x 需要除以图像宽度,y 需要除以图像高度
coco:[x_min, y_min, width, height],坐标是非归一化的(像素值)
yolo:[x_center, y_center, width, height],坐标是归一化的
# 导入所需要的库
import albumentations as A
import cv2
# Build the augmentation pipeline. bbox_params describes how the bounding
# boxes are encoded ('coco' here), the filtering thresholds (min_area,
# min_visibility) and the names of the extra call arguments that carry the
# per-bbox labels (label_fields).
transform = A.Compose(
    [
        A.RandomCrop(width=450, height=450),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ],
    bbox_params=A.BboxParams(
        format='coco',
        min_area=1024,
        min_visibility=0.1,
        label_fields=['class_labels'],
    ),
)
# Read the image from disk.
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, so check explicitly and fail with a clear message instead of
# letting cvtColor fail on a None input.
image_path = "/path/to/image.jpg"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# Convert from OpenCV's BGR channel order to RGB before augmenting.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Option 1: attach the class label by appending it to each bbox as an extra
# element. The transformed coordinates are returned and the extra elements
# are passed through unchanged.
#
# This call does not supply `class_labels`, so it must not go through a
# pipeline whose bbox_params declares label_fields=['class_labels']:
# Albumentations raises ValueError when a declared label field is missing
# from the call. Use a pipeline without label_fields for this variant.
# NOTE(review): newer Albumentations releases may restrict non-numeric extra
# bbox elements — confirm against the installed version.
inline_label_transform = A.Compose(
    [
        A.RandomCrop(width=450, height=450),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ],
    bbox_params=A.BboxParams(format='coco', min_area=1024, min_visibility=0.1),
)
bboxes = [[23, 74, 295, 388, 'dog'], [377, 294, 252, 161, 'cat']]
transformed = inline_label_transform(image=image, bboxes=bboxes)
transformed_image = transformed['image']
transformed_bboxes = transformed['bboxes']
# Option 2: keep the class labels in a separate list that runs parallel to
# bboxes. The keyword used in the call (class_labels=) must be the same name
# that the pipeline lists in label_fields.
class_labels = ['cat', 'dog', 'parrot']
bboxes = [
    [23, 74, 295, 388],
    [377, 294, 252, 161],
    [333, 421, 49, 49],
]
transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels)
# Pull the three outputs out of the result dict in one step.
transformed_image, transformed_bboxes, transformed_class_labels = (
    transformed[key] for key in ('image', 'bboxes', 'class_labels')
)
# Separate label lists, used when each bbox carries more than one label.
# Every name listed in label_fields must be passed as a keyword argument of
# the call, and each list is returned under the same key in the result.
transform = A.Compose(
    [
        A.RandomCrop(width=450, height=450),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ],
    bbox_params=A.BboxParams(
        format='coco',
        label_fields=['class_labels', 'class_categories'],
    ),
)
class_labels = ['cat', 'dog', 'parrot']
class_categories = ['animal', 'animal', 'item']
# `bboxes` is the list already in scope at this point in the script.
transformed = transform(
    image=image,
    bboxes=bboxes,
    class_labels=class_labels,
    class_categories=class_categories,
)
transformed_image = transformed['image']
transformed_bboxes = transformed['bboxes']
transformed_class_labels = transformed['class_labels']
transformed_class_categories = transformed['class_categories']
min_area
:bbox面积(所占像素个数)的下限阈值。数据增强后,若某个bbox的面积小于该值,则从返回的bbox列表中删除该bbox。
min_visibility
:值域为[0,1],如果增强后的bbox面积和增强前的bbox面积比值小于该值,则删除该bbox.
label_fields
:表示自定义的类标签变量的名字,是一个列表,可以放置多个参数名称,表示多标签。
# Single label per bbox (label appended after the four coordinates)
[[23, 74, 295, 388, 'dog'], [377, 294, 252, 161, 'cat']]
# Multiple labels per bbox (labels appended after the four coordinates)
[[23, 74, 295, 388, 'dog', 'animal'], [377, 294, 252, 161, 'cat', 'animal']]
# Bboxes only (coordinates without labels)
[[23, 74, 295, 388], [377, 294, 252, 161], [333, 421, 49, 49]]
# Class labels
# NOTE(review): the labels are wrapped in an extra outer list here, whereas
# class_labels is a flat list elsewhere in this file — confirm the nesting
# is intended.
[['cat', 'dog', 'sports ball']]
Transform | Image | Masks | BBoxes | Keypoints |
---|---|---|---|---|
Affine | ✓ | ✓ | ✓ | ✓ |
CenterCrop | ✓ | ✓ | ✓ | ✓ |
CoarseDropout | ✓ | ✓ | ||
Crop | ✓ | ✓ | ✓ | ✓ |
CropAndPad | ✓ | ✓ | ✓ | ✓ |
CropNonEmptyMaskIfExists | ✓ | ✓ | ✓ | ✓ |
ElasticTransform | ✓ | ✓ | ||
Flip | ✓ | ✓ | ✓ | ✓ |
GridDistortion | ✓ | ✓ | ||
GridDropout | ✓ | ✓ | ||
HorizontalFlip | ✓ | ✓ | ✓ | ✓ |
Lambda | ✓ | ✓ | ✓ | ✓ |
LongestMaxSize | ✓ | ✓ | ✓ | ✓ |
MaskDropout | ✓ | ✓ | ||
NoOp | ✓ | ✓ | ✓ | ✓ |
OpticalDistortion | ✓ | ✓ | ||
PadIfNeeded | ✓ | ✓ | ✓ | ✓ |
Perspective | ✓ | ✓ | ✓ | ✓ |
PiecewiseAffine | ✓ | ✓ | ✓ | ✓ |
RandomCrop | ✓ | ✓ | ✓ | ✓ |
RandomCropNearBBox | ✓ | ✓ | ✓ | ✓ |
RandomGridShuffle | ✓ | ✓ | ||
RandomResizedCrop | ✓ | ✓ | ✓ | ✓ |
RandomRotate90 | ✓ | ✓ | ✓ | ✓ |
RandomScale | ✓ | ✓ | ✓ | ✓ |
RandomSizedBBoxSafeCrop | ✓ | ✓ | ✓ | |
RandomSizedCrop | ✓ | ✓ | ✓ | ✓ |
Resize | ✓ | ✓ | ✓ | ✓ |
Rotate | ✓ | ✓ | ✓ | ✓ |
SafeRotate | ✓ | ✓ | ✓ | ✓ |
ShiftScaleRotate | ✓ | ✓ | ✓ | ✓ |
SmallestMaxSize | ✓ | ✓ | ✓ | ✓ |
Transpose | ✓ | ✓ | ✓ | ✓ |
VerticalFlip | ✓ | ✓ | ✓ | ✓ |
转换为张量:ToTensorV2(只把 HWC 的 numpy 图像转换为 CHW 的 torch.Tensor,不做归一化;如需归一化请在其之前使用 A.Normalize)
from albumentations.pytorch import ToTensorV2
# Resize plus random geometric augmentations, with the tensor conversion
# placed last, after all array-based transforms.
# `opt` is not defined in this snippet; it is expected to come from the
# surrounding script.
# NOTE(review): the composed pipeline is not assigned to a name here.
A.Compose(
    [
        A.Resize(height=opt.size, width=opt.size),
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.ShiftScaleRotate(),
        ToTensorV2(),
    ]
)