深度学习中,数据增强是一种不会改变网络模型的正则化方法,通过对数据进行增强可提升模型最终的训练效果。
数据增强的两种主要方式:
常用的增强技术:
翻转:对图像进行水平或者垂直翻转。
旋转:对图像给定一个角度进行顺时针或者逆时针旋转。
裁剪:在裁剪的过程中,对图像的一部分进行随机采样
亮度:增加或者减少图像的亮度
缩放:缩放图像可以向外缩放或向内缩放。向外缩放时,图像大小增大,向内缩放时图像大小减小
添加噪声:可以给现存的照片添加高斯噪声
目前主流的图像增强库主要有以下:
skimage
opencv
imgaug
Albumentations
Augmentor
Keras(ImageDataGenerator class)
SOLT
scikit-image是一个使用numpy数组的开放源码Python包。它为研究、教育和工业应用实现算法和工具。即使对于那些刚刚接触Python生态系统的人来说,它也是一个相当简单和直接的库。
首先导入必要的库和数据
import os
import glob
import numpy as np
import scipy as sp
import pandas as pd
#skimage
from skimage.io import imshow,imread,imsave
from skimage.transform import rotate,AffineTransform,warp,rescale,resize,downscale_local_mean
from skimage import color,data
from skimage.exposure import adjust_gamma
from skimage.util import random_noise
#OpenCV-Python
import cv2
#imgaug
import imageio
import imgaug as ia
import imgaug.augmenters as iaa
#Albumentations
import albumentations as A
#Augmentor
!pip install augmentor
import Augmentor
#Keras
from keras.preprocessing.image import ImageDataGenerator,array_to_img,img_to_array,load_img
#SOLT
!pip install solt
import solt
import solt.transforms as slt
#visualisation
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns
from IPython.display import HTML,Image
#source: https://www.kaggle.com/jpmiller/nfl-punt-analytics/edit
# Display options for the report: print up to 100 DataFrame columns.
# (The option name previously opened with " and closed with ', which is a
# syntax error.)
pd.set_option("display.max_columns", 100)

# CSS (property, value) pairs for table header cells and table body cells.
th_props = [('font-size', '13px'), ('background-color', 'white'), ('color', '#666666')]
td_props = [('font-size', '15px'), ('background-color', 'white')]
# Selector/props records; presumably meant for a pandas Styler
# (set_table_styles) -- nothing in this file consumes them yet.
styles = [
    dict(selector="td", props=td_props),
    dict(selector="th", props=th_props),
]

# Silence every warning so library chatter does not clutter the output.
import warnings
warnings.filterwarnings("ignore")
#Helper function to display the images in a grid
# Source: https://stackoverflow.com/questions/42040747/more-idiomatic-way-to-display-images-in-a-grid-with-numpy which was pointed by
# this excellent article: https://towardsdatascience.com/data-augmentation-for-deep-learning-4fe21d1a4eb9
def gallery(array, ncols=3):
    """Arrange a stack of equally sized images into a single grid image.

    Images are laid out row by row: image ``k`` lands in grid row
    ``k // ncols`` and grid column ``k % ncols``.

    Args:
        array: numpy array of shape (nindex, height, width, intensity)
            holding ``nindex`` images.
        ncols: number of columns in the resulting image grid.

    Returns:
        Array of shape (height * nrows, width * ncols, intensity), where
        ``nrows = nindex // ncols``.

    Raises:
        AssertionError: if ``nindex`` is not a multiple of ``ncols``.
    """
    nindex, height, width, intensity = array.shape
    nrows = nindex // ncols
    assert nindex == nrows * ncols, (
        f"{nindex} images cannot fill a grid with {ncols} columns"
    )
    # (nrows, ncols, h, w, c) -> (nrows, h, ncols, w, c): putting the pixel-row
    # axis ahead of the grid-column axis lets the final reshape concatenate the
    # images of one grid row side by side.
    result = (array.reshape(nrows, ncols, height, width, intensity)
              .swapaxes(1, 2)
              .reshape(height * nrows, width * ncols, intensity))
    return result
# Data locations: the image directory, plus the train/test tables which are
# read into DataFrames straight away.
Image_Data_Path = "../input/plant-pathology-2020-fgvc7/images/"
train_data = pd.read_csv(
    filepath_or_buffer="../input/plant-pathology-2020-fgvc7/train.csv",
)
test_data = pd.read_csv(
    filepath_or_buffer="../input/plant-pathology-2020-fgvc7/test.csv",
)
# Loading the training images
# refer: https://www.kaggle.com/tarunpaparaju/plant-pathology-2020-eda-models
def load_image(image_id):
    """Read ``<image_id>.jpg`` from ``Image_Data_Path``.

    Args:
        image_id: file name of the image without the ``.jpg`` extension.

    Returns:
        Whatever ``imread`` returns for that file (the decoded image).
    """
    # Image_Data_Path already ends with "/", so plain concatenation yields the
    # full file path.
    return imread(f"{Image_Data_Path}{image_id}.jpg")
# Load the images named by the first 50 rows of the training table.
train_image = train_data['image_id'][:50].apply(load_image)
# The cells below refer to this Series both as `train_image` and as
# `train_images`; expose both names so every reference resolves.
train_images = train_image
# Select a sample image: `image` is read by the scikit-image examples below
# but was never assigned. Any loaded image works; take the first one.
image = train_image[0]

# Horizontal flip: fliplr reverses the order of the pixel columns.
hflipped_image = np.fliplr(image)
# Vertical flip: flipud reverses the order of the pixel rows.
vflipped_image = np.flipud(image)

# Show the original next to both flipped versions.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(30, 16))
flip_panels = [
    (image, 'Original Image'),
    (hflipped_image, 'Horizontally flipped'),
    (vflipped_image, 'Vertically flipped'),
]
for axis, (picture, title) in zip(ax, flip_panels):
    axis.imshow(picture)
    axis.set_title(title, size=30)
# skimage.transform.rotate measures `angle` in degrees counter-clockwise, so
# +45 turns the image anticlockwise and -45 turns it clockwise.
# NOTE(review): the variable names below suggest the opposite direction; they
# are kept unchanged in case other cells reference them.
rot_clockwise_image = rotate(image, angle=45)
rot_anticlockwise_image = rotate(image, angle=-45)

# `ncols` was passed without a value after a keyword argument (a syntax
# error); three panels are drawn, so three columns are needed.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(30, 16))
ax[0].imshow(image)
ax[0].set_title('Original Image', size=30)
ax[1].imshow(rot_clockwise_image)
ax[1].set_title('+45 degree Rotation', size=30)
ax[2].imshow(rot_anticlockwise_image)
ax[2].set_title('-45 degree rotation', size=30);
import random
import pylab as pl
def randRange(a, b):
    """Return a random float drawn uniformly from the interval [a, b).

    Uses ``np.random.rand`` directly; ``pylab.rand`` is that same function
    re-exported through the discouraged ``pylab`` namespace, so the global
    NumPy random state is consumed exactly as before.

    Args:
        a: lower bound (inclusive).
        b: upper bound (exclusive).
    """
    return np.random.rand() * (b - a) + a
def randomCrop(im, margin=1/3.5):
    """Return a random rectangular crop of ``im``.

    The top/left edge is drawn from the first ``margin`` fraction of each
    axis and the bottom/right edge from the last ``margin`` fraction, so the
    central part of the image is always kept.

    Args:
        im: array indexed as ``im[row, column, ...]``.
        margin: fraction of each axis (per side) in which the crop edges may
            fall. Defaults to the previously hard-coded ``1/3.5``.

    Returns:
        The slice ``im[top:bottom, left:right]`` (a view for numpy input).
    """
    n_rows, n_cols = im.shape[0], im.shape[1]
    # Draw order (top, left, bottom, right) matches the original so a seeded
    # run produces the same crop.
    top = int(randRange(0, n_rows * margin))
    left = int(randRange(0, n_cols * margin))
    bottom = int(randRange(n_rows * (1 - margin), n_rows))
    right = int(randRange(n_cols * (1 - margin), n_cols))
    return im[top:bottom, left:right]
# Original image on the left, one random crop of it on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 12))
original_axis, cropped_axis = ax[0], ax[1]
original_axis.imshow(image)
original_axis.set_title('Original Image', size=20)
cropped_axis.imshow(randomCrop(image))
cropped_axis.set_title('Cropped', size=20)
# Gamma correction: gamma < 1 brightens the image, gamma > 1 darkens it.
image_bright = adjust_gamma(image, gamma=0.5, gain=1)
image_dark = adjust_gamma(image, gamma=2, gain=1)

# `nrow` is not a keyword of plt.subplots; the parameter is `nrows`.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 12))
ax[0].imshow(image)
ax[0].set_title('Original Image', size=20)
ax[1].imshow(image_bright)
ax[1].set_title('Brightened Image', size=20)
ax[2].imshow(image_dark)
ax[2].set_title('Darkened Image', size=20)
# Shrink the image to half its height and width.
image_resized = resize(image, (image.shape[0] // 2, image.shape[1] // 2), anti_aliasing=True)
# The result used to be stored as `image_resize` while the plot below read
# `image_resized`; keep the old name as an alias.
image_resize = image_resized
#image_downscaled=downscale_local_mean(image,(4,3))

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(30, 16))
ax[0].imshow(image)
ax[0].set_title('Original Image', size=20)
ax[1].imshow(image_resized)
ax[1].set_title('Resized image', size=20)
# Add noise with random_noise's default settings and compare with the original.
noisy_image = random_noise(image)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(30, 16))
noise_panels = (
    (image, 'Original Image'),
    (noisy_image, 'Image after adding noise'),
)
for axis, (picture, title) in zip(ax, noise_panels):
    axis.imshow(picture)
    axis.set_title(title, size=20)
OpenCV本质上是开源计算机视觉库(Open Source Computer Vision Library)的缩写。虽然它是用优化的C/C++编写的,但它提供了针对Python、Java以及C++的接口。您可以把OpenCV-Python看作是OpenCV的C++实现的Python包装器。OpenCV-Python不仅速度快(因为后台由C/C++编写的代码组成),而且易于编码和部署(因为前台有Python包装器)。这使得它成为执行计算密集型程序的一个很好的选择。
# Select the sample image for the OpenCV examples. The loaded Series is
# named `train_image`; `train_images` was never defined.
image13 = train_image[13]
imshow(image13)
print(image13.shape)
plt.axis('off')
The image is flipped according to the value of flipCode as follows:
flipcode = 0: flip vertically
flipcode > 0: flip horizontally
flipcode < 0: flip vertically and horizontally
# Vertical flip (flipCode = 0)
img_flip_ud = cv2.flip(image13, 0)
plt.imshow(img_flip_ud)
# Horizontal flip (flipCode > 0)
img_flip_lr = cv2.flip(image13, 1)
plt.imshow(img_flip_lr)

# Show both flips side by side. The second panel used to read the
# misspelled name `img_filp_lr`, which raised a NameError.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 12))
ax[0].imshow(img_flip_ud)
ax[0].set_title('vertical flip', size=20)
ax[1].imshow(img_flip_lr)
ax[1].set_title('horizontal flip', size=20)
OpenCV中用于旋转图像的函数是cv2.rotate()
可以在旋转代码中指定以下三个常量
cv2.ROTATE_90_CLOCKWISE
cv2.ROTATE_90_COUNTERCLOCKWISE
cv2.ROTATE_180
# Rotate by the three fixed angles cv2.rotate supports. The module is
# imported as `cv2`; the first call used the undefined name `cv`.
img_rotate_90_clockwise = cv2.rotate(image13, cv2.ROTATE_90_CLOCKWISE)
img_rotate_90_counterclockwise = cv2.rotate(image13, cv2.ROTATE_90_COUNTERCLOCKWISE)
img_rotate_180 = cv2.rotate(image13, cv2.ROTATE_180)

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 12))
ax[0].imshow(img_rotate_90_clockwise)
ax[0].set_title('90 degrees clockwise', size=20)
ax[1].imshow(img_rotate_90_counterclockwise)
ax[1].set_title('90 degrees anticlockwise', size=20)
ax[2].imshow(img_rotate_180)
ax[2].set_title('180 degree rotation', size=20)
Scaling 是重塑图片的尺寸
#RESIZE
def resize_image(image, w, h):
    """Return ``image`` resized to ``w`` x ``h`` pixels with ``cv2.resize``.

    Args:
        image: image array accepted by ``cv2.resize``.
        w: target width in pixels.
        h: target height in pixels.

    Returns:
        The resized image. The previous version returned ``resize_image``,
        i.e. this function object itself, instead of the resized array.
    """
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, (w, h))
# Display the sample image resized to 500 x 500.
imshow(resize_image(image13, w=500, h=500))
def add_light(image, gamma):
    """Apply gamma correction to ``image`` through a 256-entry lookup table.

    Each intensity ``i`` in 0..255 is mapped to
    ``255 * (i / 255) ** (1 / gamma)`` (truncated to uint8), so ``gamma > 1``
    brightens the image and ``gamma < 1`` darkens it.

    Args:
        image: image passed to ``cv2.LUT`` -- presumably 8-bit, since the
            table only has 256 entries; confirm for other inputs.
        gamma: gamma value; must be non-zero because its reciprocal is used.

    Returns:
        The corrected image produced by ``cv2.LUT``.
    """
    invGamma = 1.0 / gamma
    table = np.array(
        [((i / 255.0) ** invGamma) * 255 for i in np.arange(0, 256)]
    ).astype('uint8')
    return cv2.LUT(image, table)
# Display the sample image brightened with gamma = 2.
imshow(add_light(image13, gamma=2))
#crop
def crop_image(image, y1, y2, x1, x2):
    """Return the region of ``image`` spanning rows y1..y2 and columns x1..x2.

    Both ranges are half-open, exactly like Python slices.

    Args:
        image: array indexed as ``image[row, column, ...]``.
        y1: first row of the crop (inclusive).
        y2: last row of the crop (exclusive).
        x1: first column of the crop (inclusive).
        x2: last column of the crop (exclusive).

    Returns:
        ``image[y1:y2, x1:x2]``.
    """
    return image[y1:y2, x1:x2]
# Display rows 200-800 and columns 250-1500 of the sample image (the call
# was missing its closing parenthesis).
imshow(crop_image(image13, 200, 800, 250, 1500))
Gaussian滤波器是一种低通滤波器,能够去除图像中的高频成分(例如噪声)
def gaussian_blur(image, blur):
    """Blur ``image`` with a 5x5 Gaussian kernel.

    Args:
        image: image array accepted by ``cv2.GaussianBlur``.
        blur: value passed as the third argument (sigmaX) of
            ``cv2.GaussianBlur``.

    Returns:
        The blurred image.
    """
    return cv2.GaussianBlur(image, (5, 5), blur)
# Display the sample image blurred with sigma = 0.
imshow(gaussian_blur(image13, blur=0))
imgaug是一个用于机器学习实验中图像增强的库。它支持多种增强技术,可以轻松地将它们组合起来、以随机顺序执行,或在多个CPU核心上运行;它有一个简单而强大的随机化接口,不仅可以增强图像,还可以增强关键点/地标、边界框、热图和分割图。
# Sample image for the imgaug examples: show it without axes and report
# its shape.
image2 = train_image[25]

imshow(image2)
print(image2.shape)
plt.axis('off')
# Flip augmenters with p=1.0, so each one flips every image it is given.
hflip = iaa.Fliplr(p=1.0)   # left-right
vflip = iaa.Flipud(p=1.0)   # up-down
hflipped_image2 = hflip.augment_image(image2)
vflipped_image2 = vflip.augment_image(image2)

image = image2
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(30, 16))
flip_panels2 = (
    (image, 'Original Image'),
    (hflipped_image2, 'Horizontally flipped'),
    (vflipped_image2, 'Vertically flipped'),
)
for axis, (picture, title) in zip(ax, flip_panels2):
    axis.imshow(picture)
    axis.set_title(title, size=30)
# Rotation with the angle range (-25, 25) handed to iaa.Affine.
rot = iaa.Affine(rotate=(-25, 25))
rot_clockwise_image2 = rot.augment_image(image2)

image = image2
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(30, 16))
before_axis, after_axis = ax[0], ax[1]
before_axis.imshow(image)
before_axis.set_title('Original Image', size=30)
after_axis.imshow(rot_clockwise_image2)
after_axis.set_title('Rotated Image', size=30)
image = image2
# Crop by the percentage range (0, 0.2). The keyword is `percent`; the
# misspelling `precent` made the constructor fail.
crop = iaa.Crop(percent=(0, 0.2))
corp_image = crop.augment_image(image)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 12))
ax[0].imshow(image)
ax[0].set_title('Original Image', size=20)
# NOTE(review): randomCrop() crops the imgaug result a second time; showing
# `corp_image` directly may be what was intended -- confirm.
ax[1].imshow(randomCrop(corp_image))
ax[1].set_title('Cropped', size=20)
image = image2

# Gamma contrast: gamma=0.5 yields the brightened image, gamma=2 the
# darkened one.
contrast1 = iaa.GammaContrast(gamma=0.5)
contrast2 = iaa.GammaContrast(gamma=2)
brightened_image = contrast1.augment_image(image)
darkened_image = contrast2.augment_image(image)

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 12))
gamma_panels = (
    (image, 'Original Image'),
    (brightened_image, 'Brightened Image'),
    (darkened_image, 'darkened_image'),
)
for axis, (picture, title) in zip(ax, gamma_panels):
    axis.imshow(picture)
    axis.set_title(title, size=20)
image = image2
# Scale x and y independently, each with its own range.
scale_img = iaa.Affine(scale={'x': (1.5, 1.0), 'y': (0.5, 1.0)})
# The augmenter is named `scale_img`; `scale_im` was undefined.
scale_image = scale_img.augment_image(image)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 12))
ax[0].imshow(image)
ax[0].set_title("Original Image", size=20)
ax[1].imshow(scale_image)
ax[1].set_title("Scaled", size=20)
image = image2

# Additive Gaussian noise, constructed with the positional arguments (15, 20).
gaussian_noise = iaa.AdditiveGaussianNoise(15, 20)
noise_image = gaussian_noise.augment_image(image)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 12))
clean_axis, noisy_axis = ax[0], ax[1]
clean_axis.imshow(image)
clean_axis.set_title("Original Image", size=20)
noisy_axis.imshow(noise_image)
noisy_axis.set_title("Gaussian Noise added", size=20)
imgaug库提供了一个非常有用的特性,称为Augmentation pipeline。这样的管道是可以按固定或随机顺序应用的一系列步骤。这也提供了对一些图像应用某些转换和对其他图像应用其他转换的灵活性。在下面的例子中,我们在一些图像上应用翻转、锐化、裁剪等转换。模糊和仿射变换有时会被应用,所有这些变换将会被随机地应用。
#Defining a pipeline
#The example has been taken from the documentation
# Each call applies between 0 and 3 of the listed augmenters.
aug_pipeline = iaa.Sequential(
    [
        iaa.SomeOf((0, 3), [
            iaa.Fliplr(1.0),  # horizontal flip
            iaa.Flipud(1.0),  # vertical flip
            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),  # sharpen images
            iaa.Crop(percent=(0, 0.4)),
            iaa.Sometimes(0.5, iaa.Affine(rotate=5)),
            iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
            iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
        ]),
    ],
    # Apply the augmentations in random order. The closing parenthesis used
    # to sit inside this comment, which left the call unterminated.
    random_order=True,
)
# Run the pipeline 16 times on the sample image and stack the results.
augmented_samples = [aug_pipeline.augment_image(image2) for _ in range(16)]
images_aug = np.array(augmented_samples)

# Show the 16 results as a grid with 4 columns.
plt.figure(figsize=(30, 10))
plt.axis('off')
augmentation_grid = gallery(images_aug, ncols=4)
plt.imshow(augmentation_grid)
plt.title('Augmentation examples')
Albumentations是一个快速的图像增强库,并为其他库提供了易于使用的包装。它基于numpy、OpenCV和imgaug,并从中各取所长。它由Kagglers编写,曾在Kaggle、topcoder、CVPR、MICCAI的许多深度学习竞赛中帮助取得最佳成绩。更多信息请点击:https://www.mdpi.com/2078-2489/11/2/125
# Initialize the augmentations.
# `horizontal_flip` was defined under the misspelled name `horizontal_filp`
# while the call below used the correct spelling; keep the old name as an alias.
horizontal_flip = A.HorizontalFlip(p=1)
horizontal_filp = horizontal_flip
# NOTE(review): this rebinds `rotate`, which earlier referred to
# skimage.transform.rotate; re-running the scikit-image cells after this one
# would pick up the Albumentations transform instead.
rotate = A.ShiftScaleRotate(p=1)
gaus_noise = A.GaussNoise()  # gaussian noise
bright_contrast = A.RandomBrightnessContrast(p=1)  # random brightness and contrast
gamma = A.RandomGamma(p=1)  # random gamma
blur = A.Blur()

# Apply the augmentations; each call returns a dict.
img_flip = horizontal_flip(image=image2)
img_gaus = gaus_noise(image=image2)
img_rotate = rotate(image=image2)
img_bc = bright_contrast(image=image2)
img_gamma = gamma(image=image2)
img_blur = blur(image=image2)

# The augmented image is stored under the 'image' key.
img_list = [
    img_flip['image'],
    img_gaus['image'],
    img_rotate['image'],
    img_bc['image'],
    img_gamma['image'],
    img_blur['image'],
]
# Stack the augmented images and show them as a grid with 3 columns.
plt.figure(figsize=(10, 10))
plt.axis('off')
stacked_images = np.array(img_list)
plt.imshow(gallery(stacked_images, ncols=3))
plt.title('Augmentation examples')
# Build an Augmentor pipeline that reads from the image directory and writes
# to the output directory.
p = Augmentor.Pipeline(
    source_directory="/kaggle/input/plant-pathology-2020-fgvc7/images",
    output_directory="/kaggle/output",
)

# Register the operations, each with its own probability.
p.flip_left_right(probability=0.4)
p.flip_top_bottom(probability=0.8)
p.rotate(probability=0.5, max_left_rotation=5, max_right_rotation=10)
p.skew(probability=0.4, magnitude=0.5)
p.zoom(probability=0.2, min_factor=1.1, max_factor=1.5)

# Generate 10 samples.
p.sample(10)
Keras库有一个为向图像添加转换而创建的内建类。这个类称为ImageDataGenerator,它会生成一批张量图像数据并进行实时数据扩充.
# Select the sample image for the Keras example. The loaded Series is named
# `train_image`; `train_images` was never defined.
image5 = train_image[15]
imshow(image5)
print(image5.shape)
plt.axis('off')
# Augmentation settings for the Keras generator.
augmentation_settings = dict(
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=(0.5, 1.5),
)
datagen = ImageDataGenerator(**augmentation_settings)

# Convert the sample to an array and prepend a batch axis of length 1.
img_arr = img_to_array(image5)
img_arr = img_arr[np.newaxis, ...]
# Make sure the target directory exists before the generator writes into it.
os.makedirs('../output/keras_augmentations', exist_ok=True)

augmented_flow = datagen.flow(
    img_arr,
    batch_size=1,
    save_to_dir='../output/keras_augmentations',
    save_prefix='Augmented_image',
    save_format='jpeg',
)
# Each batch drawn from the generator saves one augmented image. Stop after
# exactly 20: the previous `i > 20` check let a 21st batch through.
for i, batch in enumerate(augmented_flow, start=1):
    if i >= 20:  # create 20 augmented images
        break  # otherwise the generator would loop indefinitely
# List the files the generator wrote (a bare name displays it in a notebook).
images = os.listdir("../output/keras_augmentations/")
images

# Let's look at the augmented images
aug_images = []
for img_path in glob.glob("../output/keras_augmentations/*.jpeg"):
    aug_images.append(mpimg.imread(img_path))

plt.figure(figsize=(20, 10))
columns = 5
# plt.subplot needs an integer row count; `/` produced a float here.
rows = len(aug_images) // columns + 1
# Loop variable renamed from `image` so the module-level sample image is not
# overwritten.
for i, aug_image in enumerate(aug_images):
    plt.subplot(rows, columns, i + 1)
    plt.imshow(aug_image)
# Select the sample image for the SOLT example. The loaded Series is named
# `train_image`; `train_images` was never defined.
image5 = train_image[25]
imshow(image5)
print(image5.shape)
plt.axis('off')
SOLT是一个快速的数据扩充库,支持任意数量的图像、分割掩码、关键点和数据标签。它的后端有OpenCV,因此工作非常快。
# Image dimensions; the sample is limited to its first `w` rows.
h, w, c = image5.shape
img = image5[:w]

# Transformations in the order they are handed to the stream.
solt_transforms = [
    slt.Rotate(angle_range=(-90, 90), p=1, padding='r'),
    slt.Flip(axis=1, p=0.5),
    slt.Flip(axis=0, p=0.5),
    slt.Shear(range_x=0.3, range_y=0.8, p=0.5, padding='r'),
    slt.Scale(range_x=(0.8, 1.3), padding='r', range_y=(0.8, 1.3), same=False, p=0.5),
    slt.Pad((w, h), 'r'),
    slt.Crop((w, w), 'r'),
    slt.CvtColor('rgb2gs', keep_dim=True, p=0.2),
    slt.HSV((0, 10), (0, 10), (0, 10)),
    slt.Blur(k_size=7, blur_type='m'),
    # Selection over three cut-out sizes and two empty streams, with n=3.
    solt.SelectiveStream(
        [
            slt.CutOut(40, p=1),
            slt.CutOut(50, p=1),
            slt.CutOut(10, p=1),
            solt.Stream(),
            solt.Stream(),
        ],
        n=3,
    ),
]
stream = solt.Stream(solt_transforms, ignore_fast_mode=True)
# Draw the original image followed by augmented versions in a single row.
fig = plt.figure(figsize=(16, 16))
n_augs = 6

# Seed Python's `random` module before sampling.
random.seed(42)
for i in range(n_augs):
    # The stream runs on every iteration, including i == 0 where its output
    # is not shown, so the later panels keep the same sequence of random draws.
    img_aug = stream({'image': img}, return_torch=False).data[0].squeeze()

    ax = fig.add_subplot(1, n_augs, i + 1)
    if i == 0:
        ax.imshow(img)  # first panel: the unmodified input
    else:
        ax.imshow(img_aug)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()