在 transforms.Compose 中添加自定义方法时,函数和类的调用方式有所区别:
aspect_ratio_scaling()  # 如果是类,需要先实例化(带括号);类中要实现 __init__(self, …) 和 __call__(self, img)
aspect_ratio_scaling  # 如果是函数,只写函数名,不带括号和参数;Compose 会自动把 img 作为第一个参数传入,例如 def aspect_ratio_scaling(img, target_size=160):
import math
import os
import random
import shutil
from collections import deque

import cv2
import numpy as np
import PIL.Image
import torchvision
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
def aspect_ratio_scaling(img, target_size=160):
    """Resize an image onto a square canvas while keeping its aspect ratio.

    The longer side is scaled to ``target_size`` and the shorter side is
    scaled by the same factor. The remaining space is filled with black
    borders so the result is ``target_size`` x ``target_size``.

    :param img: OpenCV-style numpy image, HWC (RGB or BGR both work) or a
        2-D single-channel array, values 0-255.
    :param target_size: side length of the square output, in pixels.
    :return: the resized and padded image.
    """
    # Only the first two axes are needed, so 2-D (grayscale) input works too.
    height, width = img.shape[:2]
    ratio = float(height) / width

    if height >= width:
        new_h = target_size
        # Clamp to 1 px: a very elongated image would otherwise round down
        # to 0, and cv2.resize rejects an empty destination size.
        new_w = max(1, int(target_size / ratio))
    else:
        new_w = target_size
        new_h = max(1, int(target_size * ratio))

    # Split the padding between both sides; when the total is odd the extra
    # pixel goes to the left / top.
    pad_w = target_size - new_w
    pad_h = target_size - new_h
    right_offset = pad_w // 2
    left_offset = pad_w - right_offset
    bottom_offset = pad_h // 2
    top_offset = pad_h - bottom_offset

    img = cv2.resize(img, (new_w, new_h))
    # value=0 pads with black; (255, 255, 255) would pad with white.
    return cv2.copyMakeBorder(
        img, top_offset, bottom_offset, left_offset, right_offset,
        cv2.BORDER_CONSTANT, value=0,
    )
class RandomPatch(object):
    """Random patch data augmentation for PIL images.

    The input must be a PIL image: the implementation relies on
    ``img.size``, ``img.crop`` and ``img.paste``. The effect is similar to
    random erasing (a rectangular region is occluded), except that the
    occluder is a patch cropped from a previously seen image rather than a
    constant-colour block.

    There is a patch pool that stores randomly extracted patches from
    input images. For each input image, RandomPatch

        1) extracts a random patch and stores the patch in the patch pool;
        2) randomly selects a patch from the patch pool and pastes it on the
           input (at a random position) to simulate occlusion.

    Images are returned unmodified until the pool holds at least
    ``min_sample_size`` patches, so ``min_sample_size`` controls how many
    of the first images stay untouched.

    Reference:
        - Zhou et al. Omni-Scale Feature Learning for Person
          Re-Identification. ICCV, 2019.
        - Zhou et al. Learning Generalisable Omni-Scale Representations
          for Person Re-Identification. arXiv preprint, 2019.

    Args:
        prob_happen: probability that a patch is pasted on an image.
        pool_capacity: maximum number of patches kept; the oldest patch is
            dropped once the pool is full.
        min_sample_size: minimum pool size before any patch is pasted.
        patch_min_area: minimum patch area as a fraction of the image area.
        patch_max_area: maximum patch area as a fraction of the image area.
        patch_min_ratio: minimum patch aspect ratio; the maximum is its
            reciprocal.
        prob_rotate: probability that the pasted patch is rotated.
        prob_flip_leftright: probability that the pasted patch is mirrored.
    """

    def __init__(self, prob_happen=1, pool_capacity=50000, min_sample_size=5,
                 patch_min_area=0.01, patch_max_area=0.5, patch_min_ratio=0.1,
                 prob_rotate=0.5, prob_flip_leftright=0.5,
                 ):
        self.prob_happen = prob_happen

        self.patch_min_area = patch_min_area
        self.patch_max_area = patch_max_area
        self.patch_min_ratio = patch_min_ratio

        self.prob_rotate = prob_rotate
        self.prob_flip_leftright = prob_flip_leftright

        self.patchpool = deque(maxlen=pool_capacity)
        self.min_sample_size = min_sample_size

    def generate_wh(self, W, H):
        """Draw a random patch size that fits strictly inside a W x H image.

        Returns ``(w, h)``, or ``(None, None)`` if no fitting size was found
        in 100 attempts.
        """
        area = W * H
        for _ in range(100):
            target_area = random.uniform(self.patch_min_area, self.patch_max_area) * area
            aspect_ratio = random.uniform(self.patch_min_ratio, 1. / self.patch_min_ratio)
            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))
            if w < W and h < H:
                return w, h
        return None, None

    def transform_patch(self, patch):
        """Randomly mirror and/or slightly rotate a patch before pasting."""
        # `<` so that each transform is applied with the configured
        # probability (a probability of 0 never applies it, 1 always does).
        if random.uniform(0, 1) < self.prob_flip_leftright:
            patch = patch.transpose(Image.FLIP_LEFT_RIGHT)
        if random.uniform(0, 1) < self.prob_rotate:
            patch = patch.rotate(random.randint(-10, 10))
        return patch

    def __call__(self, img):
        """Store a patch from ``img`` and maybe paste a pooled patch onto it.

        The paste modifies ``img`` in place; the same object is returned.
        """
        W, H = img.size  # original image size

        # collect new patch
        w, h = self.generate_wh(W, H)
        if w is not None and h is not None:
            x1 = random.randint(0, W - w)
            y1 = random.randint(0, H - h)
            new_patch = img.crop((x1, y1, x1 + w, y1 + h))
            self.patchpool.append(new_patch)

        # Not enough patches collected yet: leave the image untouched.
        if len(self.patchpool) < self.min_sample_size:
            return img

        if random.uniform(0, 1) > self.prob_happen:
            return img

        # paste a randomly selected patch on a random position
        patch = random.choice(self.patchpool)
        patchW, patchH = patch.size
        x1 = random.randint(0, W - patchW)
        y1 = random.randint(0, H - patchH)
        patch = self.transform_patch(patch)
        img.paste(patch, (x1, y1))

        return img
class RandomErasing_cv(object):
    """Random Erasing for channel-first (CHW) images.

    The image is indexed as ``img[channel, row, col]`` and only ``.shape``
    is used, so it must already be channel-first. The original notes say
    the input is expected after ``transforms.ToTensor()`` or after
    ``img.transpose((2, 0, 1)) / 255.0``, i.e. scaled to 0-1.0, which
    matches the default ``mean`` values.

    Randomly selects a rectangle region in an image and erases its pixels.
        'Random Erasing Data Augmentation' by Zhong et al.
        See https://arxiv.org/pdf/1708.04896.pdf

    Args:
        probability: The probability that the Random Erasing operation will
            be performed.
        sl: Minimum proportion of erased area against input image.
        sh: Maximum proportion of erased area against input image.
        r1: Minimum aspect ratio of erased area.
        mean: Erasing value, one entry per channel.
    """

    def __init__(self, probability=0.5, sl=0.02, sh=0.1, r1=0.3, mean=(0.4914, 0.4822, 0.4465)):
        self.probability, self.mean = probability, mean
        self.sl, self.sh, self.r1 = sl, sh, r1

    def __call__(self, img):
        """Erase one random rectangle of ``img`` in place and return it."""
        if random.uniform(0, 1) >= self.probability:
            return img

        rows, cols = img.shape[1], img.shape[2]
        full_area = rows * cols

        # Try up to 100 random boxes and use the first one that fits.
        for _ in range(100):
            erase_area = random.uniform(self.sl, self.sh) * full_area
            ratio = random.uniform(self.r1, 1 / self.r1)
            box_h = int(round(math.sqrt(erase_area * ratio)))
            box_w = int(round(math.sqrt(erase_area / ratio)))

            if not (box_w < cols and box_h < rows):
                continue

            top = random.randint(0, rows - box_h)
            left = random.randint(0, cols - box_w)
            # Three-channel images get every channel filled; anything else
            # only has its first channel filled.
            channels = (0, 1, 2) if img.shape[0] == 3 else (0,)
            for c in channels:
                img[c, top:top + box_h, left:left + box_w] = self.mean[c]
            return img

        return img
# Augmentation preview pipeline: paste a random patch, then convert to a tensor.
# NOTE(review): `transform` is assigned again further down in this file before
# it is first passed to a dataset - confirm whether this definition is needed.
transform = transforms.Compose([RandomPatch(), transforms.ToTensor()])
class ImageDataset(Dataset):
    """Demo dataset that loads images with OpenCV and previews each sample.

    ``dataset`` is a sequence of ``(image_path, pid)`` pairs. Every call to
    ``__getitem__`` reads the image, converts it from BGR to RGB, applies
    ``transform`` (if any) and shows the result in an OpenCV window for
    five seconds.

    Args:
        dataset: sequence of ``(image_path, pid)`` tuples.
        transform: optional callable applied to the RGB HWC uint8 image.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(img, pid)`` where ``img`` is an RGB HWC uint8 array.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        img_path, pid = self.dataset[index]

        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread signals failure by returning None; fail here with a
            # clear message instead of inside cvtColor.
            raise FileNotFoundError("cannot read image: {}".format(img_path))
        # cvtColor instead of [:, :, ::-1]: torch does not support the
        # negative-stride view that slicing produces.
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(img)
        print(img.shape)

        if hasattr(img, "numpy"):
            # Tensor output (CHW, 0-1.0 after ToTensor): undo the scaling and
            # the channel move so it can be displayed. cv2 needs uint8;
            # astype(int) would give a 32-bit type that imshow rejects.
            img = np.uint8((img * 255.0).numpy().transpose((1, 2, 0)))

        # imshow interprets the data as BGR, so convert the RGB image back
        # for display only; the returned array stays RGB.
        preview = cv2.cvtColor(np.ascontiguousarray(img), cv2.COLOR_RGB2BGR)
        cv2.imshow("shoe23", preview)
        cv2.waitKey(5000)
        return img, pid
# Tensor pipeline: ToTensor runs first, then random erasing on its output.
transform = transforms.Compose([
    # aspect_ratio_scaling,  # a plain function is listed by name, without
    #                        # parentheses or arguments; Compose calls it
    #                        # with the image as its first argument
    transforms.ToTensor(),
    RandomErasing_cv(),
])

# Array-only pipeline: input is 0-255 RGB and so is the output, so it can be
# passed to cv2.imshow() without undoing any 0-1 normalisation.
transform2 = transforms.Compose([
    aspect_ratio_scaling,  # function passed by name; Compose supplies img
    # transforms.ToTensor()
])

# Any two image paths will do; they are only used to preview the effect.
dataset = [
    ("/home/shiyy/nas/00091.jpg-car-1345_178_2045_420.jpg", 2),
    ("/home/shiyy/nas/00080.jpg-car-1674_1200_2280_2027.jpg", 1),
]
train_set = ImageDataset(dataset, transform=transform)
# train_set = ImageDataset(dataset ,transform=transform2)
data_loader = DataLoader(dataset=train_set, batch_size=2, shuffle=False)

# 1. Preview of images read with OpenCV.
# The preview itself happens inside ImageDataset.__getitem__, right after the
# transform is applied; iterating the loader is what triggers it.
for input, target in data_loader:
    print(target)
##################################################################
# 2. Preview of images read with PIL.
# When every image is read with PIL, torchvision's ToPILImage can be used
# directly to display the result.
# Augmentation preview pipeline.
transform3 = transforms.Compose([
    # Runs before ToTensor, i.e. on the image as it was loaded.
    RandomPatch(),
    # After ToTensor the image is CHW, scaled to 0-1.0 (divided by 255.0);
    # the batch dimension is not added here.
    transforms.ToTensor(),
])

to_pil_image = transforms.ToPILImage()

for input, target in data_loader:
    print(target)
    for i in range(len(target)):
        image = to_pil_image(input[i])
        image.show()
######################################
## 3、其他操作可以都一个尝试看看效果
import torchvision.transforms as T
# A typical training pipeline built only from stock torchvision transforms.
train_transforms = T.Compose(
    [
        T.Resize(224),
        T.RandomHorizontalFlip(p=0.5),
        T.Pad(10),
        T.RandomCrop(224),
        T.ToTensor(),
        # Maps the 0-1 range to -1..1.
        T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        # or T.Normalize( mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])
    ]
)
注意:上述的 RandomPatch(object) 是针对 PIL 图片实现的,用到了 PIL 图片的属性和方法(size、crop、paste、transpose、rotate)。
如果图片是用 OpenCV 读取的(numpy 数组),需要自己把这些 PIL 方法替换成对应的 numpy / OpenCV 操作。
class RandomErasing_cv(object):
    """Random Erasing for channel-last images (HWC, 0-255, RGB or BGR).

    Accepts an OpenCV numpy image or a PIL image. When erasing is attempted
    the input is first copied into a numpy array, so the result is a numpy
    array that cv2 can display directly and the caller's image is not
    modified. When the probability gate skips erasing, the input object is
    returned unchanged (a PIL input stays a PIL image in that case).

    Randomly selects a rectangle region in an image and erases its pixels.
        'Random Erasing Data Augmentation' by Zhong et al.
        See https://arxiv.org/pdf/1708.04896.pdf

    Args:
        probability: The probability that the Random Erasing operation will
            be performed.
        sl: Minimum proportion of erased area against input image.
        sh: Maximum proportion of erased area against input image.
        r1: Minimum aspect ratio of erased area.
        mean: Erasing value per channel (the default is a grey block).
    """

    def __init__(self, probability=1, sl=0.02, sh=0.1, r1=0.3, mean=(125, 123, 113)):
        self.probability = probability
        self.mean = mean
        self.sl = sl
        self.sh = sh
        self.r1 = r1

    def __call__(self, img):
        """Return ``img`` with one random rectangle overwritten by ``mean``."""
        if random.uniform(0, 1) >= self.probability:
            return img

        # Converting once up front gives PIL images a .shape (PIL only
        # exposes .size as (W, H)) and copies numpy input.
        img = np.array(img)
        height, width = img.shape[:2]
        area = height * width

        # Try up to 100 random boxes and use the first one that fits.
        for _ in range(100):
            target_area = random.uniform(self.sl, self.sh) * area
            aspect_ratio = random.uniform(self.r1, 1 / self.r1)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w < width and h < height:
                top = random.randint(0, height - h)
                left = random.randint(0, width - w)
                if img.ndim == 2:
                    # Single-channel image without a channel axis.
                    img[top:top + h, left:left + w] = self.mean[0]
                elif img.shape[2] == 3:
                    img[top:top + h, left:left + w, 0] = self.mean[0]
                    img[top:top + h, left:left + w, 1] = self.mean[1]
                    img[top:top + h, left:left + w, 2] = self.mean[2]
                else:
                    img[top:top + h, left:left + w, 0] = self.mean[0]
                return img

        return img
class ImageDataset(Dataset):
    """Demo dataset that loads images with PIL.

    ``dataset`` is a sequence of ``(image_path, pid)`` pairs. Each image is
    opened with PIL, converted to RGB and passed through ``transform`` if
    one is given.

    Args:
        dataset: sequence of ``(image_path, pid)`` tuples.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        """Return ``(img, pid)`` for the sample at ``index``."""
        img_path, pid = self.dataset[index]

        # Image.open is lazy and keeps the file open; convert() loads the
        # pixel data so the handle can be closed immediately. Converting to
        # RGB also gives every sample three channels.
        with PIL.Image.open(img_path) as handle:
            img = handle.convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        # PIL images have .size rather than .shape, so fall back to it when
        # the transform did not return an array or tensor.
        print(img.shape if hasattr(img, "shape") else img.size)
        # cv2.imshow("tran",img)
        # cv2.waitKey(5000)
        return img, pid
# Random erasing is applied directly to the loaded image; no ToTensor step.
transform = transforms.Compose([
    # transforms.ToTensor(),
    RandomErasing_cv(),
])

# Any two image paths will do; they are only used to preview the effect.
dataset = [
    ("/home/shiyy/nas/00091.jpg-car-1345_178_2045_420.jpg", 2),
    ("/home/shiyy/nas/00080.jpg-car-1674_1200_2280_2027.jpg", 1),
]
train_set = ImageDataset(dataset, transform=transform)
# train_set = ImageDataset(dataset ,transform=transform2)
data_loader = DataLoader(dataset=train_set, batch_size=2, shuffle=False)

# 1. Images read with either OpenCV or PIL can be used here.
# Any preview happens inside ImageDataset.__getitem__, after the transform;
# iterating the loader is what triggers it.
for input, target in data_loader:
    print(target)