调整PILImage对象的尺寸
提示:不能使用io.imread或者cv2.imread读取的图片,这两种方法得到的是ndarray,而transforms.Resize要求输入是PIL Image。
一般输入深度网络的特征图长宽是相等的,就不能采取等比例缩放的方式了,需要同时指定长宽:
transforms.Resize([h, w])
将图片短边缩放至x,长宽比保持不变:
transforms.Resize(x)
例如transforms.Resize([224, 224])就能将输入图片转化成224×224的输入特征图。
需要注意的一点是PILImage对象size属性返回的是w, h,而resize的参数顺序是h, w。
def load_data(root_path, dir, batch_size, phase):
    """Build a shuffled DataLoader over the ImageFolder at ``root_path + dir``.

    Args:
        root_path: directory prefix, concatenated directly with ``dir``
            (no path separator is inserted — callers must include one).
        dir: dataset sub-directory name.
        batch_size: samples per batch.
        phase: 'src' for train-style augmentation, 'tar' for a
            deterministic resize-only pipeline.

    Returns:
        A ``torch.utils.data.DataLoader`` with ``shuffle=True``.
    """
    transform_dict = {
        'src': transforms.Compose(
            [transforms.RandomResizedCrop(224),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
             ]),
        'tar': transforms.Compose(
            # Fix: Resize(224) only pins the SHORT side, so non-square
            # images keep differing sizes and fail to collate into a
            # batch. Resize([224, 224]) guarantees a fixed 224x224 output.
            [transforms.Resize([224, 224]),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
             ])}
    data = datasets.ImageFolder(root=root_path + dir, transform=transform_dict[phase])
    data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True,
                                              drop_last=False, num_workers=4)
    return data_loader
def get_screen(self, env):
    """Capture the current rendered frame as a (1, C, H, W) float tensor in [0, 1]."""
    # Render and reorder HWC -> CHW for torch.
    frame = env.render(mode='rgb_array').transpose((2, 0, 1))
    # Keep only the vertical band of interest (drops top and bottom).
    frame = frame[:, 160:320]
    width = 320
    half = width // 2
    cart = self.get_cart_location(env)
    # Pick a horizontal window centered on the cart, clamped to the screen.
    if cart < half:
        window = slice(width)
    elif cart > self.screen_width - half:
        window = slice(-width, None)
    else:
        window = slice(cart - half, cart + half)
    frame = frame[:, :, window]
    # Rescale to [0, 1] floats and convert to a contiguous torch tensor.
    tensor = torch.from_numpy(np.ascontiguousarray(frame, dtype=np.float32) / 255)
    # `resize` is a module-level transform; prepend the batch dimension (BCHW).
    return resize(tensor).unsqueeze(0)
def load_data(data_folder, batch_size, phase='train', train_val_split=True, train_ratio=.8):
    """Create DataLoader(s) over the ImageFolder rooted at ``data_folder``.

    Args:
        data_folder: root directory laid out for torchvision ``ImageFolder``.
        batch_size: samples per batch.
        phase: 'train' or 'test'; selects the transform pipeline.
        train_val_split: when phase == 'train', also carve out a validation split.
        train_ratio: fraction of samples assigned to the training split.

    Returns:
        ``[train_loader, val_loader]`` when phase == 'train' and
        ``train_val_split`` is true, otherwise a single DataLoader.
    """
    transform_dict = {
        'train': transforms.Compose(
            [transforms.Resize(256),
             transforms.RandomCrop(224),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
             ]),
        'test': transforms.Compose(
            # Fix: Resize(224) only pins the short side, so non-square
            # images come out at differing sizes and cannot be collated
            # into a batch. Resize([224, 224]) yields a fixed shape.
            [transforms.Resize([224, 224]),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
             ])}
    data = datasets.ImageFolder(root=data_folder, transform=transform_dict[phase])
    if phase == 'train':
        if train_val_split:
            train_size = int(train_ratio * len(data))
            val_size = len(data) - train_size
            data_train, data_val = torch.utils.data.random_split(data, [train_size, val_size])
            train_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size,
                                                       shuffle=True, drop_last=True,
                                                       num_workers=4)
            val_loader = torch.utils.data.DataLoader(data_val, batch_size=batch_size,
                                                     shuffle=False, drop_last=False,
                                                     num_workers=4)
            return [train_loader, val_loader]
        train_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True,
                                                   drop_last=True, num_workers=4)
        return train_loader
    test_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=False,
                                              drop_last=False, num_workers=4)
    return test_loader
## Below are for ImageCLEF datasets
def load_imageclef_train(root_path, domain, batch_size, phase):
    """Load one ImageCLEF domain and return shuffled (train, val) loaders split 80/20."""
    augmented = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    deterministic = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    transform_dict = {'src': augmented, 'tar': deterministic}
    data = ImageCLEF(root_dir=root_path, domain=domain, transform=transform_dict[phase])
    n_train = int(0.8 * len(data))
    data_train, data_val = torch.utils.data.random_split(data, [n_train, len(data) - n_train])

    def _loader(subset):
        # Both splits shuffle, matching the original behavior.
        return torch.utils.data.DataLoader(subset, batch_size=batch_size, shuffle=True,
                                           drop_last=False, num_workers=4)

    return _loader(data_train), _loader(data_val)
def load_imageclef_test(root_path, domain, batch_size, phase):
    """Return a single shuffled DataLoader over one ImageCLEF domain."""
    pipelines = {
        'src': transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ]),
        'tar': transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ]),
    }
    dataset = ImageCLEF(root_dir=root_path, domain=domain, transform=pipelines[phase])
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                       drop_last=False, num_workers=4)
def load_imageclef_test(root_path, domain, batch_size, phase):
    """Return a single shuffled DataLoader over one ImageCLEF domain.

    NOTE(review): this is a verbatim duplicate of the ``load_imageclef_test``
    defined earlier in this file; at import time this later definition
    shadows the earlier one. Consider deleting one of them.
    """
    augmented = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    deterministic = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    pipeline = augmented if phase == 'src' else deterministic
    dataset = ImageCLEF(root_dir=root_path, domain=domain, transform=pipeline)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                       drop_last=False, num_workers=4)
def load_training(root_path, dir, batch_size, kwargs):
    """Training loader: 256x256 resize, random 224 crop + flip; no normalization."""
    augment = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    dataset = datasets.ImageFolder(root=root_path + dir, transform=augment)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                       drop_last=True, **kwargs)
def load_data(data_folder, batch_size, train, kwargs):
    """Single DataLoader over an ImageFolder; augmented pipeline when `train` is truthy."""
    if train:
        pipeline = transforms.Compose([
            transforms.Resize([256, 256]),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
    else:
        pipeline = transforms.Compose([
            transforms.Resize([224, 224]),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
    dataset = datasets.ImageFolder(root=data_folder, transform=pipeline)
    # Shuffling stays on even for evaluation, matching the original behavior;
    # only training drops the last partial batch.
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                       drop_last=bool(train), **kwargs)
def load_train(root_path, dir, batch_size, phase):
    """Split the ImageFolder at ``root_path + dir`` 80/20 and return two loaders.

    Args:
        root_path: directory prefix, concatenated directly with ``dir``.
        dir: dataset sub-directory name.
        batch_size: samples per batch.
        phase: 'src' (random-resized-crop augmentation) or 'tar'
            (deterministic resize).

    Returns:
        ``(train_loader, val_loader)``; note both loaders shuffle.
    """
    transform_dict = {
        'src': transforms.Compose(
            [transforms.RandomResizedCrop(224),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
             ]),
        'tar': transforms.Compose(
            # Fix: Resize(224) only pins the short side, so non-square
            # images keep differing sizes and break batch collation;
            # Resize([224, 224]) guarantees a fixed 224x224 output.
            [transforms.Resize([224, 224]),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
             ])}
    data = datasets.ImageFolder(root=root_path + dir, transform=transform_dict[phase])
    train_size = int(0.8 * len(data))
    val_size = len(data) - train_size
    data_train, data_val = torch.utils.data.random_split(data, [train_size, val_size])
    train_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size, shuffle=True,
                                               drop_last=False, num_workers=4)
    val_loader = torch.utils.data.DataLoader(data_val, batch_size=batch_size, shuffle=True,
                                             drop_last=False, num_workers=4)
    return train_loader, val_loader
def __init__(self, train_mode, loader_params, dataset_params, augmentation_params):
    """Configure image/mask preprocessing, train-time augmenters, and select
    the dataset class based on the configured target format.

    NOTE(review): assumes `to_array`, `to_tensor`, `ImgAug`, and the two
    ImageSegmentation*Dataset classes are defined elsewhere in this module —
    confirm before moving this class.
    """
    super().__init__(train_mode, loader_params, dataset_params, augmentation_params)
    # Images: resize to (h, w), force 3 channels, normalize with dataset stats.
    self.image_transform = transforms.Compose([transforms.Resize((self.dataset_params.h, self.dataset_params.w)),
                                               transforms.Grayscale(num_output_channels=3),
                                               transforms.ToTensor(),
                                               transforms.Normalize(mean=self.dataset_params.MEAN,
                                                                    std=self.dataset_params.STD),
                                               ])
    # Masks: interpolation=0 (nearest neighbour in PIL's constant scheme)
    # avoids interpolating new, invalid label values at resize time.
    self.mask_transform = transforms.Compose([transforms.Resize((self.dataset_params.h, self.dataset_params.w),
                                                                interpolation=0),
                                              transforms.Lambda(to_array),
                                              transforms.Lambda(to_tensor),
                                              ])
    # Two augmenters: one for image-only ops, one applied jointly with the target.
    self.image_augment_train = ImgAug(self.augmentation_params['image_augment_train'])
    self.image_augment_with_target_train = ImgAug(self.augmentation_params['image_augment_with_target_train'])
    # Target masks may arrive as rendered PNGs or as JSON annotations.
    if self.dataset_params.target_format == 'png':
        self.dataset = ImageSegmentationPngDataset
    elif self.dataset_params.target_format == 'json':
        self.dataset = ImageSegmentationJsonDataset
    else:
        raise Exception('files must be png or json')
def get_transform2(dataset_name, net_transform, downscale):
    """Build (image, label) transforms that optionally downscale, restore the
    original size, and prepare inputs for the network. Label resizing always
    uses nearest-neighbour interpolation so class ids are never blended."""
    orig_size = get_orig_size(dataset_name)
    image_steps, label_steps = [], []
    if downscale is not None:
        reduced = orig_size // downscale
        image_steps.append(transforms.Resize(reduced))
        label_steps.append(transforms.Resize(reduced, interpolation=Image.NEAREST))
    # Always scale back up to the original size before handing off.
    image_steps += [transforms.Resize(orig_size), net_transform]
    label_steps += [transforms.Resize(orig_size, interpolation=Image.NEAREST),
                    to_tensor_raw]
    return transforms.Compose(image_steps), transforms.Compose(label_steps)
def get_transform(params, image_size, num_channels):
    """Compose a transform adapting images to a requested size and channel count.

    Args:
        params: config object exposing ``image_size``, ``num_channels``,
            ``mean`` and ``std`` of the source dataset.
        image_size: desired output size.
        num_channels: desired channel count; must be 1 (grayscale) or 3 (RGB).

    Returns:
        ``transforms.Compose`` ending in ToTensor + Normalize.

    Raises:
        ValueError: if a channel conversion is needed and ``num_channels``
            is neither 1 nor 3.
    """
    # PIL-level channel conversions.
    Gray2RGB = transforms.Lambda(lambda x: x.convert('RGB'))
    RGB2Gray = transforms.Lambda(lambda x: x.convert('L'))
    transform = []
    # Resize only when the requested size differs from the source size.
    if image_size != params.image_size:
        transform.append(transforms.Resize(image_size))
    # Convert channels only when the requested count differs from the source.
    if num_channels != params.num_channels:
        if num_channels == 1:
            transform.append(RGB2Gray)
        elif num_channels == 3:
            transform.append(Gray2RGB)
        else:
            # Fix: previously printed a message and raised a bare Exception.
            # ValueError is a subclass of Exception, so existing handlers
            # still work, and the diagnostic travels with the exception.
            raise ValueError('NumChannels should be 1 or 3, got %r' % (num_channels,))
    transform += [transforms.ToTensor(),
                  transforms.Normalize((params.mean,), (params.std,))]
    return transforms.Compose(transform)
def get_mnist_dataloaders(batch_size=128):
    """MNIST dataloader with (32, 32) sized images."""
    # Upscale the 28x28 digits to a power-of-two size before tensor conversion.
    preprocess = transforms.Compose([transforms.Resize(32), transforms.ToTensor()])
    train_data = datasets.MNIST('../data', train=True, download=True, transform=preprocess)
    test_data = datasets.MNIST('../data', train=False, transform=preprocess)

    def _loader(ds):
        # Both loaders shuffle, matching the original behavior.
        return DataLoader(ds, batch_size=batch_size, shuffle=True)

    return _loader(train_data), _loader(test_data)
def get_fashion_mnist_dataloaders(batch_size=128):
    """Fashion MNIST dataloader with (32, 32) sized images."""
    # Upscale the 28x28 images to a power-of-two size before tensor conversion.
    preprocess = transforms.Compose([transforms.Resize(32), transforms.ToTensor()])
    train_data = datasets.FashionMNIST('../fashion_data', train=True, download=True,
                                       transform=preprocess)
    test_data = datasets.FashionMNIST('../fashion_data', train=False, transform=preprocess)

    def _loader(ds):
        # Both loaders shuffle, matching the original behavior.
        return DataLoader(ds, batch_size=batch_size, shuffle=True)

    return _loader(train_data), _loader(test_data)
def get_lsun_dataloader(path_to_data='../lsun', dataset='bedroom_train',
                        batch_size=64):
    """LSUN dataloader with (128, 128) sized images.

    path_to_data : str
        Path to the LSUN database directory.
    dataset : str
        One of 'bedroom_val' or 'bedroom_train'.
    """
    # Resize the short side to 128, then center-crop to a 128x128 square.
    preprocess = transforms.Compose([
        transforms.Resize(128),
        transforms.CenterCrop(128),
        transforms.ToTensor(),
    ])
    lsun_dset = datasets.LSUN(db_path=path_to_data, classes=[dataset],
                              transform=preprocess)
    return DataLoader(lsun_dset, batch_size=batch_size, shuffle=True)
def save_distorted(method=gaussian_noise):
    """Iterate every severity of `method` over the ImageNet val set.

    The loop only drains each loader — presumably DistortImageFolder
    persists the distorted images as a side effect of iteration (verify
    against its implementation).
    """
    for severity in range(1, 6):
        print(method.__name__, severity)
        dataset = DistortImageFolder(
            root="/share/data/vision-greg/ImageNet/clsloc/images/val",
            method=method, severity=severity,
            transform=trn.Compose([trn.Resize(256), trn.CenterCrop(224)]))
        loader = torch.utils.data.DataLoader(dataset, batch_size=100,
                                             shuffle=False, num_workers=4)
        for _ in loader:
            continue
# /// End Further Setup ///
# /// Display Results ///
def save_distorted(method=gaussian_noise):
    """Iterate every severity of `method` over bbox-cropped ImageNet val at 64x64.

    NOTE(review): this file defines `save_distorted` multiple times; the
    last definition shadows the earlier ones at import time. The loop only
    drains each loader — presumably DistortImageFolder persists the
    distorted images as a side effect of iteration (verify).
    """
    for severity in range(1, 6):
        print(method.__name__, severity)
        dataset = DistortImageFolder(
            root="./imagenet_val_bbox_crop/",
            method=method, severity=severity,
            transform=trn.Compose([trn.Resize((64, 64))]))
        loader = torch.utils.data.DataLoader(dataset, batch_size=100,
                                             shuffle=False, num_workers=6)
        for _ in loader:
            continue
# /// End Further Setup ///
# /// Display Results ///
def save_distorted(method=gaussian_noise):
    """Iterate every severity of `method` over the ImageNet val set at 64x64.

    NOTE(review): this file defines `save_distorted` multiple times; the
    last definition shadows the earlier ones at import time. The loop only
    drains each loader — presumably DistortImageFolder persists the
    distorted images as a side effect of iteration (verify).
    """
    for severity in range(1, 6):
        print(method.__name__, severity)
        dataset = DistortImageFolder(
            root="/share/data/vision-greg/ImageNet/clsloc/images/val",
            method=method, severity=severity,
            transform=trn.Compose([trn.Resize((64, 64))]))
        loader = torch.utils.data.DataLoader(dataset, batch_size=100,
                                             shuffle=False, num_workers=6)
        for _ in loader:
            continue
# /// End Further Setup ///
# /// Display Results ///
def get_transform():
    """Return {'img': ..., 'gt': ...} transform pipelines.

    Images resize with interpolation mode 3 and ground truth with mode 0
    (nearest in PIL's constant scheme), so label maps are never blended.
    """
    image_pipeline = transforms.Compose([
        transforms.Resize((256, 256), 3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    gt_pipeline = transforms.Compose([
        transforms.Resize((256, 256), 0),
        transforms.Lambda(lambda img: np.asarray(img, dtype=np.uint8)),
    ])
    return {'img': image_pipeline, 'gt': gt_pipeline}
def get_data(train):
    """Load CIFAR10 as flat 20x20 grayscale vectors in a DataFrame with binary labels.

    Returns (DataFrame with COLUMN_LABEL column, feature mean, feature std).
    """
    pipeline = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((20, 20)),
        transforms.ToTensor(),
        lambda t: t.numpy().flatten()])
    raw = datasets.CIFAR10('../data/dl/', train=train, download=True, transform=pipeline)
    features, labels = zip(*raw)
    data_x = np.array(features)
    # Binarize: classes 0-4 -> 0, classes 5-9 -> 1.
    data_y = (np.array(labels, dtype='int32').reshape(-1, 1) >= 5).astype('int32')
    data = pd.DataFrame(data_x)
    data[COLUMN_LABEL] = data_y
    return data, data_x.mean(), data_x.std()
#---
def get_data(train):
    """Load CIFAR10 as flat 20x20 grayscale vectors in a DataFrame.

    Unlike the earlier `get_data` in this file, labels keep the raw 0-9
    class ids. NOTE(review): the duplicate name means whichever definition
    comes last shadows the other at import time.
    """
    pipeline = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((20, 20)),
        transforms.ToTensor(),
        lambda t: t.numpy().flatten()])
    raw = datasets.CIFAR10('../data/dl/', train=train, download=True, transform=pipeline)
    features, labels = zip(*raw)
    data_x = np.array(features)
    data_y = np.array(labels, dtype='int32').reshape(-1, 1)
    data = pd.DataFrame(data_x)
    data[COLUMN_LABEL] = data_y
    return data, data_x.mean(), data_x.std()
#---
def initialize_dataset(clevr_dir, dictionaries, state_description=True):
    """Build (train, test) CLEVR datasets.

    With state_description=True (the default) returns state-description
    datasets; otherwise pixel datasets with light train-time augmentation.
    """
    # Guard clause: the state-description variant needs no image transforms.
    if state_description:
        return (ClevrDatasetStateDescription(clevr_dir, True, dictionaries),
                ClevrDatasetStateDescription(clevr_dir, False, dictionaries))
    train_transforms = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.Pad(8),
        transforms.RandomCrop((128, 128)),
        transforms.RandomRotation(2.8),  # .05 rad
        transforms.ToTensor()])
    test_transforms = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor()])
    return (ClevrDataset(clevr_dir, True, dictionaries, train_transforms),
            ClevrDataset(clevr_dir, False, dictionaries, test_transforms))
最常用:
def build_image_transforms(self):
    """Install the standard ImageNet-style pipeline: 224x224, tensor, normalize."""
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    self.image_transform = transforms.Compose(steps)
def load_data(domain, root_dir, batch_size):
    """Build shuffled loaders for a domain-adaptation task.

    Returns a dict with keys 'src' (source train), 'val' (source test),
    and 'tar' (target train).
    """
    src_train_img, src_train_label, src_test_img, src_test_label = \
        load_dataset(domain['src'], root_dir)
    tar_train_img, tar_train_label, tar_test_img, tar_test_label = \
        load_dataset(domain['tar'], root_dir)
    preprocess = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    def _loader(imgs, labels):
        dataset = GetDataset(imgs, labels, preprocess)
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                           drop_last=False, num_workers=4)

    # Kept for parity with the original code: the target test split is
    # constructed but never wrapped in a loader.
    _ = GetDataset(tar_test_img, tar_test_label, preprocess)
    dataloaders = {}
    dataloaders['src'] = _loader(src_train_img, src_train_label)
    dataloaders['val'] = _loader(src_test_img, src_test_label)
    dataloaders['tar'] = _loader(tar_train_img, tar_train_label)
    return dataloaders
def loader(path, batch_size=16, num_workers=1, pin_memory=True):
    """Shuffled training loader: resize, random-resized crop, flip, normalize."""
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    augment = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    dataset = datasets.ImageFolder(path, augment)
    return data.DataLoader(dataset,
                           batch_size=batch_size,
                           shuffle=True,
                           num_workers=num_workers,
                           pin_memory=pin_memory)
def test_loader(path, batch_size=16, num_workers=1, pin_memory=True):
    """Deterministic evaluation loader: resize, center crop, normalize; no shuffling."""
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    dataset = datasets.ImageFolder(path, preprocess)
    return data.DataLoader(dataset,
                           batch_size=batch_size,
                           shuffle=False,
                           num_workers=num_workers,
                           pin_memory=pin_memory)
def load_training(root_path, dir, batch_size, kwargs):
    """Training loader: 256x256 resize, random 224 crop + flip; no normalization.

    NOTE(review): duplicates the `load_training` defined earlier in this
    file; this later definition shadows it at import time.
    """
    augment = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    dataset = datasets.ImageFolder(root=root_path + dir, transform=augment)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                       drop_last=True, **kwargs)
def load_testing(root_path, dir, batch_size, kwargs):
    """Test loader with deterministic 224x224 resize.

    Note: shuffling stays enabled, matching the original behavior.
    """
    preprocess = transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
    ])
    dataset = datasets.ImageFolder(root=root_path + dir, transform=preprocess)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, **kwargs)