一,数据加载
#coding:utf-8
import torch as t
from torch.utils import data
import os
from PIL import Image
import numpy as np
class DogCat(data.Dataset):
def __init__(self, path):
imgs = os.listdir(path)
# 所有图片的绝对路径
# 这里不实际加载图片,只是指定路径,当调用__getitem__时才会真正读图片
self.imgs_list_path = [os.path.join(path, i) for i in imgs]
def __getitem__(self, index):
img_path = self.imgs_list_path[index]
# dog->1, cat->0
label = 1 if 'dog' in img_path.split('/')[-1] else 0
pil_img = Image.open(img_path)
array = np.asarray(pil_img)
img = t.from_numpy(array)
return img_path,img, label
def __len__(self):
return len(self.imgs_list_path)
if __name__ == '__main__':
dataset = DogCat('./data/dogcat/')
# img, label = dataset[0] # 相当于调用dataset.__getitem__(0)
print('len(dataset)=',len(dataset))
for img_path,img, label in dataset:
print(img_path,img.size(), img.float().mean(), label)
二,数据归一化
PyTorch提供了torchvision1。它是一个视觉工具包,提供了很多视觉图像处理的工具,其中transforms
模块提供了对PIL Image
对象和Tensor
对象的常用操作。
对PIL Image的操作包括:
Scale
:调整图片尺寸,长宽比保持不变CenterCrop
、RandomCrop
、RandomResizedCrop
: 裁剪图片Pad
:填充ToTensor
:将PIL Image对象转成Tensor,会自动将[0, 255]归一化至[0, 1]对Tensor的操作包括:
#coding:utf-8
import torch as t
from torch.utils import data
import os
from PIL import Image
import numpy as np
from torchvision import transforms
transform = transforms.Compose([
transforms.Resize(224), # 缩放图片(Image),保持长宽比不变,最短边为224像素
transforms.CenterCrop(224), # 从图片中间切出224*224的图片
transforms.ToTensor(), # 将图片(Image)转成Tensor,归一化至[0, 1]
transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]) # 标准化至[-1, 1],规定均值和标准差
#input[channel] = (input[channel] - mean[channel]) / std[channel]
])
class DogCat(data.Dataset):
def __init__(self, path,transforms=None):
imgs = os.listdir(path)
# 所有图片的绝对路径
# 这里不实际加载图片,只是指定路径,当调用__getitem__时才会真正读图片
self.imgs_list_path = [os.path.join(path, i) for i in imgs]
self.transforms=transforms
def __getitem__(self, index):
img_path = self.imgs_list_path[index]
# dog->1, cat->0
label = 1 if 'dog' in img_path.split('/')[-1] else 0
pil_img = Image.open(img_path)
if self.transforms:
pil_img=self.transforms(pil_img)
array = np.asarray(pil_img)
img = t.from_numpy(array)
return img_path,img, label
def __len__(self):
return len(self.imgs_list_path)
if __name__ == '__main__':
dataset = DogCat('./data/dogcat/',transforms=transform)
# img, label = dataset[0] # 相当于调用dataset.__getitem__(0)
print('len(dataset)=',len(dataset))
for img_path,img, label in dataset:
print(img_path,img.size(), img.float().mean(), label)
三,利用fer2013数据集进行预处理
数据集地址:https://download.csdn.net/download/fanzonghao/11183885
''' Fer2013 Dataset class'''
from __future__ import print_function
from PIL import Image
import numpy as np
import h5py
import torch.utils.data as data
import cv2
import torchvision.transforms as transforms
# 定义对数据的预处理
transform = transforms.Compose([
transforms.ToTensor(), # 转为Tensor 归一化至0~1
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), # 归一化
])
class FER2013(data.Dataset):
"""`FER2013 Dataset.
Args:
train (bool, optional): If True, creates dataset from training set, otherwise
creates from test set.
transform (callable, optional): A function/transform that takes in an PIL image
and returns a transformed version. E.g, ``transforms.RandomCrop``
"""
def __init__(self, path,split='Training', transform=None):
self.transform = transform
self.split = split # training set or test set
self.data = h5py.File(path, 'r', driver='core')
# now load the picked numpy arrays
if self.split == 'Training':
self.train_data = self.data['Training_pixel']
self.train_labels = self.data['Training_label']
self.train_data = np.asarray(self.train_data)
self.train_data = self.train_data.reshape((28709, 48, 48))
elif self.split == 'PublicTest':
self.PublicTest_data = self.data['PublicTest_pixel']
self.PublicTest_labels = self.data['PublicTest_label']
self.PublicTest_data = np.asarray(self.PublicTest_data)
self.PublicTest_data = self.PublicTest_data.reshape((3589, 48, 48))
else:
self.PrivateTest_data = self.data['PrivateTest_pixel']
self.PrivateTest_labels = self.data['PrivateTest_label']
self.PrivateTest_data = np.asarray(self.PrivateTest_data)
self.PrivateTest_data = self.PrivateTest_data.reshape((3589, 48, 48))
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: (image, target) where target is index of the target class.
"""
if self.split == 'Training':
img, target = self.train_data[index], self.train_labels[index]
elif self.split == 'PublicTest':
img, target = self.PublicTest_data[index], self.PublicTest_labels[index]
else:
img, target = self.PrivateTest_data[index], self.PrivateTest_labels[index]
# doing this so that it is consistent with all other datasets
# to return a PIL Image
img = img[:, :, np.newaxis]
img = np.concatenate((img, img, img), axis=2)
img = Image.fromarray(img)
if self.transform is not None:
img = self.transform(img)
return img, target
def __len__(self):
if self.split == 'Training':
return len(self.train_data)
elif self.split == 'PublicTest':
return len(self.PublicTest_data)
else:
return len(self.PrivateTest_data)
if __name__ == '__main__':
train_data=FER2013(path='./data/data.h5',split='Training',transform=transform)
train_loader = data.DataLoader(dataset=train_data,
batch_size=8,
shuffle=True,
num_workers=2)
print(len(train_data))
# for i,(img,label) in enumerate(train_data):
# if i<1:
# img=np.transpose(np.array(img),(1,2,0))
# print(img.shape)
# img=(img*0.5+0.5)*255
# cv2.imwrite('1.jpg',img)
# print(label.shape)
for i,(img, label) in enumerate(train_loader):
if i<1:
print('train')
img=np.transpose(np.array(img)[0],(1,2,0))
img = (img * 0.5 + 0.5) * 255
cv2.imwrite('2.jpg',img)
结果: