import torch
from torch.utils.data import TensorDataset, DataLoader, random_split, Dataset
from sklearn import datasets
# Build a TensorDataset from the scikit-learn iris arrays.
iris = datasets.load_iris()
# torch.tensor() keeps the dtype of the NumPy array it is given. The iris
# features are float64, while torch.nn layers create float32 parameters by
# default, so the features are cast up front to avoid a dtype-mismatch error
# when a batch is fed to a model. The class indices are pinned to int64 (long),
# which is what classification losses such as CrossEntropyLoss expect, instead
# of relying on the platform's default NumPy integer width.
Tensor_iris = TensorDataset(
    torch.tensor(iris.data, dtype=torch.float32),
    torch.tensor(iris.target, dtype=torch.long),
)
# Split into training / validation / test subsets (60% / 20% / remainder).
num_train = int(len(Tensor_iris) * 0.6)
num_valid = int(len(Tensor_iris) * 0.2)
# The test split takes whatever is left, so the three sizes always sum to the
# dataset length even when the percentages do not divide it evenly.
num_test = len(Tensor_iris) - num_train - num_valid
train, valid, test = random_split(Tensor_iris, [num_train, num_valid, num_test])
print(len(train), len(valid), len(test))  # 90 30 30 (number of samples)
# Wrap each subset in a DataLoader; only the training subset is shuffled.
train = DataLoader(train, batch_size=16, shuffle=True)
valid = DataLoader(valid, batch_size=16, shuffle=False)
test = DataLoader(test, batch_size=16, shuffle=False)
print(len(train), len(valid), len(test))  # 6 2 2 (number of batches)
数据集直接用的sklearn的鸢尾花数据集。
TensorDataset的第一个参数:多特征数值型数据(如上iris.data)
TensorDataset的第二个参数:标签(如上iris.target)
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets
# Preprocessing applied to every image: resize to 224x224, then convert to a tensor.
data_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)
# Load the image dataset rooted at "dir", applying the transform to each image.
data = datasets.ImageFolder(root="dir", transform=data_transform)

# 60% / 20% of the samples for training / validation; the test split takes
# the remainder so the three sizes always add up to len(data).
num_train, num_valid = int(len(data) * 0.6), int(len(data) * 0.2)
num_test = len(data) - (num_train + num_valid)
train, valid, test = random_split(data, [num_train, num_valid, num_test])

# Wrap each subset in a DataLoader with batches of 16; only the training
# subset is shuffled.
train, valid, test = (
    DataLoader(subset, batch_size=16, shuffle=reshuffle)
    for subset, reshuffle in zip((train, valid, test), (True, False, False))
)
data_transform:根据自己的需求进行数据增强。
图片数据集目录例子:
dir
├── dog
└── cat
dir的下一级目录名必须是要分类的标签名,比如要分类dog、cat,就把这两个文件夹分别命名为dog、cat,每个文件夹下存放该类别的所有图片数据。
数据集链接:Kaggle上的Cats-vs-Dogs数据集;也可以在CSDN搜索"cats vs dogs",有人上传了百度网盘链接。