pytorch中如何构建自己的数据集

第一步,读取图片并打标签

用 OpenCV(代码中记为 cv)读取图片,经过基本的预处理(比如 resize)后,转成 PIL 图像(代码中记为 pimg)

def generate_dataset(dir, set_label, size=(120, 120)):
    """
    Load every decodable image in a directory and pair it with one label.

    Each entry of ``dir`` is read with ``cv.imread``, resized to ``size`` and
    wrapped in a PIL image. Entries that cannot be decoded (non-image files,
    sub-directories, ...) are skipped with a warning instead of crashing the
    whole run.

    Args:
        dir: Directory whose entries are loaded. (The name shadows the
            builtin but is kept so existing keyword callers still work.)
        set_label: Label attached to every sample, e.g. ``torch.tensor([1])``
            for the positive samples of a two-category problem.
        size: Target size handed to ``cv.resize``; defaults to ``(120, 120)``.

    Returns:
        A tuple ``(train_data, labels)`` of two equally long lists.
    """
    import warnings

    train_data = []
    labels = []
    # Sorted so the sample order does not depend on the file system.
    for file_name in sorted(os.listdir(dir)):
        img_path = os.path.join(dir, file_name)
        img = cv.imread(img_path)
        if img is None:
            # Presumably cv is OpenCV (cv2), whose imread signals failure by
            # returning None rather than raising; resizing None would crash.
            warnings.warn(f"skipping unreadable image: {img_path}", stacklevel=2)
            continue
        img = cv.resize(img, size)
        # NOTE(review): OpenCV loads images in BGR channel order while
        # Image.fromarray interprets the array as RGB. Left unchanged because
        # downstream normalization statistics may rely on it - confirm.
        pimg = Image.fromarray(img)
        train_data.append(pimg)
        labels.append(set_label)
    return train_data, labels

# Positive samples get label 1, negative samples get label 0.
P_data, P_labels = generate_dataset(
    dir="Positive/",
    set_label=torch.tensor([1]),
)
N_data, N_labels = generate_dataset(
    dir="Negative/",
    set_label=torch.tensor([0]),
)
# Splitting into training and validation sets is left to the reader.

第二步,继承 Dataset 类并重写 __getitem__ 和 __len__ 方法

class MyDataset(Dataset):
    """
    Map-style dataset over in-memory samples and their labels.

    Transforms are applied on every access in ``__getitem__`` rather than once
    in ``__init__``, so a random augmentation produces a fresh result each
    time a sample is requested.
    """

    def __init__(self, data, labels, transform=None, target_transform=None):
        """
        Args:
            data: Indexable collection of samples.
            labels: Collection of labels; ``data[i]`` belongs to ``labels[i]``.
                Surplus entries in ``data`` are ignored.
            transform: Optional callable applied to a sample on access.
            target_transform: Optional callable applied to a label on access.

        Raises:
            IndexError: If ``data`` holds fewer items than ``labels``.
        """
        super().__init__()
        if len(data) < len(labels):
            raise IndexError("data has fewer items than labels")
        self.transform = transform
        self.target_transform = target_transform
        # Untransformed (sample, label) pairs; DataLoader reads them through
        # __getitem__.
        self.imgs = list(zip(data, labels))

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair at ``index`` after transforms."""
        sample, label = self.imgs[index]
        if self.transform is not None:
            sample = self.transform(sample)
            if isinstance(sample, torch.Tensor):
                # Keep tensors on the CPU, as the eager version did.
                sample = sample.to(torch.device("cpu"))
        if self.target_transform is not None:
            label = self.target_transform(label)
        return sample, label

    def __len__(self):
        """Return the number of (sample, label) pairs."""
        return len(self.imgs)

第三步,用MyDataset构建数据集

# Per-channel mean and standard deviation handed to Normalize below.
normMean = [0.35174567, 0.4027052, 0.39986762]
normStd = [0.18738173, 0.17127964, 0.1971462]
transform = transforms.Compose([
    transforms.ToTensor(),  # range [0, 255] -> [0.0, 1.0]
    transforms.Normalize(mean=normMean, std=normStd),
])
# A separate train_transform with data augmentation could be defined as well.
train_data = MyDataset(data=train_data, labels=train_labels, transform=transform)
valid_data = MyDataset(data=valid_data, labels=valid_labels, transform=transform)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(valid_data, batch_size=BATCH_SIZE, shuffle=True)

下面就是训练的事儿啦,这个大家在网上都能找到很多,就不赘述了

你可能感兴趣的:(pytorch,python,深度学习)