pytorch:dataset和dataloader实例(结构化数据,即表格数据)

入坑pytorch,记录一下

import 部分

import numpy
import numpy as np
import pandas as pd
import torch
import pandas
import random
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

随机创造数据

流程是:先随机构造数据并保存到本地csv文件中,构建dataset时再直接从该本地文件中读取数据

# Where the generated table is written; the dataset reads it back from here.
filepath = "xy.csv"

# Features: the integers 1..10000 in random order, laid out as 100 rows x 100 columns.
shuffled = random.sample(range(1, 10001), 10000)
x = np.reshape(np.array(shuffled), (100, 100))

# Labels: a single column of ones, one entry per feature row.
y = np.ones((x.shape[0], 1))

# Put the label column to the right of the features and save as comma-separated text.
xy = np.concatenate((x, y), axis=1)
np.savetxt(filepath, xy, delimiter=",")

dataset

class mydataset(Dataset):
    """Tabular dataset loaded from a headerless, comma-separated file.

    Every row of the file is one sample: all columns except the last are the
    features, and the last column is the label.

    Args:
        filepath: Path of the CSV file; it is read once, when the dataset is
            constructed.
    """

    def __init__(self, filepath):
        # The file is written by np.savetxt without a header row, so
        # header=None is required; otherwise pandas takes the first sample
        # as column names and the dataset ends up one row short.
        xy = pd.read_csv(filepath, header=None)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy.iloc[:, :-1].values)
        # Selecting with the list [-1] keeps the label column two-dimensional.
        self.y_data = torch.from_numpy(xy.iloc[:, [-1]].values)

    def __len__(self):
        """Return the number of samples (rows of the file)."""
        return self.len

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]


# Check that the dataset was built correctly by walking it row by row.
# This must live outside the class body: code placed inside the class runs
# while the class is still being defined, before any instance can exist.
if __name__ == "__main__":
    for datas, labels in mydataset(filepath):
        print(labels.tolist())

实例化dataset 和构建dataloader

# Build the dataset from the saved file, then wrap it in a loader that
# serves shuffled mini-batches of three samples.
mydataset1 = mydataset(filepath)
dataloader1 = DataLoader(
    dataset=mydataset1,
    batch_size=3,
    shuffle=True,
)

验证(从dataloader和dataset拿取数据)

# Way 1: pull a single mini-batch out of the loader and print it
# (with batch_size=3 the feature part has shape (3, 100)).
data = next(iter(dataloader1))
print(data)

# Way 2: walk through every mini-batch the loader yields.
for step, batch in enumerate(dataloader1):
    batch_x, batch_y = batch
    print('| Step:', step, '| batch x: ', batch_x.numpy(), '| batch y: ', batch_y.numpy())

# Way 3: much the same as way 1, but on a freshly built loader.
data = iter(torch.utils.data.DataLoader(mydataset1, batch_size=3, shuffle=True))
inputs, labels = next(data)

你可能感兴趣的:(pytorch,python,深度学习)