非常感谢这些资料的作者:
【参考1】、【PyTorch速成教程 (by Sung Kim)】
├── main.py:实现训练 (train)、验证 (validation) 和测试 (test)
├── model.py:实现的模型
├── dataset.py:加载的数据
└── utils.py:常用功能
import numpy as np
import torch
from torch import from_numpy, tensor
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import model
import utils
# load data
dataset = MyDataset()
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=2)
# model
model=Model()
# define loss and optimizer
criterion=torch.nn.BCELoss(size_average=True)
optimizer=torch.optim.SGD(model.parameters(),lr=0.1)
# train
for epoch in range(2):
for i, data in enumerate(train_loader, 0):
# get the inputs
inputs, labels = data
# wrap them in Variable
inputs, labels = Variable(inputs), Variable(labels)
# Forward pass
y_pred=model(inputs)
# Compute and print loss
loss=criterion(y_pred,labels)
accuracy= ultis.accuracy(y_pred,labels)
print("[{:05d}/{:05d}] train_loss:{:.4f} accuracy: {:.4f}]".format(
i,epoch,loss.data[0],accuracy))
# update
optimizer.zero_grad() # zero gradients
loss.backward() # perform a backward pass
optimizer.step() # update weight or parameters
import torch
class Model(torch.nn.Module):
def __init__(self):
super(Model,self).__init__()
self.l1=torch.nn.Linear(8,6)
self.l2=torch.nn.Linear(6,4)
self.l3=torch.nn.Linear(4,1)
self.sigmoid=torch.nn.Sigmoid()
# 数据流
def forward(self,x):
out1=self.sigmoid(self.l1(x))
out2=self.sigmoid(self.l2(out1))
y_pred=self.sigmoid(self.l3(out2))
return y_pred
要点:
(1)必须重载 __getitem__ 和 __len__;
(2)__getitem__ 按下标返回单条样本,组 batch 的工作由 DataLoader 完成。
import torch
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
def __init__(self): # Initialize your data, download, etc.
xy = np.loadtxt('./data/diabetes.csv.gz', delimiter=',', dtype=np.float32)
self.len = xy.shape[0]
self.x_data = torch.from_numpy(xy[:, 0:-1])
self.y_data = torch.from_numpy(xy[:, [-1]])
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.len
import numpy as np
import scipy.sparse as sp
import torch
import os
def encode_onehot(labels):
classes = set(labels)
classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
enumerate(classes)}
labels_onehot = np.array(list(map(classes_dict.get, labels)),
dtype=np.int32)
return labels_onehot
def accuracy(output, labels):
preds = output.max(1)[1].type_as(labels)
correct = preds.eq(labels).double()
correct = correct.sum()
return correct / len(labels)
def list_all_files(rootdir):
_files = []
#列出文件夹下所有的目录与文件
list_file = os.listdir(rootdir)
for i in range(0,len(list_file)):
# 构造路径
path = os.path.join(rootdir,list_file[i])
# 判断路径是否是一个文件目录或者文件
# 如果是文件目录,继续递归
if os.path.isdir(path):
_files.extend(list_all_files(path))
if os.path.isfile(path):
_files.append(path)
return _files
def mkdir(path):
# 去除首位空格
path=path.strip()
# 去除尾部 \ 符号
path=path.rstrip("\\")
# 判断路径是否存在
# 存在 True
# 不存在 False
isExists=os.path.exists(path)
# 判断结果
if not isExists:
# 如果不存在则创建目录
# 创建目录操作函数
os.makedirs(path)
print(path+' create sucess')
return True
else:
# 如果目录存在则不创建,并提示目录已存在
print(path+' path exist !')
return False
问:dataset.py 中 __getitem__ 返回的是一个元素,还是一个 batch 数据?
答:返回的是单个元素(一条样本);DataLoader 会按照 batch_size 自动把多条样本组装成一个 batch。