While learning deep learning I put together, and share here, a template for training a classification neural network with the PyTorch framework; the source code is linked at the end of the article.
The data is handled with PyTorch's Dataset class, which can be used in two ways:
# Option 1: use a predefined dataset
import torch
import torchvision

train_data = torchvision.datasets.MNIST('./data/', train=True, download=True,
                                        transform=torchvision.transforms.Compose([
                                            torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize(
                                                (0.1307,), (0.3081,))
                                        ]))
test_data = torchvision.datasets.MNIST('./data/', train=False, download=True,
                                       transform=torchvision.transforms.Compose([
                                           torchvision.transforms.ToTensor(),
                                           torchvision.transforms.Normalize(
                                               (0.1307,), (0.3081,))
                                       ]))
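Each sample in the MNIST dataset above is a normalized 1×28×28 tensor paired with an integer label; a quick check (the index 0 is arbitrary):
img, label = train_data[0]
print(img.shape, label)  # torch.Size([1, 28, 28]) and an int in 0..9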
# Option 2: define your own dataset
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, parameters, transform=None, target_transform=None):
        super(MyDataset, self).__init__()
        self.datas = []
        # Process the incoming parameters here and store the processed samples
        # in self.datas; each entry is one sample in the form (feature, target).
        # Any preprocessing can also be done at this point.
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        data, target = self.datas[index]
        if self.transform is not None:
            data = self.transform(data)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return data, target

    def __len__(self):
        return len(self.datas)

# Using the dataset
parameters = []
dataset = MyDataset(parameters)
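As a concrete illustration, here is a minimal sketch of how the placeholder processing in __init__ might be filled in; the ToyDataset subclass and its made-up in-memory data are hypothetical and not part of the original template:
class ToyDataset(MyDataset):
    def __init__(self, parameters, transform=None, target_transform=None):
        super().__init__(parameters, transform, target_transform)
        # assume `parameters` is a list of (feature_list, label) pairs
        for feature, label in parameters:
            self.datas.append((torch.tensor(feature, dtype=torch.float32), label))

toy = ToyDataset([([0.1, 0.2], 0), ([0.3, 0.4], 1)])
print(len(toy), toy[0])  # 2, then (tensor([0.1000, 0.2000]), 0)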
Load the dataset with DataLoader:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)
# When custom data is irregular (for example variable-length samples), PyTorch's default
# collate function raises an error; in that case either regularize the data first,
# or pass a custom collate_fn such as the one below.
# Custom way for the DataLoader to assemble a batch
def my_collate(batch):
    # implement this according to your own dataset
    data = [item[0] for item in batch]
    target = [item[1] for item in batch]
    target = torch.LongTensor(target)
    return [data, target]
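If the irregularity is variable-length sequences, one common option is to pad each batch to the longest sequence with torch.nn.utils.rnn.pad_sequence. The pad_collate function below is a sketch of that idea, not part of the original template:
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # batch is assumed to be a list of (sequence_tensor, label) pairs of varying length
    seqs = [item[0] for item in batch]
    targets = torch.LongTensor([item[1] for item in batch])
    # pad to the longest sequence in this batch; padding_value 0 is an arbitrary choice
    padded = pad_sequence(seqs, batch_first=True, padding_value=0)
    return padded, targets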
def get_dataLoader(batch_size, train_size_rate, collate_fn, nw, dataset):
    train_size = int(train_size_rate * len(dataset))
    print("Training set size:", train_size)
    test_size = len(dataset) - train_size
    print("Test set size:", test_size)
    # Split the dataset with torch.utils.data.random_split
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
    if collate_fn is not None:
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size, shuffle=True,
                                                   num_workers=nw, collate_fn=collate_fn)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw, collate_fn=collate_fn)
    else:
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size, shuffle=True,
                                                   num_workers=nw)
        test_loader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    return train_loader, test_loader
# Split the dataset and build the DataLoaders
train_loader, test_loader = get_dataLoader(batch_size, train_size_rate, None, nw, dataset)
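If your data needs a custom collate, pass it in instead of None; the call below assumes my_collate (or the pad_collate sketch above) is defined:
train_loader, test_loader = get_dataLoader(batch_size, train_size_rate, my_collate, nw, dataset)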
from torch import nn

class my_model(nn.Module):
    def __init__(self):
        super(my_model, self).__init__()
        # Define the convolution, pooling and other layers you need here
        # as attributes of the module.

    def forward(self, x):
        # The forward pass: x is the input to the module.
        # Combine the layers defined above into the full network,
        # then return the result after the processing is done.
        return x
An example:
from torch import nn
import torch.nn.functional as F

class mnist_model(nn.Module):
    def __init__(self):
        super(mnist_model, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x
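A quick sanity check (a sketch; the 28×28 input size matches MNIST) confirms the flattened feature size of 320 and the 10-class output:
m = mnist_model()
dummy = torch.randn(2, 1, 28, 28)  # a batch of 2 grayscale 28x28 images
out = m(dummy)
print(out.shape)                   # expected: torch.Size([2, 10])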
The training and testing steps of the model are wrapped into the helper functions below.
import math
import os
import torch
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from tqdm import tqdm

def sklearn_fn(epoch, predict, true, average='macro'):
    accuracy = accuracy_score(true, predict)
    precision = precision_score(true, predict, average=average)
    recall = recall_score(true, predict, average=average)
    f1 = f1_score(true, predict, average=average)
    print("[EPOCH&BATCH] Epoch:{} accuracy:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(
        epoch + 1, accuracy, precision, recall, f1))
    print(classification_report(true, predict))
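For instance, a dummy call with a couple of hand-written label lists (purely illustrative values) shows the output format:
sklearn_fn(0, [0, 1, 1, 0], [0, 1, 0, 0])  # prints the metric line and a classification report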
def train(model, dataloader, optimizer, criterion, device, epoch):
    model.train()
    i = 0  # iteration counter
    train_loss_list = []  # loss of every step, used for plotting
    train_loss = 0.0  # running training loss (sum of all step losses)
    train_preds = []  # all predictions of this epoch
    train_trues = []  # all ground-truth labels of this epoch
    train_batch_preds = []  # predictions accumulated since the last report
    train_batch_trues = []  # ground-truth labels accumulated since the last report
    visual_acc = []
    visual_precision = []
    visual_recall = []
    visual_f1 = []
    train_bar = tqdm(dataloader)  # progress bar
    for step, data in enumerate(train_bar):
        tokens, targets = data  # one batch of features and targets
        # print(tokens.shape)
        tokens = tokens.to(device)
        targets = targets.to(device)  # move to the chosen device, GPU or CPU
        optimizer.zero_grad()  # clear the gradients
        outputs = model(tokens)  # forward pass
        loss = criterion(outputs, targets)  # compute the loss
        loss.backward()  # backward pass
        optimizer.step()  # update the parameters
        train_loss += loss.item()  # accumulate the loss; .item() is required so the graph is not kept alive
        train_loss_list.append(loss.item())  # record this step's loss
        train_outputs = outputs.argmax(dim=1)  # index of the largest logit per sample, i.e. the predicted class
        train_batch_preds.extend(train_outputs.detach().cpu().numpy())  # collect predictions since the last report
        train_batch_trues.extend(targets.detach().cpu().numpy())
        train_preds.extend(train_outputs.detach().cpu().numpy())  # collect all predictions of the epoch
        train_trues.extend(targets.detach().cpu().numpy())  # collect all ground-truth labels of the epoch
        # Analyse the running results with sklearn
        sklearn_accuracy = accuracy_score(train_trues, train_preds)
        visual_acc.append(sklearn_accuracy)  # accuracy after every step
        sklearn_precision = precision_score(train_trues, train_preds, average='macro')
        visual_precision.append(sklearn_precision)
        sklearn_recall = recall_score(train_trues, train_preds, average='macro')
        visual_recall.append(sklearn_recall)
        sklearn_f1 = f1_score(train_trues, train_preds, average='macro')
        visual_f1.append(sklearn_f1)
        if step % max(1, math.floor(len(dataloader) / 10)) == 0:  # report roughly 10 times per epoch
            sklearn_fn(epoch, train_batch_preds, train_batch_trues, average='macro')
            train_batch_preds = []
            train_batch_trues = []
        train_bar.desc = "[train__epoch__bar] Epoch:{} loss:{:.4f} accuracy:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(
            epoch + 1, train_loss, sklearn_accuracy, sklearn_precision, sklearn_recall, sklearn_f1)
    sklearn_fn(epoch, train_preds, train_trues, average='macro')
    return train_loss_list, visual_acc, visual_precision, visual_recall, visual_f1
def evaluate(model, dataloader, criterion, device, epoch):
    model.eval()
    test_preds = []
    test_trues = []
    test_batch_preds = []
    test_batch_trues = []
    test_loss = 0.0
    test_loss_list = []
    i = 0
    visual_acc = []
    visual_precision = []
    visual_recall = []
    visual_f1 = []
    test_bar = tqdm(dataloader)
    with torch.no_grad():  # no gradients are tracked inside this block
        for step, data in enumerate(test_bar):
            test_tokens, test_targets = data
            test_tokens = test_tokens.to(device)
            test_targets = test_targets.to(device)
            outputs = model(test_tokens)
            loss = criterion(outputs, test_targets)  # compute the loss
            test_loss += loss.item()
            test_loss_list.append(loss.item())
            test_outputs = outputs.argmax(dim=1)
            test_batch_preds.extend(test_outputs.detach().cpu().numpy())
            test_batch_trues.extend(test_targets.detach().cpu().numpy())
            test_preds.extend(test_outputs.detach().cpu().numpy())
            test_trues.extend(test_targets.detach().cpu().numpy())
            test_accuracy = accuracy_score(test_trues, test_preds)
            visual_acc.append(test_accuracy)
            test_precision = precision_score(test_trues, test_preds, average='macro')
            visual_precision.append(test_precision)
            test_recall = recall_score(test_trues, test_preds, average='macro')
            visual_recall.append(test_recall)
            test_f1 = f1_score(test_trues, test_preds, average='macro')
            visual_f1.append(test_f1)
            if step % max(1, math.floor(len(dataloader) / 10)) == 0:  # report roughly 10 times per epoch
                sklearn_fn(epoch, test_batch_preds, test_batch_trues, average='macro')
                test_batch_preds = []
                test_batch_trues = []
            test_bar.desc = "[test__epoch__bar] Epoch:{} loss:{:.4f} accuracy:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(
                epoch + 1, test_loss, test_accuracy, test_precision, test_recall, test_f1)
    sklearn_fn(epoch, test_preds, test_trues, average='macro')
    return test_loss_list, visual_acc, visual_precision, visual_recall, visual_f1
# Visualise the results
def visual_result(train_data, test_data, epoch, label_name_x, label_name_y, fig_name, result_type):
    fig = plt.figure(figsize=(10, 8))
    plt.plot(range(1, len(train_data) + 1), train_data, label='Train {}'.format(label_name_y))
    # Rescale the test curve onto the same x-axis as the training curve
    scale = len(train_data) / len(test_data)
    test_range = range(1, len(test_data) + 1)
    test_range = [i * scale for i in test_range]
    plt.plot(test_range, test_data, label='Test {}'.format(label_name_y))
    plt.xlabel(label_name_x)
    plt.ylabel(label_name_y)
    plt.ylim(0, max(train_data))  # consistent scale
    plt.xlim(0, len(train_data) + 1)  # consistent scale
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    os.makedirs('./visual', exist_ok=True)  # make sure the output directory exists
    fig.savefig('./visual/{}_plot_{}_{}.png'.format(result_type, fig_name, epoch + 1), bbox_inches='tight')
    plt.show()
1. Fix the random seeds so the experimental results can be reproduced
import os
import random
import numpy as np

SEED = 1210

def seed_everything(seed):
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)  # seed Python's random module
    np.random.seed(seed)  # seed numpy
    torch.manual_seed(seed)  # seed the CPU side of torch
    torch.cuda.manual_seed(seed)  # seed the GPU side of torch
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)  # set nearly all the random seeds so the results are reproducible
# Hyperparameters
epochs = 3  # number of training epochs
batch_size = 32  # batch size
learning_rate = 1e-3  # learning rate
weight_decay = 1e-4  # weight decay
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_size_rate = 0.6  # fraction of the data used for training
# nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
nw = 0  # number of DataLoader worker processes; usually 0 on Windows, can be > 0 on Linux
print('Using {} dataloader workers per process'.format(nw))
# Model setup
model = my_model()  # use the model defined above
model_name = "demo_model"  # model name
model = model.to(device)  # move the model to the chosen device
# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()  # multi-class cross-entropy loss
criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# Parameters passed into the custom dataset
parameters = [
]
# Load a custom dataset
dataset = MyDataset(parameters)
# Or use a predefined dataset
dataset = torchvision.datasets.MNIST('./data/', train=True, download=True,
                                     transform=torchvision.transforms.Compose([
                                         torchvision.transforms.ToTensor(),
                                         torchvision.transforms.Normalize(
                                             (0.1307,), (0.3081,))
                                     ]))
# Split the dataset and load it with the DataLoaders
train_loader, test_loader = get_dataLoader(batch_size, train_size_rate, None, nw, dataset)
# Start training
for epoch in range(epochs):
    train_loss, train_acc, train_prec, train_recall, train_f1 = train(model,
                                                                      train_loader,
                                                                      optimizer,
                                                                      criterion,
                                                                      device,
                                                                      epoch)
    test_loss, test_acc, test_prec, test_recall, test_f1 = evaluate(model,
                                                                    test_loader,
                                                                    criterion,
                                                                    device,
                                                                    epoch)
    # Save the model parameters after every epoch
    save_path = './{}_epoch_{}_net.pth'.format(model_name, epoch + 1)
    # Location the model is saved to
    torch.save(model.state_dict(), save_path)
    print("{}_epoch_{}: model saved!".format(model_name, epoch + 1))
    # Loading the parameters back:
    # model.load_state_dict(torch.load(save_path))  # copies the loaded weights into the model
    # model.eval()  # always call model.eval() in evaluation mode to freeze dropout and batch norm, otherwise inference results will be inconsistent
    # Visualise how this epoch's training went (loss/metrics per batch)
    visual_result(train_loss, test_loss, epoch, "batch=32", "loss", model_name, 'loss')
    # visual_result(train_acc, test_acc, epoch, "batch=32", "accuracy", model_name, 'acc')
    # visual_result(train_prec, test_prec, epoch, "batch=32", "precision", model_name, 'prec')
    # visual_result(train_recall, test_recall, epoch, "batch=32", "recall", model_name, 'recall')
    visual_result(train_f1, test_f1, epoch, "batch=32", "f1", model_name, 'f1')
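For completeness, a minimal sketch of loading a saved checkpoint back for inference, following the save_path naming pattern above (the checkpoint variable and the use of the last epoch are illustrative, not part of the original script):
checkpoint = './{}_epoch_{}_net.pth'.format(model_name, epochs)
model.load_state_dict(torch.load(checkpoint, map_location=device))
model.eval()  # freeze dropout and batch norm for inference
with torch.no_grad():
    tokens, targets = next(iter(test_loader))
    preds = model(tokens.to(device)).argmax(dim=1)
    print(preds[:10], targets[:10])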
Source code
Click here to view (no points required)
If anything in this article infringes on your rights, please contact me and it will be removed promptly.
This article records my own learning and the problems I ran into along the way; you are welcome to discuss them in the comments section.