import torch
from torch.autograd import Variable
import torchvision
from torchvision import datasets, transforms
import os
import matplotlib.pyplot as plt
import time
%matplotlib inline
# Root directory of the Dogs-vs-Cats dataset (expects "train" and "valid" subfolders).
data_dir = './data/DogsVSCats'

# Per-split preprocessing: resize every image to 64x64 and convert it to a tensor.
data_transform = {}
for phase in ["train", "valid"]:
    data_transform[phase] = transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.ToTensor(),
    ])

# Load each split from its folder with the matching transform; dicts keep the
# train/valid pipelines symmetric and easy to look up by phase name.
image_datasets = {}
for phase in ["train", "valid"]:
    image_datasets[phase] = datasets.ImageFolder(
        root=os.path.join(data_dir, phase),
        transform=data_transform[phase],
    )

# Wrap each dataset in a shuffled DataLoader producing batches of 16 images.
dataloader = {}
for phase in ["train", "valid"]:
    dataloader[phase] = torch.utils.data.DataLoader(
        dataset=image_datasets[phase],
        batch_size=16,
        shuffle=True,
    )
注:在以上代码中数据的变换和导入都采用了字典的形式,这是因为我们需要分别对训练数据集和验证数据集的数据载入方法进行简单定义,使用字典可以简化代码,也方便之后进行相应的调用和操作。
# Pull a single batch from the training loader to sanity-check shapes/labels.
train_iter = iter(dataloader["train"])
X_example, y_example = next(train_iter)
print(u'X_example个数{}'.format(len(X_example)))
print(u'y_example个数{}'.format(len(y_example)))
print(X_example.shape)
print(y_example.shape)

# Mapping from class name to integer label, and the ordered list of class names.
index_classes = image_datasets["train"].class_to_idx
print(index_classes)
example_classes = image_datasets["train"].classes
print(example_classes)
输出结果:
# Tile the whole batch into one grid image, then move channels last (H, W, C)
# so matplotlib can display it.
img = torchvision.utils.make_grid(X_example)
img = img.numpy().transpose([1, 2, 0])

# Print each image's class name, eight per row, mirroring the grid layout.
for count, label in enumerate(y_example, 1):
    print(example_classes[label], end=' ')
    if count % 8 == 0:
        print()

plt.imshow(img)
plt.show()
基于VGG16架构来搭建一个简化版的VGGNet模型,简化如下:
class Models(torch.nn.Module):
    """Simplified VGG16-style CNN for 2-class (cat/dog) classification.

    Expects 3-channel images; four conv stages each halve the spatial size
    via 2x2 max-pooling, so a 64x64 input reaches the classifier as a
    4x4x512 feature map.
    """

    def __init__(self):
        super(Models, self).__init__()
        # Feature extractor: conv blocks of 64 -> 128 -> 256 -> 512 channels,
        # each followed by a 2x2 max-pool (64x64 input -> 4x4 output).
        self.Conv = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Classifier head: two dropout-regularized hidden layers, then 2 logits.
        self.Classes = torch.nn.Sequential(
            torch.nn.Linear(4*4*512, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 2)
        )

    def forward(self, input):
        """Run a (N, 3, 64, 64) batch through the net; returns (N, 2) logits."""
        x = self.Conv(input)
        # Flatten per sample. Using x.size(0) instead of -1 for the batch
        # dimension guarantees the batch size is preserved; view(-1, 4*4*512)
        # would silently re-batch if the feature size ever mismatched.
        x = x.view(x.size(0), -1)
        x = self.Classes(x)
        return x
查看模型细节:
# Instantiate the network and print its layer structure.
model = Models()
print(model)

# Cross-entropy loss over the two class logits, and Adam with a small
# learning rate over all parameters. (The original defined both of these
# twice back to back; the duplicate definitions were redundant and have
# been removed.)
loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
# Number of full passes over the dataset.
epoch_n = 10
time_open = time.time()

for epoch in range(epoch_n):
    print("Epoch {}/{}".format(epoch+1, epoch_n))
    print("-"*10)
    for phase in ["train", "valid"]:
        if phase == "train":
            print("Training...")
            # Training mode: Dropout active (BatchNorm would use batch stats).
            model.train(True)
        else:
            print("Validing...")
            # Eval mode: Dropout disabled (BatchNorm would use running stats).
            model.train(False)

        running_loss = 0.0
        running_corrects = 0

        # start=1 so `batch` counts batches from 1 for the progress report.
        for batch, data in enumerate(dataloader[phase], 1):
            # X: images (16, 3, 64, 64); y: integer labels (16,)
            X, y = data
            X, y = Variable(X), Variable(y)
            # y_pred: raw class scores, shape (16, 2)
            y_pred = model(X)
            # pred: index of the larger score = predicted class per image
            _, pred = torch.max(y_pred.data, 1)
            optimizer.zero_grad()
            loss = loss_f(y_pred, y)
            # Backprop and update weights only in the training phase.
            if phase == "train":
                loss.backward()
                optimizer.step()
            # Accumulate the loss as a plain float: `running_loss += loss`
            # would keep every batch's autograd graph alive and steadily
            # exhaust memory.
            running_loss += float(loss)
            # Convert the 0-dim count tensor to a Python int so the accuracy
            # below is true division, not truncating integer-tensor division.
            running_corrects += torch.sum(pred == y.data).item()
            # Progress report every 500 training batches.
            if batch%500==0 and phase=="train":
                print("Batch {}, Train Loss:{:.4f}, Train ACC:{:.4F}%".format(batch, running_loss/batch,
                                                                              100*running_corrects/(16*batch)))
        # Mean per-sample loss (batch size 16) and accuracy over the split.
        epoch_loss = running_loss * 16 / len(image_datasets[phase])
        epoch_acc = 100 * running_corrects / len(image_datasets[phase])
        print("{} Loss:{:.4f} Acc:{:.4f}%".format(phase, epoch_loss, epoch_acc))

# Report total wall-clock time for training and optimization.
time_end = time.time() - time_open
print(time_end)
由于使用计算机CPU进行计算耗时过长,所以对代码进行适当调整,将在模型训练过程中需要计算的参数全部迁移至GPUs上。
这个过程非常简单和方便,只需要对这部分参数进行类型转换即可,但在此之前,需要先确认GPUs硬件是否可用,代码如下:
# Probe whether a CUDA-capable GPU is available before moving anything to it.
print(torch.cuda.is_available())
若输出结果为True,则说明GPUs具备了被使用的全部条件,若遇到False,则说明显卡暂不支持,需要查看具体问题的所在并进行调整。(此处略过)
# Remember whether CUDA is usable for this run.
Use_gpu = torch.cuda.is_available()
# Move the network's parameters onto the GPU when one is available.
model = model.cuda() if Use_gpu else model
# Rebuild the criterion and optimizer so the optimizer tracks the model's
# current (possibly CUDA) parameters.
loss_f = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
# Number of full passes over the dataset.
epoch_n = 10
time_open = time.time()

for epoch in range(epoch_n):
    print("Epoch {}/{}".format(epoch+1, epoch_n))
    print("-"*10)
    for phase in ["train", "valid"]:
        if phase == "train":
            print("Training...")
            # Training mode: Dropout active (BatchNorm would use batch stats).
            model.train(True)
        else:
            print("Validing...")
            # Eval mode: Dropout disabled (BatchNorm would use running stats).
            model.train(False)

        running_loss = 0.0
        running_corrects = 0

        # start=1 so `batch` counts batches from 1 for the progress report.
        for batch, data in enumerate(dataloader[phase], 1):
            # X: images (16, 3, 64, 64); y: integer labels (16,)
            X, y = data
            # Move the batch to the GPU when available.
            if Use_gpu:
                X, y = Variable(X.cuda()), Variable(y.cuda())
            else:
                X, y = Variable(X), Variable(y)
            # y_pred: raw class scores, shape (16, 2)
            y_pred = model(X)
            # pred: index of the larger score = predicted class per image
            _, pred = torch.max(y_pred.data, 1)
            optimizer.zero_grad()
            loss = loss_f(y_pred, y)
            # Backprop and update weights only in the training phase.
            if phase == "train":
                loss.backward()
                optimizer.step()
            # Accumulate the loss as a plain float: `running_loss += loss`
            # keeps every batch's autograd graph alive and is exactly what
            # produced the "CUDA out of memory" error described below.
            running_loss += float(loss)
            # Convert the 0-dim count tensor to a Python int so the accuracy
            # below is true division, not truncating integer-tensor division.
            running_corrects += torch.sum(pred == y.data).item()
            # Progress report every 500 training batches.
            if batch%500==0 and phase=="train":
                print("Batch {}, Train Loss:{:.4f}, Train ACC:{:.4F}%".format(batch, running_loss/batch,
                                                                              100*running_corrects/(16*batch)))
        # Mean per-sample loss (batch size 16) and accuracy over the split.
        epoch_loss = running_loss * 16 / len(image_datasets[phase])
        epoch_acc = 100 * running_corrects / len(image_datasets[phase])
        print("{} Loss:{:.4f} Acc:{:.4f}%".format(phase, epoch_loss, epoch_acc))

# Report total wall-clock time for training and optimization.
time_end = time.time() - time_open
print(time_end)
RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 2.00 GiB total capacity; 1.32 GiB already allocated; 16.00 KiB free; 8.34 MiB cached)
# 计算损失和
running_loss += loss
修改为:
# 计算损失和
running_loss += float(loss)
三者的区别和相应的作用