- This post is a learning-record blog for the 365-day deep learning training camp
- Reference article: PyTorch in Practice | Week P4: Monkeypox Recognition
- Original author: K同学啊 | tutoring and custom projects available
- Source: K同学's study circle
First, import the required packages:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import pathlib, random
from PIL import Image
import numpy as np  # used as np.array below
import copy  # used to keep a copy of the best model
from torchinfo import summary  # third-party package for printing the model's actual structure
Next, create a global device object, using the GPU if one is available. Making it a single global object avoids errors caused by data ending up on different devices during the run.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
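As a quick illustration of what this guards against (a toy sketch of my own, not part of the original walkthrough): parameters and inputs must live on the same device, or PyTorch raises a runtime error.
# Sketch: a CPU tensor must be moved before it can feed a GPU model.
layer = nn.Linear(4, 2).to(device)  # parameters now live on `device`
x = torch.randn(1, 4)               # new tensors default to the CPU
y = layer(x.to(device))             # move the input first; layer(x) alone would fail on a GPU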
The dataset can be downloaded from the original author's project; I put it under the data directory of my project. Its file structure is as follows:
data
  Monkeypox
    images...
  Others
    images...
Use the pathlib library to load the dataset, read the class names, and take a first look at the data. Printing class_names gives the two sub-directory names, Monkeypox and Others:
data_path = 'data'
data_lib = pathlib.Path(data_path)
class_names = [f.parts[-1] for f in data_lib.glob('*')]
print(class_names)
Randomly sample a few images for display. I won't paste the pictures here since the monkeypox photos are rather gruesome; run the code yourself if you want to see them.
image_list = list(data_lib.glob('*/*'))
plt.figure(figsize=(20,4))
for i in range(20):
    plt.subplot(2, 10, i+1)
    image_path = random.choice(image_list)
    image = Image.open(str(image_path))
    print(np.array(image).shape)  # print each image's shape to check the sizes
    plt.imshow(image)
    plt.axis('off')
    plt.title(image_path.parts[-2])  # the parent directory name, i.e. the class name
The printout shows the images are uniformly 224x224, so no Resize step is needed (if the sizes varied, we would add transforms.Resize((224, 224)) in front of ToTensor). Next, create the PyTorch dataset and split off 80% for training:
dataset = datasets.ImageFolder(data_path, transform=transforms.ToTensor())
train_len = int(len(dataset) * 0.8)
test_len = len(dataset) - train_len
train_dataset, test_dataset = random_split(dataset, [train_len, test_len])
batch_size = 64 # 数据的批次大小
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
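A side note, not in the original walkthrough: ImageFolder assigns integer labels from the sorted sub-directory names (here Monkeypox -> 0, Others -> 1), and random_split draws a fresh random permutation on every run. If you want the same split each time, you can pass an explicitly seeded generator:
# Hedged sketch: a reproducible train/test split via a seeded generator.
# The seed value 42 is arbitrary, chosen here only for illustration.
generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_len, test_len], generator=generator
)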
The model is built from 3x3 convolutions plus max pooling: four Conv-BN-ReLU-Pool stages followed by two fully connected layers. The comments track how the spatial size shrinks from 224 down to 12:
class Network(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        # 224 -> 222
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.bn1 = nn.BatchNorm2d(16)
        # 222 -> 111
        self.maxpool = nn.MaxPool2d(2)
        # 111 -> 109
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.bn2 = nn.BatchNorm2d(32)
        # 109 -> 54 -> 52
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.bn3 = nn.BatchNorm2d(64)
        # 52 -> 26 -> 24
        self.conv4 = nn.Conv2d(64, 64, 3)
        self.bn4 = nn.BatchNorm2d(64)
        # 24 -> 12
        self.fc1 = nn.Linear(64*12*12, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.4)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.maxpool(x)
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.maxpool(x)
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 64*12*12)
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax internally,
        # so calling F.softmax here would effectively soften the outputs twice.
        x = self.fc2(x)
        return x
model = Network(len(class_names)).to(device)
summary(model, (batch_size, 3, 224, 224))
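To double-check the 64*12*12 figure in fc1 without tracing the arithmetic by hand, you can push a dummy batch through the convolutional stages. A small sketch of my own, mirroring the forward pass above:
# Sketch: verify the flattened feature size with a dummy input.
m = Network(len(class_names)).to(device)
with torch.no_grad():
    t = torch.zeros(1, 3, 224, 224, device=device)
    for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3), (m.conv4, m.bn4)]:
        t = m.maxpool(F.relu(bn(conv(t))))
print(t.shape)  # expected: torch.Size([1, 64, 12, 12]) -> 64*12*12 = 9216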
Each epoch runs one training pass and one evaluation pass, so let's first wrap the two procedures in separate functions.
def train(model, train_loader, loss_fn, optimizer):
    data_len = len(train_loader.dataset)  # number of samples
    batch_len = len(train_loader)         # number of batches
    train_loss, train_acc = 0, 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
    train_loss /= batch_len  # average loss per batch
    train_acc /= data_len    # accuracy over all samples
    return train_loss, train_acc
def test(model, test_loader, loss_fn):
    # model.eval() and torch.no_grad() are handled by the caller below
    data_len = len(test_loader.dataset)
    batch_len = len(test_loader)
    test_loss, test_acc = 0, 0
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        pred = model(x)
        loss = loss_fn(pred, y)
        test_loss += loss.item()
        test_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= batch_len
    test_acc /= data_len
    return test_loss, test_acc
Before training, we need to define the loss function, the optimizer, and related objects. I also added a scheduler that adjusts the learning rate dynamically, to keep the model from failing to converge while oscillating around the optimum.
epochs = 50
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda epoch: 0.92**(epoch//4))  # learning-rate decay: scale the base lr by 0.92 every 4 epochs
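To make the schedule concrete: LambdaLR multiplies the initial learning rate by whatever the lambda returns for the current epoch, so the rate stays at 1e-3 for epochs 0-3, drops to 9.2e-4 for epochs 4-7, and so on. A quick stand-alone check (the throwaway probe optimizer here is my own, just for printing):
# Sketch: print the effective learning rate for the first few epochs.
probe_opt = optim.Adam([torch.zeros(1, requires_grad=True)], lr=1e-3)
probe_sched = optim.lr_scheduler.LambdaLR(probe_opt, lr_lambda=lambda epoch: 0.92**(epoch//4))
for epoch in range(9):
    print(epoch, probe_opt.param_groups[0]['lr'])  # 0.001 for epochs 0-3, 0.00092 from epoch 4, ...
    probe_sched.step()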
Now start training:
train_loss, train_acc = [], []  # training history
test_loss, test_acc = [], []    # validation history
best_acc = 0
for epoch in range(epochs):
    model.train()
    epoch_train_loss, epoch_train_acc = train(model, train_loader, loss_fn, optimizer)
    model.eval()
    with torch.no_grad():
        epoch_test_loss, epoch_test_acc = test(model, test_loader, loss_fn)
    train_loss.append(epoch_train_loss)
    train_acc.append(epoch_train_acc)
    test_loss.append(epoch_test_loss)
    test_acc.append(epoch_test_acc)
    scheduler.step()  # step the scheduler once per epoch, which re-evaluates the lambda
    if epoch_test_acc > best_acc:
        best_acc = epoch_test_acc
        best_model = copy.deepcopy(model)  # deep-copy the model with the best accuracy so far
    print(f"Epoch {epoch+1}, TrainLoss: {epoch_train_loss:.3f}, TrainAcc: {epoch_train_acc*100:.1f}%, ValLoss: {epoch_test_loss:.3f}, ValAcc: {epoch_test_acc*100:.1f}%")
print(f"Done. Best validation accuracy: {best_acc*100:.1f}%")
After 50 epochs, the model's best accuracy reached 91.3%. Save the best model's weights:
torch.save(best_model.state_dict(), 'best_model.pth')
Use matplotlib line charts to plot the loss and accuracy on the training and validation sets over the course of training:
data_range = range(epochs)
plt.figure(figsize=(20,5))
plt.subplot(1,2, 1)
plt.plot(data_range, train_loss, label='train loss')
plt.plot(data_range, test_loss, label='validation loss')
plt.title('loss')
plt.legend(loc='upper right')
plt.subplot(1,2,2)
plt.plot(data_range, train_acc, label='train accuracy')
plt.plot(data_range, test_acc, label='validation accuracy')
plt.title('accuracy')
plt.legend(loc='lower right')
Finally, reload the best weights and run inference on a randomly chosen image:
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()  # switch off dropout and use BatchNorm's running statistics
image_path = random.choice(image_list)
raw_image = Image.open(str(image_path))
image = transforms.ToTensor()(raw_image)
image = image.unsqueeze(0).to(device)  # add a batch dimension
with torch.no_grad():
    pred = model(image)
plt.figure(figsize=(5,5))
plt.axis('off')
plt.imshow(raw_image)
plt.title(class_names[pred.argmax(1).item()])  # show the predicted class name
I won't show the output here either; the pictures are hard to look at.
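One optional extension (my own addition, not from the original article): since the network now returns raw logits, you can apply softmax at inference time if you also want a confidence score alongside the predicted label:
# Sketch: turn the logits into class probabilities for display.
probs = F.softmax(pred, dim=1)       # softmax only at inference time
conf_score, idx = probs.max(dim=1)   # highest probability and its class index
print(f"{class_names[idx.item()]}: {conf_score.item()*100:.1f}%")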