目录
前言
数据集加载
定义网络
训练网络
验证网络
本篇文章会讲解如何使用PyTorch这个深度学习框架完成一个Kaggle上的图像分类任务。
主要会介绍如何加载数据集,导入网络训练数据,保存损失,精度变化曲线和最终模型,以及测试模型在验证集上的好坏。
其数据集介绍可以看一下kaggle的网址,这里就不过多介绍。
数据集来源:https://www.kaggle.com/datasets/puneet6060/intel-image-classification?select=seg_test
加载图像分类的数据集可以使用torchvision.datasets中的ImageFolder这个类,不过在一些其他计算机视觉任务上,数据集加载的部分是需要我们自己写的,那么我们在这里也自己写一下数据集加载的类。
我们先导入需要用到的包:
import os
from torch.utils.data import Dataset
import torch
from torchvision import transforms
from PIL import Image
然后我们定义一个加载数据集的类:
class InterDataset(Dataset):
    """Image-classification dataset laid out as ``root/<class_name>/<image>``.

    Args:
        root: Directory whose sub-directories are named after the classes.
        transform: Callable applied to every PIL image in ``__getitem__``.
        classify: Mapping from class-directory name to integer label index.
    """

    def __init__(self, root, transform, classify):
        super(InterDataset, self).__init__()
        self.root = root
        self.transform = transform
        self.classify = classify
        self.filenames = []
        self.labels = []
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order reproducible between runs and machines.
        for label in sorted(os.listdir(self.root)):
            class_dir = os.path.join(self.root, label)
            if not os.path.isdir(class_dir):
                # A stray file next to the class folders is not a class.
                continue
            for filename in sorted(os.listdir(class_dir)):
                self.filenames.append(os.path.join(class_dir, filename))
                self.labels.append(label)

    def __getitem__(self, index):
        """Return ``(transformed_image, label_index_tensor)`` for one sample.

        Raises:
            KeyError: If the sample's directory name is missing from ``classify``.
        """
        label_index = torch.tensor(self.classify[self.labels[index]])
        # The context manager closes the file handle; convert('RGB') forces
        # three channels so grayscale/RGBA files fit the 3-channel pipeline.
        with Image.open(self.filenames[index]) as img:
            image = img.convert('RGB')
        return self.transform(image), label_index

    def __len__(self):
        """Return the number of image files found under ``root``."""
        return len(self.filenames)
在我们自己这个数据集加载的类中,我们首先要继承Dataset这个类,然后重写其中的__getitem__和__len__这两个方法。
其中在__init__中主要是用os库读入每张图片以及各个图片对应的类别并分别将其存入一个列表中,然后在__getitem__中会依次使用Image库打开图片并对其进行数据增强的操作,然后对相应的类别做一个字符串到数字的映射,并返回图片和类别,__len__会返回总体有多少张图片。
然后我们就可以写一个函数来实例化这个类,从而加载数据
def load_data(batch_size, crop_size, classify,
              train_root='./dataset/train', test_root='./dataset/test'):
    """Build the training and test ``DataLoader`` objects.

    Args:
        batch_size: Number of samples per mini-batch.
        crop_size: Side length (pixels) every image is resized to.
        classify: Mapping from class-directory name to integer label index.
        train_root: Directory holding the training class folders.
        test_root: Directory holding the test class folders.

    Returns:
        ``(train_iter, test_iter)``.
    """
    # Per-channel (mean, std) handed to Normalize.
    stats = ((0.0017, 0.0018, 0.0018), (0.0010, 0.0010, 0.0011))
    train_transform = transforms.Compose([
        transforms.Resize((crop_size, crop_size)),
        # NOTE: Resize already produces crop_size x crop_size images, so this
        # crop selects the whole image; kept to leave training unchanged.
        transforms.RandomCrop((crop_size, crop_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(*stats, inplace=True)])
    test_transform = transforms.Compose([
        transforms.Resize((crop_size, crop_size)),
        transforms.ToTensor(),
        transforms.Normalize(*stats, inplace=True)])
    train_set = InterDataset(train_root, train_transform, classify)
    test_set = InterDataset(test_root, test_transform, classify)
    train_iter = torch.utils.data.DataLoader(
        train_set, batch_size, shuffle=True, drop_last=True)
    # Evaluation must see every sample, so the final partial batch is kept.
    test_iter = torch.utils.data.DataLoader(
        test_set, batch_size, shuffle=False, drop_last=False)
    return train_iter, test_iter
在这个函数中,我们定义了图像增强的操作,如对图像进行剪裁,翻转等操作,然后使用DataLoader来构造小批量的样本,其中均值和标准差通过以下代码得出
def get_mean_std(dl):
    """Estimate per-channel mean and standard deviation over a loader.

    Each batch contributes its mean over dimensions 0, 2 and 3 (and the mean
    of its squares); those per-batch values are averaged with equal weight.

    Args:
        dl: Iterable yielding ``(data, label)`` pairs; ``data`` is reduced
            over dimensions 0, 2 and 3, so it must be a 4-D tensor.

    Returns:
        ``(mean, std)`` tensors with one entry per index of dimension 1.
    """
    reduce_dims = [0, 2, 3]
    first_moment = 0
    second_moment = 0
    n_batches = 0
    for images, _ in dl:
        first_moment = first_moment + images.mean(dim=reduce_dims)
        second_moment = second_moment + (images ** 2).mean(dim=reduce_dims)
        n_batches += 1
    mean = first_moment / n_batches
    # Var[x] = E[x^2] - E[x]^2
    variance = second_moment / n_batches - mean ** 2
    std = variance ** 0.5
    return mean, std
if __name__ == '__main__':
    # Smoke test: build the loaders, show one sample, then print statistics.
    class_names = ('buildings', 'forest', 'glacier', 'mountain', 'sea', 'street')
    classify = {name: idx for idx, name in enumerate(class_names)}
    train_iter, test_iter = load_data(64, 64, classify)
    for images, targets in train_iter:
        # Only the first batch is inspected.
        print(images[0].shape, targets[0])
        break
    # NOTE: the transforms built in load_data already apply Normalize, so the
    # values printed here describe the normalized tensors.
    mean, std = get_mean_std(train_iter)
    print(mean, std)
运行这段代码我们可以得到训练集的均值和标准差,用以对图像进行归一化操作,其中classify是标签映射(因为神经网络并不能传入字符串,所以我们这里做一个字典映射,这里类别较少所以直接用字典了,若类别较多建议写到一个txt文件中,然后读取文件)。
注:若想要单独运行这个py代码,需要将数据集的路径改为:'../dataset/train'和'../dataset/test'。
这里网络部分小编是直接使用的resnet34网络,在这里附上网络的代码和网络解析的传送门
import torch.nn as nn
from torch.nn import functional as F
class CommonBlock(nn.Module):
    """Residual block whose shortcut is the unmodified input.

    Two 3x3 conv + batch-norm stages; the input is added back before the
    final ReLU. Both convolutions receive the same ``stride``, and the
    addition needs the output to match the input, so every use in this file
    passes ``stride=1`` with ``in_channel == out_channel``.
    """

    def __init__(self, in_channel, out_channel, stride):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_channel, out_channel, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + x)``."""
        shortcut = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out, inplace=True)
        out = self.conv2(out)
        out = self.bn2(out)
        out += shortcut
        return F.relu(out, inplace=True)
class SpecialBlock(nn.Module):
    """Residual block whose shortcut is a 1x1 conv + batch-norm projection.

    Args:
        in_channel: Channels of the incoming feature map.
        out_channel: Channels produced by the block.
        stride: Indexable pair; ``stride[0]`` is used by the projection and
            the first 3x3 conv, ``stride[1]`` by the second 3x3 conv.
    """

    def __init__(self, in_channel, out_channel, stride):
        super().__init__()
        first_stride = stride[0]
        second_stride = stride[1]
        projection = [
            nn.Conv2d(in_channel, out_channel, kernel_size=1,
                      stride=first_stride, padding=0, bias=False),
            nn.BatchNorm2d(out_channel),
        ]
        self.change_channel = nn.Sequential(*projection)
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=first_stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=second_stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + change_channel(x))``."""
        shortcut = self.change_channel(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out, inplace=True)
        out = self.conv2(out)
        out = self.bn2(out)
        out += shortcut
        return F.relu(out, inplace=True)
class ResNet34(nn.Module):
    """ResNet-34 style classifier built from CommonBlock and SpecialBlock.

    Four stages of 3, 4, 6 and 3 residual blocks (64, 128, 256 and 512
    channels) follow a 7x7 conv stem; a global average pool and a two-layer
    head with dropout produce ``classes_num`` logits.

    Args:
        classes_num: Number of output classes.
    """

    def __init__(self, classes_num):
        super().__init__()
        self.prepare = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.layer1 = nn.Sequential(
            *[CommonBlock(64, 64, 1) for _ in range(3)]
        )
        self.layer2 = nn.Sequential(
            SpecialBlock(64, 128, [2, 1]),
            *[CommonBlock(128, 128, 1) for _ in range(3)]
        )
        self.layer3 = nn.Sequential(
            SpecialBlock(128, 256, [2, 1]),
            *[CommonBlock(256, 256, 1) for _ in range(5)]
        )
        self.layer4 = nn.Sequential(
            SpecialBlock(256, 512, [2, 1]),
            *[CommonBlock(512, 512, 1) for _ in range(2)]
        )
        self.pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        head = [
            nn.Dropout(p=0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(256, classes_num),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Map a batch of 3-channel images to ``classes_num`` logits per sample."""
        stages = (self.prepare, self.layer1, self.layer2, self.layer3, self.layer4)
        for stage in stages:
            x = stage(x)
        pooled = self.pool(x)
        # Collapse the pooled (N, 512, 1, 1) map to (N, 512) for the linear head.
        flat = pooled.reshape(pooled.shape[0], -1)
        return self.fc(flat)
深度残差网络(ResNet)之ResNet34的实现和个人浅见-CSDN博客:https://blog.csdn.net/rothschild666/article/details/123497166
我们有了网络之后我们就可以训练了,首先导入相应的包
import torch
import torch.nn as nn
from utils.dataLoader import load_data
from utils.model import ResNet34
from tqdm import tqdm
import time
import matplotlib.pyplot as plt
然后我们再定义一个求准确率的函数,它返回网络分类对的图片数量和这一批图片的总数(二者相除即为分类对的图片占全部图片的比例)
def accuracy(predictions, labels):
    """Count correct top-1 predictions in a batch.

    Args:
        predictions: Score tensor; the arg-max is taken along dimension 1.
        labels: Tensor of ground-truth class indices.

    Returns:
        ``(rights, total)``: the number of matches as a float tensor and
        ``len(labels)``. Divide the two to obtain the accuracy.
    """
    predicted_classes = predictions.argmax(dim=1)
    matches = predicted_classes == labels
    total = len(labels)
    return matches.sum().float(), total
然后我们需要定义一些超参数
# Training hyper-parameters.
batch_size = 128 # mini-batch size
crop_size = 64 # side length images are resized/cropped to
in_channels = 3 # number of input image channels
classes_num = 6 # number of output classes
num_epochs = 100 # total number of training epochs
auto_save = 10 # save a checkpoint every this many epochs
lr = 1e-3 # learning rate
weight_decay = 1e-4 # weight decay
device = 'cuda' if torch.cuda.is_available() else 'cpu' # use the GPU when one is available
这里相应的超参数的意义已经写在了注释中,如果出现了out of memory的报错,可以调小batch_size或者crop_size(但是请注意,加载数据集中求均值和标准差那里也需要crop_size这个超参数,尽量二者保持一致)。
随后我们调用数据集加载的函数加载数据,并定义模型,损失函数,优化器以及模型保存路径
# Class-directory name -> integer label index used by the data loaders.
classify = {'buildings': 0, 'forest': 1, 'glacier': 2, 'mountain': 3, 'sea': 4, 'street': 5}
train_iter, test_iter = load_data(batch_size, crop_size, classify)
net = ResNet34(classes_num) # build the model
# File the trained weights are written to.
model_path = 'model_weights/ResNet34.pth'
loss = nn.CrossEntropyLoss() # loss function
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay) # optimizer
接下来我们定义训练函数
def _plot_curves(train_acc_list, test_acc_list, train_loss_list, test_loss_list):
    """Plot accuracy and loss per epoch, save the figure, then show it."""
    plt.subplot(211)
    plt.plot(range(1, len(train_acc_list) + 1), train_acc_list, 'bo--', label="train_acc")
    plt.plot(range(1, len(test_acc_list) + 1), test_acc_list, 'ro--', label="test_acc")
    plt.title("train_acc vs test_acc")
    plt.ylabel("accuracy")
    plt.xlabel("epochs")
    plt.legend()
    plt.subplot(212)
    plt.plot(range(1, len(train_loss_list) + 1), train_loss_list, 'bo--', label="train_loss")
    plt.plot(range(1, len(test_loss_list) + 1), test_loss_list, 'ro--', label="test_loss")
    plt.title("train_loss vs test_loss")
    plt.ylabel("loss")
    plt.xlabel("epochs")
    plt.legend()
    plt.savefig('logs/acc and loss.png')
    plt.show()


def train(net, epochs, train_iter, test_iter, device, loss, optimizer, model_path, auto_save):
    """Train ``net``, evaluate it after every epoch, checkpoint and plot curves.

    Args:
        net: Model to optimise; it is moved to ``device``.
        epochs: Number of passes over ``train_iter``.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` evaluation batches.
        device: Device the model and every batch are moved to.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates the parameters of ``net``.
        model_path: File the ``state_dict`` is written to.
        auto_save: A checkpoint is written every ``auto_save`` epochs.
    """
    import os  # local import: only needed to create the output directories

    # Create the output directories up front so a missing folder cannot
    # abort the run at the first checkpoint or at the final savefig.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    os.makedirs('logs', exist_ok=True)

    train_acc_list = []
    test_acc_list = []
    train_loss_list = []
    test_loss_list = []
    net = net.to(device)
    for epoch in range(epochs):
        net.train()
        train_rights = 0
        train_loss = 0
        train_len = 0
        # The description uses the ``epochs`` argument, not a module global.
        with tqdm(range(len(train_iter)), ncols=100, colour='red',
                  desc="train epoch {}/{}".format(epoch + 1, epochs)) as pbar:
            for X, y in train_iter:
                optimizer.zero_grad()
                X, y = X.to(device), y.to(device)
                y_hat = net(X)
                l = loss(y_hat, y)
                l.backward()
                optimizer.step()
                rights, count = accuracy(y_hat, y)
                train_rights += rights
                train_len += count
                # NOTE: one loss value is added per batch but the sum is divided
                # by the sample count, so with a mean-reduced criterion the
                # reported figure is the mean loss divided by the batch size.
                # Left unchanged so curves stay comparable with earlier runs.
                train_loss += l.detach()
                pbar.set_postfix({'loss': "{:.6f}".format(train_loss / train_len),
                                  'acc': "{:.6f}".format(train_rights / train_len)})
                pbar.update(1)
        train_acc_list.append(train_rights.cpu().numpy() / train_len)
        train_loss_list.append(train_loss.cpu().numpy() / train_len)

        net.eval()
        test_rights = 0
        test_loss = 0
        test_len = 0
        # The whole evaluation pass runs without autograd bookkeeping.
        with torch.no_grad(), \
                tqdm(range(len(test_iter)), ncols=100, colour='blue',
                     desc="test epoch {}/{}".format(epoch + 1, epochs)) as pbar:
            for X, y in test_iter:
                X, y = X.to(device), y.to(device)
                y_hat = net(X)
                rights, count = accuracy(y_hat, y)
                test_rights += rights
                test_len += count
                test_loss += loss(y_hat, y).detach()
                pbar.set_postfix({'loss': "{:.6f}".format(test_loss / test_len),
                                  'acc': "{:.6f}".format(test_rights / test_len)})
                pbar.update(1)
        test_acc_list.append(test_rights.cpu().numpy() / test_len)
        test_loss_list.append(test_loss.cpu().numpy() / test_len)

        if (epoch + 1) % auto_save == 0:
            torch.save(net.state_dict(), model_path)

    _plot_curves(train_acc_list, test_acc_list, train_loss_list, test_loss_list)
在这段代码中,我们会依次遍历训练集和测试集,用训练集的数据进行更新模型参数,然后每迭代一轮,就用测试集测试一下模型的好坏,最终会画出训练集和测试集的精度以及损失并保存。
最后我们就可以训练并保存模型了
# Run the full training job and report how long it took.
print("训练开始")
started_at = time.time()
train(
    net, num_epochs, train_iter, test_iter,
    device=device,
    loss=loss,
    optimizer=optimizer,
    model_path=model_path,
    auto_save=auto_save,
)
# train() only checkpoints every auto_save epochs, so write the final
# weights explicitly as well.
torch.save(net.state_dict(), model_path)
elapsed = time.time() - started_at
# Split the elapsed seconds into hours / minutes / seconds.
minutes, secs = divmod(elapsed, 60)
hours, minutes = divmod(minutes, 60)
print("训练结束")
print("本次训练时长为:%02d:%02d:%02d" % (hours, minutes, secs))
这里附上小编训练了100轮的训练图片和结果图片
100轮共训练了将近40分钟,通过曲线图可以发现,训练集的损失稳步下降,准确率稳步提高,测试集上有些震荡,不过整体趋势是趋于收敛的。
kaggle上的数据集中还有一个验证集,验证集没有给标签,这里我们来编写代码看看训练好的网络模型在验证集上的表现
import cv2
import os
import torch
import random
import matplotlib.pyplot as plt
from PIL import Image
from utils.model import ResNet34
from torchvision import transforms
# Integer label index -> class name, used to print readable predictions.
classify = {0: 'buildings', 1: 'forest', 2: 'glacier', 3: 'mountain', 4: 'sea', 5: 'street'}
# Inference must use the same preprocessing as the test transform in
# load_data, which normalizes with these per-channel (mean, std) values.
stats = ((0.0017, 0.0018, 0.0018), (0.0010, 0.0010, 0.0011))
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(*stats)])
path = 'dataset/pred'
image_path_list = os.listdir(path)
# Never ask random.sample for more items than the folder holds.
sample_count = min(10, len(image_path_list))
image_path_list_10 = random.sample(image_path_list, sample_count)
image_list = []
image_tensor_list = []
for i in image_path_list_10:
    img = cv2.imread(os.path.join(path, i))
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise ValueError("cannot read image: {}".format(os.path.join(path, i)))
    # OpenCV loads images as BGR; the network was trained on RGB input.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image_list.append(img)
    img = Image.fromarray(img)
    img = transform(img)
    image_tensor_list.append(img)
image = torch.stack(image_tensor_list, 0)
net = ResNet34(6)
# map_location lets weights saved on a GPU load on a CPU-only machine.
net.load_state_dict(torch.load('model_weights/ResNet34.pth', map_location='cpu'))
# Evaluation mode: disables Dropout and makes BatchNorm use running statistics.
net.eval()
with torch.no_grad():
    pred = torch.argmax(net(image), dim=1)
pred_list = [classify[int(i)] for i in pred]
print(pred_list)
for i in range(sample_count):
    plt.subplot(2, 5, i + 1)
    frame = plt.gca()
    # Hide the y axis.
    frame.axes.get_yaxis().set_visible(False)
    # Hide the x axis.
    frame.axes.get_xaxis().set_visible(False)
    plt.imshow(image_list[i])
plt.show()
这里小编从验证集中随机取出10张图像,并对这10张图像进行相应的处理后送入网络进行预测,看一下预测结果
这里测试多次发现随机取出10张图片进行验证,网络预测正确的图片数量为8张或者9张,看来网络训练得还算可以。
源码请查看:https://github.com/jvyou/Intel-Image-Classification
视频解析请查看:https://www.bilibili.com/video/BV15y421Y7Dm/?spm_id_from=333.999.0.0&vd_source=ea64b940c4e46744da2aa737dca8e183