在 PyTorch 中加载图片数据集一般有两种方法。
第一种是使用 torchvision.datasets中的ImageFolder来读取图片然后用 DataLoader来并行加载,适合图片分类问题,简单但不灵活;
第二种是通过继承 torch.utils.data.Dataset 实现用户自定义读取数据集然后用 DataLoader来并行加载,较为灵活。下面分别介绍一下。
我们使用热狗数据集,从这里下载
使用torchvision.datasets中的ImageFolder要求文件夹下的图片如图所示的形式放置。hotdog文件夹下分别是用于训练和评估的train和test文件夹,这两个文件夹下面均有hotdog和not-hotdog两个类别文件夹,每个类别文件夹里面是图像文件。
首先使用 `torchvision.transforms` 将图像调整为 224×224 尺寸并归一化。`ImageFolder` 的第一个参数为各类别文件夹所在的路径,第二个参数接收 `torchvision.transforms` 变换,返回包含所有图片的数据集。
接着使用 `DataLoader` 来加载图片数据集,可设置 `batch_size` 大小。
# Locations of the training and evaluation splits.
train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"

# Per-channel statistics used to normalise the 224x224 images.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Both pipelines end with the same tensor conversion + normalisation.
_normalize_steps = [transforms.ToTensor(), transforms.Normalize(mean, std)]

# Training pipeline: random 224x224 crop and random horizontal flip.
train_augs = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        *_normalize_steps,
    ]
)

# Evaluation pipeline: resize to 256, then take the central 224x224 crop.
test_augs = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        *_normalize_steps,
    ]
)

# Wrap each directory in an ImageFolder that applies the matching pipeline.
train_set = datasets.ImageFolder(root=train_dir, transform=train_augs)
test_set = datasets.ImageFolder(root=test_dir, transform=test_augs)

# Mini-batch loaders; only the training loader shuffles its samples.
batch_size = 32
train_iter = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
随机选取9张图片可视化一下:
import random
from matplotlib import pyplot as plt
def denorm(img, channel_mean=None, channel_std=None):
    """Undo per-channel normalisation and return a displayable copy.

    Computes ``img[c] * std[c] + mean[c]`` for every channel ``c`` and clamps
    the result to ``[0, 1]``. The input tensor is left unmodified, so the
    sample held by the caller keeps its normalised values.

    Args:
        img: Floating-point tensor whose first dimension indexes channels.
        channel_mean: Per-channel means; defaults to the module-level
            ``mean`` list.
        channel_std: Per-channel standard deviations; defaults to the
            module-level ``std`` list.

    Returns:
        A new tensor with the same shape as ``img``.
    """
    if channel_mean is None:
        channel_mean = mean
    if channel_std is None:
        channel_std = std
    channels = img.shape[0]
    # Shape the statistics as (C, 1, 1, ...) so they broadcast over the
    # remaining dimensions; only the first `channels` entries are used.
    stat_shape = (-1,) + (1,) * (img.dim() - 1)
    shift = img.new_tensor(channel_mean[:channels]).view(stat_shape)
    scale = img.new_tensor(channel_std[:channels]).view(stat_shape)
    # Clamp so tiny floating-point overshoots do not leave the valid range.
    return (img * scale + shift).clamp(0.0, 1.0)
# Show a 3x3 grid of randomly chosen training images with their labels.
plt.figure(figsize=(8, 8))
for i in range(9):
    # randrange excludes its upper bound; randint(0, len(train_set)) could
    # return len(train_set) itself and index one past the last sample.
    img, label = train_set[random.randrange(len(train_set))]
    img = denorm(img)
    # Move the channel axis last, which is the layout imshow expects.
    img = img.permute(1, 2, 0)
    ax = plt.subplot(3, 3, i + 1)
    ax.imshow(img.numpy())
    ax.set_title("label = %d" % label)
    # Hide the axis ticks; they carry no information for an image grid.
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()
然后定义训练过程:
训练时,在每个 epoch 中依次遍历训练集和验证集,并分别设置 `net.train()` 和 `net.eval()`。在训练集上,执行梯度清零以及前向 + 反向 + 优化步骤;在验证集上,使用 `with torch.no_grad()` 避免梯度计算。
def train(net, train_iter, test_iter, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics per epoch.

    Every epoch runs one optimisation pass over ``train_iter`` followed by a
    gradient-free accuracy pass over ``test_iter``, then prints the mean
    batch loss, both accuracies and the elapsed time. The model and all
    batches are moved to the module-level ``device``. Returns nothing.
    """
    net = net.to(device)
    print("training on", device)
    for epoch in range(num_epochs):
        epoch_start = time.time()

        # Optimisation pass over the training data.
        net.train()
        loss_total, train_hits, train_seen, num_batches = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            optimizer.zero_grad()  # clear gradients left from the last step
            y_hat = net(X)
            batch_loss = criterion(y_hat, y)
            batch_loss.backward()
            optimizer.step()
            loss_total += batch_loss.cpu().item()
            correct = y_hat.argmax(dim=1) == y
            train_hits += correct.sum().cpu().item()
            train_seen += y.shape[0]
            num_batches += 1

        # Accuracy pass over the evaluation data, without tracking gradients.
        with torch.no_grad():
            net.eval()
            test_hits, test_seen = 0.0, 0
            for X, y in test_iter:
                predicted = net(X.to(device)).argmax(dim=1)
                correct = predicted == y.to(device)
                test_hits += correct.float().sum().cpu().item()
                test_seen += y.shape[0]

        report = (
            epoch + 1,
            loss_total / num_batches,
            train_hits / train_seen,
            test_hits / test_seen,
            time.time() - epoch_start,
        )
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % report)
在ResNet18预训练模型上进行微调,对于预训练好的参数,使用较小的学习率来微调,而对于全连接层中的随机初始化参数,需要更大的学习率从头训练。最后执行训练过程:
# Load a pretrained ResNet-18 and replace its final layer with a fresh
# 2-output linear layer.
pretrained_net = models.resnet18(pretrained=True)
num_ftrs = pretrained_net.fc.in_features
pretrained_net.fc = nn.Linear(num_ftrs, 2)

# Separate the new head from all remaining parameters by object id.
output_params = [id(param) for param in pretrained_net.fc.parameters()]
feature_params = (
    param for param in pretrained_net.parameters()
    if id(param) not in output_params
)

# The replaced head uses a learning rate 10x that of the other parameters.
lr = 0.01
param_groups = [
    {'params': feature_params},
    {'params': pretrained_net.fc.parameters(), 'lr': lr * 10},
]
optimizer = optim.SGD(param_groups, lr=lr, weight_decay=0.001)
loss = torch.nn.CrossEntropyLoss()

train(pretrained_net, train_iter, test_iter, loss, optimizer, num_epochs=5)
训练过程:
training on cuda
epoch 1, loss 1.9044, train acc 0.793, test acc 0.815, time 25.9 sec
epoch 2, loss 0.4855, train acc 0.888, test acc 0.915, time 23.2 sec
epoch 3, loss 0.2449, train acc 0.923, test acc 0.938, time 17.3 sec
epoch 4, loss 0.2119, train acc 0.928, test acc 0.889, time 17.4 sec
epoch 5, loss 0.1601, train acc 0.939, test acc 0.925, time 17.3 sec
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, models
import time
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Locations of the training and evaluation splits.
train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"

# Per-channel statistics used to normalise the 224x224 images.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Both pipelines end with the same tensor conversion + normalisation.
_normalize_steps = [transforms.ToTensor(), transforms.Normalize(mean, std)]

# Training pipeline: random 224x224 crop and random horizontal flip.
train_augs = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        *_normalize_steps,
    ]
)

# Evaluation pipeline: resize to 256, then take the central 224x224 crop.
test_augs = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        *_normalize_steps,
    ]
)

# Wrap each directory in an ImageFolder that applies the matching pipeline.
train_set = datasets.ImageFolder(root=train_dir, transform=train_augs)
test_set = datasets.ImageFolder(root=test_dir, transform=test_augs)

# Mini-batch loaders; only the training loader shuffles its samples.
batch_size = 32
train_iter = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
def train(net, train_iter, test_iter, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics per epoch.

    Every epoch runs one optimisation pass over ``train_iter`` followed by a
    gradient-free accuracy pass over ``test_iter``, then prints the mean
    batch loss, both accuracies and the elapsed time. The model and all
    batches are moved to the module-level ``device``. Returns nothing.
    """
    net = net.to(device)
    print("training on", device)
    for epoch in range(num_epochs):
        epoch_start = time.time()

        # Optimisation pass over the training data.
        net.train()
        loss_total, train_hits, train_seen, num_batches = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            optimizer.zero_grad()  # clear gradients left from the last step
            y_hat = net(X)
            batch_loss = criterion(y_hat, y)
            batch_loss.backward()
            optimizer.step()
            loss_total += batch_loss.cpu().item()
            correct = y_hat.argmax(dim=1) == y
            train_hits += correct.sum().cpu().item()
            train_seen += y.shape[0]
            num_batches += 1

        # Accuracy pass over the evaluation data, without tracking gradients.
        with torch.no_grad():
            net.eval()
            test_hits, test_seen = 0.0, 0
            for X, y in test_iter:
                predicted = net(X.to(device)).argmax(dim=1)
                correct = predicted == y.to(device)
                test_hits += correct.float().sum().cpu().item()
                test_seen += y.shape[0]

        report = (
            epoch + 1,
            loss_total / num_batches,
            train_hits / train_seen,
            test_hits / test_seen,
            time.time() - epoch_start,
        )
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % report)
# Load a pretrained ResNet-18 and replace its final layer with a fresh
# 2-output linear layer.
pretrained_net = models.resnet18(pretrained=True)
num_ftrs = pretrained_net.fc.in_features
pretrained_net.fc = nn.Linear(num_ftrs, 2)

# Separate the new head from all remaining parameters by object id.
output_params = [id(param) for param in pretrained_net.fc.parameters()]
feature_params = (
    param for param in pretrained_net.parameters()
    if id(param) not in output_params
)

# The replaced head uses a learning rate 10x that of the other parameters.
lr = 0.01
param_groups = [
    {'params': feature_params},
    {'params': pretrained_net.fc.parameters(), 'lr': lr * 10},
]
optimizer = optim.SGD(param_groups, lr=lr, weight_decay=0.001)
loss = torch.nn.CrossEntropyLoss()

train(pretrained_net, train_iter, test_iter, loss, optimizer, num_epochs=5)
通过继承 `torch.utils.data.Dataset` 实现用户自定义读取数据集,需要实现 `__init__`、`__getitem__` 和 `__len__` 三个方法。
在 `__init__` 中,需要初始化文件路径或文件名列表,以方便后面在 `__getitem__` 中读取。在这里,保存了所有图片样本的路径 `self.all_image_paths` 以及对应的标签 `self.all_image_labels`,并对 mean 和 std 值进行了 reshape。
在 `__getitem__` 中,需要根据索引读取数据,并对数据进行预处理,返回数据对,例如(图片,标签)对。在这里,将一张图片调整为 224×224 尺寸并进行归一化,根据 torch 的输入图片通道要求 (C, H, W) 进行了转置,返回了 (img, label) 对。
在 `__len__` 中,需要返回整个数据集的样本数量。
# Dataset roots; each one holds one sub-directory per class.
train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"
# Per-channel normalisation statistics applied in Hotdog.__getitem__.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


class Hotdog(Dataset):
    """Image dataset read from ``<path>/<class name>/<image file>``.

    Class directories are sorted by name and numbered from 0; every image
    gets the index of its parent directory as label. Samples are returned as
    ``(image, label)`` tensor pairs with the image resized to 224x224,
    scaled to [0, 1], normalised with the module-level ``mean``/``std`` and
    laid out channel-first.

    Args:
        path: Root directory containing one sub-directory per class.
        to_rgb: When true, convert the BGR image loaded by OpenCV to RGB
            before normalising. Defaults to ``False``, which keeps the
            channel order produced by ``cv.imread``.
            NOTE(review): ``mean``/``std`` look like statistics listed in
            RGB order -- confirm; if so, callers should pass
            ``to_rgb=True`` (and stop reversing channels when displaying).

    Raises:
        FileNotFoundError: If ``path`` is not an existing directory.
    """

    # Suffixes treated as images; stray files such as ``.DS_Store`` or
    # ``Thumbs.db`` would otherwise become unreadable samples.
    IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm",
                      ".tif", ".tiff", ".webp")

    def __init__(self, path, to_rgb=False):
        data_root = pathlib.Path(path)
        if not data_root.is_dir():
            raise FileNotFoundError("dataset directory not found: %s" % path)
        label_names = sorted(
            entry.name for entry in data_root.iterdir() if entry.is_dir()
        )
        label_to_index = {name: index for index, name in enumerate(label_names)}
        # Sort so the sample order does not depend on the filesystem.
        image_files = sorted(
            candidate
            for name in label_names
            for candidate in (data_root / name).iterdir()
            if candidate.is_file()
            and candidate.suffix.lower() in self.IMAGE_SUFFIXES
        )
        self.all_image_paths = [str(image_file) for image_file in image_files]
        self.all_image_labels = [
            label_to_index[image_file.parent.name] for image_file in image_files
        ]
        # Shaped (1, 1, 3) to broadcast over a height x width x channel image.
        self.mean = np.array(mean).reshape((1, 1, 3))
        self.std = np.array(std).reshape((1, 1, 3))
        self.to_rgb = to_rgb

    def __getitem__(self, index):
        """Load, preprocess and return sample ``index`` as ``(img, label)``.

        Raises:
            OSError: If the image file cannot be decoded.
        """
        image_path = self.all_image_paths[index]
        img = cv.imread(image_path)
        if img is None:
            # imread signals failure by returning None instead of raising.
            raise OSError("failed to read image: %s" % image_path)
        if self.to_rgb:
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        img = cv.resize(img, (224, 224))
        img = img / 255.
        img = (img - self.mean) / self.std
        # Height x width x channel -> channel x height x width.
        img = np.transpose(img, [2, 0, 1])
        label = self.all_image_labels[index]
        img = torch.tensor(img, dtype=torch.float32)
        label = torch.tensor(label)
        return img, label

    def __len__(self):
        """Return the number of image files found under the root."""
        return len(self.all_image_paths)
接着初始化该自定义数据集,并使用 `DataLoader` 来加载图片数据集,可设置 `batch_size` 大小。
# Build both splits with the custom dataset and print their sizes.
train_set = Hotdog(train_dir)
test_set = Hotdog(test_dir)
for split in (train_set, test_set):
    print(len(split))

# Mini-batch loaders; only the training loader shuffles its samples.
batch_size = 32
train_iter = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
2000
800
随机选取9张图片可视化一下:
import random
from matplotlib import pyplot as plt
def denorm(img, channel_mean=None, channel_std=None):
    """Undo per-channel normalisation and return a displayable copy.

    Computes ``img[c] * std[c] + mean[c]`` for every channel ``c`` and clamps
    the result to ``[0, 1]``. Unlike an in-place loop, the input tensor is
    left unmodified, so the caller's sample keeps its normalised values.

    Args:
        img: Floating-point tensor whose first dimension indexes channels.
        channel_mean: Per-channel means; defaults to the module-level
            ``mean`` list.
        channel_std: Per-channel standard deviations; defaults to the
            module-level ``std`` list.

    Returns:
        A new tensor with the same shape as ``img``.
    """
    if channel_mean is None:
        channel_mean = mean
    if channel_std is None:
        channel_std = std
    channels = img.shape[0]
    # Shape the statistics as (C, 1, 1, ...) so they broadcast over the
    # remaining dimensions; only the first `channels` entries are used.
    stat_shape = (-1,) + (1,) * (img.dim() - 1)
    shift = img.new_tensor(channel_mean[:channels]).view(stat_shape)
    scale = img.new_tensor(channel_std[:channels]).view(stat_shape)
    return (img * scale + shift).clamp(0., 1.)
# Show a 3x3 grid of randomly chosen training images with their labels.
plt.figure(figsize=(8, 8))
for i in range(9):
    # randrange excludes its upper bound; randint(0, len(train_set)) could
    # return len(train_set) itself and index one past the last sample.
    img, label = train_set[random.randrange(len(train_set))]
    img = denorm(img)
    # Move the channel axis last, which is the layout imshow expects.
    img = img.permute(1, 2, 0)
    ax = plt.subplot(3, 3, i + 1)
    # Reverse the channel axis: the samples keep the BGR order that OpenCV
    # loads, while imshow interprets the last axis as RGB.
    ax.imshow(img.numpy()[:, :, ::-1])
    ax.set_title("label = %d" % label)
    # Hide the axis ticks; they carry no information for an image grid.
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()
然后在ResNet18预训练模型上进行微调,最后执行训练过程,和上面一样,就不复述了。
训练过程:
training on cuda
epoch 1, loss 1.5702, train acc 0.834, test acc 0.911, time 23.2 sec
epoch 2, loss 0.1694, train acc 0.958, test acc 0.939, time 19.1 sec
epoch 3, loss 0.0469, train acc 0.984, test acc 0.956, time 19.4 sec
epoch 4, loss 0.0163, train acc 0.998, test acc 0.956, time 19.3 sec
epoch 5, loss 0.0155, train acc 0.996, test acc 0.949, time 19.8 sec
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import pathlib
import numpy as np
import cv2 as cv
import time
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Dataset roots; each one holds one sub-directory per class.
train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"
# Per-channel normalisation statistics applied in Hotdog.__getitem__.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


class Hotdog(Dataset):
    """Image dataset read from ``<path>/<class name>/<image file>``.

    Class directories are sorted by name and numbered from 0; every image
    gets the index of its parent directory as label. Samples are returned as
    ``(image, label)`` tensor pairs with the image resized to 224x224,
    scaled to [0, 1], normalised with the module-level ``mean``/``std`` and
    laid out channel-first.

    Args:
        path: Root directory containing one sub-directory per class.
        to_rgb: When true, convert the BGR image loaded by OpenCV to RGB
            before normalising. Defaults to ``False``, which keeps the
            channel order produced by ``cv.imread``.
            NOTE(review): ``mean``/``std`` look like statistics listed in
            RGB order -- confirm; if so, callers should pass
            ``to_rgb=True``.

    Raises:
        FileNotFoundError: If ``path`` is not an existing directory.
    """

    # Suffixes treated as images; stray files such as ``.DS_Store`` or
    # ``Thumbs.db`` would otherwise become unreadable samples.
    IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm",
                      ".tif", ".tiff", ".webp")

    def __init__(self, path, to_rgb=False):
        data_root = pathlib.Path(path)
        if not data_root.is_dir():
            raise FileNotFoundError("dataset directory not found: %s" % path)
        label_names = sorted(
            entry.name for entry in data_root.iterdir() if entry.is_dir()
        )
        label_to_index = {name: index for index, name in enumerate(label_names)}
        # Sort so the sample order does not depend on the filesystem.
        image_files = sorted(
            candidate
            for name in label_names
            for candidate in (data_root / name).iterdir()
            if candidate.is_file()
            and candidate.suffix.lower() in self.IMAGE_SUFFIXES
        )
        self.all_image_paths = [str(image_file) for image_file in image_files]
        self.all_image_labels = [
            label_to_index[image_file.parent.name] for image_file in image_files
        ]
        # Shaped (1, 1, 3) to broadcast over a height x width x channel image.
        self.mean = np.array(mean).reshape((1, 1, 3))
        self.std = np.array(std).reshape((1, 1, 3))
        self.to_rgb = to_rgb

    def __getitem__(self, index):
        """Load, preprocess and return sample ``index`` as ``(img, label)``.

        Raises:
            OSError: If the image file cannot be decoded.
        """
        image_path = self.all_image_paths[index]
        img = cv.imread(image_path)
        if img is None:
            # imread signals failure by returning None instead of raising.
            raise OSError("failed to read image: %s" % image_path)
        if self.to_rgb:
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        img = cv.resize(img, (224, 224))
        img = img / 255.
        img = (img - self.mean) / self.std
        # Height x width x channel -> channel x height x width.
        img = np.transpose(img, [2, 0, 1])
        label = self.all_image_labels[index]
        img = torch.tensor(img, dtype=torch.float32)
        label = torch.tensor(label)
        return img, label

    def __len__(self):
        """Return the number of image files found under the root."""
        return len(self.all_image_paths)
# Build both splits with the custom dataset.
train_set, test_set = Hotdog(train_dir), Hotdog(test_dir)

# Mini-batch loaders; only the training loader shuffles its samples.
batch_size = 32
train_iter = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
def train(net, train_iter, test_iter, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, printing metrics per epoch.

    Every epoch runs one optimisation pass over ``train_iter`` followed by a
    gradient-free accuracy pass over ``test_iter``, then prints the mean
    batch loss, both accuracies and the elapsed time. The model and all
    batches are moved to the module-level ``device``. Returns nothing.
    """
    net = net.to(device)
    print("training on", device)
    for epoch in range(num_epochs):
        epoch_start = time.time()

        # Optimisation pass over the training data.
        net.train()
        loss_total, train_hits, train_seen, num_batches = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            optimizer.zero_grad()  # clear gradients left from the last step
            y_hat = net(X)
            batch_loss = criterion(y_hat, y)
            batch_loss.backward()
            optimizer.step()
            loss_total += batch_loss.cpu().item()
            correct = y_hat.argmax(dim=1) == y
            train_hits += correct.sum().cpu().item()
            train_seen += y.shape[0]
            num_batches += 1

        # Accuracy pass over the evaluation data, without tracking gradients.
        with torch.no_grad():
            net.eval()
            test_hits, test_seen = 0.0, 0
            for X, y in test_iter:
                predicted = net(X.to(device)).argmax(dim=1)
                correct = predicted == y.to(device)
                test_hits += correct.float().sum().cpu().item()
                test_seen += y.shape[0]

        report = (
            epoch + 1,
            loss_total / num_batches,
            train_hits / train_seen,
            test_hits / test_seen,
            time.time() - epoch_start,
        )
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % report)
# Load a pretrained ResNet-18 and replace its final layer with a fresh
# 2-output linear layer.
pretrained_net = models.resnet18(pretrained=True)
num_ftrs = pretrained_net.fc.in_features
pretrained_net.fc = nn.Linear(num_ftrs, 2)

# Separate the new head from all remaining parameters by object id.
output_params = [id(param) for param in pretrained_net.fc.parameters()]
feature_params = (
    param for param in pretrained_net.parameters()
    if id(param) not in output_params
)

# The replaced head uses a learning rate 10x that of the other parameters.
lr = 0.01
param_groups = [
    {'params': feature_params},
    {'params': pretrained_net.fc.parameters(), 'lr': lr * 10},
]
optimizer = optim.SGD(param_groups, lr=lr, weight_decay=0.001)
loss = torch.nn.CrossEntropyLoss()

train(pretrained_net, train_iter, test_iter, loss, optimizer, num_epochs=5)