作者用resnet50跑了一下Stanford Cars数据集。该数据集共有196类,16185张图片,其中训练集有8144张,测试集有8041张图片。
训练集是这样的:
train/00001.jpg
train/00002.jpg
……
对应的标签放在另一个mat文件中。
读取mat文件的代码如下,将其写进txt文件,一行只有一个正整数,表示对应的一个标签
# Convert the Stanford Cars .mat annotation file into a plain txt file:
# one positive integer (1-based class label) per line, in image order.
import scipy.io

data = scipy.io.loadmat('cars_train_annos.mat')
annotations = data['annotations']  # (1, N) record array, one record per image
# 'with' guarantees the file is closed even if a record is malformed
# (the original left the handle open on any exception).
with open('./train.txt', 'w') as f_train:
    for i in range(annotations.shape[1]):
        # field index 4 of each record is the class id — presumably
        # (x1, y1, x2, y2, class, fname); TODO confirm against the .mat schema
        num = str(int(annotations[0, i][4]))
        print(i, num)
        f_train.write(num + '\n')
有了图像和对应的标签之后,就可以开始写dataloader类了,代码如下:
import copy
import os
import time

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import Dataset
from torchvision import models, transforms
# use PIL Image to read image
def default_loader(path):
    """Open the image at *path* and return it converted to RGB.

    Returns None (after logging the path) when the file is missing or
    cannot be decoded, so one bad sample does not abort a training run.
    """
    try:
        img = Image.open(path)
        return img.convert('RGB')
    except OSError:
        # Narrowed from a bare `except:` which also hid programming
        # errors (NameError, KeyboardInterrupt, ...).
        print("Cannot read image:{}".format(path))
class customData(Dataset):
    """Dataset pairing the images under *img_path* with the integer labels
    listed (one per line) in *txt_path*.

    NOTE(review): images and labels are matched purely by position — the
    i-th line of the txt file must correspond to the i-th file in sorted
    filename order.  Verify both were produced in the same order.
    """

    def __init__(self, img_path, txt_path, dataset='', data_transforms=None, loader=default_loader):
        # One integer label per line of the txt file.
        with open(txt_path) as input_file:
            self.img_label = [int(line.strip()) for line in input_file]
        # Collect every file below img_path, sorted so the order matches
        # the label file.  NOTE(review): paths are joined with img_path,
        # not `root`, so files in subdirectories would get wrong paths —
        # fine for the flat train/ layout described above.
        self.img_name = []
        for root, dirs, files in os.walk(img_path):
            for name in sorted(files):
                self.img_name.append(os.path.join(img_path, name))
        self.data_transforms = data_transforms
        self.dataset = dataset      # key into the data_transforms dict ('train'/'val')
        self.loader = loader        # callable: path -> PIL image

    def __len__(self):
        return len(self.img_name)

    def __getitem__(self, item):
        """Return (transformed image, label) for the sample at index *item*."""
        img_name = self.img_name[item]
        label = self.img_label[item]
        img = self.loader(img_name)
        if self.data_transforms is not None:
            try:
                img = self.data_transforms[self.dataset](img)
            except Exception:
                # BUG FIX: the original format string used "()" instead of
                # "{}", so the offending path was never printed.
                print("Cannot transform images: {}".format(img_name))
        return img, label
下面定义train的主函数。因为没有设置验证集,下面代码中关于val的部分都需要注释掉。
def train_model(model, criterion, optimizer, scheduler, num_epochs, use_gpu):
    """Fine-tune *model* for *num_epochs* epochs and return it with the
    best-performing weights loaded.

    Parameters
    ----------
    model : nn.Module — network to train (modified in place).
    criterion : callable — loss function, e.g. nn.CrossEntropyLoss().
    optimizer : torch.optim.Optimizer.
    scheduler : LR scheduler — stepped once per training epoch.
    num_epochs : int — number of passes over the training set.
    use_gpu : bool — move each batch to CUDA when True.

    NOTE(review): reads the module-level globals ``dataloders``,
    ``dataset_sizes`` and ``batch_size`` defined in the ``__main__`` block.
    Checkpoints the whole model to output/resnet_epoch{n}.pkl every epoch.
    """
    since = time.time()
    # BUG FIX: state_dict() returns references to the live parameter
    # tensors, so the original "best" snapshot silently tracked the
    # current weights.  A deep copy freezes the snapshot.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        begin_time = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Only a training phase is configured; add 'val' here (plus a val
        # dataloader/transform) to enable validation.
        for phase in ['train']:
            count_batch = 0
            model.train(phase == 'train')  # train/eval mode (Dropout, BatchNorm)
            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            for data in dataloders[phase]:
                count_batch += 1
                inputs, labels = data
                # Variable is a no-op wrapper on modern PyTorch; kept for
                # consistency with the rest of the file.
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                # BUG FIX: criterion returns the batch *mean*; weight it by
                # the batch size so the later division by the dataset size
                # yields the true per-sample epoch loss.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).to(torch.float32)
                # print result every 10 batch
                if count_batch % 10 == 0:
                    batch_loss = running_loss / (batch_size*count_batch)
                    batch_acc = running_corrects / (batch_size*count_batch)
                    print('{} Epoch [{}] Batch [{}] Loss: {:.4f} Acc: {:.4f} Time: {:.4f}s'. \
                        format(phase, epoch, count_batch, batch_loss, batch_acc, time.time()-begin_time))
                    begin_time = time.time()
            # BUG FIX: since PyTorch 1.1 the LR scheduler must be stepped
            # *after* the epoch's optimizer steps, not before them.
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # checkpoint the full model after every training epoch
            if phase == 'train':
                if not os.path.exists('output'):
                    os.makedirs('output')
                torch.save(model, 'output/resnet_epoch{}.pkl'.format(epoch))
            # track the best weights (switch the condition to
            # `phase == 'val' and epoch_acc > best_acc` once a validation
            # split exists)
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
最后是主函数,同样,val的部分也注释掉了
if __name__ == '__main__':
    # Preprocessing pipeline; ImageNet mean/std because we fine-tune an
    # ImageNet-pretrained ResNet-50.
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        # 'val': transforms.Compose([
        #     transforms.Resize(256),
        #     transforms.CenterCrop(224),
        #     transforms.ToTensor(),
        #     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        # ]),
    }
    # use_gpu = torch.cuda.is_available()  # uncomment to run on GPU
    # print(use_gpu)
    batch_size = 32
    # NOTE(review): 196 classes but 197 outputs — train.txt labels are the
    # raw 1-based ids (1..196), so index 196 must be a valid target;
    # using 196 here raises a target-out-of-range error.
    num_class = 197
    # image_datasets = {x: customData(img_path='/ImagePath',
    #                                 txt_path=('/TxtFile/' + x + '.txt'),
    #                                 data_transforms=data_transforms,
    #                                 dataset=x) for x in ['train', 'val']}
    image_datasets = {x: customData(img_path='./'+x,
                                    txt_path='./'+ x + '.txt',
                                    data_transforms=data_transforms,
                                    dataset=x) for x in ['train']}  # ['train', 'val']
    # wrap your data and label into Tensor
    # (this global name — spelled "dataloders" — is read by train_model)
    dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                 batch_size=batch_size,
                                                 shuffle=True) for x in ['train']}  # ['train', 'val']
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train']}  # ['train', 'val']
    # get model and replace the original fc layer with your fc layer
    model_ft = models.resnet50(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    # print(num_ftrs)
    model_ft.fc = nn.Linear(num_ftrs, num_class)  # resize the final FC layer to the dataset's class count
    # if use gpu
    # if use_gpu:
    #     model_ft = model_ft.cuda()
    # define cost function
    criterion = nn.CrossEntropyLoss()
    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.005, momentum=0.9)
    # Decay LR by a factor of 0.2 every 5 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.2)
    # multi-GPU
    # model_ft = torch.nn.DataParallel(model_ft, device_ids=[0,1])
    # train model
    model_ft = train_model(model=model_ft,
                           criterion=criterion,
                           optimizer=optimizer_ft,
                           scheduler=exp_lr_scheduler,
                           num_epochs=25,
                           use_gpu=False)
    # save best model (the 'output' dir is created during training)
    torch.save(model_ft, "output/best_resnet.pkl")
如果需要用gpu来跑代码,其中主函数关于gpu的部分可以把注释给消掉。
另外,作者发现这个代码训练的准确率不够高。
实验发现,将
transforms.RandomResizedCrop(224)
改成
transforms.Resize((256, 256), interpolation=3)
transforms.RandomCrop(224)
准确率将大幅提升,能达到0.98左右