BoxCars 数据集存放在 .pkl 文件中，首先需要定义一个读取 .pkl 文件的函数。
import pickle
def load_cache(path, encoding="latin-1", fix_imports=True):
    """Load and return a pickled object from *path*.

    encoding="latin-1" keeps Python-2-era pickles (such as the BoxCars
    metadata files) readable under Python 3.

    NOTE(review): this deserializes arbitrary pickle data — only use it
    on trusted files.
    """
    with open(path, "rb") as f:
        # Bug fix: fix_imports was previously hard-coded to True here,
        # silently ignoring the caller's argument.
        return pickle.load(f, encoding=encoding, fix_imports=fix_imports)
下面放dataloader类
import copy
import os
import pickle
import time

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import Dataset
from torchvision import models, transforms
def default_loader(path, bb2d):
    """Open the image at *path*, crop it to its 2D bounding box, and
    return it as an RGB PIL image; returns None if the file is unreadable.

    NOTE(review): the crop uses bb2d[3] for width and bb2d[2] for height.
    BoxCars' 2DBB is usually (x, y, w, h), which would make this
    bb2d[0]+bb2d[2] / bb2d[1]+bb2d[3] — index order preserved from the
    original; confirm against the dataset.
    """
    try:
        img = Image.open(path)
        # Bug fix: Image.crop and Image.convert return NEW images; the
        # original discarded both results, so neither the bounding-box
        # crop nor the RGB conversion ever took effect.
        img = img.crop((bb2d[0], bb2d[1], bb2d[0] + bb2d[3], bb2d[1] + bb2d[2]))
        img = img.convert('RGB')
        return img
    except (IOError, OSError):
        # Best-effort, as before: report the bad file and return None.
        print("Cannot read image:{}".format(path))
class customData(Dataset):
    """BoxCars116k classification dataset.

    Builds parallel lists of image paths, 2D bounding boxes and class
    labels for one split (*part*, e.g. 'hard') and *mode*
    ('train' / 'validation' / 'test'), read from the pickled metadata
    files under *path*.
    """

    def __init__(self, path, part, mode, dataset='', data_transforms=None, loader=default_loader):
        self.img_name = []   # full path of every sample image
        self.img_label = []  # class id per sample
        self.bb2d = []       # 2D bounding box per sample
        data = load_cache(path + 'dataset.pkl')
        classification = load_cache(path + 'classification_splits.pkl')
        # Renamed from 'list', which shadowed the builtin.
        split = classification[part][mode]
        for entry in split:
            vehicle_id, class_id = entry[0], entry[1]
            # Every instance (image) of a vehicle shares its class label.
            for instance in data['samples'][vehicle_id]['instances']:
                self.img_name.append(path + 'images/' + instance['path'])
                self.bb2d.append(instance['2DBB'])
                self.img_label.append(class_id)
        self.data_transforms = data_transforms
        self.dataset = dataset
        self.loader = loader

    def __len__(self):
        return len(self.img_name)

    def __getitem__(self, item):
        """Load one (image, label) pair, applying this split's transform."""
        img_name = self.img_name[item]
        label = self.img_label[item]
        bb2d = self.bb2d[item]
        img = self.loader(img_name, bb2d)
        if self.data_transforms is not None:
            try:
                img = self.data_transforms[self.dataset](img)
            except Exception:
                # Bug fix: the original format string was "()" instead of
                # "{}", so the failing path was never shown.
                print("Cannot transform images: {}".format(img_name))
        return img, label
# self.img_name这个list里面不一定要放样本图片路径
# 放atlas.pkl文件里面的图片编码应该也可以
# 在data_loader函数里再把它转化成图片格式应该就可以了
# 可以用cv2.imdecode这个函数,PIL里面的具体函数不清楚
下面定义train函数
def train_model(model, criterion, optimizer, scheduler, num_epochs, use_gpu):
    """Train *model* with per-epoch validation; return it loaded with the
    weights that achieved the best validation accuracy.

    Relies on the module-level globals ``dataloders`` and
    ``dataset_sizes`` created in the main script. Checkpoints the full
    model to ``output_hard/`` after every training phase.
    """
    since = time.time()
    # Bug fix: state_dict() returns references to the live parameter
    # tensors, so without deepcopy the "best" weights were silently
    # overwritten by subsequent training steps.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        begin_time = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'validation']:
            count_batch = 0
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0.0
            seen = 0  # samples processed so far (handles a short last batch)
            for inputs, labels in dataloders[phase]:
                count_batch += 1
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                optimizer.zero_grad()
                # Only track gradients during training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # CrossEntropyLoss returns a per-sample MEAN; scale back to
                # a sum so the running total divides correctly by sample
                # count (the original divided a mean by batch_size*batches,
                # understating the loss).
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).to(torch.float32)
                seen += inputs.size(0)
                # Print running statistics every 10 batches.
                if count_batch % 10 == 0:
                    batch_loss = running_loss / seen
                    batch_acc = running_corrects / seen
                    print('{} Epoch [{}] Batch [{}] Loss: {:.4f} Acc: {:.4f} Time: {:.4f}s'. \
                          format(phase, epoch, count_batch, batch_loss, batch_acc, time.time() - begin_time))
                    begin_time = time.time()
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            if phase == 'train':
                # Bug fix: scheduler.step() must run AFTER optimizer.step()
                # (PyTorch >= 1.1 ordering), i.e. at the end of the phase,
                # not before it.
                scheduler.step()
                # Checkpoint the whole model once per epoch.
                if not os.path.exists('output_hard'):
                    os.makedirs('output_hard')
                torch.save(model, 'output_hard/resnet_epoch{}.pkl'.format(epoch))
            # Keep a deep copy of the best-performing weights.
            if phase == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    # Load the best weights back before returning.
    model.load_state_dict(best_model_wts)
    return model
然后是主函数:
if __name__ == '__main__':
    # Training runs on CPU here; GPU transfer is gated by the use_gpu
    # flag passed to train_model.
    data_transforms = {
        'train': transforms.Compose([
            # Resize + RandomCrop empirically trained better here than
            # transforms.RandomResizedCrop(224).
            transforms.Resize((256, 256), interpolation=3),  # 3 == PIL bicubic
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'validation': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    batch_size = 32
    # Only needs to be >= the real number of classes; surplus logits are
    # simply never the target.
    num_class = 200
    path = '/home/wang/datasets/BoxCars116k/'
    image_datasets = {x: customData(path=path,
                                    part='hard',  # experiment on the 'hard' split
                                    mode=x,
                                    data_transforms=data_transforms,
                                    dataset=x) for x in ['train', 'validation']}
    # Wrap the datasets in shuffling loaders.
    dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                 batch_size=batch_size,
                                                 shuffle=True) for x in ['train', 'validation']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'validation']}
    # ImageNet-pretrained ResNet-50 with the final fc layer replaced to
    # match our class count.
    model_ft = models.resnet50(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_class)
    criterion = nn.CrossEntropyLoss()
    # All parameters are optimized (full fine-tuning).
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.005, momentum=0.9)
    # Decay LR by a factor of 0.2 every 5 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.2)
    model_ft = train_model(model=model_ft,
                           criterion=criterion,
                           optimizer=optimizer_ft,
                           scheduler=exp_lr_scheduler,
                           num_epochs=25,
                           use_gpu=False)
    # Bug fix: 'output/' was never created (training only creates
    # 'output_hard/'), so this save crashed after the full 25-epoch run.
    if not os.path.exists('output'):
        os.makedirs('output')
    torch.save(model_ft, "output/best_resnet.pkl")
最后是test
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision.datasets as datasets
from torch.utils.data import Dataset
from torchvision import models, transforms
import os
import time
from train import customData
from PIL import Image
# Evaluate a saved checkpoint on the 'hard' test split.
model = torch.load('./output/resnet_epoch6.pkl')
model.eval()
criterion = nn.CrossEntropyLoss()
batch_size = 1
test_transforms = {
    'test': transforms.Compose([
        transforms.Resize((256, 256), interpolation=3),  # 3 == PIL bicubic
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])}
path = '/home/wang/datasets/BoxCars116k/'
image_datasets = {x: customData(path=path,
                                part='hard',  # test on the matching 'hard' split
                                mode=x,
                                data_transforms=test_transforms,
                                dataset=x) for x in ['test']}
test_loders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                              batch_size=batch_size,
                                              shuffle=True) for x in ['test']}
total = 0
correct = 0
# No gradients are needed at evaluation time.
with torch.no_grad():
    for i, (test_images, test_labels) in enumerate(test_loders['test']):
        outputs = model(test_images)
        loss = criterion(outputs, test_labels)
        # Bug fix: loss.long() truncated the float loss to an integer
        # before the %.4f print; .item() yields the real scalar value.
        loss_num = loss.item()
        _, predicted = torch.max(outputs.data, 1)
        # Count per sample so this also works with batch_size > 1
        # (identical to the original counts at batch_size == 1).
        total += test_labels.size(0)
        correct += (predicted == test_labels).sum().item()
        print("Iter %d, Test loss:%.4f , correct_num:%d" % (i + 1, loss_num, correct))
print('total {}'.format(total))
accuracy = correct / total
print(accuracy)