pytorch dataloader 自定义数据读取,resnet-50在boxcars数据集上

boxcars数据集放在pkl文件中,首先需要读取 .pkl文件,定义一个读取函数。

import pickle

def load_cache(path, encoding="latin-1", fix_imports=True):
    """Load and return a pickled object from *path*.

    encoding="latin-1" is the conventional default for reading pickles
    written by Python 2.
    NOTE: pickle.load must never be used on untrusted files — it can
    execute arbitrary code.
    """
    with open(path, "rb") as f:
        # Bug fix: honor the fix_imports argument instead of hard-coding True.
        return pickle.load(f, encoding=encoding, fix_imports=fix_imports)

下面放dataloader类

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable

from torch.utils.data import Dataset
from torchvision import models,transforms
import os
import time
import pickle

from PIL import Image


def default_loader(path, bb2d):
    """Open the image at *path*, crop it to its 2-D bounding box, return RGB.

    bb2d is assumed to be (x, y, width, height) as in the BoxCars116k
    annotations — TODO confirm; the original code used (x, y, x+h, y+w),
    which swaps width and height.

    Returns None (after printing a message) when the image cannot be read.
    """
    try:
        img = Image.open(path)
        # Bug fix: PIL's crop()/convert() return NEW images; the original
        # code discarded their results, so the crop was silently never
        # applied and the mode was never converted.
        img = img.crop((bb2d[0], bb2d[1], bb2d[0] + bb2d[2], bb2d[1] + bb2d[3]))
        return img.convert('RGB')
    except OSError:
        # Narrowed from a bare except: PIL raises OSError (and subclasses)
        # for unreadable/undecodable files.
        print("Cannot read image:{}".format(path))
        return None

class customData(Dataset):
    """BoxCars116k dataset wrapper.

    Collects the image path, 2-D bounding box and class label of every
    instance of every vehicle listed in the requested classification split.

    Args:
        path: dataset root containing dataset.pkl, classification_splits.pkl
            and the images/ directory.
        part: split name inside classification_splits.pkl (e.g. 'hard').
        mode: 'train' / 'validation' / 'test'.
        dataset: key used to select the transform from *data_transforms*.
        data_transforms: optional dict of torchvision transforms, or None.
        loader: callable(path, bb2d) -> image; defaults to default_loader.
    """

    def __init__(self, path, part, mode, dataset='', data_transforms=None,
                 loader=default_loader):
        # Parallel lists: entry i of each describes the same sample.
        self.img_name = []   # image file paths
        self.img_label = []  # integer class ids
        self.bb2d = []       # 2-D bounding boxes
        data = load_cache(path + 'dataset.pkl')
        classification = load_cache(path + 'classification_splits.pkl')
        # Renamed from `list`, which shadowed the builtin.
        split_entries = classification[part][mode]
        for entry in split_entries:
            vehicle_id, class_id = entry[0], entry[1]
            for instance in data['samples'][vehicle_id]['instances']:
                self.img_name.append(path + 'images/' + instance['path'])
                self.bb2d.append(instance['2DBB'])
                self.img_label.append(class_id)

        self.data_transforms = data_transforms
        self.dataset = dataset
        self.loader = loader

    def __len__(self):
        # Number of instances (not vehicles) in the split.
        return len(self.img_name)

    def __getitem__(self, item):
        img_name = self.img_name[item]
        label = self.img_label[item]
        bb2d = self.bb2d[item]
        # Load and crop one sample image.
        img = self.loader(img_name, bb2d)
        if self.data_transforms is not None:
            try:
                img = self.data_transforms[self.dataset](img)
            except Exception:
                # Bug fix: the original format string was "()" with no
                # placeholder, so the offending file name was never printed.
                print("Cannot transform images: {}".format(img_name))

        return img, label

# self.img_name这个list里面不一定要放样本图片路径
# 放atlas.pkl文件里面的图片编码应该也可以
# 在data_loader函数里再把它转化成图片格式应该就可以了
# 可以用cv2.imdecode这个函数,PIL里面的具体函数不清楚

 

下面定义train函数

def train_model(model, criterion, optimizer, scheduler, num_epochs, use_gpu):
    """Train *model* and return it with the best-validation-accuracy weights.

    Relies on the module-level globals `dataloders` and `dataset_sizes`
    being defined before the call (as done in __main__).

    Args:
        model: the network to train.
        criterion: loss function returning the per-batch mean loss.
        optimizer: optimizer over model.parameters().
        scheduler: learning-rate scheduler stepped once per training epoch.
        num_epochs: number of epochs to run.
        use_gpu: move batches to CUDA when True.
    """
    import copy  # function-scope import keeps the file's import block intact

    since = time.time()
    # Bug fix: state_dict() returns references to the live tensors, so the
    # original "best" snapshot silently tracked the current weights. A deep
    # copy is required to actually freeze them.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        begin_time = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'validation']:
            count_batch = 0
            if phase == 'train':
                model.train()   # enable dropout / batchnorm updates
            else:
                model.eval()    # evaluation mode

            running_loss = 0.0
            running_corrects = 0.0
            seen = 0  # samples processed so far in this phase

            # Iterate over data.
            for data in dataloders[phase]:
                count_batch += 1
                inputs, labels = data

                # Variable() is deprecated; tensors work directly.
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                # zero the parameter gradients
                optimizer.zero_grad()

                # Gradients are only needed in the training phase.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Bug fix: criterion returns the per-batch MEAN loss, so it
                # must be weighted by the batch size before being divided by
                # the dataset size; otherwise epoch_loss is mis-scaled and
                # the last (possibly smaller) batch is over-weighted.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).to(torch.float32)
                seen += inputs.size(0)

                # Print running statistics every 10 batches.
                if count_batch % 10 == 0:
                    batch_loss = running_loss / seen
                    batch_acc = running_corrects / seen
                    print('{} Epoch [{}] Batch [{}] Loss: {:.4f} Acc: {:.4f} Time: {:.4f}s'. \
                          format(phase, epoch, count_batch, batch_loss, batch_acc, time.time() - begin_time))
                    begin_time = time.time()

            if phase == 'train':
                # PyTorch >= 1.1 requires scheduler.step() AFTER the epoch's
                # optimizer steps (the original stepped it before training).
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Save a checkpoint after every training epoch.
            if phase == 'train':
                os.makedirs('output_hard', exist_ok=True)
                torch.save(model, 'output_hard/resnet_epoch{}.pkl'.format(epoch))

            # Deep-copy the best-performing weights.
            if phase == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

然后是主函数:

if __name__ == '__main__':
    # No GPU was used for this run, so the CUDA-related lines are commented out.
    data_transforms = {
        'train': transforms.Compose([
            # transforms.RandomResizedCrop(224) trained slightly worse than
            # the explicit Resize + RandomCrop combination below.
            transforms.Resize((256, 256), interpolation=3),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'validation': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    # use_gpu = torch.cuda.is_available()
    # print(use_gpu)

    batch_size = 32
    # Only needs to be >= the real number of classes: CrossEntropyLoss never
    # targets the unused extra logits, they are merely wasted capacity.
    num_class = 200

    path = '/home/wang/datasets/BoxCars116k/'
    image_datasets = {x: customData(path=path,
                                    part='hard',   # experiment on the 'hard' split
                                    mode=x,
                                    data_transforms=data_transforms,
                                    dataset=x) for x in ['train', 'validation']}

    # Wrap the datasets in batched, shuffled loaders.
    dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                 batch_size=batch_size,
                                                 shuffle=True) for x in ['train', 'validation']}

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'validation']}

    # Get a pretrained ResNet-50 and replace its final fc layer.
    model_ft = models.resnet50(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_class)

    # if use_gpu:
    #     model_ft = model_ft.cuda()

    # define cost function
    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized.
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.005, momentum=0.9)

    # Decay LR by a factor of 0.2 every 5 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.2)

    # multi-GPU
    # model_ft = torch.nn.DataParallel(model_ft, device_ids=[0,1])

    # train model
    model_ft = train_model(model=model_ft,
                           criterion=criterion,
                           optimizer=optimizer_ft,
                           scheduler=exp_lr_scheduler,
                           num_epochs=25,
                           use_gpu=False)

    # Bug fix: training only ever creates 'output_hard/'; 'output/' must be
    # created here or torch.save raises FileNotFoundError after 25 epochs
    # of training are already done.
    os.makedirs('output', exist_ok=True)
    torch.save(model_ft, "output/best_resnet.pkl")

 

最后是test

import torch
import torch.nn as nn

import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision.datasets as datasets
from torch.utils.data import Dataset

from torchvision import models, transforms

import os
import time
from train import customData
from PIL import Image

# Evaluation script: load a training checkpoint and measure test accuracy.
model = torch.load('./output/resnet_epoch6.pkl')
model.eval()
criterion = nn.CrossEntropyLoss()

batch_size = 1

test_transforms = {
        'test': transforms.Compose([
            transforms.Resize((256, 256), interpolation=3),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])}

path = '/home/wang/datasets/BoxCars116k/'

image_datasets = {x: customData(path=path,
                                part='hard',   # test on the matching 'hard' split
                                mode=x,
                                data_transforms=test_transforms,
                                dataset=x) for x in ['test']}

test_loders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                             batch_size=batch_size,
                                             shuffle=True) for x in ['test']}

total = 0
correct = 0

# No gradients are needed at test time; no_grad saves memory and time.
with torch.no_grad():
    for i, (test_images, test_labels) in enumerate(test_loders['test']):
        # test_images = test_images.cuda()
        # test_labels = test_labels.cuda()
        outputs = model(test_images)
        loss = criterion(outputs, test_labels)
        # Bug fix: the original used loss.long(), which truncates the float
        # loss to an integer before the %.4f print; item() keeps the value.
        loss_num = loss.item()
        _, predicted = torch.max(outputs.data, 1)
        # size(0)/sum() also work for batch_size > 1, unlike the original
        # `total += 1` / `if predicted == test_labels` single-sample logic.
        total += test_labels.size(0)
        correct += (predicted == test_labels).sum().item()
        print("Iter %d, Test loss:%.4f , correct_num:%d" % (i + 1, loss_num, correct))

print('total {}'.format(total))
accuracy = correct / total
print(accuracy)

 

你可能感兴趣的:(pytorch)