PyTorch: Transfer Learning

TRANSFER LEARNING TUTORIAL

  • Finetuning the convnet: instead of random initialization, we initialize the network with a pretrained one, e.g. a network trained on the ImageNet 1000-class dataset. The rest of the training looks as usual.
  • ConvNet as fixed feature extractor: here we freeze the weights of the whole network except the final fully connected layer. That last layer is replaced with a new one with random weights, and only this layer is trained. (The essential difference between the two scenarios is sketched right after this list.)
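
The essential difference between the two scenarios, in miniature (a sketch; both are implemented in full below):

import torch.nn as nn
from torchvision import models

# Scenario 1 -- finetuning: all weights stay trainable, only the
# classification head is swapped for the new 2-class task.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2)

# Scenario 2 -- fixed feature extractor: freeze the backbone first, then
# swap the head; only the new head keeps requires_grad=True.
model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(model.fc.in_features, 2)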
import os
import time
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import numpy as np
import matplotlib.pyplot as plt

import torchvision
from torchvision import datasets, models, transforms

LOAD DATA

###
# normalize: the mean/std values below are the standard ImageNet statistics
#            expected by torchvision's pretrained models (see the note after
#            the transforms for estimating your own)
# image show: ToTensor() maps [0, 255] to [0.0, 1.0]; imshow() below undoes
#             the normalization before plotting
###

# Data augmentation and normalization for training
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),  # maps [0, 255] -> [0.0, 1.0]
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
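
The mean/std values above are the standard ImageNet statistics that torchvision's pretrained models expect; they are not derived from this dataset. If you train from scratch and want statistics for your own data, a minimal sketch (assuming the hymenoptera_data layout used below):

# Estimate per-channel mean/std over the train split (sketch; only needed
# when not reusing the ImageNet statistics).
stats_ds = datasets.ImageFolder(
    os.path.join('hymenoptera_data', 'train'),
    transforms.Compose([transforms.Resize(256),
                        transforms.CenterCrop(224),
                        transforms.ToTensor()]))
stats_loader = torch.utils.data.DataLoader(stats_ds, batch_size=64)

n = 0
mean = torch.zeros(3)
sq = torch.zeros(3)
for imgs, _ in stats_loader:
    b = imgs.size(0)
    flat = imgs.view(b, 3, -1)                # flatten H*W per channel
    mean += flat.mean(dim=2).sum(dim=0)
    sq += flat.pow(2).mean(dim=2).sum(dim=0)
    n += b
mean /= n
std = (sq / n - mean.pow(2)).sqrt()           # Var[X] = E[X^2] - E[X]^2
print(mean, std)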

data_dir = 'hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def imshow(inp, title=None):
    """Display a tensor image after undoing the ImageNet normalization."""
    inp = inp.numpy().transpose((1, 2, 0))  # CHW -> HWC
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean                  # de-normalize
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # give the GUI a moment to draw
    
inputs, classes = next(iter(dataloaders['train']))

out = torchvision.utils.make_grid(inputs)

imshow(out, title=[class_names[x] for x in classes])

TRAINING THE MODEL

  • Scheduling the learning rate
  • Saving the best model (kept in memory here; a disk-checkpointing sketch follows the function)
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())  # note: state_dict() must be called, not referenced
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs-1))
        print('-'*10)
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()   # training mode (dropout/batch-norm active)
            else:
                model.eval()    # evaluation mode
            
            running_loss = 0.0
            running_corrects = 0
            
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
            if phase == 'train':
                # step the LR scheduler once per epoch, after the optimizer
                # updates for that epoch (the required order since PyTorch 1.1)
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()
    
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    
    print('Best val Acc: {:4f}'.format(best_acc))
    
    model.load_state_dict(best_model_wts)
    
    return model
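
train_model keeps the best weights only in memory. To also persist them to disk (a sketch; 'best_model.pth' is an arbitrary filename, and model_ft refers to the finetuned model created below):

# Save the best weights after a training run, restore them later/elsewhere.
torch.save(model_ft.state_dict(), 'best_model.pth')
model_ft.load_state_dict(torch.load('best_model.pth'))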
    

Visualizing the model predictions

def visualize_model(model, num_images=6):
    # remember which mode the model is currently in, so it can be restored
    was_training = model.training
    # switch to eval mode: disables dropout, uses running batch-norm stats
    model.eval()
    images_so_far = 0
    fig = plt.figure()
    
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])
                
                if images_so_far == num_images:
                    # put the model back into the mode it arrived in
                    model.train(mode=was_training)
                    return
        # also restore the mode if the loader ran out of images first
        model.train(mode=was_training)

FINETUNING THE CONVNET

model_ft = models.resnet18(pretrained=True)
# note: torchvision >= 0.13 deprecates 'pretrained=True' in favor of
# weights=models.ResNet18_Weights.DEFAULT
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)
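
The hard-coded 2 matches the two classes (ants, bees) in hymenoptera_data. An equivalent line that is robust to a different number of classes:

model_ft.fc = nn.Linear(num_ftrs, len(class_names))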

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
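
With step_size=7 and gamma=0.1, StepLR multiplies the learning rate by 0.1 every 7 epochs. A quick arithmetic check of the resulting schedule (a sketch, no training involved):

# LR per epoch under StepLR(step_size=7, gamma=0.1) with base LR 0.001
for epoch in [0, 6, 7, 13, 14, 21, 24]:
    print(epoch, 0.001 * 0.1 ** (epoch // 7))
# epochs 0-6 -> 1e-3, 7-13 -> 1e-4, 14-20 -> 1e-5, 21-24 -> 1e-6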

Train and evaluate

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25)
Epoch 0/24
----------
train Loss: 0.7851 Acc: 0.6270
val Loss: 1.0494 Acc: 0.6863

Epoch 1/24
----------
train Loss: 0.4664 Acc: 0.8033
val Loss: 0.2943 Acc: 0.8954

Epoch 2/24
----------
train Loss: 0.6914 Acc: 0.7254
val Loss: 0.2396 Acc: 0.9281

Epoch 3/24
----------
train Loss: 0.5356 Acc: 0.7951
val Loss: 0.3148 Acc: 0.9281

Epoch 4/24
----------
train Loss: 0.5372 Acc: 0.7910
val Loss: 0.5439 Acc: 0.8366

Epoch 5/24
----------
train Loss: 0.6590 Acc: 0.7787
val Loss: 0.3142 Acc: 0.8889

Epoch 6/24
----------
train Loss: 0.5722 Acc: 0.8033
val Loss: 0.5639 Acc: 0.8170

Epoch 7/24
----------
train Loss: 0.3940 Acc: 0.8402
val Loss: 0.2142 Acc: 0.9085

Epoch 8/24
----------
train Loss: 0.3693 Acc: 0.8484
val Loss: 0.2529 Acc: 0.8758

Epoch 9/24
----------
train Loss: 0.2801 Acc: 0.8852
val Loss: 0.2550 Acc: 0.8758

Epoch 10/24
----------
train Loss: 0.2674 Acc: 0.8934
val Loss: 0.2333 Acc: 0.9085

Epoch 11/24
----------
train Loss: 0.2915 Acc: 0.8689
val Loss: 0.2292 Acc: 0.9085

Epoch 12/24
----------
train Loss: 0.3659 Acc: 0.8443
val Loss: 0.2254 Acc: 0.9085

Epoch 13/24
----------
train Loss: 0.3359 Acc: 0.8525
val Loss: 0.1940 Acc: 0.9150

Epoch 14/24
----------
train Loss: 0.2926 Acc: 0.8689
val Loss: 0.2075 Acc: 0.8954

Epoch 15/24
----------
train Loss: 0.2606 Acc: 0.8934
val Loss: 0.2457 Acc: 0.9085

Epoch 16/24
----------
train Loss: 0.2715 Acc: 0.8770
val Loss: 0.2307 Acc: 0.9020

Epoch 17/24
----------
train Loss: 0.2026 Acc: 0.8975
val Loss: 0.2478 Acc: 0.9020

Epoch 18/24
----------
train Loss: 0.2696 Acc: 0.8934
val Loss: 0.2385 Acc: 0.8954

Epoch 19/24
----------
train Loss: 0.2277 Acc: 0.9016
val Loss: 0.1987 Acc: 0.9085

Epoch 20/24
----------
train Loss: 0.2901 Acc: 0.9016
val Loss: 0.2024 Acc: 0.9020

Epoch 21/24
----------
train Loss: 0.2981 Acc: 0.8811
val Loss: 0.2100 Acc: 0.8889

Epoch 22/24
----------
train Loss: 0.2462 Acc: 0.8893
val Loss: 0.2201 Acc: 0.9020

Epoch 23/24
----------
train Loss: 0.3056 Acc: 0.8730
val Loss: 0.1911 Acc: 0.9150

Epoch 24/24
----------
train Loss: 0.2561 Acc: 0.9057
val Loss: 0.2074 Acc: 0.9216

Training complete in 3m 57s
Best val Acc: 0.928105
visualize_model(model_ft)

CONVNET AS FIXED FEATURE EXTRACTOR

model_conv = torchvision.models.resnet18(pretrained=True)

for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features

model_conv.fc = nn.Linear(num_ftrs, 2)

model_conv = model_conv.to(device)
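
A quick sanity check (a sketch, not part of the tutorial): after freezing the backbone and replacing fc, only the new head should be trainable:

trainable = [name for name, p in model_conv.named_parameters() if p.requires_grad]
print(trainable)  # expected: ['fc.weight', 'fc.bias']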

criterion = nn.CrossEntropyLoss()

# Observe that only the parameters of the final layer are being optimized,
# as opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)
model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=25)
Epoch 0/24
----------
train Loss: 0.6007 Acc: 0.6967
val Loss: 0.3234 Acc: 0.8627

Epoch 1/24
----------
train Loss: 0.4813 Acc: 0.7664
val Loss: 0.2608 Acc: 0.8954

Epoch 2/24
----------
train Loss: 0.4790 Acc: 0.7992
val Loss: 0.2120 Acc: 0.9477

Epoch 3/24
----------
train Loss: 0.5683 Acc: 0.7869
val Loss: 0.3591 Acc: 0.8497

Epoch 4/24
----------
train Loss: 0.3770 Acc: 0.8402
val Loss: 0.2527 Acc: 0.9020

Epoch 5/24
----------
train Loss: 0.4000 Acc: 0.8279
val Loss: 0.1915 Acc: 0.9346

Epoch 6/24
----------
train Loss: 0.3457 Acc: 0.8402
val Loss: 0.2083 Acc: 0.9477

Epoch 7/24
----------
train Loss: 0.3756 Acc: 0.8443
val Loss: 0.2210 Acc: 0.9346

Epoch 8/24
----------
train Loss: 0.3511 Acc: 0.8607
val Loss: 0.2047 Acc: 0.9412

Epoch 9/24
----------
train Loss: 0.4228 Acc: 0.8361
val Loss: 0.2112 Acc: 0.9412

Epoch 10/24
----------
train Loss: 0.3002 Acc: 0.8648
val Loss: 0.1862 Acc: 0.9477

Epoch 11/24
----------
train Loss: 0.4303 Acc: 0.8320
val Loss: 0.2357 Acc: 0.9216

Epoch 12/24
----------
train Loss: 0.3457 Acc: 0.8320
val Loss: 0.1871 Acc: 0.9477

Epoch 13/24
----------
train Loss: 0.4396 Acc: 0.8033
val Loss: 0.2109 Acc: 0.9346

Epoch 14/24
----------
train Loss: 0.2644 Acc: 0.8852
val Loss: 0.2328 Acc: 0.9412

Epoch 15/24
----------
train Loss: 0.3323 Acc: 0.8689
val Loss: 0.2145 Acc: 0.9346

Epoch 16/24
----------
train Loss: 0.3899 Acc: 0.7951
val Loss: 0.2646 Acc: 0.9150

Epoch 17/24
----------
train Loss: 0.4303 Acc: 0.8074
val Loss: 0.2338 Acc: 0.9281

Epoch 18/24
----------
train Loss: 0.3758 Acc: 0.8607
val Loss: 0.2158 Acc: 0.9346

Epoch 19/24
----------
train Loss: 0.2428 Acc: 0.9303
val Loss: 0.2132 Acc: 0.9281

Epoch 20/24
----------
train Loss: 0.3515 Acc: 0.8566
val Loss: 0.2062 Acc: 0.9412

Epoch 21/24
----------
train Loss: 0.3181 Acc: 0.8689
val Loss: 0.2119 Acc: 0.9281

Epoch 22/24
----------
train Loss: 0.3594 Acc: 0.8156
val Loss: 0.2083 Acc: 0.9412

Epoch 23/24
----------
train Loss: 0.3170 Acc: 0.8648
val Loss: 0.1905 Acc: 0.9346

Epoch 24/24
----------
train Loss: 0.3265 Acc: 0.8607
val Loss: 0.2156 Acc: 0.9281

Training complete in 2m 52s
Best val Acc: 0.947712
visualize_model(model_conv)
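
When this runs as a plain script rather than in an interactive session, the figures may close as soon as the script exits; a common ending (standard matplotlib calls):

plt.ioff()  # turn interactive mode off
plt.show()  # block until the figure windows are closed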
