PyTorch 之目标检测（单目标检测，Single Object Detection）

数据集下载地址

AMD-Training400.zip
https://ai.baidu.com/broad/introduction

引入包

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image, ImageDraw
import numpy as np
import pandas as pd
import os
import copy

from sklearn.model_selection import ShuffleSplit
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision
import torchvision.transforms as transforms
from torchvision import utils
import torch.nn.functional as F
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchsummary import summary

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Worker-process count intended for DataLoader(num_workers=...):
# none on Windows (os.name == 'nt'), four elsewhere.
if os.name == 'nt':
    workers = 0
else:
    workers = 4

数据初探

查看标签数据情况

# Dataset location
data_path = './data/sod/'
labels_csv_path = os.path.join(data_path, 'Training400', 'Fovea_location.xlsx')

# Read the fovea annotations, using the 'ID' column as the index
labels_df = pd.read_excel(labels_csv_path, index_col='ID')
labels_df.head()

# First character of every file name; used below as the scatter-plot hue
AorN = []
for file_name in labels_df.imgName:
    AorN.append(file_name[0])

# Scatter plot of the annotated fovea positions
fovea_x = labels_df['Fovea_X']
fovea_y = labels_df['Fovea_Y']
sns.scatterplot(x=fovea_x, y=fovea_y, hue=AorN)

head()

Fovea_X-Fovea_Y.png

查看数据

# Preview a few images: fix the seed so the same sample is drawn every run
np.random.seed(2019)

# Grid layout of the preview figure
nrows, ncols = 2, 3
plt.rcParams['figure.figsize'] = (15, 9)
plt.subplots_adjust(wspace=0, hspace=0.3)

# Image IDs (the DataFrame index) and the matching file names
ids = labels_df.index
img_name = labels_df["imgName"]

# Draw one random ID per grid cell
n_samples = nrows * ncols
rnd_ids = np.random.choice(ids, n_samples)
print(rnd_ids)
# [ 73 371 160 294 217 191]

def load_img_label(labels_df, id_):
    """Open the image with index label `id_` and return it with its fovea centre.

    Args:
        labels_df: DataFrame with "imgName", "Fovea_X" and "Fovea_Y" columns.
        id_: index label of the row to load.

    Returns:
        (img, label): the opened PIL image and the (x, y) centre read from
        the "Fovea_X" / "Fovea_Y" columns.
    """
    file_name = labels_df["imgName"][id_]

    # Names starting with "A" live under AMD/, everything else under Non-AMD/
    subdir = "AMD" if file_name[0] == "A" else "Non-AMD"

    # data_path is the module-level dataset root
    img = Image.open(os.path.join(data_path, "Training400", subdir, file_name))

    # Centre position
    label = (labels_df["Fovea_X"][id_], labels_df["Fovea_Y"][id_])
    return img, label


def show_img_label(img,label,w_h=(50,50),thickness=2):
    """Draw a green box centred on `label` and display the image.

    Args:
        img: PIL image; the rectangle is drawn directly onto it.
        label: (cx, cy) centre of the box.
        w_h: (width, height) of the box.
        thickness: outline width passed to ImageDraw.
    """
    box_w, box_h = w_h
    cx, cy = label

    # Corner coordinates of the rectangle around the centre
    top_left = (cx - box_w/2, cy - box_h/2)
    bottom_right = (cx + box_w/2, cy + box_h/2)

    # NOTE: ImageDraw.Draw(img) draws on `img` itself, so the caller's image is modified
    painter = ImageDraw.Draw(img)
    painter.rectangle((top_left, bottom_right), outline="green", width=thickness)

    plt.imshow(np.asarray(img))
    
    
# Show every sampled image in its own grid cell (subplot indices start at 1)
for cell, id_ in enumerate(rnd_ids, start=1):
    img, label = load_img_label(labels_df, id_)
    print(img.size, label)

    plt.subplot(nrows, ncols, cell)
    show_img_label(img, label, w_h=(150, 150), thickness=20)
    plt.title(img_name[id_])
"""
(2124, 2056) (1037.89889229694, 1115.71768088143)
(1444, 1444) (635.148992978281, 744.648850248249)
(1444, 1444) (639.360312038611, 814.762764100936)
(2124, 2056) (1122.08407442503, 1067.58829793991)
(2124, 2056) (1092.93333646222, 1055.15333296773)
(2124, 2056) (1112.50135915347, 1070.7251775623)
"""

Random images

查看图片尺寸情况

# Collect the height and width of every image to inspect their distribution
h_list, w_list = [], []
for id_ in ids:
    # Names starting with "A" live under AMD/, everything else under Non-AMD/
    prefix = "AMD" if img_name[id_][0] == "A" else "Non-AMD"
    fullPath2img = os.path.join(data_path, "Training400", prefix, img_name[id_])

    # Only the size is needed, so close the file again right away.
    with Image.open(fullPath2img) as img:
        # PIL reports size as (width, height) - not (height, width)
        w, h = img.size
    h_list.append(h)
    w_list.append(w)


# Histogram of the image heights
sns.distplot(a=h_list, kde=False)
# Original remark: the distributions show that most heights and widths fall
# in the 1900-2100 range (TODO: re-check against the corrected height plot).

Hist of Height

自定义一些数据增强函数

也可调用其他数据增强包

Augmenter
imgaug
Albumentations
……

# 定义数据转换
# 数据增强 data augmentation 
import torchvision.transforms.functional as TF

# Resize an image and move its label accordingly
def resize_img_label(image,label=(0.,0.), target_size=(256,256)):
    """Resize `image` to `target_size` and rescale the centre label to match.

    Args:
        image: PIL image (its .size is read as (width, height)).
        label: (cx, cy) centre in pixels of the original image.
        target_size: (width, height) of the output image.

    Returns:
        (image_new, label_new): the resized image and the centre expressed
        in pixels of the resized image.
    """
    w_orig, h_orig = image.size
    w_target, h_target = target_size
    cx, cy = label

    # TF.resize takes the size as (height, width), whereas target_size is
    # (width, height) here; swap explicitly so non-square targets are
    # resized consistently with the label scaling below.
    image_new = TF.resize(image, (h_target, w_target))
    label_new = cx/w_orig*w_target, cy/h_orig*h_target

    return image_new, label_new

# Horizontal flip (the caller decides randomly whether to apply it)
def random_hflip(image,label):
    """Flip `image` left-right and mirror the label's x coordinate.

    Returns:
        (flipped_image, (width - x, y)).
    """
    width, _ = image.size
    x, y = label

    flipped = TF.hflip(image)
    return flipped, (width - x, y)

# Vertical flip (the caller decides randomly whether to apply it)
def random_vflip(image,label):
    """Flip `image` top-bottom and mirror the label's y coordinate.

    Returns:
        (flipped_image, (x, height - y)).
    """
    _, height = image.size
    x, y = label

    flipped = TF.vflip(image)
    return flipped, (x, height - y)

np.random.seed(1)
# Random translation (shift)
def random_shift(image, label, max_translate=(0.2,0.2)):
    """Translate `image` by a random offset and move the label with it.

    Args:
        image: PIL image.
        label: (cx, cy) centre in pixels.
        max_translate: maximum shift as a fraction of (width, height).

    Returns:
        (shifted_image, (cx + dx, cy + dy)) with integer pixel offsets dx, dy.
    """
    width, height = image.size
    frac_x, frac_y = max_translate
    cx, cy = label

    # One coefficient in [-1, 1) scales both axes.
    # NOTE: because the same coefficient is used for x and y, the two
    # offsets always share the same sign (for positive max_translate).
    coef = np.random.rand()*2 - 1
    dx = int(coef*frac_x*width)
    dy = int(coef*frac_y*height)

    shifted = TF.affine(image, translate=(dx, dy), shear=0, angle=0, scale=1)
    return shifted, (cx + dx, cy + dy)

# Scale a label down by element-wise division
def scale_label(a,b):
    """Return the list [a[0]/b[0], a[1]/b[1], ...] (stops at the shorter input)."""
    scaled = []
    for numerator, denominator in zip(a, b):
        scaled.append(numerator / denominator)
    return scaled

# Scale a label back up by element-wise multiplication
def rescale_label(a,b):
    """Return the list [a[0]*b[0], a[1]*b[1], ...] (stops at the shorter input)."""
    rescaled = []
    for value, factor in zip(a, b):
        rescaled.append(value * factor)
    return rescaled

# Load and resize a sample first so that img_r exists before it is used below
img, label = load_img_label(labels_df, 1)
img_r, label_r = resize_img_label(img, label)

# Contrast adjustment: called directly; it changes neither the image size nor the label
img_t = TF.adjust_contrast(img_r, contrast_factor=0.4)

# Gamma adjustment: likewise leaves the image size and the label untouched
img_t = TF.adjust_gamma(img_r, gamma=1.4)

数据增强几个样例（有的增强，label需要相关操作）

大小

# Load a sample and resize it, printing size and label before and after
img, label = load_img_label(labels_df, 1)
print(img.size, label)

img_r, label_r = resize_img_label(img, label)
print(img_r.size, label_r)

# Left: original image (larger, thicker box); right: resized image (default box)
panels = [
    (img, label, {"w_h": (150, 150), "thickness": 20}),
    (img_r, label_r, {}),
]
for column, (panel_img, panel_label, box_kwargs) in enumerate(panels, start=1):
    plt.subplot(1, 2, column)
    show_img_label(panel_img, panel_label, **box_kwargs)

resize_img_label.png

随机移动 random_shift

img, label = load_img_label(labels_df, 1)

# Resize
img_r, label_r = resize_img_label(img, label)

# Random shift of up to half the image size
img_t, label_t = random_shift(img_r, label_r, max_translate=(.5, .5))

# Left: resized image; right: shifted image
for column, (panel_img, panel_label) in enumerate(
        [(img_r, label_r), (img_t, label_t)], start=1):
    plt.subplot(1, 2, column)
    show_img_label(panel_img, panel_label)

random_shift.png

垂直翻转 random_vflip

img, label = load_img_label(labels_df, 7)

# Resize
img_r, label_r = resize_img_label(img, label)

# Vertical flip
img_fv, label_fv = random_vflip(img_r, label_r)

# Left: resized image; right: vertically flipped image
for column, (panel_img, panel_label) in enumerate(
        [(img_r, label_r), (img_fv, label_fv)], start=1):
    plt.subplot(1, 2, column)
    show_img_label(panel_img, panel_label)

random_vflip.png

创建Dataset，DataLoader

# Transformer applied to every (image, label) sample
def normal_transformer(image, label, params):
    """Resize a sample, apply random augmentations and convert it to a tensor.

    Args:
        image: PIL image.
        label: (x, y) centre in pixels of `image`.
        params: dict with "target_size", the probabilities "p_hflip",
            "p_vflip", "p_shift", "p_brightness", "p_contrast", "p_gamma",
            and the flag "scale_label". "max_translate",
            "brightness_factor", "contrast_factor" and "gamma" are read
            only when the matching augmentation is actually triggered.

    Returns:
        (image, label): the image as a tensor (TF.to_tensor) and the label,
        divided element-wise by "target_size" when params["scale_label"]
        is truthy.
    """
    # The module never imports `random`; import it here so the probability
    # checks below do not raise NameError.
    import random

    image,label=resize_img_label(image,label,params["target_size"])

    # Geometric augmentations: these move the label as well
    if random.random() < params["p_hflip"]:
        image,label=random_hflip(image,label)

    if random.random() < params["p_vflip"]:
        image,label=random_vflip(image,label)

    if random.random() < params["p_shift"]:
        image,label=random_shift(image,label, params["max_translate"])

    # Photometric augmentations: the label is unchanged. Each factor is
    # drawn from [1 - f, 1 + f) where f is the configured strength.
    if random.random() < params["p_brightness"]:
        brightness_factor=1+(np.random.rand()*2-1)*params["brightness_factor"]
        image=TF.adjust_brightness(image,brightness_factor)

    if random.random() < params["p_contrast"]:
        contrast_factor=1+(np.random.rand()*2-1)*params["contrast_factor"]
        image=TF.adjust_contrast(image,contrast_factor)

    if random.random() < params["p_gamma"]:
        gamma=1+(np.random.rand()*2-1)*params["gamma"]
        image=TF.adjust_gamma(image,gamma)

    # Optionally express the label as a fraction of the target size
    if params["scale_label"]:
        label=scale_label(label,params["target_size"])

    image=TF.to_tensor(image)
    return image, label


# Dataset definition
class AMDDataset(Dataset):
    """Dataset of fundus images with their fovea-centre labels.

    Args:
        data_path: dataset root; images and the label sheet are read from
            its "Training400" sub-directory.
        transform: callable invoked as transform(image, label, trans_params)
            that returns the (image, label) pair handed to the caller.
        trans_params: parameter dict forwarded to `transform`.
    """

    def __init__(self, data_path, transform, trans_params):
        # Path of the label spreadsheet
        labels_csv_path = os.path.join(data_path, "Training400", "Fovea_location.xlsx")

        # Read and parse the labels
        labels_df = pd.read_excel(labels_csv_path, index_col="ID")
        self.labels = labels_df[["Fovea_X", "Fovea_Y"]].values

        # Image names and IDs
        self.img_name = labels_df["imgName"]
        self.ids = labels_df.index

        # Build the paths in row order so that full_img_path[i] always
        # belongs to labels[i]. Indexing by `id_ - 1` only works when the
        # IDs are exactly 1..N in row order.
        # Names starting with "A" live under AMD/, the rest under Non-AMD/.
        self.full_img_path = [
            os.path.join(
                data_path,
                "Training400",
                "AMD" if name[0] == "A" else "Non-AMD",
                name,
            )
            for name in self.img_name.values
        ]

        self.transform = transform
        self.trans_params = trans_params

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the transformed (image, label) pair at position `idx`."""
        # Load the PIL image
        image = Image.open(self.full_img_path[idx])
        label = self.labels[idx]

        # Apply the transform (resize / augment / to-tensor, as configured)
        image, label = self.transform(image, label, self.trans_params)

        return image, label
    
# Transform parameters for training (augmentation on) and validation (augmentation off)
trans_params_train = {
    "target_size" : (256, 256),
    "p_hflip" : 0.5,
    "p_vflip" : 0.5,
    "p_shift" : 0.5,
    "max_translate": (0.2, 0.2),
    "p_brightness": 0.5,
    "brightness_factor": 0.2,
    "p_contrast": 0.5,
    "contrast_factor": 0.2,
    "p_gamma": 0.5,
    "gamma": 0.2,
    "scale_label": True,    
}

# All probabilities are 0.0, so no augmentation is ever triggered
trans_params_val = {
    "target_size" : (256, 256),
    "p_hflip" : 0.0,
    "p_vflip" : 0.0,
    "p_shift" : 0.0,
    "p_brightness": 0.0,
    "p_contrast": 0.0,
    "p_gamma": 0.0,
    "gamma": 0.0,
    "scale_label": True,    
}

# The transform function defined in this file is `normal_transformer`;
# the name `transformer` does not exist and raised NameError.
train_ds = AMDDataset(data_path, normal_transformer, trans_params_train)
val_ds = AMDDataset(data_path, normal_transformer, trans_params_val)

# Split the data into training and validation subsets
sss = ShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

indices = range(len(train_ds))
# Take the single split directly: re-wrapping inside a loop would nest
# Subset objects if n_splits were ever raised above 1.
train_index, val_index = next(sss.split(indices))

train_ds = Subset(train_ds, train_index)
print(len(train_ds))

val_ds = Subset(val_ds, val_index)
print(len(val_ds))

查看下经过转换后的图片

# Display a transformed sample
def show(img,label=None):
    """Plot an image tensor and, optionally, mark its label.

    Args:
        img: tensor whose axes are transposed (1, 2, 0) for plotting,
            i.e. laid out as (channels, height, width).
        label: optional (x, y) pair expressed as fractions of the image
            width and height; drawn as a blue '+' marker.
    """
    npimg = img.numpy().transpose((1,2,0))
    plt.imshow(npimg)
    if label is not None:
        # img.shape[1:] is (height, width) but the label is (x, y), so the
        # scale factors must be passed as (width, height).
        height, width = img.shape[1:]
        x, y = rescale_label(label, (width, height))
        plt.plot(x,y,'b+',markersize=20)
        
# Plot only the first training sample
plt.figure(figsize=(5, 5))
for sample in train_ds:
    img, label = sample
    show(img, label)
    break

after trans-img

定义dataloader

因为标签返回的是 list 结构（DataLoader 会把它整理成由两个 tensor 组成的 list），所以后续需要将其转换为 tensor。

# Data loaders: shuffled batches of 8 for training, ordered batches of 16 for validation
train_dl = DataLoader(dataset=train_ds, batch_size=8, shuffle=True)
val_dl = DataLoader(dataset=val_ds, batch_size=16, shuffle=False)
"""
for img_b, label_b in train_dl:
    print(img_b.shape,img_b.dtype)
    print(label_b)
    break
###################
torch.Size([8, 3, 256, 256]) torch.float32
[tensor([0.4825, 0.4530, 0.6596, 0.5515, 0.5801, 0.5192, 0.4439, 0.5710],
       dtype=torch.float64), tensor([0.5454, 0.4841, 0.6527, 0.5510, 0.5205, 0.5636, 0.4656, 0.7672],
       dtype=torch.float64)]

##################

"""

创建模型

Model diagram

# Model definition
class Net(nn.Module):
    """CNN with five convolutions and pooled skip connections, ending in one linear layer.

    Args:
        params: dict with
            "input_shape": (channels, height, width) - only channels is used,
            "initial_filters": output channels of the first convolution,
            "num_outputs": size of the final linear layer's output.
    """

    def __init__(self, params):
        super().__init__()
        C_in, _, _ = params["input_shape"]
        init_f = params["initial_filters"]
        num_outputs = params["num_outputs"]

        # Each convolution after the first receives the previous stage's
        # output concatenated with that stage's (pooled) input, hence the
        # growing "+ C_in" input widths.
        self.conv1 = nn.Conv2d(C_in, init_f, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(init_f + C_in, 2*init_f, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(3*init_f + C_in, 4*init_f, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(7*init_f + C_in, 8*init_f, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(15*init_f + C_in, 16*init_f, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(16*init_f, num_outputs)

    @staticmethod
    def _skip_stage(x, conv, skip_kernel):
        """Run conv -> ReLU -> 2x2 max-pool and append the average-pooled input.

        `skip_kernel` is the average-pool size applied to the input so that
        its spatial size matches the conv branch; tensors being concatenated
        must agree in every dimension except the channel one.
        """
        shortcut = F.avg_pool2d(x, skip_kernel, skip_kernel)
        features = F.max_pool2d(F.relu(conv(x)), 2, 2)
        return torch.cat((features, shortcut), dim=1)

    def forward(self, x):
        # conv1 has stride 2, so its skip branch is pooled by 4; the
        # following stages only halve the resolution.
        x = self._skip_stage(x, self.conv1, 4)
        for conv in (self.conv2, self.conv3, self.conv4):
            x = self._skip_stage(x, conv, 2)

        x = F.relu(self.conv5(x))

        # Global average pooling, then flatten to (batch, features)
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.reshape(x.size(0), -1)

        return self.fc1(x)
    

# Model hyper-parameters: RGB 256x256 input, 16 initial filters, 2 outputs
params_model = dict(
    input_shape=(3, 256, 256),
    initial_filters=16,
    num_outputs=2,
)

# Build the network and move it to the selected device
model = Net(params_model)
model = model.to(device)

print(model)

"""
Net(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(19, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(51, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(115, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(243, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=256, out_features=2, bias=True)
)
"""

一些中间函数

# Read the current learning rate
def get_lr(opt):
    """Return the 'lr' of the first parameter group of `opt` (None if it has no groups)."""
    first_group = next(iter(opt.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']
    
def cxcy_to_bbox(cxcy, w=50./256, h=50./256):
    """Turn box centres into corner-format boxes of a fixed size.

    Args:
        cxcy: tensor whose columns 0 and 1 hold cx and cy, one row per box.
        w: box width.
        h: box height.

    Returns:
        Tensor with one (xmin, ymin, xmax, ymax) row per input row.
    """
    n_boxes = cxcy.shape[0]

    # Per-row half extents (w/2, h/2), created on the same device as the input
    ones = torch.ones(n_boxes, 1, device=cxcy.device)
    half_extent = torch.cat((ones*w, ones*h), -1) / 2

    # Centre columns as an (n_boxes, 2) tensor
    centers = torch.stack((cxcy[:, 0], cxcy[:, 1]), dim=1)

    return torch.cat((centers - half_extent,      # xmin, ymin
                      centers + half_extent), 1)  # xmax, ymax


def metrics_batch(output, target):
    """Return the summed IoU between matching predicted and target boxes.

    Both inputs are centre coordinates; each is expanded to a fixed-size
    box with cxcy_to_bbox before the IoU is computed.
    """
    pred_boxes = cxcy_to_bbox(output)
    true_boxes = cxcy_to_bbox(target)

    # box_iou returns every pred/target pairing; only the diagonal
    # (prediction i against target i) is wanted.
    pairwise_iou = torchvision.ops.box_iou(pred_boxes, true_boxes)
    return pairwise_iou.diagonal().sum().item()


def loss_batch(loss_func, output, target, opt=None):
    """Compute loss and metric for one batch, taking an optimizer step if `opt` is given.

    Returns:
        (loss_value, metric_b): the loss as a Python number and the batch
        metric from metrics_batch.
    """
    loss = loss_func(output, target)
    metric_b = metrics_batch(output, target)

    # Evaluation mode: no optimizer, nothing to update
    if opt is None:
        return loss.item(), metric_b

    # Training mode: clear old gradients, back-propagate, update weights
    opt.zero_grad()
    loss.backward()
    opt.step()

    return loss.item(), metric_b

def loss_epoch(model,loss_func,dataset_dl,sanity_check=False,opt=None):
    """Run one pass over `dataset_dl` and return the per-sample loss and metric.

    Args:
        model: network called on each input batch.
        loss_func: loss applied to (output, target).
        dataset_dl: data loader yielding (images, labels), where labels is
            a sequence of per-coordinate tensors that is stacked along dim 1.
        sanity_check: when truthy, stop after the first batch so the whole
            pipeline can be smoke-tested quickly.
        opt: optimizer; when given, the weights are updated on every batch.

    Returns:
        (loss, metric): summed batch losses and summed batch metrics, each
        divided by the number of samples processed.
    """
    running_loss = 0.0
    running_metric = 0.0
    n_seen = 0

    for xb, yb in dataset_dl:
        # Convert the list of label tensors into one float32 tensor
        yb = torch.stack(yb,1)
        yb = yb.type(torch.float32).to(device)

        # Model prediction
        output = model(xb.to(device))

        # Loss and metric of this batch
        loss_b, metric_b = loss_batch(loss_func, output, yb, opt)

        # Accumulate the loss
        running_loss += loss_b

        # Accumulate the metric
        if metric_b is not None:
            running_metric += metric_b

        n_seen += yb.shape[0]

        # `sanity_check` used to be accepted but ignored; honour it by
        # stopping after a single batch.
        if sanity_check:
            break

    # Average over the samples actually processed. For a complete pass
    # this is the dataset size; for a sanity-check pass it is one batch.
    loss = running_loss / float(n_seen)
    metric = running_metric / float(n_seen)

    return loss, metric

训练验证模型主函数

# Main training / validation loop
def train_val(model, params):
    """Train `model`, validating after every epoch, and keep the best weights.

    Args:
        model: network to train.
        params: dict with "num_epochs", "loss_func", "optimizer",
            "train_dl", "val_dl", "sanity_check", "lr_scheduler" and
            "path2weights".

    Returns:
        (model, loss_history, metric_history): the model loaded with the
        weights of the lowest validation loss, and dicts holding the
        per-epoch "train" / "val" losses and metrics.
    """
    # Unpack the run configuration
    num_epochs = params["num_epochs"]
    loss_func = params["loss_func"]
    optimizer = params["optimizer"]
    train_loader = params["train_dl"]
    val_loader = params["val_dl"]
    sanity_check = params["sanity_check"]
    scheduler = params["lr_scheduler"]
    weights_path = params["path2weights"]

    # Per-epoch loss and metric for both phases
    loss_history = {phase: [] for phase in ("train", "val")}
    metric_history = {phase: [] for phase in ("train", "val")}

    # Snapshot of the best weights seen so far and the loss they achieved
    best_weights = copy.deepcopy(model.state_dict())
    lowest_val_loss = float('inf')

    for epoch in range(num_epochs):
        # Remember the learning rate in effect at the start of the epoch
        lr_at_start = get_lr(optimizer)
        print('Epoch {}/{}, current lr={}'.format(epoch, num_epochs - 1, lr_at_start))

        # Training pass (weights are updated because the optimizer is passed)
        model.train()
        train_loss, train_metric = loss_epoch(
            model, loss_func, train_loader, sanity_check, optimizer)
        loss_history["train"].append(train_loss)
        metric_history["train"].append(train_metric)

        # Validation pass without gradient tracking
        model.eval()
        with torch.no_grad():
            val_loss, val_metric = loss_epoch(
                model, loss_func, val_loader, sanity_check)
        loss_history["val"].append(val_loss)
        metric_history["val"].append(val_metric)

        # New best validation loss: snapshot the weights and write them to disk
        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            best_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), weights_path)
            print("Copied best model weights!")

        # Let the scheduler react to the validation loss; if it changed the
        # learning rate, continue from the best weights so far.
        scheduler.step(val_loss)
        if get_lr(optimizer) != lr_at_start:
            print("Loading best model weights!")
            model.load_state_dict(best_weights)

        print("train loss: %.6f, accuracy: %.2f" %(train_loss,100*train_metric))
        print("val loss: %.6f, accuracy: %.2f" %(val_loss,100*val_metric))
        print("-"*10)

    # Finish with the best weights loaded
    model.load_state_dict(best_weights)
    return model, loss_history, metric_history

模型训练

# Loss, optimizer and learning-rate schedule
loss_func = nn.SmoothL1Loss(reduction="sum")
opt = optim.Adam(model.parameters(), lr=1e-4)
lr_scheduler = ReduceLROnPlateau(opt, mode='min',factor=0.5, patience=20,verbose=1)

# Directory for the saved weights. makedirs also creates the missing
# parent "./models", which os.mkdir cannot do, and exist_ok avoids an
# error when the directory is already there.
path2models = "./models/sod/"
os.makedirs(path2models, exist_ok=True)

params_train = {
    "num_epochs": 10,
    "optimizer": opt,
    "loss_func": loss_func,
    "train_dl": train_dl,
    "val_dl": val_dl,
    "sanity_check": False,
    "lr_scheduler": lr_scheduler,
    "path2weights": os.path.join(path2models, "weights_smoothl1.pt"),
}

# Train and validate the model
model, loss_hist, metric_hist = train_val(model,params_train)
"""
Epoch 0/9, current lr=0.0001
Copied best model weights!
train loss: 0.014286, accuracy: 27.88
val loss: 0.011519, accuracy: 50.27
----------
Epoch 1/9, current lr=0.0001
Copied best model weights!
train loss: 0.010053, accuracy: 36.32
val loss: 0.009709, accuracy: 54.58
----------
Epoch 2/9, current lr=0.0001
Copied best model weights!
train loss: 0.008984, accuracy: 37.51
val loss: 0.009206, accuracy: 59.38
----------
Epoch 3/9, current lr=0.0001
train loss: 0.009700, accuracy: 36.43
val loss: 0.009328, accuracy: 59.98
----------
Epoch 4/9, current lr=0.0001
train loss: 0.008283, accuracy: 37.85
val loss: 0.010192, accuracy: 50.60
----------
Epoch 5/9, current lr=0.0001
train loss: 0.007235, accuracy: 42.44
val loss: 0.009638, accuracy: 48.86
----------
Epoch 6/9, current lr=0.0001
train loss: 0.005808, accuracy: 44.42
val loss: 0.010148, accuracy: 57.19
----------
Epoch 7/9, current lr=0.0001
train loss: 0.006283, accuracy: 44.23
val loss: 0.010079, accuracy: 40.51
----------
Epoch 8/9, current lr=0.0001
train loss: 0.005408, accuracy: 47.86
val loss: 0.011009, accuracy: 34.36
----------
Epoch 9/9, current lr=0.0001
train loss: 0.005920, accuracy: 43.82
val loss: 0.011682, accuracy: 30.28
----------
"""

结果数据可视化

# Plot the loss and metric curves
def show_loss_acc(num_epochs, loss_hist, metric_hist):
    """Draw train/val curves for the loss and then for the metric.

    Args:
        num_epochs: number of epochs; the x axis runs from 1 to num_epochs.
        loss_hist: dict with "train" and "val" lists of per-epoch losses.
        metric_hist: dict with "train" and "val" lists of per-epoch metrics.
    """
    epochs = range(1, num_epochs + 1)

    # One figure per quantity: (title, history, y-axis label)
    figures = (
        ("Train-Val Loss", loss_hist, "Loss"),
        ("Train-Val Accuracy", metric_hist, "Accuracy"),
    )
    for title, history, y_label in figures:
        plt.title(title)
        for phase in ("train", "val"):
            plt.plot(epochs, history[phase], label=phase)
        plt.ylabel(y_label)
        plt.xlabel("Training Epochs")
        plt.legend()
        plt.show()

show_loss_acc(params_train['num_epochs'], loss_hist, metric_hist)

释放资源

# Print the CUDA memory statistics. This is a CUDA-only call, so skip it
# when the script runs on the CPU.
if torch.cuda.is_available():
    print(torch.cuda.memory_summary())

# Drop the model reference so its memory can be reclaimed. Checking the
# name first avoids a NameError when no model was created.
if "model" in globals():
    del model

# Hand cached GPU memory back to the driver
if torch.cuda.is_available():
    torch.cuda.empty_cache()