PyTorch-PyramidBoxDemo

PyTorch-PyramidBoxDemo

硬件:NVIDIA-GTX1080

软件:Windows10、python3.6.5、pytorch-gpu-0.4.1

一、基础知识(一图看懂)

 

PyTorch-PyramidBoxDemo_第1张图片

二、代码展示

1、demo.py

#-*- coding:utf-8 -*-

from __future__ import division
from __future__ import absolute_import
from __future__ import print_function

import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import cv2
import time
import numpy as np
from PIL import Image

from config import cfg
from pyramidbox import build_net

use_cuda = torch.cuda.is_available()

if use_cuda:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')
    
def to_chw_bgr(image):
    """
    Transpose image from HWC to CHW and from RBG to BGR.
    Args:
        image (np.array): an image with HWC and RBG layout.
    """
    # HWC to CHW
    if len(image.shape) == 3:
        image = np.swapaxes(image, 1, 2)
        image = np.swapaxes(image, 1, 0)
    # RBG to BGR
    image = image[[2, 1, 0], :, :]
    return image

def detect(net, img_path, thresh):
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')

    img = np.array(img)
    height, width, _ = img.shape
    max_im_shrink = np.sqrt(
        1200 * 1100 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink,
                       fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    #x = x * cfg.scale

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor([img.shape[1], img.shape[0],
                          img.shape[1], img.shape[0]])

    for i in range(detections.size(1)):
        j = 0
        while detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])
            j += 1
            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)
            conf = "{:.2f}".format(score)
            text_size, baseline = cv2.getTextSize(
                conf, cv2.FONT_HERSHEY_SIMPLEX, 0.3, 1)
            p1 = (left_up[0], left_up[1] - text_size[1])
            cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline),
                          (p1[0] + text_size[0], p1[1] + text_size[1]), [255, 0, 0], -1)
            cv2.putText(img, conf, (p1[0], p1[
                        1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, 8)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))

    cv2.imwrite("demo.jpg", img)


if __name__ == '__main__':
    model='weights/pyramidbox_120000_99.02.pth'
    
    net = build_net('test', cfg.NUM_CLASSES)
    net.load_state_dict(torch.load(model))
    net.eval()

    if use_cuda:
        net.cuda()
        cudnn.benckmark = True

    path = "test.jpg"
    thresh=0.4
    detect(net, path, thresh)

2、pyramidbox.py

#-*- coding:utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import torch
import torch.nn as nn
import torch.nn.init as init
from torch.autograd import Function
import torch.nn.functional as F
from torch.autograd import Variable
import os
from config import cfg

from itertools import product as product

class PriorBox(object):
    """Compute priorbox coordinates in center-offset form for each source
    feature map.
    """

    def __init__(self, input_size, feature_maps,cfg):
        super(PriorBox, self).__init__()
        self.imh = input_size[0]
        self.imw = input_size[1]

        # number of priors for feature map location (either 4 or 6)
        self.variance = cfg.VARIANCE or [0.1]  #cfg.VARIANCE
        #self.feature_maps = cfg.FEATURE_MAPS
        self.min_sizes = cfg.ANCHOR_SIZES  #[16, 32, 64, 128, 256, 512]
        self.steps = cfg.STEPS #[4, 8, 16, 32, 64, 128]
        self.clip = cfg.CLIP
        for v in self.variance:
            if v <= 0:
                raise ValueError('Variances must be greater than 0')
        self.feature_maps = feature_maps


    def forward(self):
        mean = []
        for k in range(len(self.feature_maps)):
            feath = self.feature_maps[k][0]
            featw = self.feature_maps[k][1]
            for i, j in product(range(feath), range(featw)):
                f_kw = self.imw / self.steps[k]
                f_kh = self.imh / self.steps[k]

                cx = (j + 0.5) / f_kw  # cx = (j + 0.5) * self.step / self.imw ; for org image, center
                cy = (i + 0.5) / f_kh  # cy = (i + 0.5) * self.step / self.imh ; for org image, center

                s_kw = self.min_sizes[k] / self.imw  # for org image, center
                s_kh = self.min_sizes[k] / self.imh  # for org image, center

                mean += [cx, cy, s_kw, s_kh]

        output = torch.Tensor(mean).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output

def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """

    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes

def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        The indices of the kept boxes with respect to num_priors.
    """

    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        return keep
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    # I = I[v >= 0.01]
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()

    # keep = torch.Tensor()
    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        # keep.append(i)
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w * h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
        union = (rem_areas - inter) + area[i]
        IoU = inter / union  # store result in iou
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

class Detect(Function):
    """At test time, Detect is the final layer of SSD.  Decode location preds,
    apply non-maximum suppression to location predictions based on conf
    scores and threshold to a top_k number of output predictions for both
    confidence score and locations.
    """

    def __init__(self, cfg):
        self.num_classes = cfg.NUM_CLASSES
        self.top_k = cfg.TOP_K
        self.nms_thresh = cfg.NMS_THRESH
        self.conf_thresh = cfg.CONF_THRESH
        self.variance = cfg.VARIANCE # [0.1, 0.2]

    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Shape: Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4] 
        """
        
        num = loc_data.size(0)
        num_priors = prior_data.size(0)

        conf_preds = conf_data.view(
            num, num_priors, self.num_classes).transpose(2, 1)
        batch_priors = prior_data.view(-1, num_priors,
                                       4).expand(num, num_priors, 4)
        batch_priors = batch_priors.contiguous().view(-1, 4)

        ################decode box###################

        decoded_boxes = decode(loc_data.view(-1, 4),
                               batch_priors, self.variance)
        decoded_boxes = decoded_boxes.view(num, num_priors, 4)

        output = torch.zeros(num, self.num_classes, self.top_k, 5)

        ###############nms box and take top5000###########

        for i in range(num):
            boxes = decoded_boxes[i].clone()
            conf_scores = conf_preds[i].clone()
            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh) # 0.05
                scores = conf_scores[cl][c_mask]

                if scores.dim() == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(boxes)
                boxes_ = boxes[l_mask].view(-1, 4)
                ids, count = nms(
                    boxes_, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = torch.cat((scores[ids[:count]].unsqueeze(1),
                                                   boxes_[ids[:count]]), 1)

        return output

class L2Norm(nn.Module):
    def __init__(self,n_channels, scale):
        super(L2Norm,self).__init__()
        self.n_channels = n_channels
        self.gamma = scale or None
        self.eps = 1e-10
        self.weight = nn.Parameter(torch.Tensor(self.n_channels))
        self.reset_parameters()

    def reset_parameters(self):
        init.constant(self.weight,self.gamma)

    def forward(self, x):
        norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
        #x /= norm
        x = torch.div(x,norm)
        out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
        return out

class conv_bn(nn.Module):
    """docstring for conv"""

    def __init__(self,
                 in_plane,
                 out_plane,
                 kernel_size,
                 stride,
                 padding):
        super(conv_bn, self).__init__()
        self.conv1 = nn.Conv2d(in_plane, out_plane,
                               kernel_size=kernel_size, stride=stride, padding=padding)
        self.bn1 = nn.BatchNorm2d(out_plane)

    def forward(self, x):
        x = self.conv1(x)
        return self.bn1(x)


class CPM(nn.Module):
    """docstring for CPM"""

    def __init__(self, in_plane):
        super(CPM, self).__init__()
        self.branch1 = conv_bn(in_plane, 1024, 1, 1, 0)
        self.branch2a = conv_bn(in_plane, 256, 1, 1, 0)
        self.branch2b = conv_bn(256, 256, 3, 1, 1)
        self.branch2c = conv_bn(256, 1024, 1, 1, 0)

        self.ssh_1 = nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1)
        self.ssh_dimred = nn.Conv2d(
            1024, 128, kernel_size=3, stride=1, padding=1)
        self.ssh_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.ssh_3a = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.ssh_3b = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        out_residual = self.branch1(x)
        x = F.relu(self.branch2a(x), inplace=True)
        x = F.relu(self.branch2b(x), inplace=True)
        x = self.branch2c(x)

        rescomb = F.relu(x + out_residual, inplace=True)
        ssh1 = self.ssh_1(rescomb)
        ssh_dimred = F.relu(self.ssh_dimred(rescomb), inplace=True)
        ssh_2 = self.ssh_2(ssh_dimred)
        ssh_3a = F.relu(self.ssh_3a(ssh_dimred), inplace=True)
        ssh_3b = self.ssh_3b(ssh_3a)

        ssh_out = torch.cat([ssh1, ssh_2, ssh_3b], dim=1)
        ssh_out = F.relu(ssh_out, inplace=True)
        return ssh_out


class PyramidBox(nn.Module):
    """docstring for PyramidBox"""

    def __init__(self,
                 phase,
                 base,
                 extras,
                 lfpn_cpm,
                 head,
                 num_classes):
        super(PyramidBox, self).__init__()
        #self.use_transposed_conv2d = use_transposed_conv2d

        self.vgg = nn.ModuleList(base)
        self.extras = nn.ModuleList(extras)

        self.L2Norm3_3 = L2Norm(256, 10)
        self.L2Norm4_3 = L2Norm(512, 8)
        self.L2Norm5_3 = L2Norm(512, 5)
        
        self.lfpn_topdown = nn.ModuleList(lfpn_cpm[0])
        self.lfpn_later = nn.ModuleList(lfpn_cpm[1])
        self.cpm = nn.ModuleList(lfpn_cpm[2])

        self.loc_layers = nn.ModuleList(head[0])
        self.conf_layers = nn.ModuleList(head[1])

        

        self.is_infer = False
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(cfg)
            self.is_infer = True

    def _upsample_prod(self, x, y):
        _, _, H, W = y.size()
        return F.upsample(x, size=(H, W), mode='bilinear') * y

    def forward(self, x):
        size = x.size()[2:]
        
        #################################base_vgg#############################

        # apply vgg up to conv3_3 relu
        for k in range(16):
            x = self.vgg[k](x)
        conv3_3 = x
        # apply vgg up to conv4_3
        for k in range(16, 23):
            x = self.vgg[k](x)
        conv4_3 = x

        for k in range(23, 30):
            x = self.vgg[k](x)
        conv5_3 = x

        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        convfc_7 = x
       

        # apply extra layers and cache source layer outputs
        for k in range(2):
            x = F.relu(self.extras[k](x), inplace=True)
        conv6_2 = x
        for k in range(2, 4):
            x = F.relu(self.extras[k](x), inplace=True)
        conv7_2 = x
        
        ####################################lfpn##############################

        x = F.relu(self.lfpn_topdown[0](convfc_7), inplace=True)
        lfpn2_on_conv5 = F.relu(self._upsample_prod(
            x, self.lfpn_later[0](conv5_3)), inplace=True)

        x = F.relu(self.lfpn_topdown[1](lfpn2_on_conv5), inplace=True)
        lfpn1_on_conv4 = F.relu(self._upsample_prod(
            x, self.lfpn_later[1](conv4_3)), inplace=True)

        x = F.relu(self.lfpn_topdown[2](lfpn1_on_conv4), inplace=True)
        lfpn0_on_conv3 = F.relu(self._upsample_prod(
            x, self.lfpn_later[2](conv3_3)), inplace=True)
        
        ####################################cpm##############################

        ssh_conv3_norm = self.cpm[0](self.L2Norm3_3(lfpn0_on_conv3))
        ssh_conv4_norm = self.cpm[1](self.L2Norm4_3(lfpn1_on_conv4))
        ssh_conv5_norm = self.cpm[2](self.L2Norm5_3(lfpn2_on_conv5))
        ssh_convfc7 = self.cpm[3](convfc_7)
        ssh_conv6 = self.cpm[4](conv6_2)
        ssh_conv7 = self.cpm[5](conv7_2)
        
        ########################loc_layers[0]  conf_layers[0]#################

        face_locs, face_confs = [], []
        head_locs, head_confs = [], []

        N = ssh_conv3_norm.size(0)

        mbox_loc = self.loc_layers[0](ssh_conv3_norm)
        face_loc, head_loc = torch.chunk(mbox_loc, 2, dim=1)

        face_loc = face_loc.permute(
            0, 2, 3, 1).contiguous().view(N, -1, 4)
        if not self.is_infer:
            head_loc = head_loc.permute(
                0, 2, 3, 1).contiguous().view(N, -1, 4)

        mbox_conf = self.conf_layers[0](ssh_conv3_norm)
        face_conf1 = mbox_conf[:, 3:4, :, :]
        face_conf3_maxin, _ = torch.max(
            mbox_conf[:, 0:3, :, :], dim=1, keepdim=True)

        face_conf = torch.cat((face_conf3_maxin, face_conf1), dim=1)

        face_conf = face_conf.permute(
            0, 2, 3, 1).contiguous().view(N, -1, 2)

        if not self.is_infer:
            head_conf3_maxin, _ = torch.max(
                mbox_conf[:, 4:7, :, :], dim=1, keepdim=True)
            head_conf1 = mbox_conf[:, 7:, :, :]
            head_conf = torch.cat((head_conf3_maxin, head_conf1), dim=1)
            head_conf = head_conf.permute(
                0, 2, 3, 1).contiguous().view(N, -1, 2)
            
        ###############loc_layers[1-5]  conf_layers[1-5]#################

        face_locs.append(face_loc)
        face_confs.append(face_conf)
        if not self.is_infer:
            head_locs.append(head_loc)
            head_confs.append(head_conf)

        inputs = [ssh_conv4_norm, ssh_conv5_norm,
                  ssh_convfc7, ssh_conv6, ssh_conv7]

        feature_maps = []
        feat_size = ssh_conv3_norm.size()[2:]
        feature_maps.append([feat_size[0], feat_size[1]])

        for i, feat in enumerate(inputs):
            feat_size = feat.size()[2:]
            feature_maps.append([feat_size[0], feat_size[1]])
            mbox_loc = self.loc_layers[i + 1](feat)
            face_loc, head_loc = torch.chunk(mbox_loc, 2, dim=1)
            face_loc = face_loc.permute(
                0, 2, 3, 1).contiguous().view(N, -1, 4)
            if not self.is_infer:
                head_loc = head_loc.permute(
                    0, 2, 3, 1).contiguous().view(N, -1, 4)

            mbox_conf = self.conf_layers[i + 1](feat)
            face_conf1 = mbox_conf[:, 0:1, :, :]
            face_conf3_maxin, _ = torch.max(
                mbox_conf[:, 1:4, :, :], dim=1, keepdim=True)
            face_conf = torch.cat((face_conf1, face_conf3_maxin), dim=1)

            face_conf = face_conf.permute(
                0, 2, 3, 1).contiguous().view(N, -1, 2)

            if not self.is_infer:
                head_conf = mbox_conf[:, 4:, :, :].permute(
                    0, 2, 3, 1).contiguous().view(N, -1, 2)

            face_locs.append(face_loc)
            face_confs.append(face_conf)

            if not self.is_infer:
                head_locs.append(head_loc)
                head_confs.append(head_conf)
        
        ###########################PriorBox Set###############################
        
        face_mbox_loc = torch.cat(face_locs, dim=1)
        face_mbox_conf = torch.cat(face_confs, dim=1)

        if not self.is_infer:
            head_mbox_loc = torch.cat(head_locs, dim=1)
            head_mbox_conf = torch.cat(head_confs, dim=1)

        priors_boxes = PriorBox(size, feature_maps, cfg)
        priors = Variable(priors_boxes.forward(), volatile=True)  #priors_boxes [34125, 4]
        
        #################face_mbox_loc face_mbox_conf########################

        if not self.is_infer:
            output = (face_mbox_loc, face_mbox_conf,
                      head_mbox_loc, head_mbox_conf, priors)
        else:
            output = self.detect(face_mbox_loc,
                                 self.softmax(face_mbox_conf),
                                 priors)

        return output

    def load_weights(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext == '.pkl' or '.pth':
            print('Loading weights into state dict...')
            mdata = torch.load(base_file,
                               map_location=lambda storage, loc: storage)
            weights = mdata['weight']
            epoch = mdata['epoch']
            self.load_state_dict(weights)
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
        return epoch

    def xavier(self, param):
        init.xavier_uniform(param)

    def weights_init(self, m):
        if isinstance(m, nn.Conv2d):
            self.xavier(m.weight.data)
            if 'bias' in m.state_dict().keys():
                m.bias.data.zero_()

        if isinstance(m, nn.ConvTranspose2d):
            self.xavier(m.weight.data)
            if 'bias' in m.state_dict().keys():
                m.bias.data.zero_()

        if isinstance(m, nn.BatchNorm2d):
            m.weight.data[...] = 1
            m.bias.data.zero_()


vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
           512, 512, 512, 'M']

def vgg(cfg, i, batch_norm=False):
    layers = []
    in_channels = i
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif v == 'C':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
    conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
    layers += [conv6,
               nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
    return layers

extras_cfg = [256, 'S', 512, 128, 'S', 256] # [2][5] not use

def add_extras(cfg, i, batch_norm=False):
    # Extra layers added to VGG for feature scaling
    layers = []
    in_channels = i
    flag = False
    for k, v in enumerate(cfg):
        if in_channels != 'S':
            if v == 'S':
                layers += [nn.Conv2d(in_channels, cfg[k + 1],
                                     kernel_size=(1, 3)[flag], stride=2, padding=1)] #(1,3)[False] == 1  -> (1,3)[0] == 1 
            else:
                layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
            flag = not flag
        in_channels = v
    return layers

lfpn_cpm_cfg = [256, 512, 512, 1024, 512, 256]

def add_lfpn_cpm(cfg):
    lfpn_topdown_layers = []
    lfpn_latlayer = []
    cpm_layers = []

    for k, v in enumerate(cfg):
        cpm_layers.append(CPM(v))

    fpn_list = cfg[::-1][2:]
    for k, v in enumerate(fpn_list[:-1]):
        lfpn_latlayer.append(nn.Conv2d(
            fpn_list[k + 1], fpn_list[k + 1], kernel_size=1, stride=1, padding=0))
        lfpn_topdown_layers.append(nn.Conv2d(
            v, fpn_list[k + 1], kernel_size=1, stride=1, padding=0))

    return (lfpn_topdown_layers, lfpn_latlayer, cpm_layers)

multibox_cfg = [512, 512, 512, 512, 512, 512]

def multibox(vgg, extra_layers):
    loc_layers = []
    conf_layers = []
    vgg_source = [21, 28, -2]
    i = 0
    loc_layers += [nn.Conv2d(multibox_cfg[i],
                             8, kernel_size=3, padding=1)]
    conf_layers += [nn.Conv2d(multibox_cfg[i],
                              8, kernel_size=3, padding=1)]
    i += 1
    for k, v in enumerate(vgg_source):
        loc_layers += [nn.Conv2d(multibox_cfg[i],
                                 8, kernel_size=3, padding=1)]
        conf_layers += [nn.Conv2d(multibox_cfg[i],
                                  6, kernel_size=3, padding=1)]
        i += 1
    for k, v in enumerate(extra_layers[1::2], 2):
        loc_layers += [nn.Conv2d(multibox_cfg[i],
                                 8, kernel_size=3, padding=1)]
        conf_layers += [nn.Conv2d(multibox_cfg[i],
                                  6, kernel_size=3, padding=1)]
        i += 1
    return vgg, extra_layers, (loc_layers, conf_layers)


def build_net(phase, num_classes=2):
    base_, extras_, head_ = multibox(
        vgg(vgg_cfg, 3), add_extras((extras_cfg), 1024))
    lfpn_cpm = add_lfpn_cpm(lfpn_cpm_cfg)
    return PyramidBox(phase, base_, extras_, lfpn_cpm, head_, num_classes)

3、config.py

#-*- coding:utf-8 -*-

from __future__ import division
from __future__ import absolute_import
from __future__ import print_function

from easydict import EasyDict
import numpy as np


_C = EasyDict()
cfg = _C
# data augument config
_C.img_mean = np.array([104., 117., 123.])[:, np.newaxis, np.newaxis].astype(
    'float32')

# anchor config
_C.FEATURE_MAPS = [160, 80, 40, 20, 10, 5]
_C.INPUT_SIZE = 640
_C.STEPS = [4, 8, 16, 32, 64, 128]
_C.ANCHOR_SIZES = [16, 32, 64, 128, 256, 512]
_C.CLIP = False
_C.VARIANCE = [0.1, 0.2] #方差

# loss config
_C.NUM_CLASSES = 2
_C.OVERLAP_THRESH = 0.35
_C.NEG_POS_RATIOS = 3


# detection config
_C.NMS_THRESH = 0.3
_C.TOP_K = 5000
_C.KEEP_TOP_K = 750
_C.CONF_THRESH = 0.05

三、结果展示

PyTorch-PyramidBoxDemo_第2张图片PyTorch-PyramidBoxDemo_第3张图片

四、参考:

https://github.com/yxlijun/Pyramidbox.pytorch

 

任何问题请加唯一QQ2258205918(名称samylee)!

唯一VX:samylee_csdn

你可能感兴趣的:(PyTorch,pyramidbox,pytorch)