


import torch
from itertools import product as product
import numpy as np
from math import ceil

class PriorBox(object):
    """Generate anchor (prior) boxes for a set of feature-map levels.

    Feature-map level ``k`` has stride ``cfg['steps'][k]`` and a list of anchor
    side lengths ``cfg['min_sizes'][k]`` (same units as ``image_size``).  One
    square anchor per side length is centred on every cell of every level.
    """

    def __init__(self, cfg, image_size=None, phase='train'):
        """Store the anchor configuration and derive the feature-map sizes.

        Args:
            cfg: Mapping providing ``'min_sizes'`` (one list of anchor sizes
                per level), ``'steps'`` (one stride per level) and ``'clip'``
                (whether ``forward`` clamps its output to ``[0, 1]``).
            image_size: ``(height, width)`` of the network input.  Required,
                even though the signature gives it a ``None`` default.
            phase: Accepted for interface compatibility; not used here.

        Raises:
            ValueError: If ``image_size`` is ``None``.
        """
        super(PriorBox, self).__init__()
        if image_size is None:
            # Fail early with a clear message instead of an opaque
            # "'NoneType' object is not subscriptable" below.
            raise ValueError("image_size must be provided as (height, width)")
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        # Size of each feature map: the image extent divided by the level's
        # stride, rounded up.
        self.feature_maps = [
            [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
            for step in self.steps
        ]
        # NOTE(review): the original line ended in a stray `= "s"`, which looks
        # like a fused `self.name = "s"` statement; restored as its own
        # attribute so `feature_maps` is assigned correctly -- confirm.
        self.name = "s"

    def forward(self):
        """Build all anchors.

        Returns:
            A tensor of shape ``(num_anchors, 4)`` whose rows are
            ``(cx, cy, w, h)``, each divided by the matching image extent.
            When ``self.clip`` is truthy the values are clamped to ``[0, 1]``.
        """
        img_h, img_w = self.image_size[0], self.image_size[1]
        anchors = []
        # Walk over every feature-map level.
        for k, fmap in enumerate(self.feature_maps):
            rows, cols = fmap[0], fmap[1]
            step = self.steps[k]
            level_sizes = self.min_sizes[k]
            # Walk over every cell of this level (row index i, column index j).
            for i, j in product(range(rows), range(cols)):
                # Cell centre mapped back to image space and normalised.
                cx = (j + 0.5) * step / img_w
                cy = (i + 0.5) * step / img_h
                for min_size in level_sizes:
                    # Anchor width / height relative to the image extent.
                    anchors += [cx, cy, min_size / img_w, min_size / img_h]

        # Convert the flat Python list into an (N, 4) torch tensor.
        output = torch.Tensor(anchors).view(-1, 4)
        if self.clip:
            # Keep every coordinate inside the unit square.
            output.clamp_(max=1, min=0)
        return output

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils.box_utils import match, log_sum_exp
from data import cfg_mnet
# Truthy when the 'gpu_train' entry of cfg_mnet enables GPU (CUDA) training.
GPU = cfg_mnet['gpu_train']

class MultiBoxLoss(nn.Module):
    """SSD Weighted Loss Function
    Compute Targets:
        1) Produce Confidence Target Indices by matching  ground truth boxes
           with (default) 'priorboxes' that have jaccard index > threshold parameter
           (default threshold: 0.5).
        2) Produce localization target by 'encoding' variance into offsets of ground
           truth boxes and their matched  'priorboxes'.
        3) Hard negative mining to filter the excessive number of negative examples
           that comes with using a large number of default bounding boxes.
           (default negative:positive ratio 3:1)
    Objective Loss:
        L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
        weighted by α which is set to 1 by cross val.
            c: class confidences,
            l: predicted boxes,
            g: ground truth boxes
            N: number of matched default boxes
        See the SSD paper (Liu et al., "SSD: Single Shot MultiBox Detector")
        for more details.

    In addition to the box and class terms, ``forward`` also returns a
    Smooth-L1 loss over 10 landmark coordinates per prior.
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
        """Store the loss hyper-parameters.

        Args:
            num_classes: Number of classes in the confidence predictions.
            overlap_thresh: Threshold handed to ``match`` when assigning
                ground truth to priors.
            prior_for_matching: Stored as ``use_prior_for_matching``; not read
                by ``forward``.
            bkg_label: Stored as ``background_label``; not read by ``forward``.
            neg_mining: Stored as ``do_neg_mining``; not read by ``forward``.
            neg_pos: Negative:positive ratio used for hard negative mining.
            neg_overlap: Stored; not read by ``forward``.
            encode_target: Stored; not read by ``forward``.
        """
        super(MultiBoxLoss, self).__init__()
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap
        # Variances handed to ``match`` for encoding the regression targets.
        self.variance = [0.1, 0.2]

    def forward(self, predictions, priors, targets):
        """Multibox Loss

        Args:
            predictions (tuple): ``(loc_data, conf_data, landm_data)`` from
                the network.
                    loc shape: torch.size(batch_size,num_priors,4)
                    conf shape: torch.size(batch_size,num_priors,num_classes)
                    landm shape: torch.size(batch_size,num_priors,10)
            priors (tensor): Prior boxes, shape: torch.size(num_priors,4).
            targets: Per-image ground truth, indexable by batch index.  Each
                entry is a 2-D tensor whose columns 0-3 hold the box, columns
                4-13 the landmark coordinates and the last column the label.

        Returns:
            tuple: ``(loss_l, loss_c, loss_landm)``.  The box and class
            losses are divided by the number of positive priors and the
            landmark loss by the number of priors with a label > 0 (each
            divisor is at least 1).
        """
        # loc: box offsets, conf: class scores, landm: landmark offsets.
        loc_data, conf_data, landm_data = predictions
        num = loc_data.size(0)  # batch size
        num_priors = priors.size(0)
        # Targets follow the predictions' device, so the loss works on CPU as
        # well as on CUDA (the original forced `.cuda()` unconditionally).
        device = loc_data.device

        # Match priors (default boxes) with ground truth boxes.
        # loc_t / landm_t / conf_t receive, for every prior, the regression
        # targets and the label produced by `match`.
        loc_t = torch.Tensor(num, num_priors, 4)
        landm_t = torch.Tensor(num, num_priors, 10)
        conf_t = torch.LongTensor(num, num_priors)
        defaults = priors.data  # prior boxes; identical for every image
        for idx in range(num):  # one image at a time
            truths = targets[idx][:, :4].data    # ground-truth boxes
            labels = targets[idx][:, -1].data    # ground-truth labels
            landms = targets[idx][:, 4:14].data  # ground-truth landmarks
            match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
        loc_t = loc_t.to(device)
        conf_t = conf_t.to(device)
        landm_t = landm_t.to(device)

        # Landmark loss (Smooth L1)
        # Shape: [batch,num_priors,10]
        # Only priors whose label is strictly positive contribute.
        # NOTE(review): presumably `match` writes a negative label for faces
        # without landmark annotations -- confirm against utils.box_utils.
        pos1 = conf_t > 0
        num_pos_landm = pos1.long().sum(1, keepdim=True)
        # Number of landmark positives over the batch, at least 1.
        N1 = max(num_pos_landm.data.sum().float(), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data[pos_idx1].view(-1, 10)  # predicted landmarks
        landm_t = landm_t[pos_idx1].view(-1, 10)     # target landmarks
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # Every prior with a non-zero label is a positive for the box and
        # class terms; collapse all of them to class 1.
        pos = conf_t != 0
        conf_t[pos] = 1

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)  # predicted box offsets
        loc_t = loc_t[pos_idx].view(-1, 4)     # target box offsets
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute the per-prior classification loss for hard negative mining.
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0    # exclude positives; only negatives are ranked
        loss_c = loss_c.view(num, -1)  # per-image layout [batch, num_priors]
        # Sorting twice yields each prior's rank in the descending loss order.
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        # Per-image number of positives, shape [batch, 1].
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        # Keep the `num_neg` highest-loss negatives of each image.
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        # Gather predictions and targets of every selected prior.
        conf_p = conf_data[pos_idx | neg_idx].view(-1, self.num_classes)
        targets_weighted = conf_t[pos | neg]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # Number of positives over the batch, at least 1.
        N = max(num_pos.data.sum().float(), 1)

        loss_l /= N
        loss_c /= N

        loss_landm /= N1

        return loss_l, loss_c, loss_landm
