Mask R-CNN explained in detail

I. Before studying Mask R-CNN, it is recommended to first review Faster R-CNN (see the Faster R-CNN code walkthrough).

Key techniques in Mask R-CNN:

1. Multi-scale detection built on FPN (the Feature Pyramid Network); a similar multi-scale idea also appears in YOLOv3

2. RPN

3. ROI Align

II. For a systematic walkthrough of Mask R-CNN, see the video explanation on Bilibili.

III. Annotated code

model.py

"""
Mask R-CNN
The main Mask R-CNN model implementation.
Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import datetime
import math
import os
import random
import re

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

import utils
import visualize
#from nms.nms_wrapper import nms
from roialign.roi_align.crop_and_resize import CropAndResizeFunction

############################################################
# nms
############################################################
# boxes=np.array([[100,100,210,210,0.72],
        # [250,250,420,420,0.8],
        # [220,220,320,330,0.92],
        # [100,100,210,210,0.72],
        # [230,240,325,330,0.81],
        # [220,230,315,340,0.9]]) 
def nms(dets, thresh):
    # dets: (m,5)  thresh: scalar
    x1 = dets[:,0]
    y1 = dets[:,1]
    x2 = dets[:,2]
    y2 = dets[:,3]
    areas = (y2-y1+1) * (x2-x1+1)
    scores = dets[:,4]
    keep = []
    index = scores.argsort()[::-1]
    while index.size >0:
        i = index[0]       # the first index always has the highest remaining score; keep it directly
        keep.append(i)
        x11 = np.maximum(x1[i], x1[index[1:]])    # calculate the points of overlap 
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22-x11+1)    # width of the overlap
        h = np.maximum(0, y22-y11+1)    # height of the overlap
        overlaps = w*h
        ious = overlaps / (areas[i]+areas[index[1:]] - overlaps)
        idx = np.where(ious<=thresh)[0]
        index = index[idx+1]   # offset by +1 because ious was computed against index[1:]
    return keep
# import matplotlib.pyplot as plt
# def plot_bbox(dets, c='k'):
    
    # x1 = dets[:,0]
    # y1 = dets[:,1]
    # x2 = dets[:,2]
    # y2 = dets[:,3]
    
    # plt.plot([x1,x2], [y1,y1], c)
    # plt.plot([x1,x1], [y1,y2], c)
    # plt.plot([x1,x2], [y2,y2], c)
    # plt.plot([x2,x2], [y1,y2], c)
    # plt.title("after nms")  
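
# A minimal usage sketch of the nms() above, using the sample `boxes` array from the comment
# block before the function (the threshold value is illustrative only):
# keep = nms(boxes, thresh=0.7)
# print(keep)         # -> [2, 1, 0]: box 2 suppresses boxes 4 and 5, box 0 suppresses its duplicate box 3
# print(boxes[keep])  # the surviving boxes, highest score first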

############################################################
#  Logging Utility Functions
############################################################

def log(text, array=None):
    """Prints a text message. And, optionally, if a Numpy array is provided it
    prints its shape, min, and max values.
    if array is not None  shape: (m,n)
    """
    if array is not None:
        text = text.ljust(25) # left-justify the string, padding it with spaces up to the given width;
                              # if the width is smaller than the original length, the string is returned unchanged
        text += ("shape: {:20}  min: {:10.5f}  max: {:10.5f}".format(
            str(array.shape), # turn the shape tuple into a string: (m,n) -> '(m,n)'
            array.min() if array.size else "",  # array.size is the number of elements (m*n); min()/max() are the extrema
            array.max() if array.size else ""))
    print(text)

def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\n')
    # Print New Line on Complete
    if iteration == total:
        print()
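
# A minimal usage sketch (illustrative values): call printProgressBar once per step inside a
# loop so the bar is redrawn as `iteration` advances towards `total`.
# total = 50
# for i in range(total + 1):
#     printProgressBar(i, total, prefix='Progress:', suffix='Complete', length=40)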


############################################################
#  Pytorch Utility Functions
############################################################

def unique1d(tensor):
    if tensor.size()[0] == 0 or tensor.size()[0] == 1:
        return tensor
    tensor = tensor.sort()[0]  # sort the values in ascending order
    unique_bool = tensor[1:] != tensor[:-1]  # compare each element with its predecessor; True marks a new value
    first_element = Variable(torch.ByteTensor([True]), requires_grad=False).bool()
    if tensor.is_cuda:
        first_element = first_element.cuda()
    unique_bool = torch.cat((first_element, unique_bool),dim=0)
    return tensor[unique_bool.data]

# intersection of two 1-D tensors (each assumed to contain no duplicates)
def intersect1d(tensor1, tensor2):  
    assert len(tensor1.shape)==1 and len(tensor2.shape)==1 and len(tensor1)>1 and len(tensor2)>1,"inputs must be 1-D with length > 1"
    aux = torch.cat((tensor1, tensor2),dim=0)
    aux = aux.sort()[0]
    return aux[:-1][(aux[1:] == aux[:-1]).data]
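
# A minimal sketch of what the two helpers above return (illustrative values;
# intersect1d assumes neither input contains internal duplicates):
# unique1d(torch.LongTensor([3, 1, 3, 7]))                               # -> tensor([1, 3, 7])
# intersect1d(torch.LongTensor([1, 3, 7]), torch.LongTensor([7, 2, 1]))  # -> tensor([1, 7])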

def log2(x):
    """Implementatin of Log2. Pytorch doesn't have a native implemenation."""
    ln2 = Variable(torch.log(torch.FloatTensor([2.0])), requires_grad=False)
    if x.is_cuda:
        ln2 = ln2.cuda()
    return torch.log(x) / ln2

class SamePad2d(nn.Module):
    """Mimics tensorflow's 'SAME' padding.
    """

    def __init__(self, kernel_size, stride):
        super(SamePad2d, self).__init__()
        self.kernel_size = torch.nn.modules.utils._pair(kernel_size)  # _pair() turns a scalar into a (h, w) tuple
        self.stride = torch.nn.modules.utils._pair(stride) 

    def forward(self, input):
        #input (batch,c,h,w)
        in_width = input.size()[3]
        in_height = input.size()[2]
        out_width = math.ceil(float(in_width) / float(self.stride[0]))  # ceil: round up
        out_height = math.ceil(float(in_height) / float(self.stride[1]))
        pad_along_width = ((out_width - 1) * self.stride[0] +
                           self.kernel_size[0] - in_width)
        pad_along_height = ((out_height - 1) * self.stride[1] +
                            self.kernel_size[1] - in_height)       # derived from out = (in + pad - k + s) / s, solved for pad
        pad_left = math.floor(pad_along_width / 2)  # floor: round down
        pad_top = math.floor(pad_along_height / 2)
        pad_right = pad_along_width - pad_left
        pad_bottom = pad_along_height - pad_top
        return F.pad(input, (pad_left, pad_right, pad_top, pad_bottom), 'constant', 0)

    def __repr__(self):
        return self.__class__.__name__
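
# A worked example of the 'SAME' padding arithmetic above (illustrative numbers):
# for a 7x7 input with kernel_size=3, stride=2: out = ceil(7/2) = 4,
# pad_along = (4-1)*2 + 3 - 7 = 2, split as pad_left=1, pad_right=1 (same vertically),
# so the padded 9x9 input convolved with a 3x3 / stride-2 kernel gives exactly a 4x4 output.
# pad = SamePad2d(kernel_size=3, stride=2)
# pad(torch.randn(1, 3, 7, 7)).shape   # -> (1, 3, 9, 9)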


############################################################
#  FPN Graph
############################################################


class TopDownLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TopDownLayer, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1)  # 1x1 conv to reduce the channel count
        self.padding2 = SamePad2d(kernel_size=3, stride=1)   # zero-pad the feature map so the following conv keeps its size
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,stride=1)

    def forward(self, x, y):
        y = F.upsample(y, scale_factor=2) # upsample y to twice its height and width
        x = self.conv1(x)                 # adjust the channel count of x
        return self.conv2(self.padding2(x+y))  # fuse x and y, zero-pad, then 3x3 conv so the spatial size is unchanged
          
class FPN(nn.Module):
    def __init__(self, C1, C2, C3, C4, C5, out_channels):
        super(FPN, self).__init__()
        self.out_channels = out_channels  # every pyramid level outputs the same number of channels
        self.C1 = C1    # backbone stages (convolutional blocks)
        self.C2 = C2
        self.C3 = C3
        self.C4 = C4
        self.C5 = C5
        self.P6 = nn.MaxPool2d(kernel_size=1, stride=2)
        
        self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1)
        self.P5_conv2 = nn.Sequential(
            SamePad2d(kernel_size=3, stride=1), # pad first so the following 3x3 conv keeps the spatial size
            nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1), # 3x3 conv to reduce upsampling aliasing
        )
        self.P4_conv1 =  nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1)
        self.P4_conv2 = nn.Sequential(
            SamePad2d(kernel_size=3, stride=1),
            nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1),
        )
        self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1)
        self.P3_conv2 = nn.Sequential(
            SamePad2d(kernel_size=3, stride=1),
            nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1),
        )
        self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1)
        self.P2_conv2 = nn.Sequential(
            SamePad2d(kernel_size=3, stride=1),
            nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1),
        )

    def forward(self, x):
        x = self.C1(x)
        x = self.C2(x)
        c2_out = x
        x = self.C3(x)
        c3_out = x
        x = self.C4(x)
        c4_out = x
        x = self.C5(x)
        p5_out = self.P5_conv1(x)
        p4_out = self.P4_conv1(c4_out) + F.upsample(p5_out, scale_factor=2)
        p3_out = self.P3_conv1(c3_out) + F.upsample(p4_out, scale_factor=2)
        p2_out = self.P2_conv1(c2_out) + F.upsample(p3_out, scale_factor=2)

        p5_out = self.P5_conv2(p5_out)
        p4_out = self.P4_conv2(p4_out)
        p3_out = self.P3_conv2(p3_out)
        p2_out = self.P2_conv2(p2_out)

        # P6 is used for the 5th anchor scale in RPN. Generated by
        # subsampling from P5 with stride of 2.
        p6_out = self.P6(p5_out)

        return [p2_out, p3_out, p4_out, p5_out, p6_out]
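
# A sketch of the shapes FPN.forward produces (assuming a 1024x1024 input and out_channels=256,
# the default configuration):
# p2_out: [batch, 256, 256, 256]   stride 4
# p3_out: [batch, 256, 128, 128]   stride 8
# p4_out: [batch, 256,  64,  64]   stride 16
# p5_out: [batch, 256,  32,  32]   stride 32
# p6_out: [batch, 256,  16,  16]   stride 64 (P5 subsampled; used only by the RPN)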


############################################################
#  Resnet Graph
############################################################
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride)  
        self.bn1 = nn.BatchNorm2d(planes, eps=0.001, momentum=0.01) 
        self.padding2 = SamePad2d(kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(planes, eps=0.001, momentum=0.01)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1)   
        self.bn3 = nn.BatchNorm2d(planes * 4, eps=0.001, momentum=0.01) 
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        # assume x has shape (n, inplanes, h, w)
        out = self.conv1(x)  # 1x1 conv to compress channels  (n, planes, h, w)
        out = self.bn1(out)  # batch norm                     (n, planes, h, w)
        out = self.relu(out) # non-linearity                  (n, planes, h, w)

        out = self.padding2(out)  # add SAME padding          (n, planes, h+pad, w+pad)
        out = self.conv2(out)     # 3x3 conv, size preserved  (n, planes, h, w)
        out = self.bn2(out)       # (n, planes, h, w)
        out = self.relu(out)      # (n, planes, h, w)

        out = self.conv3(out)     # (n, 4*planes, h, w)
        out = self.bn3(out)       # (n, 4*planes, h, w)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
        
class ResNet(nn.Module):

    def __init__(self, architecture, stage5=False):
        super(ResNet, self).__init__()
        assert architecture in ["resnet50", "resnet101"]
        self.inplanes = 64
        self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
        self.block = Bottleneck
        self.stage5 = stage5

        self.C1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64, eps=0.001, momentum=0.01),
            nn.ReLU(inplace=True),
            SamePad2d(kernel_size=3, stride=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.C2 = self.make_layer(self.block, 64, self.layers[0])
        self.C3 = self.make_layer(self.block, 128, self.layers[1], stride=2)
        self.C4 = self.make_layer(self.block, 256, self.layers[2], stride=2)
        if self.stage5:
            self.C5 = self.make_layer(self.block, 512, self.layers[3], stride=2)
        else:
            self.C5 = None

    def forward(self, x):
        x = self.C1(x)
        x = self.C2(x)
        x = self.C3(x)
        x = self.C4(x)
        x = self.C5(x)
        return x


    def stages(self):
        return [self.C1, self.C2, self.C3, self.C4, self.C5]

    def make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes * block.expansion, eps=0.001, momentum=0.01),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
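
# A minimal sketch of how the backbone and FPN above fit together (this mirrors how the full
# MaskRCNN model, not shown in this excerpt, wires them up):
# resnet = ResNet("resnet101", stage5=True)
# C1, C2, C3, C4, C5 = resnet.stages()
# fpn = FPN(C1, C2, C3, C4, C5, out_channels=256)
# p2, p3, p4, p5, p6 = fpn(torch.randn(1, 3, 1024, 1024))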


############################################################
#  Proposal Layer
############################################################

def apply_box_deltas(boxes, deltas):
    """Applies the given deltas to the given boxes.
    boxes: [N, 4] where each row is y1, x1, y2, x2
    deltas: [N, 4] where each row is [dy, dx, log(dh), log(dw)]
    """
    # Convert to y, x, h, w
    height = boxes[:, 2] - boxes[:, 0]
    width = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * height
    center_x = boxes[:, 1] + 0.5 * width
    # Apply deltas
    center_y += deltas[:, 0] * height
    center_x += deltas[:, 1] * width
    height *= torch.exp(deltas[:, 2])
    width *= torch.exp(deltas[:, 3])
    # Convert back to y1, x1, y2, x2
    y1 = center_y - 0.5 * height
    x1 = center_x - 0.5 * width
    y2 = y1 + height
    x2 = x1 + width
    result = torch.stack([y1, x1, y2, x2], dim=1)
    return result
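
# A worked example of apply_box_deltas (illustrative numbers): a 100x100 box at the origin with
# deltas [0.1, 0.1, log(2), log(2)] has its center shifted by (10, 10) and its size doubled:
# boxes  = torch.FloatTensor([[0., 0., 100., 100.]])
# deltas = torch.FloatTensor([[0.1, 0.1, math.log(2), math.log(2)]])
# apply_box_deltas(boxes, deltas)   # -> tensor([[-40., -40., 160., 160.]])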

def clip_boxes(boxes, window):
    """
    boxes: [N, 4] each col is y1, x1, y2, x2
    window: [4] in the form y1, x1, y2, x2
    """
    boxes = torch.stack( \
        [boxes[:, 0].clamp(float(window[0]), float(window[2])),
         boxes[:, 1].clamp(float(window[1]), float(window[3])),
         boxes[:, 2].clamp(float(window[0]), float(window[2])),
         boxes[:, 3].clamp(float(window[1]), float(window[3]))], 1)
    return boxes

def proposal_layer(inputs, proposal_count, nms_threshold, anchors, config=None):
    """Receives anchor scores and selects a subset to pass as proposals
    to the second stage. Filtering is done based on anchor scores and
    non-max suppression to remove overlaps. It also applies bounding
    box refinement deltas to anchors.
    anchors: (anchors_num, 4), given in input-image pixel coordinates
    Inputs:
        rpn_probs: [batch, anchors_num, (bg prob, fg prob)]
        rpn_bbox: [batch, anchors_num, (dy, dx, log(dh), log(dw))]
    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """

    # Currently only supports batchsize 1
    # inputs is a list: [rpn_probs, rpn_bbox]
    # inputs[0] is rpn_probs (batch, anchors_num, 2)
    # inputs[1] is rpn_bbox  (batch, anchors_num, 4)

    inputs[0] = inputs[0].squeeze(0)  # drop the batch dimension; only one image per batch is supported
    inputs[1] = inputs[1].squeeze(0)

    # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
    scores = inputs[0][:, 1]

    # Box deltas [batch, num_rois, 4]
    deltas = inputs[1] # the per-anchor box deltas predicted by the RPN for this image
    #RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    std_dev = Variable(torch.from_numpy(np.reshape(config.RPN_BBOX_STD_DEV, [1, 4])).float(), requires_grad=False)
    if config.GPU_COUNT:   # GPU_COUNT=1 means run on the GPU, 0 means run on the CPU
        std_dev = std_dev.cuda()
    deltas = deltas * std_dev

    # Improve performance by trimming to top anchors by score
    # and doing the rest on the smaller subset.
    pre_nms_limit = min(6000, anchors.size()[0])   # how many anchors to keep before NMS
    scores, order = scores.sort(descending=True)   # sort the scores in descending order; order holds their original indices
    order = order[:pre_nms_limit]                  # indices of the top pre_nms_limit scores
    scores = scores[:pre_nms_limit]                # (pre_nms_limit,)
    deltas = deltas[order.data, :] # TODO: Support batch size > 1 ff.  # deltas of the top pre_nms_limit anchors
    anchors = anchors[order.data, :]               # likewise for the anchors

    # Apply deltas to anchors to get refined anchors.
    # [batch, N, (y1, x1, y2, x2)]
    boxes = apply_box_deltas(anchors, deltas)      # refine the anchors with the predicted deltas,
                                                   # turning them into corner coordinates
                                                   # (pre_nms_limit, (y1, x1, y2, x2))

    # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
    height, width = config.IMAGE_SHAPE[:2]         # the image boundaries
    window = np.array([0, 0, height, width]).astype(np.float32)  
    boxes = clip_boxes(boxes, window)       # clamp the boxes to lie inside the image

    # Filter out small boxes
    # According to Xinlei Chen's paper, this reduces detection accuracy
    # for small objects, so we're skipping it.

    # Non-max suppression
    # torch.cat((boxes, scores.unsqueeze(1)), 1) expands scores to (pre_nms_limit, 1) and concatenates it
    # with boxes (pre_nms_limit, 4) along dim=1, giving a (pre_nms_limit, 5) tensor
    keep = nms(torch.cat((boxes, scores.unsqueeze(1)), 1).data, nms_threshold) # keep is a list of box indices that survive NMS
    if len(keep)>proposal_count:
        keep = keep[:proposal_count]
    boxes = boxes[keep, :]

    # Normalize dimensions to range of 0 to 1.
    norm = Variable(torch.from_numpy(np.array([height, width, height, width])).float(), requires_grad=False)
    if config.GPU_COUNT:
        norm = norm.cuda()
    normalized_boxes = boxes / norm  # normalize the box coordinates to [0, 1]

    # Add back batch dimension
    normalized_boxes = normalized_boxes.unsqueeze(0)  # (proposal_count, 4) -> (1, proposal_count, 4)

    return normalized_boxes


############################################################
#  ROIAlign Layer
############################################################

def pyramid_roi_align(inputs, pool_size, image_shape):
    """Implements ROI Pooling on multiple levels of the feature pyramid.
    Params:
    - pool_size: [height, width] of the output pooled regions. Usually [7, 7]
    - image_shape: [channels,height, width]. Shape of input image in pixels  
    Inputs:
    - boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized
             coordinates.
    - Feature maps: List of feature maps from different levels of the pyramid.
                    Each is [batch, channels, height, width]
    Output:
    Pooled regions in the shape: [num_boxes, channels, height, width].
    The width and height are those specified in pool_size.
    """
    """

    # Currently only supports batchsize 1
    for i in range(len(inputs)):
        inputs[i] = inputs[i].squeeze(0)  # drop the batch dimension

    # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords
    boxes = inputs[0]  # (num_boxes, 4)

    # Feature Maps. List of feature maps from different level of the
    # feature pyramid. Each is [batch,channels, height, width]
    feature_maps = inputs[1:]  # [p2, p3, p4, p5, p6]

    # Assign each ROI to a level in the pyramid based on the ROI area.
    y1, x1, y2, x2 = boxes.chunk(4, dim=1)  # split the boxes into four column chunks
    h = y2 - y1        
    w = x2 - x1

    # Equation 1 in the Feature Pyramid Networks paper. Account for
    # the fact that our coordinates are normalized here.
    # a 224x224 ROI (in pixels) maps to P4
    image_area = Variable(torch.FloatTensor([float(image_shape[1]*image_shape[2])]), requires_grad=False)
    if boxes.is_cuda:  
        image_area = image_area.cuda()
    roi_level = 4 + log2(torch.sqrt(h*w)/(224.0/torch.sqrt(image_area)))  # choose which pyramid level each ROI is pooled from
    roi_level = roi_level.round().int()   # round() rounds to the nearest integer
    roi_level = roi_level.clamp(2,5)   # the FPN produces [P2,P3,P4,P5,P6], but only [P2,P3,P4,P5] are used for ROI pooling
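
    # A worked example of the level-assignment rule above (Equation 1 of the FPN paper with k0 = 4;
    # illustrative numbers): with a 1024x1024 image, a ROI covering 224x224 pixels has
    # sqrt(h*w) = 224/1024 in normalized coordinates, so
    # roi_level = 4 + log2((224/1024) / (224/1024)) = 4  -> pooled from P4;
    # a 112x112 ROI gives 4 + log2(1/2) = 3 -> P3, and a 448x448 ROI gives 5 -> P5.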


    # Loop through levels and apply ROI pooling to each. P2 to P5.
    pooled = []
    box_to_level = []
    for i, level in enumerate(range(2, 6)):  # i in [0,1,2,3], level in [2,3,4,5]
        ix  = roi_level==level   # bool mask
        if not ix.any(): # any() returns True if at least one element is True, otherwise False
            continue
        assert len(ix.shape)>1,"ix has ndim <= 1"
        ix = torch.nonzero(ix)[:,0] # torch.nonzero() returns the indices of the non-zero elements, shape (m, n1, n2, ...):
                                    # m is the number of non-zero elements; the remaining dims index into the input
                                    # e.g. ix=tensor([0,1,0,1])             -> tensor([[1],[3]])                  shape (2,1)
                                    #      ix=tensor([[0,1,0,1],[0,1,0,1]]) -> tensor([[0,1],[0,3],[1,0],[1,3]])  shape (4,2)
        # since ix has shape [n,1], torch.nonzero(ix) has shape (m,2); taking [:,0] gives the indices of the non-zero rows
        level_boxes = boxes[ix.data, :]   # the ROIs assigned to this level (2, 3, 4 or 5)
        # Keep track of which box is mapped to which level
        box_to_level.append(ix.data) # record the original indices of the ROIs pooled at this level

        # Stop gradient propogation to ROI proposals
        level_boxes = level_boxes.detach()  # detach from the graph so no gradient flows back into the proposals

        # Crop and Resize
        # From Mask R-CNN paper: "We sample four regular locations, so
        # that we can evaluate either max or average pooling. In fact,
        # interpolating only a single value at each bin center (without
        # pooling) is nearly as effective."
        #
        # Here we use the simplified approach of a single value per bin,
        # which is how it's done in tf.crop_and_resize()
        # Result: [batch * num_boxes, pool_height, pool_width, channels]
        per_level_boxes_num=level_boxes.size()[0]  # how many boxes fall on this pyramid level
        ind = Variable(torch.zeros(per_level_boxes_num),requires_grad=False).int() # image index per box (all zeros: batch size 1)
        if level_boxes.is_cuda:
            ind = ind.cuda()
        feature_maps[i] = feature_maps[i].unsqueeze(0) # take this level's feature map and add a batch dimension [1,c,h,w]
                                                       # CropAndResizeFunction needs batch dimension

        pooled_features = CropAndResizeFunction(pool_size, pool_size, 0)(feature_maps[i], level_boxes, ind) 
                                                       # output shape (per_level_boxes_num, c, pool_size, pool_size)
        pooled.append(pooled_features)

    # Pack pooled features into one tensor
    pooled = torch.cat(pooled, dim=0)   # concatenate along dim 0 -> (num_rois, c, pool_size, pool_size)

    # Pack box_to_level mapping into one array and add another
    # column representing the order of pooled boxes
    box_to_level = torch.cat(box_to_level, dim=0)    

    # Rearrange pooled features to match the order of the original boxes
    _, box_to_level = torch.sort(box_to_level)  # pooling was done level by level, which scrambled the original
                                                # (score-sorted) order of the ROIs; sorting the mapping lets us
                                                # restore that original order

    pooled = pooled[box_to_level, :, :, :]  # first dim is the number of boxes: (boxes_num, c, 7, 7)

    return pooled


############################################################
#  Detection Target Layer
############################################################
def bbox_overlaps(boxes1, boxes2):
    """Computes IoU overlaps between two sets of boxes.
    boxes1, boxes2: [N, (y1, x1, y2, x2)].
    """
    # 1. Tile boxes2 and repeate boxes1. This allows us to compare
    # every boxes1 against every boxes2 without loops.
    # TF doesn't have an equivalent to np.repeate() so simulate it
    # using tf.tile() and tf.reshape.
    boxes1_repeat = boxes2.size()[0]   # each box in boxes1 will be repeated once per box in boxes2
    boxes2_repeat = boxes1.size()[0]   # boxes2 will be tiled once per box in boxes1
    boxes1 = boxes1.repeat(1,boxes1_repeat).view(-1,4)  # repeat row-wise: row 0 is repeated boxes1_repeat times,
                                                        # then row 1 boxes1_repeat times, and so on

    boxes2 = boxes2.repeat(boxes2_repeat,1)  # tile the whole of boxes2 boxes2_repeat times

    # 2. Compute intersections
    b1_y1, b1_x1, b1_y2, b1_x2 = boxes1.chunk(4, dim=1) # chunk keeps the number of dimensions of the input
    b2_y1, b2_x1, b2_y2, b2_x2 = boxes2.chunk(4, dim=1)  
    y1 = torch.max(b1_y1, b2_y1)[:, 0]  # element-wise max; [:, 0] turns the (N,1) column back into a 1-D vector
    x1 = torch.max(b1_x1, b2_x1)[:, 0]
    y2 = torch.min(b1_y2, b2_y2)[:, 0]
    x2 = torch.min(b1_x2, b2_x2)[:, 0]
    zeros = Variable(torch.zeros(y1.size()[0]), requires_grad=False)
    if y1.is_cuda:
        zeros = zeros.cuda()
    intersection = torch.max(x2 - x1, zeros) * torch.max(y2 - y1, zeros)

    # 3. Compute unions
    b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1)
    b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1)
    union = b1_area[:,0] + b2_area[:,0] - intersection

    # 4. Compute IoU and reshape to [boxes1, boxes2]
    iou = intersection / union
    overlaps = iou.view(boxes2_repeat, boxes1_repeat)

    return overlaps
    
  
def ious(boxes1,boxes2):
    '''
    inputs:
        boxes1: [n,4]
        boxes2: [m,4]
    outputs:
        [n,m]
    '''
    
    b1_y1,b1_x1,b1_y2,b1_x2=boxes1[:,0],boxes1[:,1],boxes1[:,2],boxes1[:,3]
    b2_y1,b2_x1,b2_y2,b2_x2=boxes2[:,0],boxes2[:,1],boxes2[:,2],boxes2[:,3]
   
    y1=torch.max(b1_y1.unsqueeze(1),b2_y1)
    x1=torch.max(b1_x1.unsqueeze(1),b2_x1)
    y2=torch.min(b1_y2.unsqueeze(1),b2_y2)
    x2=torch.min(b1_x2.unsqueeze(1),b2_x2)
   
    intersection = torch.max(x2 - x1, torch.zeros(x1.size()))* torch.max(y2 - y1, torch.zeros(y1.size()))
   
    b1_area = ((b1_y2 - b1_y1) * (b1_x2 - b1_x1))
    b2_area = ((b2_y2 - b2_y1) * (b2_x2 - b2_x1))
    areas=b1_area.unsqueeze(1)+b2_area
    
    ious=intersection/(areas-intersection)
    return ious
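
# A worked example of ious() above (illustrative numbers): two 10x10 boxes that share half
# their area have IoU = 50 / (100 + 100 - 50) = 1/3:
# b1 = torch.FloatTensor([[0., 0., 10., 10.]])
# b2 = torch.FloatTensor([[0., 5., 10., 15.]])
# ious(b1, b2)   # -> tensor([[0.3333]])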
                         
    
    
    

def detection_target_layer(proposals, gt_class_ids, gt_boxes, gt_masks, config):
    """Subsamples proposals and generates target box refinment, class_ids,
    and masks for each.
    Inputs:
    proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might
               be zero padded if there are not enough proposals.   rpn_rois
    gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs.
    gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized
              coordinates.
    gt_masks: [batch,MAX_GT_INSTANCES, height, width,] of boolean type
    Returns: Target ROIs and corresponding class IDs, bounding box shifts,
    and masks.
    rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
          coordinates
    target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
    target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, NUM_CLASSES,
                    (dy, dx, log(dh), log(dw), class_id)]
                   Class-specific bbox refinements.
    target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width)
                 Masks cropped to bbox boundaries and resized to neural
                 network output size.
    """

    # Currently only supports batchsize 1
    proposals = proposals.squeeze(0) # drop the batch dimension
    gt_class_ids = gt_class_ids.squeeze(0)
    gt_boxes = gt_boxes.squeeze(0)
    gt_masks = gt_masks.squeeze(0)

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    # gt_class_ids < 0 marks crowd boxes (to be excluded from training),
    # gt_class_ids > 0 marks ordinary instances.
    if torch.nonzero(gt_class_ids < 0).size():   # there are crowd boxes (negative class IDs)
        crowd_ix = torch.nonzero(gt_class_ids < 0)[:, 0]  # indices where gt_class_ids < 0
        non_crowd_ix = torch.nonzero(gt_class_ids > 0)[:, 0] # indices where gt_class_ids > 0

        crowd_boxes = gt_boxes[crowd_ix.data, :]             # boxes of the crowd regions (gt_class_ids < 0)
        crowd_masks = gt_masks[:,:,crowd_ix.data]            # masks of the crowd regions
        gt_class_ids = gt_class_ids[non_crowd_ix.data]
        gt_boxes = gt_boxes[non_crowd_ix.data, :]            # keep only the ordinary (non-crowd) GT boxes
        gt_masks = gt_masks[:,:,non_crowd_ix.data]

        # Compute overlaps with crowd boxes [anchors, crowds]
        crowd_overlaps = bbox_overlaps(proposals, crowd_boxes) # IoU of every proposal with every crowd box
        crowd_iou_max = torch.max(crowd_overlaps, dim=1)[0]    # per-proposal maximum IoU with any crowd box
        no_crowd_bool = crowd_iou_max < 0.001                  # True for proposals that barely touch any crowd box
    else:
        # no crowd boxes: every proposal counts as non-crowd
        no_crowd_bool =  Variable(torch.ByteTensor(proposals.size()[0]*[True]), requires_grad=False)
        if config.GPU_COUNT:
            no_crowd_bool = no_crowd_bool.cuda()

    # Compute overlaps matrix [proposals, gt_boxes]
    overlaps = bbox_overlaps(proposals, gt_boxes)   # IoU of every proposal with every GT box

    # Determine positive and negative ROIs
    roi_iou_max = torch.max(overlaps, dim=1)[0]

    # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
    positive_roi_bool = roi_iou_max >= 0.5        # proposals whose best IoU with any GT box is >= 0.5 are positives

    # Subsample ROIs. Aim for 33% positive
    # Positive ROIs
    if torch.nonzero(positive_roi_bool).size():
        positive_indices = torch.nonzero(positive_roi_bool)[:, 0]  # indices of the positive proposals

        positive_count = int(config.TRAIN_ROIS_PER_IMAGE *   
                             config.ROI_POSITIVE_RATIO)         # desired number of positives, e.g. int(200 * 0.33)
        rand_idx = torch.randperm(positive_indices.size()[0])   # a random permutation of 0 .. n-1,
                                                                # i.e. shuffled positions into positive_indices
        rand_idx = rand_idx[:positive_count]                    # take the first positive_count shuffled positions
        if config.GPU_COUNT:
            rand_idx = rand_idx.cuda()
        positive_indices = positive_indices[rand_idx] # the randomly chosen subset; these values index into proposals
        positive_count = positive_indices.size()[0]   # the number of positives actually selected
        positive_rois = proposals[positive_indices.data,:]  # the selected positive proposals

        # Assign positive ROIs to GT boxes.
        positive_overlaps = overlaps[positive_indices.data,:]  # IoU of the selected positives with every GT box
        roi_gt_box_assignment = torch.max(positive_overlaps, dim=1)[1] # column index of each row's maximum,
                                                                       # i.e. which GT box best matches each positive
        roi_gt_boxes = gt_boxes[roi_gt_box_assignment.data,:] # the best-matching GT box is assigned to each positive,
                                                              # and its label (box coordinates and class)
                                                              # becomes that positive's training target
        roi_gt_class_ids = gt_class_ids[roi_gt_box_assignment.data]

        # Compute bbox refinement for positive ROIs
        # targets for refining the positive proposals: the offsets between each positive and its assigned GT box
        deltas = Variable(utils.box_refinement(positive_rois.data, roi_gt_boxes.data), requires_grad=False)
        # BBOX_STD_DEV=np.array([0.1, 0.1, 0.2, 0.2])
        std_dev = Variable(torch.from_numpy(config.BBOX_STD_DEV).float(), requires_grad=False)
        if config.GPU_COUNT:
            std_dev = std_dev.cuda()
        deltas /= std_dev         # the targets are divided by tensor([0.1, 0.1, 0.2, 0.2]) during training, which is
                                  # why proposal_layer multiplies the network's predicted deltas by the same values

        # Assign positive ROIs to GT masks
        roi_masks = gt_masks[roi_gt_box_assignment.data,:,:]  # the GT mask assigned to each positive

        # Compute mask targets
        boxes = positive_rois      
        if config.USE_MINI_MASK:
            # Transform ROI corrdinates from normalized image space
            # to normalized mini-mask space.
            y1, x1, y2, x2 = positive_rois.chunk(4, dim=1)   # corners of the positive proposals
            gt_y1, gt_x1, gt_y2, gt_x2 = roi_gt_boxes.chunk(4, dim=1) # corners of the GT boxes assigned to them
            gt_h = gt_y2 - gt_y1
            gt_w = gt_x2 - gt_x1
            dy1 = (y1 - gt_y1) / gt_h
            dx1 = (x1 - gt_x1) / gt_w
            dy2 = (y2 - gt_y1) / gt_h
            dx2 = (x2 - gt_x1) / gt_w
            boxes = torch.cat([dy1, dx1, dy2, dx2], dim=1)  # the proposal corners expressed relative to the assigned GT box
        box_ids = Variable(torch.arange(roi_masks.size()[0]), requires_grad=False).int()
        if config.GPU_COUNT:
            box_ids = box_ids.cuda()
        masks = Variable(CropAndResizeFunction(config.MASK_SHAPE[0], config.MASK_SHAPE[1], 0)(roi_masks.unsqueeze(1), boxes, box_ids).data, requires_grad=False)
                # align each mask to its box, i.e. crop and resize it from (56,56) down to MASK_SHAPE (28,28)
        masks = masks.squeeze(1)  

        # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with
        # binary cross entropy loss.
        masks = torch.round(masks)
    else:
        positive_count = 0

    # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
    negative_roi_bool = roi_iou_max < 0.5     # proposals whose best IoU with any GT box is < 0.5 are negative candidates
    negative_roi_bool = negative_roi_bool & no_crowd_bool    # a negative must satisfy both conditions: IoU < 0.5 with
                                                             # every GT box and IoU < 0.001 with every crowd box
    # Negative ROIs. Add enough to maintain positive:negative ratio.
    if torch.nonzero(negative_roi_bool).size() and positive_count>0:
        negative_indices = torch.nonzero(negative_roi_bool)[:, 0]    # indices of the negative candidates
        r = 1.0 / config.ROI_POSITIVE_RATIO
        negative_count = int(r * positive_count - positive_count)
        rand_idx = torch.randperm(negative_indices.size()[0])
        rand_idx = rand_idx[:negative_count]
        if config.GPU_COUNT:
            rand_idx = rand_idx.cuda()
        negative_indices = negative_indices[rand_idx]
        negative_count = negative_indices.size()[0]
        negative_rois = proposals[negative_indices.data, :]
    else:
        negative_count = 0

    # Append negative ROIs and pad bbox deltas and masks that
    # are not used for negative ROIs with zeros.
    if positive_count > 0 and negative_count > 0:
        rois = torch.cat((positive_rois, negative_rois), dim=0)  # stack positives and negatives
        zeros = Variable(torch.zeros(negative_count), requires_grad=False).int()   # class label 0 (background) for the negatives
        if config.GPU_COUNT:
            zeros = zeros.cuda()
        roi_gt_class_ids = torch.cat([roi_gt_class_ids, zeros], dim=0)        # stack the class labels
        zeros = Variable(torch.zeros(negative_count,4), requires_grad=False)  # (dy1,dx1,dy2,dx2) targets of negatives are zero
        if config.GPU_COUNT:
            zeros = zeros.cuda()
        deltas = torch.cat([deltas, zeros], dim=0)     # stack the box-refinement targets
        zeros = Variable(torch.zeros(negative_count,config.MASK_SHAPE[0],config.MASK_SHAPE[1]), requires_grad=False)
                                                       # mask targets of negatives are all zero
        if config.GPU_COUNT:
            zeros = zeros.cuda()
        masks = torch.cat([masks, zeros], dim=0)  # stack the mask targets
    elif positive_count > 0:  # only positives, no negatives
        rois = positive_rois
    elif negative_count > 0:  # only negatives, no positives
        rois = negative_rois
        zeros = Variable(torch.zeros(negative_count), requires_grad=False)
        if config.GPU_COUNT:
            zeros = zeros.cuda()
        roi_gt_class_ids = zeros
        zeros = Variable(torch.zeros(negative_count,4), requires_grad=False).int()
        if config.GPU_COUNT:
            zeros = zeros.cuda()
        deltas = zeros
        zeros = Variable(torch.zeros(negative_count,config.MASK_SHAPE[0],config.MASK_SHAPE[1]), requires_grad=False)
        if config.GPU_COUNT:
            zeros = zeros.cuda()
        masks = zeros
    else:   # neither positives nor negatives
        rois = Variable(torch.FloatTensor(), requires_grad=False)
        roi_gt_class_ids = Variable(torch.IntTensor(), requires_grad=False)
        deltas = Variable(torch.FloatTensor(), requires_grad=False)
        masks = Variable(torch.FloatTensor(), requires_grad=False)
        if config.GPU_COUNT:
            rois = rois.cuda()
            roi_gt_class_ids = roi_gt_class_ids.cuda()
            deltas = deltas.cuda()
            masks = masks.cuda()

    return rois, roi_gt_class_ids, deltas, masks

############################################################
#  Detection Layer
############################################################

def clip_to_window(window, boxes):
    """
        window: (y1, x1, y2, x2). The window in the image we want to clip to.
        boxes: [N, (y1, x1, y2, x2)]
    """
    boxes[:, 0] = boxes[:, 0].clamp(float(window[0]), float(window[2]))
    boxes[:, 1] = boxes[:, 1].clamp(float(window[1]), float(window[3]))
    boxes[:, 2] = boxes[:, 2].clamp(float(window[0]), float(window[2]))
    boxes[:, 3] = boxes[:, 3].clamp(float(window[1]), float(window[3]))

    return boxes

def refine_detections(rois, probs, deltas, window, config):
    """Refine classified proposals and filter overlaps and return final
    detections.
    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
                bounding box deltas.
        window: (y1, x1, y2, x2) in image coordinates. The part of the image
            that contains the image excluding the padding.
    Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)]
    """

    # Class IDs per ROI
    class_scores, class_ids = torch.max(probs, dim=1) # per-row maximum value and its column index;
                                                      # each row holds one ROI's scores over all classes, so the argmax
                                                      # is that ROI's predicted class and the max is its confidence

    # Class probability of the top class of each ROI
    # Class-specific bounding box deltas
    idx=torch.arange(class_ids.size()[0]).long()
    if config.GPU_COUNT:
        idx = idx.cuda()
    #class_scores = probs[idx, class_ids.data]
    deltas_specific = deltas[idx, class_ids.data,:]  # idx picks exactly one delta per ROI:
                                                     # the delta belonging to that ROI's highest-scoring class

    # Apply bounding box deltas
    # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
    std_dev = Variable(torch.from_numpy(np.reshape(config.RPN_BBOX_STD_DEV, [1, 4])).float(), requires_grad=False)
    if config.GPU_COUNT:
        std_dev = std_dev.cuda()
    refined_rois = apply_box_deltas(rois, deltas_specific * std_dev) # refine each ROI with the delta of its predicted class

    # Convert coordinates to image domain
    height, width = config.IMAGE_SHAPE[:2] # image shape
    scale = Variable(torch.from_numpy(np.array([height, width, height, width])).float(), requires_grad=False)
    if config.GPU_COUNT:
        scale = scale.cuda()
    refined_rois *= scale     # scale the refined ROIs from normalized coordinates back to image pixels

    # Clip boxes to image window
    refined_rois = clip_to_window(window, refined_rois)  # clamp the scaled ROIs to the image window

    # Round and cast to int since we're dealing with pixels now
    refined_rois = torch.round(refined_rois) # round the coordinates to whole pixels

    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    # class_id 0 is the background class
    keep_bool = class_ids>0   

    # Filter out low confidence boxes
    if config.DETECTION_MIN_CONFIDENCE:
        keep_bool = keep_bool & (class_scores >= config.DETECTION_MIN_CONFIDENCE) 
    keep = torch.nonzero(keep_bool)[:,0]  # indices of foreground ROIs whose class score passes the confidence threshold

    # Apply per-class NMS
    pre_nms_class_ids = class_ids[keep.data]  # classes of the kept ROIs  (m,)
    pre_nms_scores = class_scores[keep.data]  # (m,)
    pre_nms_rois = refined_rois[keep.data]   # (m,4)

    # run NMS separately for each class present in the image;
    # pre_nms_class_ids has one entry per ROI, so class IDs repeat, and
    # unique1d (much like set() on a list) reduces them to one entry per class
    
    for i, class_id in enumerate(unique1d(pre_nms_class_ids)):
        # Pick detections of this class
        ixs = torch.nonzero(pre_nms_class_ids == class_id)[:,0]

        # Sort
        ix_rois = pre_nms_rois[ixs.data]
        ix_scores = pre_nms_scores[ixs]
        ix_scores, order = ix_scores.sort(descending=True)
        ix_rois = ix_rois[order.data,:]  # sort this class's ROIs by their class score

        class_keep = nms(torch.cat((ix_rois, ix_scores.unsqueeze(1)), dim=1).data, config.DETECTION_NMS_THRESHOLD) # NMS
                                         # class_keep holds indices into ix_rois

        # Map indices
        # keep:       indices of foreground ROIs above the confidence threshold
        # class_keep: indices (within this class, after sorting) that survive NMS
        # order:      this class's indices sorted by score, descending
        # ixs:        this class's indices in their original (unsorted) order
        class_keep = keep[ixs[order[class_keep].data].data]  # order[class_keep] maps the NMS survivors back to positions in ixs

        if i==0:
            nms_keep = class_keep
        else:
            nms_keep = unique1d(torch.cat((nms_keep, class_keep)))# unique1d is largely redundant here: each class_keep holds
                                                                  # the post-NMS indices of a single class, and indices of
                                                                  # different classes never overlap
    
    keep = intersect1d(keep, nms_keep)  # intersection of the two index sets (neither contains duplicates); also mostly
                                        # redundant, since every class_keep was taken from keep; note that if the for
                                        # loop body never runs, nms_keep is undefined and this line raises an error
                                 

    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES  # keep at most DETECTION_MAX_INSTANCES (default 100) final detections
    rest_sample=len(class_scores[keep.data])
    if roi_count < rest_sample:
        # keep only the roi_count highest-scoring detections
        keep = keep[class_scores[keep.data].sort(descending=True)[1][:roi_count].data]
    # ... (the remainder of refine_detections, the Detection Layer, the RPN / classifier / mask
    #      head modules and the RPN and classifier loss functions are omitted in this excerpt) ...


def compute_mrcnn_bbox_loss(target_bbox, target_class_ids, pred_bbox):
    """Loss for Mask R-CNN bounding box refinement.
    target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
    target_class_ids: [batch, num_rois]. Integer class IDs.
    pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]
    """
    if target_class_ids.size():
        # Only positive ROIs contribute to the loss.
        positive_roi_ix = torch.nonzero(target_class_ids > 0)[:,0]
        # Gather the deltas (predicted and true) that contribute to loss
        target_bbox = target_bbox[positive_roi_ix,:]
        pred_bbox = pred_bbox[positive_roi_ix,:,:]

        # Smooth L1 loss
        loss = F.smooth_l1_loss(pred_bbox, target_bbox)
    else:
        loss = Variable(torch.FloatTensor([0]), requires_grad=False)
        if target_class_ids.is_cuda:
            loss = loss.cuda()
    return loss
    
def compute_mrcnn_mask_loss(target_masks, target_class_ids, pred_masks):
    """Mask binary cross-entropy loss for the masks head.

    target_masks: [batch, num_rois, height, width].
        A float32 tensor of values 0 or 1. Uses zero padding to fill array.
    target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded.
    pred_masks: [batch, proposals, height, width, num_classes] float32 tensor
                with values from 0 to 1.
    """
    if target_class_ids.size():
        # Only positive ROIs contribute to the loss. And only
        # the class specific mask of each ROI.
        positive_ix = torch.nonzero(target_class_ids > 0)[:, 0]
        positive_class_ids = target_class_ids[positive_ix.data].long()
        indices = torch.stack((positive_ix, positive_class_ids), dim=1)

        # Gather the masks (predicted and true) that contribute to loss
        y_true = target_masks[indices[:,0].data,:,:]
        y_pred = pred_masks[indices[:,0].data,indices[:,1].data,:,:]

        # Binary cross entropy
        loss = F.binary_cross_entropy(y_pred, y_true)
    else:
        loss = Variable(torch.FloatTensor([0]), requires_grad=False)
        if target_class_ids.is_cuda:
            loss = loss.cuda()

    return loss
    
# An alternative, vectorized implementation of compute_mrcnn_mask_loss (disabled; kept for reference):
# def compute_mrcnn_mask_loss(target_masks, target_class_ids, pred_masks):
#     """Mask binary cross-entropy loss for the masks head.
#
#     target_masks: [batch, num_rois, height, width].
#         A float32 tensor of values 0 or 1. Uses zero padding to fill array.
#     target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded.
#     pred_masks: [batch, num_rois, height, width, num_classes] float32 tensor
#                 with values from 0 to 1.
#     """
#     if target_class_ids.size():
#         target_masks=target_masks.view(-1,target_masks.size(2),target_masks.size()[3]) #(m,height,width)
#         target_class_ids=target_class_ids.view(-1)  #(m,)
#         pred_masks=pred_masks.view(-1,pred_masks.size()[2],pred_masks.size()[3],pred_masks.size()[4]) #(N,height,width,num_classes)
#         #pred_masks=pred_masks.permute(0, 3, 1, 2) #(N,num_classes,height,width)
#
#         positive_ix=torch.nonzero(target_class_ids>0)[:,0]
#         y_true=target_masks[positive_ix,:,:].view(-1)
#         y_pred=pred_masks[positive_ix,:,:,:].view(-1,pred_masks.size()[-1])
#         print(y_true.shape)
#         print(y_pred.shape)
#         loss = F.binary_cross_entropy(y_pred, y_true)
#     else:
#         loss = Variable(torch.FloatTensor([0]), requires_grad=False)
#         if target_class_ids.is_cuda:
#             loss = loss.cuda()
#     return loss
    
    

def compute_losses(rpn_match,rpn_class_logits, rpn_target_bbox, rpn_pred_bbox,
                  target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask):
    # rpn_match: the label of every anchor (-1 negative, 0 neutral / ignored, 1 positive)
    # rpn_target_bbox: target deltas for the positive anchors
    # rpn_class_logits: scores predicted by the RPN (b, num_anchor, 2)
    # rpn_pred_bbox: boxes predicted by the RPN (b, num_anchor, 4)
    # target_class_ids: (batch, num_rois)
    # mrcnn_class_logits: (batch, num_rois, num_classes)
    # target_deltas: (batch, num_rois, 4)
    # mrcnn_bbox: (batch, num_rois, num_classes, 4)
    rpn_class_loss = compute_rpn_class_loss(rpn_match, rpn_class_logits)
    rpn_bbox_loss = compute_rpn_bbox_loss(rpn_target_bbox, rpn_match, rpn_pred_bbox)
    mrcnn_class_loss = compute_mrcnn_class_loss(target_class_ids, mrcnn_class_logits)
    mrcnn_bbox_loss = compute_mrcnn_bbox_loss(target_deltas, target_class_ids, mrcnn_bbox)
    mrcnn_mask_loss = compute_mrcnn_mask_loss(target_mask, target_class_ids, mrcnn_mask)
    return [rpn_class_loss, rpn_bbox_loss, mrcnn_class_loss, mrcnn_bbox_loss, mrcnn_mask_loss]


############################################################
#  Data Generator
############################################################

def load_image_gt(dataset, config, image_id, augment=False,
                  use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).
    augment: If true, apply random image augmentation. Currently, only
        horizontal flipping is offered.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.
    Returns:
    image: [3,height, width]
    shape: the original shape of the image before resizing and cropping.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    # Load image and mask
    image = dataset.load_image(image_id)  # image_id is an index; this returns one image as a numpy (BGR) array.
                                          # For training, every image is registered in the dataset object
                                          # (think of dataset as a container) and one image is fetched from it here.
    mask, class_ids = dataset.load_mask(image_id) # load_mask is meant to be overridden for your own data; it returns
                                                  # the instance masks of this image, shape (height, width, num_instance),
                                                  # where num_instance is the number of objects to detect in the image.
                                                  # For each instance, pixels belonging to the object are 1, the rest 0.
                                                  # How the masks are produced is dataset-specific; what matters here
                                                  # is what a mask represents.
    shape = image.shape   # the original image shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,  #800
        max_dim=config.IMAGE_MAX_DIM,  #1024
        padding=config.IMAGE_PADDING)  #True
    
    # image:   the resized (and padded) image
    # window:  (y1, x1, y2, x2) of the region inside the padded image that contains the original image
    # scale:   the resize factor applied to the image
    # padding: the padding added to each dimension
    
    mask = utils.resize_mask(mask, scale, padding) # apply the same resizing and padding to the masks
    # mask shape: (height, width, num_instance); the first two dims match the resized image

    # Random horizontal flips.
    if augment:
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)  # random horizontal flip

    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask) 
    # To understand this, first be clear about what a mask is: it marks the image region occupied by one object.
    # extract_bboxes takes, for each instance, the tight bounding rectangle of its mask's 1-pixels;
    # inside that rectangle, pixels belonging to the object are 1 and the rest are 0.
    # Every instance has its own mask.

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)  # num_classes counts the classes of every dataset used in training
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] 
                       # source_class_ids groups class IDs by the dataset ("source") they come from,
                       # e.g. source_class_ids = {'coco': [0,1,3,6], 'dfa': [2,4,5]}
                       # dataset.image_info[image_id]["source"] is the source this image belongs to,
                       # so this line fetches the list of class IDs of that source
                      
    active_class_ids[source_class_ids] = 1 # mark the classes of this image's source as active
    # Intuition: training may mix several datasets whose class sets differ.
    # Suppose three datasets are used:
    #   set 1: person, car, house
    #   set 2: apple, banana, person, dog
    #   set 3: plane, phone, rice
    # Then dataset.num_classes = 3+4+3 = 10 and active_class_ids = np.array([0,0,0,0,0,0,0,0,0,0]).
    # When the current image comes from set 2, only its four classes (apple, banana, person, dog) are in use,
    # so positions 3,4,5,6 of active_class_ids are set to 1:
    # active_class_ids = np.array([0,0,0,1,1,1,1,0,0,0])

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) # config.MINI_MASK_SHAPE is (56, 56)

    # Image meta data
    image_meta = compose_image_meta(image_id, shape, window, active_class_ids)
    # class_ids: [instance_count,] the class of every instance in this image
    # (active_class_ids == 1).sum() is the number of classes in the dataset this image comes from

    return image, image_meta, class_ids, bbox, mask 

def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config):
    """Given the anchors and GT boxes, compute overlaps and identify positive
    anchors and deltas to refine them to match their corresponding GT boxes.
    anchors: [num_anchors, (y1, x1, y2, x2)]
    gt_class_ids: [num_gt_boxes] Integer class IDs.
    gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]
    Returns:
    rpn_match: [N] (int32) matches between anchors and GT boxes.
               1 = positive anchor, -1 = negative anchor, 0 = neutral
    rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    """
    # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral
    rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32)
    # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))]
    rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4))

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = np.where(gt_class_ids < 0)[0]   # indices of the crowd boxes (negative class IDs)
    if crowd_ix.shape[0] > 0: # there are crowd boxes
        # Filter out crowds from ground truth class IDs and boxes
        non_crowd_ix = np.where(gt_class_ids > 0)[0]  # indices of the ordinary (non-crowd) instances
        crowd_boxes = gt_boxes[crowd_ix]  # the crowd boxes
        gt_class_ids = gt_class_ids[non_crowd_ix] # keep only the non-crowd class IDs
        gt_boxes = gt_boxes[non_crowd_ix]  # keep only the non-crowd boxes
        # Compute overlaps with crowd boxes [anchors, crowds]
        crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes) # IoU of every anchor with every crowd box
        crowd_iou_max = np.amax(crowd_overlaps, axis=1) 
        no_crowd_bool = (crowd_iou_max < 0.001) # anchors whose IoU with every crowd box is < 0.001 may become negatives
    else: # there are no crowd boxes
        # All anchors don't intersect a crowd
        no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool)  # every anchor is a potential negative

    # Compute overlaps [num_anchors, num_gt_boxes]
    overlaps = utils.compute_overlaps(anchors, gt_boxes) # IoU of every anchor with every (non-crowd) GT box

    # Match anchors to GT Boxes
    # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive.
    # If an anchor overlaps a GT box with IoU < 0.3 then it's negative.
    # Neutral anchors are those that don't match the conditions above,
    # and they don't influence the loss function.
    # However, don't keep any GT box unmatched (rare, but happens). Instead,
    # match it to the closest anchor (even if its max IoU is < 0.3).
    #
    # 1. Set negative anchors first. They get overwritten below if a GT box is
    # matched to them. Skip boxes in crowd areas.
    anchor_iou_argmax = np.argmax(overlaps, axis=1)  # for each anchor, the index of the GT box with the highest IoU
    anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax]
    rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 # negatives: anchors whose best IoU is < 0.3
                                                             # and which do not overlap any crowd box
    # 2. Set an anchor for each GT box (regardless of IoU value).
    # TODO: If multiple anchors have the same IoU match all of them
    gt_iou_argmax = np.argmax(overlaps, axis=0)  # for each GT box (column), the anchor with the highest IoU
    rpn_match[gt_iou_argmax] = 1      # that anchor becomes a positive
    # 3. Set anchors with high overlap as positive.
    rpn_match[anchor_iou_max >= 0.7] = 1  # anchors whose best IoU with any GT box is >= 0.7 are positives

    # Subsample to balance positive and negative anchors
    # Don't let positives be more than half the anchors
    ids = np.where(rpn_match == 1)[0] # indices of the positive anchors
    #RPN_TRAIN_ANCHORS_PER_IMAGE=256
    extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) # how many positives exceed the quota (half of 256)
    if extra > 0: # more positives than needed
        # Reset the extra ones to neutral
        ids = np.random.choice(ids, extra, replace=False) # randomly demote the surplus positives to neutral
        rpn_match[ids] = 0
    # Same for negative proposals
    ids = np.where(rpn_match == -1)[0]
    extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE -
                        np.sum(rpn_match == 1))
    if extra > 0:
        # Reset the extra ones to neutral
        ids = np.random.choice(ids, extra, replace=False) # likewise for the surplus negatives
        rpn_match[ids] = 0

    # For positive anchors, compute shift and scale needed to transform them
    # to match the corresponding GT boxes.
    ids = np.where(rpn_match == 1)[0]
    ix = 0  # index into rpn_bbox
    # TODO: use box_refinment() rather than duplicating the code here
    gt=gt_boxes[anchor_iou_argmax[ids]]   # the GT box best matching each positive anchor (cf. the commented-out loop below)
    a=anchors[ids]
    gt_h=gt[:,2]-gt[:,0]
    gt_w=gt[:,3]-gt[:,1]
    gt_center_y=gt[:,0]+0.5*gt_h
    gt_center_x=gt[:,1]+0.5*gt_w
    a_h = a[:,2] - a[:,0]
    a_w = a[:,3] - a[:,1]
    a_center_y = a[:,0] + 0.5 * a_h
    a_center_x = a[:,1] + 0.5 * a_w
    rpn_bbox[:len(ids),0]=(gt_center_y - a_center_y) / a_h   # only the first len(ids) rows are filled, because only
                                                             # the positive anchors get non-zero regression targets
    rpn_bbox[:len(ids),1]=(gt_center_x - a_center_x) / a_w
    rpn_bbox[:len(ids),2]=np.log(gt_h / a_h)
    rpn_bbox[:len(ids),3]=np.log(gt_w / a_w)
    # Normalize
    rpn_bbox /= config.RPN_BBOX_STD_DEV
    
    # rpn_match holds -1/0/1 and records which anchors are negative, neutral, or positive
    # rpn_bbox: the first len(ids) rows are the deltas between the positive anchors and their GT boxes;
    #           the remaining rows stay zero
    return rpn_match, rpn_bbox
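
    # A worked example of the regression targets above (illustrative numbers):
    # anchor (y1,x1,y2,x2) = (0, 0, 100, 100)   -> center (50, 50), h = w = 100
    # GT box (y1,x1,y2,x2) = (10, 10, 110, 130) -> center (60, 70), h = 100, w = 120
    # dy = (60-50)/100 = 0.1,  dx = (70-50)/100 = 0.2,
    # dh = log(100/100) = 0.0, dw = log(120/100) ≈ 0.182,
    # and the whole row is finally divided element-wise by RPN_BBOX_STD_DEV = [0.1, 0.1, 0.2, 0.2].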
    
    # for i, a in zip(ids, anchors[ids]):
        # # Closest gt box (it might have IoU < 0.7)
        # gt = gt_boxes[anchor_iou_argmax[i]]  # anchor_iou_argmax[i] is the column index of row i's maximum IoU,
                                             # # i.e. which GT box best matches anchor i
        # # Convert coordinates to center plus width/height.
        # # GT Box
        # gt_h = gt[2] - gt[0]
        # gt_w = gt[3] - gt[1]
        # gt_center_y = gt[0] + 0.5 * gt_h
        # gt_center_x = gt[1] + 0.5 * gt_w
        # # Anchor
        # a_h = a[2] - a[0]
        # a_w = a[3] - a[1]
        # a_center_y = a[0] + 0.5 * a_h
        # a_center_x = a[1] + 0.5 * a_w

        # # Compute the bbox refinement that the RPN should predict.
        # rpn_bbox[ix] = [
            # (gt_center_y - a_center_y) / a_h,
            # (gt_center_x - a_center_x) / a_w,
            # np.log(gt_h / a_h),
            # np.log(gt_w / a_w),
        # ]
        # # Normalize
        # rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV
        # ix += 1
        
               

class Dataset(torch.utils.data.Dataset):
    def __init__(self, dataset, config, augment=True):
        """A generator that returns images and corresponding target class ids,
            bounding box deltas, and masks.
            dataset: The Dataset object to pick data from
            config: The model config object
            shuffle: If True, shuffles the samples before every epoch
            augment: If True, applies image augmentation to images (currently only
                     horizontal flips are supported)
            Returns a Python generator. Upon calling next() on it, the
            generator returns two lists, inputs and outputs. The contents
            of the lists differs depending on the received arguments:
            inputs list:
            - images: [batch, H, W, C]
            - image_metas: [batch, size of image meta]
            - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
            - rpn_bbox: [batch, M, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
            - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
            - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
            - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                        are those of the image unless use_mini_mask is True, in which
                        case they are defined in MINI_MASK_SHAPE.
            outputs list: Usually empty in regular training. But if detection_targets
                is True then the outputs list contains target class_ids, bbox deltas,
                and masks.
            """
        self.b = 0  # batch item index
        self.image_index = -1
        self.image_ids = np.copy(dataset.image_ids)
        self.error_count = 0

        self.dataset = dataset 
        self.config = config
        self.augment = augment  #图像增强方式

        # Anchors
        # [anchor_count, (y1, x1, y2, x2)]
        self.anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                 config.RPN_ANCHOR_RATIOS,
                                                 config.BACKBONE_SHAPES,
                                                 config.BACKBONE_STRIDES,
                                                 config.RPN_ANCHOR_STRIDE)
            # there are 5 pyramid feature maps (P2-P6); a set of anchors is generated for each one,
            # and all anchors are expressed in the coordinate frame of the (resized) input image
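        # Illustrative sketch: anchor counts per level, assuming the default 1024x1024 input,
        # 3 anchor ratios and RPN_ANCHOR_STRIDE = 1 (numbers hold only for these assumed defaults).
        # feature_shapes = [(256, 256), (128, 128), (64, 64), (32, 32), (16, 16)]
        # anchors_per_level = [h * w * 3 for h, w in feature_shapes]
        # print(anchors_per_level)       # [196608, 49152, 12288, 3072, 768]
        # print(sum(anchors_per_level))  # 261888 anchors in total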

    def __getitem__(self, image_index):  # image_index: index of the sample within the dataset
        # Get GT bounding boxes and masks for image.
        image_id = self.image_ids[image_index]
        
        image, image_metas, gt_class_ids, gt_boxes, gt_masks = \
            load_image_gt(self.dataset, self.config, image_id, augment=self.augment,
                          use_mini_mask=self.config.USE_MINI_MASK)

        # Skip images that have no instances. This can happen in cases
        # where we train on a subset of classes and the image doesn't
        # have any of the classes we care about.
        #跳过图片上没有实例样本
        if not np.any(gt_class_ids > 0):
            return None

        # RPN Targets
        rpn_match, rpn_bbox = build_rpn_targets(image.shape, self.anchors,
                                                gt_class_ids, gt_boxes, self.config)

        # If more instances than fits in the array, sub-sample from them.
        if gt_boxes.shape[0] > self.config.MAX_GT_INSTANCES:  #如果图片上的box超过了100个,则只从中挑选100个
                                                              #图片上要检测的物体不能太多
            ids = np.random.choice(
                np.arange(gt_boxes.shape[0]), self.config.MAX_GT_INSTANCES, replace=False)
            gt_class_ids = gt_class_ids[ids]
            gt_boxes = gt_boxes[ids]
            gt_masks = gt_masks[:, :, ids]

        # Add to batch
        rpn_match = rpn_match[:, np.newaxis] #将其变成二维的 (N,1)
        images = mold_image(image.astype(np.float32), self.config)#减去平均值

        # Convert
        images = torch.from_numpy(images.transpose(2, 0, 1)).float()
        image_metas = torch.from_numpy(image_metas)
        rpn_match = torch.from_numpy(rpn_match)
        rpn_bbox = torch.from_numpy(rpn_bbox).float()
        gt_class_ids = torch.from_numpy(gt_class_ids)
        gt_boxes = torch.from_numpy(gt_boxes).float()
        gt_masks = torch.from_numpy(gt_masks.astype(int).transpose(2, 0, 1)).float() # (height, width, num_instances) -> (num_instances, height, width)

        return images, image_metas, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks

    def __len__(self):
        return self.image_ids.shape[0]


############################################################
#  MaskRCNN Class
############################################################

class MaskRCNN(nn.Module):
    """Encapsulates the Mask RCNN model functionality.
    """

    def __init__(self, config, model_dir):
        """
        config: A Sub-class of the Config class
        model_dir: Directory to save training logs and trained weights
        """
        super(MaskRCNN, self).__init__()
        self.config = config
        self.model_dir = model_dir  
        self.set_log_dir()  #设置log路径,以及每个eopch保存模型参数路径,并且self.epoch=0
                            # 'C://Desktop/coco20171029T2315\\mask_rcnn_coco_{:04d}.pth'
                            # 'C://Desktop' 是model_dir
                            # 'coco' 是 config.NAME 数据集名字
                            # '20171029T2315'  程序运行该条代码的时间
                            # 'mask_rcnn_coco_{:04d}.pth' 固定字符
                            #  如果set_log_dir传入参数(model_path),则生成的地址中的时间部分是model_path中的时间
                            #  并且将self.epoch改为model_path('.../**.pth')的**部分的值
                            #  该代码衍生物self.log_dir='C://Desktop/coco20171029T2315/'
                            #  self.checkpoint_path='C://Desktop/coco20171029T2315\\mask_rcnn_coco_{:04d}.pth'
                            
        self.build(config=config) # build the network architecture
        self.initialize_weights() # initialize the model parameters
        self.loss_history = []
        self.val_loss_history = []

    def build(self, config):
        """Build Mask R-CNN architecture.
        """

        # Image size must be divisible by 2 multiple times
        h, w = config.IMAGE_SHAPE[:2]
        if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): #如果h,w不是64的整数倍
            raise Exception("Image size must be dividable by 2 at least 6 times "
                            "to avoid fractions when downscaling and upscaling."
                            "For example, use 256, 320, 384, 448, 512, ... etc. ")

        # Build the shared convolutional layers.
        # Bottom-up Layers
        # Returns a list of the last layers of each stage, 5 in total.
        # Here stage 5 is created as well (stage5=True), so stages() returns C1 through C5.
        resnet = ResNet("resnet101", stage5=True)
        C1, C2, C3, C4, C5 = resnet.stages() 

        # Top-down Layers
        # TODO: add assert to verify feature map sizes match what's in config
        self.fpn = FPN(C1, C2, C3, C4, C5, out_channels=256) #输出[p2_out, p3_out, p4_out, p5_out, p6_out]
                                                             #特征融合再输出
                                                             #p2_out(1,256,256,256)  4
                                                             #p3_out(1,256,128,128)  8
                                                             #p4_out(1,256,64,64)    16
                                                             #p5_out(1,256,32,32)    32 
                                                             #p6_out(1,256,16,16)                                                           
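        # Illustrative sketch: the feature map sizes above follow from the input size and the backbone
        # strides (assuming the default 1024x1024 input and strides [4, 8, 16, 32, 64]); this mirrors
        # how config.BACKBONE_SHAPES is typically derived. math and numpy are imported in this file.
        # backbone_shapes = np.array(
        #     [[int(math.ceil(1024 / s)), int(math.ceil(1024 / s))] for s in [4, 8, 16, 32, 64]])
        # print(backbone_shapes)  # [[256 256] [128 128] [64 64] [32 32] [16 16]]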
                                                                                                                         
        # Generate Anchors
        #
        self.anchors = Variable(torch.from_numpy(utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                                                                config.RPN_ANCHOR_RATIOS,
                                                                                config.BACKBONE_SHAPES,
                                                                                config.BACKBONE_STRIDES,
                                                                                config.RPN_ANCHOR_STRIDE)).float(), requires_grad=False)
                                              
        if self.config.GPU_COUNT:
            self.anchors = self.anchors.cuda()

        # RPN          
        #len(config.RPN_ANCHOR_RATIOS)=3
        #config.RPN_ANCHOR_STRIDE=1
        #output: [rpn_class_logits, rpn_probs,   rpn_bbox] 
        #        (1,256*256*3,2) (1,256*256*3,2) (1,256*256*3,4)
        #        (1,128*128*3,2) (1,128*128*3,2) (1,128*128*3,4)
        #        (1,64*64*3,2)   (1,64*64*3,2)   (1,64*64*3,4)
        #        (1,32*32*3,2)   (1,32*32*3,2)   (1,32*32*3,4)
        #        (1,16*16*3,2)   (1,16*16*3,2)   (1,16*16*3,4)
        self.rpn = RPN(len(config.RPN_ANCHOR_RATIOS), config.RPN_ANCHOR_STRIDE, 256)
        

        # FPN Classifier
        self.classifier = Classifier(256, config.POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

        # FPN Mask
        # config.MASK_POOL_SIZE = 14 (ROI Align output size for the mask head)
        # config.IMAGE_SHAPE = [1024, 1024, 3]
        # config.NUM_CLASSES = number of classes including background (81 for COCO)
        # the mask head predicts a 28x28 mask per class for each roi
        self.mask = Mask(256, config.MASK_POOL_SIZE, config.IMAGE_SHAPE, config.NUM_CLASSES)

        # Fix batch norm layers
        # 固定BatchNorm层的参数
        def set_bn_fix(m):
            classname = m.__class__.__name__
            if classname.find('BatchNorm') != -1:
                for p in m.parameters(): 
                    p.requires_grad = False

        self.apply(set_bn_fix)

    def initialize_weights(self):
        """Initialize model weights.
        """

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform(m.weight) # Xavier uniform initialization for Conv2d weights
                if m.bias is not None:
                    m.bias.data.zero_()    #将nn.Conv2d层的bias置0
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)     #对于nn.BatchNorm2d层w初始化参数置1
                m.bias.data.zero_()        #对于nn.BatchNorm2d层b初始化参数置0
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01) # normal distribution with mean 0 and std 0.01
                m.bias.data.zero_()

    def set_trainable(self, layer_regex, model=None, indent=0, verbose=1):
        """Sets model layers as trainable if their names match
        the given regular expression.
        """

        for param in self.named_parameters():
            layer_name = param[0] #每层网络中的每个参数的名字
            trainable = bool(re.fullmatch(layer_regex, layer_name))
                            #re.fullmatch(pattern, string) 完成匹配string
                            #只有当pattern与string一模一样的时候
                            #bool(re.fullmatch(pattern, string))返回true
            if not trainable:
                param[1].requires_grad = False  # freeze this parameter: no gradients, so it is not updated
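        # Illustrative sketch: how the predefined "heads" regex decides which parameters stay trainable
        # (the parameter names below are made-up examples; re is imported at the top of this file).
        # heads_regex = r"(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)"
        # for name in ["fpn.C1.0.weight", "rpn.conv_shared.weight", "mask.conv1.bias"]:
        #     print(name, bool(re.fullmatch(heads_regex, name)))
        # # fpn.C1.0.weight False        -> frozen (backbone)
        # # rpn.conv_shared.weight True  -> trainable
        # # mask.conv1.bias True         -> trainable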

    def set_log_dir(self, model_path=None):
        """Sets the model log directory and epoch counter.
        model_path: If None, or a format different from what this code uses
            then set a new log directory and start epochs from 0. Otherwise,
            extract the log directory and the epoch counter from the file
            name.
        """

        # Set date and epoch counter as if starting a new model
        self.epoch = 0
        now = datetime.datetime.now() #当前日期时间(精确到秒)

        # If we have a model path with date and epochs use them
        if model_path:  #区分是保存模型参数的路径还是保存其他的路径
            # Continue from we left of. Get epoch and date from the file name
            # A sample model path might look like:
            # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5
            regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_\w+(\d{4})\.pth"
            m = re.match(regex, model_path)  #查看model_path是不是regex这样的模式
            if m:
                now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
                                        int(m.group(4)), int(m.group(5)))
                #这里计算now主要保证模型的路径不会被下面的代码改变
                self.epoch = int(m.group(6)) #获取模型参数是哪个epoch的模型

        # Directory for training logs
        # 训练过程需要保存的路径
        self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format(
            self.config.NAME.lower(), now))  # now is the timestamp used to name the log directory

        # Path to save after each epoch. Include placeholders that get filled by Keras.
        self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.pth".format(
            self.config.NAME.lower()))
        self.checkpoint_path = self.checkpoint_path.replace(
            "*epoch*", "{:04d}")

    def find_last(self):
        """Finds the last checkpoint file of the last trained model in the
        model directory.
        Returns:
            log_dir: The directory where events and weights are saved
            checkpoint_path: the path to the last checkpoint file
        """
        # Get directory names. Each directory corresponds to a model
        dir_names = next(os.walk(self.model_dir))[1] #获得self.model_dir路径下文件夹的名字
        key = self.config.NAME.lower()
        dir_names = filter(lambda f: f.startswith(key), dir_names)#过滤掉不是以key字符为开始的文件夹
        dir_names = sorted(dir_names)  #对dir_names进行排序
        if not dir_names:
            return None, None
        # Pick last directory
        dir_name = os.path.join(self.model_dir, dir_names[-1]) # pick the most recent training directory
        # Find the last checkpoint
        checkpoints = next(os.walk(dir_name))[2] #选择文件夹dir_name下面的文件
        checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints) # drop files that don't start with "mask_rcnn"
        checkpoints = sorted(checkpoints) #对过滤后的文件进行排序
        if not checkpoints:
            return dir_name, None
        checkpoint = os.path.join(dir_name, checkpoints[-1]) #选择排序后的文件的最后一个文件
        return dir_name, checkpoint

    def load_weights(self, filepath):
        """Modified version of the correspoding Keras function with
        the addition of multi-GPU support and the ability to exclude
        some layers from loading.
        exclude: list of layer names to exclude
        """
        if os.path.exists(filepath):
            state_dict = torch.load(filepath)
            self.load_state_dict(state_dict, strict=False)
        else:
            print("Weight file not found ...")

        # Update the log directory
        self.set_log_dir(filepath)  # re-derive log_dir, checkpoint_path and the epoch counter from the weight file path
        if not os.path.exists(self.log_dir):  #如果log_dir不存在,则创建一个
            os.makedirs(self.log_dir)

    def detect(self, images):      
        """Runs the detection pipeline.
        images: List of images, potentially of different sizes.
        Returns a list of dicts, one dict per image. The dict contains:
        rois: [N, (y1, x1, y2, x2)] detection bounding boxes
        class_ids: [N] int class IDs
        scores: [N] float probability scores for the class IDs
        masks: [H, W, N] instance binary masks
        """
        '''
        meta = np.array(
        [image_id] +            # size=1
        list(image_shape) +     # size=3
        list(window) +          # size=4 (y1, x1, y2, x2) in image cooredinates
        list(active_class_ids)  # size=num_classes
        )
        '''
        # Mold inputs to format expected by the neural network
        # resize every image in the list (they may have different sizes) to (1024,1024,3), normalize it,
        # then stack the results into one 4D array
        # molded_images: (N,1024,1024,3), where N = len(images)
        # the meta of a single image is (image_id, image_shape, window, active_class_ids)
        # here every image_id is 0
        # image_shape is the original shape of the image before resizing
        # window is (y1, x1, y2, x2) of the real image inside the padded image
        # active_class_ids marks which classes are active for this image
        molded_images, image_metas, windows = self.mold_inputs(images)
        

        # Convert images to torch tensor
        #将图片变成tensor
        molded_images = torch.from_numpy(molded_images.transpose(0, 3, 1, 2)).float()
        
        # To GPU
        if self.config.GPU_COUNT:
            molded_images = molded_images.cuda()

        # Wrap in variable
        # 现在tensor和Variable合并了,下面这句可以省略
        molded_images = Variable(molded_images, volatile=True)  

        # Run object detection
        #detections (1,num_detections,6)
        #mrcnn_mask (1,num_detections,num_class,28,28)
        detections, mrcnn_mask = self.predict([molded_images, image_metas], mode='inference')

        # Convert to numpy
        detections = detections.data.cpu().numpy()
        mrcnn_mask = mrcnn_mask.permute(0, 1, 3, 4, 2).data.cpu().numpy() #(1,num_detections,28,28,num_class)

        ################################################################################
        # Process detections
        results = []
        #images是一个list
        #这里的images里面只能是一张图片
        # unmold_detections maps the detection boxes and mrcnn_mask back onto the original image
        for i, image in enumerate(images):
            final_rois, final_class_ids, final_scores, final_masks =\
                self.unmold_detections(detections[i], mrcnn_mask[i],
                                       image.shape, windows[i])
            results.append({
                "rois": final_rois,
                "class_ids": final_class_ids,
                "scores": final_scores,
                "masks": final_masks,
            })
        return results
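        # Illustrative usage sketch (the config class, weight file and image path are hypothetical;
        # a Config subclass such as the one in the repo's config.py and cv2 are assumed):
        # config = InferenceConfig()
        # model = MaskRCNN(config=config, model_dir="logs")
        # model.load_weights("logs/coco20171029T2315/mask_rcnn_coco_0003.pth")
        # if config.GPU_COUNT:
        #     model = model.cuda()
        # image = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
        # results = model.detect([image])   # one dict per input image
        # r = results[0]
        # print(r["rois"].shape, r["class_ids"], r["scores"], r["masks"].shape)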
        
        

    def predict(self, input, mode):
        molded_images = input[0]  
        image_metas = input[1]

        if mode == 'inference':
            self.eval()
        elif mode == 'training':
            self.train()

            # Set batchnorm always in eval mode during training
            # 在训练模式时冻结BatchNorm
            def set_bn_eval(m):
                classname = m.__class__.__name__
                if classname.find('BatchNorm') != -1:
                    m.eval()

            self.apply(set_bn_eval)

        # Feature extraction
        [p2_out, p3_out, p4_out, p5_out, p6_out] = self.fpn(molded_images)
        #p2_out (N,256,256,256)
        #p3_out (N,256,128,128)
        #p4_out (N,256,64,64)
        #p5_out (N,256,32,32)
        #p6_out (N,256,16,16)
        
        # Note that P6 is used in RPN, but not in the classifier heads.
        rpn_feature_maps = [p2_out, p3_out, p4_out, p5_out, p6_out]
        mrcnn_feature_maps = [p2_out, p3_out, p4_out, p5_out]

        # Loop through pyramid layers
        # rpn 输出
        layer_outputs = []  # list of lists
        for p in rpn_feature_maps:
            layer_outputs.append(self.rpn(p))
        # Concatenate layer outputs
        # Convert from list of lists of level outputs to list of lists
        # of outputs across levels.
        # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
        outputs = list(zip(*layer_outputs))
        outputs = [torch.cat(list(o), dim=1) for o in outputs]  #这里的o是一个三维的,dim=1,按照这里的第1维进行拼接
        rpn_class_logits, rpn_class, rpn_bbox = outputs
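        # Illustrative sketch of the zip/cat trick above with two toy levels (shapes correspond to the
        # P2 and P3 anchor counts for a 1024x1024 input):
        # level1 = (torch.zeros(1, 196608, 2), torch.zeros(1, 196608, 2), torch.zeros(1, 196608, 4))
        # level2 = (torch.zeros(1, 49152, 2),  torch.zeros(1, 49152, 2),  torch.zeros(1, 49152, 4))
        # merged = [torch.cat(list(o), dim=1) for o in zip(*[level1, level2])]
        # print([t.shape for t in merged])
        # # [torch.Size([1, 245760, 2]), torch.Size([1, 245760, 2]), torch.Size([1, 245760, 4])]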
        # Generate proposals
        # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
        # and zero padded.
        #config.POST_NMS_ROIS_TRAINING=2000
        #config.POST_NMS_ROIS_INFERENCE=1000
        proposal_count = self.config.POST_NMS_ROIS_TRAINING if mode == "training" \
            else self.config.POST_NMS_ROIS_INFERENCE
        rpn_rois = proposal_layer([rpn_class, rpn_bbox],
                                 proposal_count=proposal_count,
                                 nms_threshold=self.config.RPN_NMS_THRESHOLD,
                                 anchors=self.anchors,
                                 config=self.config)

        if mode == 'inference':
            # Network Heads
            # Proposal classifier and BBox regressor heads
            # self.classifier ROI-Aligns each of rpn_rois into an (N, C, 7, 7) feature,
            # then feeds it to the mrcnn classification and box-regression heads
            # mrcnn_class_logits  (num_rpn_rois,num_class)
            # mrcnn_class         (num_rpn_rois,num_class)
            # mrcnn_bbox          (num_rpn_rois,num_class,4)
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = self.classifier(mrcnn_feature_maps, rpn_rois)

            # Detections
            # detections [num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates
            # refine rpn_rois with the predicted deltas, filter by score and per-class NMS,
            # and keep at most DETECTION_MAX_INSTANCES (100) of the highest-scoring boxes
            # detections (<=100, 6)
            detections = detection_layer(self.config, rpn_rois, mrcnn_class, mrcnn_bbox, image_metas)
            

            # Convert boxes to normalized coordinates
            # TODO: let DetectionLayer return normalized coordinates to avoid
            #       unnecessary conversions
            h, w = self.config.IMAGE_SHAPE[:2]
            scale = Variable(torch.from_numpy(np.array([h, w, h, w])).float(), requires_grad=False)
            if self.config.GPU_COUNT:
                scale = scale.cuda()
            detection_boxes = detections[:, :4] / scale  #将坐标归一化

            # Add back batch dimension
            detection_boxes = detection_boxes.unsqueeze(0) #[1,num_detections,4]

            # Create masks for detections
            # mrcnn_feature_maps=[p2_out, p3_out, p4_out, p5_out]
            # mrcnn_mask (rois_num,num_classes,28,28)
            mrcnn_mask = self.mask(mrcnn_feature_maps, detection_boxes)

            # Add back batch dimension
            detections = detections.unsqueeze(0)  #(1,num_detections,6)
            mrcnn_mask = mrcnn_mask.unsqueeze(0)  #(1,rois_num ,num_classes,28,28)

            return [detections, mrcnn_mask]

        elif mode == 'training':

            gt_class_ids = input[2]
            gt_boxes = input[3]
            gt_masks = input[4]

            # Normalize coordinates
            h, w = self.config.IMAGE_SHAPE[:2]
            scale = Variable(torch.from_numpy(np.array([h, w, h, w])).float(), requires_grad=False)
            if self.config.GPU_COUNT:
                scale = scale.cuda()
            gt_boxes = gt_boxes / scale   #gt_boxes归一化

            # Generate detection targets
            # Subsamples proposals and generates target outputs for training
            # Note that proposal class IDs, gt_boxes, and gt_masks are zero
            # padded. Equally, returned rois and targets are zero padded.
            # rois: a sample of TRAIN_ROIS_PER_IMAGE proposals, with roughly ROI_POSITIVE_RATIO of them positive
            # target_class_ids: for positive rois, the class of the best-matching GT box; 0 for negative rois
            # target_deltas: box refinements toward the matched GT box (zeros for negatives)
            # target_mask: the matched GT mask cropped/resized to the roi (zeros for negatives)
            rois, target_class_ids, target_deltas, target_mask = \
                detection_target_layer(rpn_rois, gt_class_ids, gt_boxes, gt_masks, self.config)

            if not rois.size():
                mrcnn_class_logits = Variable(torch.FloatTensor())
                mrcnn_class = Variable(torch.IntTensor())
                mrcnn_bbox = Variable(torch.FloatTensor())
                mrcnn_mask = Variable(torch.FloatTensor())
                if self.config.GPU_COUNT:
                    mrcnn_class_logits = mrcnn_class_logits.cuda()
                    mrcnn_class = mrcnn_class.cuda()
                    mrcnn_bbox = mrcnn_bbox.cuda()
                    mrcnn_mask = mrcnn_mask.cuda()
            else:
                # Network Heads
                # Proposal classifier and BBox regressor heads
                mrcnn_class_logits, mrcnn_class, mrcnn_bbox = self.classifier(mrcnn_feature_maps, rois)

                # Create masks for detections
                mrcnn_mask = self.mask(mrcnn_feature_maps, rois)

            return [rpn_class_logits, rpn_bbox, target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask]

    def train_model(self, train_dataset, val_dataset, learning_rate, epochs, layers):
        """Train the model.
        train_dataset, val_dataset: Training and validation Dataset objects.
        learning_rate: The learning rate to train with
        epochs: Number of training epochs. Note that previous training epochs
                are considered to be done already, so this actually determines
                the epochs to train in total rather than in this particular
                call.
        layers: Allows selecting which layers to train. It can be:
            - A regular expression to match layer names to train
            - One of these predefined values:
              heads: The RPN, classifier and mask heads of the network
              all: All the layers
              3+: Train Resnet stage 3 and up
              4+: Train Resnet stage 4 and up
              5+: Train Resnet stage 5 and up
        """

        # Pre-defined layer regular expressions
        layer_regex = {
            # all layers but the backbone
            "heads": r"(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
            # From a specific Resnet stage and up
            "3+": r"(fpn.C3.*)|(fpn.C4.*)|(fpn.C5.*)|(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
            "4+": r"(fpn.C4.*)|(fpn.C5.*)|(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
            "5+": r"(fpn.C5.*)|(fpn.P5\_.*)|(fpn.P4\_.*)|(fpn.P3\_.*)|(fpn.P2\_.*)|(rpn.*)|(classifier.*)|(mask.*)",
            # All layers
            "all": ".*",
        }
        if layers in layer_regex.keys():
            layers = layer_regex[layers]

        # Data generators
        # results of Dataset
        # images, image_metas, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks
        train_set = Dataset(train_dataset, self.config, augment=True)
        train_generator = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=4)
        val_set = Dataset(val_dataset, self.config, augment=True)
        val_generator = torch.utils.data.DataLoader(val_set, batch_size=1, shuffle=True, num_workers=4)

        # Train
        # self.epoch初始为0
        log("\nStarting at epoch {}. LR={}\n".format(self.epoch+1, learning_rate))
        
        #Checkpoint Path: C://desktop/coco20171029T2315\mask_rcnn_coco_{:04d}.pth
        log("Checkpoint Path: {}".format(self.checkpoint_path))
        self.set_trainable(layers)

        # Optimizer object
        # Add L2 Regularization
        # Skip gamma and beta weights of batch normalization layers.
        # trainables_wo_bn 非BatchNorm层的参数
        # trainables_only_bn BatchNorm层的参数
        trainables_wo_bn = [param for name, param in self.named_parameters() if param.requires_grad and not 'bn' in name]
        trainables_only_bn = [param for name, param in self.named_parameters() if param.requires_grad and 'bn' in name]
        optimizer = optim.SGD([
            {'params': trainables_wo_bn, 'weight_decay': self.config.WEIGHT_DECAY}, # L2 weight decay on non-BatchNorm parameters
            {'params': trainables_only_bn} # no weight decay on BatchNorm parameters
        ], lr=learning_rate, momentum=self.config.LEARNING_MOMENTUM)

        # self.epoch is the starting epoch; it is updated whenever set_log_dir(model_path) is called,
        # based on the epoch number embedded in model_path
        # to resume training from a given epoch, simply load the weights saved at that epoch:
        # e.g. loading '.../mask_rcnn_coco_0003.pth' makes training continue from epoch 4
        for epoch in range(self.epoch+1, epochs+1):
            log("Epoch {}/{}.".format(epoch,epochs))

            # Training
            # 里面进行了正向传播求loss和打印loss,并进行反向传播更新参数,且执行1000次
            loss, loss_rpn_class, loss_rpn_bbox, loss_mrcnn_class, loss_mrcnn_bbox, loss_mrcnn_mask = self.train_epoch(train_generator, optimizer, self.config.STEPS_PER_EPOCH)

            # Validation
            # 里面进行了正向传播求loss和打印loss,且执行50次
            val_loss, val_loss_rpn_class, val_loss_rpn_bbox, val_loss_mrcnn_class, val_loss_mrcnn_bbox, val_loss_mrcnn_mask = self.valid_epoch(val_generator, self.config.VALIDATION_STEPS)

            # Statistics
            self.loss_history.append([loss, loss_rpn_class, loss_rpn_bbox, loss_mrcnn_class, loss_mrcnn_bbox, loss_mrcnn_mask])
            self.val_loss_history.append([val_loss, val_loss_rpn_class, val_loss_rpn_bbox, val_loss_mrcnn_class, val_loss_mrcnn_bbox, val_loss_mrcnn_mask])
            visualize.plot_loss(self.loss_history, self.val_loss_history, save=True, log_dir=self.log_dir)

            # Save model
            torch.save(self.state_dict(), self.checkpoint_path.format(epoch))

        self.epoch = epochs
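        # Illustrative usage sketch (dataset class, data path and epoch counts are hypothetical;
        # CocoDataset stands for a Dataset subclass such as the one in the repo's coco.py):
        # dataset_train = CocoDataset(); dataset_train.load_coco("/data/coco", "train"); dataset_train.prepare()
        # dataset_val = CocoDataset();   dataset_val.load_coco("/data/coco", "minival"); dataset_val.prepare()
        # model = MaskRCNN(config=config, model_dir="logs")
        # if config.GPU_COUNT:
        #     model = model.cuda()
        # # stage 1: train only the heads, backbone frozen
        # model.train_model(dataset_train, dataset_val, learning_rate=config.LEARNING_RATE, epochs=40, layers='heads')
        # # stage 2: fine-tune ResNet stage 4 and up with a lower learning rate
        # model.train_model(dataset_train, dataset_val, learning_rate=config.LEARNING_RATE / 10, epochs=120, layers='4+')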

    def train_epoch(self, datagenerator, optimizer, steps):
        batch_count = 0
        loss_sum = 0
        loss_rpn_class_sum = 0
        loss_rpn_bbox_sum = 0
        loss_mrcnn_class_sum = 0
        loss_mrcnn_bbox_sum = 0
        loss_mrcnn_mask_sum = 0
        step = 0

        optimizer.zero_grad()   #将梯度初始化为零
                                #(因为一个batch的loss关于weight的导数是所有sample的loss关于weight的导数的累加和)

        for inputs in datagenerator:
            batch_count += 1

            images = inputs[0]
            image_metas = inputs[1]
            rpn_match = inputs[2]
            rpn_bbox = inputs[3]
            gt_class_ids = inputs[4]
            gt_boxes = inputs[5]
            gt_masks = inputs[6]

            # image_metas as numpy array
            image_metas = image_metas.numpy()

            # Wrap in variables
            images = Variable(images)
            rpn_match = Variable(rpn_match)
            rpn_bbox = Variable(rpn_bbox)
            gt_class_ids = Variable(gt_class_ids)
            gt_boxes = Variable(gt_boxes)
            gt_masks = Variable(gt_masks)

            # To GPU
            if self.config.GPU_COUNT:
                images = images.cuda()
                rpn_match = rpn_match.cuda()
                rpn_bbox = rpn_bbox.cuda()
                gt_class_ids = gt_class_ids.cuda()
                gt_boxes = gt_boxes.cuda()
                gt_masks = gt_masks.cuda()

            # Run object detection
            rpn_class_logits, rpn_pred_bbox, target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask = \
                self.predict([images, image_metas, gt_class_ids, gt_boxes, gt_masks], mode='training')

            # Compute losses
            rpn_class_loss, rpn_bbox_loss, mrcnn_class_loss, mrcnn_bbox_loss, mrcnn_mask_loss = compute_losses(rpn_match, rpn_bbox, rpn_class_logits, rpn_pred_bbox, target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask)
            loss = rpn_class_loss + rpn_bbox_loss + mrcnn_class_loss + mrcnn_bbox_loss + mrcnn_mask_loss

            # Backpropagation
            loss.backward()   #loss反向传播
            torch.nn.utils.clip_grad_norm(self.parameters(), 5.0) #梯度裁剪
            if (batch_count % self.config.BATCH_SIZE) == 0:  #每隔config.BATCH_SIZE个batch进行一次参数更新
                optimizer.step()   #参数更新
                optimizer.zero_grad()  #梯度清零
                batch_count = 0  

            # Progress
            printProgressBar(step + 1, steps, prefix="\t{}/{}".format(step + 1, steps),
                             suffix="Complete - loss: {:.5f} - rpn_class_loss: {:.5f} - rpn_bbox_loss: {:.5f} - mrcnn_class_loss: {:.5f} - mrcnn_bbox_loss: {:.5f} - mrcnn_mask_loss: {:.5f}".format(
                                 loss.data.cpu()[0], rpn_class_loss.data.cpu()[0], rpn_bbox_loss.data.cpu()[0],
                                 mrcnn_class_loss.data.cpu()[0], mrcnn_bbox_loss.data.cpu()[0],
                                 mrcnn_mask_loss.data.cpu()[0]), length=10)

            # Statistics
            loss_sum += loss.data.cpu()[0]/steps
            loss_rpn_class_sum += rpn_class_loss.data.cpu()[0]/steps
            loss_rpn_bbox_sum += rpn_bbox_loss.data.cpu()[0]/steps
            loss_mrcnn_class_sum += mrcnn_class_loss.data.cpu()[0]/steps
            loss_mrcnn_bbox_sum += mrcnn_bbox_loss.data.cpu()[0]/steps
            loss_mrcnn_mask_sum += mrcnn_mask_loss.data.cpu()[0]/steps

            # Break after 'steps' steps
            if step==steps-1:
                break
            step += 1 #训练步数加1

        return loss_sum, loss_rpn_class_sum, loss_rpn_bbox_sum, loss_mrcnn_class_sum, loss_mrcnn_bbox_sum, loss_mrcnn_mask_sum
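    # The update rule above is plain gradient accumulation: the DataLoader yields one image at a time,
    # so gradients from config.BATCH_SIZE consecutive images are summed before a single optimizer step.
    # A self-contained toy sketch of the same pattern (not the Mask R-CNN pipeline; nn, optim and F are
    # already imported at the top of this file):
    # model = nn.Linear(10, 1)
    # optimizer = optim.SGD(model.parameters(), lr=0.01)
    # accum_steps = 4                                    # plays the role of config.BATCH_SIZE
    # optimizer.zero_grad()
    # for step in range(16):
    #     x, y = torch.randn(1, 10), torch.randn(1, 1)   # one sample per iteration
    #     loss = F.mse_loss(model(x), y)
    #     loss.backward()                                # gradients accumulate across iterations
    #     if (step + 1) % accum_steps == 0:
    #         optimizer.step()                           # one update per accum_steps samples
    #         optimizer.zero_grad()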

    def valid_epoch(self, datagenerator, steps):

        step = 0
        loss_sum = 0
        loss_rpn_class_sum = 0
        loss_rpn_bbox_sum = 0
        loss_mrcnn_class_sum = 0
        loss_mrcnn_bbox_sum = 0
        loss_mrcnn_mask_sum = 0

        for inputs in datagenerator:
            images = inputs[0]
            image_metas = inputs[1]
            rpn_match = inputs[2]
            rpn_bbox = inputs[3]
            gt_class_ids = inputs[4]
            gt_boxes = inputs[5]
            gt_masks = inputs[6]

            # image_metas as numpy array
            image_metas = image_metas.numpy()

            # Wrap in variables
            images = Variable(images, volatile=True)
            rpn_match = Variable(rpn_match, volatile=True)
            rpn_bbox = Variable(rpn_bbox, volatile=True)
            gt_class_ids = Variable(gt_class_ids, volatile=True)
            gt_boxes = Variable(gt_boxes, volatile=True)
            gt_masks = Variable(gt_masks, volatile=True)

            # To GPU
            if self.config.GPU_COUNT:
                images = images.cuda()
                rpn_match = rpn_match.cuda()
                rpn_bbox = rpn_bbox.cuda()
                gt_class_ids = gt_class_ids.cuda()
                gt_boxes = gt_boxes.cuda()
                gt_masks = gt_masks.cuda()

            # Run object detection
            rpn_class_logits, rpn_pred_bbox, target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask = \
                self.predict([images, image_metas, gt_class_ids, gt_boxes, gt_masks], mode='training')

            if not target_class_ids.size():
                continue

            # Compute losses
            rpn_class_loss, rpn_bbox_loss, mrcnn_class_loss, mrcnn_bbox_loss, mrcnn_mask_loss = compute_losses(rpn_match, rpn_bbox, rpn_class_logits, rpn_pred_bbox, target_class_ids, mrcnn_class_logits, target_deltas, mrcnn_bbox, target_mask, mrcnn_mask)
            loss = rpn_class_loss + rpn_bbox_loss + mrcnn_class_loss + mrcnn_bbox_loss + mrcnn_mask_loss

            # Progress
            # the call below just prints the per-step losses on a progress bar
            printProgressBar(step + 1, steps, prefix="\t{}/{}".format(step + 1, steps),
                             suffix="Complete - loss: {:.5f} - rpn_class_loss: {:.5f} - rpn_bbox_loss: {:.5f} - mrcnn_class_loss: {:.5f} - mrcnn_bbox_loss: {:.5f} - mrcnn_mask_loss: {:.5f}".format(
                                 loss.data.cpu()[0], rpn_class_loss.data.cpu()[0], rpn_bbox_loss.data.cpu()[0],
                                 mrcnn_class_loss.data.cpu()[0], mrcnn_bbox_loss.data.cpu()[0],
                                 mrcnn_mask_loss.data.cpu()[0]), length=10)

            # Statistics
            loss_sum += loss.data.cpu()[0]/steps
            loss_rpn_class_sum += rpn_class_loss.data.cpu()[0]/steps
            loss_rpn_bbox_sum += rpn_bbox_loss.data.cpu()[0]/steps
            loss_mrcnn_class_sum += mrcnn_class_loss.data.cpu()[0]/steps
            loss_mrcnn_bbox_sum += mrcnn_bbox_loss.data.cpu()[0]/steps
            loss_mrcnn_mask_sum += mrcnn_mask_loss.data.cpu()[0]/steps

            # Break after 'steps' steps
            if step==steps-1:
                break
            step += 1

        return loss_sum, loss_rpn_class_sum, loss_rpn_bbox_sum, loss_mrcnn_class_sum, loss_mrcnn_bbox_sum, loss_mrcnn_mask_sum

    def mold_inputs(self, images):
        """Takes a list of images and modifies them to the format expected
        as an input to the neural network.
        images: List of image matrices [height,width,depth]. Images can have
            different sizes.
        Returns 3 Numpy matrices:
        molded_images: [N, h, w, 3]. Images resized and normalized.
        image_metas: [N, length of meta data]. Details about each image.
        windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the
            original image (padding excluded).
        """
        molded_images = []
        image_metas = []
        windows = []
        for image in images:
            '''
                依次将每张图片进行尺寸处理,然后做平均值平移
            '''
            # Resize image to fit the model expected size
            # TODO: move resizing to mold_image()
            molded_image, window, scale, padding = utils.resize_image(
                image,
                min_dim=self.config.IMAGE_MIN_DIM,
                max_dim=self.config.IMAGE_MAX_DIM,
                padding=self.config.IMAGE_PADDING)
             
            molded_image = mold_image(molded_image, self.config)#将图片减去平均值
            # Build image_meta
            image_meta = compose_image_meta(
                0, image.shape, window,
                np.zeros([self.config.NUM_CLASSES], dtype=np.int32))
                #compose_image_meta()将括号内的数据打包成一个元组
            # Append
            molded_images.append(molded_image)
            windows.append(window)
            image_metas.append(image_meta)
        # Pack into arrays
        molded_images = np.stack(molded_images) #将每张图片拼成一个大矩阵
        image_metas = np.stack(image_metas)
        windows = np.stack(windows)
        return molded_images, image_metas, windows

    def unmold_detections(self, detections, mrcnn_mask, image_shape, window):
        """Reformats the detections of one image from the format of the neural
        network output to a format suitable for use in the rest of the
        application.
        detections: [N, (y1, x1, y2, x2, class_id, score)]  #(N,6)
        mrcnn_mask: [N, height, width, num_classes]         
        image_shape: [height, width, depth] Original size of the image before resizing
        window: [y1, x1, y2, x2] Box in the image where the real image is
                excluding the padding.
        Returns:
        boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels
        class_ids: [N] Integer class IDs for each bounding box
        scores: [N] Float probability scores of the class_id
        masks: [height, width, num_instances] Instance masks
        """
        # How many detections do we have?
        # Detections array is padded with zeros. Find the first class_id == 0.
        zero_ix = np.where(detections[:, 4] == 0)[0]   # indices of the zero-padded rows (class_id == 0)
        # if padded rows exist, N is the index of the first one (detections are sorted by score, so all
        # rows before it are valid detections); otherwise N is the number of detection rows
        N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]

        # Extract boxes, class_ids, scores, and class-specific masks
        boxes = detections[:N, :4]    #取出前N个框
        class_ids = detections[:N, 4].astype(np.int32)  #取出前N个类别
        scores = detections[:N, 5]   # take the scores of the first N detections
        # mrcnn_mask: (N, height, width, num_classes)
        # each detection produces one mask per class
        # which class does a mask belong to? The mask head predicts a mask for every class independently
        # (a nice property: it removes competition between classes), and the class chosen for the
        # detection box (class_ids) decides which of those per-class masks we keep
        # e.g. with classes apple / banana / pear: if the box is classified as apple,
        # we only keep the mask channel that corresponds to "apple"
        
        masks = mrcnn_mask[np.arange(N), :, :, class_ids]  # for each detection, pick the mask channel of its class_id
                                                           # -> (N, height, width)
                                                           
                                                           

        # Compute scale and shift to translate coordinates to image domain.
        # img1: the original image
        # img2: the resized image
        # img3: the resized image after zero padding; window gives the coordinates of img2 inside img3
        #       (the padding is centered, so window = (top_pad, left_pad, h + top_pad, w + left_pad))
        # in other words, window covers exactly the same pixels as img2, just shifted by the padding,
        # so window has the same height and width as img2
        h_scale = image_shape[0] / (window[2] - window[0]) #这里是原图在高度上进行放缩的比例因子
        w_scale = image_shape[1] / (window[3] - window[1]) #这里是原图在宽度上进行放缩的比例因子  
        scale = min(h_scale, w_scale) #比较两个放缩因子谁更小
        shift = window[:2]  # y, x    #获取top_pad,left_pad
        scales = np.array([scale, scale, scale, scale])
        shifts = np.array([shift[0], shift[1], shift[0], shift[1]])

        # Translate bounding boxes to image domain
        # map the boxes back to the original image with the inverse shift and scale
        boxes = np.multiply(boxes - shifts, scales).astype(np.int32)

        # Filter out detections with zero area. Often only happens in early
        # stages of training when the network weights are still a bit random.
        # 进一步筛选取出合格的边框
        exclude_ix = np.where(
            (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
        if exclude_ix.shape[0] > 0:
            boxes = np.delete(boxes, exclude_ix, axis=0)
            class_ids = np.delete(class_ids, exclude_ix, axis=0)
            scores = np.delete(scores, exclude_ix, axis=0)
            masks = np.delete(masks, exclude_ix, axis=0)
            N = class_ids.shape[0]

        # Resize masks to original image size and set boundary threshold.
        full_masks = []
        for i in range(N):
            # Convert neural network mask to full size mask
            full_mask = utils.unmold_mask(masks[i], boxes[i], image_shape)
            full_masks.append(full_mask)
        if full_masks:
            full_masks = np.stack(full_masks, axis=-1)
        else:
            full_masks = np.empty((0,) + masks.shape[1:3])
        
        return boxes, class_ids, scores, full_masks
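        # Illustrative numeric sketch of the back-projection above: a 512x768 original image resized to
        # 683x1024 and padded to 1024x1024 gives window = (170, 0, 853, 1024), so
        # scale = min(512/683, 768/1024) ≈ 0.7496 and shifts = [170, 0, 170, 0].
        # box = np.array([270, 100, 470, 300])                       # in molded-image coordinates
        # print(np.multiply(box - np.array([170, 0, 170, 0]), 0.7496).astype(np.int32))
        # # [ 74  74 224 224]  -> the same box in original-image coordinates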


############################################################
#  Data Formatting
############################################################

def compose_image_meta(image_id, image_shape, window, active_class_ids):
    """Takes attributes of an image and puts them in one 1D array. Use
    parse_image_meta() to parse the values back.
    image_id: An int ID of the image. Useful for debugging.
    image_shape: [height, width, channels]
    window: (y1, x1, y2, x2) in pixels. The area of the image where the real
            image is (excluding the padding)
    active_class_ids: List of class_ids available in the dataset from which
        the image came. Useful if training on images from multiple datasets
        where not all classes are present in all datasets.
    """
    meta = np.array(
        [image_id] +            # size=1
        list(image_shape) +     # size=3
        list(window) +          # size=4 (y1, x1, y2, x2) in image cooredinates
        list(active_class_ids)  # size=num_classes
    )
    return meta


# Two functions (for Numpy and TF) to parse image_meta tensors.
def parse_image_meta(meta):
    """Parses an image info Numpy array to its components.
    See compose_image_meta() for more details.
    """
    image_id = meta[:, 0]
    image_shape = meta[:, 1:4]
    window = meta[:, 4:8]   # (y1, x1, y2, x2) window of image in in pixels
    active_class_ids = meta[:, 8:]
    return image_id, image_shape, window, active_class_ids


def parse_image_meta_graph(meta):
    """Parses a tensor that contains image attributes to its components.
    See compose_image_meta() for more details.
    meta: [batch, meta length] where meta length depends on NUM_CLASSES
    """
    image_id = meta[:, 0]
    image_shape = meta[:, 1:4]
    window = meta[:, 4:8]
    active_class_ids = meta[:, 8:]
    return [image_id, image_shape, window, active_class_ids]


def mold_image(images, config):
    """Takes RGB images with 0-255 values and subtraces
    the mean pixel and converts it to float. Expects image
    colors in RGB order.
    """
    return images.astype(np.float32) - config.MEAN_PIXEL


def unmold_image(normalized_images, config):
    """Takes a image normalized with mold() and returns the original."""
    return (normalized_images + config.MEAN_PIXEL).astype(np.uint8)
    
    
    

utils.py

"""
Mask R-CNN
Common utility functions and classes.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import sys
import os
import math
import random
import numpy as np
import scipy.misc
import scipy.ndimage
import skimage.color
import skimage.io
import torch
import cv2

############################################################
#  Bounding Boxes
############################################################

def extract_bboxes(mask):
    """Compute bounding boxes from masks.
    mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
          num_instances is the number of object instances in the image;
          a pixel value of 1 means the pixel belongs to the instance, 0 means it does not
    Returns: bbox array [num_instances, (y1, x1, y2, x2)].
    """
    # each mask channel is a binary map; the bounding box is the tightest rectangle enclosing its nonzero pixels
    
    boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)  #num_instances,4)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i]  #[height,width]
        # Bounding box.
        horizontal_indicies = np.where(np.any(m, axis=0))[0] # indices of columns that contain at least one nonzero pixel
        vertical_indicies = np.where(np.any(m, axis=1))[0]   # indices of rows that contain at least one nonzero pixel
        if horizontal_indicies.shape[0]:
            x1, x2 = horizontal_indicies[[0, -1]] #返回horizontal_indicies的第一个数和最后一个数
            y1, y2 = vertical_indicies[[0, -1]]   #返回vertical_indicies的第一个数和最后一个数(这里的数代表第几个像素)
            # x2 and y2 should not be part of the box. Increment by 1.
            x2 += 1
            y2 += 1                    # the steps above find the tight bounding box of the mask's nonzero pixels
        else:
            # No mask for this instance. Might happen due to
            # resizing or cropping. Set bbox to zeros
            x1, x2, y1, y2 = 0, 0, 0, 0
        boxes[i] = np.array([y1, x1, y2, x2])
    return boxes.astype(np.int32)
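# Illustrative check of extract_bboxes with a toy mask (one instance covering rows 1..3, columns 2..4):
# mask = np.zeros((6, 6, 1), dtype=np.uint8)
# mask[1:4, 2:5, 0] = 1
# print(extract_bboxes(mask))   # [[1 2 4 5]] -> (y1, x1, y2, x2), where y2/x2 are exclusive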


def compute_iou(box, boxes, box_area, boxes_area):
    """Calculates IoU of the given box with the array of the given boxes.
    box: 1D vector [y1, x1, y2, x2]
    boxes: [boxes_count, (y1, x1, y2, x2)]
    box_area: float. the area of 'box'
    boxes_area: array of length boxes_count.

    Note: the areas are passed in rather than calculated here for
          efficiency. Calculate once in the caller to avoid duplicate work.
    """
    # Calculate intersection areas
    y1 = np.maximum(box[0], boxes[:, 0])
    y2 = np.minimum(box[2], boxes[:, 2])
    x1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[3], boxes[:, 3])
    intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
    union = box_area + boxes_area[:] - intersection[:]
    iou = intersection / union
    return iou


def compute_overlaps(boxes1, boxes2):
    """Computes IoU overlaps between two sets of boxes.
    boxes1, boxes2: [N, (y1, x1, y2, x2)].

    For better performance, pass the largest set first and the smaller second.
    """
    # Areas of anchors and GT boxes
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
    # Each cell contains the IoU value.
    overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
    for i in range(overlaps.shape[1]):
        box2 = boxes2[i]
        overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
    return overlaps
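# Illustrative check of compute_overlaps with toy boxes:
# boxes1 = np.array([[0, 0, 10, 10]], dtype=np.float32)
# boxes2 = np.array([[0, 0, 10, 10], [5, 5, 15, 15]], dtype=np.float32)
# print(compute_overlaps(boxes1, boxes2))   # [[1.  0.14285715]] -> identical boxes, then IoU = 25/175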

def box_refinement(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]
    """

    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = torch.log(gt_height / height)
    dw = torch.log(gt_width / width)

    result = torch.stack([dy, dx, dh, dw], dim=1)
    return result


############################################################
#  Dataset
############################################################

class Dataset(object):
    """The base class for dataset classes.
    To use it, create a new class that adds functions specific to the dataset
    you want to use. For example:

    class CatsAndDogsDataset(Dataset):
        def load_cats_and_dogs(self):
            ...
        def load_mask(self, image_id):
            ...
        def image_reference(self, image_id):
            ...

    See COCODataset and ShapesDataset as examples.
    """

    def __init__(self, class_map=None):
        self._image_ids = []
        self.image_info = []
        # Background is always the first class
        self.class_info = [{"source": "", "id": 0, "name": "BG"}]
        self.source_class_ids = {}

    def add_class(self, source, class_id, class_name):
        assert "." not in source, "Source name cannot contain a dot"
        # Does the class exist already?
        for info in self.class_info:
            if info['source'] == source and info["id"] == class_id: #如果增加的source为空,id==0,则放弃这次添加
                # source.class_id combination already available, skip
                return
        # Add the class
        self.class_info.append({
            "source": source,
            "id": class_id,
            "name": class_name,
        })   
        # add_class is called once for every class in the dataset (the BG class is already added in __init__)
       

    def add_image(self, source, image_id, path, **kwargs):
        image_info = {
            "id": image_id,    #图片读进来的序号
            "source": source,  #图片属于什么数据集
            "path": path,
        }
        image_info.update(kwargs)  #这里可以出传入多个dict
        self.image_info.append(image_info)     #add_image数据集有多少图片就执行多少次

    def image_reference(self, image_id):
        """Return a link to the image in its source Website or details about
        the image that help looking it up or debugging it.

        Override for your dataset, but pass to this function
        if you encounter images not in your dataset.
        """
        return ""

    def prepare(self, class_map=None):
        """Prepares the Dataset class for use.

        TODO: class map is not supported yet. When done, it should handle mapping
              classes from different datasets to the same class ID.
        """
        def clean_name(name):
            """Returns a shorter version of object names for cleaner display."""
            return ",".join(name.split(",")[:1])

        # Build (or rebuild) everything else from the info dicts.
        self.num_classes = len(self.class_info)   #添加进dataset中的有类别的种类,假设为20类
        self.class_ids = np.arange(self.num_classes)  #生成一个序列 [0,1,2,3,....19]
        self.class_names = [clean_name(c["name"]) for c in self.class_info]  #将类别名字简化用于显示
        self.num_images = len(self.image_info)  #添加进dataset中的图片数量
        self._image_ids = np.arange(self.num_images)   #将添加的图片按照顺序生成一个id序列

        self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
                                      for info, id in zip(self.class_info, self.class_ids)} 
                                    # builds a map from "source.class_id" strings to internal class ids

        # Map sources to class_ids they support
        self.sources = list(set([i['source'] for i in self.class_info]))  # unique list of source dataset names
        self.source_class_ids = {} # maps each source dataset to the class ids it contains
        # Loop over datasets
        for source in self.sources:
            self.source_class_ids[source] = []
            # Find classes that belong to this dataset
            for i, info in enumerate(self.class_info):
                # Include BG class in all datasets
                #if i == 0 or source == info['source']: #源码是这样的,感觉应该改成下面的这个
                if source == info['source']:            #理由:看model.py在调用self.source_class_ids中的注解,
                    self.source_class_ids[source].append(i)

    def map_source_class_id(self, source_class_id):
        """Takes a source class ID and returns the int class ID assigned to it.

        For example:
        dataset.map_source_class_id("coco.12") -> 23
        """
        return self.class_from_source_map[source_class_id]

    def get_source_class_id(self, class_id, source):
        """Map an internal class ID to the corresponding class ID in the source dataset."""
        info = self.class_info[class_id]
        assert info['source'] == source
        return info['id']

    def append_data(self, class_info, image_info):
        self.external_to_class_id = {}
        for i, c in enumerate(self.class_info):
            for ds, id in c["map"]:
                self.external_to_class_id[ds + str(id)] = i

        # Map external image IDs to internal ones.
        self.external_to_image_id = {}
        for i, info in enumerate(self.image_info):
            self.external_to_image_id[info["ds"] + str(info["id"])] = i

    @property
    def image_ids(self):
        return self._image_ids

    def source_image_link(self, image_id):
        """Returns the path or URL to the image.
        Override this to return a URL to the image if it's available online for easy
        debugging.
        """
        return self.image_info[image_id]["path"]

    def load_image(self, image_id):
        """Load the specified image and return a [H,W,3] Numpy array.
        """
        # Load image
        image = cv2.imread(self.image_info[image_id]['path'])  # numpy array, (H, W, 3), BGR order
        # convert BGR to RGB so it matches what mold_image() expects, and keep the [H, W, 3] layout
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. Override this
        method to load instance masks and return them in the form of an
        array of binary masks of shape [height, width, instances].

        Returns:
            masks: A bool array of shape [height, width, instance count] with
                a binary mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        # Override this function to load a mask from your dataset.
        # Otherwise, it returns an empty mask.
        # override this method to load the masks from your own dataset
        mask = np.empty([0, 0, 0])
        class_ids = np.empty([0], np.int32)
        return mask, class_ids


def resize_image(image, min_dim=None, max_dim=None, padding=False):
    """
    Resizes an image keeping the aspect ratio.

    min_dim: if provided, resizes the image such that its smaller
        dimension == min_dim
    max_dim: if provided, ensures that the image's longest side doesn't
        exceed this value.
    padding: If true, pads image with zeros so its size is max_dim x max_dim

    Returns:
    image: the resized image
    window: (y1, x1, y2, x2). If max_dim is provided, padding might
        be inserted in the returned image. If so, this window is the
        coordinates of the image part of the full image (excluding
        the padding). The x2, y2 pixels are not included.
    scale: The scale factor used to resize the image
    padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
    """
    # Default window (y1, x1, y2, x2) and default scale == 1.
    h, w = image.shape[:2]  # original height and width of the image
    window = (0, 0, h, w)
    scale = 1

    # Scale?
    if min_dim: # e.g. 800
        # Scale up but not down
        scale = max(1, min_dim / min(h, w)) # if the smaller side is already >= min_dim, no upscaling is needed
    # Does it exceed max dim?
    if max_dim: # e.g. 1024
        image_max = max(h, w)
        if round(image_max * scale) > max_dim: # if the larger side would exceed max_dim, shrink instead
            scale = max_dim / image_max
    # Resize image and mask
    if scale != 1:
        image = scipy.misc.imresize(  #scipy.misc.imresize
            image, (round(h * scale), round(w * scale)))
    # Need padding?
    if padding:
        # Get new height and width
        h, w = image.shape[:2]
        top_pad = (max_dim - h) // 2
        bottom_pad = max_dim - h - top_pad
        left_pad = (max_dim - w) // 2
        right_pad = max_dim - w - left_pad
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 
        image = np.pad(image, padding, mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad) #在padding之后原始图像的左上角和右下角的位置
    return image, window, scale, padding
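# Illustrative check of resize_image (assumes min_dim=800, max_dim=1024 and the scipy version this
# file already relies on for scipy.misc.imresize):
# image = np.zeros((512, 768, 3), dtype=np.uint8)
# resized, window, scale, padding = resize_image(image, min_dim=800, max_dim=1024, padding=True)
# print(resized.shape)    # (1024, 1024, 3)
# print(window)           # (170, 0, 853, 1024) -> where the real image sits inside the padded one
# print(round(scale, 3))  # 1.333 -> limited by max_dim (1024 / 768)
# print(padding)          # [(170, 171), (0, 0), (0, 0)]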


def resize_mask(mask, scale, padding):
    """Resizes a mask using the given scale and padding.
    Typically, you get the scale and padding from resize_image() to
    ensure both, the image and the mask, are resized consistently.

    scale: mask scaling factor
    padding: Padding to add to the mask in the form
            [(top, bottom), (left, right), (0, 0)]
    """
    h, w = mask.shape[:2]
    mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) #order=0最临近插值
                                    #zoom 各个维度的缩放系数         #order=1双线性插值
                                                                     #order=3立方体插值
    mask = np.pad(mask, padding, mode='constant', constant_values=0)  
    return mask


def minimize_mask(bbox, mask, mini_shape):
    """Resize masks to a smaller version to cut memory load.
    Mini-masks can then resized back to image scale using expand_masks()

    See inspect_data.ipynb notebook for more details.
    """
    #mini_shape=(56,56)
    #mask.shape[-1] 一张图片有几个掩膜
    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        m = m[y1:y2, x1:x2] #先将mask第一次缩小到box的高宽
        if m.size == 0:
            raise Exception("Invalid bounding box with area of zero")
        m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
        mini_mask[:, :, i] = np.where(m >= 128, 1, 0) #图像二值化
    return mini_mask


def expand_mask(bbox, mini_mask, image_shape):
    """Resizes mini masks back to image size. Reverses the change
    of minimize_mask().

    See inspect_data.ipynb notebook for more details.
    """
    mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mini_mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        h = y2 - y1
        w = x2 - x1
        m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear')
        mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0)
    return mask
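
A minimal sketch of the round trip between minimize_mask and expand_mask. The box, mask and shapes below are made-up illustration values, and the sketch assumes the same old SciPy environment (scipy.misc.imresize) used by the code above:

import numpy as np

image_shape = (1024, 1024, 3)
bbox = np.array([[100, 200, 300, 400]])     # one instance: (y1, x1, y2, x2) enclosing the object
mask = np.zeros((1024, 1024, 1), dtype=bool)
mask[120:280, 220:380, 0] = True            # the object fills only part of its box

mini = minimize_mask(bbox, mask, (56, 56))  # -> (56, 56, 1), small enough to keep in memory
full = expand_mask(bbox, mini, image_shape) # -> (1024, 1024, 1), back at image scale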


# TODO: Build and use this function to reduce code duplication
def mold_mask(mask, config):
    pass


def unmold_mask(mask, bbox, image_shape):
    """Converts a mask generated by the neural network into a format similar
    to it's original shape.
    mask: [height, width] of type float. A small, typically 28x28 mask.
    bbox: [y1, x1, y2, x2]. The box to fit the mask in.

    Returns a binary mask with the same size as the original image.
    """
    threshold = 0.5
    y1, x1, y2, x2 = bbox
    # resize the small fixed-size mask (typically 28x28) back to the box's height and width
    mask = scipy.misc.imresize(
        mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0
    mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)

    # Put the mask in the right location.
    # Create an all-zero canvas at the original image size, then paste the mask onto it at the box's location
    full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
    full_mask[y1:y2, x1:x2] = mask
    return full_mask
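
A hypothetical call of unmold_mask with made-up shapes, showing the flow from a small predicted mask to a full-image binary mask (again assuming the old scipy.misc.imresize is available):

import numpy as np

pred_mask = np.random.rand(28, 28).astype(np.float32)  # stand-in for a mask-head output
box = np.array([40, 60, 140, 260])                     # (y1, x1, y2, x2) in image coordinates
full = unmold_mask(pred_mask, box, (512, 512, 3))
# full.shape == (512, 512), dtype uint8, non-zero only inside the box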


############################################################
#  Anchors
############################################################

def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """
    scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
    shape: [height, width] spatial shape of the feature map over which
            to generate anchors.
    feature_stride: Stride of the feature map relative to the image in pixels.
    anchor_stride: Stride of anchors on the feature map. For example, if the
        value is 2 then generate anchors for every other feature map pixel.
    """
    # Get all combinations of scales and ratios
    scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
    # e.g. with scales=[32] and ratios=[0.5, 1, 2]:
    # scales -> array([[32],        ratios -> array([[0.5],
    #                  [32],                         [1. ],
    #                  [32]])                        [2. ]])
    
    scales = scales.flatten() #array([ 32,32,32])
    ratios = ratios.flatten() #array([0.5,1.,2.])

    # Enumerate heights and widths from scales and ratios
    heights = scales / np.sqrt(ratios) #(3,)
    widths = scales * np.sqrt(ratios)  #(3,)

    # Enumerate shifts in feature space
    '''
       shape is the feature map's (height, width); anchor_stride is the step used to sample
       grid points on the feature map, so one feature map has
       (shape[0] // anchor_stride) * (shape[1] // anchor_stride) grid points.
       Multiplying by feature_stride maps these grid-point coordinates back onto the input image.
    '''
    shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
    shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
    shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)  

    # Enumerate combinations of shifts, widths, and heights
    # Tile widths vertically, once per shift position, and repeat each element of
    # shifts_x horizontally len(widths) times, so every (center, width) pair is enumerated
    box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 
    box_heights, box_centers_y = np.meshgrid(heights, shifts_y)

    # Reshape to get a list of (y, x) and a list of (h, w)
    box_centers = np.stack(
        [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
    box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])

    # Convert to corner coordinates (y1, x1, y2, x2)
    boxes = np.concatenate([box_centers - 0.5 * box_sizes,
                            box_centers + 0.5 * box_sizes], axis=1)
    return boxes
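
As a rough hand-worked check of the anchor count for a single pyramid level (the numbers assume the usual 1024x1024 configuration and were not produced by running the code):

# Hypothetical check for one level:
#   shape = (256, 256), feature_stride = 4, anchor_stride = 1, 3 ratios
#   -> 256 * 256 grid points * 3 ratios = 196,608 anchors
#   boxes.shape == (196608, 4), each row a (y1, x1, y2, x2) box in image coordinates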


def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
                             anchor_stride):
    """Generate anchors at different levels of a feature pyramid. Each scale
    is associated with a level of the pyramid, but each ratio is used in
    all levels of the pyramid.

    Returns:
    anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
        with the same order of the given scales. So, anchors of scale[0] come
        first, then anchors of scale[1], and so on.
    """
    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    anchors = []
    for i in range(len(scales)):
        anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
                                        feature_strides[i], anchor_stride))
    return np.concatenate(anchors, axis=0)
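
And a hand-worked total over the whole pyramid, assuming the common configuration of scales (32, 64, 128, 256, 512), feature strides (4, 8, 16, 32, 64), a 1024x1024 input, anchor_stride=1 and 3 ratios:

# Hypothetical totals per level (feature map side = 1024 / stride):
#   P2: 256*256*3 = 196,608    P3: 128*128*3 = 49,152
#   P4:  64* 64*3 =  12,288    P5:  32* 32*3 =  3,072
#   P6:  16* 16*3 =     768
#   total = 261,888 anchors -> anchors.shape == (261888, 4)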






Full code (PyTorch version, with Chinese commentary): due to my limited ability, the GPU version of ROI Align is left unannotated; please bear with me.

Code mind map

GitHub source code (Keras version)