晓码bigdata

目标检测系列5——Faster R-CNN2（代码实现）

参考黑马程序员教程

(1) train.py

# train
import os
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from detection.datasets import coco, data_generator
from detection.models.detectors import faster_rcnn
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

## 新加,解决Failed to get convolution algorithm. This is probably because cuDNN failed to initialize
os.environ['CUDA_VISIBLE_DEVICES'] = '/device:GPU:0'

def train():
    # 1 调用coco处理数据的方法， 获取预处理后的数据集
    train_dataset = coco.CocoDataSet('./data/coco2017', 'train',
                         flip_ratio=0.5,
                         pad_mode='fixed',
                         mean=(123.675, 116.28, 103.53),
                         std=(1., 1., 1.),
                         scale=(800, 1216))
    print('=====', train_dataset)

    #2 数据处理，生成一个训练数据，生成每4个列表，每个列表都是一个多为列表
    # tf.data.Dataset
    train_generator = data_generator.DataGenerator(train_dataset)
    print("+++++", train_generator)

    #3 生成了数据，但是不太清楚train_generator到底是个啥东西
    tf_dataset = tf.data.Dataset.from_generator(train_generator,
                                                (tf.float32, tf.float32, tf.float32, tf.float32))
    print('------', tf_dataset)
    # 4因为这里只有1张图像，所以batch是1，打乱
    tf_dataset = tf_dataset.batch(1).prefetch(100).shuffle(100)

    # 5建立模型以及训练
    num_classes = len(train_dataset.get_categories())
    print('-=-=-=-=', num_classes)
    # 6获取模型
    model = faster_rcnn.FasterRCNN(num_classes=num_classes)
    # 7定义优化器
    optimizer = tf.keras.optimizers.SGD(1e-3, momentum=0.9, nesterov=True)
    # 8进行循环训练，循环1个epoch
    for epoch in range(1):
        # 这里是因为前面给它加了批次，所以tf_dataset变成了5维数据，第一维是它的批次量
        for (batch, inputs) in enumerate(tf_dataset):
            # 这里不懂，4个列表分别为，原始图像，第2个不知道，边界框，标签
            batch_imgs, batch_metas, batch_bboxes, batch_labels = inputs
            print("batch_imgs形状：",batch_imgs.shape)
            print(batch_imgs, batch_metas, batch_bboxes, batch_labels)

            with tf.GradientTape() as tape:
                # 每一个结构的损失
                rpn_class_loss, rpn_bbox_loss, rcnn_class_loss, rcnn_bbox_loss = \
                    model((batch_imgs, batch_metas, batch_bboxes, batch_labels), training=True)
                # rpn网络的损失和网络的总损失
                loss = rpn_class_loss + rpn_bbox_loss + rcnn_class_loss + rcnn_bbox_loss
            # 进行对损失求梯度
            grads = tape.gradient(loss, model.trainable_variables)
            # 将梯度应用到变量上
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            print("迭代次数：%d, batch大小：%d, 损失大小：%f" % (epoch+1, batch+1, loss))


def test():

    train_dataset = coco.CocoDataSet('./data/coco2017', 'val')

    # 获取数据和模型
    train_generator = data_generator.DataGenerator(train_dataset)
    tf_dataset = tf.data.Dataset.from_generator(train_generator,
                                                (tf.float32, tf.float32, tf.float32, tf.float32))
    tf_dataset = tf_dataset.batch(1).prefetch(100).shuffle(100)
    num_classes = len(train_dataset.get_categories())
    model = faster_rcnn.FasterRCNN(num_classes=num_classes)
    print(num_classes)

    for (batch, inputs) in enumerate(tf_dataset):
        img, img_meta, _, _ = inputs
        print(img, img_meta)

        detections_list = model((img, img_meta), training=False)

        print(detections_list)


if __name__ == '__main__':
    train()
    # test()

core
(2) anchor——generator

# anchor_generator
import tensorflow as tf
from detection.utils.misc import calc_img_shapes, calc_batch_padded_shape

class AnchorGenerator:
    """
    This class operate on padded iamge, eg. [1216, 1216]
    and generate scales*ratios number of anchor boxes for each point in
    padded image, with stride = feature_strides
    number of anchor = (1216 // feature_stride)^2
    number of anchor boxes = number of anchor * (scales_len*ratio_len)
    """
    def __init__(self, 
                 scales=(32, 64, 128, 256, 512), 
                 ratios=(0.5, 1, 2), 
                 feature_strides=(4, 8, 16, 32, 64)):
        '''
        Anchor Generator
        
        Attributes
        ---
            scales: 1D array of anchor sizes in pixels.
            ratios: 1D array of anchor ratios of width/height.
            feature_strides: Stride of the feature map relative to the image in pixels.
        '''
        self.scales = scales
        self.ratios = ratios
        self.feature_strides = feature_strides
     
    def generate_pyramid_anchors(self, img_metas):
        '''
        Generate the multi-level anchors for Region Proposal Network
        
        Args
        ---
            img_metas: [batch_size, 11]
        
        Returns
        ---
            anchors: [num_anchors, (y1, x1, y2, x2)] in image coordinates.
            valid_flags: [batch_size, num_anchors]
        '''
        # generate anchors
        pad_shape = calc_batch_padded_shape(img_metas) # [1216, 1216]
        # : [(304, 304), (152, 152), (76, 76), (38, 38), (19, 19)]
        feature_shapes = [(pad_shape[0] // stride, pad_shape[1] // stride)
                          for stride in self.feature_strides]
        anchors = [
            self._generate_level_anchors(level, feature_shape)
            for level, feature_shape in enumerate(feature_shapes)
        ] # [277248, 4], [69312, 4], [17328, 4], [4332, 4], [1083, 4]
        anchors = tf.concat(anchors, axis=0) # [369303, 4]
        # print('total anchors:', anchors.shape)
        # print('---------')

        # generate valid flags
        img_shapes = calc_img_shapes(img_metas) # (800, 1067)
        valid_flags = [
            self._generate_valid_flags(anchors, img_shapes[i])
            for i in range(img_shapes.shape[0])
        ]
        valid_flags = tf.stack(valid_flags, axis=0)
        
        anchors = tf.stop_gradient(anchors)
        valid_flags = tf.stop_gradient(valid_flags)
        
        return anchors, valid_flags
    
    def _generate_valid_flags(self, anchors, img_shape):
        '''
        remove these anchor boxed on padded area
        ---
            anchors: [num_anchors, (y1, x1, y2, x2)] in image coordinates.
            img_shape: Tuple. (height, width, channels)
            
        Returns
        ---
            valid_flags: [num_anchors]
        '''
        y_center = (anchors[:, 2] + anchors[:, 0]) / 2 # [369300]
        x_center = (anchors[:, 3] + anchors[:, 1]) / 2
        
        valid_flags = tf.ones(anchors.shape[0], dtype=tf.int32) # [369300]
        zeros = tf.zeros(anchors.shape[0], dtype=tf.int32)
        # set boxes whose center is out of image area as invalid.
        valid_flags = tf.where(y_center <= img_shape[0], valid_flags, zeros)
        valid_flags = tf.where(x_center <= img_shape[1], valid_flags, zeros)
        
        return valid_flags
    
    def _generate_level_anchors(self, level, feature_shape):
        '''Generate the anchors given the spatial shape of feature map.
        
        scale: 32
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (256, 256) (256, 256)
        scale: 64
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (128, 128) (128, 128)
        scale: 128
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (64, 64) (64, 64)
        scale: 256
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (32, 32) (32, 32)
        scale: 512
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (16, 16) (16, 16)


        scale: 32
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (304, 304) (304, 304)
        boxes: (277248, 4)
        scale: 64
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (152, 152) (152, 152)
        boxes: (69312, 4)
        scale: 128
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (76, 76) (76, 76)
        boxes: (17328, 4)
        scale: 256
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (38, 38) (38, 38)
        boxes: (4332, 4)
        scale: 512
        ratios: tf.Tensor([0.5 1.  2. ], shape=(3,), dtype=float32)
        pos: (19, 19) (19, 19)
        boxes: (1083, 4)
        total anchors: (369303, 4)

        ---
            feature_shape: (height, width)

        Returns
        ---
            numpy.ndarray [anchors_num, (y1, x1, y2, x2)]
        '''
        scale = self.scales[level]
        ratios = self.ratios
        feature_stride = self.feature_strides[level]
        
        # Get all combinations of scales and ratios
        scales, ratios = tf.meshgrid([float(scale)], ratios)
        scales = tf.reshape(scales, [-1]) # [32, 32, 32]
        ratios = tf.reshape(ratios, [-1]) # [0.5, 1, 2]
        
        # Enumerate heights and widths from scales and ratios
        heights = scales / tf.sqrt(ratios) # [45, 32, 22], square root
        widths = scales * tf.sqrt(ratios)  # [22, 32, 45]

        # Enumerate shifts in feature space, [0, 4, ..., 1216-4]
        shifts_y = tf.multiply(tf.range(feature_shape[0]), feature_stride)
        shifts_x = tf.multiply(tf.range(feature_shape[1]), feature_stride)
        
        shifts_x, shifts_y = tf.cast(shifts_x, tf.float32), tf.cast(shifts_y, tf.float32)
        shifts_x, shifts_y = tf.meshgrid(shifts_x, shifts_y) # [304, 304, 2] coordinates

        # Enumerate combinations of shifts, widths, and heights # mesh A: [3] B:[304,304]=>[92416] =>[92416,3,2]
        box_widths, box_centers_x = tf.meshgrid(widths, shifts_x)
        box_heights, box_centers_y = tf.meshgrid(heights, shifts_y)

        # Reshape to get a list of (y, x) and a list of (h, w)
        box_centers = tf.reshape(tf.stack([box_centers_y, box_centers_x], axis=2), (-1, 2))
        box_sizes = tf.reshape(tf.stack([box_heights, box_widths], axis=2), (-1, 2))

        # Convert to corner coordinates (y1, x1, y2, x2) [304x304, 3, 4] => [277448, 4]
        boxes = tf.concat([box_centers - 0.5 * box_sizes,
                           box_centers + 0.5 * box_sizes], axis=1)
        # print('scale:', scale)
        # print('ratios:', ratios)
        # print('pos:', shifts_x.shape, shifts_y.shape)
        # print('boxes:', boxes.shape)
        return boxes

(3) anchor_target

# anchor_target
import tensorflow as tf
from detection.core.bbox import geometry, transforms
from detection.utils.misc import trim_zeros



class AnchorTarget:
    """
    for every generated anchors boxes: [326393, 4],
    create its rpn_target_matchs and rpn_target_matchs
    which is used to train RPN network.
    """
    def __init__(self,
                 target_means=(0., 0., 0., 0.), 
                 target_stds=(0.1, 0.1, 0.2, 0.2),
                 num_rpn_deltas=256,
                 positive_fraction=0.5,
                 pos_iou_thr=0.7,
                 neg_iou_thr=0.3):
        '''
        Compute regression and classification targets for anchors.
        
        Attributes
        ---
            target_means: [4]. Bounding box refinement mean for RPN.
            target_stds: [4]. Bounding box refinement standard deviation for RPN.
            num_rpn_deltas: int. Maximal number of Anchors per image to feed to rpn heads.
            positive_fraction: float.
            pos_iou_thr: float.
            neg_iou_thr: float.
        '''
        self.target_means = target_means
        self.target_stds = target_stds
        self.num_rpn_deltas = num_rpn_deltas
        self.positive_fraction = positive_fraction
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr

    def build_targets(self, anchors, valid_flags, gt_boxes, gt_class_ids):
        '''
        Given the anchors and GT boxes, compute overlaps and identify positive
        anchors and deltas to refine them to match their corresponding GT boxes.

        Args
        ---
            anchors: [num_anchors, (y1, x1, y2, x2)] in image coordinates.
            valid_flags: [batch_size, num_anchors]
            gt_boxes: [batch_size, num_gt_boxes, (y1, x1, y2, x2)] in image 
                coordinates. batch_size = 1 usually
            gt_class_ids: [batch_size, num_gt_boxes] Integer class IDs.

        Returns
        ---
            rpn_target_matchs: [batch_size, num_anchors] matches between anchors and GT boxes.
                1 = positive anchor, -1 = negative anchor, 0 = neutral anchor
            rpn_target_deltas: [batch_size, num_rpn_deltas, (dy, dx, log(dh), log(dw))] 
                Anchor bbox deltas.
        '''
        rpn_target_matchs = []
        rpn_target_deltas = []
        
        num_imgs = gt_class_ids.shape[0] # namely, batchsz , 1
        for i in range(num_imgs):
            target_match, target_delta = self._build_single_target(
                anchors, valid_flags[i], gt_boxes[i], gt_class_ids[i])
            rpn_target_matchs.append(target_match)
            rpn_target_deltas.append(target_delta)
        
        rpn_target_matchs = tf.stack(rpn_target_matchs)
        rpn_target_deltas = tf.stack(rpn_target_deltas)
        
        rpn_target_matchs = tf.stop_gradient(rpn_target_matchs)
        rpn_target_deltas = tf.stop_gradient(rpn_target_deltas)
        
        return rpn_target_matchs, rpn_target_deltas

    def _build_single_target(self, anchors, valid_flags, gt_boxes, gt_class_ids):
        '''Compute targets per instance.
        
        Args
        ---
            anchors: [num_anchors, (y1, x1, y2, x2)]
            valid_flags: [num_anchors]
            gt_class_ids: [num_gt_boxes]
            gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]
        
        Returns
        ---
            target_matchs: [num_anchors]
            target_deltas: [num_rpn_deltas, (dy, dx, log(dh), log(dw))] 
        '''
        gt_boxes, _ = trim_zeros(gt_boxes) # remove padded zero boxes, [new_N, 4]
        
        target_matchs = tf.zeros(anchors.shape[0], dtype=tf.int32) # [326393]
        
        # Compute overlaps [num_anchors, num_gt_boxes] 326393 vs 10 => [326393, 10]
        overlaps = geometry.compute_overlaps(anchors, gt_boxes)

        # Match anchors to GT Boxes
        # If an anchor overlaps ANY GT box with IoU >= 0.7 then it's positive.
        # If an anchor overlaps ALL GT box with IoU < 0.3 then it's negative.
        # Neutral anchors are those that don't match the conditions above,
        # and they don't influence the loss function.
        # However, don't keep any GT box unmatched (rare, but happens). Instead,
        # match it to the closest anchor (even if its max IoU is < 0.3).
        
        neg_values = tf.constant([0, -1])
        pos_values = tf.constant([0, 1])
        
        # 1. Set negative anchors first. They get overwritten below if a GT box is
        # matched to them. [N_anchors, N_gt_boxes]
        anchor_iou_argmax = tf.argmax(overlaps, axis=1) # [326396] get clost gt boxes for each anchors
        anchor_iou_max = tf.reduce_max(overlaps, axis=[1]) # [326396] get closet gt boxes's overlap scores
        # if an anchor box overlap all GT box with IoU < 0.3, marked as -1 background
        target_matchs = tf.where(anchor_iou_max < self.neg_iou_thr, 
                                 -tf.ones(anchors.shape[0], dtype=tf.int32), target_matchs)

        # filter invalid anchors
        target_matchs = tf.where(tf.equal(valid_flags, 1),
                                 target_matchs, tf.zeros(anchors.shape[0], dtype=tf.int32))
        # if an anchor overlap with any GT box with IoU > 0.7, marked as foreground
        # 2. Set anchors with high overlap as positive.
        target_matchs = tf.where(anchor_iou_max >= self.pos_iou_thr, 
                                 tf.ones(anchors.shape[0], dtype=tf.int32), target_matchs)

        # 3. Set an anchor for each GT box (regardless of IoU value).        
        gt_iou_argmax = tf.argmax(overlaps, axis=0) # [N_gt_boxes]
        target_matchs = tf.compat.v1.scatter_update(tf.Variable(target_matchs), gt_iou_argmax, 1)
        # update corresponding value=>1 for GT boxes' closest boxes
        
        # Subsample to balance positive and negative anchors
        # Don't let positives be more than half the anchors
        ids = tf.where(tf.equal(target_matchs, 1))  # [N_pos_anchors, 1], [15, 1]
        ids = tf.squeeze(ids, 1) # [15]
        extra = ids.shape.as_list()[0] - int(self.num_rpn_deltas * self.positive_fraction) # 256*0.5
        if extra > 0: # extra means the redundant pos_anchors
            # Reset the extra random ones to neutral
            ids = tf.random.shuffle(ids)[:extra]
            target_matchs = tf.compat.v1.scatter_update(target_matchs, ids, 0)
        # Same for negative proposals
        ids = tf.where(tf.equal(target_matchs, -1)) # [213748, 1]
        ids = tf.squeeze(ids, 1)
        extra = ids.shape.as_list()[0] - (self.num_rpn_deltas - # 213748 - (256 - num_of_pos_anchors:15)
            tf.reduce_sum(tf.cast(tf.equal(target_matchs, 1), tf.int32)))
        if extra > 0: # 213507, so many negative anchors!
            # Rest the extra ones to neutral
            ids = tf.random.shuffle(ids)[:extra]
            target_matchs = tf.compat.v1.scatter_update(target_matchs, ids, 0)
        # since we only need 256 anchors, and it had better contains half positive anchors, and harlf neg .
        
        # For positive anchors, compute shift and scale needed to transform them
        # to match the corresponding GT boxes.
        ids = tf.where(tf.equal(target_matchs, 1)) # [15]
        
        a = tf.gather_nd(anchors, ids) # [369303, 4], [15] => [15, 4]
        anchor_idx = tf.gather_nd(anchor_iou_argmax, ids) # closed gt boxes index for 369303 anchors
        gt = tf.gather(gt_boxes, anchor_idx) # get closed gt boxes coordinates for ids=15
        # a: [15, 4], postive anchors, gt: [15, 4] closed gt boxes for each anchors=15
        target_deltas = transforms.bbox2delta(
            a, gt, self.target_means, self.target_stds)
        # target_deltas: [15, (dy,dx,logw,logh)]?
        padding = tf.maximum(self.num_rpn_deltas - tf.shape(target_deltas)[0], 0) # 256-15
        target_deltas = tf.pad(target_deltas, [(0, padding), (0, 0)]) #padding to [256,4], last padding 0

        return target_matchs, target_deltas

(4) bbox_target

# bbox_target
import numpy as np
import tensorflow as tf

from detection.core.bbox import geometry, transforms
from detection.utils.misc import *

class ProposalTarget:

    def __init__(self,
                 target_means=(0., 0., 0., 0.),
                 target_stds=(0.1, 0.1, 0.2, 0.2), 
                 num_rcnn_deltas=256,
                 positive_fraction=0.25,
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.5):
        '''
        Compute regression and classification targets for proposals.
        
        Attributes
        ---
            target_means: [4]. Bounding box refinement mean for RCNN.
            target_stds: [4]. Bounding box refinement standard deviation for RCNN.
            num_rcnn_deltas: int. Maximal number of RoIs per image to feed to bbox heads.

        '''
        self.target_means = target_means
        self.target_stds = target_stds
        self.num_rcnn_deltas = num_rcnn_deltas
        self.positive_fraction = positive_fraction
        self.pos_iou_thr = pos_iou_thr
        self.neg_iou_thr = neg_iou_thr
            
    def build_targets(self, proposals_list, gt_boxes, gt_class_ids, img_metas):
        '''
        Generates detection targets for images. Subsamples proposals and
        generates target class IDs, bounding box deltas for each.
        
        Args
        ---
            proposals_list: list of [num_proposals, (y1, x1, y2, x2)] in normalized coordinates.
            gt_boxes: [batch_size, num_gt_boxes, (y1, x1, y2, x2)] in image coordinates.
            gt_class_ids: [batch_size, num_gt_boxes] Integer class IDs.
            img_metas: [batch_size, 11]
            
        Returns
        ---
            rois_list: list of [num_rois, (y1, x1, y2, x2)] in normalized coordinates
            rcnn_target_matchs_list: list of [num_rois]. Integer class IDs.
            rcnn_target_deltas_list: list of [num_positive_rois, (dy, dx, log(dh), log(dw))].
            
        Note that self.num_rcnn_deltas >= num_rois > num_positive_rois. And different 
           images in one batch may have different num_rois and num_positive_rois.
        '''
        
        pad_shapes = calc_pad_shapes(img_metas) # [[1216, 1216]]
        
        rois_list = []
        rcnn_target_matchs_list = []
        rcnn_target_deltas_list = []
        
        for i in range(img_metas.shape[0]):
            rois, target_matchs, target_deltas = self._build_single_target(
                proposals_list[i], gt_boxes[i], gt_class_ids[i], pad_shapes[i])
            rois_list.append(rois) # [192, 4], including pos/neg anchors
            rcnn_target_matchs_list.append(target_matchs) # positive target label, and padding with zero for neg
            rcnn_target_deltas_list.append(target_deltas) # positive target deltas, and padding with zero for neg
        
        return rois_list, rcnn_target_matchs_list, rcnn_target_deltas_list
    
    def _build_single_target(self, proposals, gt_boxes, gt_class_ids, img_shape):
        '''
        Args
        ---
            proposals: [num_proposals, (y1, x1, y2, x2)] in normalized coordinates.
            gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]
            gt_class_ids: [num_gt_boxes]
            img_shape: np.ndarray. [2]. (img_height, img_width)
            
        Returns
        ---
            rois: [num_rois, (y1, x1, y2, x2)]
            target_matchs: [num_positive_rois]
            target_deltas: [num_positive_rois, (dy, dx, log(dh), log(dw))]
        '''
        H, W = img_shape # 1216, 1216
        
        
        gt_boxes, non_zeros = trim_zeros(gt_boxes) # [7, 4], remove padded zero boxes
        gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros) # [7]
        # normalize (y1, x1, y2, x2) => 0~1
        gt_boxes = gt_boxes / tf.constant([H, W, H, W], dtype=tf.float32)
        # [2k, 4] with [7, 4] => [2k, 7] overlop scores
        overlaps = geometry.compute_overlaps(proposals, gt_boxes)
        anchor_iou_argmax = tf.argmax(overlaps, axis=1) # [2000]get cloest gt boxed id for each anchor boxes
        roi_iou_max = tf.reduce_max(overlaps, axis=1) # [2000]get clost gt boxes overlop score for each anchor boxes
        # roi_iou_max: [2000],
        positive_roi_bool = (roi_iou_max >= self.pos_iou_thr) #[2000]
        positive_indices = tf.where(positive_roi_bool)[:, 0] #[48, 1] =>[48]
        # get all positive indices, namely get all pos_anchor indices
        negative_indices = tf.where(roi_iou_max < self.neg_iou_thr)[:, 0]
        # get all negative anchor indices
        # Subsample ROIs. Aim for 33% positive
        # Positive ROIs
        positive_count = int(self.num_rcnn_deltas * self.positive_fraction) # 0.25?
        positive_indices = tf.random.shuffle(positive_indices)[:positive_count] # [256*0.25]=64, at most get 64
        positive_count = tf.shape(positive_indices)[0] # 34
        
        # Negative ROIs. Add enough to maintain positive:negative ratio.
        r = 1.0 / self.positive_fraction
        negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count #102
        negative_indices = tf.random.shuffle(negative_indices)[:negative_count] #[102]
        
        # Gather selected ROIs, based on remove redundant pos/neg indices
        positive_rois = tf.gather(proposals, positive_indices) # [34, 4]
        negative_rois = tf.gather(proposals, negative_indices) # [102, 4]
        
        # Assign positive ROIs to GT boxes.
        positive_overlaps = tf.gather(overlaps, positive_indices) # [34, 7]
        roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1) # [34]for each anchor, get its clost gt boxes
        roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) # [34, 4]
        target_matchs = tf.gather(gt_class_ids, roi_gt_box_assignment) # [34]
        # target_matchs, target_deltas all get!!
        # proposal: [34, 4], target: [34, 4]
        target_deltas = transforms.bbox2delta(positive_rois, roi_gt_boxes, self.target_means, self.target_stds)
        # [34, 4] [102, 4]
        rois = tf.concat([positive_rois, negative_rois], axis=0)
        
        N = tf.shape(negative_rois)[0] # 102
        target_matchs = tf.pad(target_matchs, [(0, N)]) # [34] padding after with [N]
        
        target_matchs = tf.stop_gradient(target_matchs) # [34+102]
        target_deltas = tf.stop_gradient(target_deltas) # [34, 4]
        # rois: [34+102, 4]
        return rois, target_matchs, target_deltas

(5) geometry

import tensorflow as tf

def compute_overlaps(boxes1, boxes2):
    '''Computes IoU overlaps between two sets of boxes.
    boxes1, boxes2: [N, (y1, x1, y2, x2)].
    '''
    # 1. Tile boxes2 and repeate boxes1. This allows us to compare
    # every boxes1 against every boxes2 without loops.
    # TF doesn't have an equivalent to np.repeate() so simulate it
    # using tf.tile() and tf.reshape.
    b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1),
                            [1, 1, tf.shape(boxes2)[0]]), [-1, 4])
    b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1])
    # 2. Compute intersections
    b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1)
    b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1)
    y1 = tf.maximum(b1_y1, b2_y1)
    x1 = tf.maximum(b1_x1, b2_x1)
    y2 = tf.minimum(b1_y2, b2_y2)
    x2 = tf.minimum(b1_x2, b2_x2)
    intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0)
    # 3. Compute unions
    b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1)
    b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1)
    union = b1_area + b2_area - intersection
    # 4. Compute IoU and reshape to [boxes1, boxes2]
    iou = intersection / union
    overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]])
    return overlaps

(6) transforms

# transforms
import tensorflow as tf

from detection.utils.misc import *

def bbox2delta(box, gt_box, target_means, target_stds):
    '''Compute refinement needed to transform box to gt_box.
    
    Args
    ---
        box: [..., (y1, x1, y2, x2)]
        gt_box: [..., (y1, x1, y2, x2)]
        target_means: [4]
        target_stds: [4]
    '''
    target_means = tf.constant(
        target_means, dtype=tf.float32)
    target_stds = tf.constant(
        target_stds, dtype=tf.float32)
    
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    height = box[..., 2] - box[..., 0]
    width = box[..., 3] - box[..., 1]
    center_y = box[..., 0] + 0.5 * height
    center_x = box[..., 1] + 0.5 * width

    gt_height = gt_box[..., 2] - gt_box[..., 0]
    gt_width = gt_box[..., 3] - gt_box[..., 1]
    gt_center_y = gt_box[..., 0] + 0.5 * gt_height
    gt_center_x = gt_box[..., 1] + 0.5 * gt_width

    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.math.log(gt_height / height)
    dw = tf.math.log(gt_width / width)

    delta = tf.stack([dy, dx, dh, dw], axis=-1)
    delta = (delta - target_means) / target_stds
    
    return delta

def delta2bbox(box, delta, target_means, target_stds):
    '''Compute bounding box based on roi and delta.
    
    Args
    ---
        box: [N, (y1, x1, y2, x2)] box to update
        delta: [N, (dy, dx, log(dh), log(dw))] refinements to apply
        target_means: [4]
        target_stds: [4]
    '''
    target_means = tf.constant(
        target_means, dtype=tf.float32)
    target_stds = tf.constant(
        target_stds, dtype=tf.float32)
    delta = delta * target_stds + target_means    
    # Convert to y, x, h, w
    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width
    
    # Apply delta
    center_y += delta[:, 0] * height
    center_x += delta[:, 1] * width
    height *= tf.exp(delta[:, 2])
    width *= tf.exp(delta[:, 3])
    
    # Convert back to y1, x1, y2, x2
    y1 = center_y - 0.5 * height
    x1 = center_x - 0.5 * width
    y2 = y1 + height
    x2 = x1 + width
    result = tf.stack([y1, x1, y2, x2], axis=1)
    return result

def bbox_clip(box, window):
    '''
    Args
    ---
        box: [N, (y1, x1, y2, x2)]
        window: [4] in the form y1, x1, y2, x2
    '''
    # Split
    wy1, wx1, wy2, wx2 = tf.split(window, 4)
    y1, x1, y2, x2 = tf.split(box, 4, axis=1)
    # Clip
    y1 = tf.maximum(tf.minimum(y1, wy2), wy1)
    x1 = tf.maximum(tf.minimum(x1, wx2), wx1)
    y2 = tf.maximum(tf.minimum(y2, wy2), wy1)
    x2 = tf.maximum(tf.minimum(x2, wx2), wx1)
    clipped = tf.concat([y1, x1, y2, x2], axis=1)
    clipped.set_shape((clipped.shape[0], 4))
    return clipped

def bbox_flip(bboxes, width):
    '''
    Flip bboxes horizontally.
    
    Args
    ---
        bboxes: [..., 4]
        width: Int or Float
    '''
    y1, x1, y2, x2 = tf.split(bboxes, 4, axis=-1)
    
    new_x1 = width - x2
    new_x2 = width - x1
    
    flipped = tf.concat([y1, new_x1, y2, new_x2], axis=-1)
    
    return flipped



def bbox_mapping(box, img_meta):
    '''
    Args
    ---
        box: [N, 4]
        img_meta: [11]
    '''
    img_meta = parse_image_meta(img_meta)
    scale = img_meta['scale']
    flip = img_meta['flip']
    
    box = box * scale
    if tf.equal(flip, 1):
        box = bbox_flip(box, img_meta['img_shape'][1])
    
    return box

def bbox_mapping_back(box, img_meta):
    '''
    Args
    ---
        box: [N, 4]
        img_meta: [11]
    '''
    img_meta = parse_image_meta(img_meta)
    scale = img_meta['scale']
    flip = img_meta['flip']
    if tf.equal(flip, 1):
        box = bbox_flip(box, img_meta['img_shape'][1])
    box = box / scale
    
    return box

(7) loss

# loss
import tensorflow as tf
from    tensorflow import keras


def smooth_l1_loss(y_true, y_pred):
    '''Implements Smooth-L1 loss.
    
    Args
    ---
        y_true and y_pred are typically: [N, 4], but could be any shape.
    '''
    diff = tf.abs(y_true - y_pred)
    less_than_one = tf.cast(tf.less(diff, 1.0), tf.float32)
    loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
    return loss



def rpn_class_loss(target_matchs, rpn_class_logits):
    '''RPN anchor classifier loss.
    
    Args
    ---
        target_matchs: [batch_size, num_anchors]. Anchor match type. 1=positive,
            -1=negative, 0=neutral anchor.
        rpn_class_logits: [batch_size, num_anchors, 2]. RPN classifier logits for FG/BG.
    '''

    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = tf.cast(tf.equal(target_matchs, 1), tf.int32)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = tf.where(tf.not_equal(target_matchs, 0))
    # Pick rows that contribute to the loss and filter out the rest.
    rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
    anchor_class = tf.gather_nd(anchor_class, indices)
    # Cross entropy loss
    # loss = tf.losses.sparse_softmax_cross_entropy(labels=anchor_class,
    #                                               logits=rpn_class_logits)

    num_classes = rpn_class_logits.shape[-1]
    # print(rpn_class_logits.shape)
    loss = keras.losses.categorical_crossentropy(tf.one_hot(anchor_class, depth=num_classes),
                                                 rpn_class_logits, from_logits=True)

    
    loss = tf.reduce_mean(loss) if tf.size(loss) > 0 else tf.constant(0.0)
    return loss


def rpn_bbox_loss(target_deltas, target_matchs, rpn_deltas):
    '''Return the RPN bounding box loss graph.
    
    Args
    ---
        target_deltas: [batch, num_rpn_deltas, (dy, dx, log(dh), log(dw))].
            Uses 0 padding to fill in unsed bbox deltas.
        target_matchs: [batch, anchors]. Anchor match type. 1=positive,
            -1=negative, 0=neutral anchor.
        rpn_deltas: [batch, anchors, (dy, dx, log(dh), log(dw))]
    '''
    def batch_pack(x, counts, num_rows):
        '''Picks different number of values from each row
        in x depending on the values in counts.
        '''
        outputs = []
        for i in range(num_rows):
            outputs.append(x[i, :counts[i]])
        return tf.concat(outputs, axis=0)
    
    # Positive anchors contribute to the loss, but negative and
    # neutral anchors (match value of 0 or -1) don't.
    indices = tf.where(tf.equal(target_matchs, 1))

    # Pick bbox deltas that contribute to the loss
    rpn_deltas = tf.gather_nd(rpn_deltas, indices)

    # Trim target bounding box deltas to the same length as rpn_deltas.
    batch_counts = tf.reduce_sum(tf.cast(tf.equal(target_matchs, 1), tf.int32), axis=1)
    target_deltas = batch_pack(target_deltas, batch_counts,
                              target_deltas.shape.as_list()[0])

    loss = smooth_l1_loss(target_deltas, rpn_deltas)
    
    loss = tf.reduce_mean(loss) if tf.size(loss) > 0 else tf.constant(0.0)
    
    return loss





def rcnn_class_loss(target_matchs_list, rcnn_class_logits_list):
    '''Loss for the classifier head of Faster RCNN.
    
    Args
    ---
        target_matchs_list: list of [num_rois]. Integer class IDs. Uses zero
            padding to fill in the array.
        rcnn_class_logits_list: list of [num_rois, num_classes]
    '''
    
    class_ids = tf.concat(target_matchs_list, 0)
    class_logits = tf.concat(rcnn_class_logits_list, 0)
    class_ids = tf.cast(class_ids, 'int64')
    
    # loss = tf.losses.sparse_softmax_cross_entropy(labels=class_ids,
    #                                               logits=class_logits)

    num_classes = class_logits.shape[-1]
    # print(class_logits.shape)
    loss = keras.losses.categorical_crossentropy(tf.one_hot(class_ids, depth=num_classes),
                                                 class_logits, from_logits=True)


    loss = tf.reduce_mean(loss) if tf.size(loss) > 0 else tf.constant(0.0)
    return loss


def rcnn_bbox_loss(target_deltas_list, target_matchs_list, rcnn_deltas_list):
    '''Loss for Faster R-CNN bounding box refinement.
    
    Args
    ---
        target_deltas_list: list of [num_positive_rois, (dy, dx, log(dh), log(dw))]
        target_matchs_list: list of [num_rois]. Integer class IDs.
        rcnn_deltas_list: list of [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
    '''
    
    target_deltas = tf.concat(target_deltas_list, 0)
    target_class_ids = tf.concat(target_matchs_list, 0)
    rcnn_deltas = tf.concat(rcnn_deltas_list, 0)

    # Only positive ROIs contribute to the loss. And only
    # the right class_id of each ROI. Get their indicies.
    positive_roi_ix = tf.where(target_class_ids > 0)[:, 0]
    positive_roi_class_ids = tf.cast(
        tf.gather(target_class_ids, positive_roi_ix), tf.int64)
    indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1)
    
    # Gather the deltas (predicted and true) that contribute to loss
    rcnn_deltas = tf.gather_nd(rcnn_deltas, indices)

    # Smooth-L1 Loss
    loss = smooth_l1_loss(target_deltas, rcnn_deltas)
    loss = tf.reduce_mean(loss) if tf.size(loss) > 0 else tf.constant(0.0)

    return loss

Datasets

(8) coco

# coco
import os.path as osp
import cv2
import numpy as np
from pycocotools.coco import COCO

from detection.datasets import transforms, utils


"""
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession


config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
"""

class CocoDataSet(object):
    def __init__(self, dataset_dir, subset,
                 flip_ratio=0,
                 pad_mode='fixed',
                 mean=(0, 0, 0),
                 std=(1, 1, 1),
                 scale=(1024, 800),
                 debug=False):
        '''Load a subset of the COCO dataset.
        
        Attributes
        ---
            dataset_dir: The root directory of the COCO dataset.
            subset: What to load (train, val).
            flip_ratio: Float. The ratio of flipping an image and its bounding boxes.
            pad_mode: Which padded method to use (fixed, non-fixed)
            mean: Tuple. Image mean.
            std: Tuple. Image standard deviation.
            scale: Tuple of two integers.
        '''
        
        if subset not in ['train', 'val']:
            raise AssertionError('subset must be "train" or "val".')

        self.coco = COCO("{}/annotations/instances_{}2017.json".format(dataset_dir, subset))

        # get the mapping from original category ids to labels
        self.cat_ids = self.coco.getCatIds()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        
        self.img_ids, self.img_infos = self._filter_imgs()
        
        if debug:
            self.img_ids, self.img_infos = self.img_ids[:50], self.img_infos[:50]
            
        self.image_dir = "{}/images/{}2017".format(dataset_dir, subset)
        
        self.flip_ratio = flip_ratio
        
        if pad_mode in ['fixed', 'non-fixed']:
            self.pad_mode = pad_mode
        elif subset == 'train':
            self.pad_mode = 'fixed'
        else:
            self.pad_mode = 'non-fixed'
        
        self.img_transform = transforms.ImageTransform(scale, mean, std, pad_mode)
        self.bbox_transform = transforms.BboxTransform()
        
        
    def _filter_imgs(self, min_size=32):
        '''Filter images too small or without ground truths.
        
        Args
        ---
            min_size: the minimal size of the image.
        '''
        # Filter images without ground truths.
        all_img_ids = list(set([_['image_id'] for _ in self.coco.anns.values()]))
        # Filter images too small.
        img_ids = []
        img_infos = []
        for i in all_img_ids:
            info = self.coco.loadImgs(i)[0]
            
            ann_ids = self.coco.getAnnIds(imgIds=i)
            ann_info = self.coco.loadAnns(ann_ids)
            ann = self._parse_ann_info(ann_info)
            
            if min(info['width'], info['height']) >= min_size and ann['labels'].shape[0] != 0:
                img_ids.append(i)
                img_infos.append(info)
        return img_ids, img_infos
        
    def _load_ann_info(self, idx):
        img_id = self.img_ids[idx]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        ann_info = self.coco.loadAnns(ann_ids)
        return ann_info

    def _parse_ann_info(self, ann_info):
        '''Parse bbox annotation.
        
        Args
        ---
            ann_info (list[dict]): Annotation info of an image.
            
        Returns
        ---
            dict: A dict containing the following keys: bboxes, 
                bboxes_ignore, labels.
        '''
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []

        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [y1, x1, y1 + h - 1, x1 + w - 1]
            if ann['iscrowd']:
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])

        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        ann = dict(
            bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)


        return ann
    
    def __len__(self):
        return len(self.img_infos)
    
    def __getitem__(self, idx):
        '''Load the image and its bboxes for the given index.
        
        Args
        ---
            idx: the index of images.
            
        Returns
        ---
            tuple: A tuple containing the following items: image, 
                bboxes, labels.
        '''
        img_info = self.img_infos[idx]
        ann_info = self._load_ann_info(idx)
        
        # load the image.
        img = cv2.imread(osp.join(self.image_dir, img_info['file_name']), cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        ori_shape = img.shape
        
        # Load the annotation.
        ann = self._parse_ann_info(ann_info)
        bboxes = ann['bboxes']
        labels = ann['labels']
        
        flip = True if np.random.rand() < self.flip_ratio else False
        
        # Handle the image
        img, img_shape, scale_factor = self.img_transform(img, flip)

        pad_shape = img.shape
        
        # Handle the annotation.
        bboxes, labels = self.bbox_transform(
            bboxes, labels, img_shape, scale_factor, flip)
        
        # Handle the meta info.
        img_meta_dict = dict({
            'ori_shape': ori_shape,
            'img_shape': img_shape,
            'pad_shape': pad_shape,
            'scale_factor': scale_factor,
            'flip': flip
        })

        img_meta = utils.compose_image_meta(img_meta_dict)
        
        return img, img_meta, bboxes, labels
    
    def get_categories(self):
        '''Get list of category names. 
        
        Returns
        ---
            list: A list of category names.
            
        Note that the first item 'bg' means background.
        '''
        return ['bg'] + [self.coco.loadCats(i)[0]["name"] for i in self.cat2label.keys()]

(9)data_generator

# data_generator
import numpy as np



class DataGenerator:
    """
    读取每一个图片的信息
    """

    def __init__(self, dataset, shuffle=False):
        self.dataset = dataset
        self.shuffle = shuffle
    
    def __call__(self):
        indices = np.arange(len(self.dataset))
        if self.shuffle:
            np.random.shuffle(indices)

        for img_idx in indices:
            img, img_meta, bbox, label = self.dataset[img_idx]
            yield img, img_meta, bbox, label

(10) transforms

# transforms
import numpy as np

from detection.datasets.utils import *


class ImageTransform(object):
    '''Preprocess the image.
    
        1. rescale the image to expected size
        2. normalize the image
        3. flip the image (if needed)
        4. pad the image (if needed)
    '''
    def __init__(self,
                 scale=(800, 1333),
                 mean=(0, 0, 0),
                 std=(1, 1, 1),
                 pad_mode='fixed'):
        self.scale = scale
        self.mean = mean
        self.std = std
        self.pad_mode = pad_mode

        self.impad_size = max(scale) if pad_mode == 'fixed' else 64

    def __call__(self, img, flip=False):
        img, scale_factor = imrescale(img, self.scale)
        img_shape = img.shape
        img = imnormalize(img, self.mean, self.std)
          
        if flip:
            img = img_flip(img)
        if self.pad_mode == 'fixed':
            img = impad_to_square(img, self.impad_size)

        else: # 'non-fixed'
            img = impad_to_multiple(img, self.impad_size)
        
        return img, img_shape, scale_factor


class BboxTransform(object):
    '''Preprocess ground truth bboxes.
    
        1. rescale bboxes according to image size
        2. flip bboxes (if needed)
    '''
    def __init__(self):
        pass
    
    def __call__(self, bboxes, labels, 
                 img_shape, scale_factor, flip=False):
 
        bboxes = bboxes * scale_factor
        if flip:
            bboxes = bbox_flip(bboxes, img_shape)
            
        bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[0])
        bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[1])
            
        return bboxes, labels

(11)datasets.utils

# datasets.utils

import cv2
import numpy as np

###########################################
#
# Utility Functions for 
# Image Preprocessing and Data Augmentation
#
###########################################

def img_flip(img):
    '''Flip the image horizontally
    
    Args
    ---
        img: [height, width, channel]
    
    Returns
    ---
        np.ndarray: the flipped image.
    '''
    return np.fliplr(img)

def bbox_flip(bboxes, img_shape):
    '''Flip bboxes horizontally.
    
    Args
    ---
        bboxes: [..., 4]
        img_shape: Tuple. (height, width)
    
    Returns
    ---
        np.ndarray: the flipped bboxes.
    '''
    w = img_shape[1]
    flipped = bboxes.copy()
    flipped[..., 1] = w - bboxes[..., 3] - 1
    flipped[..., 3] = w - bboxes[..., 1] - 1
    return flipped

def impad_to_square(img, pad_size):
    '''Pad an image to ensure each edge to equal to pad_size.
    
    Args
    ---
        img: [height, width, channels]. Image to be padded
        pad_size: Int.
    
    Returns
    ---
        ndarray: The padded image with shape of 
            [pad_size, pad_size, channels].
    '''
    shape = (pad_size, pad_size, img.shape[-1])
    
    pad = np.zeros(shape, dtype=img.dtype)
    pad[:img.shape[0], :img.shape[1], ...] = img
    return pad

def impad_to_multiple(img, divisor):
    '''Pad an image to ensure each edge to be multiple to some number.
    
    Args
    ---
        img: [height, width, channels]. Image to be padded.
        divisor: Int. Padded image edges will be multiple to divisor.
    
    Returns
    ---
        ndarray: The padded image.
    '''
    pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor
    pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor
    shape = (pad_h, pad_w, img.shape[-1])
    
    pad = np.zeros(shape, dtype=img.dtype)
    pad[:img.shape[0], :img.shape[1], ...] = img
    return pad

def imrescale(img, scale):
    '''Resize image while keeping the aspect ratio.
    
    Args
    ---
        img: [height, width, channels]. The input image.
        scale: Tuple of 2 integers. the image will be rescaled 
            as large as possible within the scale
    
    Returns
    ---
        np.ndarray: the scaled image.
    ''' 
    h, w = img.shape[:2]
    
    max_long_edge = max(scale)
    max_short_edge = min(scale)
    scale_factor = min(max_long_edge / max(h, w),
                       max_short_edge / min(h, w))
    
    new_size = (int(w * float(scale_factor) + 0.5),
                int(h * float(scale_factor) + 0.5))

    rescaled_img = cv2.resize(
        img, new_size, interpolation=cv2.INTER_LINEAR)
    
    return rescaled_img, scale_factor

def imnormalize(img, mean, std):
    '''Normalize the image.
    
    Args
    ---
        img: [height, width, channel]
        mean: Tuple or np.ndarray. [3]
        std: Tuple or np.ndarray. [3]
    
    Returns
    ---
        np.ndarray: the normalized image.
    '''
    img = (img - mean) / std    
    return img.astype(np.float32)

def imdenormalize(norm_img, mean, std):
    '''Denormalize the image.
    
    Args
    ---
        norm_img: [height, width, channel]
        mean: Tuple or np.ndarray. [3]
        std: Tuple or np.ndarray. [3]
    
    Returns
    ---
        np.ndarray: the denormalized image.
    '''
    img = norm_img * std + mean
    return img.astype(np.float32)

#######################################
#
# Utility Functions for Data Formatting
#
#######################################

def get_original_image(img, img_meta, 
                       mean=(0, 0, 0), std=(1, 1, 1)):
    '''Recover the origanal image.
    
    Args
    ---
        img: np.ndarray. [height, width, channel]. 
            The transformed image.
        img_meta: np.ndarray. [11]
        mean: Tuple or np.ndarray. [3]
        std: Tuple or np.ndarray. [3]
    
    Returns
    ---
        np.ndarray: the original image.
    '''
    img_meta_dict = parse_image_meta(img_meta)
    ori_shape = img_meta_dict['ori_shape']
    img_shape = img_meta_dict['img_shape']
    flip = img_meta_dict['flip']
    
    img = img[:img_shape[0], :img_shape[1]]
    if flip:
        img = img_flip(img)
    img = cv2.resize(img, (ori_shape[1], ori_shape[0]), 
                     interpolation=cv2.INTER_LINEAR)
    img = imdenormalize(img, mean, std)
    return img

def compose_image_meta(img_meta_dict):
    '''Takes attributes of an image and puts them in one 1D array.

    Args
    ---
        img_meta_dict: dict

    Returns
    ---
        img_meta: np.ndarray
    '''
    ori_shape = img_meta_dict['ori_shape']
    img_shape = img_meta_dict['img_shape']
    pad_shape = img_meta_dict['pad_shape']
    scale_factor = img_meta_dict['scale_factor']
    flip = 1 if img_meta_dict['flip'] else 0
    img_meta = np.array(
        ori_shape +               # size=3
        img_shape +               # size=3
        pad_shape +               # size=3
        tuple([scale_factor]) +   # size=1
        tuple([flip])             # size=1
    ).astype(np.float32)

    return img_meta

def parse_image_meta(img_meta):
    '''Parses an array that contains image attributes to its components.

    Args
    ---
        meta: [11]

    Returns
    ---
        a dict of the parsed values.
    '''
    ori_shape = img_meta[0:3]
    img_shape = img_meta[3:6]
    pad_shape = img_meta[6:9]
    scale_factor = img_meta[9]
    flip = img_meta[10]
    return {
        'ori_shape': ori_shape.astype(np.int32),
        'img_shape': img_shape.astype(np.int32),
        'pad_shape': pad_shape.astype(np.int32),
        'scale_factor': scale_factor.astype(np.float32),
        'flip': flip.astype(np.bool),
    }

model

(12)resnet

'''ResNet model for Keras.

# Reference:
- [Deep Residual Learning for Image Recognition](
    https://arxiv.org/abs/1512.03385)

'''
# resnet
import  tensorflow as tf
from    tensorflow.keras import layers

class _Bottleneck(tf.keras.Model):

    def __init__(self, filters, block, 
                 downsampling=False, stride=1, **kwargs):
        super(_Bottleneck, self).__init__(**kwargs)

        filters1, filters2, filters3 = filters
        conv_name_base = 'res' + block + '_branch'
        bn_name_base   = 'bn'  + block + '_branch'

        self.downsampling = downsampling
        self.stride = stride
        self.out_channel = filters3
        
        self.conv2a = layers.Conv2D(filters1, (1, 1), strides=(stride, stride),
                                    kernel_initializer='he_normal',
                                    name=conv_name_base + '2a')
        self.bn2a = layers.BatchNormalization(name=bn_name_base + '2a')

        self.conv2b = layers.Conv2D(filters2, (3, 3), padding='same',
                                    kernel_initializer='he_normal',
                                    name=conv_name_base + '2b')
        self.bn2b = layers.BatchNormalization(name=bn_name_base + '2b')

        self.conv2c = layers.Conv2D(filters3, (1, 1),
                                    kernel_initializer='he_normal',
                                    name=conv_name_base + '2c')
        self.bn2c = layers.BatchNormalization(name=bn_name_base + '2c')
         
        if self.downsampling:
            self.conv_shortcut = layers.Conv2D(filters3, (1, 1), strides=(stride, stride),
                                               kernel_initializer='he_normal',
                                               name=conv_name_base + '1')
            self.bn_shortcut = layers.BatchNormalization(name=bn_name_base + '1')     
    
    def call(self, inputs, training=False):
        x = self.conv2a(inputs)
        x = self.bn2a(x, training=training)
        x = tf.nn.relu(x)
        
        x = self.conv2b(x)
        x = self.bn2b(x, training=training)
        x = tf.nn.relu(x)
        
        x = self.conv2c(x)
        x = self.bn2c(x, training=training)
        
        if self.downsampling:
            shortcut = self.conv_shortcut(inputs)
            shortcut = self.bn_shortcut(shortcut, training=training)
        else:
            shortcut = inputs
            
        x += shortcut
        x = tf.nn.relu(x)
        
        return x
    
    def compute_output_shape(self, input_shape):
        shape = tf.TensorShape(input_shape).as_list()

        shape[1] = shape[1] // self.stride
        shape[2] = shape[2] // self.stride
        shape[-1] = self.out_channel
        return tf.TensorShape(shape)        
        

class ResNet(tf.keras.Model):

    def __init__(self, depth, **kwargs):
        super(ResNet, self).__init__(**kwargs)
              
        if depth not in [50, 101]:
            raise AssertionError('depth must be 50 or 101.')
        self.depth = depth
    
        self.padding = layers.ZeroPadding2D((3, 3))
        self.conv1 = layers.Conv2D(64, (7, 7),
                                   strides=(2, 2),
                                   kernel_initializer='he_normal',
                                   name='conv1')
        self.bn_conv1 = layers.BatchNormalization(name='bn_conv1')
        self.max_pool = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')
        
        self.res2a = _Bottleneck([64, 64, 256], block='2a',
                                 downsampling=True, stride=1)
        self.res2b = _Bottleneck([64, 64, 256], block='2b')
        self.res2c = _Bottleneck([64, 64, 256], block='2c')
        
        self.res3a = _Bottleneck([128, 128, 512], block='3a', 
                                 downsampling=True, stride=2)
        self.res3b = _Bottleneck([128, 128, 512], block='3b')
        self.res3c = _Bottleneck([128, 128, 512], block='3c')
        self.res3d = _Bottleneck([128, 128, 512], block='3d')
        
        self.res4a = _Bottleneck([256, 256, 1024], block='4a', 
                                 downsampling=True, stride=2)
        self.res4b = _Bottleneck([256, 256, 1024], block='4b')
        self.res4c = _Bottleneck([256, 256, 1024], block='4c')
        self.res4d = _Bottleneck([256, 256, 1024], block='4d')
        self.res4e = _Bottleneck([256, 256, 1024], block='4e')
        self.res4f = _Bottleneck([256, 256, 1024], block='4f')
        if self.depth == 101:
            self.res4g = _Bottleneck([256, 256, 1024], block='4g')
            self.res4h = _Bottleneck([256, 256, 1024], block='4h')
            self.res4i = _Bottleneck([256, 256, 1024], block='4i')
            self.res4j = _Bottleneck([256, 256, 1024], block='4j')
            self.res4k = _Bottleneck([256, 256, 1024], block='4k')
            self.res4l = _Bottleneck([256, 256, 1024], block='4l')
            self.res4m = _Bottleneck([256, 256, 1024], block='4m')
            self.res4n = _Bottleneck([256, 256, 1024], block='4n')
            self.res4o = _Bottleneck([256, 256, 1024], block='4o')
            self.res4p = _Bottleneck([256, 256, 1024], block='4p')
            self.res4q = _Bottleneck([256, 256, 1024], block='4q')
            self.res4r = _Bottleneck([256, 256, 1024], block='4r')
            self.res4s = _Bottleneck([256, 256, 1024], block='4s')
            self.res4t = _Bottleneck([256, 256, 1024], block='4t')
            self.res4u = _Bottleneck([256, 256, 1024], block='4u')
            self.res4v = _Bottleneck([256, 256, 1024], block='4v')
            self.res4w = _Bottleneck([256, 256, 1024], block='4w') 
        
        self.res5a = _Bottleneck([512, 512, 2048], block='5a', 
                                 downsampling=True, stride=2)
        self.res5b = _Bottleneck([512, 512, 2048], block='5b')
        self.res5c = _Bottleneck([512, 512, 2048], block='5c')
        
        
        self.out_channel = (256, 512, 1024, 2048)
    
    def call(self, inputs, training=True):
        x = self.padding(inputs)
        x = self.conv1(x)
        x = self.bn_conv1(x, training=training)
        x = tf.nn.relu(x)
        x = self.max_pool(x)
        
        x = self.res2a(x, training=training)
        x = self.res2b(x, training=training)
        C2 = x = self.res2c(x, training=training)
        
        x = self.res3a(x, training=training)
        x = self.res3b(x, training=training)
        x = self.res3c(x, training=training)
        C3 = x = self.res3d(x, training=training)
        
        x = self.res4a(x, training=training)
        x = self.res4b(x, training=training)
        x = self.res4c(x, training=training)
        x = self.res4d(x, training=training)
        x = self.res4e(x, training=training)
        x = self.res4f(x, training=training)
        if self.depth == 101:
            x = self.res4g(x, training=training)
            x = self.res4h(x, training=training)
            x = self.res4i(x, training=training)
            x = self.res4j(x, training=training)
            x = self.res4k(x, training=training)
            x = self.res4l(x, training=training)
            x = self.res4m(x, training=training)
            x = self.res4n(x, training=training)
            x = self.res4o(x, training=training)
            x = self.res4p(x, training=training)
            x = self.res4q(x, training=training)
            x = self.res4r(x, training=training)
            x = self.res4s(x, training=training)
            x = self.res4t(x, training=training)
            x = self.res4u(x, training=training)
            x = self.res4v(x, training=training)
            x = self.res4w(x, training=training) 
        C4 = x
        
        x = self.res5a(x, training=training)
        x = self.res5b(x, training=training)
        C5 = x = self.res5c(x, training=training)
        
        return (C2, C3, C4, C5)
    
    def compute_output_shape(self, input_shape):
        shape = tf.TensorShape(input_shape).as_list()
        batch, H, W, C = shape
        
        C2_shape = tf.TensorShape([batch, H //  4, W //  4, self.out_channel[0]])
        C3_shape = tf.TensorShape([batch, H //  8, W //  8, self.out_channel[1]])
        C4_shape = tf.TensorShape([batch, H // 16, W // 16, self.out_channel[2]])
        C5_shape = tf.TensorShape([batch, H // 32, W // 32, self.out_channel[3]])
        
        return (C2_shape, C3_shape, C4_shape, C5_shape)

(13)bbox_head

# bbox_head
import tensorflow as tf
from    tensorflow.keras import layers

from detection.core.bbox import transforms
from detection.core.loss import losses
from detection.utils.misc import *

class BBoxHead(tf.keras.Model):
    def __init__(self, num_classes, 
                 pool_size=(7, 7),
                 target_means=(0., 0., 0., 0.), 
                 target_stds=(0.1, 0.1, 0.2, 0.2),
                 min_confidence=0.7,
                 nms_threshold=0.3,
                 max_instances=100,
                 **kwags):
        super(BBoxHead, self).__init__(**kwags)
        
        self.num_classes = num_classes
        self.pool_size = tuple(pool_size)
        self.target_means = target_means
        self.target_stds = target_stds
        self.min_confidence = min_confidence
        self.nms_threshold = nms_threshold
        self.max_instances = max_instances
        
        self.rcnn_class_loss = losses.rcnn_class_loss
        self.rcnn_bbox_loss = losses.rcnn_bbox_loss
        
        self.rcnn_class_conv1 = layers.Conv2D(1024, self.pool_size, 
                                              padding='valid', name='rcnn_class_conv1')
        
        self.rcnn_class_bn1 = layers.BatchNormalization(name='rcnn_class_bn1')
        
        self.rcnn_class_conv2 = layers.Conv2D(1024, (1, 1), 
                                              name='rcnn_class_conv2')
        
        self.rcnn_class_bn2 = layers.BatchNormalization(name='rcnn_class_bn2')
        
        self.rcnn_class_logits = layers.Dense(num_classes, name='rcnn_class_logits')
        
        self.rcnn_delta_fc = layers.Dense(num_classes * 4, name='rcnn_bbox_fc')
        
    def call(self, inputs, training=True):
        '''
        Args
        ---
            pooled_rois_list: List of [num_rois, pool_size, pool_size, channels]
        
        Returns
        ---
            rcnn_class_logits_list: List of [num_rois, num_classes]
            rcnn_probs_list: List of [num_rois, num_classes]
            rcnn_deltas_list: List of [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
        '''
        pooled_rois_list = inputs
        num_pooled_rois_list = [pooled_rois.shape[0] for pooled_rois in pooled_rois_list]
        pooled_rois = tf.concat(pooled_rois_list, axis=0)
        
        x = self.rcnn_class_conv1(pooled_rois)
        x = self.rcnn_class_bn1(x, training=training)
        x = tf.nn.relu(x)
        
        x = self.rcnn_class_conv2(x)
        x = self.rcnn_class_bn2(x, training=training)
        x = tf.nn.relu(x)
        
        x = tf.squeeze(tf.squeeze(x, 2), 1)
        
        logits = self.rcnn_class_logits(x)
        probs = tf.nn.softmax(logits)
        
        deltas = self.rcnn_delta_fc(x)
        deltas = tf.reshape(deltas, (-1, self.num_classes, 4))
        

        rcnn_class_logits_list = tf.split(logits, num_pooled_rois_list, 0)
        rcnn_probs_list = tf.split(probs, num_pooled_rois_list, 0)
        rcnn_deltas_list = tf.split(deltas, num_pooled_rois_list, 0)

            
        return rcnn_class_logits_list, rcnn_probs_list, rcnn_deltas_list

    def loss(self, 
             rcnn_class_logits_list, rcnn_deltas_list, 
             rcnn_target_matchs_list, rcnn_target_deltas_list):
        """

        :param rcnn_class_logits_list:
        :param rcnn_deltas_list:
        :param rcnn_target_matchs_list:
        :param rcnn_target_deltas_list:
        :return:
        """
        rcnn_class_loss = self.rcnn_class_loss(
            rcnn_target_matchs_list, rcnn_class_logits_list)
        rcnn_bbox_loss = self.rcnn_bbox_loss(
            rcnn_target_deltas_list, rcnn_target_matchs_list, rcnn_deltas_list)
        
        return rcnn_class_loss, rcnn_bbox_loss
        
    def get_bboxes(self, rcnn_probs_list, rcnn_deltas_list, rois_list, img_metas):
        '''
        Args
        ---
            rcnn_probs_list: List of [num_rois, num_classes]
            rcnn_deltas_list: List of [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
            rois_list: List of [num_rois, (y1, x1, y2, x2)]
            img_meta_list: [batch_size, 11]
        
        Returns
        ---
            detections_list: List of [num_detections, (y1, x1, y2, x2, class_id, score)]
                coordinates are in pixel coordinates.
        '''
        
        pad_shapes = calc_pad_shapes(img_metas)
        detections_list = [
            self._get_bboxes_single(
                rcnn_probs_list[i], rcnn_deltas_list[i], rois_list[i], pad_shapes[i])
            for i in range(img_metas.shape[0])
        ]
        return detections_list  
    
    def _get_bboxes_single(self, rcnn_probs, rcnn_deltas, rois, img_shape):
        '''
        Args
        ---
            rcnn_probs: [num_rois, num_classes]
            rcnn_deltas: [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
            rois: [num_rois, (y1, x1, y2, x2)]
            img_shape: np.ndarray. [2]. (img_height, img_width)       
        '''
        H, W = img_shape   
        # Class IDs per ROI
        class_ids = tf.argmax(rcnn_probs, axis=1, output_type=tf.int32)
        
        # Class probability of the top class of each ROI
        indices = tf.stack([tf.range(rcnn_probs.shape[0]), class_ids], axis=1)
        class_scores = tf.gather_nd(rcnn_probs, indices)
        # Class-specific bounding box deltas
        deltas_specific = tf.gather_nd(rcnn_deltas, indices)
        # Apply bounding box deltas
        # Shape: [num_rois, (y1, x1, y2, x2)] in normalized coordinates        
        refined_rois = transforms.delta2bbox(rois, deltas_specific, self.target_means, self.target_stds)
        
        # Clip boxes to image window
        refined_rois *= tf.constant([H, W, H, W], dtype=tf.float32)
        window = tf.constant([0., 0., H * 1., W * 1.], dtype=tf.float32)
        refined_rois = transforms.bbox_clip(refined_rois, window)
        
        
        # Filter out background boxes
        keep = tf.where(class_ids > 0)[:, 0]
        
        # Filter out low confidence boxes
        if self.min_confidence:
            conf_keep = tf.where(class_scores >= self.min_confidence)[:, 0]
            keep = tf.compat.v2.sets.intersection(tf.expand_dims(keep, 0),
                                            tf.expand_dims(conf_keep, 0))
            keep = tf.sparse.to_dense(keep)[0]
            
        # Apply per-class NMS
        # 1. Prepare variables
        pre_nms_class_ids = tf.gather(class_ids, keep)
        pre_nms_scores = tf.gather(class_scores, keep)
        pre_nms_rois = tf.gather(refined_rois,   keep)
        unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

        def nms_keep_map(class_id):
            '''Apply Non-Maximum Suppression on ROIs of the given class.'''
            # Indices of ROIs of the given class
            ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
            # Apply NMS
            class_keep = tf.image.non_max_suppression(
                    tf.gather(pre_nms_rois, ixs),
                    tf.gather(pre_nms_scores, ixs),
                    max_output_size=self.max_instances,
                    iou_threshold=self.nms_threshold)
            # Map indices
            class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
            return class_keep

        # 2. Map over class IDs
        nms_keep = []
        for i in range(unique_pre_nms_class_ids.shape[0]):
            nms_keep.append(nms_keep_map(unique_pre_nms_class_ids[i]))
        nms_keep = tf.concat(nms_keep, axis=0)
        
        # 3. Compute intersection between keep and nms_keep
        keep = tf.compat.v2.sets.intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(nms_keep, 0))
        keep = tf.sparse.to_dense(keep)[0]
        # Keep top detections
        roi_count = self.max_instances
        class_scores_keep = tf.gather(class_scores, keep)
        num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
        top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
        keep = tf.gather(keep, top_ids)  
        
        detections = tf.concat([
            tf.gather(refined_rois, keep),
            tf.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis],
            tf.gather(class_scores, keep)[..., tf.newaxis]
            ], axis=1)
        
        return detections

(14)faster-rcnn

# faster-rcnn
import tensorflow as tf

from detection.models.backbones import resnet
from detection.models.necks import fpn
from detection.models.rpn_heads import rpn_head
from detection.models.bbox_heads import bbox_head
from detection.models.roi_extractors import roi_align
from detection.models.detectors.test_mixins import RPNTestMixin, BBoxTestMixin

from detection.core.bbox import bbox_target


class FasterRCNN(tf.keras.Model, RPNTestMixin, BBoxTestMixin):

    def __init__(self, num_classes, **kwags):
        super(FasterRCNN, self).__init__(**kwags)
       # 包含多少类
        self.NUM_CLASSES = num_classes
        
        # RPN configuration配置
        # Anchor attributes属性
        self.ANCHOR_SCALES = (32, 64, 128, 256, 512)           ## 尺度
        self.ANCHOR_RATIOS = (0.5, 1, 2)                       ## 比率，这是干什么的？
        self.ANCHOR_FEATURE_STRIDES = (4, 8, 16, 32, 64)       ## 步伐，这个是干什么的？
        
        # Bounding box refinement mean and standard deviation
        # RPN边界框均值和标准差，均值为什么是0呢？
        self.RPN_TARGET_MEANS = (0., 0., 0., 0.)
        self.RPN_TARGET_STDS = (0.1, 0.1, 0.2, 0.2)
        
        # RPN training configuration
        # RPN训练配置
        self.PRN_BATCH_SIZE = 256
        # 前景所占的比例
        self.RPN_POS_FRAC = 0.5
        self.RPN_POS_IOU_THR = 0.7
        self.RPN_NEG_IOU_THR = 0.3

        # ROIs kept configuration
        # 配置生成两千个框
        self.PRN_PROPOSAL_COUNT = 2000
        # 非极大抑制的阈值
        self.PRN_NMS_THRESHOLD = 0.7
        
        # RCNN configuration
        # Bounding box refinement mean and standard deviation
        # RCNN边界框均值和标准差
        self.RCNN_TARGET_MEANS = (0., 0., 0., 0.)
        self.RCNN_TARGET_STDS = (0.1, 0.1, 0.2, 0.2)
        
        # ROI Feat Size
        # 每个候选区域都变成7*7大小
        self.POOL_SIZE = (7, 7)
        
        # RCNN training configuration
        # RCNN训练的配置
        self.RCNN_BATCH_SIZE = 256
        ## 应该是前景所占的比例
        self.RCNN_POS_FRAC = 0.25
        self.RCNN_POS_IOU_THR = 0.5
        self.RCNN_NEG_IOU_THR = 0.5
        
        # Boxes kept configuration
        # 最小置信度为0.7
        self.RCNN_MIN_CONFIDENCE = 0.7
        self.RCNN_NME_THRESHOLD = 0.3
        self.RCNN_MAX_INSTANCES = 100
        
        # Target Generator for the second stage.
        self.bbox_target = bbox_target.ProposalTarget(
            # 目标的均值
            target_means=self.RCNN_TARGET_MEANS,
            # RPN网络的标准差，感觉这里应该是target_stds=self.RCNN_TARGET_STDS,
            target_stds=self.RPN_TARGET_STDS,
            # 批次量
            num_rcnn_deltas=self.RCNN_BATCH_SIZE,
            # 前景的比率
            positive_fraction=self.RCNN_POS_FRAC,
            # 正例的iou阈值
            pos_iou_thr=self.RCNN_POS_IOU_THR,
            # 反例的iou阈值
            neg_iou_thr=self.RCNN_NEG_IOU_THR)
                
        # Modules 一共4部分，主干网络、rpn、rpn分类回归头、roi、分类和回归头
        # 1 主干网络
        self.backbone = resnet.ResNet(
            depth=101, 
            name='res_net')
        # 2 fpn网络
        self.neck = fpn.FPN(
            name='fpn')
        # 3 rpn的分类回归头
        self.rpn_head = rpn_head.RPNHead(
            # anchor的尺度
            anchor_scales=self.ANCHOR_SCALES,
            # anchor的比率，不知道是干什么的？
            anchor_ratios=self.ANCHOR_RATIOS,
            # anchor的步伐，这是干什么的？
            anchor_feature_strides=self.ANCHOR_FEATURE_STRIDES,
            # RPN提出的数量
            proposal_count=self.PRN_PROPOSAL_COUNT,
            # 非极大抑制的阈值
            nms_threshold=self.PRN_NMS_THRESHOLD,
            # 目标的均值，不知道啥意思
            target_means=self.RPN_TARGET_MEANS,
            # 目标的标准差，不知道啥意思
            target_stds=self.RPN_TARGET_STDS,
            # 批次量
            num_rpn_deltas=self.PRN_BATCH_SIZE,
            # 提出得到的前景比例
            positive_fraction=self.RPN_POS_FRAC,
            # 正例的阈值
            pos_iou_thr=self.RPN_POS_IOU_THR,
            # 反例的阈值
            neg_iou_thr=self.RPN_NEG_IOU_THR,
            name='rpn_head')
        # roi层
        self.roi_align = roi_align.PyramidROIAlign(
            # roi的池化大小
            pool_shape=self.POOL_SIZE,
            name='pyramid_roi_align')

        # bbox_head
        self.bbox_head = bbox_head.BBoxHead(
            # 类的数量
            num_classes=self.NUM_CLASSES,
            # 池化层大小
            pool_size=self.POOL_SIZE,
            # RCNN的目标均值
            target_means=self.RCNN_TARGET_MEANS,
            # RCNN的目标标准差
            target_stds=self.RCNN_TARGET_STDS,
            # RCNN的最低置信度
            min_confidence=self.RCNN_MIN_CONFIDENCE,
            # 非极大抑制的阈值
            nms_threshold=self.RCNN_NME_THRESHOLD,
            max_instances=self.RCNN_MAX_INSTANCES,
            name='b_box_head')

    def call(self, inputs, training=True):
        """

        :param inputs: [1, 1216, 1216, 3], [1, 11], [1, 14, 4], [1, 14]
        :param training:
        :return:
        """
        if training: # training
            imgs, img_metas, gt_boxes, gt_class_ids = inputs
        else: # inference
            imgs, img_metas = inputs

        # 每层输出的形状
        # [1, 304, 304, 256] => [1, 152, 152, 512]=>[1,76,76,1024]=>[1,38,38,2048]
        C2, C3, C4, C5 = self.backbone(imgs, 
                                       training=training)
        # 每层输出的形状
        # [1, 304, 304, 256] <= [1, 152, 152, 256]<=[1,76,76,256]<=[1,38,38,256]=>[1,19,19,256]
        P2, P3, P4, P5, P6 = self.neck([C2, C3, C4, C5], 
                                       training=training)
        # 用一个列表存储了各个输出，这里是要用到金字塔，就是用多个层
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        rcnn_feature_maps = [P2, P3, P4, P5]
        # [1, 369303, 2] [1, 369303, 2], [1, 369303, 4], includes all anchors on pyramid level of features

        # 得到
        rpn_class_logits, rpn_probs, rpn_deltas = self.rpn_head(
            rpn_feature_maps, training=training)

        # [369303, 4] => [215169, 4], valid => [6000, 4], performance =>[2000, 4],  NMS
        proposals_list = self.rpn_head.get_proposals(
            rpn_probs, rpn_deltas, img_metas)

        if training: # get target value for these proposal target label and target delta
            rois_list, rcnn_target_matchs_list, rcnn_target_deltas_list = \
                self.bbox_target.build_targets(
                    proposals_list, gt_boxes, gt_class_ids, img_metas)
        else:
            rois_list = proposals_list
        # rois_list only contains coordinates, rcnn_feature_maps save the 5 features data=>[192,7,7,256]
        pooled_regions_list = self.roi_align(#
            (rois_list, rcnn_feature_maps, img_metas), training=training)
        # [192, 81], [192, 81], [192, 81, 4]
        rcnn_class_logits_list, rcnn_probs_list, rcnn_deltas_list = \
            self.bbox_head(pooled_regions_list, training=training)
        if training:
            rpn_class_loss, rpn_bbox_loss = self.rpn_head.loss(
                rpn_class_logits, rpn_deltas, gt_boxes, gt_class_ids, img_metas)
            
            rcnn_class_loss, rcnn_bbox_loss = self.bbox_head.loss(
                rcnn_class_logits_list, rcnn_deltas_list, 
                rcnn_target_matchs_list, rcnn_target_deltas_list)
            
            return [rpn_class_loss, rpn_bbox_loss, 
                    rcnn_class_loss, rcnn_bbox_loss]
        else:
            detections_list = self.bbox_head.get_bboxes(
                rcnn_probs_list, rcnn_deltas_list, rois_list, img_metas)
        
            return detections_list

(15)test_mixins

# test_mixins
import numpy as np
import tensorflow as tf

from detection.core.bbox import transforms
from detection.utils.misc import *

class RPNTestMixin:
    
    def simple_test_rpn(self, img, img_meta):
        '''
        Args
        ---
            imgs: np.ndarray. [height, width, channel]
            img_metas: np.ndarray. [11]
        
        '''
        imgs = tf.Variable(np.expand_dims(img, 0))
        img_metas = tf.Variable(np.expand_dims(img_meta, 0))

        x = self.backbone(imgs, training=False)
        x = self.neck(x, training=False)
        
        rpn_class_logits, rpn_probs, rpn_deltas = self.rpn_head(x, training=False)
        
        proposals_list = self.rpn_head.get_proposals(
            rpn_probs, rpn_deltas, img_metas, with_probs=False)

        return proposals_list[0]
    
class BBoxTestMixin(object):
    
    def _unmold_detections(self, detections_list, img_metas):
        return [
            self._unmold_single_detection(detections_list[i], img_metas[i])
            for i in range(img_metas.shape[0])
        ]

    def _unmold_single_detection(self, detections, img_meta):
        zero_ix = tf.where(tf.not_equal(detections[:, 4], 0))
        detections = tf.gather_nd(detections, zero_ix)

        # Extract boxes, class_ids, scores, and class-specific masks
        boxes = detections[:, :4]
        class_ids = tf.cast(detections[:, 4], tf.int32)
        scores = detections[:, 5]

        boxes = transforms.bbox_mapping_back(boxes, img_meta)

        return {'rois': boxes.numpy(),
                'class_ids': class_ids.numpy(),
                'scores': scores.numpy()}

    def simple_test_bboxes(self, img, img_meta, proposals):
        '''
        Args
        ---
            imgs: np.ndarray. [height, width, channel]
            img_meta: np.ndarray. [11]
        
        '''
        imgs = tf.Variable(np.expand_dims(img, 0))
        img_metas = tf.Variable(np.expand_dims(img_meta, 0))
        rois_list = [tf.Variable(proposals)]
        
        x = self.backbone(imgs, training=False)
        P2, P3, P4, P5, _ = self.neck(x, training=False)
        
        rcnn_feature_maps = [P2, P3, P4, P5]
        
        
        pooled_regions_list = self.roi_align(
            (rois_list, rcnn_feature_maps, img_metas), training=False)

        rcnn_class_logits_list, rcnn_probs_list, rcnn_deltas_list = \
            self.bbox_head(pooled_regions_list, training=False)
        
        detections_list = self.bbox_head.get_bboxes(
            rcnn_probs_list, rcnn_deltas_list, rois_list, img_metas)
        
        return self._unmold_detections(detections_list, img_metas)[0]

(16)fpn

'''
FRN model for Keras.

# Reference:
- [Feature Pyramid Networks for Object Detection](
    https://arxiv.org/abs/1612.03144)

'''
# fpn
import tensorflow as tf
from tensorflow.keras import layers


class FPN(tf.keras.Model):

    def __init__(self, out_channels=256, **kwargs):
        '''
        Feature Pyramid Networks
        
        Attributes
        ---
            out_channels: int. the channels of pyramid feature maps.
        '''
        super(FPN, self).__init__(**kwargs)
        
        self.out_channels = out_channels
        
        self.fpn_c2p2 = layers.Conv2D(out_channels, (1, 1), 
                                      kernel_initializer='he_normal', name='fpn_c2p2')
        self.fpn_c3p3 = layers.Conv2D(out_channels, (1, 1), 
                                      kernel_initializer='he_normal', name='fpn_c3p3')
        self.fpn_c4p4 = layers.Conv2D(out_channels, (1, 1), 
                                      kernel_initializer='he_normal', name='fpn_c4p4')
        self.fpn_c5p5 = layers.Conv2D(out_channels, (1, 1), 
                                      kernel_initializer='he_normal', name='fpn_c5p5')
        
        self.fpn_p3upsampled = layers.UpSampling2D(size=(2, 2), name='fpn_p3upsampled')
        self.fpn_p4upsampled = layers.UpSampling2D(size=(2, 2), name='fpn_p4upsampled')
        self.fpn_p5upsampled = layers.UpSampling2D(size=(2, 2), name='fpn_p5upsampled')
        
        
        self.fpn_p2 = layers.Conv2D(out_channels, (3, 3), padding='SAME', 
                                    kernel_initializer='he_normal', name='fpn_p2')
        self.fpn_p3 = layers.Conv2D(out_channels, (3, 3), padding='SAME', 
                                    kernel_initializer='he_normal', name='fpn_p3')
        self.fpn_p4 = layers.Conv2D(out_channels, (3, 3), padding='SAME', 
                                    kernel_initializer='he_normal', name='fpn_p4')
        self.fpn_p5 = layers.Conv2D(out_channels, (3, 3), padding='SAME', 
                                    kernel_initializer='he_normal', name='fpn_p5')
        
        self.fpn_p6 = layers.MaxPooling2D(pool_size=(1, 1), strides=2, name='fpn_p6')
        
            
    def call(self, inputs, training=True):
        C2, C3, C4, C5 = inputs
        
        P5 = self.fpn_c5p5(C5)
        P4 = self.fpn_c4p4(C4) + self.fpn_p5upsampled(P5)
        P3 = self.fpn_c3p3(C3) + self.fpn_p4upsampled(P4)
        P2 = self.fpn_c2p2(C2) + self.fpn_p3upsampled(P3)
        
        # Attach 3x3 conv to all P layers to get the final feature maps.
        P2 = self.fpn_p2(P2)
        P3 = self.fpn_p3(P3)
        P4 = self.fpn_p4(P4)
        P5 = self.fpn_p5(P5)
        
        # subsampling from P5 with stride of 2.
        P6 = self.fpn_p6(P5)
        
        return [P2, P3, P4, P5, P6]
        
    def compute_output_shape(self, input_shape):
        C2_shape, C3_shape, C4_shape, C5_shape = input_shape
        
        C2_shape, C3_shape, C4_shape, C5_shape = \
            C2_shape.as_list(), C3_shape.as_list(), C4_shape.as_list(), C5_shape.as_list()
        
        C6_shape = [C5_shape[0], (C5_shape[1] + 1) // 2, (C5_shape[2] + 1) // 2, self.out_channels]
        
        C2_shape[-1] = self.out_channels
        C3_shape[-1] = self.out_channels
        C4_shape[-1] = self.out_channels
        C5_shape[-1] = self.out_channels
        
        return [tf.TensorShape(C2_shape),
                tf.TensorShape(C3_shape),
                tf.TensorShape(C4_shape),
                tf.TensorShape(C5_shape),
                tf.TensorShape(C6_shape)]

if __name__ == '__main__':
    
    C2 = tf.random.normal((2, 256, 256,  256))
    C3 = tf.random.normal((2, 128, 128,  512))
    C4 = tf.random.normal((2,  64,  64, 1024))
    C5 = tf.random.normal((2,  32,  32, 2048))
    
    fpn = FPN()
    
    P2, P3, P4, P5, P6 = fpn([C2, C3, C4, C5])
    
    print('P2 shape:', P2.shape.as_list())
    print('P3 shape:', P3.shape.as_list())
    print('P4 shape:', P4.shape.as_list())
    print('P5 shape:', P5.shape.as_list())
    print('P6 shape:', P6.shape.as_list())

(17)roi_align

# roi_align
import tensorflow as tf

from detection.utils.misc import *


class PyramidROIAlign(tf.keras.layers.Layer):

    def __init__(self, pool_shape, **kwargs):
        '''
        Implements ROI Pooling on multiple levels of the feature pyramid.

        Attributes
        ---
            pool_shape: (height, width) of the output pooled regions.
                Example: (7, 7)
        '''
        super(PyramidROIAlign, self).__init__(**kwargs)

        self.pool_shape = tuple(pool_shape)

    def call(self, inputs, training=True):
        '''
        Args
        ---
            rois_list: list of [num_rois, (y1, x1, y2, x2)] in normalized coordinates.
            feature_map_list: List of [batch, height, width, channels].
                feature maps from different levels of the pyramid.
            img_metas: [batch_size, 11]

        Returns
        ---
            pooled_rois_list: list of [num_rois, pooled_height, pooled_width, channels].
                The width and height are those specific in the pool_shape in the layer
                constructor.
        '''
        rois_list, feature_map_list, img_metas = inputs # [2000 ,4], list:[P2, P3, P4, P5]

        pad_shapes = calc_pad_shapes(img_metas)
        
        pad_areas = pad_shapes[:, 0] * pad_shapes[:, 1] # 1216*1216
        
        num_rois_list = [rois.shape.as_list()[0] for rois in rois_list] # data:[2000]
        roi_indices = tf.constant(
            [i for i in range(len(rois_list)) for _ in range(rois_list[i].shape.as_list()[0])],
            dtype=tf.int32
        ) #[0.....], shape:[2000]
        
        areas = tf.constant(#              range(1)                               range(2000)
            [pad_areas[i] for i in range(pad_areas.shape[0]) for _ in range(num_rois_list[i])],
            dtype=tf.float32
        )#[1216*1216, 1216*1216,...], shape:[2000]


        rois = tf.concat(rois_list, axis=0) # [2000, 4]
        
        # Assign each ROI to a level in the pyramid based on the ROI area.
        y1, x1, y2, x2 = tf.split(rois, 4, axis=1) # 4 of [2000, 1]
        h = y2 - y1 # [2000, 1]
        w = x2 - x1 # [2000, 1]
        
        # Equation 1 in the Feature Pyramid Networks paper. Account for
        # the fact that our coordinates are normalized here.
        # e.g. a 224x224 ROI (in pixels) maps to P4

        roi_level = tf.math.log( # [2000]
                    tf.sqrt(tf.squeeze(h * w, 1))
                    / tf.cast((224.0 / tf.sqrt(areas * 1.0)), tf.float32)
                    ) / tf.math.log(2.0)
        roi_level = tf.minimum(5, tf.maximum( # [2000], clamp to [2-5]
            2, 4 + tf.cast(tf.round(roi_level), tf.int32)))
        # roi_level will indicates which level of feature to use

        
        # Loop through levels and apply ROI pooling to each. P2 to P5.
        pooled_rois = []
        roi_to_level = []
        for i, level in enumerate(range(2, 6)): # 2,3,4,5
            ix = tf.where(tf.equal(roi_level, level)) # [1999, 1], means 1999 of 2000 select P2
            level_rois = tf.gather_nd(rois, ix) # boxes to crop, [1999, 4]

            # ROI indices for crop_and_resize.
            level_roi_indices = tf.gather_nd(roi_indices, ix) # [19999], data:[0....0]

            # Keep track of which roi is mapped to which level
            roi_to_level.append(ix)

            # Stop gradient propogation to ROI proposals
            level_rois = tf.stop_gradient(level_rois)
            level_roi_indices = tf.stop_gradient(level_roi_indices)

            # Crop and Resize
            # From Mask R-CNN paper: "We sample four regular locations, so
            # that we can evaluate either max or average pooling. In fact,
            # interpolating only a single value at each bin center (without
            # pooling) is nearly as effective."
            #
            # Here we use the simplified approach of a single value per bin,
            # which is how it's done in tf.crop_and_resize()
            # Result: [batch * num_rois, pool_height, pool_width, channels]
            pooled_rois.append(tf.image.crop_and_resize(
                feature_map_list[i], level_rois, level_roi_indices, self.pool_shape,
                method="bilinear")) # [1, 304, 304, 256], [1999, 4], [1999], [2]=[7,7]=>[1999,7,7,256]
        # [1999, 7, 7, 256], [], [], [1,7,7,256] => [2000, 7, 7, 256]
        # Pack pooled features into one tensor
        pooled_rois = tf.concat(pooled_rois, axis=0)

        # Pack roi_to_level mapping into one array and add another
        # column representing the order of pooled rois
        roi_to_level = tf.concat(roi_to_level, axis=0) # [2000, 1], 1999 of P2, and 1 other P
        roi_range = tf.expand_dims(tf.range(tf.shape(roi_to_level)[0]), 1) # [2000, 1], 0~1999
        roi_to_level = tf.concat([tf.cast(roi_to_level, tf.int32), roi_range],
                                 axis=1) # [2000, 2], (P, range)

        # Rearrange pooled features to match the order of the original rois
        # Sort roi_to_level by batch then roi indextf.Tensor([        0    100001    200002 ... 199801997 199901998  20101999], shape=(2000,), dtype=int32)
        # TF doesn't have a way to sort by two columns, so merge them and sort.
        sorting_tensor = roi_to_level[:, 0] * 100000 + roi_to_level[:, 1]
        ix = tf.nn.top_k(sorting_tensor, k=tf.shape( # k=2000
            roi_to_level)[0]).indices[::-1]# reverse the order
        ix = tf.gather(roi_to_level[:, 1], ix) # [2000]
        pooled_rois = tf.gather(pooled_rois, ix) # [2000, 7, 7, 256]
        # 2000 of [7, 7, 256]
        pooled_rois_list = tf.split(pooled_rois, num_rois_list, axis=0)
        return pooled_rois_list

(18)rpn_head

# rpn_head
import  tensorflow as tf
from    tensorflow.keras import layers

from detection.core.bbox import transforms
from detection.utils.misc import *

from detection.core.anchor import anchor_generator, anchor_target
from detection.core.loss import losses

class RPNHead(tf.keras.Model):

    def __init__(self, 
                 anchor_scales=(32, 64, 128, 256, 512), 
                 anchor_ratios=(0.5, 1, 2), 
                 anchor_feature_strides=(4, 8, 16, 32, 64),
                 proposal_count=2000, 
                 nms_threshold=0.7, 
                 target_means=(0., 0., 0., 0.), 
                 target_stds=(0.1, 0.1, 0.2, 0.2), 
                 num_rpn_deltas=256,
                 positive_fraction=0.5,
                 pos_iou_thr=0.7,
                 neg_iou_thr=0.3,
                 **kwags):
        '''
        Network head of Region Proposal Network.

                                      / - rpn_cls (1x1 conv)
        input - rpn_conv (3x3 conv) -
                                      \ - rpn_reg (1x1 conv)

        Attributes
        ---
            anchor_scales: 1D array of anchor sizes in pixels.
            anchor_ratios: 1D array of anchor ratios of width/height.
            anchor_feature_strides: Stride of the feature map relative 
                to the image in pixels.
            proposal_count: int. RPN proposals kept after non-maximum 
                suppression.
            nms_threshold: float. Non-maximum suppression threshold to 
                filter RPN proposals.
            target_means: [4] Bounding box refinement mean.
            target_stds: [4] Bounding box refinement standard deviation.
            num_rpn_deltas: int.
            positive_fraction: float.
            pos_iou_thr: float.
            neg_iou_thr: float.
        '''
        super(RPNHead, self).__init__(**kwags)
        
        self.proposal_count = proposal_count
        self.nms_threshold = nms_threshold
        self.target_means = target_means
        self.target_stds = target_stds

        self.generator = anchor_generator.AnchorGenerator(
            scales=anchor_scales, 
            ratios=anchor_ratios, 
            feature_strides=anchor_feature_strides)
        
        self.anchor_target = anchor_target.AnchorTarget(
            target_means=target_means, 
            target_stds=target_stds,
            num_rpn_deltas=num_rpn_deltas,
            positive_fraction=positive_fraction,
            pos_iou_thr=pos_iou_thr,
            neg_iou_thr=neg_iou_thr)
        
        self.rpn_class_loss = losses.rpn_class_loss
        self.rpn_bbox_loss = losses.rpn_bbox_loss
        
        
        # Shared convolutional base of the RPN
        self.rpn_conv_shared = layers.Conv2D(512, (3, 3), padding='same',
                                             kernel_initializer='he_normal', 
                                             name='rpn_conv_shared')
        
        self.rpn_class_raw = layers.Conv2D(len(anchor_ratios) * 2, (1, 1),
                                           kernel_initializer='he_normal', 
                                           name='rpn_class_raw')

        self.rpn_delta_pred = layers.Conv2D(len(anchor_ratios) * 4, (1, 1),
                                           kernel_initializer='he_normal', 
                                           name='rpn_bbox_pred')
        
    def call(self, inputs, training=True):
        '''
        Args
        ---
            inputs: [batch_size, feat_map_height, feat_map_width, channels] 
                one level of pyramid feat-maps.
        
        Returns
        ---
            rpn_class_logits: [batch_size, num_anchors, 2]
            rpn_probs: [batch_size, num_anchors, 2]
            rpn_deltas: [batch_size, num_anchors, 4]
        '''
        
        layer_outputs = []
        
        for feat in inputs: # for every anchors feature maps
            """
            (1, 304, 304, 256)
            (1, 152, 152, 256)
            (1, 76, 76, 256)
            (1, 38, 38, 256)
            (1, 19, 19, 256)
            rpn_class_raw: (1, 304, 304, 6)
            rpn_class_logits: (1, 277248, 2)
            rpn_delta_pred: (1, 304, 304, 12)
            rpn_deltas: (1, 277248, 4)
            rpn_class_raw: (1, 152, 152, 6)
            rpn_class_logits: (1, 69312, 2)
            rpn_delta_pred: (1, 152, 152, 12)
            rpn_deltas: (1, 69312, 4)
            rpn_class_raw: (1, 76, 76, 6)
            rpn_class_logits: (1, 17328, 2)
            rpn_delta_pred: (1, 76, 76, 12)
            rpn_deltas: (1, 17328, 4)
            rpn_class_raw: (1, 38, 38, 6)
            rpn_class_logits: (1, 4332, 2)
            rpn_delta_pred: (1, 38, 38, 12)
            rpn_deltas: (1, 4332, 4)
            rpn_class_raw: (1, 19, 19, 6)
            rpn_class_logits: (1, 1083, 2)
            rpn_delta_pred: (1, 19, 19, 12)
            rpn_deltas: (1, 1083, 4)

            """
            # print(feat.shape)
            shared = self.rpn_conv_shared(feat)
            shared = tf.nn.relu(shared)

            x = self.rpn_class_raw(shared)
            # print('rpn_class_raw:', x.shape)
            rpn_class_logits = tf.reshape(x, [tf.shape(x)[0], -1, 2])
            rpn_probs = tf.nn.softmax(rpn_class_logits)
            # print('rpn_class_logits:', rpn_class_logits.shape)

            x = self.rpn_delta_pred(shared)
            # print('rpn_delta_pred:', x.shape)
            rpn_deltas = tf.reshape(x, [tf.shape(x)[0], -1, 4])
            # print('rpn_deltas:', rpn_deltas.shape)
            
            layer_outputs.append([rpn_class_logits, rpn_probs, rpn_deltas])
            # print(rpn_class_logits.shape, rpn_probs.shape, rpn_deltas.shape)
            """
            (1, 277248, 2) (1, 277248, 2) (1, 277248, 4)
            (1, 69312, 2) (1, 69312, 2) (1, 69312, 4)
            (1, 17328, 2) (1, 17328, 2) (1, 17328, 4)
            (1, 4332, 2) (1, 4332, 2) (1, 4332, 4)
            (1, 1083, 2) (1, 1083, 2) (1, 1083, 4)

            """

        outputs = list(zip(*layer_outputs))
        outputs = [tf.concat(list(o), axis=1) for o in outputs]
        rpn_class_logits, rpn_probs, rpn_deltas = outputs
        # (1, 369303, 2) (1, 369303, 2) (1, 369303, 4)
        # print(rpn_class_logits.shape, rpn_probs.shape, rpn_deltas.shape)
        
        return rpn_class_logits, rpn_probs, rpn_deltas

    def loss(self, rpn_class_logits, rpn_deltas, gt_boxes, gt_class_ids, img_metas):
        """

        :param rpn_class_logits: [N, 2]
        :param rpn_deltas: [N, 4]
        :param gt_boxes:  [GT_N]
        :param gt_class_ids:  [GT_N]
        :param img_metas: [11]
        :return:
        """
        # valid_flags indicates anchors located in padded area or not.
        anchors, valid_flags = self.generator.generate_pyramid_anchors(img_metas)

        #
        rpn_target_matchs, rpn_target_deltas = self.anchor_target.build_targets(
            anchors, valid_flags, gt_boxes, gt_class_ids)
        
        rpn_class_loss = self.rpn_class_loss(
            rpn_target_matchs, rpn_class_logits)
        rpn_bbox_loss = self.rpn_bbox_loss(
            rpn_target_deltas, rpn_target_matchs, rpn_deltas)
        
        return rpn_class_loss, rpn_bbox_loss
    
    def get_proposals(self, 
                      rpn_probs, 
                      rpn_deltas, 
                      img_metas, 
                      with_probs=False):
        '''
        Calculate proposals.
        
        Args
        ---
            rpn_probs: [batch_size, num_anchors, (bg prob, fg prob)]
            rpn_deltas: [batch_size, num_anchors, (dy, dx, log(dh), log(dw))]
            img_metas: [batch_size, 11]
            with_probs: bool.
        
        Returns
        ---
            proposals_list: list of [num_proposals, (y1, x1, y2, x2)] in 
                normalized coordinates if with_probs is False. 
                Otherwise, the shape of proposals in proposals_list is 
                [num_proposals, (y1, x1, y2, x2, score)]
        
        Note that num_proposals is no more than proposal_count. And different 
           images in one batch may have different num_proposals.
        '''
        anchors, valid_flags = self.generator.generate_pyramid_anchors(img_metas)
        # [369303, 4], [b, 11]
        # [b, N, (background prob, foreground prob)], get anchor's foreground prob, [1, 369303]
        rpn_probs = rpn_probs[:, :, 1]
        # [[1216, 1216]]
        pad_shapes = calc_pad_shapes(img_metas)
        
        proposals_list = [
            self._get_proposals_single(
                rpn_probs[i], rpn_deltas[i], anchors, valid_flags[i], pad_shapes[i], with_probs)
            for i in range(img_metas.shape[0])
        ]
        
        return proposals_list
    
    def _get_proposals_single(self, 
                              rpn_probs, 
                              rpn_deltas, 
                              anchors, 
                              valid_flags, 
                              img_shape, 
                              with_probs):
        '''
        Calculate proposals.
        
        Args
        ---
            rpn_probs: [num_anchors]
            rpn_deltas: [num_anchors, (dy, dx, log(dh), log(dw))]
            anchors: [num_anchors, (y1, x1, y2, x2)] anchors defined in 
                pixel coordinates.
            valid_flags: [num_anchors]
            img_shape: np.ndarray. [2]. (img_height, img_width)
            with_probs: bool.
        
        Returns
        ---
            proposals: [num_proposals, (y1, x1, y2, x2)] in normalized 
                coordinates.
        '''
        
        H, W = img_shape
        
        # filter invalid anchors, int => bool
        valid_flags = tf.cast(valid_flags, tf.bool)
        # [369303] => [215169], respectively
        rpn_probs = tf.boolean_mask(rpn_probs, valid_flags)
        rpn_deltas = tf.boolean_mask(rpn_deltas, valid_flags)
        anchors = tf.boolean_mask(anchors, valid_flags)

        # Improve performance
        pre_nms_limit = min(6000, anchors.shape[0]) # min(6000, 215169) => 6000
        ix = tf.nn.top_k(rpn_probs, pre_nms_limit, sorted=True).indices
        # [215169] => [6000], respectively
        rpn_probs = tf.gather(rpn_probs, ix)
        rpn_deltas = tf.gather(rpn_deltas, ix)
        anchors = tf.gather(anchors, ix)
        
        # Get refined anchors, => [6000, 4]
        proposals = transforms.delta2bbox(anchors, rpn_deltas, 
                                          self.target_means, self.target_stds)
        # clipping to valid area, [6000, 4]
        window = tf.constant([0., 0., H, W], dtype=tf.float32)
        proposals = transforms.bbox_clip(proposals, window)
        
        # Normalize, (y1, x1, y2, x2)
        proposals = proposals / tf.constant([H, W, H, W], dtype=tf.float32)
        
        # NMS, indices: [2000]
        indices = tf.image.non_max_suppression(
            proposals, rpn_probs, self.proposal_count, self.nms_threshold)
        proposals = tf.gather(proposals, indices) # [2000, 4]
        
        if with_probs:
            proposal_probs = tf.expand_dims(tf.gather(rpn_probs, indices), axis=1)
            proposals = tf.concat([proposals, proposal_probs], axis=1)
   
        return proposals

(19)misc

# misc
import tensorflow as tf

def trim_zeros(boxes, name=None):
    '''
    Often boxes are represented with matrices of shape [N, 4] and
    are padded with zeros. This removes zero boxes.
    
    Args
    ---
        boxes: [N, 4] matrix of boxes.
        non_zeros: [N] a 1D boolean mask identifying the rows to keep
    '''
    non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool)
    boxes = tf.boolean_mask(boxes, non_zeros, name=name)
    return boxes, non_zeros

def parse_image_meta(meta):
    '''
    Parses a tensor that contains image attributes to its components.
    
    Args
    ---
        meta: [..., 11]

    Returns
    ---
        a dict of the parsed tensors.
    '''
    meta = meta.numpy()
    ori_shape = meta[..., 0:3]
    img_shape = meta[..., 3:6]
    pad_shape = meta[..., 6:9]
    scale = meta[..., 9]  
    flip = meta[..., 10]
    return {
        'ori_shape': ori_shape,
        'img_shape': img_shape,
        'pad_shape': pad_shape,
        'scale': scale,
        'flip': flip
    }

def calc_batch_padded_shape(meta):
    '''
    Args
    ---
        meta: [batch_size, 11]
    
    Returns
    ---
        nd.ndarray. Tuple of (height, width)
    '''
    return tf.cast(tf.reduce_max(meta[:, 6:8], axis=0), tf.int32).numpy()

def calc_img_shapes(meta):
    '''
    Args
    ---
        meta: [..., 11]
    
    Returns
    ---
        nd.ndarray. [..., (height, width)]
    '''
    return tf.cast(meta[..., 3:5], tf.int32).numpy()


def calc_pad_shapes(meta):
    '''
    Args
    ---
        meta: [..., 11]
    
    Returns
    ---
        nd.ndarray. [..., (height, width)]
    '''
    return tf.cast(meta[..., 6:8], tf.int32).numpy()

你可能感兴趣的:(#,目标检测)

Python 的 ultralytics 库详解白.夜人工智能
ultralytics是一个专注于计算机视觉任务的Python库，尤其以YOLO（YouOnlyLookOnce）系列模型为核心，提供了简单易用的接口，支持目标检测、实例分割、姿态估计等任务。本文将详细介绍ultralytics库的功能、安装方法、核心模块以及使用示例。1.ultralytics库简介ultralytics库由Ultralytics团队开发，旨在为YOLO系列模型提供高效、灵活且易
【保姆级视频教程（一）】YOLOv12环境配置：从零到一，手把手保姆级教程！| 小白也能轻松玩转目标检测！一只云卷云舒 YOLOv12保姆级通关教程 YOLO YOLOv12 flash attention GPU 计算能力算力
【2025全站首发】YOLOv12环境配置：从零到一，手把手保姆级教程！|小白也能轻松玩转目标检测！文章目录1.FlashAttentionWindows端WHL包下载1.1简介1.2下载链接1.3国内镜像站1.4安装方法2.NVIDIAGPU计算能力概述2.1简介2.2计算能力版本与GPU型号对照表2.2.1CUDA-EnabledDatacenterProducts2.2.2CUDA-Enab
yolov8的第一次实验报告算法宇宙 YOLO 人工智能计算机视觉
1.实验概述实验名称:占道经营目标检测模型实验目标:提高模型的精确率（Precision）和召回率（Recall），使其接近1。实验日期:[2025-01-16]2.数据集数据集名称:[datasets]数据集大小:[2.68Gb]数据集描述:[数据集主要分两个类别：zdjy_ld,zdjy_gd]注释：占道经营流动，占道经营固定3.模型配置3.1基础配置·模型类型:YOLOv8·预训练模型:YO
YOLOv8 的简介及C#中如何简单应用YOLOv8 码上有潜 YOLOv8 YOLO
YOLOv8是YOLO（YouOnlyLookOnce）系列中的最新版本，是一种用于目标检测和图像分割的深度学习模型。YOLO模型以其快速和准确的目标检测性能而著称，广泛应用于实时应用程序中。主要特点高效性：YOLOv8在保持高检测速度的同时，进一步提高了检测精度。端到端训练：可以直接从图像输入端到分类结果输出，简化了训练和部署过程。改进的架构：包括更深的网络结构、更复杂的特征提取方法以及更高效的
Yolov11目标检测(ultralytics) @M_J_Y@ 目标检测 YOLO 目标检测人工智能
Yolov11目标检测（ultralytics）1.克隆仓库2.安装环境依赖3.训练、验证、推理以及onnx模型导出1.克隆仓库从官网下载Yolov11到本地。[email protected]:ultralytics/ultralytics.git2.安装环境依赖pipinstall-e.-ihttps://pypi.mirrors.ustc.edu.cn/simple/3.训练、验证
使用 labelImg 制作YOLO系列目标检测数据集（ 2401_89791028 YOLO 目标检测人工智能
文章转载自K同学，谨防原文失效可参考link1和link2和link3LabelImg介绍LabelImg支持文件夹的导入，在标完一张后，在左侧选择NextImage就可以切换到下一张继续了。输出格式部分，目前LabelImg支持YOLO和PascalVOC2种格式，前者标签文件后缀是.txt件，而后者标签文件后缀是.xml件。标签保存在对应的labels文件夹下，与images中的图片文件名一一
YOLOv8 改进：添加 GAM 注意力机制鱼弦人工智能时代 YOLO
YOLOv8改进：添加GAM注意力机制引言在目标检测领域，YOLO（YouOnlyLookOnce）网络因其速度和准确性被广泛应用。然而，随着场景的复杂化，仅仅依靠卷积特征可能不足以捕捉图像中的重要信息。引入注意力机制，如GAM（GlobalAttentionMechanism），可以有效提高模型对关键区域的关注，从而提升检测性能。技术背景GAM是一种全局注意力机制，通过全局信息聚合和自适应权重分
【北上广深杭大厂AI算法面试题】计算机视觉篇...详解目标检测中的多尺度训练和测试? 努力毕业的小土博^_^ AI算法题库人工智能计算机视觉算法深度学习神经网络目标检测
【北上广深杭大厂AI算法面试题】计算机视觉篇…详解目标检测中的多尺度训练和测试?【北上广深杭大厂AI算法面试题】计算机视觉篇…详解目标检测中的多尺度训练和测试?文章目录【北上广深杭大厂AI算法面试题】计算机视觉篇...详解目标检测中的多尺度训练和测试?前言多尺度训练核心思想：优点与注意点：多尺度测试核心思想：优点与注意点：综合作用参考示例总结欢迎铁子们点赞、关注、收藏！祝大家逢考必过！逢投必中！上
从0到1构建AI深度学习视频分析系统--基于YOLO 目标检测的动作序列检查系统：（2）消息队列与消息中间件 shiter 人工智能系统解决方案与技术架构人工智能深度学习音视频
文章大纲原始视频队列Python内存视频缓存优化方案（4GB以内）一、核心参数设计二、内存管理实现三、性能优化策略四、内存占用验证五、高级优化技巧六、部署建议检测结果队列YOLO检测结果队列技术方案一、技术选型矩阵二、核心实现代码三、性能优化策略四、可视化方案对比五、部署建议逻辑判定队列时间片图论时间序列大模型引入参考文献原始视频队列想要在单机内存中缓存1-5分钟的视频片段，python技术栈的话
YOLOv5+UI界面在车辆检测中的应用与实现深度学习&目标检测实战项目 YOLOv5实战项目 YOLO ui 分类数据挖掘目标跟踪人工智能
1.引言随着智能交通系统（ITS）的快速发展，车辆检测已成为计算机视觉领域的重要研究方向。车辆检测技术广泛应用于交通流量监控、车辆违章抓拍、无人驾驶等场景中。近年来，深度学习技术的突破，特别是卷积神经网络（CNN）的崛起，使得目标检测技术取得了显著进展。其中，YOLO（YouOnlyLookOnce）系列模型以其高效的实时检测能力和出色的性能成为车辆检测领域的首选方法之一。在本文中，我们将基于YO
YOLO优化之扫描融合模块（SimVSS Block）清风AI 人工智能计算机视觉 YOLO 目标检测深度学习目标跟踪
研究背景在自动驾驶技术快速发展的背景下，目标检测作为其核心组成部分面临着严峻挑战。驾驶场景中目标尺度和大小的巨大差异，以及视觉特征不显著且易受噪声干扰的问题，对辅助驾驶系统的安全性构成了潜在威胁。传统的卷积神经网络（CNN）虽然在目标检测领域取得了显著进展，但仍存在局限性，如局部关注性导致难以有效检测不同尺度的目标。为克服这些问题，研究人员开始探索将状态空间模型（SSM）引入目标检测领域，以期提高
深入探究YOLO系列的骨干网路编码实践 YOLO 深度学习计算机视觉
深入探究YOLO系列的骨干网路YOLO系列是目标检测领域中非常知名的算法。其通过将整个图像作为输入，并且直接在图像上通过一个单独的神经网络输出每个检测框的类别预测和边界框信息。为了更好地理解YOLO系列，我们需要先了解它所使用的骨干网路。骨干网络是深度学习模型中的核心部分，负责提取图像的特征。如今常用的骨干网络有VGG、ResNet和MobileNet等。YOLO系列算法采用的是Darknet骨干
《Hello YOLOv8从入门到精通》4，模型架构和骨干网络Backbone调优实践 Jagua YOLO
YOLOv8是由Ultralytics开发的最先进的目标检测模型，其模型架构细节包括骨干网络（Backbone）、颈部网络（Neck）和头部网络（Head）三大部分。一、骨干网络（Backbone）Backbone部分负责特征提取，采用了一系列卷积和反卷积层，同时使用了残差连接和瓶颈结构来减小网络的大小并提高性能。YOLOv8的Backbone参考了CSPDarkNet结构，的增强版本，并结合了其
NPU的应用场景：从云端到边缘绿算技术 NPU架构介绍缓存人工智能科技深度学习
NPU的应用场景非常广泛，主要包括以下几个方面：1.云计算与数据中心AI推理服务：在云端提供高效的AI推理服务，例如图像识别、语音识别。模型训练加速：在大规模训练任务中，NPU可以作为加速单元，提升训练效率。2.边缘计算智能摄像头：在安防监控中，NPU可以实时处理视频流，实现目标检测和跟踪。智能音箱：在语音助手中，NPU可以加速语音识别和自然语言处理任务。3.自动驾驶实时感知：NPU可以加速自动驾
目标检测中衡量模型速度和精度的指标：FPS和mAP asdfg1258963 目标检测_ai 目标检测人工智能
“FPS”和“mAP”分别衡量了模型的速度和精度。FPS（FramesPerSecond）定义：FPS是“每秒传输帧数”的缩写，用于衡量计算机视觉系统（如目标检测、图像识别等）的实时性能。它表示系统每秒钟能够处理的图像或视频帧的数量。重要性：在实时应用中，如自动驾驶、视频监控等，FPS是一个关键指标。高FPS意味着系统能够快速处理输入的图像数据，实现实时响应。计算方式：FPS可以通过以下公式计算：
机器学习(二) 本文(2.5万字) | KNN算法原理及Python复现 | 小酒馆燃着灯机器学习算法 k近邻算法
文章目录一KNN算法原理二KNN三要素三机器学习中标准化四KNN分类预测规则五KNN回归预测规则六KNN算法实现方式七KDTree7.1构造KDtree7.2KDtree查找最近邻八KNN特点九KNN算法实现案例一案例二1.机器学习2.深度学习与目标检测3.YOLOv54.YOLOv5改进5.YOLOv8及其改进6.Python与PyTorch7.工具8.小知识点9.杂记一KNN算法原理K近邻分类
基于分组 NMS 的检测模型后处理改进 Lunar* 目标检测算法与优化目标检测深度学习 python
引言在目标检测任务中，后处理阶段的非极大值抑制（Non-MaximumSuppression,NMS）是至关重要的一环，主要用于去除高度重叠的冗余预测框。然而，在某些场景中，不同类别的目标可能会被网络同时预测为多个相近的类别，例如：交通工具检测场景：同一辆车可能被误检测为“自行车”和“电动车”。动物检测场景：同一只动物可能被误检测为“狼”和“狗”。家电检测场景：同一台设备可能被误检测为“微波炉”和
3.13 YOLO V3 不要不开心了机器学习 pytorch 深度学习
今天的内容为YOLO-V3YOLO系列-YOLO-V3，最大的改进就是网络结构，使其更适合小目标检测。-特征做得更细致，融入多持续特征图信息来预测不同规格物体。-先验框更丰富了，3种scale，每种3个规格，一共9种。-softmax改进，预测多标签任务。-多scale-为了能检测到不同大小的物体，设计了3个scale。-scale变换经典方法-左图：图像金字塔；右图：单一的输入。-scale变换
基于YOLOv5的车牌识别系统：从数据集到UI界面的实现深度学习&目标检测实战项目 YOLOv5实战项目 YOLO ui 分类数据挖掘目标跟踪
1.引言随着智能交通系统的发展，车牌识别技术已成为交通管理、停车场自动化、路面监控等应用中的关键技术之一。车牌识别系统（LicensePlateRecognition,LPR）主要用于识别车辆的车牌号码，并将其转化为可以进一步处理的数据。车牌识别系统通常由图像处理、字符识别、目标检测等多种技术组成。近年来，随着深度学习技术的飞速发展，基于卷积神经网络（CNN）的目标检测算法，如YOLO（YouOn
点云从入门到精通技术详解100篇-基于激光雷达点云的三维目标检测格图素书目标检测人工智能计算机视觉
目录前言图像目标检测算法研究现状点云目标检测算法研究现状基于投影图的方法基于体素的方法基于点云的多模态融合方法2地面点云滤波及神经网络2.1目标检测数据集及采集设备2.1.1KITTI数据集2.1.2车载激光雷达2.2地面点云滤波算法2.2.1RANSAC算法2.2.2CSF算法本文篇幅较长，分为上下两篇，下篇详见基于激光雷达点云的三维目标检测（续）前言近几年来，在计算机视觉领域，利用深度学习卷积
【YOLOv11[基础]】目标检测OD | 导出ONNX模型 | ONN模型推理以及检测结果可视化 | python Jackilina_Stone 【改进】YOLO系列人工智能 python 计算机视觉 YOLO 深度学习目标检测
本文将导出YOLO11.pt模型对应的ONNX模型，并且使用ONNX模型推理以及结果的可视化。话不多说，先看看效果图吧！！！目录一导出ONNX模型二推理及检测结果可视化1代码2效果图
YOLO系列模型从v1到v10的演进剑走偏锋o.O YOLO 目标跟踪人工智能
文章目录引言YOLOv1:开创单阶段目标检测先河发布时间与背景核心创新模型架构训练策略与优化YOLOv2:提升精度与速度的平衡发布时间与背景核心创新模型架构训练策略与优化YOLOv3:多尺度检测与残差连接发布时间与背景核心创新模型架构训练策略与优化YOLOv4:引入注意力机制与优化模块发布时间与背景核心创新模型架构训练策略与优化YOLOv5:工程优化与实际应用的结合发布时间与背景核心创新模型架构训
图像识别技术与应用课后总结（18）一元钱面包人工智能
·YOLO-V3RetinaNet系列，YOLO-V3在不同变体（如YOLOV3-320、YOLOV3-416等）下，在推理时间和精度上有不同的表现，展示了其在速度和准确性上的平衡。YOLO-V3的改进点网络结构：相比之前版本，YOLO-V3的网络结构进行了优化，使其更适合小目标检测。特征处理：对特征的处理更加细致，通过融入多持续特征图信息来预测不同规格的物体。先验框：先验框更加丰富，有3种sca
YOLOv8改进策略【注意力机制篇】| EMA 即插即用模块，提高远距离建模依赖（含C2f二次创新） Limiiiing YOLOv8改进专栏 YOLO 计算机视觉深度学习目标检测
一、本文介绍本文记录的是基于EMA模块的YOLOv8目标检测改进方法研究。EMA认为跨维度交互有助于通道或空间注意力预测，并且解决了现有注意力机制在提取深度视觉表示时可能带来的维度缩减问题。在改进YOLOv8的过程中能够为高级特征图产生更好的像素级注意力，能够建模长程依赖并嵌入精确的位置信息。专栏目录：YOLOv8改进目录一览|涉及卷积层、轻量化、注意力、损失函数、Backbone、SPPF、Ne
YOLOv5改进：在C3块不同位置添加EMA注意力机制，有效提升计算机视觉性能 UksApps YOLO 计算机视觉深度学习
计算机视觉中的目标检测是一个重要的任务，而YOLOv5是目前广泛应用的一种高效目标检测算法。为了进一步提升YOLOv5的性能，我们在C3块的不同位置添加了EMA（ExponentialMovingAverage）注意力机制。EMA注意力机制是一种用于提升模型的感知能力和特征表达能力的技术。在YOLOv5中，我们将EMA注意力机制嵌入到C3块中，以增强这一块的特征表示能力。下面是我们改进的YOLOv
实现一个超轻量级实例分割网络的思路 CV工程师小朱深度学习笔记深度学习应用实例分割 yolact picodet 深度学习
文章目录前言一、基本思路二、picodet三、yolact三、picodet+yolact总结前言在某些工业领域，由于成本问题算力有限，只能实时跑一些超轻量级网络，拿目标检测来说，例如yolo-fast，pp-picodet这些。如果要跑实例分割，目前好像没有什么超轻量级的网络。所以就有想法如何实现一个超轻量级实例分割网络。一、基本思路基于超轻量级目标检测pp-picodet，增加一个掩膜分支。参
如何提升OmniParser V2的小元素识别率——YOLOv8 增加 P2 层的性能变化解析 AI-AIGC-7744423 目标跟踪人工智能计算机视觉
YOLOv8增加P2层通过牺牲部分计算效率换取了小目标检测性能的显著提升，尤其适用于高分辨率、小目标密集的场景。开发者需根据具体任务需求，在精度与速度之间进行合理权衡，并通过模型轻量化技术优化部署效果。更多技术细节可参考微软等机构的开源实现136。YOLOv8增加P2层的性能变化解析一、性能提升方向小目标检测精度显著提高原理：P2层对应更高分辨率的浅层特征图（如1/4下采样），能捕捉更细粒度的纹理
用OpenCV写个视频播放器可还行？（Python版）程序员Linc 计算机视觉 opencv 音视频 python
引言提到OpenCV，大家首先想到的可能是图像处理、目标检测，但你是否想过——用OpenCV实现一个带进度条、倍速播放、暂停功能的视频播放器？本文将通过一个实战项目，带你深入掌握OpenCV的视频处理能力，并解锁以下功能：基础播放/暂停动态倍速调节（0.5x~4x）交互式进度条实时时间戳显示文末提供完整代码，可直接运行！一、环境准备安装OpenCVpipinstallopencv-python#P
AI：230-YOLOv8与RT-DETR的完美结合 | 重塑目标检测技术的前沿【保姆级教程】一键难忘精通AI实战千例专栏合集人工智能 YOLO 目标检测 YOLOv8与RT-DETR
本文收录于专栏：精通AI实战千例专栏合集https://blog.csdn.net/weixin_52908342/category_11863492.html从基础到实践，深入学习。无论你是初学者还是经验丰富的老手，对于本专栏案例和项目实践都有参考学习意义。每一个案例都附带关键代码，详细讲解供大家学习，希望可以帮到大家。正在不断更新中~文章目录YOLOv8与RT-DETR的完美结合|重塑目标检测
51-53 CVPR 2024 | DriveWorld：通过自动驾驶世界模型进行 4D 预训练场景理解（含模型数据流梳理）深圳季连AIgraphX aiXpilot 智驾大模型1 自动驾驶人工智能 AIGC stable diffusion 计算机视觉智慧城市
24年5月，北京大学、国防创新研究院无人系统技术研究中心、中国电信人工智能研究院联合发布了DriveWorld:4DPre-trainedSceneUnderstandingviaWorldModelsforAutonomousDriving。DriveWorld在UniAD的基础上又有所成长，提升了自动驾驶目标检测、目标追踪、3D占用、运动预测及规划的性能，后期扩大数据集和调整骨干网络大小应该会
scala的option和some 矮蛋蛋编程 scala
原文地址： http://blog.sina.com.cn/s/blog_68af3f090100qkt8.html 对于学习 Scala 的 Java™ 开发人员来说，对象是一个比较自然、简单的入口点。在本系列前几期文章中，我介绍了 Scala 中一些面向对象的编程方法，这些方法实际上与 Java 编程的区别不是很大。我还向您展示了 Scala 如何重新应用传统的面向对象概念，找到其缺点
NullPointerException Cb123456 android BaseAdapter
java.lang.NullPointerException: Attempt to invoke virtual method 'int android.view.View.getImportantForAccessibility()' on a null object reference 出现以上异常.然后就在baidu上
PHP使用文件和目录天子之骄 php文件和目录读取和写入 php验证文件 php锁定文件
PHP使用文件和目录 1.使用include()包含文件 (1)：使用include()从一个被包含文档返回一个值 (2)：在控制结构中使用include() include_once()函数需要一个包含文件的路径，此外，第一次调用它的情况和include()一样，如果在脚本执行中再次对同一个文件调用，那么这个文件不会再次包含。在php.ini文件中设置
SQL SELECT DISTINCT 语句何必如此 sql
SELECT DISTINCT 语句用于返回唯一不同的值。 SQL SELECT DISTINCT 语句在表中，一个列可能会包含多个重复值，有时您也许希望仅仅列出不同（distinct）的值。 DISTINCT 关键词用于返回唯一不同的值。 SQL SELECT DISTINCT 语法 SELECT DISTINCT column_name,column_name F
java冒泡排序 3213213333332132 java 冒泡排序
package com.algorithm; /** * @Description 冒泡 * @author FuJianyong * 2015-1-22上午09:58:39 */ public class MaoPao { public static void main(String[] args) { int[] mao = {17,50,26,18,9,10
struts2.18 +json,struts2-json-plugin-2.1.8.1.jar配置及问题！ 7454103 DAO spring Ajax json qq
struts2.18 出来有段时间了！（貌似是稳定版）闲时研究下下！貌似 sruts2 搭配 json 做 ajax 很吃香！实践了下下！不当之处请绕过！呵呵网上一大堆 struts2+json 不过大多的json 插件都是 jsonplugin.34.jar strut
struts2 数据标签说明 darkranger jsp bean struts servlet Scheme
数据标签主要用于提供各种数据访问相关的功能，包括显示一个Action里的属性，以及生成国际化输出等功能数据标签主要包括： action ：该标签用于在JSP页面中直接调用一个Action，通过指定executeResult参数，还可将该Action的处理结果包含到本页面来。 bean ：该标签用于创建一个javabean实例。如果指定了id属性，则可以将创建的javabean实例放入Sta
链表.简单的链表节点构建 aijuans 编程技巧
/*编程环境WIN-TC*/ #include "stdio.h" #include "conio.h" #define NODE(name, key_word, help) \ Node name[1]={{NULL, NULL, NULL, key_word, help}} typedef struct node { &nbs
tomcat下jndi的三种配置方式 avords tomcat
jndi(Java Naming and Directory Interface，Java命名和目录接口)是一组在Java应用中访问命名和目录服务的API。命名服务将名称和对象联系起来，使得我们可以用名称访问对象。目录服务是一种命名服务，在这种服务里，对象不但有名称，还有属性。 tomcat配置
关于敏捷的一些想法 houxinyou 敏捷
从网上看到这样一句话：“敏捷开发的最重要目标就是：满足用户多变的需求，说白了就是最大程度的让客户满意。” 感觉表达的不太清楚。感觉容易被人误解的地方主要在“用户多变的需求”上。第一种多变，实际上就是没有从根本上了解了用户的需求。用户的需求实际是稳定的，只是比较多，也比较混乱，用户一般只能了解自己的那一小部分，所以没有用户能清楚的表达出整体需求。而由于各种条件的，用户表达自己那一部分时也有
富养还是穷养，决定孩子的一生 bijian1013 教育人生
是什么决定孩子未来物质能否丰盛？为什么说寒门很难出贵子，三代才能出贵族？真的是父母必须有钱，才能大概率保证孩子未来富有吗？-----作者：@李雪爱与自由事实并非由物质决定，而是由心灵决定。一朋友富有而且修养气质很好，兄弟姐妹也都如此。她的童年时代，物质上大家都很贫乏，但妈妈总是保持生活中的美感，时不时给孩子们带回一些美好小玩意，从来不对孩子传递生活艰辛、金钱来之不易、要懂得珍惜
oracle 日期时间格式转化征客丶 oracle
oracle 系统时间有 SYSDATE 与 SYSTIMESTAMP； SYSDATE：不支持毫秒，取的是系统时间； SYSTIMESTAMP：支持毫秒，日期，时间是给时区转换的，秒和毫秒是取的系统的。日期转字符窜：一、不取毫秒： TO_CHAR(SYSDATE, 'YYYY-MM-DD HH24:MI:SS') 简要说明， YYYY 年 MM 月
【Scala六】分析Spark源代码总结的Scala语法四 bit1129 scala
1. apply语法 FileShuffleBlockManager中定义的类ShuffleFileGroup，定义： private class ShuffleFileGroup(val shuffleId: Int, val fileId: Int, val files: Array[File]) { ... def apply(bucketId
Erlang中有意思的bug bookjovi erlang
代码中常有一些很搞笑的bug，如下面的一行代码被调用两次（Erlang beam） commit f667e4a47b07b07ed035073b94d699ff5fe0ba9b Author: Jovi Zhang <[email protected]> Date: Fri Dec 2 16:19:22 2011 +0100 erts:
移位打印10进制数转16进制-2008-08-18 ljy325 java 基础
/** * Description 移位打印10进制的16进制形式 * Creation Date 15-08-2008 9:00 * @author 卢俊宇 * @version 1.0 * */ public class PrintHex { // 备选字符 static final char di
读《研磨设计模式》-代码笔记-组合模式 bylijinnan java 设计模式
声明：本文只为方便我个人查阅和理解，详细的分析以及源代码请移步原作者的博客http://chjavach.iteye.com/ import java.util.ArrayList; import java.util.List; abstract class Component { public abstract void printStruct(Str
利用cmd命令将.class文件打包成jar chenyu19891124 cmd jar
cmd命令打jar是如下实现：在运行里输入cmd，利用cmd命令进入到本地的工作盘符。(如我的是D盘下的文件有此路径 D:\workspace\prpall\WEB-INF\classes) 现在是想把D:\workspace\prpall\WEB-INF\classes路径下所有的文件打包成prpall.jar。然后继续如下操作： cd D: 回车 cd workspace/prpal
[原创]JWFD v0.96 工作流系统二次开发包 for Eclipse 简要说明 comsci eclipse 设计模式算法工作 swing
JWFD v0.96 工作流系统二次开发包 for Eclipse 简要说明 &nb
SecureCRT右键粘贴的设置 daizj secureCRT 右键粘贴
一般都习惯鼠标右键自动粘贴的功能，对于SecureCRT6.7.5 ，这个功能也已经是默认配置了。老版本的SecureCRT其实也有这个功能，只是不是默认设置，很多人不知道罢了。菜单： Options->Global Options ...->Terminal 右边有个Mouse的选项块。 Copy on Select Paste on Right/Middle
Linux 软链接和硬链接 dongwei_6688 linux
1.Linux链接概念Linux链接分两种，一种被称为硬链接（Hard Link），另一种被称为符号链接（Symbolic Link）。默认情况下，ln命令产生硬链接。【硬连接】硬连接指通过索引节点来进行连接。在Linux的文件系统中，保存在磁盘分区中的文件不管是什么类型都给它分配一个编号，称为索引节点号(Inode Index)。在Linux中，多个文件名指向同一索引节点是存在的。一般这种连
DIV底部自适应 dcj3sjt126com JavaScript
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml&q
Centos6.5使用yum安装mysql——快速上手必备 dcj3sjt126com mysql
第1步、yum安装mysql [root@stonex ~]# yum -y install mysql-server 安装结果： Installed: mysql-server.x86_64 0:5.1.73-3.el6_5 &nb
如何调试JDK源码 frank1234 jdk
相信各位小伙伴们跟我一样，想通过JDK源码来学习Java，比如collections包，java.util.concurrent包。可惜的是sun提供的jdk并不能查看运行中的局部变量，需要重新编译一下rt.jar。下面是编译jdk的具体步骤： 1.把C:\java\jdk1.6.0_26\sr
Maximal Rectangle hcx2013 max
Given a 2D binary matrix filled with 0's and 1's, find the largest rectangle containing all ones and return its area. public class Solution { public int maximalRectangle(char[][] matrix)
Spring MVC测试框架详解——服务端测试 jinnianshilongnian spring mvc test
随着RESTful Web Service的流行，测试对外的Service是否满足期望也变的必要的。从Spring 3.2开始Spring了Spring Web测试框架，如果版本低于3.2，请使用spring-test-mvc项目（合并到spring3.2中了）。 Spring MVC测试框架提供了对服务器端和客户端（基于RestTemplate的客户端）提供了支持。 &nbs
Linux64位操作系统（CentOS6.6）上如何编译hadoop2.4.0 liyong0802 hadoop
一、准备编译软件 1.在官网下载jdk1.7、maven3.2.1、ant1.9.4，解压设置好环境变量就可以用。环境变量设置如下：（1）执行vim /etc/profile （2）在文件尾部加入: export JAVA_HOME=/home/spark/jdk1.7 export MAVEN_HOME=/ho
StatusBar 字体白色 pangyulei status
[[UIApplication sharedApplication] setStatusBarStyle:UIStatusBarStyleLightContent]; /*you'll also need to set UIViewControllerBasedStatusBarAppearance to NO in the plist file if you use this method
如何分析Java虚拟机死锁 sesame java thread oracle 虚拟机 jdbc
英文资料： Thread Dump and Concurrency Locks Thread dumps are very useful for diagnosing synchronization related problems such as deadlocks on object monitors. Ctrl-\ on Solaris/Linux or Ctrl-B
位运算简介及实用技巧（一）：基础篇 tw_wangzhengquan 位运算
http://www.matrix67.com/blog/archives/263 去年年底写的关于位运算的日志是这个Blog里少数大受欢迎的文章之一，很多人都希望我能不断完善那篇文章。后来我看到了不少其它的资料，学习到了更多关于位运算的知识，有了重新整理位运算技巧的想法。从今天起我就开始写这一系列位运算讲解文章，与其说是原来那篇文章的follow-up，不如说是一个r
jsearch的索引文件结构 yangshangchuan 搜索引擎 jsearch 全文检索信息检索 word分词
jsearch是一个高性能的全文检索工具包，基于倒排索引，基于java8，类似于lucene，但更轻量级。 jsearch的索引文件结构定义如下： 1、一个词的索引由=分割的三部分组成：第一部分是词第二部分是这个词在多少