RCNN系列之-Fast-RCNN keras实现

代码地址

https://github.com/xiaoxu1025/fast-rcnn-keras

对于 RCNN 和 Faster-RCNN的实现地址如下

CSDN链接地址:

https://blog.csdn.net/xiaoxu1025/article/details/104134569  RCNN系列之-RCNN keras实现

https://blog.csdn.net/xiaoxu1025/article/details/104127684  RCNN系列之-Faster-RCNN keras实现

github链接地址:

https://github.com/xiaoxu1025/rcnn-keras

https://github.com/xiaoxu1025/fast-rcnn-keras

 

对代码做几点说明:

1. 数据集采用的是 Pascal VOC;2. 特征抽取采用 Keras 自带的 VGG16;3. RoI 没有参与反向传播。

测试部分我没有实现(不想写了,没啥意思)。根据论文所说:"For each test RoI r, the forward pass outputs a class posterior probability distribution p and a set of predicted bounding-box offsets relative to r (each of the K classes gets its own refined bounding-box prediction)",测试流程和 RCNN 的实现差不多,也就是对每个类别分别进行非极大值抑制。

这仅仅是一个简单实现,和大家交流下而已。有兴趣的朋友可以下载下来看一下,有什么见解可以在下面留言。

我只是跑了一下,然后就暂停了:没有 GPU,训练时间太长。

 

训练的代码如下:

from voc_annotation import VOCAnnotation
from voc_data import VocData
from models.model import FastRCNN
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import SGD
import config as cfg

if __name__ == '__main__':
    # Training entry point: build the VOC 2007 train/val data sources, create the
    # Fast R-CNN model and fit it with TensorBoard logging and checkpointing.
    log_dir = 'logs/000/'
    voc_root = '/Users/lx/segment_data'
    classes_path = './data/voc_classes.txt'

    train_annotation = VOCAnnotation(2007, 'train', voc_root, classes_path)
    train_data = VocData('./data/2007_train.txt', train_annotation)

    val_annotation = VOCAnnotation(2007, 'val', voc_root, classes_path)
    val_data = VocData('./data/2007_val.txt', val_annotation)

    # Pascal VOC has 20 object classes.
    model = FastRCNN(20)

    tensorboard_cb = TensorBoard(log_dir=log_dir)
    checkpoint_cb = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3,
    )

    # NOTE(review): these two callbacks are constructed but are not part of the
    # callback list handed to fit_generator below, so they have no effect.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    # The model's output is used directly as the loss value, so the Keras loss
    # function simply returns y_pred and ignores y_true.
    model.compile(optimizer=SGD(lr=1e-3), loss=lambda y_true, y_pred: y_pred)

    image_size = cfg.DEFAUTL_IMAGE_SIZE
    model.build(input_shape=[
        (None, image_size, image_size, 3),
        (None, None, 1),
        (None, None, 5),
        (None, None, 80),
        (None, None, 80),
        (None, None, 80),
    ])

    batch_size = 1
    train_steps = max(1, train_data.example_nums // batch_size)
    val_steps = max(1, val_data.example_nums // batch_size)
    model.fit_generator(
        train_data.data_generator_wrapper(),
        steps_per_epoch=train_steps,
        validation_data=val_data.data_generator_wrapper(),
        validation_steps=val_steps,
        epochs=50,
        initial_epoch=0,
        callbacks=[tensorboard_cb, checkpoint_cb],
    )
    model.save_weights(log_dir + 'trained_weights_stage_1.h5')

 

损失函数的实现

import tensorflow as tf
import config as cfg


def fast_loss(args):
    """Combine the classification loss and the box-regression loss.

    :param args: six-element sequence, in order: ``cls_output`` (class
        logits), ``labels``, ``bbox_output``, ``bbox_targets``,
        ``bbox_inside_weights`` and ``bbox_outside_weights``.
    :return: scalar tensor ``cls_loss + cfg.TRAIN_RPN_BBOX_LAMBDA * bbox_loss``.
    """
    (cls_output, labels, bbox_output,
     bbox_targets, bbox_inside_weights, bbox_outside_weights) = args

    # Classification term: mean sparse softmax cross-entropy over the logits.
    # The labels are cast to int32 and every size-1 dimension is squeezed away.
    class_ids = tf.cast(labels, dtype=tf.int32)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=cls_output, labels=tf.squeeze(class_ids))
    cls_loss = tf.reduce_mean(cross_entropy)

    # Regression term: smooth L1 of the inside-weighted residual, scaled by the
    # outside weights, summed along axis 1 and then averaged.
    # NOTE(review): which dimension axis 1 is depends on the rank of the inputs
    # (with or without a leading batch axis) - confirm against the data generator.
    residual = tf.multiply(bbox_inside_weights, bbox_output - bbox_targets)
    weighted_l1 = tf.multiply(bbox_outside_weights, smooth_l1(residual, 1.0))
    bbox_loss = tf.reduce_mean(tf.reduce_sum(weighted_l1, axis=1))
    bbox_loss = cfg.TRAIN_RPN_BBOX_LAMBDA * bbox_loss

    return cls_loss + bbox_loss


def smooth_l1(x, sigma):
    """Element-wise smooth L1 function with transition point ``1 / sigma^2``.

                      0.5 * (sigma * x)^2  if |x| < 1/sigma^2
      smoothL1(x) = {
                      |x| - 0.5/sigma^2    otherwise

    :param x: tensor of residuals.
    :param sigma: scalar controlling where the quadratic part ends.
    :return: tensor produced by ``tf.where`` selecting between both branches.
    """
    with tf.variable_scope('smooth_l1'):
        magnitude = tf.abs(x)
        threshold = 1 / sigma ** 2
        quadratic_part = 0.5 * (sigma * x) ** 2
        linear_part = magnitude - 0.5 / sigma ** 2
        in_quadratic_zone = tf.less(magnitude, threshold)
        return tf.where(in_quadratic_zone, quadratic_part, linear_part)

模型的实现:

from tensorflow.keras import Model
from models.vgg16_body import get_model_body
from tensorflow.keras.layers import Dense, Flatten, Dropout, Lambda
from roi.roi_proposal import roi_proposal
from fast_loss import fast_loss as loss


class FastRCNN(Model):
    """Fast R-CNN training model whose forward pass returns the training loss.

    The model chains a VGG16 feature extractor, an RoI pooling ``Lambda``
    layer, two 4096-unit fully connected layers with dropout, and two heads
    (class scores and per-class box regression). The heads are fed, together
    with the targets supplied as inputs, into a ``Lambda`` wrapping
    ``fast_loss``; that loss value is the output of ``call``.
    """

    def __init__(self, num_classes, keep_prob=0.5):
        """Create the layers.

        :param num_classes: number of foreground classes (background excluded).
        :param keep_prob: probability of KEEPING a unit in the dropout layers;
            must satisfy ``0 < keep_prob <= 1``.
        :raises ValueError: if ``keep_prob`` is outside ``(0, 1]``.
        """
        super(FastRCNN, self).__init__()
        if not 0.0 < keep_prob <= 1.0:
            raise ValueError('keep_prob must be in (0, 1], got %r' % (keep_prob,))
        # Keras ``Dropout`` expects the fraction of units to DROP, so the keep
        # probability has to be converted. (Passing keep_prob directly only
        # happened to be correct for the default of 0.5.)
        drop_rate = 1.0 - keep_prob

        self._num_classes = num_classes
        self._vgg16 = get_model_body()
        # RoI pooling does not take part in back-propagation.
        self._roi_pooling = Lambda(roi_proposal)
        self._flatten = Flatten()
        self._fc1 = Dense(4096, activation='tanh')
        self._dropout1 = Dropout(drop_rate)
        self._fc2 = Dense(4096, activation='tanh')
        self._dropout2 = Dropout(drop_rate)
        # Class head: k + 1 scores (k classes plus background).
        # Original author's shape note: (None, 128, 21).
        self._fc_cls = Dense(num_classes + 1)
        # Box head: 4 regression values for each of the k classes.
        # Original author's shape note: (None, 128, 80).
        self._fc_bbox = Dense(num_classes * 4)
        # Loss computation.
        self._loss = Lambda(loss, name='fast_loss')

    def call(self, inputs, mask=None):
        """Run the forward pass and return the loss.

        :param inputs: sequence whose first six items are, in order:
            image data, labels, region targets (RoIs), bbox targets,
            bbox inside weights and bbox outside weights.
        :param mask: unused.
        :return: output of the ``fast_loss`` Lambda layer.
        """
        image_data, labels, regions_target, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
            inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5]
        # Original author's shape note: (None, 36, 36, 512).
        x = self._vgg16(image_data)
        # Original author's note: selective search seems to have a problem (its
        # parameters may be wrong) - it cannot always sample 128 valid RoIs.
        # Original author's shape note: (None, 128, 7, 7, 512).
        x = self._roi_pooling([x, regions_target])
        x = self._flatten(x)
        x = self._fc1(x)
        x = self._dropout1(x)
        x = self._fc2(x)
        x = self._dropout2(x)
        # Original author's shape note: (batch_size, 128, 21).
        cls_output = self._fc_cls(x)
        # Original author's shape note: (batch_size, 128, 80).
        bbox_output = self._fc_bbox(x)
        return self._loss([cls_output, labels, bbox_output, bbox_targets,
                           bbox_inside_weights, bbox_outside_weights])

roi pooling的两种实现:

import tensorflow as tf
import numpy as np
import config as cfg


def roi_pool(feature_maps, rois, im_dims=(cfg.DEFAUTL_IMAGE_SIZE, cfg.DEFAUTL_IMAGE_SIZE)):
    """RoI pooling computed in Python via ``tf.py_function``.

    :param feature_maps: feature-map tensor forwarded to ``_roi_pool_py``.
    :param rois: RoI tensor forwarded to ``_roi_pool_py``.
    :param im_dims: input image size pair forwarded to ``_roi_pool_py``.
    :return: tensor built from the py_function result list (one float32 output).
    """
    # The tensors are handed to the Python implementation for the computation.
    py_outputs = tf.py_function(_roi_pool_py, [feature_maps, rois, im_dims], [tf.float32])
    return tf.convert_to_tensor(py_outputs)


def _roi_pool_py(feature_maps, regions, im_dims, pool_height=None, pool_width=None):
    """NumPy implementation of RoI max pooling.

    Every region is projected from image coordinates onto the feature map,
    split into a ``pool_height x pool_width`` grid of bins, and each bin is
    max-pooled per channel. Bins that fall completely outside the feature map
    yield 0.

    :param feature_maps: array-like of shape (batch_size, height, width, channels).
    :param regions: array-like whose last axis holds (batch_id, x1, y1, x2, y2)
        in image coordinates; any leading axes are flattened to a list of RoIs.
    :param im_dims: pair (image_width, image_height) of the network input.
    :param pool_height: number of output rows; defaults to ``cfg.POOL_HEIGHT``.
    :param pool_width: number of output columns; defaults to ``cfg.POOL_WIDTH``.
    :return: float32 array of shape (num_rois, pool_height, pool_width, channels).
    """
    if pool_height is None:
        pool_height = cfg.POOL_HEIGHT
    if pool_width is None:
        pool_width = cfg.POOL_WIDTH

    # Inputs may arrive as tensors (e.g. from tf.py_function); work on NumPy data.
    feature_maps = np.asarray(feature_maps, dtype=np.float32)
    regions = np.asarray(regions, dtype=np.float64).reshape(-1, 5)
    im_dims = np.asarray(im_dims, dtype=np.float64).reshape(-1)

    _, height, width, channels = feature_maps.shape
    img_w, img_h = float(im_dims[0]), float(im_dims[1])
    # True division: the feature map is smaller than the image, so an integer
    # floor division would always give a scale of 0.
    spatial_scale_w = width / img_w
    spatial_scale_h = height / img_h

    pooled = np.zeros((regions.shape[0], pool_height, pool_width, channels), dtype=np.float32)
    for idx, region in enumerate(regions):
        batch_index = int(region[0])
        # Region corners on the feature map.
        roi_start_w = int(round(float(region[1]) * spatial_scale_w))
        roi_start_h = int(round(float(region[2]) * spatial_scale_h))
        roi_end_w = int(round(float(region[3]) * spatial_scale_w))
        roi_end_h = int(round(float(region[4]) * spatial_scale_h))
        # Region size on the feature map; malformed RoIs are forced to 1x1.
        roi_height = max(roi_end_h - roi_start_h + 1, 1)
        roi_width = max(roi_end_w - roi_start_w + 1, 1)

        fmap = feature_maps[batch_index]
        for ph in range(pool_height):
            # Bin start is floor(ph * h / H), bin end is ceil((ph + 1) * h / H);
            # integer arithmetic keeps both exact. Using ceil for the end
            # guarantees that bins of small RoIs are not empty.
            hstart = roi_start_h + (ph * roi_height) // pool_height
            hend = roi_start_h + ((ph + 1) * roi_height + pool_height - 1) // pool_height
            # Clip to the feature map.
            hstart = min(max(hstart, 0), height)
            hend = min(max(hend, 0), height)
            if hend <= hstart:
                continue
            for pw in range(pool_width):
                wstart = roi_start_w + (pw * roi_width) // pool_width
                wend = roi_start_w + ((pw + 1) * roi_width + pool_width - 1) // pool_width
                wstart = min(max(wstart, 0), width)
                wend = min(max(wend, 0), width)
                if wend <= wstart:
                    continue
                # Real maximum over the bin for all channels at once (also
                # correct when every activation in the bin is negative).
                pooled[idx, ph, pw, :] = fmap[hstart:hend, wstart:wend, :].max(axis=(0, 1))
    return pooled
import tensorflow as tf
import config as cfg


def roi_pool_tf(feature_maps, rois, im_dims=(cfg.DEFAUTL_IMAGE_SIZE, cfg.DEFAUTL_IMAGE_SIZE)):
    """RoI pooling approximated with ``crop_and_resize`` followed by max pooling.

    Each RoI is cropped from the feature map and resized to 14x14, then reduced
    with a 2x2, stride-2 max pool.

    :param feature_maps: feature maps; original author's shape note
        (batch_size, 36, 36, 512).
    :param rois: RoIs whose last axis is (batch_id, x1, y1, x2, y2); original
        author's shape note (batch_size, 128, 5). Only index 0 of the first
        axis is used.
    :param im_dims: image size pair used to normalise the box coordinates.
        NOTE(review): x is divided by ``im_dims[1]`` and y by ``im_dims[0]``;
        harmless for the square default, confirm the ordering for other sizes.
    :return: pooled feature tensor, one entry per RoI.
    """
    # Source image of each RoI (all zeros for a mini-batch of one image).
    batch_ids = tf.cast(rois[..., 0], dtype=tf.int32)

    # crop_and_resize needs normalised boxes ordered as [y1, x1, y2, x2].
    divisor = tf.cast(tf.stack([im_dims[1], im_dims[0], im_dims[1], im_dims[0]], axis=0),
                      dtype=tf.float32)
    xyxy = tf.div(rois[..., 1:], divisor)
    yxyx = tf.stack([xyxy[..., 1], xyxy[..., 0], xyxy[..., 3], xyxy[..., 2]], axis=-1)

    # Crop every RoI to twice the final resolution ...
    crops = tf.image.crop_and_resize(image=feature_maps,
                                     boxes=yxyx[0, ...],
                                     box_ind=batch_ids[0, ...],
                                     crop_size=tf.constant([14, 14]))
    # ... then max-pool down to 7x7.
    return tf.nn.max_pool(crops, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

还有些代码就不贴了,可以自行下载。 

你可能感兴趣的:(目标检测,人工智能,计算机神经网络)