代码地址
https://github.com/xiaoxu1025/fast-rcnn-keras
对于 RCNN 和 Faster-RCNN的实现地址如下
CSDN链接地址:
https://blog.csdn.net/xiaoxu1025/article/details/104134569 RCNN系列之-RCNN keras实现
https://blog.csdn.net/xiaoxu1025/article/details/104127684 RCNN系列之-Faster-RCNN keras实现
github链接地址:
https://github.com/xiaoxu1025/rcnn-keras
https://github.com/xiaoxu1025/fast-rcnn-keras
对代码做几点说明:
1. 数据集采用的是 Pascal VOC 2. 特征抽取采用 keras 自带的 vgg16 3. roi pooling 没有参与反向传播
测试部分我没有实现(不想写了,没啥意思)。根据论文所说:For each test RoI r, the forward pass outputs a class posterior probability distribution p and a set of predicted bounding-box offsets relative to r (each of the K classes gets its own refined bounding-box prediction)。这和RCNN的实现差不多,也就是对每个类分别进行非极大值抑制。
这仅仅是一个简单实现,和大家交流下而已。有兴趣的朋友可以下载下来看一下,有什么见解可以在下面留言。
我只是跑了一下,然后就暂停了:没有gpu,训练时间太长。
训练的代码如下:
from voc_annotation import VOCAnnotation
from voc_data import VocData
from models.model import FastRCNN
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import SGD
import config as cfg
if __name__ == '__main__':
    # Training entry point: builds the VOC 2007 train/val generators, compiles
    # the FastRCNN model and runs a single 50-epoch training stage.
    log_dir = 'logs/000/'
    batch_size = 1

    voc_train_annotation = VOCAnnotation(2007, 'train', '/Users/lx/segment_data', './data/voc_classes.txt')
    voc_train_data = VocData('./data/2007_train.txt', voc_train_annotation)
    voc_val_annotation = VOCAnnotation(2007, 'val', '/Users/lx/segment_data', './data/voc_classes.txt')
    voc_val_data = VocData('./data/2007_val.txt', voc_val_annotation)

    # Pascal VOC has 20 object classes.
    model = FastRCNN(20)

    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3,
    )
    # NOTE(review): the two callbacks below are constructed but never handed
    # to fit_generator, so they currently have no effect on training.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    # The model's forward pass already returns the loss value, so the Keras
    # "loss function" simply forwards y_pred and ignores y_true.
    model.compile(optimizer=SGD(lr=1e-3), loss=lambda y_true, y_pred: y_pred)

    # Six inputs, in the order FastRCNN.call unpacks them.
    input_shapes = [
        (None, cfg.DEFAUTL_IMAGE_SIZE, cfg.DEFAUTL_IMAGE_SIZE, 3),
        (None, None, 1),
        (None, None, 5),
        (None, None, 80),
        (None, None, 80),
        (None, None, 80),
    ]
    model.build(input_shape=input_shapes)

    train_steps = max(1, voc_train_data.example_nums // batch_size)
    val_steps = max(1, voc_val_data.example_nums // batch_size)
    model.fit_generator(
        voc_train_data.data_generator_wrapper(),
        steps_per_epoch=train_steps,
        validation_data=voc_val_data.data_generator_wrapper(),
        validation_steps=val_steps,
        epochs=50,
        initial_epoch=0,
        callbacks=[logging, checkpoint],
    )
    model.save_weights(log_dir + 'trained_weights_stage_1.h5')
损失函数的实现
import tensorflow as tf
import config as cfg
def fast_loss(args):
    """Combine the classification loss and the box-regression loss.

    :param args: six-element sequence, unpacked in this order:
        cls_output (class logits), labels (class ids), bbox_output
        (predicted box offsets), bbox_targets, bbox_inside_weights,
        bbox_outside_weights.
    :return: scalar tensor ``cls_loss + cfg.TRAIN_RPN_BBOX_LAMBDA * bbox_loss``.
    """
    (cls_output, labels, bbox_output,
     bbox_targets, bbox_inside_weights, bbox_outside_weights) = args

    # Classification term: mean sparse softmax cross-entropy.
    # tf.squeeze drops every size-1 dimension of the label tensor.
    int_labels = tf.cast(labels, dtype=tf.int32)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=cls_output, labels=tf.squeeze(int_labels))
    cls_loss = tf.reduce_mean(cross_entropy)

    # Regression term: smooth L1 of the inside-weighted residual, scaled by
    # the outside weights, summed over axis 1 and then averaged.
    # NOTE(review): which dimension axis 1 is depends on the input rank -
    # confirm against the data generator.
    residual = tf.multiply(bbox_inside_weights, bbox_output - bbox_targets)
    penalty = smooth_l1(residual, 1.0)
    weighted_penalty = tf.multiply(bbox_outside_weights, penalty)
    roi_bbox_loss = tf.reduce_mean(tf.reduce_sum(weighted_penalty, axis=1))
    roi_bbox_loss = cfg.TRAIN_RPN_BBOX_LAMBDA * roi_bbox_loss

    return cls_loss + roi_bbox_loss
def smooth_l1(x, sigma):
    """Element-wise smooth L1 penalty.

                      0.5 * (sigma * x)^2     if |x| < 1 / sigma^2
        smoothL1(x) = {
                      |x| - 0.5 / sigma^2     otherwise

    :param x: tensor of residuals.
    :param sigma: scalar controlling where the quadratic region ends.
    :return: tensor with the same shape as ``x``.
    """
    # No variables are created here, so a plain name scope is sufficient;
    # tf.variable_scope only exists in the TF1 API, tf.name_scope in both.
    with tf.name_scope('smooth_l1'):
        abs_x = tf.abs(x)
        is_close = tf.less(abs_x, 1 / sigma ** 2)
        quadratic = 0.5 * (sigma * x) ** 2
        linear = abs_x - 0.5 / sigma ** 2
        return tf.where(is_close, quadratic, linear)
模型的实现:
from tensorflow.keras import Model
from models.vgg16_body import get_model_body
from tensorflow.keras.layers import Dense, Flatten, Dropout, Lambda
from roi.roi_proposal import roi_proposal
from fast_loss import fast_loss as loss
class FastRCNN(Model):
    """Fast R-CNN training model whose forward pass returns the loss.

    The model chains a VGG16 feature extractor, an ROI pooling Lambda layer,
    two 4096-unit fully connected layers with dropout, and two heads (class
    scores and per-class box offsets). ``call`` feeds both heads together
    with the training targets into ``fast_loss`` and returns the result.
    """

    def __init__(self, num_classes, keep_prob=0.5):
        """
        :param num_classes: number of foreground classes (background excluded).
        :param keep_prob: probability of KEEPING a unit in the dropout layers.
        """
        super(FastRCNN, self).__init__()
        self._num_classes = num_classes
        self._vgg16 = get_model_body()
        # ROI pooling does not take part in back-propagation (author's note).
        self._roi_pooling = Lambda(roi_proposal)
        self._flatten = Flatten()
        # Keras Dropout expects the fraction of units to DROP, so the keep
        # probability has to be converted. The default 0.5 is unaffected.
        drop_rate = 1.0 - keep_prob
        self._fc1 = Dense(4096, activation='tanh')
        self._dropout1 = Dropout(drop_rate)
        self._fc2 = Dense(4096, activation='tanh')
        self._dropout2 = Dropout(drop_rate)
        # Class head: k foreground classes plus background -> k + 1 scores.
        self._fc_cls = Dense(num_classes + 1)
        # Box head: 4 regression values for each of the k classes.
        self._fc_bbox = Dense(num_classes * 4)
        # Loss computation wrapped as a layer.
        self._loss = Lambda(loss, name='fast_loss')

    def call(self, inputs, mask=None):
        """Run the forward pass and return the training loss.

        :param inputs: six tensors in this order: image_data, labels,
            regions_target, bbox_targets, bbox_inside_weights,
            bbox_outside_weights.
        :param mask: unused.
        :return: the loss tensor produced by ``fast_loss``.
        """
        image_data, labels, regions_target, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
            inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5]
        # Author's note: feature map is (None, 36, 36, 512).
        x = self._vgg16(image_data)
        # Author's note: selective search may be mis-parameterised and cannot
        # always sample 128 qualifying regions; intended pooled shape is
        # (None, 128, 7, 7, 512).
        x = self._roi_pooling([x, regions_target])
        # NOTE(review): Flatten collapses every non-batch dimension, which
        # would merge the ROI axis too - confirm what roi_proposal returns.
        x = self._flatten(x)
        x = self._fc1(x)
        x = self._dropout1(x)
        x = self._fc2(x)
        x = self._dropout2(x)
        # Author's note: (batch_size, 128, 21).
        cls_output = self._fc_cls(x)
        # Author's note: (batch_size, 128, 80).
        bbox_output = self._fc_bbox(x)
        total_loss = self._loss(
            [cls_output, labels, bbox_output, bbox_targets, bbox_inside_weights, bbox_outside_weights])
        return total_loss
roi pooling的两种实现:
import tensorflow as tf
import numpy as np
import config as cfg
def roi_pool(feature_maps, rois, im_dims=(cfg.DEFAUTL_IMAGE_SIZE, cfg.DEFAUTL_IMAGE_SIZE)):
    """ROI max pooling, delegated to the Python implementation.

    Wraps ``_roi_pool_py`` in ``tf.py_function`` so it can be called on
    tensors, then converts the py_function output into a single tensor.

    :param feature_maps: feature-map tensor.
    :param rois: region tensor forwarded unchanged to ``_roi_pool_py``.
    :param im_dims: image size pair forwarded to ``_roi_pool_py``.
    :return: float32 tensor of pooled features.
    """
    # Tout is a one-element list, so py_function presumably hands back a
    # one-element list; convert_to_tensor packs it into one tensor.
    py_outputs = tf.py_function(
        func=_roi_pool_py,
        inp=[feature_maps, rois, im_dims],
        Tout=[tf.float32],
    )
    return tf.convert_to_tensor(py_outputs)
def _roi_pool_py(feature_maps, regions, im_dims, pool_height=None, pool_width=None):
    """
    ROI max pooling implemented with NumPy.

    Each region is projected from image coordinates onto the feature map,
    split into a ``pool_height x pool_width`` grid of bins, and every bin is
    reduced to its per-channel maximum. Bins that end up empty after clipping
    to the feature map are left at 0.

    :param feature_maps: array-like of shape (batch_size, height, width, channels)
    :param regions: array-like of rows (batch_id, x1, y1, x2, y2) in image
        coordinates; any leading dimensions are flattened into a row list
    :param im_dims: (image_width, image_height) of the input image
    :param pool_height: number of output rows; defaults to cfg.POOL_HEIGHT
    :param pool_width: number of output columns; defaults to cfg.POOL_WIDTH
    :return: float32 array of shape (num_regions, pool_height, pool_width, channels)
    """
    if pool_height is None:
        pool_height = cfg.POOL_HEIGHT
    if pool_width is None:
        pool_width = cfg.POOL_WIDTH

    feature_maps = np.asarray(feature_maps, dtype=np.float32)
    regions = np.asarray(regions, dtype=np.float64).reshape(-1, 5)
    _, height, width, channels = feature_maps.shape

    # Ratio between feature-map size and image size. This must be a true
    # division: floor division yields 0 whenever the map is smaller than the
    # image, which collapses every region onto pixel (0, 0).
    img_w, img_h = float(im_dims[0]), float(im_dims[1])
    spatial_scale_w = width / img_w
    spatial_scale_h = height / img_h

    pooled = np.zeros((regions.shape[0], pool_height, pool_width, channels), dtype=np.float32)
    for idx, region in enumerate(regions):
        # The batch id travels as a float inside the region row.
        batch_data = feature_maps[int(region[0])]

        # Region corners on the feature map.
        roi_start_w = int(round(float(region[1]) * spatial_scale_w))
        roi_start_h = int(round(float(region[2]) * spatial_scale_h))
        roi_end_w = int(round(float(region[3]) * spatial_scale_w))
        roi_end_h = int(round(float(region[4]) * spatial_scale_h))

        # Region extent on the feature map, at least one cell each way.
        roi_height = max(roi_end_h - roi_start_h + 1, 1)
        roi_width = max(roi_end_w - roi_start_w + 1, 1)

        # Fractional size of one pooling bin.
        sub_roi_height = roi_height / pool_height
        sub_roi_width = roi_width / pool_width

        for ph in range(pool_height):
            # Floor the start and ceil the end so that a region smaller than
            # the pooling grid still covers at least one cell per bin.
            hstart = int(np.floor(ph * sub_roi_height))
            hend = int(np.ceil((ph + 1) * sub_roi_height))
            hstart = min(max(roi_start_h + hstart, 0), height)
            hend = min(max(roi_start_h + hend, 0), height)
            for pw in range(pool_width):
                wstart = int(np.floor(pw * sub_roi_width))
                wend = int(np.ceil((pw + 1) * sub_roi_width))
                wstart = min(max(roi_start_w + wstart, 0), width)
                wend = min(max(roi_start_w + wend, 0), width)

                if hend <= hstart or wend <= wstart:
                    # Bin lies entirely outside the feature map: keep 0.
                    continue
                # Max over the spatial window for all channels at once; this
                # is a true maximum even when every value is negative.
                window = batch_data[hstart:hend, wstart:wend, :]
                pooled[idx, ph, pw, :] = window.max(axis=(0, 1))
    return pooled
import tensorflow as tf
import config as cfg
def roi_pool_tf(feature_maps, rois, im_dims=(cfg.DEFAUTL_IMAGE_SIZE, cfg.DEFAUTL_IMAGE_SIZE)):
    """
    ROI pooling approximated with crop_and_resize followed by 2x2 max pooling.

    Only the first element along the leading axis of ``rois`` is used.

    :param feature_maps: feature maps, (batch_size, 36, 36, 512) per the author
    :param rois: shape (batch_size, 128, 5) per the author; each row is
        (batch_id, x1, y1, x2, y2)
    :param im_dims: image size pair used to normalise the box coordinates
    :return: pooled features from a 14x14 crop max-pooled with stride 2
    """
    # Index of the image each ROI belongs to (all 0 for a mini-batch of 1).
    box_ind = tf.cast(rois[..., 0], dtype=tf.int32)

    # crop_and_resize needs normalised boxes ordered [y1, x1, y2, x2].
    boxes = rois[..., 1:]
    normalization = tf.cast(tf.stack([im_dims[1], im_dims[0], im_dims[1], im_dims[0]], axis=0),
                            dtype=tf.float32)
    # NOTE(review): the division happens while the columns are still
    # (x1, y1, x2, y2), so x is divided by im_dims[1] and y by im_dims[0].
    # Harmless for the square default - confirm the im_dims convention before
    # using non-square images.
    # tf.truediv replaces tf.div, which only exists in the TF1 API.
    boxes = tf.truediv(boxes, normalization)
    boxes = tf.stack([boxes[..., 1], boxes[..., 0], boxes[..., 3], boxes[..., 2]], axis=-1)  # y1, x1, y2, x2

    # Crop at twice the target resolution, then halve with max pooling.
    crop_size = tf.constant([14, 14])
    # Arguments are positional because the third parameter is named box_ind
    # in TF1 but box_indices in TF2.
    pooled_features = tf.image.crop_and_resize(feature_maps, boxes[0, ...], box_ind[0, ...], crop_size)
    # Max pool down to 7x7.
    pooled_features = tf.nn.max_pool(pooled_features, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return pooled_features
还有些代码就不贴了,可以自行下载。