【目标检测算法实现系列】Keras实现Faster R-CNN算法(一)
【目标检测算法实现系列】Keras实现Faster R-CNN算法(二)
【目标检测算法实现系列】Keras实现Faster R-CNN算法(三)
经过上面几篇文章,实现了Faster RCNN中的所有模块,这次来具体看下训练和测试过程
from keras_faster_rcnn import config, data_generators, data_augment, losses
from keras_faster_rcnn import net_model, roi_helper, RoiPoolingConv, voc_data_parser
from keras.optimizers import Adam, SGD, RMSprop
from keras.utils import generic_utils
from keras.layers import Input
from keras.models import Model
from keras import backend as K
import numpy as np
import time
import pprint
import pickle
# Parse the raw VOC-style dataset: image records, per-class box counts,
# and the class-name -> index mapping.
all_imgs, classes_count, class_mapping = voc_data_parser.get_data("data")

# Ensure the background class exists; it always takes the last index.
if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

pprint.pprint(classes_count)
print('类别数 (包含背景) = {}'.format(len(classes_count)))

num_imgs = len(all_imgs)

# Split records by the image-set tag that the parser attached to each one.
train_imgs = [s for s in all_imgs if s['imageset'] == 'train']  # training set
val_imgs = [s for s in all_imgs if s['imageset'] == 'val']      # validation set
test_imgs = [s for s in all_imgs if s['imageset'] == 'test']    # test set

print('训练样本个数 {}'.format(len(train_imgs)))
print('验证样本个数 {}'.format(len(val_imgs)))
print('测试样本个数 {}'.format(len(test_imgs)))

C = config.Config()  # global configuration (anchors, strides, paths, ...)
C.class_mapping = class_mapping

# Persist the config so the inference script can reproduce the exact
# preprocessing and anchor settings used during training.
config_output_filename = "config/config.pickle"
with open(config_output_filename, "wb") as config_f:
    pickle.dump(C, config_f)
print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
    config_output_filename))
# Iterators yielding RPN training samples: (image tensor, anchor targets, raw record).
data_gen_train = data_generators.get_anchor_data_gt(train_imgs, classes_count, C, mode='train')
data_gen_val = data_generators.get_anchor_data_gt(val_imgs, classes_count, C, mode='val')
data_gen_test = data_generators.get_anchor_data_gt(test_imgs, classes_count, C, mode='val')

# Network inputs: an image of arbitrary height/width with 3 channels,
# and a variable-length batch of ROIs, each (x, y, w, h).
img_input = Input(shape=(None, None, 3))
roi_input = Input(shape=(None, 4))

# model_rpn:        image -> RPN classification and regression outputs
# model_classifier: (image, selected ROIs) -> final class scores and box regressions
shared_layers = net_model.base_net_vgg(img_input)  # VGG16 feature extractor

# RPN head on top of the shared feature map.
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = net_model.rpn_net(shared_layers, num_anchors)

# Detection head: ROI pooling followed by fully connected layers.
classifier = net_model.roi_classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count))

model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)
# Wraps both sub-models so their weights can be saved/loaded as one file.
model_all = Model([img_input, roi_input], rpn[:2] + classifier)
# Try to resume from a previous checkpoint; if that fails, fall back to
# the ImageNet-pretrained base weights; otherwise train from scratch.
# BUGFIX: the original used bare `except:`, which also swallows
# KeyboardInterrupt/SystemExit; narrowed to `except Exception`.
try:
    print('loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)
except Exception:
    print('没有找到上一次的训练模型')
    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except Exception:
        print('没有找到预训练的模型参数')

optimizer = Adam(lr=1e-5)
optimizer_classifier = Adam(lr=1e-5)
# RPN: per-anchor objectness loss + bounding-box regression loss.
model_rpn.compile(optimizer=optimizer,
                  loss=[losses.rpn_cls_loss(num_anchors), losses.rpn_regr_loss(num_anchors)])
# Detector: class loss + regression loss over the foreground classes
# (len(classes_count) - 1 excludes the background class).
model_classifier.compile(optimizer=optimizer_classifier,
                         loss=[losses.final_cls_loss, losses.final_regr_loss(len(classes_count) - 1)],
                         metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
# model_all is compiled only so Keras allows saving its weights; it is never trained directly.
model_all.compile(optimizer='sgd', loss='mae')
epoch_length = 1000  # iterations per "epoch": losses are averaged over this window
num_epochs = 2000
iter_num = 0   # position within the current window
train_step = 0  # total classifier training steps performed

# Per-iteration losses for the current window.
# BUGFIX: renamed from `losses`, which shadowed the imported `losses` module.
# Columns: rpn_cls, rpn_regr, final_cls, final_regr, final_acc.
loss_history = np.zeros((epoch_length, 5))

start_time = time.time()
best_loss = np.inf

print('Starting training')
for epoch_num in range(num_epochs):
    progbar = generic_utils.Progbar(epoch_length)
    print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
    while True:
        # Fetch one preprocessed image together with its RPN anchor targets.
        X, Y, img_data = next(data_gen_train)

        # Step 1: jointly train the base net and the RPN.
        loss_rpn = model_rpn.train_on_batch(X, Y)

        # Step 2: run the RPN forward to produce region proposals.
        P_rpn = model_rpn.predict_on_batch(X)
        R = roi_helper.rpn_to_roi(P_rpn[0], P_rpn[1], C, use_regr=True, overlap_thresh=0.7,
                                  max_boxes=300)

        # Step 3: build the ROI-pooling inputs plus the classification (Y1)
        # and regression (Y2) targets for the detection head.
        X2, Y1, Y2, IouS = roi_helper.calc_roi(R, img_data, C, class_mapping)
        if X2 is None:
            # No proposal overlapped the ground truth enough; skip this image.
            continue

        # Step 4: train the detection head on the sampled ROIs.
        loss_class = model_classifier.train_on_batch([X, X2], [Y1, Y2])
        train_step += 1

        loss_history[iter_num, 0] = loss_rpn[1]    # rpn_cls_loss
        loss_history[iter_num, 1] = loss_rpn[2]    # rpn_regr_loss
        loss_history[iter_num, 2] = loss_class[1]  # final_cls_loss
        loss_history[iter_num, 3] = loss_class[2]  # final_regr_loss
        loss_history[iter_num, 4] = loss_class[3]  # final_acc
        iter_num += 1

        progbar.update(iter_num,
                       [('rpn_cls', np.mean(loss_history[:iter_num, 0])),
                        ('rpn_regr', np.mean(loss_history[:iter_num, 1])),
                        ('detector_cls', np.mean(loss_history[:iter_num, 2])),
                        ('detector_regr', np.mean(loss_history[:iter_num, 3]))])

        if iter_num == epoch_length:  # window complete: report averages, maybe checkpoint
            loss_rpn_cls = np.mean(loss_history[:, 0])
            loss_rpn_regr = np.mean(loss_history[:, 1])
            loss_class_cls = np.mean(loss_history[:, 2])
            loss_class_regr = np.mean(loss_history[:, 3])
            class_acc = np.mean(loss_history[:, 4])

            if C.verbose:
                print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                print('Loss RPN regression: {}'.format(loss_rpn_regr))
                print('Loss Detector classifier: {}'.format(loss_class_cls))
                print('Loss Detector regression: {}'.format(loss_class_regr))
                print('Elapsed time: {}'.format(time.time() - start_time))

            curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
            iter_num = 0
            start_time = time.time()

            # Checkpoint whenever the combined loss improves.
            if curr_loss < best_loss:
                if C.verbose:
                    print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                best_loss = curr_loss
                model_all.save_weights(C.model_path)
            break
贴出来一部分训练过程如下:
import os
import cv2
import numpy as np
import sys
import pickle
import time
from keras_faster_rcnn import config, roi_helper, net_model
from keras import backend as K
from keras.layers import Input
from keras.models import Model
# Restore the exact configuration that was pickled at training time.
config_output_filename = "config/config.pickle"
with open(config_output_filename, "rb") as config_f:
    C = pickle.load(config_f)

# Data augmentation must be disabled for inference.
C.use_horizontal_flips = False
C.use_vertical_flips = False
C.rot_90 = False

test_img_path = "test"

class_mapping = C.class_mapping
if "bg" not in class_mapping:
    class_mapping["bg"] = len(class_mapping)
# Invert the mapping: class index -> class name.
class_mapping = {v: k for k, v in class_mapping.items()}
# Pick a random drawing colour per class name.
class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}

# Model inputs. Unlike training, the detector head consumes a precomputed
# feature map so the base net only has to run once per image.
img_input = Input(shape=(None, None, 3))
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=(None, None, 512))

# VGG feature extractor plus the RPN head.
shared_layers = net_model.base_net_vgg(img_input)
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layer_out = net_model.rpn_net(shared_layers, num_anchors)
# ROI pooling plus the final classification / regression outputs.
final_classifer_reg = net_model.roi_classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping))

model_rpn = Model(img_input, rpn_layer_out)
model_final_classifer_reg_only = Model([feature_map_input, roi_input], final_classifer_reg)
model_final_classifer_reg = Model([feature_map_input, roi_input], final_classifer_reg)

# Load the trained weights (shared by name with the training models).
print("Loading weights from {}".format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)
model_final_classifer_reg.load_weights(C.model_path, by_name=True)

# Compiled only so Keras allows predict(); the losses are never used.
model_rpn.compile(optimizer="sgd", loss="mse")
model_final_classifer_reg.compile(optimizer="sgd", loss="mse")

all_imgs = []
classes = {}
bbox_threshold = 0.8  # minimum class probability for a detection to be kept
visualise = True
def image_Preprocessing(img, C):
    '''
    Apply the same preprocessing used at training time: resize so the
    shorter side equals C.im_size (keeping aspect ratio), convert
    BGR -> RGB, subtract the per-channel means, scale, and add a batch axis.
    :param img: BGR image as returned by cv2.imread
    :param C: config object providing im_size, img_channel_mean, img_scaling_factor
    :return: (image batch of shape (1, H, W, 3) as float32, resize ratio)
    '''
    height, width, _ = img.shape
    # Scale factor that maps the shorter side onto C.im_size.
    ratio = float(C.im_size) / min(width, height)
    if width < height:
        new_width, new_height = C.im_size, int(height * ratio)
    else:
        new_width, new_height = int(width * ratio), C.im_size
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)

    x_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
    # Per-channel mean subtraction followed by global scaling.
    for channel in range(3):
        x_img[:, :, channel] -= C.img_channel_mean[channel]
    x_img /= C.img_scaling_factor
    return np.expand_dims(x_img, axis=0), ratio
for idx, img_name in enumerate(sorted(os.listdir(test_img_path))):  # iterate over all test images
    if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
        continue
    print("test image name:{}".format(img_name))
    st = time.time()
    filepath = os.path.join(test_img_path, img_name)
    img = cv2.imread(filepath)

    # Preprocess exactly as during training.
    X, ratio = image_Preprocessing(img, C)

    # Run the base net + RPN once; feature_map is reused by the detector head.
    [Y1, Y2, feature_map] = model_rpn.predict(X)

    # Convert RPN outputs into region proposals.
    Rois = roi_helper.rpn_to_roi(Y1, Y2, C, overlap_thresh=0.7)

    # (x1, y1, x2, y2) -> (x, y, w, h)
    Rois[:, 2] -= Rois[:, 0]
    Rois[:, 3] -= Rois[:, 1]

    bboxes = {}  # class name -> list of predicted boxes (resized-image coordinates)
    probs = {}   # class name -> list of class probabilities

    for jk in range(Rois.shape[0] // C.num_rois + 1):  # process C.num_rois ROIs per batch
        if jk == Rois.shape[0] // C.num_rois:
            # Last (partial) batch: pad up to C.num_rois by repeating the first ROI.
            rois = np.expand_dims(Rois[C.num_rois * jk:, :], axis=0)
            if rois.shape[1] == 0:
                break
            rois_zero = np.zeros((rois.shape[0], C.num_rois, rois.shape[2]))
            rois_zero[:, :rois.shape[1], :] = rois
            rois_zero[:, rois.shape[1]:, :] = rois[0, 0, :]
            rois = rois_zero
        else:
            rois = np.expand_dims(Rois[C.num_rois * jk: C.num_rois * (jk + 1), :], axis=0)
            if rois.shape[1] == 0:
                break

        # Classify and regress each ROI against the shared feature map.
        [P_cls, P_regr] = model_final_classifer_reg_only.predict([feature_map, rois])

        for ii in range(P_cls.shape[1]):  # one prediction per ROI
            # Skip low-confidence predictions and the background class (last index).
            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_num = np.argmax(P_cls[0, ii, :])
            cls_name = class_mapping[cls_num]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = rois[0, ii, :]
            # Undo the regression-target normalisation, then refine the box.
            tx, ty, tw, th = P_regr[0, ii, 4 * cls_num: 4 * (cls_num + 1)]
            tx /= C.classifier_regr_std[0]
            ty /= C.classifier_regr_std[1]
            tw /= C.classifier_regr_std[2]
            th /= C.classifier_regr_std[3]
            x, y, w, h = roi_helper.apply_regr(x, y, w, h, tx, ty, tw, th)

            # Map from feature-map coordinates back to the resized image.
            bboxes[cls_name].append([C.rpn_stride * x, C.rpn_stride * y,
                                     C.rpn_stride * (x + w), C.rpn_stride * (y + h)])
            # BUGFIX: store the class probability, not the class index (cls_num),
            # so NMS ranks boxes correctly and the displayed score is meaningful.
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []
    for key in bboxes:
        bbox = np.array(bboxes[key])
        # Per-class non-maximum suppression.
        new_boxes, new_probs = roi_helper.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            # BUGFIX: use true division (/) instead of floor division (//) when
            # mapping back to the original image; floor-dividing by a float
            # ratio truncated the coordinates before rounding.
            real_x1 = int(round(x1 / ratio))
            real_y1 = int(round(y1 / ratio))
            real_x2 = int(round(x2 / ratio))
            real_y2 = int(round(y2 / ratio))

            cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                          (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])), 2)

            textLabel = "{}:{}".format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            # Draw a filled white label box with a black border, then the text.
            retval, baseLine = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
            textOrg = (real_x1, real_y1 - 0)
            cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                          (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (0, 0, 0), 2)
            cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                          (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    cv2.imshow('img', img)
    cv2.waitKey(0)
    cv2.imwrite('./results_imgs/{}.png'.format(idx), img)
检测结果如下:
到此,整个Keras实现Faster RCNN模型的系列文章就结束了
本章完整代码以及VOC2012数据集百度网盘下载,请关注我自己的公众号 AI计算机视觉工坊,回复【代码】和【数据集】获取。本公众号不定期推送机器学习,深度学习,计算机视觉等相关文章,欢迎大家和我一起学习,交流。