搞了三个星期,花了将近50G手机流量,终于把这个弄出来了。用faster实时检测行人,17年初的时候,当时自己正在学习DL,发现有人就做出了这个,可以实时监测到人和车辆等各种物体,一直很好奇是如何做到的,前期已经完成了基于faster的相关图片检测任务,最近又要要求搞实时的视频检测,正好可以继续。
网络:faster rcnn
模型:ZF
摄像头:笔记本自带
显卡:gtx860
很是开心,O(∩_∩)O~~,终于自己也搞出了这个
不过,目前有个问题,fps有点低,大概只有8帧左右的样子,现在我终于理解了原作者说的帧速率大概只有15帧左右的样子,这句话。接下来让我想想如何才能提高速度?
-------------------------------------------- 2019-5-13添加 -------------------------------------------
有很多朋友问我是怎么做的,现在贴上我修改的代码。不对之处,还请多指点。
网络的具体改动见代码:
#!/usr/bin/env python
#-*- coding: UTF-8 -*-
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# 实时对相机拍摄的图片进行检测
# --------------------------------------------------------
"""
Demo script showing detections in sample images.
See README.md for installation instructions before running.
"""
import _init_paths
from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from fast_rcnn.nms_wrapper import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import caffe, os, sys, cv2
import argparse
import math
import matplotlib
import time
#import py_camer
reload(sys)
sys.setdefaultencoding('utf-8')
CLASSES = ('__background__',
'person')
NETS = {'vgg16': ('VGG16',
'VGG16_faster_rcnn_final.caffemodel'),
'zf': ('ZF',
'ZF_faster_rcnn_iter_2000.caffemodel')}
#****************** 把检测结果标注在视频上 ************************
def gain_box_score(im, class_name, dets, start_time,time_takes,thresh=0.5):
"""Draw detected bounding boxes."""
inds = np.where(dets[:, -1] >= thresh)[0] #返回置信度大于阈值的窗口下标
if len(inds) == 0:
cv2.imshow("视频检测",im)
else:
im = im[:, :, (2, 1, 0)]
zhfont = matplotlib.font_manager.FontProperties(fname="/usr/share/fonts/truetype/arphic/ukai.ttc") #字体
fig, ax = plt.subplots(figsize=(12, 12))
ax.imshow(im, aspect='equal')
for i in inds:
bbox = dets[i, :4] #坐标位置(Xmin,Ymin,Xmax,Ymax)
score = dets[i, -1] #置信度得分
#- ------------------- 开始画框 -----------------------
x1 = bbox[0]
y1 = bbox[1]
x2 = bbox[2]
y2 = bbox[3]
im = im.astype('uint8')
im = im[:,:,[2,1,0]]
im = im.copy()
end_time = time.time()
current_time = time.ctime() #获得当前系统时间
fps = round(1/(end_time - start_time),2)
cv2.rectangle(im, (x1, y1), (x2, y2), (0, 255, 0), 5) # 用矩形圈出人
cv2.putText(im, str(round(score,2)), (int(x1+x2)/2,int(y2-y2/20)),cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2) #显示物体的得分
cv2.putText(im, str(class_name), (x1,int(y2-y2/20)),cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2) #显示物体的类别
cv2.putText(im, str(current_time), (30,30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2) #显示时间
cv2.putText(im, "fps:"+str(fps), (30,60),cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2) #显示帧率
cv2.putText(im, "takes time :"+str(round(time_takes*1000,1))+"ms", (30,90),cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2) #显示检测时间
cv2.imshow("视频检测",im)
#==================================== 结束 ==============================
def demo(net, image_name, start_time):
"""Detect object classes in an image using pre-computed object proposals."""
im = image_name
# Detect all object classes and regress object bounds
timer = Timer()
timer.tic()
scores, boxes = im_detect(net, im) #检测,返回得分和目标区域所在位置
timer.toc()
time_takes = timer.total_time
#print ('Detection took {:.3f}s for '
# '{:d} object proposals').format(timer.total_time, boxes.shape[0])
# Visualize detections for each class
CONF_THRESH = 0.75
NMS_THRESH = 0.1
for cls_ind, cls in enumerate(CLASSES[1:]):
cls_ind += 1 # because we skipped background
cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
cls_scores = scores[:, cls_ind]
dets = np.hstack((cls_boxes,
cls_scores[:, np.newaxis])).astype(np.float32)
keep = nms(dets, NMS_THRESH)
dets = dets[keep, :]
# ---------------------- 在视频上进行标注 -----------------------
gain_box_score(im, cls, dets, start_time, time_takes, thresh=CONF_THRESH)
def parse_args():
"""Parse input arguments."""
parser = argparse.ArgumentParser(description='Faster R-CNN demo')
parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
default=0, type=int)
parser.add_argument('--cpu', dest='cpu_mode',
help='Use CPU mode (overrides --gpu)',
action='store_true')
parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]',
choices=NETS.keys(), default='zf')
args = parser.parse_args()
return args
if __name__ == '__main__':
cfg.TEST.HAS_RPN = True # Use RPN for proposals
args = parse_args()
prototxt = '/home/hito/py-faster-rcnn/models/pascal_voc/ZF/faster_rcnn_end2end/test.prototxt'
caffemodel = '/home/hito/py-faster-rcnn/data/faster_rcnn_models/ZF_faster_rcnn_iter_2000.caffemodel'
if not os.path.isfile(caffemodel):
raise IOError(('{:s} not found.\nDid you run ./data/script/'
'fetch_faster_rcnn_models.sh?').format(caffemodel))
if args.cpu_mode:
caffe.set_mode_cpu()
else:
caffe.set_mode_gpu()
caffe.set_device(args.gpu_id)
cfg.GPU_ID = args.gpu_id
net = caffe.Net(prototxt, caffemodel, caffe.TEST)
print '\n\nLoaded network {:s}'.format(caffemodel)
# Warmup on a dummy image
im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
for i in xrange(2):
_, _= im_detect(net, im)
# ------------------- 摄像头模块 ----------------
cap=cv2.VideoCapture(0) #打开0号摄像头
while(1):
ret, frame = cap.read() # show a frame
start_time = time.time()
demo(net, frame, start_time)
key = cv2.waitKey(1)
if key == ord('q') or key == ord('Q') or key == 27 : # ESC:27 key: quit program
break
其实主要是做了demo的改动,因为摄像头的检测也是把视频流抽成一张一张的图片,然后才进行处理的。代码的改动对照着原来自带的demo是可以看出来的。祝好运!