许多文章都是基于 R-CNN 系列做的 Person Search,同时 R-CNN 系列的算法也都非常经典,所以今天准备详细地学习一下 Fast R-CNN。代码网址: https://github.com/rbgirshick/fast-rcnn
cd /home/mmt/Desktop/fast-rcnn/caffe-fast-rcnn
cp Makefile.config.example Makefile.config
然后修改里面的内容
WITH_PYTHON_LAYER := 1
USE_CUDNN := 1
将这两句前面的 # 号去掉
然后
make -j8
将Makefile.config中的
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
修改为:
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu
将 Makefile中下一行修改如下:
LIBRARIES += glog gflags protobuf leveldb snappy \
lmdb boost_system hdf5_serial_hl hdf5_serial m \
opencv_core opencv_highgui opencv_imgproc
重新编译 make -j8
将BVLC(https://github.com/BVLC/caffe)中的下列文件copy 到相应的文件夹,即将这2个文件夹里的cudnn开头的文件全部换成新的,例如:
./include/caffe/util/cudnn.hpp
./src/caffe/layers/cudnn_conv_layer.cpp
./src/caffe/layers/cudnn_conv_layer.cu
./src/caffe/layers/cudnn_relu_layer.cpp
./src/caffe/layers/cudnn_relu_layer.cu
./src/caffe/layers/cudnn_sigmoid_layer.cpp
./src/caffe/layers/cudnn_sigmoid_layer.cu
./src/caffe/layers/cudnn_tanh_layer.cpp
./src/caffe/layers/cudnn_tanh_layer.cu
make clean
make -j8
解决方法:
在Makefile中下一行的末尾添加 opencv_imgcodecs
修改如下:
LIBRARIES += glog gflags protobuf leveldb snappy \
lmdb boost_system hdf5_serial_hl hdf5_serial m \
opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs
make clean
make -j8
编译成功。
然后
make pycaffe
出现错误
python/caffe/_caffe.cpp:10:31: fatal error: numpy/arrayobject.h: No such file or directory
解决方法:
将 Makefile.config 中下面这一行的 numpy 路径加上 local(即把 /usr/lib/python2.7/... 改为 /usr/local/lib/python2.7/...),修改后如下:
PYTHON_INCLUDE := /usr/include/python2.7 \
/usr/local/lib/python2.7/dist-packages/numpy/core/include
make pycaffe,编译成功。
接下来,运行Fast-rcnn中的demo,看看是否能够运行成功。
cd /home/mmt/Desktop/fast-rcnn
下载models
./data/scripts/fetch_fast_rcnn_models.sh
./tools/demo.py
出现错误
ImportError: No module named cython_bbox
回到lib文件夹里没找到。重新进入到lib里,然后,make 一下。可能是之前make clean 把编译的都删掉了。
./tools/demo.py
出现错误
Floating point exception
网上说是由于使用高版本的gcc glibc 编译后在低版本的上运行导致.但是不想更改这个版本,怕损坏之前其他程序配好的环境。(以后有时间可以试一下)
但是找不到其他的解决办法。
因为之前编译过 faster-rcnn,它里面的 Caffe 是基于 fast-rcnn 的 Caffe 做的,因此将其 caffe 拷贝过来,放到 fast-rcnn 里,重新编译。(有些人可能会说,如果这样做不就变成 faster-rcnn 了吗?并不是的,faster 与 fast 的代码区别在于 lib 文件夹以及 net 模型,caffe 并没有区别。)
运行 ./tools/demo.py
- 二、代码
静下心来,安静的看代码(带上耳机,听自己喜欢的音乐,很容易静下心来!!)
首先,看一下demo的代码:
#!/usr/bin/env python
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""
Demo script showing detections in sample images.
See README.md for installation instructions before running.
"""
import _init_paths
from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from utils.cython_nms import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import caffe, os, sys, cv2
import argparse
# Class names of the dataset. '__background__' comes first; the position of
# a name in this tuple is what CLASSES.index() returns for it.
CLASSES = (
    '__background__',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

# Network types that can be selected. Each key maps to a pair:
# (model directory name, caffemodel file name).
NETS = {
    'vgg16': (
        'VGG16',
        'vgg16_fast_rcnn_iter_40000.caffemodel',
    ),
    'vgg_cnn_m_1024': (
        'VGG_CNN_M_1024',
        'vgg_cnn_m_1024_fast_rcnn_iter_40000.caffemodel',
    ),
    'caffenet': (
        'CaffeNet',
        'caffenet_fast_rcnn_iter_40000.caffemodel',
    ),
}
def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw detected bounding boxes.

    Args:
        im: Image array. Its third axis is reordered with (2, 1, 0) before
            display -- presumably BGR (as loaded by cv2.imread) to RGB;
            confirm against the caller.
        class_name: Label used in each box caption and in the figure title.
        dets: 2-D array with one detection per row. Columns 0-3 are used as
            the (x1, y1, x2, y2) box corners, the last column as the score.
        thresh: Minimum score a detection needs in order to be drawn.

    Returns:
        None. When no detection reaches ``thresh`` the function returns
        before any figure is created.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if inds.size == 0:
        return

    # Swap the first and last channel for display.
    im = im[:, :, (2, 1, 0)]
    _, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    for i in inds:
        x1, y1, x2, y2 = dets[i, :4]
        score = dets[i, -1]

        # Rectangle takes (x, y) of one corner plus width and height.
        ax.add_patch(
            plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                          fill=False, edgecolor='red', linewidth=3.5)
        )
        # Caption sits 2 units above the box's (x1, y1) corner.
        ax.text(x1, y1 - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    title = ('{} detections with '
             'p({} | box) >= {:.1f}').format(class_name, class_name, thresh)
    ax.set_title(title, fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
def demo(net, image_name, classes):
    """Detect object classes in an image using pre-computed object proposals.

    Args:
        net: Network object handed to ``im_detect``.
        image_name: Base name of the demo image. ``<image_name>.jpg`` and
            ``<image_name>_boxes.mat`` are read from ``data/demo`` under
            ``cfg.ROOT_DIR``.
        classes: Iterable of class names (entries of ``CLASSES``) to report
            and visualize.

    Raises:
        IOError: If the demo image cannot be read.
        ValueError: If a name in ``classes`` is not in ``CLASSES``.
    """
    demo_dir = os.path.join(cfg.ROOT_DIR, 'data', 'demo')

    # Load pre-computed Selective Search object proposals: the 'boxes'
    # variable of the .mat file. Per the original note this is an R x 4
    # matrix with one row per proposal.
    # NOTE(review): the original note describes the 4 columns as centre point
    # plus width/height, but vis_detections in this file treats box columns
    # as (x1, y1, x2, y2) corners -- confirm the proposal format.
    box_file = os.path.join(demo_dir, image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(demo_dir, image_name + '.jpg')
    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise IOError('{:s} could not be read as an image.'.format(im_file))

    # Detect all object classes and regress object bounds.
    # scores holds one column per class and boxes four columns per class
    # (the author observed 2888 x 21 and 2888 x 84 in their run).
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each requested class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        # Position of the class in CLASSES selects its score/box columns.
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]

        # Keep only proposals scoring at least CONF_THRESH for this class.
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]

        # One row per surviving proposal: four box columns, then the score
        # (46 x 5 in the author's run).
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)

        # Non-maximum suppression returns the row indices to keep.
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print('All {} detections with p({} | box) >= {:.1f}'.format(
            cls, cls, CONF_THRESH))
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
# Without default values, some of the demo's parameters would have to be
# typed in the terminal. See https://www.cnblogs.com/zknublx/p/6106343.html
def parse_args():
    """Parse input arguments.

    Returns:
        argparse.Namespace with:
            gpu_id: GPU device id (int, default 0).
            cpu_mode: True when --cpu was given (default False).
            demo_net: One of the keys of NETS (default 'vgg16').
    """
    parser = argparse.ArgumentParser(description='Fast R-CNN demo')
    parser.add_argument('--gpu', dest='gpu_id',
                        help='GPU device id to use [0]',
                        default=0, type=int)
    parser.add_argument('--cpu', dest='cpu_mode',
                        help='Use CPU mode (overrides --gpu)',
                        action='store_true')
    # sorted() gives argparse a plain list in a stable order for --help.
    parser.add_argument('--net', dest='demo_net',
                        help='Network to use [vgg16]',
                        choices=sorted(NETS), default='vgg16')

    return parser.parse_args()
if __name__ == '__main__':
    args = parse_args()

    # os.path.join combines several path components into a single path.
    # cfg.ROOT_DIR: per the author's note, fast-rcnn's own config code
    # determines this path automatically.
    prototxt = os.path.join(cfg.ROOT_DIR, 'models', NETS[args.demo_net][0],
                            'test.prototxt')

    # Debug output of the resolved locations. On the author's machine the
    # three prints below produced:
    #   /home/mmt/eclipse-workspace/day9/src/fast-rcnn
    #   /home/mmt/eclipse-workspace/day9/src/fast-rcnn/models/VGG16/test.prototxt
    #   vgg16
    print(cfg.ROOT_DIR)
    print(prototxt)
    print(args.demo_net)

    caffemodel = os.path.join(cfg.ROOT_DIR, 'data', 'fast_rcnn_models',
                              NETS[args.demo_net][1])

    # If the model file is missing, tell the user how to download it.
    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/scripts/'
                       'fetch_fast_rcnn_models.sh?').format(caffemodel))

    # Select the Caffe compute mode; --cpu takes precedence over --gpu.
    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    print('\n\nLoaded network {:s}'.format(caffemodel))

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Demo for data/demo/000004.jpg')
    demo(net, '000004', ('car',))

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Demo for data/demo/001551.jpg')
    demo(net, '001551', ('sofa', 'tvmonitor'))

    # Show the figures prepared by the demo() calls above.
    plt.show()