function rcnn_demo(use_gpu)
% rcnn_demo(use_gpu)
%   Run the R-CNN demo on a test image. Set use_gpu = false to run
%   in CPU mode. (GPU mode is the default.)
%
%   use_gpu  Optional flag (true/false or 1/0). Defaults to true when
%            omitted or empty.
%
%   The demo loads the fine-tuned VOC 2012 model, runs rcnn_detect on
%   ./000084.jpg, and shows the first detection row of class 2 (labelled
%   "bicycle") followed by the first detection row of class 15 (labelled
%   "person"), waiting for a key press between steps.
% ---------------------------------------------------------
% Copyright (c) 2014, Ross Girshick
% This file is part of the R-CNN code and is available
% under the terms of the Simplified BSD License provided in
% LICENSE. Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

rcnn_model_file = './data/rcnn_models/voc_2012/rcnn_model_finetuned.mat';
if ~exist(rcnn_model_file, 'file')
  error('You need to download the R-CNN precomputed models. See README.md for details.');
end

% Default to GPU mode when the caller gives no (or an empty) argument.
if ~exist('use_gpu', 'var') || isempty(use_gpu)
  use_gpu = true;
end

% modes is indexed with flag+1: false -> 'CPU', true -> 'GPU'.
modes = {'CPU', 'GPU'};
fprintf('Running in %s mode\n', modes{use_gpu+1});
fprintf('(To run in %s mode, call rcnn_demo(%d) instead)\n', ...
    modes{~use_gpu+1}, ~use_gpu);
fprintf('Press any key to continue\n');
pause;  % actually wait for the key press announced above

fprintf('Initializing R-CNN model (this might take a little while)\n');
rcnn_model = rcnn_load_model(rcnn_model_file, use_gpu);

im = imread('./000084.jpg');
dets = rcnn_detect(im, rcnn_model);

% show top scoring bicycle detection
showboxes(im, dets{2}(1,:));
title(sprintf('bicycle conf = %.3f', dets{2}(1,end)));

fprintf('Press any key to see the top scoring person detection\n');
pause;  % keep the bicycle figure visible until the user continues

% show top scoring person detection
showboxes(im, dets{15}(1,:));
title(sprintf('person conf = %.3f', dets{15}(1,end)));
% Structure of the rcnn_model variable (partial listing):
%   cnn         1x1 struct
%   cache_name  'v1_finetune_voc_2012_train_iter_70k'
function dets = rcnn_detect(im, rcnn_model)
% dets = rcnn_detect(im, rcnn_model)
%   Run the detection pipeline on one image: propose candidate boxes
%   with selective search, extract and rescale CNN features for each
%   box, score every box against every class detector, and apply
%   non-maximum suppression per class.
%
%   im          Image as returned by imread.
%   rcnn_model  Model struct. Fields read here:
%               training_opts.feat_norm_mean, detectors.W, detectors.B,
%               and classes.
%
%   dets        num_classes x 1 cell array. dets{i} holds the boxes kept
%               by NMS for class i, one per row, as the four box
%               coordinates followed by the class score in the last
%               column.

% compute selective search candidates
fprintf('Computing candidate regions...');
th = tic();
fast_mode = true;
boxes = selective_search_boxes(im, fast_mode);
% compat: change coordinate order from [y1 x1 y2 x2] to [x1 y1 x2 y2]
% (column reindexing swaps the x and y entries of both corners)
boxes = boxes(:, [2 1 4 3]);
fprintf('found %d candidates (in %.3fs).\n', size(boxes,1), toc(th));

% extract features from candidates (one row per candidate box)
fprintf('Extracting CNN features from regions...');
th = tic();
feat = rcnn_features(im, boxes, rcnn_model);
feat = rcnn_scale_features(feat, rcnn_model.training_opts.feat_norm_mean);
fprintf('done (in %.3fs).\n', toc(th));

fprintf('Scoring regions with detectors and applying NMS...');
% compute scores for each candidate [num_boxes x num_classes]
th = tic();
% scores = feat*W + B, with B broadcast across all rows by bsxfun
scores = bsxfun(@plus, feat*rcnn_model.detectors.W, rcnn_model.detectors.B);

% apply NMS to each class and return final scored detections
num_classes = length(rcnn_model.classes);
dets = cell(num_classes, 1);
for i = 1:num_classes
  % append the class-i score as an extra column after the box coordinates
  scored_boxes = cat(2, boxes, scores(:,i));
  keep = nms(scored_boxes, 0.3);
  dets{i} = scored_boxes(keep, :);
end
fprintf('done (in %.3fs)\n', toc(th));
function feat = rcnn_features(im, boxes, rcnn_model)
% feat = rcnn_features(im, boxes, rcnn_model)
%   Compute CNN features on a set of boxes.
%
%   im          is an image in RGB order as returned by imread
%   boxes       are in [x1 y1 x2 y2] format with one box per row
%   rcnn_model  specifies the CNN Caffe net file to use; fields read
%               here: cnn.init_key and cnn.batch_size.
%
%   feat        single-precision matrix with one row per box and
%               feat_dim columns, where feat_dim is derived from the
%               size of the first forward-pass output. Stays [] when
%               there are no batches.

% make sure that caffe has been initialized for this model
if rcnn_model.cnn.init_key ~= caffe('get_init_key')
  error('You probably need to call rcnn_load_model');
end

% Each batch contains 256 (default) image regions.
% Processing more than this many at once takes too much memory
% for a typical high-end GPU.
[batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model);
batch_size = rcnn_model.cnn.batch_size;

% compute features for each batch of region images
feat_dim = -1;
feat = [];
curr = 1;  % row of feat where the current batch's features start
for j = 1:length(batches)
  % forward propagate batch of region images
  f = caffe('forward', batches(j));
  f = f{1};
  f = f(:);

  % first batch, init feat_dim and feat
  if j == 1
    feat_dim = length(f)/batch_size;
    feat = zeros(size(boxes, 1), feat_dim, 'single');
  end

  % one column per region in the batch
  f = reshape(f, [feat_dim batch_size]);

  % last batch, trim f to size (drop the columns of the padding regions)
  if j == length(batches)
    if batch_padding > 0
      f = f(:, 1:end-batch_padding);
    end
  end

  % transpose so that each region becomes one row of feat
  feat(curr:curr+size(f,2)-1,:) = f';
  curr = curr + batch_size;
end
function [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model)
% [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model)
%   Extract image regions and preprocess them for use in Caffe.
%
%   Output is a cell array of batches.
%   Each batch is a 4-D tensor formatted for input into Caffe:
%     - BGR channel order
%     - single precision
%     - mean subtracted
%     - dimensions from fastest to slowest: width, height, channel,
%       batch_index
%
%   im          is an image in RGB order as returned by imread
%   boxes       are in [x1 y1 x2 y2] format with one box per row
%   rcnn_model  model struct; fields read here: cnn.batch_size,
%               cnn.image_mean, detectors.crop_mode and
%               detectors.crop_padding.
%
%   batches        num_batches x 1 cell array; every entry has size
%                  crop_size x crop_size x 3 x batch_size.
%   batch_padding  number of unused (all-zero) slots at the end of the
%                  last batch; 0 when num_boxes is a multiple of
%                  batch_size.

% convert image to BGR and single
im = single(im(:,:,[3 2 1]));
num_boxes = size(boxes, 1);
batch_size = rcnn_model.cnn.batch_size;  % regions per batch (original note: 256)
num_batches = ceil(num_boxes / batch_size);
batch_padding = batch_size - mod(num_boxes, batch_size);
if batch_padding == batch_size
  % num_boxes divides evenly, so the last batch is completely full
  batch_padding = 0;
end

crop_mode = rcnn_model.detectors.crop_mode;
image_mean = rcnn_model.cnn.image_mean;
crop_size = size(image_mean,1);  % crops are square, sized like the mean image
crop_padding = rcnn_model.detectors.crop_padding;

batches = cell(num_batches, 1);
% To run serially, replace parfor with: for batch = 1:num_batches
parfor batch = 1:num_batches
  batch_start = (batch-1)*batch_size+1;
  batch_end = min(num_boxes, batch_start+batch_size-1);
  % pre-filled with zeros, so slots beyond batch_end remain zero padding
  ims = zeros(crop_size, crop_size, 3, batch_size, 'single');
  for j = batch_start:batch_end
    bbox = boxes(j,:);
    crop = rcnn_im_crop(im, bbox, crop_mode, crop_size, ...
        crop_padding, image_mean);
    % swap dims 1 and 2 to make width the fastest dimension (for caffe)
    ims(:,:,:,j-batch_start+1) = permute(crop, [2 1 3]);
  end
  batches{batch} = ims;
end