R-CNN Notes 1: Annotated rcnn_demo Code

  • rcnn_demo.m
  • rcnn_detect.m
  • rcnn_features.m
  • rcnn_extract_regions.m

rcnn_demo.m

    function rcnn_demo(use_gpu)
    % rcnn_demo(use_gpu)
    %   Run the R-CNN demo on a test image. Set use_gpu = false to run
    %   in CPU mode. (GPU mode is the default.)

    % AUTORIGHTS
    % ---------------------------------------------------------
    % Copyright (c) 2014, Ross Girshick
    % 
    % This file is part of the R-CNN code and is available 
    % under the terms of the Simplified BSD License provided in 
    % LICENSE. Please retain this notice and LICENSE if you use 
    % this file (or any portion of it) in your project.
    % ---------------------------------------------------------

    % path to the precomputed R-CNN model file
    rcnn_model_file = './data/rcnn_models/voc_2012/rcnn_model_finetuned.mat';
    if ~exist(rcnn_model_file, 'file')
      error('You need to download the R-CNN precomputed models. See README.md for details.');
    end

    if ~exist('use_gpu', 'var') || isempty(use_gpu)
      use_gpu = true;
    end

    modes = {'CPU', 'GPU'};
    fprintf('~~~~~~~~~~~~~~~~~~~\n');
    fprintf('Running in %s mode\n', modes{use_gpu+1});
    fprintf('(To run in %s mode, call rcnn_demo(%d) instead)\n',  ...
        modes{~use_gpu+1}, ~use_gpu);
    fprintf('Press any key to continue\n');
    pause;

    fprintf('Initializing R-CNN model (this might take a little while)\n');
    % load the R-CNN model
    rcnn_model = rcnn_load_model(rcnn_model_file, use_gpu);
    fprintf('done\n');

    im = imread('./000084.jpg');
    % run detection
    dets = rcnn_detect(im, rcnn_model);

    % show top scoring bicycle detection
    showboxes(im, dets{2}(1,:));
    title(sprintf('bicycle conf = %.3f', dets{2}(1,end)));

    fprintf('Press any key to see the top scoring person detection\n');
    pause;

    % show top scoring person detection
    showboxes(im, dets{15}(1,:));
    title(sprintf('person conf = %.3f', dets{15}(1,end)));
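
To run the demo on the CPU instead of the default GPU mode, pass false explicitly (this is exactly what the header comment of rcnn_demo.m describes):

    >> rcnn_demo(false)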

Fields of the rcnn_model struct:

  • cnn: 1x1 struct (fields used below include init_key, batch_size, image_mean)
  • cache_name: 'v1_finetune_voc_2012_train_iter_70k'
  • detectors: per-class weights W and biases B, plus crop_mode and crop_padding
  • classes: class names; dets{2} is bicycle and dets{15} is person in the demo
  • training_opts: training-time options, including feat_norm_mean
  • SVs: support vectors
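
A quick way to confirm these fields after loading the model as in rcnn_demo.m (a minimal sketch reusing rcnn_model_file from above, and assuming classes is a cell array of name strings):

    rcnn_model = rcnn_load_model(rcnn_model_file, false);  % CPU mode
    fieldnames(rcnn_model)           % cnn, cache_name, detectors, classes, ...
    rcnn_model.classes{2}            % 'bicycle', matching dets{2} in the demo
    rcnn_model.classes{15}           % 'person',  matching dets{15} in the demo
    size(rcnn_model.detectors.W)     % feat_dim x num_classes, one column of weights per class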


rcnn_detect.m

    function dets = rcnn_detect(im, rcnn_model)
    % AUTORIGHTS
    % ---------------------------------------------------------
    % Copyright (c) 2014, Ross Girshick
    % 
    % This file is part of the R-CNN code and is available 
    % under the terms of the Simplified BSD License provided in 
    % LICENSE. Please retain this notice and LICENSE if you use 
    % this file (or any portion of it) in your project.
    % ---------------------------------------------------------

    % compute selective search candidates
    fprintf('Computing candidate regions...');
    th = tic();
    fast_mode = true;
    % run selective_search to get candidate windows
    boxes = selective_search_boxes(im, fast_mode);
    % compat: change coordinate order from [y1 x1 y2 x2] to [x1 y1 x2 y2]
    boxes = boxes(:, [2 1 4 3]); % reorder columns; a handy swap I had not used before
    % print the number of candidate windows and the elapsed time
    fprintf('found %d candidates (in %.3fs).\n', size(boxes,1), toc(th));

    % extract features from candidates (one row per candidate box)
    fprintf('Extracting CNN features from regions...');
    th = tic();
    % extract CNN features; each region yields a 4096-dimensional feature vector
    feat = rcnn_features(im, boxes, rcnn_model);
    feat = rcnn_scale_features(feat, rcnn_model.training_opts.feat_norm_mean);
    fprintf('done (in %.3fs).\n', toc(th));

    fprintf('Scoring regions with detectors and applying NMS...');
    % compute scores for each candidate [num_boxes x num_classes]
    th = tic();
    % scores = feat*rcnn_model.detectors.W + rcnn_model.detectors.B (bsxfun broadcasts the bias over rows)
    scores = bsxfun(@plus, feat*rcnn_model.detectors.W, rcnn_model.detectors.B);

    % apply NMS to each class and return final scored detections
    num_classes = length(rcnn_model.classes);
    dets = cell(num_classes, 1);
    for i = 1:num_classes
      scored_boxes = cat(2, boxes, scores(:,i));
      keep = nms(scored_boxes, 0.3); 
      dets{i} = scored_boxes(keep, :);
    end
    fprintf('done (in %.3fs)\n', toc(th));
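
Each cell of dets holds the surviving detections for one class, one row per box in [x1 y1 x2 y2 score] format with the highest score first (the demo relies on this when it takes row 1 as the top detection). A minimal sketch of keeping every detection of one class above a score threshold; the class index follows the demo, while the threshold value is only an assumption to tune:

    person_idx = 15;                 % index into rcnn_model.classes, as in rcnn_demo.m
    d = dets{person_idx};            % rows are [x1 y1 x2 y2 score]
    keep = d(:, end) > -0.5;         % -0.5 is an illustrative threshold, not from the source
    showboxes(im, d(keep, :));       % showboxes is called the same way as in rcnn_demo.m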

rcnn_features.m


    function feat = rcnn_features(im, boxes, rcnn_model)
    % feat = rcnn_features(im, boxes, rcnn_model)
    %   Compute CNN features on a set of boxes.
    %
    %   im is an image in RGB order as returned by imread
    %   boxes are in [x1 y1 x2 y2] format with one box per row
    %   rcnn_model specifies the CNN Caffe net file to use.

    % AUTORIGHTS
    % ---------------------------------------------------------
    % Copyright (c) 2014, Ross Girshick
    % 
    % This file is part of the R-CNN code and is available 
    % under the terms of the Simplified BSD License provided in 
    % LICENSE. Please retain this notice and LICENSE if you use 
    % this file (or any portion of it) in your project.
    % ---------------------------------------------------------

    % make sure that caffe has been initialized for this model
    if rcnn_model.cnn.init_key ~= caffe('get_init_key')
      error('You probably need to call rcnn_load_model');
    end

    % Each batch contains 256 (default) image regions.
    % Processing more than this many at once takes too much memory
    % for a typical high-end GPU.
    [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model);
    batch_size = rcnn_model.cnn.batch_size;

    % compute features for each batch of region images
    feat_dim = -1;
    feat = [];
    curr = 1;
    for j = 1:length(batches)
      % forward propagate batch of region images 
      f = caffe('forward', batches(j));
      f = f{1};
      f = f(:);

      % first batch, init feat_dim and feat
      if j == 1
        feat_dim = length(f)/batch_size;
        feat = zeros(size(boxes, 1), feat_dim, 'single');
      end

      f = reshape(f, [feat_dim batch_size]);

      % last batch, trim f to size
      if j == length(batches)
        if batch_padding > 0
          f = f(:, 1:end-batch_padding);
        end
      end

      feat(curr:curr+size(f,2)-1,:) = f';
      curr = curr + batch_size;
    end
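
A worked example of the batching arithmetic (batch_padding is computed in rcnn_extract_regions.m below; the numbers here are only illustrative):

    % Suppose selective search returns 2000 candidate boxes:
    num_boxes  = 2000;
    batch_size = 256;                              % default rcnn_model.cnn.batch_size
    num_batches   = ceil(num_boxes / batch_size);  % = 8 batches
    batch_padding = batch_size - mod(num_boxes, batch_size);  % = 48 padded slots
    % The last batch carries 48 dummy regions; the loop above trims them with
    % f = f(:, 1:end-batch_padding), so feat ends up with exactly 2000 rows.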

rcnn_extract_regions.m

    function [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model)
    % [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model)
    %   Extract image regions and preprocess them for use in Caffe.
    %   Output is a cell array of batches.
    %   Each batch is a 4-D tensor formatted for input into Caffe:
    %     - BGR channel order
    %     - single precision
    %     - mean subtracted
    %     - dimensions from fastest to slowest: width, height, channel, batch_index
    %
    %   im is an image in RGB order as returned by imread
    %   boxes are in [x1 y1 x2 y2] format with one box per row

    % AUTORIGHTS
    % ---------------------------------------------------------
    % Copyright (c) 2014, Ross Girshick
    % 
    % This file is part of the R-CNN code and is available 
    % under the terms of the Simplified BSD License provided in 
    % LICENSE. Please retain this notice and LICENSE if you use 
    % this file (or any portion of it) in your project.
    % ---------------------------------------------------------

    % convert image to BGR and single
    im = single(im(:,:,[3 2 1]));
    num_boxes = size(boxes, 1);
    batch_size = rcnn_model.cnn.batch_size;  % batch_size = 256 by default
    num_batches = ceil(num_boxes / batch_size);
    batch_padding = batch_size - mod(num_boxes, batch_size);
    if batch_padding == batch_size
      batch_padding = 0;
    end

    crop_mode = rcnn_model.detectors.crop_mode;
    image_mean = rcnn_model.cnn.image_mean;
    crop_size = size(image_mean,1);
    crop_padding = rcnn_model.detectors.crop_padding;

    batches = cell(num_batches, 1);
    %for batch = 1:num_batches
    parfor batch = 1:num_batches
      batch_start = (batch-1)*batch_size+1;
      batch_end = min(num_boxes, batch_start+batch_size-1);

      ims = zeros(crop_size, crop_size, 3, batch_size, 'single');
      for j = batch_start:batch_end
        bbox = boxes(j,:);
        crop = rcnn_im_crop(im, bbox, crop_mode, crop_size, ...
            crop_padding, image_mean);
        % swap dims 1 and 2 to make width the fastest dimension (for caffe)
        ims(:,:,:,j-batch_start+1) = permute(crop, [2 1 3]);
      end

      batches{batch} = ims;
    end
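
A small sanity-check sketch of the batch layout produced above (the assertions just restate the header comments; im, boxes, and rcnn_model come from the rcnn_detect.m context):

    [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model);
    b = batches{1};
    assert(isa(b, 'single'));                        % single precision
    assert(size(b, 4) == rcnn_model.cnn.batch_size); % one slot per region, plus padding
    size(b)   % crop_size x crop_size x 3 x batch_size: width, height, channel, batch_index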
