DPM(Deformable Parts Model) 源码分析-检测(二)

DPM(Deformable Parts Model)原理

首先声明此版本为V3.1,因为它和论文最相符。V4将模型数由2个增加为6个,V5提取了语义特征。源码太长(纯代码应该在2K行以上),这里只选取了核心部分代码。

demo.m

function demo()
% DEMO  Run the detection walkthrough on three sample images.
%   Each row of the table below pairs an image file with the object class
%   whose model is used on it; rows are processed in order by TEST.

samples = {'000034.jpg', 'car'; ...
           '000061.jpg', 'person'; ...
           '000084.jpg', 'bicycle'};
for k = 1:size(samples, 1)
  test(samples{k, 1}, samples{k, 2});
end

function test(name, cls)
% TEST  Step through detection on one image, pausing after every stage.
%   name - image file name handed to imread
%   cls  - object class name; the model file is VOC2007/<cls>_final
%
%   Stages: show the image, show the model, show the raw detections after
%   non-maximum suppression, then show the predicted bounding boxes.

prompt = 'press any key to continue';

% stage 1: the input image
im = imread(name);
clf;
image(im);
axis equal;
axis on;
disp('input image');
disp(prompt); pause;

% stage 2: the model (load places the variable "model" in this workspace)
modelfile = ['VOC2007/' cls '_final'];
load(modelfile);
visualizemodel(model);
disp([cls ' model']);
disp(prompt); pause;

% stage 3: raw detections; model is the struct stored in the .mat file
dets = detect(im, model, 0);
kept = nms(dets, 0.5);   % non-maximum suppression
showboxes(im, kept);
%print(gcf, '-djpeg90', '-r0', [cls '.jpg']);
disp('detections');
disp(prompt); pause;

% stage 4: bounding boxes predicted from the detected root and parts
predicted = getboxes(model, dets);
kept = nms(predicted, 0.5);
% predicted boxes may extend past the image, so clip them to it
clipped = clipboxes(im, kept);
showboxes(im, clipped);
disp('bounding boxes');
disp(prompt); pause;


detect.m

function [boxes] = detect(input, model, thresh, bbox, ...
                          overlap, label, fid, id, maxsize)
% See Fig. 4 of the paper for an overview of the matching process.

% boxes = detect(input, model, thresh, bbox, overlap, label, fid, id, maxsize)
% Detect objects in input using a model and a score threshold.
% Higher threshold leads to fewer detections.
% boxes = [rx1 ry1 rx2 ry2 | px1 py1 px2 py2 ...| componentindex | score ]
% The function returns a matrix with one row per detected object.  The
% last column of each row gives the score of the detection.  The
% column before last specifies the component used for the detection.
% The first 4 columns specify the bounding box for the root filter and
% subsequent columns specify the bounding boxes of each part.
%
% If bbox is not empty, we pick best detection with significant overlap. 
% If label and fid are included, we write feature vectors to a data file.
%
% Optional arguments as used below:
%   bbox    - [x1 y1 x2 y2]; when non-empty the function runs in "latent"
%             mode and returns only the best-scoring placement whose
%             intersection-over-union with bbox is at least overlap
%   label, id - copied into the header of every example that is written
%   fid     - file identifier of the data file; 0 (or omitted) disables writing
%   maxsize - stop processing further levels once ftell(fid) reaches this
%             value (default inf)

% Original author's notes on typical calls:
%   training phase 2: detect(im, model, 0, bbox, overlap, 1, fid, 2*i-1)
%   training boxes  : detect(im, model, 0, bbox, overlap)
if nargin > 3 && ~isempty(bbox)
  latent = true;
else
  latent = false;
end

if nargin > 6 && fid ~= 0
  write = true;
else
  write = false;
end

if nargin < 9
  maxsize = inf;
end

% we assume color images
input = color(input);	% original note: a grayscale image is expanded to three channels (R=G=B=gray)

% prepare model for convolutions
rootfilters = [];
for i = 1:length(model.rootfilters)
  rootfilters{i} = model.rootfilters{i}.w; % original note: rows x cols x 31 weights per filter
end
partfilters = [];
for i = 1:length(model.partfilters)
  partfilters{i} = model.partfilters{i}.w;
end

% cache some data: per-component indices, filters and sizes for roots and parts
for c = 1:model.numcomponents	% original note: release 3.1 has 2 components per class, release 5 has 3*2
  ridx{c} = model.components{c}.rootindex;   % index into model.rootfilters
  oidx{c} = model.components{c}.offsetindex; % index into model.offsets
  root{c} = model.rootfilters{ridx{c}}.w;
  rsize{c} = [size(root{c},1) size(root{c},2)]; % root filter size [rows cols], in feature-map cells
  numparts{c} = length(model.components{c}.parts); % original note: currently always 6, some of them "fake"
  for j = 1:numparts{c}
    pidx{c,j} = model.components{c}.parts{j}.partindex; % index into model.partfilters
    didx{c,j} = model.components{c}.parts{j}.defindex;  % index into model.defs (deformation cost and anchor)
    part{c,j} = model.partfilters{pidx{c,j}}.w; % original note: 6 x 6 x 31
    psize{c,j} = [size(part{c,j},1) size(part{c,j},2)];	% part filter size [rows cols]
    % reverse map from partfilter index to (component, part#)
    rpidx{pidx{c,j}} = [c j];
  end
end

% we pad the feature maps to detect partially visible objects
padx = ceil(model.maxsize(2)/2+1); % e.g. maxsize(2) = 7  -> 5
pady = ceil(model.maxsize(1)/2+1); % e.g. maxsize(1) = 11 -> 7

% the feature pyramid
interval = model.interval;	% original note: 10
%-------------------------------- feature pyramid ----------------------------------------------------
% feat{k}: feature map of pyramid level k
% scales(k): scale factor applied to the image at level k
[feat, scales] = featpyramid(input, model.sbin, interval); % original note: sbin = 8, interval = 10

% detect at each scale
best = -inf;
ex = [];
boxes = [];
%--------------------- detect level by level -------------------------------------------------%
for level = interval+1:length(feat) % starts at interval+1 because parts are matched at level-interval
  scale = model.sbin/scales(level);  % factor used by rootbox/partbox to map cell coordinates to image pixels
  if size(feat{level}, 1)+2*pady < model.maxsize(1) || ... % padded map still smaller than the largest filter
     size(feat{level}, 2)+2*padx < model.maxsize(2) || ...
     (write && ftell(fid) >= maxsize) % data file has already reached maxsize
    continue;
  end
  
  if latent	% latent mode only: skip levels where no root filter is close in area to bbox
    skip = true;
    for c = 1:model.numcomponents
      root_area = (rsize{c}(1)*scale) * (rsize{c}(2)*scale); % root filter area in image pixels
      box_area = (bbox(3)-bbox(1)+1) * (bbox(4)-bbox(2)+1); % area of the given bbox
      if (root_area/box_area) >= overlap && (box_area/root_area) >= overlap % each area must be at least overlap times the other
        skip = false;
      end
    end
    if skip
      continue;
    end
  end
    
  % -----------convolve feature maps with filters -----------
  % Root responses come from this level; part responses come from
  % level-interval and are padded twice as much, so the part maps are
  % sampled with stride 2 below to line up with the root map.
  featr = padarray(feat{level}, [pady padx 0], 0);	% zero-pad pady rows top/bottom and padx columns left/right
  %C = fconv(A, cell of B, start, end);
  rootmatch = fconv(featr, rootfilters, 1, length(rootfilters));
  if length(partfilters) > 0
    featp = padarray(feat{level-interval}, [2*pady 2*padx 0], 0);
    partmatch = fconv(featp, partfilters, 1, length(partfilters));
  end
  %------------------- per-component detection -----------------------------------
  % See Fig. 4 of the paper.
  % The result of this section is the combined score map "score".
  for c = 1:model.numcomponents
    % root score + offset
    score = rootmatch{ridx{c}} + model.offsets{oidx{c}}.w;  
    % add in parts
    for j = 1:numparts{c}
      def = model.defs{didx{c,j}}.w;
      anchor = model.defs{didx{c,j}}.anchor;
      % the anchor position is shifted to account for misalignment
      % between features at different resolutions
      ax{c,j} = anchor(1) + 1;
      ay{c,j} = anchor(2) + 1;
      match = partmatch{pidx{c,j}};
      % dt(vals, ax, bx, ay, by): def(1)/def(2) are the quadratic/linear x
      % coefficients and def(3)/def(4) the quadratic/linear y coefficients
      [M, Ix{c,j}, Iy{c,j}] = dt(-match, def(1), def(2), def(3), def(4));
      % M is the distance-transformed part cost map (same size as match);
      % Ix{c,j}, Iy{c,j} hold, for every probe position, the part location
      % that achieves that cost.  Original note: see Eq. 9 of the paper.
      % Sample M with stride 2 starting at the anchor so it matches score.
      score = score - M(ay{c,j}:2:ay{c,j}+2*(size(score,1)-1), ...
                        ax{c,j}:2:ax{c,j}+2*(size(score,2)-1));
    end

    %------- thresholding ------------------------
    if ~latent
      % get all good matches
      % original note: thresh is 0 for classification and -1.05 when
      % mining hard examples
      I = find(score > thresh);	% linear (column-major) indices into score
      [Y, X] = ind2sub(size(score), I);  % back to row / column coordinates
      tmp = zeros(length(I), 4*(1+numparts{c})+2);	% one row per detection: root box, part boxes, component, score
      for i = 1:length(I)
        x = X(i);
        y = Y(i);
        [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize{c});
        b = [x1 y1 x2 y2];
        if write
          rblocklabel = model.rootfilters{ridx{c}}.blocklabel;
          oblocklabel = model.offsets{oidx{c}}.blocklabel;      
          f = featr(y:y+rsize{c}(1)-1, x:x+rsize{c}(2)-1, :);
          xc = round(x + rsize{c}(2)/2 - padx); % root centre with the padding removed
          yc = round(y + rsize{c}(1)/2 - pady);
          ex = [];
          ex.header = [label; id; level; xc; yc; ...
                       model.components{c}.numblocks; ...
                       model.components{c}.dim];
          ex.offset.bl = oblocklabel;
          ex.offset.w = 1;
          ex.root.bl = rblocklabel;
          % add the flipfeat'ed right half to the left half and keep only
          % the left ceil(width/2) columns
          width1 = ceil(rsize{c}(2)/2);
          width2 = floor(rsize{c}(2)/2);
          f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
          ex.root.w = f(:,1:width1,:);
          ex.part = [];
        end
        for j = 1:numparts{c}
          [probex, probey, px, py, px1, py1, px2, py2] = ...
              partbox(x, y, ax{c,j}, ay{c,j}, scale, padx, pady, ...
                      psize{c,j}, Ix{c,j}, Iy{c,j});
          b = [b px1 py1 px2 py2];
          if write
            % fake parts contribute a box above but no features
            if model.partfilters{pidx{c,j}}.fake
              continue;
            end
            pblocklabel = model.partfilters{pidx{c,j}}.blocklabel;
            dblocklabel = model.defs{didx{c,j}}.blocklabel;
            f = featp(py:py+psize{c,j}(1)-1,px:px+psize{c,j}(2)-1,:);
            % deformation features: negated squared and linear displacement
            % of the part from its probe position
            def = -[(probex-px)^2; probex-px; (probey-py)^2; probey-py];
            partner = model.partfilters{pidx{c,j}}.partner;
            if partner > 0
              % the part has a partner part: add the partner's flipped
              % features and its deformation features
              k = rpidx{partner}(2);
              [kprobex, kprobey, kpx, kpy, kpx1, kpy1, kpx2, kpy2] = ...
                  partbox(x, y, ax{c,k}, ay{c,k}, scale, padx, pady, ...
                          psize{c,k}, Ix{c,k}, Iy{c,k});
              kf = featp(kpy:kpy+psize{c,k}(1)-1,kpx:kpx+psize{c,k}(2)-1,:);
              % flip linear term in horizontal deformation model
              kdef = -[(kprobex-kpx)^2; kpx-kprobex; ...
                       (kprobey-kpy)^2; kprobey-kpy];
              f = f + flipfeat(kf);
              def = def + kdef;
            else
              % no partner: fold the part onto itself, as for the root
              width1 = ceil(psize{c,j}(2)/2);
              width2 = floor(psize{c,j}(2)/2);
              f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
              f = f(:,1:width1,:);
            end
            ex.part(j).bl = pblocklabel;
            ex.part(j).w = f;
            ex.def(j).bl = dblocklabel;
            ex.def(j).w = def;
          end
        end
        if write
          exwrite(fid, ex); % original note: these are negative examples
        end
        tmp(i,:) = [b c score(I(i))];
      end
      boxes = [boxes; tmp];
    end

    if latent
      % get best match
      for x = 1:size(score,2)
        for y = 1:size(score,1)
          if score(y, x) > best  
            % image-space extent of the root filter whose top-left cell is (y, x)
            [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize{c});
            % intersection with bbox
            xx1 = max(x1, bbox(1));
            yy1 = max(y1, bbox(2));
            xx2 = min(x2, bbox(3));
            yy2 = min(y2, bbox(4));
            w = (xx2-xx1+1);
            h = (yy2-yy1+1);
            if w > 0 && h > 0
              % check overlap with bbox
              inter = w*h;
              a = (x2-x1+1) * (y2-y1+1); % area of the root filter box
              b = (bbox(3)-bbox(1)+1) * (bbox(4)-bbox(2)+1); % area of bbox
              % intersection over union: dividing by a+b-inter (instead of
              % b alone) also penalises a root box much larger than bbox
              o = inter / (a+b-inter);
              if (o >= overlap)
                best = score(y, x);
                boxes = [x1 y1 x2 y2];
                % ex is overwritten on every improvement, so only the best
                % example survives and is written after the loops
                if write
                  f = featr(y:y+rsize{c}(1)-1, x:x+rsize{c}(2)-1, :);
                  rblocklabel = model.rootfilters{ridx{c}}.blocklabel;
                  oblocklabel = model.offsets{oidx{c}}.blocklabel;      
                  xc = round(x + rsize{c}(2)/2 - padx);
                  yc = round(y + rsize{c}(1)/2 - pady);          
                  ex = [];
                  % header: label; id; level; xc; yc; numblocks; dim
                  % original note: xc, yc are relative to the cropped image
                  % -- TODO confirm against the caller
                  ex.header = [label; id; level; xc; yc; ...
                               model.components{c}.numblocks; ...
                               model.components{c}.dim];
                  ex.offset.bl = oblocklabel;
                  ex.offset.w = 1;
                  ex.root.bl = rblocklabel;
                  width1 = ceil(rsize{c}(2)/2);
                  width2 = floor(rsize{c}(2)/2);
                  f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
                  ex.root.w = f(:,1:width1,:); % folded root features of the example
                  ex.part = [];
                end
                for j = 1:numparts{c}
                  % probex, probey: probe position in the part score map
                  % px, py: best part location read from Ix, Iy
                  % px1, py1, px2, py2: part box computed by partbox
                  [probex, probey, px, py, px1, py1, px2, py2] = ...
                      partbox(x, y, ax{c,j}, ay{c,j}, scale, ...
                              padx, pady, psize{c,j}, Ix{c,j}, Iy{c,j});
                  boxes = [boxes px1 py1 px2 py2];
                  if write
                    if model.partfilters{pidx{c,j}}.fake
                      continue;
                    end
                    p = featp(py:py+psize{c,j}(1)-1, ...
                              px:px+psize{c,j}(2)-1, :);
                    def = -[(probex-px)^2; probex-px; (probey-py)^2; probey-py];
                    pblocklabel = model.partfilters{pidx{c,j}}.blocklabel;
                    dblocklabel = model.defs{didx{c,j}}.blocklabel;
                    partner = model.partfilters{pidx{c,j}}.partner;
                    if partner > 0
                      k = rpidx{partner}(2);
                      [kprobex, kprobey, kpx, kpy, kpx1, kpy1, kpx2, kpy2] = ...
                          partbox(x, y, ax{c,k}, ay{c,k}, scale, padx, pady, ...
                                  psize{c,k}, Ix{c,k}, Iy{c,k});
                      kp = featp(kpy:kpy+psize{c,k}(1)-1, ...
                                 kpx:kpx+psize{c,k}(2)-1, :);
                      % flip linear term in horizontal deformation model
                      kdef = -[(kprobex-kpx)^2; kpx-kprobex; ...
                               (kprobey-kpy)^2; kprobey-kpy];
                      p = p + flipfeat(kp);
                      def = def + kdef;
                    else
                      width1 = ceil(psize{c,j}(2)/2);
                      width2 = floor(psize{c,j}(2)/2);
                      p(:,1:width2,:) = p(:,1:width2,:) + ...
                          flipfeat(p(:,width1+1:end,:));
                      p = p(:,1:width1,:);
                    end
                    ex.part(j).bl = pblocklabel;
                    ex.part(j).w = p;
                    ex.def(j).bl = dblocklabel;
                    ex.def(j).w = def;
                  end
                end
                boxes = [boxes c best];
              end
            end
          end
        end
      end
    end
  end
end

% latent mode: write the single best example (if any) to the data file
if latent && write && ~isempty(ex)
  exwrite(fid, ex);
end

% The functions below compute a bounding box for a root or part 
% template placed in the feature hierarchy.
%
% coordinates need to be transformed to take into account:
% 1. padding from convolution
% 2. scaling due to sbin & image subsampling
% 3. offset from feature computation    
%

function [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize)
% ROOTBOX  Bounding box of a root filter placed at cell (x, y).
%   x, y       - column / row of the filter's top-left cell in the padded map
%   scale      - multiplier that converts cell units to output units
%   padx, pady - padding that was added to the feature map
%   rsize      - filter size as [rows cols] in cells
%   Returns the corners [x1 y1 x2 y2] of the box.

% remove the padding first, then scale (the pyramid level is scaled
% before it is padded), and shift to 1-based coordinates
x1 = 1 + scale*(x - padx);
y1 = 1 + scale*(y - pady);

% the filter extent is scaled the same way
boxwidth  = scale*rsize(2);
boxheight = scale*rsize(1);
x2 = (x1 + boxwidth) - 1;
y2 = (y1 + boxheight) - 1;

function [probex, probey, px, py, px1, py1, px2, py2] = ...
    partbox(x, y, ax, ay, scale, padx, pady, psize, Ix, Iy)
% PARTBOX  Locate a part for the root placed at cell (x, y).
%   ax, ay     - part anchor (already shifted by one by the caller)
%   scale      - cell-to-output multiplier of the root level
%   padx, pady - root-level padding (the part map is padded twice as much)
%   psize      - part filter size as [rows cols] in part-level cells
%   Ix, Iy     - lookup tables giving the best part column / row for
%                every probe position
%   Returns the probe position (probex, probey), the best part position
%   (px, py) and the corners px1, py1, px2, py2 of the part box.

% probe position: the part map has twice the root resolution
probey = ay + 2*(y - 1);
probex = ax + 2*(x - 1);

% best placement for that probe, converted from the tables' integer type
py = double(Iy(probey, probex));
px = double(Ix(probey, probex));

% halve the part-level coordinate to get back to root-level cells, remove
% the padding, then scale exactly like a root box
px1 = 1 + scale*((px-2)/2 + 1 - padx);
py1 = 1 + scale*((py-2)/2 + 1 - pady);

% a part cell is half the size of a root cell
halfwidth  = psize(2)*scale/2;
halfheight = psize(1)*scale/2;
px2 = (px1 + halfwidth) - 1;
py2 = (py1 + halfheight) - 1;

% write an example to the data file
function exwrite(fid, ex)
% EXWRITE  Append one example to the data file open on fid.
%   The header is written as int32.  It is followed, as single precision,
%   by the offset and root blocks (block label, then weights) and by one
%   part + deformation record for every part that has features.

fwrite(fid, ex.header, 'int32');

rootrecord = [ex.offset.bl; ex.offset.w(:); ...
              ex.root.bl; ex.root.w(:)];
fwrite(fid, rootrecord, 'single');

nparts = length(ex.part);
for k = 1:nparts
  partfeat = ex.part(k).w;
  % entries without features are skipped
  if isempty(partfeat)
    continue;
  end
  partrecord = [ex.part(k).bl; partfeat(:); ...
                ex.def(k).bl; ex.def(k).w(:)];
  fwrite(fid, partrecord, 'single');
end


features.cc

#include <math.h>
#include "mex.h"

// small value, used to avoid division by zero
#define eps 0.0001

#define bzero(a, b) memset(a, 0, b) 
// Round a float to the nearest integer, with halves rounded away from zero.
// The previous version only looked at the positive fractional part, so
// negative inputs were rounded towards zero (e.g. -0.7 gave 0).
int round(float a) {
  int whole = (int)a;      // truncates towards zero
  float frac = a - whole;  // carries the sign of a
  if (frac >= 0.5f)
    return whole + 1;
  if (frac <= -0.5f)
    return whole - 1;
  return whole;
}
// Unit vectors against which each gradient is projected to pick its
// orientation: (uu[i], vv[i]) = (cos(20*i degrees), sin(20*i degrees)), i = 0..8.
double uu[9] = { 1.0000,  0.9397,  0.7660,  0.500,   0.1736,
                -0.1736, -0.5000, -0.7660, -0.9397};
double vv[9] = { 0.0000,  0.3420,  0.6428,  0.8660,  0.9848,
                 0.9848,  0.8660,  0.6428,  0.3420};

// Smaller of two doubles; y is returned whenever x <= y does not hold.
static inline double min(double x, double y) {
  if (x <= y)
    return x;
  return y;
}

// Larger of two doubles; x is returned whenever x <= y does not hold.
static inline double max(double x, double y) {
  if (x <= y)
    return y;
  return x;
}

// Integer counterparts of the two helpers above.
static inline int min(int x, int y) {
  if (x <= y)
    return x;
  return y;
}

static inline int max(int x, int y) {
  if (x <= y)
    return y;
  return x;
}

// main function:
// takes a double color image and a bin size 
// returns HOG features
// Compute the HOG feature map of a colour image.
//
// mximage: rows x cols x 3 double array; anything else raises "Invalid input".
// mxsbin:  scalar cell size in pixels (truncated to int; must be >= 1).
//
// Returns a newly created double array of size
//   max(round(rows/sbin)-2, 0) x max(round(cols/sbin)-2, 0) x 31
// with 18 contrast-sensitive channels, 9 contrast-insensitive channels and
// 4 texture channels per cell.
mxArray *process(const mxArray *mximage, const mxArray *mxsbin) {
  double *im = (double *)mxGetPr(mximage);

  // mxGetDimensions returns mwSize entries, which are wider than int on
  // large-array-dims builds, so read them as mwSize and narrow explicitly.
  const mwSize *mxdims = mxGetDimensions(mximage);
  if (mxGetNumberOfDimensions(mximage) != 3 ||
      mxdims[2] != 3 ||
      mxGetClassID(mximage) != mxDOUBLE_CLASS)
    mexErrMsgTxt("Invalid input");
  int dims[2];
  dims[0] = (int)mxdims[0];  // rows
  dims[1] = (int)mxdims[1];  // columns

  // a non-positive cell size would divide by zero below
  int sbin = (int)mxGetScalar(mxsbin);
  if (sbin < 1)
    mexErrMsgTxt("Invalid input");

  // number of cells along each image dimension
  int blocks[2];
  blocks[0] = (int)round((double)dims[0]/(double)sbin);  // rows
  blocks[1] = (int)round((double)dims[1]/(double)sbin);  // columns

  // memory for HOG features; the border cells are dropped because every
  // output cell is normalised with the block energies of its neighbours
  int out[3];
  out[0] = max(blocks[0]-2, 0);
  out[1] = max(blocks[1]-2, 0);
  out[2] = 27+4;
  mwSize outdims[3];
  outdims[0] = (mwSize)out[0];
  outdims[1] = (mwSize)out[1];
  outdims[2] = (mwSize)out[2];
  mxArray *mxfeat = mxCreateNumericArray(3, outdims, mxDOUBLE_CLASS, mxREAL);
  double *feat = (double *)mxGetPr(mxfeat);

  // Nothing to compute for an empty output.  Returning here also keeps
  // the gradient loop from reading outside images that have fewer than
  // three rows or columns (such images always give an empty output).
  if (out[0] == 0 || out[1] == 0)
    return mxfeat;

  // memory for caching orientation histograms & their norms; only the 18
  // signed orientation bins are stored, the 9 unsigned ones are derived
  double *hist = (double *)mxCalloc(blocks[0]*blocks[1]*18, sizeof(double));
  double *norm = (double *)mxCalloc(blocks[0]*blocks[1], sizeof(double));

  // image area covered by whole cells
  int visible[2];
  visible[0] = blocks[0]*sbin;
  visible[1] = blocks[1]*sbin;

  // columns in the outer loop, rows in the inner loop (column-major data)
  for (int x = 1; x < visible[1]-1; x++) {
    for (int y = 1; y < visible[0]-1; y++) {
      // first color channel; coordinates are clamped so that the central
      // differences stay inside the image
      double *s = im + min(x, dims[1]-2)*dims[0] + min(y, dims[0]-2);
      double dy = *(s+1) - *(s-1);
      double dx = *(s+dims[0]) - *(s-dims[0]);
      double v = dx*dx + dy*dy;

      // second color channel
      s += dims[0]*dims[1];
      double dy2 = *(s+1) - *(s-1);
      double dx2 = *(s+dims[0]) - *(s-dims[0]);
      double v2 = dx2*dx2 + dy2*dy2;

      // third color channel
      s += dims[0]*dims[1];
      double dy3 = *(s+1) - *(s-1);
      double dx3 = *(s+dims[0]) - *(s-dims[0]);
      double v3 = dx3*dx3 + dy3*dy3;

      // pick channel with strongest gradient
      if (v2 > v) {
        v = v2;
        dx = dx2;
        dy = dy2;
      }
      if (v3 > v) {
        v = v3;
        dx = dx3;
        dy = dy3;
      }

      // snap to one of 18 orientations: the unit vector (or its negation,
      // giving bins 9..17) with the largest projection of the gradient
      double best_dot = 0;
      int best_o = 0;
      for (int o = 0; o < 9; o++) {
        double dot = uu[o]*dx + vv[o]*dy;
        if (dot > best_dot) {
          best_dot = dot;
          best_o = o;
        } else if (-dot > best_dot) {
          best_dot = -dot;
          best_o = o+9;
        }
      }

      // add to 4 histograms around pixel using linear interpolation
      double xp = ((double)x+0.5)/(double)sbin - 0.5;
      double yp = ((double)y+0.5)/(double)sbin - 0.5;
      int ixp = (int)floor(xp);
      int iyp = (int)floor(yp);
      double vx0 = xp-ixp;
      double vy0 = yp-iyp;
      double vx1 = 1.0-vx0;
      double vy1 = 1.0-vy0;
      v = sqrt(v);

      // cell (ixp, iyp)
      if (ixp >= 0 && iyp >= 0) {
        *(hist + ixp*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) +=
          vx1*vy1*v;
      }

      // cell (ixp+1, iyp)
      if (ixp+1 < blocks[1] && iyp >= 0) {
        *(hist + (ixp+1)*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) +=
          vx0*vy1*v;
      }

      // cell (ixp, iyp+1)
      if (ixp >= 0 && iyp+1 < blocks[0]) {
        *(hist + ixp*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) +=
          vx1*vy0*v;
      }

      // cell (ixp+1, iyp+1)
      if (ixp+1 < blocks[1] && iyp+1 < blocks[0]) {
        *(hist + (ixp+1)*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) +=
          vx0*vy0*v;
      }
    }
  }

  // compute energy in each block by summing over orientations:
  // norm(cell) = sum over o = 0..8 of (hist[o] + hist[o+9])^2
  for (int o = 0; o < 9; o++) {
    double *src1 = hist + o*blocks[0]*blocks[1];
    double *src2 = hist + (o+9)*blocks[0]*blocks[1];
    double *dst = norm;
    double *end = norm + blocks[1]*blocks[0];
    while (dst < end) {
      *(dst++) += (*src1 + *src2) * (*src1 + *src2);
      src1++;
      src2++;
    }
  }

  // compute features
  for (int x = 0; x < out[1]; x++) {
    for (int y = 0; y < out[0]; y++) {
      double *dst = feat + x*out[0] + y;
      double *src, *p, n1, n2, n3, n4;

      // normalisers of the four 2x2 cell blocks that contain cell
      // (x+1, y+1); each is 1/sqrt(sum of the four cell energies + eps)
      p = norm + (x+1)*blocks[0] + y+1;  // block starting at (x+1, y+1)
      n1 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
      p = norm + (x+1)*blocks[0] + y;    // block starting at (x+1, y)
      n2 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
      p = norm + x*blocks[0] + y+1;      // block starting at (x, y+1)
      n3 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
      p = norm + x*blocks[0] + y;        // block starting at (x, y)
      n4 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);

      double t1 = 0;
      double t2 = 0;
      double t3 = 0;
      double t4 = 0;

      // contrast-sensitive features
      src = hist + (x+1)*blocks[0] + (y+1);
      for (int o = 0; o < 18; o++) {
        double h1 = min(*src * n1, 0.2);  // normalise, then truncate at 0.2
        double h2 = min(*src * n2, 0.2);
        double h3 = min(*src * n3, 0.2);
        double h4 = min(*src * n4, 0.2);
        *dst = 0.5 * (h1 + h2 + h3 + h4);
        t1 += h1;
        t2 += h2;
        t3 += h3;
        t4 += h4;
        dst += out[0]*out[1];        // next output channel
        src += blocks[0]*blocks[1];  // next orientation bin
      }

      // contrast-insensitive features
      src = hist + (x+1)*blocks[0] + (y+1);
      for (int o = 0; o < 9; o++) {
        double sum = *src + *(src + 9*blocks[0]*blocks[1]);
        double h1 = min(sum * n1, 0.2);
        double h2 = min(sum * n2, 0.2);
        double h3 = min(sum * n3, 0.2);
        double h4 = min(sum * n4, 0.2);
        *dst = 0.5 * (h1 + h2 + h3 + h4);
        dst += out[0]*out[1];
        src += blocks[0]*blocks[1];
      }

      // texture features: per-normaliser sums of the 18 truncated
      // contrast-sensitive values, scaled by 0.2357 (about 1/sqrt(18))
      *dst = 0.2357 * t1;
      dst += out[0]*out[1];
      *dst = 0.2357 * t2;
      dst += out[0]*out[1];
      *dst = 0.2357 * t3;
      dst += out[0]*out[1];
      *dst = 0.2357 * t4;
    }
  }

  mxFree(hist);
  mxFree(norm);
  return mxfeat;
}

// matlab entry point
// F = features(image, bin)
// image should be color with double values
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { 
  if (nrhs != 2)
    mexErrMsgTxt("Wrong number of inputs"); 
  if (nlhs != 1)
    mexErrMsgTxt("Wrong number of outputs");
  plhs[0] = process(prhs[0], prhs[1]);
}



 

dt.cc

#include <math.h>
#include <sys/types.h>
#include "mex.h"

#define int32_t int
/*
 * Generalized distance transforms.
 * We use a simple nlog(n) divide and conquer algorithm instead of the
 * theoretically faster linear method, for no particular reason except
 * that this is a bit simpler and I wanted to test it out.
 *
 * The code is a bit convoluted because dt1d can operate either along
 * a row or column of an array.  
 */

// Integer square of x.
static inline int square(int x) {
  const int squared = x * x;
  return squared;
}

// dt helper function
// Recursive worker of dt1d.
// Fills dst[d*step] and ptr[d*step] for every d in [d1, d2], where
//   dst = min over s in [s1, s2] of src[s*step] + a*(d-s)^2 + b*(d-s)
// and ptr is the s that achieves it (the lowest such s on ties).
// The middle destination is solved first; its best source then bounds
// the source range searched for the destinations on either side.
void dt_helper(double *src, double *dst, int *ptr, int step, 
	       int s1, int s2, int d1, int d2, double a, double b) {
  if (d2 < d1)
    return;  // empty destination range

  const int mid = (d1+d2) >> 1;

  // linear scan for the cheapest source for destination mid
  int best = s1;
  double best_cost = src[best*step] + a*square(mid-best) + b*(mid-best);
  for (int cand = s1+1; cand <= s2; cand++) {
    const double cost = src[cand*step] + a*square(mid-cand) + b*(mid-cand);
    if (best_cost > cost) {
      best = cand;
      best_cost = cost;
    }
  }
  dst[mid*step] = best_cost;
  ptr[mid*step] = best;

  // destinations left of mid use sources up to best, those right of mid
  // use sources from best onwards
  dt_helper(src, dst, ptr, step, s1, best, d1, mid-1, a, b);
  dt_helper(src, dst, ptr, step, best, s2, mid+1, d2, a, b);
}

// dt of 1d array
// Distance transform of one line of n samples spaced step elements apart.
// Sources and destinations both span the whole line, 0 .. n-1.
void dt1d(double *src, double *dst, int *ptr, int step, int n, 
	  double a, double b) {
  const int first = 0;
  const int last = n-1;
  dt_helper(src, dst, ptr, step, first, last, first, last, a, b);
}

// matlab entry point
// [M, Ix, Iy] = dt(vals, ax, bx, ay, by)
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { 
  if (nrhs != 5)
    mexErrMsgTxt("Wrong number of inputs"); 
  if (nlhs != 3)
    mexErrMsgTxt("Wrong number of outputs");
  if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS)
    mexErrMsgTxt("Invalid input");

  const int *dims = mxGetDimensions(prhs[0]);
  double *vals = (double *)mxGetPr(prhs[0]);
  double ax = mxGetScalar(prhs[1]);
  double bx = mxGetScalar(prhs[2]);
  double ay = mxGetScalar(prhs[3]);
  double by = mxGetScalar(prhs[4]);
  
  mxArray *mxM = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);
  mxArray *mxIx = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL);
  mxArray *mxIy = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL);
  double *M = (double *)mxGetPr(mxM);
  int32_t *Ix = (int32_t *)mxGetPr(mxIx);
  int32_t *Iy = (int32_t *)mxGetPr(mxIy);

  double *tmpM = (double *)mxCalloc(dims[0]*dims[1], sizeof(double)); // part map
  int32_t *tmpIx = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t));
  int32_t *tmpIy = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t));

  for (int x = 0; x < dims[1]; x++)
    dt1d(vals+x*dims[0], tmpM+x*dims[0], tmpIy+x*dims[0], 1, dims[0], ay, by);

  for (int y = 0; y < dims[0]; y++)
    dt1d(tmpM+y, M+y, tmpIx+y, dims[0], dims[1], ax, bx);

  // get argmins and adjust for matlab indexing from 1
  for (int x = 0; x < dims[1]; x++) {
    for (int y = 0; y < dims[0]; y++) {
      int p = x*dims[0]+y;
      Ix[p] = tmpIx[p]+1;
      Iy[p] = tmpIy[tmpIx[p]*dims[0]+y]+1;
    }
  }

  mxFree(tmpM);
  mxFree(tmpIx);
  mxFree(tmpIy);
  plhs[0] = mxM;
  plhs[1] = mxIx;
  plhs[2] = mxIy;
}



 

 

你可能感兴趣的:(源码分析,mod,Parts,Deformable)