DPM(Deformable Parts Model)原理
首先声明:本文分析的版本为 V3.1,因为它与论文最相符。V4 将模型数由 2 个增加为 6 个,V5 提取了语义特征。源码太长(纯代码应该在 2K 行以上),这里只选取了核心部分代码。
demo.m
function demo()
% DEMO  Run the detection walkthrough on three sample images.
%   Each row of the table below is an (image file, object class) pair that
%   is handed to the local function TEST in order.

samples = {'000034.jpg', 'car'; ...
           '000061.jpg', 'person'; ...
           '000084.jpg', 'bicycle'};
for k = 1:size(samples, 1)
  test(samples{k, 1}, samples{k, 2});
end


function test(name, cls)
% TEST  Show, step by step, the input image, the class model, the raw
%   detections and the predicted bounding boxes for one image.
%   name  image file passed to imread
%   cls   class name used to build the model file path 'VOC2007/<cls>_final'

% load and display image
img = imread(name);
clf;
image(img);
axis equal;
axis on;
disp('input image');
wait_for_key();

% load and display model
% NOTE(review): the MAT-file is expected to define the variable `model`
% (a struct) that is used below -- confirm against the saved models.
load(['VOC2007/' cls '_final']);
visualizemodel(model);
disp([cls ' model']);
wait_for_key();

% detect objects (score threshold 0), then non-maximum suppression
detections = detect(img, model, 0);
kept = nms(detections, 0.5);
showboxes(img, kept);
%print(gcf, '-djpeg90', '-r0', [cls '.jpg']);
disp('detections');
wait_for_key();

% get bounding boxes predicted from the detected root and part boxes
predicted = getboxes(model, detections);
kept = nms(predicted, 0.5);
% predicted boxes may extend past the image, so they are clipped to it
predicted = clipboxes(img, kept);
showboxes(img, predicted);
disp('bounding boxes');
wait_for_key();


function wait_for_key()
% WAIT_FOR_KEY  Print the prompt and block until the user presses a key.
disp('press any key to continue');
pause;
detect.m
function [boxes] = detect(input, model, thresh, bbox, ...
                          overlap, label, fid, id, maxsize)
% boxes = detect(input, model, thresh, bbox, overlap, label, fid, id, maxsize)
% Detect objects in input using a model and a score threshold
% (see Fig. 4 of the paper). Higher threshold leads to fewer detections.
%
% boxes = [rx1 ry1 rx2 ry2 | px1 py1 px2 py2 ... | componentindex | score]
% The function returns a matrix with one row per detected object.  The
% last column of each row gives the score of the detection.  The
% column before last specifies the component used for the detection.
% The first 4 columns specify the bounding box for the root filter and
% subsequent columns specify the bounding boxes of each part.
%
% If bbox is not empty, we pick best detection with significant overlap.
% If label and fid are included, we write feature vectors to a data file.
%
% Call patterns noted by the annotator of this listing:
%   detect(im, model, 0, bbox, overlap, 1, fid, 2*i-1)   (training, phase 2)
%   detect(im, model, 0, bbox, overlap)                  (training boxes)

% latent mode: a ground-truth bbox was supplied
if nargin > 3 && ~isempty(bbox)
  latent = true;
else
  latent = false;
end

% write mode: a non-zero file id was supplied
if nargin > 6 && fid ~= 0
  write = true;
else
  write = false;
end

if nargin < 9
  maxsize = inf;
end

% we assume color images
% (annotator's note: a grayscale input is expanded to three channels)
input = color(input);

% prepare model for convolutions
rootfilters = [];
for i = 1:length(model.rootfilters)
  rootfilters{i} = model.rootfilters{i}.w;
end
partfilters = [];
for i = 1:length(model.partfilters)
  partfilters{i} = model.partfilters{i}.w;
end

% cache some data: indices and sizes of every root and part filter
for c = 1:model.numcomponents
  ridx{c} = model.components{c}.rootindex;
  oidx{c} = model.components{c}.offsetindex;
  root{c} = model.rootfilters{ridx{c}}.w;
  % root filter size [rows cols], measured in feature-map cells
  rsize{c} = [size(root{c},1) size(root{c},2)];
  numparts{c} = length(model.components{c}.parts);
  for j = 1:numparts{c}
    pidx{c,j} = model.components{c}.parts{j}.partindex;
    didx{c,j} = model.components{c}.parts{j}.defindex;
    part{c,j} = model.partfilters{pidx{c,j}}.w;
    psize{c,j} = [size(part{c,j},1) size(part{c,j},2)];
    % reverse map from partfilter index to (component, part#)
    rpidx{pidx{c,j}} = [c j];
  end
end

% we pad the feature maps to detect partially visible objects
padx = ceil(model.maxsize(2)/2+1);
pady = ceil(model.maxsize(1)/2+1);

% the feature pyramid
interval = model.interval;
[feat, scales] = featpyramid(input, model.sbin, interval);

% detect at each scale
best = -inf;
ex = [];
boxes = [];
% levels start at interval+1 because the part filters are evaluated on
% feat{level-interval}
for level = interval+1:length(feat)
  scale = model.sbin/scales(level);
  % skip levels whose padded feature map is smaller than model.maxsize,
  % and stop producing examples once the data file has reached maxsize
  if size(feat{level}, 1)+2*pady < model.maxsize(1) || ...
     size(feat{level}, 2)+2*padx < model.maxsize(2) || ...
     (write && ftell(fid) >= maxsize)
    continue;
  end

  if latent
    % skip the level unless some root filter area is within a factor of
    % `overlap` of the bbox area (in both directions)
    skip = true;
    for c = 1:model.numcomponents
      root_area = (rsize{c}(1)*scale) * (rsize{c}(2)*scale);
      box_area = (bbox(3)-bbox(1)+1) * (bbox(4)-bbox(2)+1);
      if (root_area/box_area) >= overlap && (box_area/root_area) >= overlap
        skip = false;
      end
    end
    if skip
      continue;
    end
  end

  % convolve feature maps with filters
  % featr: current level padded by [pady padx] zeros on each side
  featr = padarray(feat{level}, [pady padx 0], 0);
  rootmatch = fconv(featr, rootfilters, 1, length(rootfilters));
  if ~isempty(partfilters)
    % featp: level-interval feature map with twice the root padding
    featp = padarray(feat{level-interval}, [2*pady 2*padx 0], 0);
    partmatch = fconv(featp, partfilters, 1, length(partfilters));
  end

  % build the combined score map of each component (see Fig. 4 of the paper)
  for c = 1:model.numcomponents
    % root score + offset
    score = rootmatch{ridx{c}} + model.offsets{oidx{c}}.w;

    % add in parts
    for j = 1:numparts{c}
      def = model.defs{didx{c,j}}.w;
      anchor = model.defs{didx{c,j}}.anchor;
      % the anchor position is shifted to account for misalignment
      % between features at different resolutions
      ax{c,j} = anchor(1) + 1;
      ay{c,j} = anchor(2) + 1;
      match = partmatch{pidx{c,j}};
      % dt is called as dt(vals, ax, bx, ay, by); Ix/Iy hold, for every
      % probe position, the part location selected by the transform
      [M, Ix{c,j}, Iy{c,j}] = dt(-match, def(1), def(2), def(3), def(4));
      % sample M on a stride-2 grid starting at the anchor so that it
      % lines up with the root score map
      score = score - M(ay{c,j}:2:ay{c,j}+2*(size(score,1)-1), ...
                        ax{c,j}:2:ax{c,j}+2*(size(score,2)-1));
    end

    if ~latent
      % get all good matches
      I = find(score > thresh);
      [Y, X] = ind2sub(size(score), I);
      % one row per match: root box, part boxes, component index, score
      tmp = zeros(length(I), 4*(1+numparts{c})+2);
      for i = 1:length(I)
        x = X(i);
        y = Y(i);
        [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize{c});
        b = [x1 y1 x2 y2];
        if write
          rblocklabel = model.rootfilters{ridx{c}}.blocklabel;
          oblocklabel = model.offsets{oidx{c}}.blocklabel;
          f = featr(y:y+rsize{c}(1)-1, x:x+rsize{c}(2)-1, :);
          xc = round(x + rsize{c}(2)/2 - padx);
          yc = round(y + rsize{c}(1)/2 - pady);
          ex = [];
          ex.header = [label; id; level; xc; yc; ...
                       model.components{c}.numblocks; ...
                       model.components{c}.dim];
          ex.offset.bl = oblocklabel;
          ex.offset.w = 1;
          ex.root.bl = rblocklabel;
          % fold the flipped right half onto the left half and keep only
          % the left width1 columns
          width1 = ceil(rsize{c}(2)/2);
          width2 = floor(rsize{c}(2)/2);
          f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
          ex.root.w = f(:,1:width1,:);
          ex.part = [];
        end
        for j = 1:numparts{c}
          [probex, probey, px, py, px1, py1, px2, py2] = ...
              partbox(x, y, ax{c,j}, ay{c,j}, scale, padx, pady, ...
                      psize{c,j}, Ix{c,j}, Iy{c,j});
          b = [b px1 py1 px2 py2];
          if write
            % fake parts contribute a box but no feature block
            if model.partfilters{pidx{c,j}}.fake
              continue;
            end
            pblocklabel = model.partfilters{pidx{c,j}}.blocklabel;
            dblocklabel = model.defs{didx{c,j}}.blocklabel;
            f = featp(py:py+psize{c,j}(1)-1,px:px+psize{c,j}(2)-1,:);
            def = -[(probex-px)^2; probex-px; (probey-py)^2; probey-py];
            partner = model.partfilters{pidx{c,j}}.partner;
            if partner > 0
              k = rpidx{partner}(2);
              [kprobex, kprobey, kpx, kpy, kpx1, kpy1, kpx2, kpy2] = ...
                  partbox(x, y, ax{c,k}, ay{c,k}, scale, padx, pady, ...
                          psize{c,k}, Ix{c,k}, Iy{c,k});
              kf = featp(kpy:kpy+psize{c,k}(1)-1,kpx:kpx+psize{c,k}(2)-1,:);
              % flip linear term in horizontal deformation model
              kdef = -[(kprobex-kpx)^2; kpx-kprobex; ...
                       (kprobey-kpy)^2; kprobey-kpy];
              f = f + flipfeat(kf);
              def = def + kdef;
            else
              width1 = ceil(psize{c,j}(2)/2);
              width2 = floor(psize{c,j}(2)/2);
              f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
              f = f(:,1:width1,:);
            end
            ex.part(j).bl = pblocklabel;
            ex.part(j).w = f;
            ex.def(j).bl = dblocklabel;
            ex.def(j).w = def;
          end
        end
        if write
          % in non-latent mode every match above thresh is written
          exwrite(fid, ex);
        end
        tmp(i,:) = [b c score(I(i))];
      end
      boxes = [boxes; tmp];
    end

    if latent
      % get best match
      for x = 1:size(score,2)
        for y = 1:size(score,1)
          if score(y, x) > best
            % image-space box of the root filter whose top-left cell is (y, x)
            [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize{c});
            % intersection with bbox
            xx1 = max(x1, bbox(1));
            yy1 = max(y1, bbox(2));
            xx2 = min(x2, bbox(3));
            yy2 = min(y2, bbox(4));
            w = (xx2-xx1+1);
            h = (yy2-yy1+1);
            if w > 0 && h > 0
              % check overlap with bbox: intersection area over union area
              inter = w*h;
              a = (x2-x1+1) * (y2-y1+1);                       % root box area
              b = (bbox(3)-bbox(1)+1) * (bbox(4)-bbox(2)+1);   % bbox area
              o = inter / (a+b-inter);
              if (o >= overlap)
                best = score(y, x);
                % boxes and ex are overwritten on every improvement, so
                % only the best-scoring placement survives
                boxes = [x1 y1 x2 y2];
                if write
                  f = featr(y:y+rsize{c}(1)-1, x:x+rsize{c}(2)-1, :);
                  rblocklabel = model.rootfilters{ridx{c}}.blocklabel;
                  oblocklabel = model.offsets{oidx{c}}.blocklabel;
                  xc = round(x + rsize{c}(2)/2 - padx);
                  yc = round(y + rsize{c}(1)/2 - pady);
                  ex = [];
                  ex.header = [label; id; level; xc; yc; ...
                               model.components{c}.numblocks; ...
                               model.components{c}.dim];
                  ex.offset.bl = oblocklabel;
                  ex.offset.w = 1;
                  ex.root.bl = rblocklabel;
                  width1 = ceil(rsize{c}(2)/2);
                  width2 = floor(rsize{c}(2)/2);
                  f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
                  ex.root.w = f(:,1:width1,:);
                  ex.part = [];
                end
                for j = 1:numparts{c}
                  % probex/probey index the part maps; px1..py2 is the part
                  % box in image coordinates
                  [probex, probey, px, py, px1, py1, px2, py2] = ...
                      partbox(x, y, ax{c,j}, ay{c,j}, scale, ...
                              padx, pady, psize{c,j}, Ix{c,j}, Iy{c,j});
                  boxes = [boxes px1 py1 px2 py2];
                  if write
                    if model.partfilters{pidx{c,j}}.fake
                      continue;
                    end
                    p = featp(py:py+psize{c,j}(1)-1, ...
                              px:px+psize{c,j}(2)-1, :);
                    def = -[(probex-px)^2; probex-px; (probey-py)^2; probey-py];
                    pblocklabel = model.partfilters{pidx{c,j}}.blocklabel;
                    dblocklabel = model.defs{didx{c,j}}.blocklabel;
                    partner = model.partfilters{pidx{c,j}}.partner;
                    if partner > 0
                      k = rpidx{partner}(2);
                      [kprobex, kprobey, kpx, kpy, kpx1, kpy1, kpx2, kpy2] = ...
                          partbox(x, y, ax{c,k}, ay{c,k}, scale, padx, pady, ...
                                  psize{c,k}, Ix{c,k}, Iy{c,k});
                      kp = featp(kpy:kpy+psize{c,k}(1)-1, ...
                                 kpx:kpx+psize{c,k}(2)-1, :);
                      % flip linear term in horizontal deformation model
                      kdef = -[(kprobex-kpx)^2; kpx-kprobex; ...
                               (kprobey-kpy)^2; kprobey-kpy];
                      p = p + flipfeat(kp);
                      def = def + kdef;
                    else
                      width1 = ceil(psize{c,j}(2)/2);
                      width2 = floor(psize{c,j}(2)/2);
                      p(:,1:width2,:) = p(:,1:width2,:) + ...
                          flipfeat(p(:,width1+1:end,:));
                      p = p(:,1:width1,:);
                    end
                    ex.part(j).bl = pblocklabel;
                    ex.part(j).w = p;
                    ex.def(j).bl = dblocklabel;
                    ex.def(j).w = def;
                  end
                end
                boxes = [boxes c best];
              end
            end
          end
        end
      end
    end
  end
end

% in latent mode only the single best example is written, after all levels
if latent && write && ~isempty(ex)
  exwrite(fid, ex);
end


% The functions below compute a bounding box for a root or part
% template placed in the feature hierarchy.
%
% coordinates need to be transformed to take into account:
% 1. padding from convolution
% 2. scaling due to sbin & image subsampling
% 3. offset from feature computation
%

function [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize)
% Box of a root filter of size rsize = [rows cols] placed at cell (x, y)
% of the padded feature map: remove the padding, then scale.
x1 = (x-padx)*scale+1;
y1 = (y-pady)*scale+1;
x2 = x1 + rsize(2)*scale - 1;
y2 = y1 + rsize(1)*scale - 1;


function [probex, probey, px, py, px1, py1, px2, py2] = ...
    partbox(x, y, ax, ay, scale, padx, pady, psize, Ix, Iy)
% Box of a part of size psize = [rows cols] for the root placed at (x, y).
% (probex, probey) is the anchor-shifted probe position in the part maps,
% which have twice the root resolution; (px, py) is the part location
% stored for that probe in Ix/Iy.
probex = (x-1)*2+ax;
probey = (y-1)*2+ay;
px = double(Ix(probey, probex));
py = double(Iy(probey, probex));
% the part map was padded with twice the root padding, hence the /2
px1 = ((px-2)/2+1-padx)*scale+1;
py1 = ((py-2)/2+1-pady)*scale+1;
px2 = px1 + psize(2)*scale/2 - 1;
py2 = py1 + psize(1)*scale/2 - 1;


% write an example to the data file
function exwrite(fid, ex)
% The header is written as int32; the offset, root, part and deformation
% blocks (each preceded by its block label) are written as single.
% Parts with an empty weight block are skipped.
fwrite(fid, ex.header, 'int32');
buf = [ex.offset.bl; ex.offset.w(:); ...
       ex.root.bl; ex.root.w(:)];
fwrite(fid, buf, 'single');
for j = 1:length(ex.part)
  if ~isempty(ex.part(j).w)
    buf = [ex.part(j).bl; ex.part(j).w(:); ...
           ex.def(j).bl; ex.def(j).w(:)];
    fwrite(fid, buf, 'single');
  end
end
features.cc
#include <math.h> #include "mex.h" // small value, used to avoid division by zero #define eps 0.0001 #define bzero(a, b) memset(a, 0, b) int round(float a) { float tmp = a - (int)a; if( tmp >= 0.5 ) return (int)a + 1; else return (int)a; } // unit vectors used to compute gradient orientation // cos(20*i) double uu[9] = {1.0000, 0.9397, 0.7660, 0.500, 0.1736, -0.1736, -0.5000, -0.7660, -0.9397}; //sin(20*i) double vv[9] = {0.0000, 0.3420, 0.6428, 0.8660, 0.9848, 0.9848, 0.8660, 0.6428, 0.3420}; static inline double min(double x, double y) { return (x <= y ? x : y); } static inline double max(double x, double y) { return (x <= y ? y : x); } static inline int min(int x, int y) { return (x <= y ? x : y); } static inline int max(int x, int y) { return (x <= y ? y : x); } // main function: // takes a double color image and a bin size // returns HOG features mxArray *process(const mxArray *mximage, const mxArray *mxsbin) { double *im = (double *)mxGetPr(mximage); const int *dims = mxGetDimensions(mximage); if (mxGetNumberOfDimensions(mximage) != 3 || dims[2] != 3 || mxGetClassID(mximage) != mxDOUBLE_CLASS) mexErrMsgTxt("Invalid input"); int sbin = (int)mxGetScalar(mxsbin); // memory for caching orientation histograms & their norms int blocks[2]; blocks[0] = (int)round((double)dims[0]/(double)sbin);//行 blocks[1] = (int)round((double)dims[1]/(double)sbin);//列 double *hist = (double *)mxCalloc(blocks[0]*blocks[1]*18, sizeof(double));//只需要计算18bin,9bin的推 double *norm = (double *)mxCalloc(blocks[0]*blocks[1], sizeof(double)); // memory for HOG features int out[3];//size out[0] = max(blocks[0]-2, 0);//减去2干嘛?? 
out[1] = max(blocks[1]-2, 0); out[2] = 27+4; mxArray *mxfeat = mxCreateNumericArray(3, out, mxDOUBLE_CLASS, mxREAL);//特征,size=out double *feat = (double *)mxGetPr(mxfeat); int visible[2]; visible[0] = blocks[0]*sbin; visible[1] = blocks[1]*sbin; //先列再行 for (int x = 1; x < visible[1]-1; x++) { for (int y = 1; y < visible[0]-1; y++) { // first color channel double *s = im + min(x, dims[1]-2)*dims[0] + min(y, dims[0]-2);//在im中的位置 double dy = *(s+1) - *(s-1); double dx = *(s+dims[0]) - *(s-dims[0]); //坐标系是一样的,c和matlab的存储顺序不一样 double v = dx*dx + dy*dy; // second color channel s += dims[0]*dims[1]; double dy2 = *(s+1) - *(s-1); double dx2 = *(s+dims[0]) - *(s-dims[0]); double v2 = dx2*dx2 + dy2*dy2; // third color channel s += dims[0]*dims[1]; double dy3 = *(s+1) - *(s-1); double dx3 = *(s+dims[0]) - *(s-dims[0]); double v3 = dx3*dx3 + dy3*dy3; // pick channel with strongest gradient,计算v if (v2 > v) { v = v2; dx = dx2; dy = dy2; } if (v3 > v) { v = v3; dx = dx3; dy = dy3; } // snap to one of 18 orientations,就算角度best_o double best_dot = 0; int best_o = 0; for (int o = 0; o < 9; o++) { // (sinθ)^2+(cosθ)^2 =1 // max cosθ*dx+ sinθ*dy 对其求导,可得极大值 θ = arctan dy/dx double dot = uu[o]*dx + vv[o]*dy; if (dot > best_dot) { best_dot = dot; best_o = o; } else if (-dot > best_dot) { best_dot = -dot; best_o = o+9; } } // add to 4 histograms around pixel using linear interpolation double xp = ((double)x+0.5)/(double)sbin - 0.5; double yp = ((double)y+0.5)/(double)sbin - 0.5; int ixp = (int)floor(xp); int iyp = (int)floor(yp); double vx0 = xp-ixp; double vy0 = yp-iyp; double vx1 = 1.0-vx0; double vy1 = 1.0-vy0; v = sqrt(v); //左上角 if (ixp >= 0 && iyp >= 0) { *(hist + ixp*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) += vx1*vy1*v; } //右上角 if (ixp+1 < blocks[1] && iyp >= 0) { *(hist + (ixp+1)*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) += vx0*vy1*v; } //左下角 if (ixp >= 0 && iyp+1 < blocks[0]) { *(hist + ixp*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) += vx1*vy0*v; } //右下角 if (ixp+1 
< blocks[1] && iyp+1 < blocks[0]) { *(hist + (ixp+1)*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) += vx0*vy0*v; } } } // compute energy in each block by summing over orientations //计算每一个cell的 sum( ( v(oi)+v(oi+9) )^2 ),oi=0..8 for (int o = 0; o < 9; o++) { double *src1 = hist + o*blocks[0]*blocks[1]; double *src2 = hist + (o+9)*blocks[0]*blocks[1]; double *dst = norm; double *end = norm + blocks[1]*blocks[0]; while (dst < end) { *(dst++) += (*src1 + *src2) * (*src1 + *src2); src1++; src2++; } } // compute features for (int x = 0; x < out[1]; x++) { for (int y = 0; y < out[0]; y++) { double *dst = feat + x*out[0] + y; double *src, *p, n1, n2, n3, n4; p = norm + (x+1)*blocks[0] + y+1;//右下角的constrain insensitive sum n1 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps); p = norm + (x+1)*blocks[0] + y;//右边 n2 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps); p = norm + x*blocks[0] + y+1;//下边 n3 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps); p = norm + x*blocks[0] + y;//自己 n4 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps); double t1 = 0; double t2 = 0; double t3 = 0; double t4 = 0; // contrast-sensitive features src = hist + (x+1)*blocks[0] + (y+1); for (int o = 0; o < 18; o++) { double h1 = min(*src * n1, 0.2);//截短 double h2 = min(*src * n2, 0.2); double h3 = min(*src * n3, 0.2); double h4 = min(*src * n4, 0.2); *dst = 0.5 * (h1 + h2 + h3 + h4);//求和 t1 += h1; t2 += h2; t3 += h3; t4 += h4; dst += out[0]*out[1];//下一个bin src += blocks[0]*blocks[1]; } // contrast-insensitive features src = hist + (x+1)*blocks[0] + (y+1); for (int o = 0; o < 9; o++) { double sum = *src + *(src + 9*blocks[0]*blocks[1]); double h1 = min(sum * n1, 0.2); double h2 = min(sum * n2, 0.2); double h3 = min(sum * n3, 0.2); double h4 = min(sum * n4, 0.2); *dst = 0.5 * (h1 + h2 + h3 + h4); dst += out[0]*out[1]; src += blocks[0]*blocks[1]; } // texture features *dst = 0.2357 * t1; dst += out[0]*out[1]; *dst = 0.2357 * 
t2; dst += out[0]*out[1]; *dst = 0.2357 * t3; dst += out[0]*out[1]; *dst = 0.2357 * t4; } } mxFree(hist); mxFree(norm); return mxfeat; } // matlab entry point // F = features(image, bin) // image should be color with double values void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { if (nrhs != 2) mexErrMsgTxt("Wrong number of inputs"); if (nlhs != 1) mexErrMsgTxt("Wrong number of outputs"); plhs[0] = process(prhs[0], prhs[1]); }
dt.cc
#include <math.h> #include <sys/types.h> #include "mex.h" #define int32_t int /* * Generalized distance transforms. * We use a simple nlog(n) divide and conquer algorithm instead of the * theoretically faster linear method, for no particular reason except * that this is a bit simpler and I wanted to test it out. * * The code is a bit convoluted because dt1d can operate either along * a row or column of an array. */ static inline int square(int x) { return x*x; } // dt helper function void dt_helper(double *src, double *dst, int *ptr, int step, int s1, int s2, int d1, int d2, double a, double b) { if (d2 >= d1) { int d = (d1+d2) >> 1; int s = s1; for (int p = s1+1; p <= s2; p++) if (src[s*step] + a*square(d-s) + b*(d-s) > src[p*step] + a*square(d-p) + b*(d-p)) s = p; dst[d*step] = src[s*step] + a*square(d-s) + b*(d-s); ptr[d*step] = s; dt_helper(src, dst, ptr, step, s1, s, d1, d-1, a, b); dt_helper(src, dst, ptr, step, s, s2, d+1, d2, a, b); } } // dt of 1d array void dt1d(double *src, double *dst, int *ptr, int step, int n, double a, double b) { dt_helper(src, dst, ptr, step, 0, n-1, 0, n-1, a, b); } // matlab entry point // [M, Ix, Iy] = dt(vals, ax, bx, ay, by) void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { if (nrhs != 5) mexErrMsgTxt("Wrong number of inputs"); if (nlhs != 3) mexErrMsgTxt("Wrong number of outputs"); if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS) mexErrMsgTxt("Invalid input"); const int *dims = mxGetDimensions(prhs[0]); double *vals = (double *)mxGetPr(prhs[0]); double ax = mxGetScalar(prhs[1]); double bx = mxGetScalar(prhs[2]); double ay = mxGetScalar(prhs[3]); double by = mxGetScalar(prhs[4]); mxArray *mxM = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL); mxArray *mxIx = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); mxArray *mxIy = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL); double *M = (double *)mxGetPr(mxM); int32_t *Ix = (int32_t *)mxGetPr(mxIx); int32_t *Iy = (int32_t *)mxGetPr(mxIy); 
double *tmpM = (double *)mxCalloc(dims[0]*dims[1], sizeof(double)); // part map int32_t *tmpIx = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t)); int32_t *tmpIy = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t)); for (int x = 0; x < dims[1]; x++) dt1d(vals+x*dims[0], tmpM+x*dims[0], tmpIy+x*dims[0], 1, dims[0], ay, by); for (int y = 0; y < dims[0]; y++) dt1d(tmpM+y, M+y, tmpIx+y, dims[0], dims[1], ax, bx); // get argmins and adjust for matlab indexing from 1 for (int x = 0; x < dims[1]; x++) { for (int y = 0; y < dims[0]; y++) { int p = x*dims[0]+y; Ix[p] = tmpIx[p]+1; Iy[p] = tmpIy[tmpIx[p]*dims[0]+y]+1; } } mxFree(tmpM); mxFree(tmpIx); mxFree(tmpIy); plhs[0] = mxM; plhs[1] = mxIx; plhs[2] = mxIy; }