DPM (Deformable Parts Model) Principles
First, the top-level calling convention:
example:
pascal('person', 2); % train and evaluate a 2 component person model
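For context, pascal.m itself is only a thin driver around the functions dissected below. A minimal sketch of what it roughly does in this release (paraphrased from memory, signatures approximate, not the verbatim source):

function model = pascal(cls, n)
% Sketch: train an n-component model for class cls, run the detector
% over the test set, then score the detections with the PASCAL
% evaluation protocol.
globals;
model = pascal_train(cls, n);             % walked through below
boxes = pascal_test(cls, model, 'test');  % detect on the test images
pascal_eval(cls, boxes, 'test');          % report average precision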
pascal_train.m
function model = pascal_train(cls, n)  % n = 2

% model = pascal_train(cls)
% Train a model using the PASCAL dataset.

globals;

% ---------- read positive and negative examples ----------
% pos.im / neg.im hold the image paths; pos.x1..pos.y2 are the bounding
% boxes; negatives carry no boxes.
[pos, neg] = pascal_data(cls);

% Split the positives by aspect ratio into n equally sized groups.
% This fixes each component label for now; in phase 2 it becomes a
% latent variable. spos holds the split indices.
spos = split(pos, n);

% ---------- phase 1: train root filters using warped positives ----------
% ---------- and random negatives                               ----------
try
  load([cachedir cls '_random']);
catch
  % initialize root filters
  for i = 1:n
    models{i} = initmodel(spos{i});
    % train model.rootfilters{i}.w and model.offsets{i}.w
    models{i} = train(cls, models{i}, spos{i}, neg, 1, 1, 1, 1, 2^28);
  end
  save([cachedir cls '_random'], 'models');
end

% ---------- phase 2: merge models and train using latent ----------
% ---------- detections and hard negatives                ----------
try
  load([cachedir cls '_hard']);
catch
  model = mergemodels(models);
  model = train(cls, model, pos, neg(1:200), 0, 0, 2, 2, 2^28, true, 0.7);
  save([cachedir cls '_hard'], 'model');
end

% ---------- phase 3: add parts and update models using ----------
% ---------- latent detections and hard negatives       ----------
try
  load([cachedir cls '_parts']);
catch
  for i = 1:n
    model = addparts(model, i, 6);
  end
  % use more data mining iterations in the beginning
  model = train(cls, model, pos, neg(1:200), 0, 0, 1, 4, 2^30, true, 0.7);
  model = train(cls, model, pos, neg(1:200), 0, 0, 6, 2, 2^30, true, 0.7, true);
  save([cachedir cls '_parts'], 'model');
end

% update models using full set of negatives
try
  load([cachedir cls '_mine']);
catch
  model = train(cls, model, pos, neg, 0, 0, 1, 3, 2^30, true, 0.7, true, ...
                0.003*model.numcomponents, 2);
  save([cachedir cls '_mine'], 'model');
end

% train bounding box prediction
try
  load([cachedir cls '_final']);
catch
  % The paper calls for least squares here, yet the code just divides
  % directly, without considering singularity of the matrix.
  model = trainbox(cls, model, pos, 0.7);
  save([cachedir cls '_final'], 'model');
end
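Every train call above hands the optimization to learn.cc (analyzed below), which minimizes the latent-SVM objective of Felzenszwalb et al.; writing it out once makes the later code comments easier to follow. C and J are the first two arguments that train forwards to learn.cc; J reweights the loss on positives:

$$
L_D(\beta)=\tfrac{1}{2}\lVert\beta\rVert^{2}+C\sum_{i=1}^{n}J_i\,\max\bigl(0,\;1-y_i f_\beta(x_i)\bigr),\qquad
f_\beta(x)=\max_{z\in Z(x)}\beta\cdot\Phi(x,z)
$$

where $Z(x)$ ranges over the latent choices (component and filter placements), and $J_i=J$ for positives, $1$ for negatives.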
initmodel.m
function model = initmodel(pos, sbin, size)

% model = initmodel(pos, sbin, size)
% Initialize model structure.
%
% If not supplied the dimensions of the model template are computed
% from statistics in the positive examples.
%
% This should be documented! :-)
% Field reference (values as used here):
% model.sbin           8
% model.interval       10
% model.numblocks      2 in phase 1 (offset + root filter only); 4 in phase 2
% model.numcomponents  1
% model.blocksizes     (1) = 1, (2) = root.h * root.w/2 * 31
% model.regmult        0, 1
% model.learnmult      20, 1
% model.maxsize        size of the root filter
% model.minsize
% model.rootfilters{i}
%   .size        in sbin units; derived from the pooled h/w and area
%                statistics of the positive examples
%   .w
%   .blocklabel  blocklabels form one shared numbering across offsets (2),
%                rootfilters (2), partfilters (12 or fewer) and defs (one
%                per part): different meanings, a single sequence of ids
% model.partfilters{i}
%   .w
%   .blocklabel
% model.defs{i}
%   .anchor
%   .w
%   .blocklabel
% model.offsets{i}
%   .w           0
%   .blocklabel  1
% model.components{i}
%   .rootindex   1
%   .parts{j}
%     .partindex
%     .defindex
%   .offsetindex 1
%   .dim         2 + model.blocksizes(1) + model.blocksizes(2)
%   .numblocks   2

% pick mode of aspect ratios
h = [pos(:).y2]' - [pos(:).y1]' + 1;
w = [pos(:).x2]' - [pos(:).x1]' + 1;
xx = -2:.02:2;
filter = exp(-[-100:100].^2/400);  % Gaussian smoothing kernel, weights e^-25 .. 1
aspects = hist(log(h./w), xx);
aspects = convn(aspects, filter, 'same');
[peak, I] = max(aspects);
aspect = exp(xx(I));  % mode of the smoothed h/w histogram: the "typical" aspect

% pick 20 percentile area
areas = sort(h.*w);
area = areas(floor(length(areas) * 0.2));
% boxes larger than this can be scaled down; what about the smaller ones?
area = max(min(area, 5000), 3000);  % clamp to [3000, 5000] pixels

% pick dimensions
w = sqrt(area/aspect);
h = w*aspect;

% size of HOG features
% (the original guards tested nargin < 4 / nargin < 5, which with a
% three-argument signature are always true; corrected here)
if nargin < 2
  model.sbin = 8;
else
  model.sbin = sbin;
end

% size of root filter
if nargin < 3
  model.rootfilters{1}.size = [round(h/model.sbin) round(w/model.sbin)];
else
  model.rootfilters{1}.size = size;
end

% set up offset
model.offsets{1}.w = 0;
model.offsets{1}.blocklabel = 1;
model.blocksizes(1) = 1;
model.regmult(1) = 0;
model.learnmult(1) = 20;
model.lowerbounds{1} = -100;

% set up root filter
model.rootfilters{1}.w = zeros([model.rootfilters{1}.size 31]);
height = model.rootfilters{1}.size(1);
% the root filter is mirror symmetric, so only half its width is learned
width = ceil(model.rootfilters{1}.size(2)/2);
model.rootfilters{1}.blocklabel = 2;
model.blocksizes(2) = width * height * 31;
model.regmult(2) = 1;
model.learnmult(2) = 1;
model.lowerbounds{2} = -100*ones(model.blocksizes(2),1);

% set up one component model
model.components{1}.rootindex = 1;
model.components{1}.offsetindex = 1;
model.components{1}.parts = {};
model.components{1}.dim = 2 + model.blocksizes(1) + model.blocksizes(2);
model.components{1}.numblocks = 2;

% initialize the rest of the model structure
model.interval = 10;
model.numcomponents = 1;
model.numblocks = 2;
model.partfilters = {};
model.defs = {};
model.maxsize = model.rootfilters{1}.size;
model.minsize = model.rootfilters{1}.size;
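A worked example of the sizing above (the numbers are invented for illustration): suppose the smoothed histogram peaks at aspect $a=2$ and the 20th-percentile area is $A=4000$, already inside the $[3000,5000]$ clamp. Then

$$
w=\sqrt{A/a}=\sqrt{2000}\approx 44.7,\qquad h=a\,w\approx 89.4,
$$

so with model.sbin = 8 the root filter spans $[\,\mathrm{round}(89.4/8)\;\;\mathrm{round}(44.7/8)\,]=11\times 6$ HOG cells, and because only half the width is learned, model.blocksizes(2) $=11\cdot\lceil 6/2\rceil\cdot 31=1023$.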
learn.cc
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/time.h>
#include <errno.h>

/*
 * Optimize LSVM objective function via gradient descent.
 *
 * We use an adaptive cache mechanism. After a negative example
 * scores beyond the margin multiple times it is removed from the
 * training set for a fixed number of iterations.
 */

// Data File Format
// EXAMPLE*
//
// EXAMPLE:
//  long label          ints
//  blocks              int
//  dim                 int
//  DATA{blocks}
//
// DATA:
//  block label         float
//  block data          floats
//
// Internal Binary Format
//  len           int (byte length of EXAMPLE)
//  EXAMPLE       <see above>
//  unique flag   byte

// number of iterations
#define ITER 5000000

// small cache parameters
#define INCACHE 3
#define WAIT 10

// error checking
#define check(e) \
  (e ? (void)0 : (printf("%s:%u error: %s\n%s\n", __FILE__, __LINE__, #e, strerror(errno)), exit(1)))

// number of non-zero blocks in example ex
#define NUM_NONZERO(ex) (((int *)ex)[labelsize+1])

// float pointer to data segment of example ex
#define EX_DATA(ex) ((float *)(ex + sizeof(int)*(labelsize+3)))

// class label (+1 or -1) for the example
#define LABEL(ex) (((int *)ex)[1])

// block label (converted to 0-based index)
#define BLOCK_IDX(data) (((int)data[0])-1)

int labelsize;
int dim;

// comparison function for sorting examples
// memcmp reference: http://blog.sina.com.cn/s/blog_5155e8d401009145.html
int comp(const void *a, const void *b) {
  // sort by extended label first, and whole example second...
  // memcmp compares byte by byte: <0 if buf1 < buf2, 0 if equal, >0 otherwise.
  // The extended label is the 5 ints [label id level x y], so examples are
  // ordered by class -> id -> level -> x -> y.
  int c = memcmp(*((char **)a) + sizeof(int),
                 *((char **)b) + sizeof(int),
                 labelsize*sizeof(int));  // labelsize = 5
  if (c)  // labels differ
    return c;

  // labels are the same. (How can that happen? Ids increase from 1 within the
  // positive and negative sets, but in phase 2 all examples cut from the same
  // image share one id.)
  int alen = **((int **)a);
  int blen = **((int **)b);
  if (alen == blen)  // equal lengths: brute force, compare every byte
    return memcmp(*((char **)a) + sizeof(int),
                  *((char **)b) + sizeof(int), alen);
  return ((alen < blen) ? -1 : 1);  // otherwise order by length
}

// a collapsed example is a sequence of examples
struct collapsed {
  char **seq;
  int num;
};

// set of collapsed examples
struct data {
  collapsed *x;
  int num;
  int numblocks;
  int *blocksizes;
  float *regmult;
  float *learnmult;
};

// seed the random number generator with the current time
void seed_time() {
  struct timeval tp;
  check(gettimeofday(&tp, NULL) == 0);
  srand48((long)tp.tv_usec);
}

static inline double min(double x, double y) { return (x <= y ? x : y); }
static inline double max(double x, double y) { return (x <= y ? y : x); }
// gradient descent
// Follows the steps listed after Eq. 17 in the paper.
void gd(double C, double J, data X, double **w, double **lb) {
  // called with C=0.0002, J=1, w initialized to all zeros, lb to -100
  int num = X.num;  // number of collapsed groups

  // state for random permutations
  int *perm = (int *)malloc(sizeof(int)*X.num);
  check(perm != NULL);

  // state for small cache
  int *W = (int *)malloc(sizeof(int)*num);
  check(W != NULL);
  for (int j = 0; j < num; j++)
    W[j] = 0;

  int t = 0;
  while (t < ITER) {  // 5,000,000 iterations
    // pick random permutation
    for (int i = 0; i < num; i++)
      perm[i] = i;
    // The paper samples one example at random per step; the code instead
    // shuffles the whole set and walks it in order. This is just as random,
    // but guarantees every example is visited once per pass, so no single
    // example gets drawn (and acts) repeatedly.
    for (int swapi = 0; swapi < num; swapi++) {
      int swapj = (int)(drand48()*(num-swapi)) + swapi;  // drand48: uniform on [0,1)
      int tmp = perm[swapi];
      perm[swapi] = perm[swapj];
      perm[swapj] = tmp;
    }

    // count number of examples in the small cache
    int cnum = 0;  // actual number of iterations of the loop below
    for (int i = 0; i < num; i++) {
      if (W[i] <= INCACHE)  // 3
        cnum++;
    }

    for (int swapi = 0; swapi < num; swapi++) {
      // select example
      int i = perm[swapi];
      collapsed x = X.x[i];

      // skip if example is not in small cache
      // A negative's counter gains +1 each time it is classified correctly
      // and is reset to 0 when misclassified. Three correct passes in a row
      // mark it as a likely easy example, and it is benched for the next
      // several iterations.
      if (W[i] > INCACHE) {  // 3
        W[i]--;
        continue;
      }

      // learning rate (plain 1/t would be too large at the start)
      double T = t + 1000.0;
      double rateX = cnum * C / T;
      double rateR = 1.0 / T;

      if (t % 10000 == 0) {
        printf(".");
        fflush(stdout);  // flush stdout so the progress dots appear immediately
      }
      t++;

      // compute max over latent placements  ---- step 3 ----
      // Loop within the group and pick z_i = argmax_z beta . Phi(x, z).
      // When training root filters x.num == 1, because every randomly
      // generated negative has its own id.
      int M = -1;
      double V = 0;
      for (int m = 0; m < x.num; m++) {
        double val = 0;
        char *ptr = x.seq[m];
        float *data = EX_DATA(ptr);  // feature data starts at the 9th int;
        // layout: block1 label | block1 data | block2 label | block2 data
        //         1            | 1           | 2            | h*w/2*31 floats
        int blocks = NUM_NONZERO(ptr);  // 2 in phases 1 and 2: offset + root filter
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)  // (1)=1, (2)=root.h*root.w/2*31
            val += w[b][k] * data[k];  // zero on the first pass, since w starts at 0
          data += X.blocksizes[b];
        }
        if (M < 0 || val > V) {
          M = m;
          V = val;
        }
      }

      // update model  ---- step 4, plus half of step 5 ----
      // Gradient descent: shrink w. Whether the example is classified
      // correctly or not, subtract a_t * beta; see steps 4-5 below Eq. 17.
      for (int j = 0; j < X.numblocks; j++) {  // 2
        // regmult = {0,1}, learnmult = {20,1}: for block 2 the effective
        // rate a_t is 1/T; for block 1 (the offset) it is 0
        double mult = rateR * X.regmult[j] * X.learnmult[j];
        for (int k = 0; k < X.blocksizes[j]; k++) {
          w[j][k] -= mult * w[j][k];
        }
      }
      char *ptr = x.seq[M];
      int label = LABEL(ptr);

      // ---- step 5 ---- margin violated: move along the negative gradient
      if (label * V < 1.0) {
        W[i] = 0;
        float *data = EX_DATA(ptr);
        int blocks = NUM_NONZERO(ptr);
        for (int j = 0; j < blocks; j++) {
          int b = BLOCK_IDX(data);
          // y_i * cnum * C / T * learnmult; see Eqs. 16-17 in the paper
          double mult = (label > 0 ? J : -1) * rateX * X.learnmult[b];
          data++;
          for (int k = 0; k < X.blocksizes[b]; k++)
            w[b][k] += mult * data[k];
          data += X.blocksizes[b];
        }
      } else if (label == -1) {
        // correctly classified negative: bump its cache counter
        if (W[i] == INCACHE)  // 3
          W[i] = WAIT;        // 10
        else
          W[i]++;
      }
    }

    // apply lowerbounds
    for (int j = 0; j < X.numblocks; j++) {
      for (int k = 0; k < X.blocksizes[j]; k++) {
        w[j][k] = max(w[j][k], lb[j][k]);
      }
    }
  }

  free(perm);
  free(W);
}

// score examples
double *score(data X, char **examples, int num, double **w) {
  double *s = (double *)malloc(sizeof(double)*num);
  check(s != NULL);
  for (int i = 0; i < num; i++) {
    s[i] = 0.0;
    float *data = EX_DATA(examples[i]);
    int blocks = NUM_NONZERO(examples[i]);
    for (int j = 0; j < blocks; j++) {
      int b = BLOCK_IDX(data);
      data++;
      for (int k = 0; k < X.blocksizes[b]; k++)
        s[i] += w[b][k] * data[k];
      data += X.blocksizes[b];
    }
  }
  return s;
}

// merge examples with identical labels
void collapse(data *X, char **examples, int num) {  // called as collapse(&X, sorted, num_unique)
  collapsed *x = (collapsed *)malloc(sizeof(collapsed)*num);
  check(x != NULL);
  int i = 0;
  x[0].seq = examples;
  x[0].num = 1;
  for (int j = 1; j < num; j++) {
    if (!memcmp(x[i].seq[0]+sizeof(int), examples[j]+sizeof(int),
                labelsize*sizeof(int))) {
      // the five label ints match: same group
      x[i].num++;
    } else {
      i++;
      x[i].seq = &(examples[j]);
      x[i].num = 1;
    }
  }
  X->x = x;
  X->num = i+1;
}

// invoked as: learn C=0.0002 J=1 hdrfile datfile modfile inffile lobfile
int main(int argc, char **argv) {
  seed_time();
  int count;
  data X;

  // command line arguments
  check(argc == 8);
  double C = atof(argv[1]);
  double J = atof(argv[2]);
  char *hdrfile = argv[3];
  char *datfile = argv[4];
  char *modfile = argv[5];
  char *inffile = argv[6];
  char *lobfile = argv[7];

  // read header file
  FILE *f = fopen(hdrfile, "rb");
  check(f != NULL);
  int header[3];
  count = fread(header, sizeof(int), 3, f);
  check(count == 3);
  int num = header[0];      // total number of positive and negative examples
  labelsize = header[1];    // 5: [label id level x y]
  X.numblocks = header[2];  // 2
  X.blocksizes = (int *)malloc(X.numblocks*sizeof(int));  // (1)=1, (2)=root.h*root.w/2*31
  count = fread(X.blocksizes, sizeof(int), X.numblocks, f);
  check(count == X.numblocks);
  X.regmult = (float *)malloc(sizeof(float)*X.numblocks);  // 0, 1
  check(X.regmult != NULL);
  count = fread(X.regmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  X.learnmult = (float *)malloc(sizeof(float)*X.numblocks);  // 20, 1
  check(X.learnmult != NULL);
  count = fread(X.learnmult, sizeof(float), X.numblocks, f);
  check(count == X.numblocks);
  check(num != 0);
  fclose(f);
  printf("%d examples with label size %d and %d blocks\n",
         num, labelsize, X.numblocks);
  printf("block size, regularization multiplier, learning rate multiplier\n");
  dim = 0;
  for (int i = 0; i < X.numblocks; i++) {
    dim += X.blocksizes[i];
    printf("%d, %.2f, %.2f\n", X.blocksizes[i], X.regmult[i], X.learnmult[i]);
  }

  // ---- read positive and negative examples from datfile ----
  // examples[i] holds example i: 1 int + 7 ints + dim floats + 1 byte
  //   1 int       len: total byte length of the example including its header
  //   7 ints      [1/-1 id 0 0 0 2 dim]: [label id level center_x center_y],
  //               then block count (2) and feature dimension
  //   dim floats  the feature, dim = 2 + 1 + root.h*root.w/2*31, laid out as
  //               block1 label | block1 data | block2 label | block2 data
  //               1            | 1           | 2            | h*w/2*31 floats
  //   1 byte      unique flag, initially 0
  f = fopen(datfile, "rb");
  check(f != NULL);
  printf("Reading examples\n");
  char **examples = (char **)malloc(num*sizeof(char *));
  check(examples != NULL);
  for (int i = 0; i < num; i++) {
    // we use an extra byte in the end of each example to mark unique
    // we use an extra int at the start of each example to store the
    // example's byte length (excluding unique flag and this int)
    // [len | label id level x y | blocks dim | ... | unique], unique = 0
    int buf[labelsize+2];  // written out as [1/-1 i 0 0 0 2 dim]
    count = fread(buf, sizeof(int), labelsize+2, f);
    check(count == labelsize+2);
    // byte length of an example's data segment:
    // 7 header ints followed by dim floats, dim = 2 + 1 + root.h*root.w/2*31
    int len = sizeof(int)*(labelsize+2) + sizeof(float)*buf[labelsize+1];
    // memory for data, an initial integer, and a final byte
    examples[i] = (char *)malloc(sizeof(int)+len+1);
    check(examples[i] != NULL);
    // set data segment's byte length
    ((int *)examples[i])[0] = len;
    // set the unique flag to zero
    examples[i][sizeof(int)+len] = 0;
    // copy label data into example
    for (int j = 0; j < labelsize+2; j++)
      ((int *)examples[i])[j+1] = buf[j];
    // read the rest of the data segment into the example
    count = fread(examples[i]+sizeof(int)*(labelsize+3), 1,
                  len-sizeof(int)*(labelsize+2), f);
    check(count == len-sizeof(int)*(labelsize+2));
  }
  fclose(f);
  printf("done\n");

  // sort
  printf("Sorting examples\n");
  char **sorted = (char **)malloc(num*sizeof(char *));
  check(sorted != NULL);
  memcpy(sorted, examples, num*sizeof(char *));
  // qsort from the C library, with comp as the comparison function:
  // ascending order by class -> id -> level -> cx -> cy.
  // If all five label ints are equal:
  //   1. equal lengths: compare every byte;
  //   2. otherwise the shorter example sorts first (lengths differ because
  //      the components have different sizes).
  qsort(sorted, num, sizeof(char *), comp);
  printf("done\n");

  // find unique examples
  // Unique examples get flag 1. Within a run of duplicates only the first
  // gets 1 and the rest keep 0; duplicate slots in sorted are overwritten by
  // later unique examples, but the duplicates themselves are never freed.
  int i = 0;
  int len = *((int *)sorted[0]);  // the first negative
  sorted[0][sizeof(int)+len] = 1;  // mark it unique
  for (int j = 1; j < num; j++) {
    int alen = *((int *)sorted[i]);
    int blen = *((int *)sorted[j]);
    if (alen != blen ||
        memcmp(sorted[i] + sizeof(int), sorted[j] + sizeof(int), alen)) {
      // different component sizes, or genuinely different examples
      i++;
      sorted[i] = sorted[j];
      sorted[i][sizeof(int)+blen] = 1;  // mark unique
    }
  }
  int num_unique = i+1;
  printf("%d unique examples\n", num_unique);

  // ---- collapse examples ----
  // The step above removed exact duplicates; this one forms groups. Examples
  // whose five label ints [label id level center_x center_y] all match are
  // grouped (the labels were written into datfile by detect). For negatives
  // cx,cy are relative to the whole image; for positives, to the cropped
  // window. When can all five match?
  //   In phase 1, never: every positive and negative has a distinct id.
  //   In phase 2, each positive keeps only its most promising latent
  //   detection, so the only case left is rootfilter1 and rootfilter2 firing
  //   at the same (cx,cy) in the same image (same id), yielding hard
  //   negatives; a group should then hold at most 2 examples (to be
  //   verified). The latent variables here are (cx,cy,component), and in
  //   that situation only component 1 or component 2 can be kept.
  //   Later in training the two are consumed consecutively; why??
  // collapsed.seq (char **)  the first example of each group
  // collapsed.num            the number of examples in the group
  // X.num                    the number of groups
  // X.x = &collapsed[0], the address of the first group
  collapse(&X, sorted, num_unique);
  printf("%d collapsed examples\n", X.num);

  // initial model
  // Read modfile to initialize w: all zeros in phase 1, the previous
  // training result afterwards.
  double **w = (double **)malloc(sizeof(double *)*X.numblocks);  // 2
  check(w != NULL);
  f = fopen(modfile, "rb");
  for (int i = 0; i < X.numblocks; i++) {
    w[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);  // (1)=1, (2)=root.h*root.w/2*31
    check(w[i] != NULL);
    count = fread(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // lower bounds
  // Read lobfile: the filter weights are bounded below, all entries -100.
  double **lb = (double **)malloc(sizeof(double *)*X.numblocks);
  check(lb != NULL);
  f = fopen(lobfile, "rb");
  for (int i = 0; i < X.numblocks; i++) {
    lb[i] = (double *)malloc(sizeof(double)*X.blocksizes[i]);
    check(lb[i] != NULL);
    count = fread(lb[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  printf("Training");
  // ---- train ----
  // Gradient descent on w; see the steps after Eq. 17 in the paper.
  gd(C, J, X, w, lb);
  printf("done\n");

  // save model
  printf("Saving model\n");
  f = fopen(modfile, "wb");
  check(f != NULL);
  // store the trained w for block 1 and block 2
  for (int i = 0; i < X.numblocks; i++) {
    count = fwrite(w[i], sizeof(double), X.blocksizes[i], f);
    check(count == X.blocksizes[i]);
  }
  fclose(f);

  // score examples
  // Raw scores for all examples, not multiplied by the label y.
  printf("Scoring\n");
  double *s = score(X, examples, num, w);

  // ---- write info file ----
  printf("Writing info file\n");
  f = fopen(inffile, "w");
  check(f != NULL);
  for (int i = 0; i < num; i++) {
    int len = ((int *)examples[i])[0];
    // label, score, unique flag
    count = fprintf(f, "%d\t%f\t%d\n", ((int *)examples[i])[1], s[i],
                    (int)examples[i][sizeof(int)+len]);
    check(count > 0);
  }
  fclose(f);

  printf("Freeing memory\n");
  for (int i = 0; i < X.numblocks; i++) {
    free(w[i]);
    free(lb[i]);
  }
  free(w);
  free(lb);
  free(s);
  for (int i = 0; i < num; i++)
    free(examples[i]);
  free(examples);
  free(sorted);
  free(X.x);
  free(X.blocksizes);
  free(X.regmult);
  free(X.learnmult);
  return 0;
}
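To summarize what gd computes per iteration, here is the update transcribed into the paper's notation (a transcription of the code above, lower-bound clamp applied at the end):

$$
z^{*}=\arg\max_{z\in Z(x_i)}\beta\cdot\Phi(x_i,z),\qquad
\beta\leftarrow(1-\alpha_t)\,\beta,\qquad \alpha_t=\frac{1}{t+1000},
$$

$$
\text{if } y_i\,\beta\cdot\Phi(x_i,z^{*})<1:\qquad
\beta\leftarrow\beta+\alpha_t\cdot\mathit{cnum}\cdot C\cdot J_i\,y_i\,\Phi(x_i,z^{*}),
$$

where cnum (the number of examples currently in the small cache) plays the role of $n$ in Eq. 17, and $J_i=J$ for positives, $1$ for negatives. The shrink step uses rateR and is zeroed for the offset block (regmult = 0); the hinge step uses rateX and is amplified 20x for the offset block (learnmult = 20).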