我使用的是opencv2.4.9,cvCreateTreeCascadeClassifier的源码在......opencv\sources\apps\haartraining\cvhaartraining.cpp之中,这中间用到很多结构体、函数指针、宏等各方面的内容,请参考我博客中的文章opencv源码详细解读目录。如需转载请注明本博网址http://blog.csdn.net/ding977921830?viewmode=contents。具体内容如下:
/* *创建级联分类器 * dirname -将创建目录分类器的目录名 * 该目录名必须存在,其包含的子目录为 0, 1, 2, ... (nstages-1). * vecfilename - .vec文件,它包括包含样本图像的文件名 * bgfilename - 背景描述文件名 * bg_vecfile - 如果bgfilename 代表一个包含离散负样本文件,那么为true * npos - 在每一个强分类器训练所需要的正样本数 * nneg - 在每一个强分类器训练所需要的负样本数 * nstages - 强分类器的层数 * numprecalculated - 预计算的特征数量,每一个预计算的特征需要的内存大小为: (number_of_samples*(sizeof( float ) + sizeof( short ))) bytes * numsplits -每个弱分类器的分支数 * 1 - stumps, 2 and more - trees. * minhitrate - 每一级的最小击中率 * maxfalsealarm - 每一级的最大虚警率 * weightfraction - 剔除小样本的权重 * mode - 0 - BASIC = Viola * 1 - CORE = All upright * 2 - ALL = All features * symmetric - 非0表示垂直对称 * equalweights - 非0表示所有样本的初始化权重相等 * winwidth - 样本的宽 * winheight - 样本的高 * boosttype - 应用的提升算法类型 * 0 - Discrete AdaBoost * 1 - Real AdaBoost * 2 - LogitBoost * 3 - Gentle AdaBoost * stumperror - 如果是 Discrete AdaBoost算法时使用的树桩错误率 * 0 - 错分不纯度 * 1 - 基尼不纯度 * 2 - 熵不纯度 */ void cvCreateTreeCascadeClassifier( const char* dirname, const char* vecfilename, const char* bgfilename, int npos, int nneg, int nstages, int numprecalculated, int numsplits, float minhitrate, float maxfalsealarm, float weightfraction, int mode, int symmetric, int equalweights, int winwidth, int winheight, int boosttype, int stumperror, int maxtreesplits, int minpos, bool bg_vecfile ) { CvTreeCascadeClassifier* tcc = NULL; CvIntHaarFeatures* haar_features = NULL; CvHaarTrainingData* training_data = NULL; CvMat* vals = NULL; CvMat* cluster_idx = NULL; CvMat* idx = NULL; CvMat* features_idx = NULL; CV_FUNCNAME( "cvCreateTreeCascadeClassifier" ); __BEGIN__; int i, k; CvTreeCascadeNode* leaves; int best_num, cur_num; CvSize winsize; char stage_name[PATH_MAX]; char buf[PATH_MAX]; char* suffix; int total_splits; int poscount; //每训练一个分类器实际取出的正样本数 int negcount; //每训练一个分类器实际取出的负样本数 int consumed; //每训练一个分类器查询过的正样本数量 double false_alarm; double proctime; int nleaves; double required_leaf_fa_rate; //叶子的虚警率 float neg_ratio; //负样本与正样本之比 int max_clusters; ///* max number of clusters */ //#define CV_MAX_CLUSTERS 3 
max_clusters = CV_MAX_CLUSTERS; neg_ratio = (float) nneg / npos; nleaves = 1 + MAX( 0, maxtreesplits ); //在c++中pow(x,y)是求x的y次方的意思。在此的含义是最大虚警率的nstages次方,再除以叶子总数即为叶子虚警率 required_leaf_fa_rate = pow( (double) maxfalsealarm, (double) nstages ) / nleaves; printf( "Required leaf false alarm rate: %g\n", required_leaf_fa_rate ); total_splits = 0; winsize = cvSize( winwidth, winheight ); CV_CALL( cluster_idx = cvCreateMat( 1, npos + nneg, CV_32SC1 ) ); CV_CALL( idx = cvCreateMat( 1, npos + nneg, CV_32SC1 ) ); CV_CALL( tcc = (CvTreeCascadeClassifier*) icvLoadTreeCascadeClassifier( dirname, winwidth + 1, &total_splits ) ); CV_CALL( leaves = icvFindDeepestLeaves( tcc ) ); CV_CALL( icvPrintTreeCascade( tcc->root ) ); haar_features = icvCreateIntHaarFeatures( winsize, mode, symmetric ); printf( "Number of features used : %d\n", haar_features->count ); training_data = icvCreateHaarTrainingData( winsize, npos + nneg ); sprintf( stage_name, "%s/", dirname ); suffix = stage_name + strlen( stage_name ); if (! 
bg_vecfile) if( !icvInitBackgroundReaders( bgfilename, winsize ) && nstages > 0 ) CV_ERROR( CV_StsError, "Unable to read negative images" ); if( nstages > 0 ) { /* width-first search in the tree */ do { CvSplit* first_split; CvSplit* last_split; CvSplit* cur_split; CvTreeCascadeNode* parent; CvTreeCascadeNode* cur_node; CvTreeCascadeNode* last_node; first_split = last_split = cur_split = NULL; parent = leaves; leaves = NULL; do { int best_clusters; /* best selected number of clusters */ float posweight, negweight; double leaf_fa_rate; if( parent ) sprintf( buf, "%d", parent->idx ); else sprintf( buf, "NULL" ); printf( "\nParent node: %s\n\n", buf ); printf( "*** 1 cluster ***\n" ); tcc->eval = icvEvalTreeCascadeClassifierFilter; /* find path from the root to the node <parent> */ icvSetLeafNode( tcc, parent ); /* load samples */ consumed = 0; poscount = icvGetHaarTrainingDataFromVec( training_data, 0, npos, (CvIntHaarClassifier*) tcc, vecfilename, &consumed ); printf( "POS: %d %d %f\n", poscount, consumed, ((double) poscount)/consumed ); if( poscount <= 0 ) CV_ERROR( CV_StsError, "Unable to obtain positive samples" ); fflush( stdout ); proctime = -TIME( 0 ); nneg = (int) (neg_ratio * poscount); negcount = icvGetHaarTrainingDataFromBG( training_data, poscount, nneg, (CvIntHaarClassifier*) tcc, &false_alarm, bg_vecfile ? bgfilename : NULL ); printf( "NEG: %d %g\n", negcount, false_alarm ); printf( "BACKGROUND PROCESSING TIME: %.2f\n", (proctime + TIME( 0 )) ); if( negcount <= 0 ) CV_ERROR( CV_StsError, "Unable to obtain negative samples" ); leaf_fa_rate = false_alarm; if( leaf_fa_rate <= required_leaf_fa_rate ) { printf( "Required leaf false alarm rate achieved. " "Branch training terminated.\n" ); } else if( nleaves == 1 && tcc->next_idx == nstages ) { printf( "Required number of stages achieved. 
" "Branch training terminated.\n" ); } else { CvTreeCascadeNode* single_cluster; CvTreeCascadeNode* multiple_clusters; int single_num; icvSetNumSamples( training_data, poscount + negcount ); posweight = (equalweights) ? 1.0F / (poscount + negcount) : (0.5F/poscount); negweight = (equalweights) ? 1.0F / (poscount + negcount) : (0.5F/negcount); icvSetWeightsAndClasses( training_data, poscount, posweight, 1.0F, negcount, negweight, 0.0F ); fflush( stdout ); /* precalculate feature values */ proctime = -TIME( 0 ); icvPrecalculate( training_data, haar_features, numprecalculated );//计算haar_features中的前numprecalculated个特征 printf( "Precalculation time: %.2f\n", (proctime + TIME( 0 )) ); /* train stage classifier using all positive samples */ CV_CALL( single_cluster = icvCreateTreeCascadeNode() ); fflush( stdout ); proctime = -TIME( 0 ); single_cluster->stage = (CvStageHaarClassifier*) icvCreateCARTStageClassifier( //创建一个级分类器 training_data, NULL, haar_features, minhitrate, maxfalsealarm, symmetric, weightfraction, numsplits, (CvBoostType) boosttype, (CvStumpError) stumperror, 0 ); printf( "Stage training time: %.2f\n", (proctime + TIME( 0 )) ); single_num = icvNumSplits( single_cluster->stage );//得到级分类器中分类回归树分类器的数量 best_num = single_num; best_clusters = 1; multiple_clusters = NULL; printf( "Number of used features: %d\n", single_num ); if( maxtreesplits >= 0 ) { max_clusters = MIN( max_clusters, maxtreesplits - total_splits + 1 ); } /* try clustering */ vals = NULL; for( k = 2; k <= max_clusters; k++ ) { int cluster; int stop_clustering; printf( "*** %d clusters ***\n", k ); /* check whether clusters are big enough */ stop_clustering = ( k * minpos > poscount ); if( !stop_clustering ) { int num[CV_MAX_CLUSTERS]; if( k == 2 ) { proctime = -TIME( 0 ); CV_CALL( vals = icvGetUsedValues( training_data, 0, poscount, haar_features, single_cluster->stage ) ); //根据级分类器来计算训练样本的特征值 printf( "Getting values for clustering time: %.2f\n", (proctime + TIME(0)) ); printf( "Value matirx size: 
%d x %d\n", vals->rows, vals->cols ); fflush( stdout ); cluster_idx->cols = vals->rows; for( i = 0; i < negcount; i++ ) idx->data.i[i] = poscount + i; } proctime = -TIME( 0 ); /* * 按照给定的类别数目对样本集合进行聚类 *cvKMeans2( vals, k, cluster_idx, CV_TERM_CRITERIA() ) *vals:输入样本的浮点矩阵,每个样本一行。 *k:所给定的聚类数目 *cluster_idx 输出整数向量:每个样本对应的类别标识,在 cluster_count 内。 *CV_TERM_CRITERIA()迭代终止准则,指定聚类的最大迭代次数和/或精度(两次迭代引起的聚类中心的移动距离) */ CV_CALL( cvKMeans2( vals, k, cluster_idx, CV_TERM_CRITERIA() ) ); printf( "Clustering time: %.2f\n", (proctime + TIME( 0 )) ); for( cluster = 0; cluster < k; cluster++ ) num[cluster] = 0; for( i = 0; i < cluster_idx->cols; i++ ) num[cluster_idx->data.i[i]]++; for( cluster = 0; cluster < k; cluster++ ) { if( num[cluster] < minpos ) { stop_clustering = 1; break; } } } if( stop_clustering ) { printf( "Clusters are too small. Clustering aborted.\n" ); break; } cur_num = 0; cur_node = last_node = NULL; for( cluster = 0; (cluster < k) && (cur_num < best_num); cluster++ ) { CvTreeCascadeNode* new_node; int num_splits; int last_pos; int total_pos; printf( "Cluster: %d\n", cluster ); last_pos = negcount; for( i = 0; i < cluster_idx->cols; i++ ) { if( cluster_idx->data.i[i] == cluster ) { idx->data.i[last_pos++] = i; } } idx->cols = last_pos; total_pos = idx->cols - negcount; printf( "# pos: %d of %d. (%d%%)\n", total_pos, poscount, 100 * total_pos / poscount ); CV_CALL( new_node = icvCreateTreeCascadeNode() ); if( last_node ) last_node->next = new_node; else cur_node = new_node; last_node = new_node; posweight = (equalweights) ? 1.0F / (total_pos + negcount) : (0.5F / total_pos); negweight = (equalweights) ? 
1.0F / (total_pos + negcount) : (0.5F / negcount); icvSetWeightsAndClasses( training_data, poscount, posweight, 1.0F, negcount, negweight, 0.0F ); /* CV_DEBUG_SAVE( idx ); */ fflush( stdout ); proctime = -TIME( 0 ); new_node->stage = (CvStageHaarClassifier*) icvCreateCARTStageClassifier( training_data, idx, haar_features, minhitrate, maxfalsealarm, symmetric, weightfraction, numsplits, (CvBoostType) boosttype, (CvStumpError) stumperror, best_num - cur_num ); printf( "Stage training time: %.2f\n", (proctime + TIME( 0 )) ); if( !(new_node->stage) ) { printf( "Stage training aborted.\n" ); cur_num = best_num + 1; } else { num_splits = icvNumSplits( new_node->stage ); cur_num += num_splits; printf( "Number of used features: %d\n", num_splits ); } } /* for each cluster */ if( cur_num < best_num ) { icvReleaseTreeCascadeNodes( &multiple_clusters ); best_num = cur_num; best_clusters = k; multiple_clusters = cur_node; } else { icvReleaseTreeCascadeNodes( &cur_node ); } } /* try different number of clusters */ cvReleaseMat( &vals ); CvSplit* curSplit; CV_CALL( curSplit = (CvSplit*) cvAlloc( sizeof( *curSplit ) ) ); CV_ZERO_OBJ( curSplit ); if( last_split ) last_split->next = curSplit; else first_split = curSplit; last_split = curSplit; curSplit->single_cluster = single_cluster; curSplit->multiple_clusters = multiple_clusters; curSplit->num_clusters = best_clusters; curSplit->parent = parent; curSplit->single_multiple_ratio = (float) single_num / best_num; } if( parent ) parent = parent->next_same_level; } while( parent ); /* choose which nodes should be splitted */ do { float max_single_multiple_ratio; cur_split = NULL; max_single_multiple_ratio = 0.0F; last_split = first_split; while( last_split ) { if( last_split->single_cluster && last_split->multiple_clusters && last_split->single_multiple_ratio > max_single_multiple_ratio ) { max_single_multiple_ratio = last_split->single_multiple_ratio; cur_split = last_split; } last_split = last_split->next; } if( cur_split ) { if( 
maxtreesplits < 0 || cur_split->num_clusters <= maxtreesplits - total_splits + 1 ) { cur_split->single_cluster = NULL; total_splits += cur_split->num_clusters - 1; } else { icvReleaseTreeCascadeNodes( &(cur_split->multiple_clusters) ); cur_split->multiple_clusters = NULL; } } } while( cur_split ); /* attach new nodes to the tree */ leaves = last_node = NULL; last_split = first_split; while( last_split ) { cur_node = (last_split->multiple_clusters) ? last_split->multiple_clusters : last_split->single_cluster; parent = last_split->parent; if( parent ) parent->child = cur_node; /* connect leaves via next_same_level and save them */ for( ; cur_node; cur_node = cur_node->next ) { FILE* file; if( last_node ) last_node->next_same_level = cur_node; else leaves = cur_node; last_node = cur_node; cur_node->parent = parent; cur_node->idx = tcc->next_idx; tcc->next_idx++; sprintf( suffix, "%d/%s", cur_node->idx, CV_STAGE_CART_FILE_NAME ); file = NULL; if( icvMkDir( stage_name ) && (file = fopen( stage_name, "w" )) != 0 ) { cur_node->stage->save( (CvIntHaarClassifier*) cur_node->stage, file ); fprintf( file, "\n%d\n%d\n", ((parent) ? parent->idx : -1), ((cur_node->next) ? tcc->next_idx : -1) ); } else { printf( "Failed to save classifier into %s\n", stage_name ); } if( file ) fclose( file ); } if( parent ) sprintf( buf, "%d", parent->idx ); else sprintf( buf, "NULL" ); printf( "\nParent node: %s\n", buf ); printf( "Chosen number of splits: %d\n\n", (last_split->multiple_clusters) ? 
(last_split->num_clusters - 1) : 0 ); cur_split = last_split; last_split = last_split->next; cvFree( &cur_split ); } /* for each split point */ printf( "Total number of splits: %d\n", total_splits ); if( !(tcc->root) ) tcc->root = leaves; CV_CALL( icvPrintTreeCascade( tcc->root ) ); } while( leaves ); /* save the cascade to xml file */ { char xml_path[1024]; int len = (int)strlen(dirname); CvHaarClassifierCascade* cascade = 0; strcpy( xml_path, dirname ); if( xml_path[len-1] == '\\' || xml_path[len-1] == '/' ) len--; strcpy( xml_path + len, ".xml" ); cascade = cvLoadHaarClassifierCascade( dirname, cvSize(winwidth,winheight) ); if( cascade ) cvSave( xml_path, cascade ); cvReleaseHaarClassifierCascade( &cascade ); } } /* if( nstages > 0 ) */ /* check cascade performance */ printf( "\nCascade performance\n" ); tcc->eval = icvEvalTreeCascadeClassifier; /* load samples */ consumed = 0; poscount = icvGetHaarTrainingDataFromVec( training_data, 0, npos, (CvIntHaarClassifier*) tcc, vecfilename, &consumed ); printf( "POS: %d %d %f\n", poscount, consumed, (consumed > 0) ? (((float) poscount)/consumed) : 0 ); if( poscount <= 0 ) fprintf( stderr, "Warning: unable to obtain positive samples\n" ); proctime = -TIME( 0 ); negcount = icvGetHaarTrainingDataFromBG( training_data, poscount, nneg, (CvIntHaarClassifier*) tcc, &false_alarm, bg_vecfile ? bgfilename : NULL ); printf( "NEG: %d %g\n", negcount, false_alarm ); printf( "BACKGROUND PROCESSING TIME: %.2f\n", (proctime + TIME( 0 )) ); if( negcount <= 0 ) fprintf( stderr, "Warning: unable to obtain negative samples\n" ); __END__; if (! bg_vecfile) icvDestroyBackgroundReaders(); if( tcc ) tcc->release( (CvIntHaarClassifier**) &tcc ); icvReleaseIntHaarFeatures( &haar_features ); icvReleaseHaarTrainingData( &training_data ); cvReleaseMat( &cluster_idx ); cvReleaseMat( &idx ); cvReleaseMat( &vals ); cvReleaseMat( &features_idx ); }