DenseCRF源码解读

simple_dense_inference.cpp

// Example driver: loads a colour image and a crude annotation image, derives
// unary energies from the annotation, builds a 2D dense CRF with one Gaussian
// and one bilateral pairwise term, runs MAP inference and writes the colourised
// labelling.
//
// Usage: <program> image annotations output
// Returns 1 on a usage, load or size-mismatch error, 0 otherwise.
int main( int argc, char* argv[]){
  if (argc<4){
    printf("Usage: %s image annotations output\n", argv[0] );
    return 1;
  }
  // Number of labels
  const int M = 21;
  // Load the color image and some crude annotations (which are used in a simple classifier)
  // W, H   : width and height of the image
  // GW, GH : width and height of the annotation
  int W, H, GW, GH;
  // Read the original image
  unsigned char * im = readPPM( argv[1], W, H );
  if (!im){
    printf("Failed to load image!\n");
    return 1;
  }
  // Read the annotation image
  unsigned char * anno = readPPM( argv[2], GW, GH );
  if (!anno){
    printf("Failed to load annotations!\n");
    // Release the image that was already loaded before bailing out
    delete[] im;
    return 1;
  }
  if (W!=GW || H!=GH){
    printf("Annotation size doesn't match image!\n");
    // Release both buffers before bailing out
    delete[] anno;
    delete[] im;
    return 1;
  }

  // Get the unary energies
  /////////// Put your own unary classifier here! ///////////
  float * unary = classify( anno, W, H, M );
  ///////////////////////////////////////////////////////////

  // Setup the CRF model
  // W, H, M are the image width, the image height and the number of labels
  DenseCRF2D crf(W, H, M);
  // Specify the unary potential as an array of size W*H*(#classes)
  // packing order: x0y0l0 x0y0l1 x0y0l2 .. x1y0l0 x1y0l1 ... (row-order)
  crf.setUnaryEnergy( unary );
  // add a color independent term (feature = pixel location 0..W-1, 0..H-1)
  // x_stddev = 3
  // y_stddev = 3
  // weight = 3
  // (stddev = standard deviation)
  // This is the smoothness-kernel term; the resulting potential is appended
  // to the CRF's list of pairwise potentials.
  crf.addPairwiseGaussian( 3, 3, 3 );
  // add a color dependent term (feature = xyrgb)
  // x_stddev = 60
  // y_stddev = 60
  // r_stddev = g_stddev = b_stddev = 20
  // weight = 10
  // This is the appearance-kernel term; the resulting potential is appended
  // to the CRF's list of pairwise potentials as well.
  crf.addPairwiseBilateral( 60, 60, 20, 20, 20, im, 10 );

  // Do map inference: one label index (short) per pixel
  short * map = new short[W*H];
  crf.map(10, map);

  // Store the result
  unsigned char *res = colorize( map, W, H );
  writePPM( argv[3], W, H, res );

  delete[] im;
  delete[] anno;
  delete[] res;
  delete[] map;
  delete[] unary;
  return 0;
}

其中有几个关键的函数,分别是
DenseCRF2D crf(W, H, M);
crf.setUnaryEnergy( unary )
crf.addPairwiseGaussian( 3, 3, 3 )
crf.addPairwiseBilateral( 60, 60, 20, 20, 20, im, 10 );
crf.map(10, map)

DenseCRF::DenseCRF(int N, int M)

// Constructs a CRF over N variables with M labels each and allocates the
// work buffers used during inference.
DenseCRF::DenseCRF(int N, int M) : N_(N), M_(M) {
  // One entry per (variable, label) pair
  const int table_size = N_*M_;

  unary_ = allocate( table_size );
  additional_unary_ = allocate( table_size );
  // current_ holds the distribution Q of the current inference step
  current_ = allocate( table_size );
  // next_ accumulates the values for the following iterate
  next_ = allocate( table_size );
  // tmp_ is scratch space handed to the pairwise potentials; twice the table size
  tmp_ = allocate( 2*table_size );

  // additional_unary_ starts out as all zeros
  memset( additional_unary_, 0, sizeof(float)*N_*M_ );
}

void DenseCRF::setUnaryEnergy

// Copies N_*M_ floats of caller-supplied unary energies into the internal
// unary_ buffer; the caller keeps ownership of `unary`.
void DenseCRF::setUnaryEnergy(const float* unary) {
  const size_t byte_count = N_*M_*sizeof(float);
  memcpy( unary_, unary, byte_count );
}

void DenseCRF2D::addPairwiseGaussian

// Adds a colour-independent pairwise term whose features are the pixel
// coordinates scaled by the given standard deviations.
//   sx, sy   : standard deviations for x and y (the example passes 3, 3)
//   w        : weight of the term (the example passes 3)
//   function : optional label compatibility function; forwarded unchanged
void DenseCRF2D::addPairwiseGaussian ( float sx, float sy, float w, const SemiMetricFunction * function ) {
  // Two floats per pixel, written in row-major order: (x / sx, y / sy)
  float * positions = new float [N_*2];
  float * cursor = positions;
  for( int row=0; row<H_; row++ ){
    for( int col=0; col<W_; col++ ){
      *cursor++ = col / sx;
      *cursor++ = row / sy;
    }
  }
  // Hand the 2-dimensional features over; the feature-array overload wraps
  // them in a potential object and registers it.
  addPairwiseEnergy( positions, 2, w, function );
  // The temporary feature array is released right after the call
  delete [] positions;
}

这个函数在处理smoothness kernel: $w^{(2)} \exp \left(-\frac{\left|p_{i}-p_{j}\right|^{2}}{2 \theta_{\gamma}^{2}}\right)$。The smoothness kernel removes small isolated regions.
式中 $p_i$ 是定义的仅与位置有关的特征,定义为位置坐标x,y分别除以对应的缩放因子sx,sy

// Excerpt: for every pixel (i, j) store the position feature pair (i / sx, j / sy)
for( int j=0; j<H_; j++ )
    for( int i=0; i<W_; i++ ){
      feature[(j*W_+i)*2+0] = i / sx;
      feature[(j*W_+i)*2+1] = j / sy;
    }

接下来,addPairwiseEnergy( feature, 2, w, function )

// Wraps a feature array in a pairwise potential and registers it.
//   features : feature values, D per variable
//   D        : feature dimension (2 for the x/y-only Gaussian term)
//   w        : weight of the term
//   function : optional label compatibility function; when null a
//              PottsPotential is created, otherwise a SemiMetricPotential
void DenseCRF::addPairwiseEnergy (const float* features, int D, float w, const SemiMetricFunction * function) {
  if (!function) {
    addPairwiseEnergy( new PottsPotential( features, D, N_, w ) );
    return;
  }
  addPairwiseEnergy( new SemiMetricPotential( features, D, N_, w, function ) );
}
// Registers an already constructed pairwise potential by appending it to pairwise_.
void DenseCRF::addPairwiseEnergy ( PairwisePotential* potential ){
  pairwise_.push_back( potential );
  // The pointer itself is stored; no copy of the potential is made here
}

addPairwiseEnergy中的参数function指的就是文章中的label compatibility function,在官方给出的例子中,function=none
所以,调用了addPairwiseEnergy( new PottsPotential( features, D, N_, w ) );其中的参数features是仅与位置有关的特征 $p$;D是特征的维度=2,即x和y;N_是像素数;w在本例中=3,是smoothness kernel的权重

  PottsPotential(const float* features, int D, int N, float w, bool per_pixel_normalization=true) :N_(N), w_(w) {
	  // 传进来的参数中w=3,这里调用这个函数需要计算$/sum w*k_{smoothness kernel}(fi; fj)*Q_i$
	  //
    lattice_.init( features, D, N );
	// D是channel,N是像素数
    norm_ = allocate( N );
    for ( int i=0; i<N; i++ )
      norm_[i] = 1;
    // Compute the normalization factor
	// 计算归一化因子
    lattice_.compute( norm_, norm_, 1 );
    if ( per_pixel_normalization ) {
      // use a per pixel normalization
      for ( int i=0; i<N; i++ )
	norm_[i] = 1.f / (norm_[i]+1e-20f);
	  // 每个位置i取倒数
    }
    else {
	  // !per_pixel_normalization
      float mean_norm = 0;
      for ( int i=0; i<N; i++ )
	    mean_norm += norm_[i];
      mean_norm = N / mean_norm;
      // use a per pixel normalization
      for ( int i=0; i<N; i++ )
	norm_[i] = mean_norm;
	  // 每个位置取平均值的倒数
    }
  }

权重参数w在这里暂时没有用,permutohedral lattice近似计算那里我没有看。。猜测这里计算得到的norm_对应于 $k^{(\text{smoothness kernel})}\left(\mathbf{f}_{i}, \mathbf{f}_{j}\right)$

void DenseCRF2D::addPairwiseBilateral

// Adds a colour-dependent pairwise term whose features are (x, y, r, g, b),
// each scaled by its own standard deviation.
//   sx, sy     : standard deviations for the position (the example passes 60, 60)
//   sr, sg, sb : standard deviations for the colour channels (the example passes 20 each)
//   im         : image data read as 3 bytes per pixel, row-major
//   w          : weight of the term (the example passes 10)
//   function   : optional label compatibility function; forwarded unchanged
void DenseCRF2D::addPairwiseBilateral ( float sx, float sy, float sr, float sg, float sb, const unsigned char* im, float w, const SemiMetricFunction * function ) {
  // Five floats per pixel
  float * xyrgb = new float [N_*5];
  for( int row=0; row<H_; row++ ){
    for( int col=0; col<W_; col++ ){
      const int pixel = row*W_+col;
      float * f = xyrgb + pixel*5;
      f[0] = col / sx;
      f[1] = row / sy;
      f[2] = im[(col+row*W_)*3+0] / sr;
      f[3] = im[(col+row*W_)*3+1] / sg;
      f[4] = im[(col+row*W_)*3+2] / sb;
    }
  }
  // Hand the 5-dimensional features over; the feature-array overload wraps
  // them in a potential object and registers it.
  addPairwiseEnergy( xyrgb, 5, w, function );
  // The temporary feature array is released right after the call
  delete [] xyrgb;
}

例子中调用的语句是crf.addPairwiseBilateral( 60, 60, 20, 20, 20, im, 10 );
调用的方式和addPairwiseGaussian差不多。

void DenseCRF::map ( int n_iterations, short* result, float relax )

// Runs inference and writes, for every variable, the index of its
// highest-scoring label into result.
//   n_iterations : number of inference steps (the example passes 10)
//   result       : output array with room for N_ entries
//   relax        : forwarded to runInference
void DenseCRF::map ( int n_iterations, short* result, float relax ) {
  // Run inference; the returned table has M_ entries per variable
  const float * scores = runInference( n_iterations, relax );

  // Arg-max over the labels of each variable; ties keep the lowest index
  for( int var=0; var<N_; var++ ){
    const float * row = scores + var*M_;
    int best = 0;
    for( int label=1; label<M_; label++ ){
      if( row[best] < row[label] )
        best = label;
    }
    result[var] = best;
  }
}

主要的计算函数是runInference,后面那些在进行argmax操作,得到最终的结果

// Initialises the distribution from the unary energies, performs
// n_iterations inference steps and returns the internal current_ buffer
// (not a copy).
float* DenseCRF::runInference( int n_iterations, float relax ) {
  startInference();
  for( int remaining=n_iterations; remaining>0; --remaining )
    stepInference(relax);
  return current_;
}

$Q_{i}\left(x_{i}\right) \leftarrow \frac{1}{Z_{i}} \exp \left\{-\phi_{u}\left(x_{i}\right)\right\}$,其中 $\phi_{u}\left(x_{i}\right)$ 是一元势,Q的维度应该是N*M

// Initialises current_ from the unary energies before the first inference step.
void DenseCRF::startInference(){
  // Initialize using the unary energies
  // scale = -1, so the energies are negated before being exponentiated and normalized
  expAndNormalize( current_, unary_, -1 );
}

// For every variable, exponentiates scale*in over its M_ labels, normalises
// the result so it sums to one, and stores it in out.
//   out   : destination table, M_ entries per variable
//   in    : source table, M_ entries per variable
//   scale : factor applied to every input value before the exponential
//   relax : 1 overwrites out; any other value blends as
//           (1-relax)*old + relax*new
void DenseCRF::expAndNormalize ( float* out, const float* in, float scale, float relax ) {
  // Scratch space for the unnormalised values of one variable
  float * weights = new float[M_];
  const bool overwrite = (relax == 1);

  for( int var=0; var<N_; var++ ){
    const float * src = in + var*M_;
    float * dst = out + var*M_;

    // Find the max and subtract it so that the exp doesn't explode
    float peak = scale*src[0];
    for( int label=1; label<M_; label++ ){
      const float scaled = scale*src[label];
      if( peak < scaled )
        peak = scaled;
    }

    // Exponentiate and accumulate the normaliser Z_i
    float partition = 0;
    for( int label=0; label<M_; label++ ){
      weights[label] = fast_exp( scale*src[label]-peak );
      partition += weights[label];
    }

    // Make it a probability and write it out
    for( int label=0; label<M_; label++ ){
      const float prob = weights[label] / partition;
      if( overwrite )
        dst[label] = prob;
      else
        dst[label] = (1-relax)*dst[label] + relax*prob;
    }
  }
  delete[] weights;
}
// Performs one inference iteration: seeds next_ with the negated unary terms,
// lets every registered pairwise potential add its contribution, then
// exponentiates and normalises next_ into current_.
void DenseCRF::stepInference( float relax ){
#ifdef SSE_DENSE_CRF
  // Reinterpret the float buffers as packed 4-float vectors for the SSE path
  __m128 * sse_next_ = (__m128*)next_;
  __m128 * sse_unary_ = (__m128*)unary_;
  __m128 * sse_additional_unary_ = (__m128*)additional_unary_;
#endif
  // Set the unary potential
#ifdef SSE_DENSE_CRF
  // (N_*M_-1)/4+1 is the number of 4-float groups needed to cover N_*M_ floats
  for( int i=0; i<(N_*M_-1)/4+1; i++ )
    sse_next_[i] = - sse_unary_[i] - sse_additional_unary_[i];
#else
  for( int i=0; i<N_*M_; i++ )
    next_[i] = -unary_[i] - additional_unary_[i];
  // next_ now holds the negated unary terms
  // NOTE(review): additional_unary_ is presumably a user-definable extra unary term - confirm
#endif

  // Add up all pairwise potentials
  for( unsigned int i=0; i<pairwise_.size(); i++ )
    pairwise_[i]->apply( next_, current_, tmp_, M_ );
  // NOTE(review): in the example program pairwise_ presumably holds two entries,
  // the smoothness-kernel and the appearance-kernel potentials - confirm

    // This is the message-passing step; tmp_ is passed to apply() as its scratch buffer

  // Exponentiate and normalize
  expAndNormalize( current_, next_, 1.0, relax );
  // current_ now holds the normalised result computed from next_
}
  // Filters in_values through the lattice and accumulates the weighted,
  // normalised result into out_values.
  //   out_values : accumulator, value_size entries per point (added to, not overwritten)
  //   in_values  : input, value_size entries per point
  //   tmp        : scratch buffer receiving the lattice output
  //   value_size : number of values per point (the caller passes the label count M_)
  void apply(float* out_values, const float* in_values, float* tmp, int value_size) const {
    // tmp is the output of the lattice computation, in_values its input
    lattice_.compute( tmp, in_values, value_size );

    for ( int point=0; point<N_; point++ ){
      const int base = point*value_size;
      for ( int ch=0; ch<value_size; ch++ )
        // w_ is the weight of this term, norm_[point] the per-point normaliser
        out_values[base+ch] += w_*norm_[point]*tmp[base+ch];
    }
  }

你可能感兴趣的:(图像分割)