基于图的图像分割方法(Graph-Based Image Segmentation)源码阅读笔记

这个方法被应用于深度学习目标检测的经典之作selective search方法中(Selective Search for Object Recognition),用于初始化分割区域。。论文题目:《Efficient Graph-Based Image Segmentation》

查阅了许多博客,后来感觉,对于这个方法整体还是一知半解,于是花了一个下午阅读了源码,做一个笔记,如有错误,希望大家指正

源代码输入5个参数,示意如下:

 sigma: Used to smooth the input image before segmenting it.
  k: Value for the threshold function.
  min: Minimum component size enforced by post-processing.
  input: Input image.
  output: Output image.

大意为,sigma:使用的平滑参数,k:给出的初始化阈值,min:最小分割块大小(用于合并小块),输入图片(input)和输入图片(output)(都要为pnm格式)

参数传入segment.cpp中,如下

image *seg = segment_image(input, sigma, k, min_size, &num_ccs); 

调用segment-image.h中的 image *segment_image,函数大概注释如下:

/*
Copyright (C) 2006 Pedro Felzenszwalb

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
*/

#ifndef SEGMENT_IMAGE
#define SEGMENT_IMAGE

#include 
#include 
#include 
#include 
#include 
#include "segment-graph.h"

// random color
rgb random_rgb(){ 
  rgb c;
  double r;

  c.r = (uchar)random();
  c.g = (uchar)random();
  c.b = (uchar)random();

  return c;
}

// dissimilarity measure between pixels
// 衡量不相似度,用rgb距离
static inline float diff(image<float> *r, image<float> *g, image<float> *b,
             int x1, int y1, int x2, int y2) {
  return sqrt(square(imRef(r, x1, y1)-imRef(r, x2, y2)) +
          square(imRef(g, x1, y1)-imRef(g, x2, y2)) +
          square(imRef(b, x1, y1)-imRef(b, x2, y2)));
}

/*
 * Segment an image
 *
 * Returns a color image representing the segmentation.
 *
 * im: image to segment.
 * sigma: to smooth the image.
 * c: constant for treshold function.
 * min_size: minimum component size (enforced by post-processing stage).
 * num_ccs: number of connected components in the segmentation.
 */
image *segment_image(image *im, float sigma, float c, int min_size,
              int *num_ccs) {
    //图像的宽和高
  int width = im->width();
  int height = im->height();
    // 初始化了三个通道的片
  image<float> *r = new image<float>(width, height);
  image<float> *g = new image<float>(width, height);
  image<float> *b = new image<float>(width, height);

  // smooth each color channel
    //平滑色彩通道
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      imRef(r, x, y) = imRef(im, x, y).r;
      imRef(g, x, y) = imRef(im, x, y).g;
      imRef(b, x, y) = imRef(im, x, y).b;
    }
  }
  image<float> *smooth_r = smooth(r, sigma);
  image<float> *smooth_g = smooth(g, sigma);
  image<float> *smooth_b = smooth(b, sigma);
  delete r;
  delete g;
  delete b;

  // build graph
    //构建图,边的数组是原来的4倍,就是四个方向,计算出了全部的点和点之间的相似性
    //这里是初始化了width*height*4大小的数组,其实用不了这么多
  edge *edges = new edge[width*height*4];
  int num = 0;
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
        //对每个边的左右点计算权重
      if (x < width-1) {
    edges[num].a = y * width + x;
    edges[num].b = y * width + (x+1);
    edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y);
    num++;
      }

      if (y < height-1) {
    edges[num].a = y * width + x;
    edges[num].b = (y+1) * width + x;
    edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x, y+1);
    num++;
      }

      if ((x < width-1) && (y < height-1)) {
    edges[num].a = y * width + x;
    edges[num].b = (y+1) * width + (x+1);
    edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y+1);
    num++;
      }

      if ((x < width-1) && (y > 0)) {
    edges[num].a = y * width + x;
    edges[num].b = (y-1) * width + (x+1);
    edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y-1);
    num++;
      }
    }
  }
//   printf("%d %d", width*height*4, num);
//    printf("width is:%d height is:%d", width, height);
  delete smooth_r;
  delete smooth_g;
  delete smooth_b;

  // segment
    //分割步骤,。。。。跳出
  universe *u = segment_graph(width*height, num, edges, c);

  // post process small components
    //合并最小的块,如果边两边的块小于min_size就合并
  for (int i = 0; i < num; i++) {
    int a = u->find(edges[i].a);
    int b = u->find(edges[i].b);
    if ((a != b) && ((u->size(a) < min_size) || (u->size(b) < min_size)))
      u->join(a, b);
  }
  delete [] edges;
  *num_ccs = u->num_sets();
    //写入新的pnm图片
  image *output = new image(width, height);

  // pick random colors for each component
  rgb *colors = new rgb[width*height];
  for (int i = 0; i < width*height; i++)
    colors[i] = random_rgb();

  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      int comp = u->find(y * width + x);
      imRef(output, x, y) = colors[comp];
    }
  }  

  delete [] colors;  
  delete u;

  return output;
}

#endif

其中 segment_graph(width*height, num, edges, c); 这一步跳到 segment-graph.h中

/*
Copyright (C) 2006 Pedro Felzenszwalb

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
*/

#ifndef SEGMENT_GRAPH
#define SEGMENT_GRAPH

#include 
#include 
#include "disjoint-set.h"

// threshold function
#define THRESHOLD(size, c) (c/size)

typedef struct {
  float w;
  int a, b;
} edge;

bool operator<(const edge &a, const edge &b) {
    //按照不相似程度从小到大排序
  return a.w < b.w;
}

/*
 * Segment a graph
 *
 * Returns a disjoint-set forest representing the segmentation.
 *
 * num_vertices: number of vertices in graph.
 * num_edges: number of edges in graph
 * edges: array of edges.
 * c: constant for treshold function.
 */
universe *segment_graph(int num_vertices, int num_edges, edge *edges, 
            float c) { 
  // sort edges by weight
    // 将edge按照权重进行排序,权重越大越不相似,这里权重是不相似程度
  std::sort(edges, edges + num_edges);

  // make a disjoint-set forest
    // num_vertices 是顶点的数量
  universe *u = new universe(num_vertices);

  // init thresholds
    //这里阈值是为了防止刚开始的时候每个点之间全部都分开了
  float *threshold = new float[num_vertices];
  for (int i = 0; i < num_vertices; i++)
      //c/1
    threshold[i] = THRESHOLD(1,c);

  // for each edge, in non-decreasing weight order...
  for (int i = 0; i < num_edges; i++) {
      //遍历所有的边
    edge *pedge = &edges[i];

    // components conected by this edge
      // 找出这个边的左右顶点,这个顶点会随着边的合并而变动,为了保证阈值threshold[a]是最大的,就是这个域内的不相似程度最大
    int a = u->find(pedge->a);
    int b = u->find(pedge->b);
      // 如果 a==b 则说明在同个域里面
    if (a != b) {
        //权值小于阈值,这里a和b代表这个边链接的两个域的分别的最大不相似程度
      if ((pedge->w <= threshold[a]) &&
      (pedge->w <= threshold[b])) {
          //如果这个边的长度(就是这个边链接的两个点的不相似程度)比两边的最大的不相似度都小,则将这两个域链接起来
    u->join(a, b);
//          printf("size a is:%d ", u->size(a));
    a = u->find(a);
//          printf(" size  a is:%d \n", u->size(a));
          //这里只更新了a的部分,这里个人理解,不太确定,应该是因为在disjoint-set.h中代码find函数中总是找到下标最靠前的那个点,然后返回
    threshold[a] = pedge->w + THRESHOLD(u->size(a), c);
      }
    }
  }

  // free up
  delete threshold;
  return u;
}

#endif

下面是 universe *u = new universe(num_vertices); 的定义。。。在disjoint-set.h中

/*
Copyright (C) 2006 Pedro Felzenszwalb

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
*/

#ifndef DISJOINT_SET
#define DISJOINT_SET

// disjoint-set forests using union-by-rank and path compression (sort of).

typedef struct {
    /*
     rank:等级,用来判定将要合并的块合在左边还是右边
     p:用来找到这个块最先的那个节点
     size:用来记录现在这个块有大
     */
  int rank;
  int p;
  int size;
} uni_elt;

class universe {
public:
  universe(int elements);
  ~universe();
  int find(int x);  
  void join(int x, int y);
  int size(int x) const { return elts[x].size; }
  int num_sets() const { return num; }

private:
  uni_elt *elts;
  int num;
};

universe::universe(int elements) {
  //初始化三个参数
  elts = new uni_elt[elements];
  num = elements;
  for (int i = 0; i < elements; i++) {
    elts[i].rank = 0;
    elts[i].size = 1;
    elts[i].p = i;
  }
}

universe::~universe() {
  delete [] elts;
}

int universe::find(int x) {
  int y = x;
  while (y != elts[y].p){
    // 个人理解,这里不确定。这里通过不断的链接,查找到最初的那个点,那个点作为这个域的标识
    y = elts[y].p;
  }
  elts[x].p = y;
  return y;
}

void universe::join(int x, int y) {
  //链接两个块,如果x的rank大于y就合并在x里面,
  //反之则加在y里面
  if (elts[x].rank > elts[y].rank) {
    elts[y].p = x;
    elts[x].size += elts[y].size;
  } else {
    elts[x].p = y;
    elts[y].size += elts[x].size;
    if (elts[x].rank == elts[y].rank)
      elts[y].rank++;
  }
  num--;
}

#endif

关于如何平滑(smooth)参数,在filter.h里,就不贴了(里面涉及一些数学公式,不看代码会看的很累)

个人觉得,大概思路是,先是每个点作为一个块,对边的权重进行排序,遍历所有的边,不断结合块,把边遍历过之后,也就合并所有的块了,在把太小的块融合起来。

最后,贴上一段原始论文中的描述,供大家参考

Algorithm 1 Segmentation algorithm.
The input is a graph G = (V, E), with n vertices and m edges. The output is a
segmentation of V into components S = (C 1 , . . . , C r ).
0. Sort E into π = (o 1 , . . . , o m ), by non-decreasing edge weight.
1. Start with a segmentation S 0 , where each vertex v i is in its own component.
2. Repeat step 3 for q = 1, . . . , m.
3. Construct S q given S q−1 as follows. Let v i and v j denote the vertices connected
by the q-th edge in the ordering, i.e., o q = (v i , v j ). If v i and v j are in disjoint
components of S q−1 and w(o q ) is small compared to the internal difference of
both those components, then merge the two components otherwise do nothing.
More formally, let C i q−1 be the component of S q−1 containing v i and C j q−1 the
component containing v j . If C i q−1 6 = C j q−1 and w(o q ) ≤ M Int(C i q−1 , C j q−1 ) then
S q is obtained from S q−1 by merging C i q−1 and C j q−1 . Otherwise S q = S q−1 .
4. Return S = S m .

最后的最后,代码和论文如下:这里

你可能感兴趣的:(c++算法入门题目)