这个方法被应用于深度学习目标检测的经典之作 Selective Search 方法(《Selective Search for Object Recognition》)中,用于初始化分割区域。论文题目:《Efficient Graph-Based Image Segmentation》
查阅了许多博客,后来感觉,对于这个方法整体还是一知半解,于是花了一个下午阅读了源码,做一个笔记,如有错误,希望大家指正
源代码输入5个参数,示意如下:
sigma: Used to smooth the input image before segmenting it.
k: Value for the threshold function.
min: Minimum component size enforced by post-processing.
input: Input image.
output: Output image.
参数传入segment.cpp中,如下
image<rgb> *seg = segment_image(input, sigma, k, min_size, &num_ccs);
调用 segment-image.h 中的 image<rgb> *segment_image,函数及大致注释如下:
#ifndef SEGMENT_IMAGE
#define SEGMENT_IMAGE
#include
#include
#include
#include
#include
#include "segment-graph.h"
// Build a colour whose three channels each come from an independent call to
// random(), narrowed to uchar by a cast.
// NOTE(review): random() is never seeded in this block, so the sequence is
// presumably the same on every run unless a caller seeds it - confirm.
rgb random_rgb(){
  rgb c;
  c.r = static_cast<uchar>(random());
  c.g = static_cast<uchar>(random());
  c.b = static_cast<uchar>(random());
  return c;
}
// Dissimilarity between pixel (x1, y1) and pixel (x2, y2): the Euclidean
// distance between their (r, g, b) values taken from the three channel planes.
static inline float diff(image<float> *r, image<float> *g, image<float> *b,
                         int x1, int y1, int x2, int y2) {
  // Per-channel differences between the two pixels.
  const float delta_r = imRef(r, x1, y1) - imRef(r, x2, y2);
  const float delta_g = imRef(g, x1, y1) - imRef(g, x2, y2);
  const float delta_b = imRef(b, x1, y1) - imRef(b, x2, y2);
  return sqrt(square(delta_r) + square(delta_g) + square(delta_b));
}
/*
 * Segment an image.
 *
 * Returns a colour image in which every pixel is painted with the colour
 * assigned to the component it belongs to. The image is allocated with new
 * and not released here, so the caller is responsible for deleting it.
 *
 * im:       image to segment (read only; not deleted by this function).
 * sigma:    passed to smooth() for each colour channel before edges are built.
 * c:        constant forwarded to segment_graph() for its threshold function.
 * min_size: components smaller than this are merged with a neighbour in the
 *           post-processing pass.
 * num_ccs:  output; receives the number of components in the segmentation.
 */
image<rgb> *segment_image(image<rgb> *im, float sigma, float c, int min_size,
                          int *num_ccs) {
  int width = im->width();
  int height = im->height();

  // Split the input into one float plane per colour channel.
  image<float> *r = new image<float>(width, height);
  image<float> *g = new image<float>(width, height);
  image<float> *b = new image<float>(width, height);
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      imRef(r, x, y) = imRef(im, x, y).r;
      imRef(g, x, y) = imRef(im, x, y).g;
      imRef(b, x, y) = imRef(im, x, y).b;
    }
  }

  // Smooth each channel; the unsmoothed planes are no longer needed.
  image<float> *smooth_r = smooth(r, sigma);
  image<float> *smooth_g = smooth(g, sigma);
  image<float> *smooth_b = smooth(b, sigma);
  delete r;
  delete g;
  delete b;

  // Build the graph. Pixel (x, y) is vertex y * width + x. Each pixel adds at
  // most four edges (right, down, down-right, up-right), so width*height*4
  // slots are always enough; together these cover the 8-neighbourhood once.
  edge *edges = new edge[width*height*4];
  int num = 0;
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      // Right neighbour.
      if (x < width-1) {
        edges[num].a = y * width + x;
        edges[num].b = y * width + (x+1);
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y);
        num++;
      }

      // Neighbour below.
      if (y < height-1) {
        edges[num].a = y * width + x;
        edges[num].b = (y+1) * width + x;
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x, y+1);
        num++;
      }

      // Lower-right diagonal neighbour.
      if ((x < width-1) && (y < height-1)) {
        edges[num].a = y * width + x;
        edges[num].b = (y+1) * width + (x+1);
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y+1);
        num++;
      }

      // Upper-right diagonal neighbour.
      if ((x < width-1) && (y > 0)) {
        edges[num].a = y * width + x;
        edges[num].b = (y-1) * width + (x+1);
        edges[num].w = diff(smooth_r, smooth_g, smooth_b, x, y, x+1, y-1);
        num++;
      }
    }
  }
  delete smooth_r;
  delete smooth_g;
  delete smooth_b;

  // Segment the graph.
  universe *u = segment_graph(width*height, num, edges, c);

  // Post-process: walk the edges again and merge the two components an edge
  // connects whenever at least one of them is smaller than min_size.
  for (int i = 0; i < num; i++) {
    int a = u->find(edges[i].a);
    int b = u->find(edges[i].b);
    if ((a != b) && ((u->size(a) < min_size) || (u->size(b) < min_size)))
      u->join(a, b);
  }
  delete [] edges;
  *num_ccs = u->num_sets();

  image<rgb> *output = new image<rgb>(width, height);

  // One random colour per vertex index; only the entries at component
  // representatives (the values find() returns) are actually used.
  rgb *colors = new rgb[width*height];
  for (int i = 0; i < width*height; i++)
    colors[i] = random_rgb();

  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      int comp = u->find(y * width + x);
      imRef(output, x, y) = colors[comp];
    }
  }

  delete [] colors;
  delete u;

  return output;
}
#endif
其中 segment_graph(width*height, num, edges, c); 这一步跳到 segment-graph.h中
#ifndef SEGMENT_GRAPH
#define SEGMENT_GRAPH
#include
#include
#include "disjoint-set.h"
// Threshold function: the constant c divided by the component size.
// Both arguments are parenthesised so that expression arguments such as
// THRESHOLD(n + 1, c) expand with the intended precedence.
#define THRESHOLD(size, c) ((c)/(size))
// A weighted edge of the graph that gets segmented.
struct edge {
  float w;  // edge weight; edges are ordered by this value
  int a;    // index of one endpoint vertex
  int b;    // index of the other endpoint vertex
};
// Orders edges by weight only, so that sorting an edge array puts the
// lightest edges first. Endpoints do not take part in the comparison.
bool operator<(const edge &lhs, const edge &rhs) {
  return lhs.w < rhs.w;
}
/*
 * Segment a graph.
 *
 * Returns a disjoint-set forest describing the segmentation. It is allocated
 * with new and not released here, so the caller is responsible for deleting it.
 *
 * num_vertices: number of vertices in the graph.
 * num_edges:    number of entries in edges.
 * edges:        array of edges; it is sorted in place by weight.
 * c:            constant for the threshold function.
 */
universe *segment_graph(int num_vertices, int num_edges, edge *edges,
                        float c) {
  // Visit edges in non-decreasing order of weight.
  std::sort(edges, edges + num_edges);

  // Start with every vertex in its own component.
  universe *u = new universe(num_vertices);

  // Per-component merge threshold, indexed by component representative.
  float *threshold = new float[num_vertices];
  for (int i = 0; i < num_vertices; i++)
    threshold[i] = THRESHOLD(1,c);

  for (int i = 0; i < num_edges; i++) {
    edge *pedge = &edges[i];

    // Components currently containing the two endpoints.
    int a = u->find(pedge->a);
    int b = u->find(pedge->b);
    if (a != b) {
      // Merge only if the edge is no heavier than both thresholds.
      if ((pedge->w <= threshold[a]) &&
          (pedge->w <= threshold[b])) {
        u->join(a, b);
        a = u->find(a);
        // The merged component's threshold is this edge's weight plus the
        // size-dependent term for its new size.
        threshold[a] = pedge->w + THRESHOLD(u->size(a), c);
      }
    }
  }

  // Allocated with new[], so it must be released with delete[].
  delete [] threshold;
  return u;
}
#endif
下面是 universe *u = new universe(num_vertices); 中 universe 的定义,位于 disjoint-set.h 中:
#ifndef DISJOINT_SET
#define DISJOINT_SET
// One node of the disjoint-set forest.
typedef struct {
  int rank;  // rank used to decide which root survives a join
  int p;     // parent index; a node whose p equals its own index is a root
  int size;  // number of elements in the set; maintained for roots only
} uni_elt;

// Disjoint-set forest over the integers [0, elements), using union by rank
// and path compression.
class universe {
public:
  // Creates `elements` singleton sets, one per index.
  universe(int elements);
  ~universe();
  // Returns the representative (root) of the set containing x.
  int find(int x);
  // Merges the sets whose representatives are x and y. Both arguments must be
  // representatives as returned by find(); joining a set with itself is a
  // no-op.
  void join(int x, int y);
  // Size of the set whose representative is x. The stored size is only kept
  // up to date for representatives.
  int size(int x) const { return elts[x].size; }
  // Number of disjoint sets currently in the forest.
  int num_sets() const { return num; }

private:
  // The object owns elts through a raw pointer, so a member-wise copy would
  // lead to a double delete[]. Copying is therefore disabled (declared but
  // intentionally not defined).
  universe(const universe &);
  universe &operator=(const universe &);

  uni_elt *elts;
  int num;
};

universe::universe(int elements) {
  elts = new uni_elt[elements];
  num = elements;
  for (int i = 0; i < elements; i++) {
    elts[i].rank = 0;
    elts[i].size = 1;
    elts[i].p = i;
  }
}

universe::~universe() {
  delete [] elts;
}

int universe::find(int x) {
  // First pass: walk up to the root.
  int root = x;
  while (root != elts[root].p)
    root = elts[root].p;

  // Second pass: point every node on the walked path directly at the root so
  // later lookups through any of them are short.
  while (elts[x].p != root) {
    int next = elts[x].p;
    elts[x].p = root;
    x = next;
  }
  return root;
}

void universe::join(int x, int y) {
  // Joining a set with itself must not change sizes, ranks or the set count.
  if (x == y)
    return;

  if (elts[x].rank > elts[y].rank) {
    elts[y].p = x;
    elts[x].size += elts[y].size;
  } else {
    elts[x].p = y;
    elts[y].size += elts[x].size;
    if (elts[x].rank == elts[y].rank)
      elts[y].rank++;
  }
  num--;
}
#endif
关于如何平滑(smooth)参数,在filter.h里,就不贴了(里面涉及一些数学公式,不看代码会看的很累)
个人觉得,大概思路是:先把每个像素点各自作为一个块,对边按权重排序,然后依次遍历所有的边,不断合并满足阈值条件的块;所有的边遍历完之后,分割也就基本完成了,最后再把太小的块融合起来。
最后,贴上一段原始论文中的描述,供大家参考
Algorithm 1 Segmentation algorithm.
The input is a graph $G = (V, E)$, with $n$ vertices and $m$ edges. The output is a
segmentation of $V$ into components $S = (C_1, \ldots, C_r)$.
0. Sort $E$ into $\pi = (o_1, \ldots, o_m)$, by non-decreasing edge weight.
1. Start with a segmentation $S^0$, where each vertex $v_i$ is in its own component.
2. Repeat step 3 for $q = 1, \ldots, m$.
3. Construct $S^q$ given $S^{q-1}$ as follows. Let $v_i$ and $v_j$ denote the vertices connected
by the $q$-th edge in the ordering, i.e., $o_q = (v_i, v_j)$. If $v_i$ and $v_j$ are in disjoint
components of $S^{q-1}$ and $w(o_q)$ is small compared to the internal difference of
both those components, then merge the two components, otherwise do nothing.
More formally, let $C_i^{q-1}$ be the component of $S^{q-1}$ containing $v_i$ and $C_j^{q-1}$ the
component containing $v_j$. If $C_i^{q-1} \neq C_j^{q-1}$ and $w(o_q) \le MInt(C_i^{q-1}, C_j^{q-1})$ then
$S^q$ is obtained from $S^{q-1}$ by merging $C_i^{q-1}$ and $C_j^{q-1}$. Otherwise $S^q = S^{q-1}$.
4. Return $S = S^m$.
最后的最后,代码和论文如下:这里