关于meanshitf的介绍:
mean shift 图像分割 (一): 1 总体思想,2 算法步骤
mean shift 图像分割 (二): 3 算法原理,4 延伸
mean shift 图像分割 (三): 5 非参数密度估计
不得不说,这个OpenCV实现实在不咋地,
这次我改风格了,中英文杂合注释
main.cpp
#include "opencv2/opencv.hpp"
#include
using namespace cv;
using namespace std;
static void help(char** argv)
{
cout << "\nDemonstrate mean-shift based color segmentation in spatial pyramid.\n"
<< "Call:\n " << argv[0] << " image\n"
<< "This program allows you to set the spatial and color radius\n"
<< "of the mean shift window as well as the number of pyramid reduction levels explored\n"
<< endl;
}
//This colors the segmentations
static void floodFillPostprocess(Mat& img, const Scalar& colorDiff = Scalar::all(1))
{
CV_Assert(!img.empty());
RNG rng = theRNG();
Mat mask(img.rows + 2, img.cols + 2, CV_8UC1, Scalar::all(0));
for (int y = 0; y < img.rows; y++)
{
for (int x = 0; x < img.cols; x++)
{
if (mask.at(y + 1, x + 1) == 0)
{
Scalar newVal(rng(256), rng(256), rng(256));
floodFill(img, mask, Point(x, y), newVal, 0, colorDiff, colorDiff);
}
}
}
}
string winName = "meanshift";
int spatialRad, colorRad, maxPyrLevel;
Mat img, res;
static void meanShiftSegmentation(int, void*)
{
cout << "spatialRad=" << spatialRad << "; "
<< "colorRad=" << colorRad << "; "
<< "maxPyrLevel=" << maxPyrLevel << endl;
pyrMeanShiftFiltering(img, res, spatialRad, colorRad, maxPyrLevel);
floodFillPostprocess(res, Scalar::all(2));
imshow(winName, res);
}
int main(int argc, char** argv)
{
//if (argc != 2)
//{
// help(argv);
// return -1;
//}
string fimg = "G:/Pic/fruits.jpg";//"G:/Pic/2012060619243397.png";
img = imread(fimg);
if (img.empty())
return -1;
//640-by-480 it works well to set spatialRadiusequal = 2 and colorRadiusequal = 40
// max_level, which describes how many levels of scale pyramid you want
//used for segmentation.A max_levelof 2 or 3 works well for a 640 - by - 480 color image
spatialRad = 40;
colorRad = 22;
maxPyrLevel = 2;
namedWindow(winName, WINDOW_AUTOSIZE);
createTrackbar("spatialRad", winName, &spatialRad, 80, meanShiftSegmentation);
createTrackbar("colorRad", winName, &colorRad, 60, meanShiftSegmentation);
createTrackbar("maxPyrLevel", winName, &maxPyrLevel, 5, meanShiftSegmentation);
meanShiftSegmentation(0, 0);
waitKey();
return 0;
}
/*M///
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
/****************************************************************************************\
* Meanshift *
\****************************************************************************************/
CV_IMPL void
cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
double sp0, double sr, int max_level,
CvTermCriteria termcrit )
{
const int cn = 3;
const int MAX_LEVELS = 8;
if( (unsigned)max_level > (unsigned)MAX_LEVELS )
CV_Error( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" );
std::vector src_pyramid(max_level+1);
std::vector dst_pyramid(max_level+1);
cv::Mat mask0;
int i, j, level;
//uchar* submask = 0;
#define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \
tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22) // color diffference Note: it‘s >= not <
double sr2 = sr * sr;// sr: ||x|| color window radius
int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);
int tab[768]; // 256*,lookup table for fast square distance computation
cv::Mat src0 = cv::cvarrToMat(srcarr);
cv::Mat dst0 = cv::cvarrToMat(dstarr);
if( src0.type() != CV_8UC3 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );
if( src0.type() != dst0.type() )
CV_Error( CV_StsUnmatchedFormats, "The input and output images must have the same type" );
if( src0.size() != dst0.size() )
CV_Error( CV_StsUnmatchedSizes, "The input and output images must have the same size" );
if( !(termcrit.type & CV_TERMCRIT_ITER) )
termcrit.max_iter = 5; // if we don't have set the number of iterations ,it will iterate 5 times at most .
termcrit.max_iter = MAX(termcrit.max_iter,1);
termcrit.max_iter = MIN(termcrit.max_iter,100); // max iteration is 100
if( !(termcrit.type & CV_TERMCRIT_EPS) )
termcrit.epsilon = 1.f; // the default epsilon
termcrit.epsilon = MAX(termcrit.epsilon, 0.f);
for( i = 0; i < 768; i++ )
tab[i] = (i - 255)*(i - 255); //tab[0]=255^2,tab[255]=0,tab[512]=255^2
// 1. construct pyramid
src_pyramid[0] = src0;
dst_pyramid[0] = dst0;
for( level = 1; level <= max_level; level++ )
{
src_pyramid[level].create( (src_pyramid[level-1].rows+1)/2,
(src_pyramid[level-1].cols+1)/2, src_pyramid[level-1].type() );// plus 1 to ensure both the clos and row are even
dst_pyramid[level].create( src_pyramid[level].rows,
src_pyramid[level].cols, src_pyramid[level].type() ); //
cv::pyrDown( src_pyramid[level-1], src_pyramid[level], src_pyramid[level].size() );// first using Gaussian blure then removing every even-numbered row and column
//CV_CALL( cvResize( src_pyramid[level-1], src_pyramid[level], CV_INTER_AREA ));
}
mask0.create(src0.rows, src0.cols, CV_8UC1); // memory buf for mask of every scale
//CV_CALL( submask = (uchar*)cvAlloc( (sp+2)*(sp+2) ));
// 2. apply meanshift, starting from the pyramid top (i.e. the smallest layer)
for( level = max_level; level >= 0; level-- )
{
cv::Mat src = src_pyramid[level]; // the current processing layer
cv::Size size = src.size();
uchar* sptr = src.data; //
int sstep = (int)src.step;// all bytee in a row(including the padded pixels )
uchar* mask = 0;
int mstep = 0;
uchar* dptr;
int dstep;
float sp = (float)(sp0 / (1 << level)); // spatial window radius,keep the contents which the kernel can cover are identical
sp = MAX( sp, 1 );
if( level < max_level ) //except for the top level,先跳过,其实也可以忽略
{
cv::Size size1 = dst_pyramid[level+1].size(); // notice that layer level+1 has been processed
cv::Mat m( size.height, size.width, CV_8UC1, mask0.data ); // Note that the memory to which .data point don't have the same size as the m. Howerver,the former will alway large or equal to the later. We just use the mask0 as an big enough container that only allocate one time.
dstep = (int)dst_pyramid[level+1].step;//
dptr = dst_pyramid[level+1].data + dstep + cn; //jump the first row and first cloumn(including 3 channels)
mstep = (int)m.step;
mask = m.data + mstep;//jump the first row
//cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );
cv::pyrUp( dst_pyramid[level+1], dst_pyramid[level], dst_pyramid[level].size() ); // 这一行有意义吗?完全可以去掉啊?????
// Note:the image is first upsized with new even rows and cols filled with 0s ,thereafter the missing values is approximated with the Gaussian convolution.
m.setTo(cv::Scalar::all(0));
for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )
// dptr + dstep + width*3*2 : jump to the second point of the next row
// mstep*2 : 2 row in mask is correspondence to 1 rows in dst_pyramid[level+1];
for( j = 1; j < size1.width-1; j++, dptr += cn )//jump the first and the last column,Notice that before jump to the next row,the dptr have pointed to the last column
{
int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];
mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) ||
cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3);//if any of it's 8 neigbours doesn't similar with it in color sapce,it should be proceed which labeled with 1
}
}
cv::dilate( m, m, cv::Mat() );
mask = m.data;
}
dptr = dst_pyramid[level].data;
dstep = (int)dst_pyramid[level].step;
for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,// jumpping the padding bytes in the ends of each row of src
dptr += dstep - size.width*3,// jumpping the padding bytes in the ends of each row of src
mask += mstep ) // the offset of mask
{
for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )
{
int x0 = j, y0 = i, x1, y1, iter;// x1,y1: the position of mode
int c0, c1, c2;
if( mask && !mask[j] ) // 可以忽略,mask !=0:except for the top level, mask[j]==0: similar to all of it's 8 neighbors
continue;
c0 = sptr[0], c1 = sptr[1], c2 = sptr[2]; // b,g,r or L,u,v of the central position
// iterate meanshift procedure,核心部分
for( iter = 0; iter < termcrit.max_iter; iter++ )
{
uchar* ptr;
int x, y, count = 0; // count : count the number of pixels whithin the color support in a square window
int minx, miny, maxx, maxy;
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;//
double icount;
int stop_flag;
//mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
//a square window of (2*sp+1)*(2*sp+1)
minx = cvRound(x0 - sp); minx = MAX(minx, 0);//ensure minx doesn't less than the first column
miny = cvRound(y0 - sp); miny = MAX(miny, 0);//ensure minx doesn't less than the first row
maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1);
maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1);
ptr = sptr + (miny - i)*sstep + (minx - j)*3;// move to (minx,miny)
for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 )
{
int row_count = 0; // count the number of pixels whthin the color support in a row
x = minx;
#if CV_ENABLE_UNROLLED //展开,先跳过
for( ; x + 3 <= maxx; x += 4, ptr += 12 )//process 4 colums every cycle to reduce the cycle times. it will be faster than loop every column
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
{
s0 += t0; s1 += t1; s2 += t2;
sx += x; row_count++;
}
t0 = ptr[3], t1 = ptr[4], t2 = ptr[5];
if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
{
s0 += t0; s1 += t1; s2 += t2;
sx += x+1; row_count++;
}
t0 = ptr[6], t1 = ptr[7], t2 = ptr[8];
if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
{
s0 += t0; s1 += t1; s2 += t2;
sx += x+2; row_count++;
}
t0 = ptr[9], t1 = ptr[10], t2 = ptr[11];
if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
{
s0 += t0; s1 += t1; s2 += t2;
sx += x+3; row_count++;
}
}
#endif
for( ; x <= maxx; x++, ptr += 3 ) // if we have defined CV_ENABLE_UNROLLED then processing the remain (maxx+1)%4 cloumns otherwise processing all of columns
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; //b,g,r
if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )//truncate with isr2 to have finite support(color similarity )
{ // the value [-255,255] first map to [0 510],then map to the squre distance to central position (i,j)
s0 += t0; s1 += t1; s2 += t2;
sx += x;
row_count++;
}
}
count += row_count;
sy += y*row_count;
}
if( count == 0 )
break;
icount = 1./count;
x1 = cvRound(sx*icount); // x mean
y1 = cvRound(sy*icount); // Y mean
s0 = cvRound(s0*icount); // b mean
s1 = cvRound(s1*icount); // g mean
s2 = cvRound(s2*icount); // r mean
stop_flag = (x0 == x1 && y0 == y1) || abs(x1-x0) + abs(y1-y0) + // converge to (i,j)
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] +
tab[s2 - c2 + 255] <= termcrit.epsilon; //movement can be ignored
x0 = x1; y0 = y1;
c0 = s0; c1 = s1; c2 = s2;// Notice;the center color was replaced by the filtered value
if( stop_flag )
break;
}
dptr[0] = (uchar)c0; //assign the filtered value of converging point to the starting point
dptr[1] = (uchar)c1;
dptr[2] = (uchar)c2;
}
}
}
}
void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
double sp, double sr, int maxLevel,
TermCriteria termcrit )
{
Mat src = _src.getMat();
if( src.empty() )
return;
_dst.create( src.size(), src.type() );
CvMat c_src = src, c_dst = _dst.getMat();
cvPyrMeanShiftFiltering( &c_src, &c_dst, sp, sr, maxLevel, termcrit );
}