Java中调用pyrMeanShiftFiltering函数,对meanshift函数解释

首先看看源函数:

public static void pyrMeanShiftFiltering(Mat src, Mat dst, double sp, double sr)
    {
        
        pyrMeanShiftFiltering_1(src.nativeObj, dst.nativeObj, sp, sr);
        
        return;
    }

参数解释:

src:输入的8-比特,3-信道图象

dst和源图象相同大小,相同格式的输出图象.
sp空间窗的半径The spatial window radius.
sr色彩窗的半径The color window radius.



opencv
中的源码

进入函数pyrMeanShiftFiltering中看到源码是c++写的:

// C++:  void pyrMeanShiftFiltering(Mat src, Mat& dst, double sp, double sr, int maxLevel = 1, TermCriteria termcrit = TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS,5,1))
    private static native void pyrMeanShiftFiltering_0(long src_nativeObj, long dst_nativeObj, double sp, double sr, int maxLevel, int termcrit_type, int termcrit_maxCount, double termcrit_epsilon);
    private static native void pyrMeanShiftFiltering_1(long src_nativeObj, long dst_nativeObj, double sp, double sr);


于是到opencv源码包中找到meanshift的源码imageSegmentation.cpp如下:

/**
 * @function Watershed_and_Distance_Transform.cpp
 * @brief Sample code showing how to segment overlapping objects using Laplacian filtering, in addition to Watershed and Distance Transformation
 * @author OpenCV Team
 */

#include 
#include 

using namespace std;
using namespace cv;

int main(int, char** argv)
{
//! [load_image]
    // Load the image
    Mat src = imread(argv[1]);

    // Check if everything was fine
    if (!src.data)
        return -1;

    // Show source image
    imshow("Source Image", src);
//! [load_image]

//! [black_bg]
    // Change the background from white to black, since that will help later to extract
    // better results during the use of Distance Transform
    for( int x = 0; x < src.rows; x++ ) {
      for( int y = 0; y < src.cols; y++ ) {
          if ( src.at(x, y) == Vec3b(255,255,255) ) {
            src.at(x, y)[0] = 0;
            src.at(x, y)[1] = 0;
            src.at(x, y)[2] = 0;
          }
        }
    }

    // Show output image
    imshow("Black Background Image", src);
//! [black_bg]

//! [sharp]
    // Create a kernel that we will use for accuting/sharpening our image
    Mat kernel = (Mat_(3,3) <<
            1,  1, 1,
            1, -8, 1,
            1,  1, 1); // an approximation of second derivative, a quite strong kernel

    // do the laplacian filtering as it is
    // well, we need to convert everything in something more deeper then CV_8U
    // because the kernel has some negative values,
    // and we can expect in general to have a Laplacian image with negative values
    // BUT a 8bits unsigned int (the one we are working with) can contain values from 0 to 255
    // so the possible negative number will be truncated
    Mat imgLaplacian;
    Mat sharp = src; // copy source image to another temporary one
    filter2D(sharp, imgLaplacian, CV_32F, kernel);
    src.convertTo(sharp, CV_32F);
    Mat imgResult = sharp - imgLaplacian;

    // convert back to 8bits gray scale
    imgResult.convertTo(imgResult, CV_8UC3);
    imgLaplacian.convertTo(imgLaplacian, CV_8UC3);

    // imshow( "Laplace Filtered Image", imgLaplacian );
    imshow( "New Sharped Image", imgResult );
//! [sharp]

    src = imgResult; // copy back

//! [bin]
    // Create binary image from source image
    Mat bw;
    cvtColor(src, bw, CV_BGR2GRAY);
    threshold(bw, bw, 40, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
    imshow("Binary Image", bw);
//! [bin]

//! [dist]
    // Perform the distance transform algorithm
    Mat dist;
    distanceTransform(bw, dist, CV_DIST_L2, 3);

    // Normalize the distance image for range = {0.0, 1.0}
    // so we can visualize and threshold it
    normalize(dist, dist, 0, 1., NORM_MINMAX);
    imshow("Distance Transform Image", dist);
//! [dist]

//! [peaks]
    // Threshold to obtain the peaks
    // This will be the markers for the foreground objects
    threshold(dist, dist, .4, 1., CV_THRESH_BINARY);

    // Dilate a bit the dist image
    Mat kernel1 = Mat::ones(3, 3, CV_8UC1);
    dilate(dist, dist, kernel1);
    imshow("Peaks", dist);
//! [peaks]

//! [seeds]
    // Create the CV_8U version of the distance image
    // It is needed for findContours()
    Mat dist_8u;
    dist.convertTo(dist_8u, CV_8U);

    // Find total markers
    vector > contours;
    findContours(dist_8u, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);

    // Create the marker image for the watershed algorithm
    Mat markers = Mat::zeros(dist.size(), CV_32SC1);

    // Draw the foreground markers
    for (size_t i = 0; i < contours.size(); i++)
        drawContours(markers, contours, static_cast(i), Scalar::all(static_cast(i)+1), -1);

    // Draw the background marker
    circle(markers, Point(5,5), 3, CV_RGB(255,255,255), -1);
    imshow("Markers", markers*10000);
//! [seeds]

//! [watershed]
    // Perform the watershed algorithm
    watershed(src, markers);

    Mat mark = Mat::zeros(markers.size(), CV_8UC1);
    markers.convertTo(mark, CV_8UC1);
    bitwise_not(mark, mark);
//    imshow("Markers_v2", mark); // uncomment this if you want to see how the mark
                                  // image looks like at that point

    // Generate random colors
    vector colors;
    for (size_t i = 0; i < contours.size(); i++)
    {
        int b = theRNG().uniform(0, 255);
        int g = theRNG().uniform(0, 255);
        int r = theRNG().uniform(0, 255);

        colors.push_back(Vec3b((uchar)b, (uchar)g, (uchar)r));
    }

    // Create the result image
    Mat dst = Mat::zeros(markers.size(), CV_8UC3);

    // Fill labeled objects with random colors
    for (int i = 0; i < markers.rows; i++)
    {
        for (int j = 0; j < markers.cols; j++)
        {
            int index = markers.at(i,j);
            if (index > 0 && index <= static_cast(contours.size()))
                dst.at(i,j) = colors[index-1];
            else
                dst.at(i,j) = Vec3b(0,0,0);
        }
    }

    // Visualize the final image
    imshow("Final Result", dst);
//! [watershed]

    waitKey(0);
    return 0;
}


样例:

cvPyrMeanShiftFiltering(srcImage,dstImage,3,7,0,cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,5,1));
*/
CV_IMPL void
cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr, 
                         double sp0, double sr, int max_level,
                         CvTermCriteria termcrit )
{
    const int cn = 3;
    const int MAX_LEVELS = 8;
    CvMat* src_pyramid[MAX_LEVELS+1];
    CvMat* dst_pyramid[MAX_LEVELS+1];
    CvMat* mask0 = 0;
    int i, j, level;
    //uchar* submask = 0;

    #define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \
        tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22)

    memset( src_pyramid, 0, sizeof(src_pyramid) );
    memset( dst_pyramid, 0, sizeof(dst_pyramid) );
    
    CV_FUNCNAME( "cvPyrMeanShiftFiltering" );

    __BEGIN__;

    double sr2 = sr * sr;
    int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);
    int tab[768];
    CvMat sstub0, *src0;
    CvMat dstub0, *dst0;

    CV_CALL( src0 = cvGetMat( srcarr, &sstub0 ));
    CV_CALL( dst0 = cvGetMat( dstarr, &dstub0 ));

    if( CV_MAT_TYPE(src0->type) != CV_8UC3 )
        CV_ERROR( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );
    
    if( !CV_ARE_TYPES_EQ( src0, dst0 ))
        CV_ERROR( CV_StsUnmatchedFormats, "The input and output images must have the same type" );

    if( !CV_ARE_SIZES_EQ( src0, dst0 ))
        CV_ERROR( CV_StsUnmatchedSizes, "The input and output images must have the same size" );

    if( (unsigned)max_level > (unsigned)MAX_LEVELS )
        CV_ERROR( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" );

    if( !(termcrit.type & CV_TERMCRIT_ITER) )
        termcrit.max_iter = 5;
    termcrit.max_iter = MAX(termcrit.max_iter,1);
    termcrit.max_iter = MIN(termcrit.max_iter,100);
    if( !(termcrit.type & CV_TERMCRIT_EPS) )
        termcrit.epsilon = 1.f;
    termcrit.epsilon = MAX(termcrit.epsilon, 0.f);

    for( i = 0; i < 768; i++ )
        tab[i] = (i - 255)*(i - 255);

    // 1. construct pyramid
    src_pyramid[0] = src0;
    dst_pyramid[0] = dst0;
 /*
    for( level = 1; level <= max_level; level++ )
    {
        CV_CALL( src_pyramid[level] = cvCreateMat( (src_pyramid[level-1]->rows+1)/2,
                        (src_pyramid[level-1]->cols+1)/2, src_pyramid[level-1]->type ));
        CV_CALL( dst_pyramid[level] = cvCreateMat( src_pyramid[level]->rows,
                        src_pyramid[level]->cols, src_pyramid[level]->type ));
        CV_CALL( cvPyrDown( src_pyramid[level-1], src_pyramid[level] ));
        //CV_CALL( cvResize( src_pyramid[level-1], src_pyramid[level], CV_INTER_AREA ));
    }
 */

    CV_CALL( mask0 = cvCreateMat( src0->rows, src0->cols, CV_8UC1 ));
    //CV_CALL( submask = (uchar*)cvAlloc( (sp+2)*(sp+2) ));

    // 2. apply meanshift, starting from the pyramid top (i.e. the smallest layer)
    for( level = max_level; level >= 0; level-- )
    {
        CvMat* src = src_pyramid[level];
        CvSize size = cvGetMatSize(src);
        uchar* sptr = src->data.ptr;
        int sstep = src->step; /* 以字节为单位的行数据长度 */

        uchar* mask = 0;
        int mstep = 0;

        uchar* dptr;
        int dstep;
        float sp = (float)(sp0 / (1 << level));
        sp = MAX( sp, 1 );

//这里保证了sp最小也为1,也就保证了下面进行窗口计算时,窗口大小最小也为(1*2+1)*(1*2+1)

  //由于max_level的值为0,所以下面的代码不执行

        //if( level < max_level )
        //{
        //    CvSize size1 = cvGetMatSize(dst_pyramid[level+1]);
        //    CvMat m = cvMat( size.height, size.width, CV_8UC1, mask0->data.ptr );
        //    dstep = dst_pyramid[level+1]->step;
        //    dptr = dst_pyramid[level+1]->data.ptr + dstep + cn;
        //    mstep = m.step;
        //    mask = m.data.ptr + mstep;
        //    //cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );
        //    cvPyrUp( dst_pyramid[level+1], dst_pyramid[level] );
        //    cvZero( &m );

        //    for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )
        //    {
        //        for( j = 1; j < size1.width-1; j++, dptr += cn )
        //        {
        //            int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];
        //            mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) ||
        //                cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3);
        //        }
        //    }

        //    cvDilate( &m, &m, 0, 1 );
        //    mask = m.data.ptr;
        //}

        dptr = dst_pyramid[level]->data.ptr;
        dstep = dst_pyramid[level]->step;

  //一个像素占三个字节,下面的循环对每一个象元进行处理,由于是对三个波段的数据进行
  //处理,所以每个象元占三个字节,保存三个8位的象元值
        for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,
                                        dptr += dstep - size.width*3, //防止数据的遗漏矩阵,每处理一行数据对齐一次 如果width是4的倍数,
                                                                        //则不会出现这种情况,分析代码时可假设 
                                          mask += mstep )               //是这样的,方便分析 注意for循环的执行顺序 
        {   
            for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )
            {    
    //x0记录当前处理象元的列号,y0记录当前处理象元的行号
                int x0 = j, y0 = i, x1, y1, iter;
                int c0, c1, c2;

                //if( mask && !mask[j] )
                //    continue;

                c0 = sptr[0], c1 = sptr[1], c2 = sptr[2];   //记录第一个处理象元的三个分量(RGB)

                // iterate meanshift procedure
                for( iter = 0; iter < termcrit.max_iter; iter++ ) //所谓的MeanShift算法迭代核
                {
                    uchar* ptr;
                    int x, y, count = 0;
                    int minx, miny, maxx, maxy;
                    int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
                    double icount;
                    int stop_flag;

                    //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
                    minx = cvRound(x0 - sp); minx = MAX(minx, 0);
                    miny = cvRound(y0 - sp); miny = MAX(miny, 0);
                    maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1);
                    maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1);

     //根据上面的代码可以得到窗口大小为:(2*sp+1)*(2*sp+1),当统计到达影像边界时,
     //重新考虑,这就是上面代码MAX和MIN两个函数所控制的内容

 

                    //sptr记录了当前处理象元的指针
     //ptr记录了以当前处理象元为中心的处理窗口中的第一个象元指针     
                    ptr = sptr + (miny - i)*sstep + (minx - j)*3;


     /*double sr2 = sr * sr;
     int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);

     for( i = 0; i < 768; i++ )
     tab[i] = (i - 255)*(i - 255);
     */

     /*ptr += sstep - (maxx-minx+1)*3该语句的含义是处理完
     指定窗口中一行数据时,使窗口的指针指向窗口中的下一行的
     第一个数据
     */
                    for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 )
                    {
                        int row_count = 0;
                        x = minx;
                      /*  for( ; x + 3 <= maxx; x += 4, ptr += 12 ) //窗口中的每一行分两部分循环
                        {      
                            int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {                        
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x; row_count++;
                            }
                            t0 = ptr[3], t1 = ptr[4], t2 = ptr[5];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {                        
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x+1; row_count++;
                            }
                            t0 = ptr[6], t1 = ptr[7], t2 = ptr[8];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {                        
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x+2; row_count++;
                            }
                            t0 = ptr[9], t1 = ptr[10], t2 = ptr[11];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {                        
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x+3; row_count++;
                            }
                        } //窗口一行第一部分计算结束*/                        
                        for( ; x <= maxx; x++, ptr += 3 )
                        {      
                            int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
                            {                        
                                s0 += t0; s1 += t1; s2 += t2;
                                sx += x; row_count++;
                            }
                        } //窗口一行第二部分计算结束
                        count += row_count; //count记录了窗口中所有符合条件的样本点数目
                        sy += y*row_count;
                    } //窗口循环结束
            
                    if( count == 0 )
                        break;

                    icount = 1./count; //1.表示把1转换为浮点类型
                    x1 = cvRound(sx*icount);
                    y1 = cvRound(sy*icount);
                    s0 = cvRound(s0*icount);
                    s1 = cvRound(s1*icount);
                    s2 = cvRound(s2*icount);
        
                    stop_flag = x0 == x1 && y0 == y1 || abs(x1-x0) + abs(y1-y0) +
                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] +
                        tab[s2 - c2 + 255] <= termcrit.epsilon;
                
                    x0 = x1; y0 = y1;
                    c0 = s0; c1 = s1; c2 = s2;

                    if( stop_flag )
                        break;
                } //针对这个窗口的迭代结束

                dptr[0] = (uchar)c0;
                dptr[1] = (uchar)c1;
                dptr[2] = (uchar)c2;
            }
        }
    }  

    __END__;

    for( i = 1; i <= MAX_LEVELS; i++ )
    {
        cvReleaseMat( &src_pyramid[i] );
        cvReleaseMat( &dst_pyramid[i] );
    }
    cvReleaseMat( &mask0 );
}




你可能感兴趣的:(图像处理)