Opencv中meanShiftSegmentation的实现

Opencv中meanShiftSegmentation的实现

1.样例在opencv-2.4.6.1\samples\cpp的meanShift_Segmentation.cpp中

static void meanShiftSegmentation( int, void* ){

    cout << "spatialRad=" << spatialRad << "; "

         << "colorRad=" << colorRad << "; "

         << "maxPyrLevel=" << maxPyrLevel << endl;

    pyrMeanShiftFiltering( img, res, spatialRad, colorRad, maxPyrLevel );

    floodFillPostprocess( res, Scalar::all(2) );

    imshow( winName, res );

}

 

2. 下面着重介绍一下pyrMeanShiftFiltering,其核心函数是cvPyrMeanShiftFiltering。

该函数使用金字塔的方法加速,每一级金字塔中实际依然是使用meanshift。核心思想如下:

(1)   从金字塔塔顶的图像开始处理:(塔顶的图像最小,速度较快)。对图像的每个点进行以下meanshift迭代运算:

a)  从图像左上角第一个像素开始,以该点为中心,生成指定大小的窗口。计算窗口中满足距离条件的所有点的平均位置以及R,G,B的平均值。

(距离条件:(R1-R0)*(R1-R0)+(G1-G0)*(G1-G0)+(B1-B0)*(B1-B0)<Thresh

其中R0,G0,B0为窗口中心像素的RGB值;R1,G1,B1为窗口中每个像素相应的RGB值)

b)  将窗口中心平移到a)获取的平均位置,计算新窗口中满足距离条件的所有点的新平均位置以及新R,G,B的平均值。(R0,G0,B0为a)中获取的新R,G,B的平均值,即距离比较的参考点为a)中获取的新R,G,B的平均值。)

c)  不断重复执行b),直到满足迭代次数或者新平均位置和新R,G,B的平均值和上一次的值差距满足迭代精度。另外在重复执行b)时,每一次都将窗口中心不断平移到b)获取的新平均位置,而且距离条件中比较的参考点为b)获取的新R,G,B的平均值。

d)  将迭代结束获得的新R,G,B的平均值存储到窗口中心位置对应的像素中。

 

不断移动窗口中心,将图像的每个点都作为窗口中心进行遍历,执行上面的a,b,c,d。

 

(2) 将(1)的结果(新的金字塔图像)生成下一层金字塔计算的初值和mask。利用mask实现加速,只有mask为1的位置才进行本轮的迭代。

a)  将(1)的结果升2采样,即图像长宽都扩大2倍,整个图像面积扩大4倍。将其作为该层金字塔图像计算的初值。

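b)  对上一层(较小一层)金字塔的结果图像,逐个判断其内部像素(不含最外一圈)与8邻域像素之间是否满足差异条件:只要有一个邻域像素满足,则当前层中对应位置(坐标约为该像素坐标的2倍减1)的mask为1,否则mask为0。最后对mask做一次膨胀(dilate),使mask为1的区域向外扩展一圈。

(判断条件:(R1-R0)*(R1-R0)+(G1-G0)*(G1-G0)+(B1-B0)*(B1-B0)>=Thresh

其中R0,G0,B0为上一层结果图像中当前像素的RGB值;R1,G1,B1为其8邻域中某个像素相应的RGB值;Thresh=max(sr*sr,16),sr为颜色半径。即:只有颜色变化较大的区域才需要在当前层重新做meanshift迭代,其余位置直接使用a)中升采样得到的初值。)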

 待续。。。。。

 

(3) 根据(2)中的初值和mask,将mask为1的所有像素进行meanshift迭代,迭代过程和步骤(1)一样,最后把迭代结束获得的新R,G,B的平均值存储到mask为1的位置的对应像素中。

(4) 重复步骤(2)和步骤(3),直至金字塔底。最终输出和原图大小一样的结果图像。

 

 

 

 

CV_IMPL void

cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,

                         double sp0, double sr, int max_level,

                         CvTermCriteria termcrit )

{

    const int cn = 3;

    const int MAX_LEVELS = 8;

 

    if( (unsigned)max_level > (unsigned)MAX_LEVELS )

        CV_Error( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" );

 

    std::vector<cv::Mat> src_pyramid(max_level+1);

    std::vector<cv::Mat> dst_pyramid(max_level+1);

    cv::Mat mask0;

    int i, j, level;

    //uchar* submask = 0;

 

    #define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \

        tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22)

 

    double sr2 = sr * sr;

    int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);

    int tab[768];

    cv::Mat src0 = cv::cvarrToMat(srcarr);

    cv::Mat dst0 = cv::cvarrToMat(dstarr);

 

    if( src0.type() != CV_8UC3 )

        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );

 

    if( src0.type() != dst0.type() )

        CV_Error( CV_StsUnmatchedFormats, "The input and output images must have the same type" );

 

    if( src0.size() != dst0.size() )

        CV_Error( CV_StsUnmatchedSizes, "The input and output images must have the same size" );

 

    if( !(termcrit.type & CV_TERMCRIT_ITER) )

        termcrit.max_iter = 5;

    termcrit.max_iter = MAX(termcrit.max_iter,1);

    termcrit.max_iter = MIN(termcrit.max_iter,100);

    if( !(termcrit.type & CV_TERMCRIT_EPS) )

        termcrit.epsilon = 1.f;

    termcrit.epsilon = MAX(termcrit.epsilon, 0.f);

 

    for( i = 0; i < 768; i++ )

        tab[i] = (i - 255)*(i - 255);

 

    // 1. construct pyramid

    src_pyramid[0] = src0;

    dst_pyramid[0] = dst0;

    for( level = 1; level <= max_level; level++ )

    {

        src_pyramid[level].create( (src_pyramid[level-1].rows+1)/2,

                        (src_pyramid[level-1].cols+1)/2, src_pyramid[level-1].type() );

        dst_pyramid[level].create( src_pyramid[level].rows,

                        src_pyramid[level].cols, src_pyramid[level].type() );

        cv::pyrDown( src_pyramid[level-1], src_pyramid[level], src_pyramid[level].size() );

        //CV_CALL( cvResize( src_pyramid[level-1], src_pyramid[level], CV_INTER_AREA ));

    }

 

    mask0.create(src0.rows, src0.cols, CV_8UC1);

    //CV_CALL( submask = (uchar*)cvAlloc( (sp+2)*(sp+2) ));

 

    // 2. apply meanshift, starting from the pyramid top (i.e. the smallest layer)

    for( level = max_level; level >= 0; level-- )

    {

        cv::Mat src = src_pyramid[level];

        cv::Size size = src.size();

        uchar* sptr = src.data;

        int sstep = (int)src.step;

        uchar* mask = 0;

        int mstep = 0;

        uchar* dptr;

        int dstep;

        float sp = (float)(sp0 / (1 << level));

        sp = MAX( sp, 1 );

 

        if( level < max_level )

        {

            cv::Size size1 = dst_pyramid[level+1].size();

            cv::Mat m( size.height, size.width, CV_8UC1, mask0.data );

            dstep = (int)dst_pyramid[level+1].step;

            dptr = dst_pyramid[level+1].data + dstep + cn;

            mstep = (int)m.step;

            mask = m.data + mstep;

            //cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );

            cv::pyrUp( dst_pyramid[level+1], dst_pyramid[level], dst_pyramid[level].size() );

            m.setTo(cv::Scalar::all(0));

 

            for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )

            {

                for( j = 1; j < size1.width-1; j++, dptr += cn )

                {

                    int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];

                    mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) ||

                        cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3);

                }

            }

 

            cv::dilate( m, m, cv::Mat() );

            mask = m.data;

        }

 

        dptr = dst_pyramid[level].data;

        dstep = (int)dst_pyramid[level].step;

 

        for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,

                                          dptr += dstep - size.width*3,

                                          mask += mstep )

        {

            for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )

            {

                int x0 = j, y0 = i, x1, y1, iter;

                int c0, c1, c2;

 

                if( mask && !mask[j] )

                    continue;

 

                c0 = sptr[0], c1 = sptr[1], c2 = sptr[2];

 

                // iterate meanshift procedure,图像的每点都使用meanshift,找到其收敛的RGB

                for( iter = 0; iter < termcrit.max_iter; iter++ )

                {

                    uchar* ptr;

                    int x, y, count = 0;

                    int minx, miny, maxx, maxy;

                    int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;

                    double icount;

                    int stop_flag;

 

                    //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)

                    minx = cvRound(x0 - sp); minx = MAX(minx, 0);

                    miny = cvRound(y0 - sp); miny = MAX(miny, 0);

                    maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1);

                    maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1);

                    ptr = sptr + (miny - i)*sstep + (minx - j)*3;

 

                    for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 )

                    {

                        int row_count = 0;

                        x = minx;

                        #if CV_ENABLE_UNROLLED

                        for( ; x + 3 <= maxx; x += 4, ptr += 12 )

                        {

                            int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];

                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )

                            {

                                s0 += t0; s1 += t1; s2 += t2;

                                sx += x; row_count++;

                            }

                            t0 = ptr[3], t1 = ptr[4], t2 = ptr[5];

                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )

                            {

                                s0 += t0; s1 += t1; s2 += t2;

                                sx += x+1; row_count++;

                            }

                            t0 = ptr[6], t1 = ptr[7], t2 = ptr[8];

                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )

                            {

                                s0 += t0; s1 += t1; s2 += t2;

                                sx += x+2; row_count++;

                            }

                            t0 = ptr[9], t1 = ptr[10], t2 = ptr[11];

                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )

                            {

                                s0 += t0; s1 += t1; s2 += t2;

                                sx += x+3; row_count++;

                            }

                        }

                        #endif

                        for( ; x <= maxx; x++, ptr += 3 )

                        {

                            int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];

                            if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )

                            {

                                s0 += t0; s1 += t1; s2 += t2;

                                sx += x; row_count++;

                            }

                        }

                        count += row_count;

                        sy += y*row_count;

                    }

 

                    if( count == 0 )

                        break;

 

                    icount = 1./count;

                    x1 = cvRound(sx*icount);

                    y1 = cvRound(sy*icount);

                    s0 = cvRound(s0*icount);

                    s1 = cvRound(s1*icount);

                    s2 = cvRound(s2*icount);

 

                    stop_flag = (x0 == x1 && y0 == y1) || abs(x1-x0) + abs(y1-y0) +

                        tab[s0 - c0 + 255] + tab[s1 - c1 + 255] +

                        tab[s2 - c2 + 255] <= termcrit.epsilon;

 

                    x0 = x1; y0 = y1;

                    c0 = s0; c1 = s1; c2 = s2;

 

                    if( stop_flag )

                        break;

                }

 

                dptr[0] = (uchar)c0;

                dptr[1] = (uchar)c1;

                dptr[2] = (uchar)c2;

            }

        }// 一层

    }

}

 

你可能感兴趣的:(Opencv中meanShiftSegmentation的实现)