显著性检测——ITTI模型

1. 参考文献

// Itti L, Koch C, Niebur E. A Model of Saliency-Based Visual Attention for Rapid Scene Analysis[J]. IEEE Transactions on Pattern Analysis & Machine Intelligence, 1998, 20(11):1254-1259.

// Itti L, Koch C. Computational modelling of visual attention[J]. Nature Reviews Neuroscience, 2001, 2(3):194-203.

2. 模型实现

2.1 显著性检测公共头文件

#ifndef SALIENTCOMMON_H
#define SALIENTCOMMON_H
// std lib
// NOTE(review): the header names on these #include lines were lost when the
// page was extracted; the lists below are a reconstruction covering the names
// the code uses - confirm against the original project.
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>

// opencv lib
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/highgui/highgui_c.h>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgproc/imgproc_c.h>

using namespace std;
using namespace cv;

#endif // SALIENTCOMMON_H

2.2 显著性检测头文件

#ifndef SALIENTITTI_H
#define SALIENTITTI_H
#include "salientcommon.h"
// Itti L, Koch C, Niebur E. A Model of Saliency-Based Visual Attention for Rapid Scene Analysis[J]. IEEE Transactions on Pattern Analysis & Machine Intelligence, 1998, 20(11):1254-1259.
// Itti L, Koch C. Computational modelling of visual attention.[J]. Nature Reviews Neuroscience, 2001, 2(3):194.

// Gaussian pyramid: one image pointer per level, nine levels in total.
// The struct does not own the images; callers create and release them.
struct GaussPyr
{
    IplImage *lev[9];
};

// Saliency detector following the Itti-Koch-Niebur model.
// Usage: construct with an image path, then call m_getSalientITTI().
class salientITTI
{
public:
    // Remembers the path of the image to analyse; nothing is loaded here.
    salientITTI(string imageName);

    // Runs the whole pipeline on the stored path and copies the saliency map
    // into `result`.
    void m_getSalientITTI(Mat& result);

    ~salientITTI();

private:
    // --- pyramid helpers -------------------------------------------------
    // Allocates the nine level images of `p` using the global size table.
    void initPyr(GaussPyr *p);
    // Size of a dimension `x` after `level` halvings.
    int downsample(int x, int level);
    // Fills the first `level` levels of `p` from `data` (smooth + subsample).
    void Gscale(GaussPyr *p, IplImage *data, int level, double sigma);
    // Level-wise difference of two pyramids: dst = s1 - s2.
    void PyrSub(GaussPyr *s1, GaussPyr *s2, GaussPyr *dst);

    // --- centre-surround / normalisation ---------------------------------
    // dst = |s1 - resize(s2)|, with s2 resized to the size of dst.
    void overScaleSub(IplImage *s1, IplImage *s2, IplImage *dst);
    // Collects sum / count / average of the local maxima of `scr`.
    void getLocalMaxima(IplImage *scr, double thresh, double *lm_sum, int *lm_num, double*lm_avg);
    // Normalisation operator; also subsamples the result into `dst`.
    void N_operation(IplImage *scr, IplImage *dst);
    // Accumulates the normalised centre-surround maps of p1/p2 into `dst`.
    void CS_operation(GaussPyr *p1, GaussPyr *p2, IplImage *dst);

    // --- orientation -----------------------------------------------------
    // Gabor filtering of `scr` into `dst` for orientation `theta`.
    void cvGabor(IplImage *scr, IplImage *dst, int width, double lamda, double theta);

    // --- driver ----------------------------------------------------------
    // Full pipeline for one image file; fills m_orgImg and m_resultImg.
    void runItti(const char* filename);

    Mat m_orgImg;        // copy of the loaded input image
    Mat m_resultImg;     // saliency map produced by runItti()
    string m_imageName;  // path handed to the constructor
};

#endif // SALIENTITTI_H

2.3 显著性检测实现

#include "salientitti.h"
#define PI 3.1415926
// Sizes of the nine pyramid levels, kept as a file-level global so every
// routine in this file can read them. runItti() fills the table before any
// pyramid image is allocated.
// Value-initialised with `{}`: the previous `{NULL}` passed a null-pointer
// constant to a non-pointer struct, which only compiles when NULL happens to
// be an integer and triggers conversion warnings.
CvSize PyrSize[9] = {};

// Stores the image path; the image itself is only read when the saliency map
// is requested.
salientITTI::salientITTI(string imageName)
    : m_imageName(imageName)
{
}

// Nothing to release by hand: the Mat and string members clean up themselves.
salientITTI::~salientITTI() {}

// Computes the saliency map for the image path given at construction and
// copies it into `result`.
void salientITTI::m_getSalientITTI(Mat &result)
{
    const char *path = m_imageName.c_str();
    runItti(path);

    // copyTo makes `result` independent of the member buffer.
    m_resultImg.copyTo(result);
}

// Allocates all nine levels of the pyramid `p` as single-channel 64-bit float
// images, each with the size recorded in PyrSize for that level.
void salientITTI::initPyr(GaussPyr *p)
{
    int level = 0;
    while (level < 9)
    {
        p->lev[level] = cvCreateImage(PyrSize[level], IPL_DEPTH_64F, 1);
        ++level;
    }
}


// Returns the size of a dimension of length `x` at pyramid level `level`,
// i.e. `x` halved `level` times, each halving rounding odd values up.
// A `level` of zero or less returns `x` unchanged.
//
// The previous recursive version discarded the result of the recursive call
// and had no return statement on the path taken for level >= 1, which is
// undefined behaviour; the loop below returns a value on every path.
int salientITTI::downsample(int x, int level)
{
    for (; level > 0; --level)
    {
        if (x % 2 == 0)
            x = x / 2;
        else
            x = (x + 1) / 2;
    }
    return x;
}


// Builds the Gaussian pyramid of one image.
//   p     - pyramid whose level images are already allocated (see initPyr)
//   data  - input image; must have the size of level 0
//   level - number of levels to fill (clamped to the nine slots of GaussPyr)
//   sigma - standard deviation passed to the 5x5 Gaussian smoothing
// Level 0 is the smoothed input. Each following level is the previous level
// smoothed again and subsampled by taking every second pixel in both
// directions.
//
// NOTE(review): the loop header of this function was damaged in the published
// listing (text between '<' and '>' was lost); the `i < level` / `i == 0`
// structure below is reconstructed from the surviving `else` branch.
void salientITTI::Gscale(GaussPyr *p, IplImage *data, int level, double sigma)
{
    // GaussPyr::lev and PyrSize only have nine entries.
    if (level > 9)
        level = 9;

    for (int i = 0; i < level; i++)
    {
        if (i == 0)
        {
            cvSmooth(data, p->lev[0], CV_GAUSSIAN, 5, 5, sigma, 0);
        }
        else
        {
            // Smooth the previous level into a scratch image of the same size.
            IplImage *tem = cvCreateImage(PyrSize[i-1], IPL_DEPTH_64F, 1);
            cvSmooth(p->lev[i-1], tem, CV_GAUSSIAN, 5, 5, sigma, 0);

            // Keep every second row and column of the smoothed image.
            for (int a = 0; a < PyrSize[i].height; a++)
            {
                double *dstRow = (double *)(p->lev[i]->imageData + a*p->lev[i]->widthStep);
                const double *srcRow = (const double *)(tem->imageData + 2*a*tem->widthStep);
                for (int b = 0; b < PyrSize[i].width; b++)
                    dstRow[b] = srcRow[2*b];
            }

            // The scratch image was previously leaked on every level.
            cvReleaseImage(&tem);
        }
    }
}


// Across-scale difference used by the centre-surround step:
// `s2` is resized (bilinear) to the size of `dst`, then `dst` receives the
// absolute difference between `s1` and that resized image.
void salientITTI::overScaleSub(IplImage *s1, IplImage *s2, IplImage *dst)
{
    // dst doubles as the buffer for the resized surround image.
    IplImage *resizedSurround = dst;
    cvResize(s2, resizedSurround, CV_INTER_LINEAR);

    cvAbsDiff(s1, resizedSurround, dst);
}


// Gathers statistics about the local maxima of a 64-bit float image.
// An interior pixel is a local maximum when it is >= thresh and >= each of
// its four direct neighbours (up, down, left, right). Border pixels are not
// examined.
// Maxima whose value equals 10 are treated as the global maximum (callers
// scale the image so that its maximum is 10). When at least one other maximum
// exists, those are removed from the reported sum and count and the average
// of the remaining maxima is written to *lm_avg. Otherwise *lm_avg is 0 and
// the sum and count still include the value-10 maxima.
void salientITTI::getLocalMaxima(IplImage *scr, double thresh, double *lm_sum, int *lm_num, double*lm_avg)
{
    double total = 0.0;    // sum over every maximum found
    int found = 0;         // number of maxima found
    int atGlobalMax = 0;   // how many of them have the value 10

    const int lastRow = scr->height - 1;
    const int lastCol = scr->width - 1;

    for (int r = 1; r < lastRow; ++r)
    {
        const double *above = (const double *)(scr->imageData + (r - 1)*scr->widthStep);
        const double *here  = (const double *)(scr->imageData + r*scr->widthStep);
        const double *below = (const double *)(scr->imageData + (r + 1)*scr->widthStep);

        for (int c = 1; c < lastCol; ++c)
        {
            const double v = here[c];
            const bool isPeak = (v >= thresh) &&
                                (v >= above[c]) &&
                                (v >= below[c]) &&
                                (v >= here[c - 1]) &&
                                (v >= here[c + 1]);
            if (!isPeak)
                continue;

            if (v == 10)
                ++atGlobalMax;
            total += v;
            ++found;
        }
    }

    *lm_sum = total;
    *lm_num = found;
    *lm_avg = 0.0;

    if (found > atGlobalMax)
    {
        // Drop the global-maximum hits; at least one maximum remains here.
        *lm_sum = total - 10*atGlobalMax;
        *lm_num = found - atGlobalMax;
        *lm_avg = *lm_sum / *lm_num;
    }
}


// Normalisation operator N(.), followed by subsampling to the size of
// pyramid level index 4.
// Steps (all performed in place on `scr`):
//   1. min-max normalise to [0, 1] and scale by 10, so the maximum is 10;
//   2. compute the average of the local maxima above 1 (see getLocalMaxima);
//   3. if any maximum was found, multiply the map by (10 - average)^2.
// The result is then written to `dst`, taking every 4th / 2nd / every pixel
// depending on whether the height of `scr` matches level 2, level 3, or
// neither. `dst` may be the same image as `scr` in the last case.
void salientITTI::N_operation(IplImage *scr, IplImage *dst)
{
    cvNormalize(scr, scr, 1.0, 0.0, CV_MINMAX, NULL);
    cvConvertScale(scr, scr, 10, 0);

    int lm_num;
    double lm_sum;
    double lm_avg;
    getLocalMaxima(scr, 1, &lm_sum, &lm_num, &lm_avg);
    if (lm_num > 0)
    {
        const double gap = 10 - lm_avg;
        cvConvertScale(scr, scr, gap*gap, 0);
    }

    // Work out the subsampling step from the level the input sits on.
    // Level 2 is tested first, as in the original if / else-if chain.
    int step = 1;
    if (scr->height == PyrSize[2].height)
        step = 4;
    else if (scr->height == PyrSize[3].height)
        step = 2;

    const int outRows = PyrSize[4].height;
    const int outCols = PyrSize[4].width;
    for (int a = 0; a < outRows; a++)
    {
        double *out = (double *)(dst->imageData + a*dst->widthStep);
        const double *in = (const double *)(scr->imageData + step*a*scr->widthStep);
        for (int b = 0; b < outCols; b++)
            out[b] = in[step*b];
    }
}


// Centre-surround step. Pyramid levels are indexed from 0 at the bottom, so
// the loops visit (c, s) = (2,5), (2,6), (3,6), (3,7), (4,7), (4,8), which
// corresponds to (3,6) ... (5,9) when levels are counted from 1.
// For each pair, the across-scale difference |p1[c] - p2[s]| is computed,
// passed through N_operation (which brings it to the level-4 size) and added
// to `dst`. `dst` must have the level-4 size and is accumulated into, so the
// caller is expected to zero it first.
//
// The two scratch images used to be allocated on every iteration and never
// released; they are now reused and freed.
void salientITTI::CS_operation(GaussPyr *p1, GaussPyr *p2, IplImage *dst)
{
    // N_operation overwrites every pixel of this buffer, so one is enough.
    IplImage *tem_5lev = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);

    for (int c = 2; c < 5; c++)
    {
        // overScaleSub overwrites every pixel, so one buffer per centre level.
        IplImage *tem_c = cvCreateImage(PyrSize[c], IPL_DEPTH_64F, 1);

        for (int delta = 3; delta < 5; delta++)
        {
            const int s = c + delta;
            overScaleSub(p1->lev[c], p2->lev[s], tem_c);
            N_operation(tem_c, tem_5lev);
            cvAdd(tem_5lev, dst, dst, NULL);
        }

        cvReleaseImage(&tem_c);
    }

    cvReleaseImage(&tem_5lev);
}


// Level-by-level pyramid subtraction, dst[i] = s1[i] - s2[i] for all nine
// levels. Used when building the colour-opponency pyramids.
void salientITTI::PyrSub(GaussPyr *s1, GaussPyr *s2, GaussPyr *dst)
{
    int level = 0;
    while (level < 9)
    {
        IplImage *minuend = s1->lev[level];
        IplImage *subtrahend = s2->lev[level];
        cvSub(minuend, subtrahend, dst->lev[level], NULL);
        ++level;
    }
}


// Gabor filtering of a 64-bit float image.
//   scr   - input image
//   dst   - output image (64-bit float, same size as scr)
//   width - side length of the square kernel; must be at least 2 because the
//           kernel coordinates are computed with a division by (width - 1)
//   lamda - used both as the wavelength of the cosine carrier and as the
//           scale of the Gaussian envelope
//   theta - orientation of the filter in radians
// The kernel is sampled on integer coordinates spanning [-8, 8] in both
// directions. After filtering, the absolute response is divided by its
// maximum, so the result lies in [0, 1].
//
// Changes from the published listing:
//   * the two `for` headers of the maximum search were damaged (text between
//     '<' and '>' was lost) and are restored as loops over dst->height and
//     dst->width;
//   * the kernel matrix is released (it used to leak on every call);
//   * an all-zero response is no longer divided by zero.
void salientITTI::cvGabor(IplImage *scr, IplImage *dst, int width, double lamda, double theta)
{
    CvMat *gabor_kernel = cvCreateMat(width, width, CV_32FC1);

    // Loop-invariant trigonometric terms.
    const double cosTheta = cos(theta);
    const double sinTheta = sin(theta);

    for (int i = 0; i < width; i++)
        for (int j = 0; j < width; j++)
        {
            // Map the kernel index onto integer coordinates in [-8, 8].
            const int x = (i*16/(width - 1)) - 8;
            const int y = (j*16/(width - 1)) - 8;

            // Rotate the coordinates by theta.
            const double xtmp = x*cosTheta + y*sinTheta;
            const double ytmp = -x*sinTheta + y*cosTheta;

            // Gaussian envelope times cosine carrier.
            const double envelope = (1/(PI*pow(lamda,2)))*exp(-((pow(xtmp,2) + pow(ytmp,2))/pow(lamda,2)));
            const double carrier = cos(2*PI*xtmp/lamda);
            cvSetReal2D(gabor_kernel, i, j, envelope*carrier);
        }

    cvFilter2D(scr, dst, gabor_kernel, cvPoint(-1, -1));
    cvReleaseMat(&gabor_kernel);

    cvAbs(dst, dst);

    // Largest absolute response, used to bring the output into [0, 1].
    double max = 0;
    for (int i = 0; i < dst->height; i++)
    {
        const double *row = (const double *)(dst->imageData + i*dst->widthStep);
        for (int j = 0; j < dst->width; j++)
            if (row[j] >= max)
                max = row[j];
    }

    // With an all-zero response there is nothing to scale.
    if (max > 0)
        cvConvertScale(dst, dst, 1/max, 0);
}


void salientITTI::runItti(const char* filename)
{

    //读入原图
    IplImage *origin = cvLoadImage(filename, CV_LOAD_IMAGE_UNCHANGED);

    Mat tmpOrgImg;
    tmpOrgImg = cvarrToMat(origin);
    tmpOrgImg.copyTo(m_orgImg);

    //将原图插值为两倍
    CvSize newsize;
    newsize.height = 2*origin->height;
    newsize.width =  2*origin->width;
    IplImage *originx2 = cvCreateImage(newsize, origin->depth, origin->nChannels);
    cvResize(origin, originx2 , CV_INTER_LINEAR);

    //分离bgr分量
    IplImage *rgb[4] = {NULL};
    for(int i=0; i < 3; i++)
        rgb[i] = cvCreateImage(newsize, IPL_DEPTH_8U, 1);
    cvSplit(originx2, rgb[2], rgb[1], rgb[0], rgb[3]);



    //计算亮度I和R、G、B、Y分量
    IplImage *Intensity = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
    IplImage *R = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
    IplImage *G = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
    IplImage *B = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
    IplImage *Y = cvCreateImage(newsize, IPL_DEPTH_64F, 1);

    uchar *data_r = (uchar *)rgb[0]->imageData;
    uchar *data_g = (uchar *)rgb[1]->imageData;
    uchar *data_b = (uchar *)rgb[2]->imageData;


    for(int i=0; i < newsize.height; i++)
        for(int j=0; j < newsize.width; j++)
        {

            int index_8 = i*(rgb[0]->widthStep) + j;
            ((double *)(Intensity->imageData + i*Intensity->widthStep))[j] = (double(data_r[index_8]) + double(data_g[index_8]) + double(data_b[index_8]))/3;
            ((double *)(R->imageData + i*Intensity->widthStep))[j] = double(data_r[index_8]) - (double(data_g[index_8]) + double(data_b[index_8]))/2;
            ((double *)(G->imageData + i*Intensity->widthStep))[j] = double(data_g[index_8]) - (double(data_r[index_8]) + double(data_b[index_8]))/2;
            ((double *)(B->imageData + i*Intensity->widthStep))[j] = double(data_b[index_8]) - (double(data_r[index_8]) + double(data_g[index_8]))/2;
            ((double *)(Y->imageData + i*Intensity->widthStep))[j] = (double(data_r[index_8]) + double(data_g[index_8]))/2 - (abs(double(data_r[index_8]) - double(data_g[index_8])))/2 - double(data_b[index_8]);

        }
    //release
    cvReleaseImage(&originx2); cvReleaseImage(&rgb[0]);	cvReleaseImage(&rgb[1]);cvReleaseImage(&rgb[2]);cvReleaseImage(&rgb[3]);



    //初始化高斯金字塔尺寸
    for(int i = 0; i < 9; i++)
    {
        PyrSize[i].height = downsample(newsize.height, i);
        PyrSize[i].width = downsample(newsize.width, i);
    }

    ////////////////////亮度特征提取
    //计算I金字塔
    GaussPyr Pyr_I;
    initPyr(&Pyr_I);
    Gscale(&Pyr_I, Intensity, 9, 1.6);

    //I的CS过程
    IplImage *I_mean = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
    cvSetZero(I_mean);
    CS_operation(&Pyr_I, &Pyr_I, I_mean);
    N_operation(I_mean, I_mean);
    //release
    for(int i = 0; i < 9; i++)
    {
        cvReleaseImage(&(Pyr_I.lev[i]));
    }



    ////////////////////方向特征提取
    //计算O金字塔
    IplImage *orient[4];
    for(int i = 0; i < 4; i++)
        orient[i] = cvCreateImage(cvGetSize(Intensity), IPL_DEPTH_64F,1);

    cvGabor(Intensity, orient[0], 11, 5, 0);
    cvGabor(Intensity, orient[1], 11, 5, PI/4);
    cvGabor(Intensity, orient[2], 11, 5, PI/2);
    cvGabor(Intensity, orient[3], 11, 5, 3*PI/4);
    //release
    cvReleaseImage(&Intensity);

    GaussPyr Pyr_0, Pyr_45, Pyr_90, Pyr_135;

    initPyr(&Pyr_0);
    initPyr(&Pyr_45);
    initPyr(&Pyr_90);
    initPyr(&Pyr_135);

    Gscale(&Pyr_0, orient[0], 9, 0.5);
    Gscale(&Pyr_45, orient[1], 9, 0.5);
    Gscale(&Pyr_90, orient[2], 9, 0.5);
    Gscale(&Pyr_135, orient[3], 9, 0.5);
    //release
    cvReleaseImage(&orient[0]); cvReleaseImage(&orient[1]); cvReleaseImage(&orient[2]); cvReleaseImage(&orient[3]);

    //O的CS过程
    IplImage *O_mean = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
    IplImage *O_1 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
    IplImage *O_2 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
    IplImage *O_3 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
    cvSetZero(O_mean);
    cvSetZero(O_1);
    cvSetZero(O_2);
    cvSetZero(O_3);

    CS_operation(&Pyr_0, &Pyr_0, O_mean);
    CS_operation(&Pyr_45, &Pyr_45, O_1);
    CS_operation(&Pyr_90, &Pyr_90, O_2);
    CS_operation(&Pyr_135, &Pyr_135, O_3);
    //release
    for(int i = 0; i < 9; i++)
    {
        cvReleaseImage(&(Pyr_0.lev[i]));
        cvReleaseImage(&(Pyr_45.lev[i]));
        cvReleaseImage(&(Pyr_90.lev[i]));
        cvReleaseImage(&(Pyr_135.lev[i]));
    }


    N_operation(O_mean, O_mean);
    N_operation(O_1, O_1);
    N_operation(O_2, O_2);
    N_operation(O_3, O_3);

    cvAdd(O_mean, O_1, O_mean, NULL);
    cvAdd(O_mean, O_2, O_mean, NULL);
    cvAdd(O_mean, O_3, O_mean, NULL);

    N_operation(O_mean, O_mean);
    //release
    cvReleaseImage(&O_1); cvReleaseImage(&O_2); cvReleaseImage(&O_3);


    ////////////////////颜色特征提取
    //计算RGBY金字塔
    GaussPyr Pyr_R, Pyr_G, Pyr_B, Pyr_Y;

    initPyr(&Pyr_R);
    initPyr(&Pyr_G);
    initPyr(&Pyr_B);
    initPyr(&Pyr_Y);

    Gscale(&Pyr_R, R, 9, 0.5);
    Gscale(&Pyr_G, G, 9, 0.5);
    Gscale(&Pyr_B, B, 9, 0.5);
    Gscale(&Pyr_Y, Y, 9, 0.5);
    //release
    cvReleaseImage(&R); cvReleaseImage(&G); cvReleaseImage(&B); cvReleaseImage(&Y);

    //计算RG—BY金字塔
    GaussPyr Pyr_RG, Pyr_GR, Pyr_BY, Pyr_YB;

    initPyr(&Pyr_RG);
    initPyr(&Pyr_GR);
    initPyr(&Pyr_BY);
    initPyr(&Pyr_YB);

    PyrSub(&Pyr_R, &Pyr_G, &Pyr_RG);
    PyrSub(&Pyr_G, &Pyr_R, &Pyr_GR);
    PyrSub(&Pyr_B, &Pyr_Y, &Pyr_BY);
    PyrSub(&Pyr_Y, &Pyr_B, &Pyr_YB);
    //release
    for(int i = 0; i < 9; i++)
    {
        cvReleaseImage(&(Pyr_R.lev[i]));
        cvReleaseImage(&(Pyr_G.lev[i]));
        cvReleaseImage(&(Pyr_B.lev[i]));
        cvReleaseImage(&(Pyr_Y.lev[i]));
    }

    //C的CS过程
    IplImage *C_mean = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
    IplImage *C_1 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
    cvSetZero(C_mean);
    cvSetZero(C_1);

    CS_operation(&Pyr_RG, &Pyr_GR, C_mean);
    CS_operation(&Pyr_BY, &Pyr_YB, C_1);
    cvAdd(C_mean, C_1, C_mean, NULL);
    //release
    for(int i = 0; i < 9; i++)
    {
        cvReleaseImage(&(Pyr_RG.lev[i]));
        cvReleaseImage(&(Pyr_GR.lev[i]));
        cvReleaseImage(&(Pyr_BY.lev[i]));
        cvReleaseImage(&(Pyr_YB.lev[i]));
    }
    cvReleaseImage(&C_1);

    N_operation(C_mean, C_mean);


    //整合所有特征,生成显著性图
    IplImage *all = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);//在第四层生成的显著图  融合后的显著图
    cvSetZero(all);
    cvAdd(I_mean, C_mean, all, NULL);
    cvAdd(all, O_mean, all, NULL);
    //release
    cvReleaseImage(&I_mean); cvReleaseImage(&C_mean); cvReleaseImage(&O_mean);

    cvConvertScale(all, all, 0.33333, 0);
    cvNormalize(all, all, 1.0, 0.0, CV_MINMAX, NULL);


    IplImage *all_8U = cvCreateImage(cvGetSize(all), IPL_DEPTH_8U, 1);
    cvConvertScale(all, all_8U, 255, 0);
    //release
    cvReleaseImage(&all);

    IplImage *saliency = cvCreateImage(cvSize(origin->width, origin->height), IPL_DEPTH_8U, 1);//最后调整到原图像大小
    cvResize(all_8U, saliency, CV_INTER_LINEAR);
    //release
    cvReleaseImage(&all_8U);

    Mat tmpResultImg;
    tmpResultImg = cvarrToMat(saliency);
    tmpResultImg.copyTo(m_resultImg);

#if 0
    cvNamedWindow("原图", CV_WINDOW_AUTOSIZE);
    cvNamedWindow("显著性图", CV_WINDOW_AUTOSIZE);
    cvShowImage("原图", origin);
    cvShowImage("显著性图", saliency);



    cvWaitKey(0);
    cvReleaseImage(&origin);
    cvReleaseImage(&saliency);
    cvDestroyWindow("原图");
    cvDestroyWindow("显著性图");
#endif
}

注意:2.3部分参考了别人的代码,但是忘记出处了!


3. 模型效果

显著性检测——ITTI模型_第1张图片


你可能感兴趣的:(opencv函数学习,图像处理,算法实现)