1. 参考文献
// Itti L, Koch C, Niebur E. A Model of Saliency-Based Visual Attention for Rapid Scene Analysis[J]. IEEE Transactions on Pattern Analysis & Machine Intelligence, 1998, 20(11):1254-1259.
// Itti L, Koch C. Computational modelling of visual attention.[J]. Nature Reviews Neuroscience, 2001, 2(3):194.
2. 模型实现
2.1 显著性检测公共头文件
#ifndef SALIENTCOMMON_H
#define SALIENTCOMMON_H
// std lib
#include
#include
#include
#include
// opencv lib
#include
#include
#include
#include
#include
using namespace std;
using namespace cv;
#endif // SALIENTCOMMON_H
2.2 显著性检测头文件
#ifndef SALIENTITTI_H
#define SALIENTITTI_H
#include "salientcommon.h"
// Itti L, Koch C, Niebur E. A Model of Saliency-Based Visual Attention for Rapid Scene Analysis[J]. IEEE Transactions on Pattern Analysis & Machine Intelligence, 1998, 20(11):1254-1259.
// Itti L, Koch C. Computational modelling of visual attention.[J]. Nature Reviews Neuroscience, 2001, 2(3):194.
// Gaussian pyramid: one image pointer per level, nine levels in total
// (index 0 is the finest level).
struct GaussPyr
{
    IplImage *lev[9];
};
// Saliency detector following the Itti/Koch/Niebur model cited above.
// Usage: construct with an image path, then call m_getSalientITTI() to run
// the pipeline and receive the saliency map.
class salientITTI
{
public:
    // Remembers the image path; the image is loaded when m_getSalientITTI() runs.
    salientITTI(string imageName);
    ~salientITTI();

    // Runs the whole pipeline on the stored path and copies the resulting
    // saliency map into `result`.
    void m_getSalientITTI(Mat& result);

private:
    // --- pyramid helpers ---
    // Allocates the nine levels of `p` using the global pyramid sizes.
    void initPyr(GaussPyr *p);
    // Size of a dimension `x` after `level` halvings (rounding up).
    int downsample(int x, int level);
    // Fills `p` with the Gaussian pyramid of `data`.
    void Gscale(GaussPyr *p, IplImage *data, int level, double sigma);
    // Level-wise difference dst = s1 - s2 over all nine levels.
    void PyrSub(GaussPyr *s1, GaussPyr *s2, GaussPyr *dst);

    // --- centre-surround / normalisation ---
    // dst = |s1 - resize(s2)|, with s2 resized to dst's size.
    void overScaleSub(IplImage *s1, IplImage *s2, IplImage *dst);
    // Collects sum / count / average of local maxima of `scr` that are >= thresh.
    void getLocalMaxima(IplImage *scr, double thresh, double *lm_sum, int *lm_num, double*lm_avg);
    // Normalisation operator; also subsamples `scr` into `dst` at pyramid level 4 size.
    void N_operation(IplImage *scr, IplImage *dst);
    // Accumulates the normalised centre-surround differences of p1/p2 into `dst`.
    void CS_operation(GaussPyr *p1, GaussPyr *p2, IplImage *dst);

    // --- features / driver ---
    // Gabor-filters `scr` into `dst` at orientation `theta`.
    void cvGabor(IplImage *scr, IplImage *dst, int width, double lamda, double theta);
    // Full pipeline: loads `filename` and stores the saliency map in m_resultImg.
    void runItti(const char* filename);

    Mat m_orgImg;       // copy of the loaded input image
    Mat m_resultImg;    // saliency map produced by runItti()
    string m_imageName; // path passed to the constructor
};
#endif // SALIENTITTI_H
2.3 显著性检测实现
#include "salientitti.h"
#define PI 3.1415926
// Sizes of the nine pyramid levels, kept as a file-level global so that every
// helper can see them. runItti() fills them in before any pyramid is built.
// Value-initialised to zero with {}: the previous {NULL} stuffed a
// null-pointer constant into an int field, which warns on common compilers and
// is ill-formed where NULL is defined as nullptr.
CvSize PyrSize[9] = {};
// Stores the image path only; loading and processing happen later, when
// m_getSalientITTI() is called.
salientITTI::salientITTI(string imageName)
    : m_imageName(imageName)
{
}
// Empty on purpose: the class holds only Mat and string members, which are
// destroyed automatically.
salientITTI::~salientITTI() {}
// Runs the saliency pipeline on the stored image path and hands a copy of the
// resulting map back through `result`.
void salientITTI::m_getSalientITTI(Mat &result)
{
    const char *imagePath = m_imageName.c_str();
    runItti(imagePath);               // fills m_resultImg
    m_resultImg.copyTo(result);
}
// Allocates every level of the pyramid `p` as a single-channel 64-bit float
// image whose size is taken from the global PyrSize table.
void salientITTI::initPyr(GaussPyr *p)
{
    int levelIdx = 0;
    while (levelIdx < 9)
    {
        p->lev[levelIdx] = cvCreateImage(PyrSize[levelIdx], IPL_DEPTH_64F, 1);
        ++levelIdx;
    }
}
// Returns the size of a dimension `x` at pyramid level `level`, i.e. `x`
// halved `level` times with odd values rounded up at each step.
// A non-positive `level` returns `x` unchanged.
//
// The earlier recursive version discarded the value of its recursive call and
// fell off the end of the function for every level other than 0, which is
// undefined behaviour. This loop always returns.
int salientITTI::downsample(int x, int level)
{
    for (int step = 0; step < level; ++step)
    {
        // Same halving rule as before: even -> x/2, odd -> (x+1)/2.
        x = (x % 2 == 0) ? x / 2 : (x + 1) / 2;
    }
    return x;
}
// Builds the Gaussian pyramid of `data` into `p`.
//   p     - pyramid whose levels were already allocated (see initPyr)
//   data  - source image for level 0
//   level - number of levels to produce (at most 9, the capacity of GaussPyr)
//   sigma - Gaussian sigma handed to cvSmooth at every level
// Level 0 is the smoothed input. Each further level smooths the previous
// level and keeps every second row and column.
//
// NOTE(review): the loop header and the level-0 branch were garbled in the
// published listing; they are reconstructed here from the surviving
// `p->lev[0], CV_GAUSSIAN, ...` fragment and the brace structure - confirm
// against the original source if available.
void salientITTI::Gscale(GaussPyr *p, IplImage *data, int level, double sigma)
{
    // GaussPyr and PyrSize both hold nine entries; never index past them.
    const int levels = (level < 9) ? level : 9;

    for (int i = 0; i < levels; i++)
    {
        if (i == 0)
        {
            cvSmooth(data, p->lev[0], CV_GAUSSIAN, 5, 5, sigma, 0);
        }
        else
        {
            // Scratch image holding the smoothed previous level.
            IplImage *tem = cvCreateImage(PyrSize[i-1], IPL_DEPTH_64F, 1);
            cvSmooth(p->lev[i-1], tem, CV_GAUSSIAN, 5, 5, sigma, 0);

            // Decimate by two in both directions.
            for (int a = 0; a < PyrSize[i].height; a++)
            {
                double *dstRow = (double *)(p->lev[i]->imageData + a*p->lev[i]->widthStep);
                const double *srcRow = (const double *)(tem->imageData + 2*a*tem->widthStep);
                for (int b = 0; b < PyrSize[i].width; b++)
                    dstRow[b] = srcRow[2*b];
            }

            // The scratch image used to be leaked on every level.
            cvReleaseImage(&tem);
        }
    }
}
// Across-scale difference used by the centre-surround step:
// `s2` is resized (bilinear) to the size of `dst`, then dst = |s1 - dst|.
void salientITTI::overScaleSub(IplImage *s1, IplImage *s2, IplImage *dst)
{
    // dst first serves as the buffer for the resized surround image ...
    IplImage *resizedSurround = dst;
    cvResize(s2, resizedSurround, CV_INTER_LINEAR);

    // ... and is then overwritten with the absolute difference.
    cvAbsDiff(s1, resizedSurround, dst);
}
// Scans the interior of `scr` (border pixels excluded) for local maxima.
// A pixel counts when its value is >= thresh and >= its four direct
// neighbours. `scr` is read as 64-bit floats.
//
// Outputs:
//   lm_sum - sum of the counted maxima
//   lm_num - number of counted maxima
//   lm_avg - their average, or 0 when none remain
// Maxima equal to 10 (the global maximum the caller scales the image to) are
// removed from sum and count, but only when at least one other maximum
// exists; otherwise sum and count keep them and the average is 0.
void salientITTI::getLocalMaxima(IplImage *scr, double thresh, double *lm_sum, int *lm_num, double*lm_avg)
{
    *lm_sum = 0.0;
    *lm_num = 0;
    *lm_avg = 0.0;
    int globalHits = 0;   // maxima whose value is exactly 10

    const int lastRow = scr->height - 1;
    const int lastCol = scr->width - 1;

    for (int row = 1; row < lastRow; ++row)
    {
        const double *above = (const double *)(scr->imageData + (row - 1)*scr->widthStep);
        const double *here  = (const double *)(scr->imageData + row*scr->widthStep);
        const double *below = (const double *)(scr->imageData + (row + 1)*scr->widthStep);

        for (int col = 1; col < lastCol; ++col)
        {
            const double v = here[col];
            const bool isPeak = (v >= thresh) &&
                                (v >= above[col]) &&
                                (v >= below[col]) &&
                                (v >= here[col - 1]) &&
                                (v >= here[col + 1]);
            if (!isPeak)
                continue;

            if (v == 10)
                ++globalHits;
            *lm_sum += v;      // running sum still includes global maxima
            ++(*lm_num);       // running count still includes global maxima
        }
    }

    // Only global maxima (or nothing) found: leave sum/count, average is 0.
    if (*lm_num <= globalHits)
    {
        *lm_avg = 0;
        return;
    }

    // Strip the global maxima so only the remaining local maxima are averaged.
    *lm_sum = *lm_sum - 10*globalHits;
    *lm_num = *lm_num - globalHits;
    *lm_avg = (*lm_num > 0) ? (*lm_sum / *lm_num) : 0;
}
// Normalisation operator N(.), followed by subsampling to pyramid level 4
// (the fifth level).
// `scr` is modified in place: rescaled to the range [0, 10], then - if any
// local maximum was found - multiplied by (10 - average local maximum)^2.
// `dst` receives `scr` sampled at the size of level 4. `scr` and `dst` may
// be the same image when `scr` is already at that size.
void salientITTI::N_operation(IplImage *scr, IplImage *dst)
{
    // Bring the map into a fixed range: [0, 1] first, then [0, 10].
    cvNormalize(scr, scr, 1.0, 0.0, CV_MINMAX, NULL);
    cvConvertScale(scr, scr, 10, 0);

    double peakSum;
    int peakCount;
    double peakMean;
    getLocalMaxima(scr, 1, &peakSum, &peakCount, &peakMean);
    if (peakCount > 0)
    {
        const double gap = 10 - peakMean;
        cvConvertScale(scr, scr, gap*gap, 0);
    }

    // Work out which level `scr` lives on from its height and pick the
    // matching sampling step: level 2 -> 4, level 3 -> 2, otherwise 1.
    int stride = 1;
    if (scr->height == PyrSize[2].height)
        stride = 4;
    else if (scr->height == PyrSize[3].height)
        stride = 2;

    for (int a = 0; a < PyrSize[4].height; a++)
    {
        double *dstRow = (double *)(dst->imageData + a*dst->widthStep);
        const double *srcRow = (const double *)(scr->imageData + stride*a*scr->widthStep);
        for (int b = 0; b < PyrSize[4].width; b++)
            dstRow[b] = srcRow[stride*b];
    }
}
// Centre-surround step. With the finest pyramid level numbered 0, the centre
// levels are c = 2, 3, 4 and the surround levels are s = c + 3 and c + 4
// (in 1-based numbering: (3,6),(3,7),(4,7),(4,8),(5,8),(5,9)).
// For every (c, s) pair the across-scale difference |p1[c] - p2[s]| is
// normalised with N_operation, brought to level-4 size and added to `dst`.
// `dst` must be a level-4 sized 64-bit float image; it is accumulated into,
// so the caller is expected to zero it first.
void salientITTI::CS_operation(GaussPyr *p1, GaussPyr *p2, IplImage *dst)
{
    for (int c = 2; c < 5; c++)
    {
        for (int delta = 3; delta < 5; delta++)
        {
            const int s = c + delta;

            IplImage *tem_c = cvCreateImage(PyrSize[c], IPL_DEPTH_64F, 1);
            overScaleSub(p1->lev[c], p2->lev[s], tem_c);

            IplImage *tem_5lev = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
            N_operation(tem_c, tem_5lev);
            cvAdd(tem_5lev, dst, dst, NULL);

            // Both scratch images used to be leaked on every iteration.
            cvReleaseImage(&tem_5lev);
            cvReleaseImage(&tem_c);
        }
    }
}
// Level-by-level pyramid subtraction, dst[k] = s1[k] - s2[k] for all nine
// levels. Used when building the colour-opponency pyramids.
void salientITTI::PyrSub(GaussPyr *s1, GaussPyr *s2, GaussPyr *dst)
{
    int levelIdx = 0;
    while (levelIdx < 9)
    {
        IplImage *minuend    = s1->lev[levelIdx];
        IplImage *subtrahend = s2->lev[levelIdx];
        cvSub(minuend, subtrahend, dst->lev[levelIdx], NULL);
        ++levelIdx;
    }
}
// Applies a Gabor filter to `scr` and writes the magnitude of the response,
// scaled so that its maximum is 1, into `dst`.
//   scr   - input image
//   dst   - output image; read back as 64-bit floats, so it must have that depth
//   width - side length of the square kernel
//   lamda - used both as the wavelength of the cosine carrier and as the
//           scale of the Gaussian envelope
//   theta - filter orientation in radians
//
// NOTE(review): the bounds of the max-search loops were garbled in the
// published listing (`iheight` / `jwidth`); they are reconstructed as
// `dst->height` / `dst->width`, the image the loop body indexes.
void salientITTI::cvGabor(IplImage *scr, IplImage *dst, int width, double lamda, double theta)
{
    CvMat *gabor_kernel = cvCreateMat(width, width, CV_32FC1);

    const double cosT = cos(theta);
    const double sinT = sin(theta);
    const double lamdaSq = lamda*lamda;
    // Kernel coordinates are mapped onto [-8, 8]; guard the divisor so a
    // 1x1 kernel does not divide by zero.
    const int span = (width > 1) ? (width - 1) : 1;

    for (int i = 0; i < width; i++)
        for (int j = 0; j < width; j++)
        {
            // Integer division on purpose: same coordinate grid as before.
            const int x = (i*16/span) - 8;
            const int y = (j*16/span) - 8;
            // Rotate the coordinates by theta.
            const double xr = x*cosT + y*sinT;
            const double yr = -x*sinT + y*cosT;
            const double envelope = (1/(PI*lamdaSq))*exp(-((xr*xr + yr*yr)/lamdaSq));
            const double carrier = cos(2*PI*xr/lamda);
            cvSetReal2D(gabor_kernel, i, j, envelope*carrier);
        }

    cvFilter2D(scr, dst, gabor_kernel, cvPoint(-1, -1));
    cvAbs(dst, dst);

    // The kernel is no longer needed; it used to be leaked.
    cvReleaseMat(&gabor_kernel);

    // Find the largest response so the result can be scaled to [0, 1].
    double max = 0;
    for (int i = 0; i < dst->height; i++)
    {
        const double *row = (const double *)(dst->imageData + i*dst->widthStep);
        for (int j = 0; j < dst->width; j++)
            if (row[j] >= max)
                max = row[j];
    }

    // An all-zero response stays all-zero instead of being divided by zero.
    if (max > 0)
        cvConvertScale(dst, dst, 1/max, 0);
}
void salientITTI::runItti(const char* filename)
{
//读入原图
IplImage *origin = cvLoadImage(filename, CV_LOAD_IMAGE_UNCHANGED);
Mat tmpOrgImg;
tmpOrgImg = cvarrToMat(origin);
tmpOrgImg.copyTo(m_orgImg);
//将原图插值为两倍
CvSize newsize;
newsize.height = 2*origin->height;
newsize.width = 2*origin->width;
IplImage *originx2 = cvCreateImage(newsize, origin->depth, origin->nChannels);
cvResize(origin, originx2 , CV_INTER_LINEAR);
//分离bgr分量
IplImage *rgb[4] = {NULL};
for(int i=0; i < 3; i++)
rgb[i] = cvCreateImage(newsize, IPL_DEPTH_8U, 1);
cvSplit(originx2, rgb[2], rgb[1], rgb[0], rgb[3]);
//计算亮度I和R、G、B、Y分量
IplImage *Intensity = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
IplImage *R = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
IplImage *G = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
IplImage *B = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
IplImage *Y = cvCreateImage(newsize, IPL_DEPTH_64F, 1);
uchar *data_r = (uchar *)rgb[0]->imageData;
uchar *data_g = (uchar *)rgb[1]->imageData;
uchar *data_b = (uchar *)rgb[2]->imageData;
for(int i=0; i < newsize.height; i++)
for(int j=0; j < newsize.width; j++)
{
int index_8 = i*(rgb[0]->widthStep) + j;
((double *)(Intensity->imageData + i*Intensity->widthStep))[j] = (double(data_r[index_8]) + double(data_g[index_8]) + double(data_b[index_8]))/3;
((double *)(R->imageData + i*Intensity->widthStep))[j] = double(data_r[index_8]) - (double(data_g[index_8]) + double(data_b[index_8]))/2;
((double *)(G->imageData + i*Intensity->widthStep))[j] = double(data_g[index_8]) - (double(data_r[index_8]) + double(data_b[index_8]))/2;
((double *)(B->imageData + i*Intensity->widthStep))[j] = double(data_b[index_8]) - (double(data_r[index_8]) + double(data_g[index_8]))/2;
((double *)(Y->imageData + i*Intensity->widthStep))[j] = (double(data_r[index_8]) + double(data_g[index_8]))/2 - (abs(double(data_r[index_8]) - double(data_g[index_8])))/2 - double(data_b[index_8]);
}
//release
cvReleaseImage(&originx2); cvReleaseImage(&rgb[0]); cvReleaseImage(&rgb[1]);cvReleaseImage(&rgb[2]);cvReleaseImage(&rgb[3]);
//初始化高斯金字塔尺寸
for(int i = 0; i < 9; i++)
{
PyrSize[i].height = downsample(newsize.height, i);
PyrSize[i].width = downsample(newsize.width, i);
}
////////////////////亮度特征提取
//计算I金字塔
GaussPyr Pyr_I;
initPyr(&Pyr_I);
Gscale(&Pyr_I, Intensity, 9, 1.6);
//I的CS过程
IplImage *I_mean = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
cvSetZero(I_mean);
CS_operation(&Pyr_I, &Pyr_I, I_mean);
N_operation(I_mean, I_mean);
//release
for(int i = 0; i < 9; i++)
{
cvReleaseImage(&(Pyr_I.lev[i]));
}
////////////////////方向特征提取
//计算O金字塔
IplImage *orient[4];
for(int i = 0; i < 4; i++)
orient[i] = cvCreateImage(cvGetSize(Intensity), IPL_DEPTH_64F,1);
cvGabor(Intensity, orient[0], 11, 5, 0);
cvGabor(Intensity, orient[1], 11, 5, PI/4);
cvGabor(Intensity, orient[2], 11, 5, PI/2);
cvGabor(Intensity, orient[3], 11, 5, 3*PI/4);
//release
cvReleaseImage(&Intensity);
GaussPyr Pyr_0, Pyr_45, Pyr_90, Pyr_135;
initPyr(&Pyr_0);
initPyr(&Pyr_45);
initPyr(&Pyr_90);
initPyr(&Pyr_135);
Gscale(&Pyr_0, orient[0], 9, 0.5);
Gscale(&Pyr_45, orient[1], 9, 0.5);
Gscale(&Pyr_90, orient[2], 9, 0.5);
Gscale(&Pyr_135, orient[3], 9, 0.5);
//release
cvReleaseImage(&orient[0]); cvReleaseImage(&orient[1]); cvReleaseImage(&orient[2]); cvReleaseImage(&orient[3]);
//O的CS过程
IplImage *O_mean = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
IplImage *O_1 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
IplImage *O_2 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
IplImage *O_3 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
cvSetZero(O_mean);
cvSetZero(O_1);
cvSetZero(O_2);
cvSetZero(O_3);
CS_operation(&Pyr_0, &Pyr_0, O_mean);
CS_operation(&Pyr_45, &Pyr_45, O_1);
CS_operation(&Pyr_90, &Pyr_90, O_2);
CS_operation(&Pyr_135, &Pyr_135, O_3);
//release
for(int i = 0; i < 9; i++)
{
cvReleaseImage(&(Pyr_0.lev[i]));
cvReleaseImage(&(Pyr_45.lev[i]));
cvReleaseImage(&(Pyr_90.lev[i]));
cvReleaseImage(&(Pyr_135.lev[i]));
}
N_operation(O_mean, O_mean);
N_operation(O_1, O_1);
N_operation(O_2, O_2);
N_operation(O_3, O_3);
cvAdd(O_mean, O_1, O_mean, NULL);
cvAdd(O_mean, O_2, O_mean, NULL);
cvAdd(O_mean, O_3, O_mean, NULL);
N_operation(O_mean, O_mean);
//release
cvReleaseImage(&O_1); cvReleaseImage(&O_2); cvReleaseImage(&O_3);
////////////////////颜色特征提取
//计算RGBY金字塔
GaussPyr Pyr_R, Pyr_G, Pyr_B, Pyr_Y;
initPyr(&Pyr_R);
initPyr(&Pyr_G);
initPyr(&Pyr_B);
initPyr(&Pyr_Y);
Gscale(&Pyr_R, R, 9, 0.5);
Gscale(&Pyr_G, G, 9, 0.5);
Gscale(&Pyr_B, B, 9, 0.5);
Gscale(&Pyr_Y, Y, 9, 0.5);
//release
cvReleaseImage(&R); cvReleaseImage(&G); cvReleaseImage(&B); cvReleaseImage(&Y);
//计算RG—BY金字塔
GaussPyr Pyr_RG, Pyr_GR, Pyr_BY, Pyr_YB;
initPyr(&Pyr_RG);
initPyr(&Pyr_GR);
initPyr(&Pyr_BY);
initPyr(&Pyr_YB);
PyrSub(&Pyr_R, &Pyr_G, &Pyr_RG);
PyrSub(&Pyr_G, &Pyr_R, &Pyr_GR);
PyrSub(&Pyr_B, &Pyr_Y, &Pyr_BY);
PyrSub(&Pyr_Y, &Pyr_B, &Pyr_YB);
//release
for(int i = 0; i < 9; i++)
{
cvReleaseImage(&(Pyr_R.lev[i]));
cvReleaseImage(&(Pyr_G.lev[i]));
cvReleaseImage(&(Pyr_B.lev[i]));
cvReleaseImage(&(Pyr_Y.lev[i]));
}
//C的CS过程
IplImage *C_mean = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
IplImage *C_1 = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);
cvSetZero(C_mean);
cvSetZero(C_1);
CS_operation(&Pyr_RG, &Pyr_GR, C_mean);
CS_operation(&Pyr_BY, &Pyr_YB, C_1);
cvAdd(C_mean, C_1, C_mean, NULL);
//release
for(int i = 0; i < 9; i++)
{
cvReleaseImage(&(Pyr_RG.lev[i]));
cvReleaseImage(&(Pyr_GR.lev[i]));
cvReleaseImage(&(Pyr_BY.lev[i]));
cvReleaseImage(&(Pyr_YB.lev[i]));
}
cvReleaseImage(&C_1);
N_operation(C_mean, C_mean);
//整合所有特征,生成显著性图
IplImage *all = cvCreateImage(PyrSize[4], IPL_DEPTH_64F, 1);//在第四层生成的显著图 融合后的显著图
cvSetZero(all);
cvAdd(I_mean, C_mean, all, NULL);
cvAdd(all, O_mean, all, NULL);
//release
cvReleaseImage(&I_mean); cvReleaseImage(&C_mean); cvReleaseImage(&O_mean);
cvConvertScale(all, all, 0.33333, 0);
cvNormalize(all, all, 1.0, 0.0, CV_MINMAX, NULL);
IplImage *all_8U = cvCreateImage(cvGetSize(all), IPL_DEPTH_8U, 1);
cvConvertScale(all, all_8U, 255, 0);
//release
cvReleaseImage(&all);
IplImage *saliency = cvCreateImage(cvSize(origin->width, origin->height), IPL_DEPTH_8U, 1);//最后调整到原图像大小
cvResize(all_8U, saliency, CV_INTER_LINEAR);
//release
cvReleaseImage(&all_8U);
Mat tmpResultImg;
tmpResultImg = cvarrToMat(saliency);
tmpResultImg.copyTo(m_resultImg);
#if 0
cvNamedWindow("原图", CV_WINDOW_AUTOSIZE);
cvNamedWindow("显著性图", CV_WINDOW_AUTOSIZE);
cvShowImage("原图", origin);
cvShowImage("显著性图", saliency);
cvWaitKey(0);
cvReleaseImage(&origin);
cvReleaseImage(&saliency);
cvDestroyWindow("原图");
cvDestroyWindow("显著性图");
#endif
}
注意:2.3部分参考了别人的代码,但是忘记出处了!
3. 模型效果