前段时间介绍过相似图片搜索原理一(ahash):http://blog.csdn.net/lu597203933/article/details/45101859 ,它是基于内容的检索中最简单的一种;这里介绍它的增强版本——感知哈希算法(perceptual hash, phash)。它同样主要用于以缩略图搜原图,并能达到更好一些的效果。
理论部分:
理论部分主要包括以下几个步骤:
<1> 图像缩放—将图像缩放到32*32大小
<2>灰度化—对32*32大小的图像进行灰度化
<3>离散余弦变换(DCT)—对32*32大小图像进行DCT
<4>计算均值—取32*32的DCT系数矩阵左上角的8*8部分(低频分量),计算这64个系数的均值
<5>得到8*8图像的phash—8*8的系数中大于等于均值的用1表示,小于均值的用0表示,这样就得到一个64位二进制码作为该图像的phash值。
<6>计算两幅图像phash值的汉明距离,距离越小,表明两幅图像越相似;距离越大,表明两幅图像差异越大。
这样做能够避免伽马校正或者颜色直方图调整带来的影响。
更详细的理论可以参看:
1:http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
2:http://blog.csdn.net/luoweifu/article/details/8220992 (包括java代码实现)
下面我给出自己的c++代码实现:
<1>图像灰度化与缩放
Mat img = imread("E:\\algorithmZack\\ImageSearch\\image\\person.jpg", 1); if(!img.data){ cout << "the image is not exist" << endl; return 0; } int size = 32; // 图片缩放后大小 resize(img, img, Size(size,size)); // 缩放到32*32 cvtColor(img, img, COLOR_BGR2GRAY); // 灰度化
<2>DCT变换
/* 功能:获取DCT系数 n:矩阵大小 quotient: 系数 quotientT: 系数转置 */ void coefficient(const int &n, double **quotient, double **quotientT){ double sqr = 1.0/sqrt(n+0.0); for(int i = 0; i < n; i++){ quotient[0][i] = sqr; quotientT[i][0] = sqr; } for(int i = 1; i < n; i++){ for(int j = 0; j < n; j++){ quotient[i][j] = sqrt(2.0/n)*cos(i*(j+0.5)*PI/n); // 由公式得到 quotientT[j][i] = quotient[i][j]; } } } /* 功能:两矩阵相乘 A和B:源输入矩阵 result:输出矩阵 */ void matrixMultiply(double **A, double **B, int n, double **result){ double t = 0; for(int i = 0; i < n; i++){ for(int j = 0; j < n; j++){ t = 0; for(int k = 0; k < n; k++) t += A[i][k]*B[k][j]; result[i][j] = t; } } } void DCT(Mat_<uchar> image, const int &n, double **iMatrix){ for(int i = 0; i < n; i++){ for(int j = 0; j < n; j++){ iMatrix[i][j] = (double)image(i,j); } } // 为系数分配空间 double **quotient = new double*[n]; double **quotientT = new double*[n]; double **tmp = new double*[n]; for(int i = 0; i < n; i++){ quotient[i] = new double[n]; quotientT[i] = new double[n]; tmp[i] = new double[n]; } // 计算系数矩阵 coefficient(n, quotient, quotientT); matrixMultiply(quotient, iMatrix, n, tmp); // 由公式成绩结果 matrixMultiply(tmp, quotientT, n, iMatrix); for(int i = 0; i < n; i++){ delete []tmp[i]; delete []quotient[i]; delete []quotientT[i]; } delete []tmp; delete []quotient; delete []quotientT; }
<3>计算均值
/*
  Mean of the top-left size*size entries of iMatrix.
  iMatrix: matrix with at least size rows of at least size entries each
  size:    side length of the square region to average (8 for the 8*8 block)
  returns: the mean, narrowed to float; 0 when size is not positive

  The sum is accumulated in double: the entries are doubles, and a float
  accumulator rounds small terms away once a large entry has been added.
*/
float calcAverage(double **iMatrix, const int &size){
    if(size <= 0){
        return 0.0f;    // nothing to average; avoids a 0/0 result
    }

    double sum = 0.0;
    for(int i = 0; i < size; i++){
        for(int j = 0; j < size; j++){
            sum += iMatrix[i][j];
        }
    }
    return static_cast<float>(sum/(static_cast<double>(size)*size));
}
<4>计算hash值(指纹)
/*
  Threshold the top-left size*size entries of iMatrix into hash bits.
  iMatrix:    matrix whose entries are compared against the threshold
  size:       side length of the region (8 for the 8*8 block)
  phash:      receives the bits; entry (i, j) goes to bit i*size + j
  averagePix: threshold; an entry >= averagePix yields 1, otherwise 0
*/
void fingerPrint(double **iMatrix, const int &size, bitset<hashLength> &phash, const float &averagePix){
    int bit = 0;    // running bit index, equal to row*size + col
    for(int row = 0; row < size; ++row){
        const double *entries = iMatrix[row];
        for(int col = 0; col < size; ++col, ++bit){
            if(entries[col] >= averagePix){
                phash[bit] = 1;
            }else{
                phash[bit] = 0;
            }
        }
    }
}
完整源代码:
#include <iostream> #include <bitset> #include <string> #include <iomanip> #include <cmath> #include <opencv2\highgui\highgui.hpp> #include <opencv2\imgproc\imgproc.hpp> #include <opencv2\core\core.hpp> using namespace std; using namespace cv; #define PI 3.1415926 #define hashLength 64 /* 功能:获取DCT系数 n:矩阵大小 quotient: 系数 quotientT: 系数转置 */ void coefficient(const int &n, double **quotient, double **quotientT){ double sqr = 1.0/sqrt(n+0.0); for(int i = 0; i < n; i++){ quotient[0][i] = sqr; quotientT[i][0] = sqr; } for(int i = 1; i < n; i++){ for(int j = 0; j < n; j++){ quotient[i][j] = sqrt(2.0/n)*cos(i*(j+0.5)*PI/n); // 由公式得到 quotientT[j][i] = quotient[i][j]; } } } /* 功能:两矩阵相乘 A和B:源输入矩阵 result:输出矩阵 */ void matrixMultiply(double **A, double **B, int n, double **result){ double t = 0; for(int i = 0; i < n; i++){ for(int j = 0; j < n; j++){ t = 0; for(int k = 0; k < n; k++) t += A[i][k]*B[k][j]; result[i][j] = t; } } } void DCT(Mat_<uchar> image, const int &n, double **iMatrix){ for(int i = 0; i < n; i++){ for(int j = 0; j < n; j++){ iMatrix[i][j] = (double)image(i,j); } } // 为系数分配空间 double **quotient = new double*[n]; double **quotientT = new double*[n]; double **tmp = new double*[n]; for(int i = 0; i < n; i++){ quotient[i] = new double[n]; quotientT[i] = new double[n]; tmp[i] = new double[n]; } // 计算系数矩阵 coefficient(n, quotient, quotientT); matrixMultiply(quotient, iMatrix, n, tmp); // 由公式成绩结果 matrixMultiply(tmp, quotientT, n, iMatrix); for(int i = 0; i < n; i++){ delete []tmp[i]; delete []quotient[i]; delete []quotientT[i]; } delete []tmp; delete []quotient; delete []quotientT; } // 计算8*8图像的平均灰度 float calcAverage(double **iMatrix, const int &size){ float sum = 0; for(int i = 0 ; i < size; i++){ for(int j = 0; j < size; j++){ sum += iMatrix[i][j]; } } return sum/(size*size); } /* 计算hash值 image:8*8的灰度图像 size: 图像大小 8*8 phash:存放64位hash值 averagePix: 灰度值的平均值 */ void fingerPrint(double **iMatrix, const int &size, bitset<hashLength> &phash, const float &averagePix){ for(int i 
= 0; i < size; i++){ int pos = i * size; for(int j = 0; j < size; j++){ phash[pos+j] = iMatrix[i][j] >= averagePix ? 1:0; } } } /*计算汉明距离*/ int hammingDistance(const bitset<hashLength> &query, const bitset<hashLength> &target){ int distance = 0; for(int i = 0; i < hashLength; i++){ distance += (query[i] == target[i] ? 0 : 1); } return distance; } string bitTohex(const bitset<hashLength> &target){ string str; for(int i = 0; i < hashLength; i=i+4){ int sum = 0; string s; sum += target[i] + (target[i+1]<<1) + (target[i+2]<<2) + (target[i+3]<<3); stringstream ss; ss << hex <<sum; // 以十六进制保存 ss >> s; str += s; } return str; } int main(){ Mat img = imread("E:\\algorithmZack\\ImageSearch\\image\\person.jpg", 1); if(!img.data){ cout << "the image is not exist" << endl; return 0; } int size = 32; // 图片缩放后大小 resize(img, img, Size(size,size)); // 缩放到32*32 cvtColor(img, img, COLOR_BGR2GRAY); // 灰度化 double **iMatrix = new double*[size]; for(int i = 0; i < size; i++) iMatrix[i] = new double[size]; DCT(img, size, iMatrix); // 离散余弦变换 float averagePix = calcAverage(iMatrix, 8); cout << averagePix << endl; bitset<hashLength> phash; fingerPrint(iMatrix, 8, phash, averagePix); //cout << phash << endl; string str = bitTohex(phash); cout << str << endl; /*namedWindow("img"); imshow("img", img); waitKey(0);*/ string img_dir = "E:\\algorithmZack\\ImageSearch\\image\\"; for(int i = 1; i <= 11; i++){ string pos; stringstream ss; ss << i; ss >> pos; string img_name = img_dir + "person" + pos +".jpg"; Mat target = imread(img_name, 1); if(!target.data){ cout << "the target image" << img_name << " is not exist" << endl; continue; } resize(target, target, Size(size,size)); cvtColor(target, target, COLOR_BGR2GRAY); DCT(target, size, iMatrix); float averagePix2 = calcAverage(iMatrix, 8); bitset<hashLength> phash2; fingerPrint(iMatrix, 8, phash2, averagePix2); //cout << averagePix2 << endl; int distance = hammingDistance(phash, phash2); // 计算汉明距离 cout <<"【" << i <<"-" << distance << "】 "; } cout << 
endl; for(int i = 0; i < size; i++) delete []iMatrix[i]; delete []iMatrix; return 0; }
测试图片为:
结果为:
其中【i-j】, i代表personi, j代表personi与person的汉明距离。并由结果可见phash对于图片的旋转肯定是无能为力的。
说明:完整的工程文件等着几篇常规图像检索方法写完后再上传,请关注!
参考文献:
1:http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html (英文原始资料)
2:http://blog.csdn.net/luoweifu/article/details/8220992 (包括java代码实现)