OpenCV下PCA降维

这两天看了下PCA降维,用OpenCV测试了一下。主要参考了[1]和[2]。根据我的理解,把代码记录如下。


#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <stdio.h>
#include <stdlib.h>

using namespace cv;
using namespace std;

// Each sample is a 7-dimensional feature vector; there are 31 samples.
// (Note: "DIMENTIONS" is a misspelling of "DIMENSIONS", kept for compatibility.)
#define DIMENTIONS	7
#define SAMPLE_NUM	31

// Training data: SAMPLE_NUM samples of DIMENTIONS floats each,
// flattened row-major (sample i occupies indices [i*DIMENTIONS, (i+1)*DIMENTIONS)).
float Coordinates[DIMENTIONS*SAMPLE_NUM]={
		 101.5,100.4,97.0,98.7,100.8,114.2,104.2
		,100.8,93.5,95.9,100.7,106.7,104.3,106.4
		,100.8,97.4,98.2,98.2,99.5,103.6,102.4
		,99.4,96.0,98.2,97.8,99.1,98.3,104.3
		,101.8,97.7,99.0,98.1,98.4,102.0,103.7
		,101.8,96.8,96.4,92.7,99.6,101.3,103.4
		,101.3,98.2,99.4,103.7,98.7,101.4,105.3
		,101.9,100.0,98.4,96.9,102.7,100.3,102.3
		,100.3,98.9,97.2,97.4,98.1,102.1,102.3
		,99.3,97.7,97.6,101.1,96.8,110.1,100.4
		,98.7,98.4,97.0,99.6,95.6,107.2,99.8
		,99.7,97.7,98.0,99.3,97.3,104.1,102.7
		,97.6,96.5,97.6,102.5,97.2,100.6,99.9
		,98.0,98.4,97.1,100.5,101.4,103.0,99.9
		,101.1,98.6,98.7,102.4,96.9,108.2,101.7
		,100.4,98.6,98.0,100.7,99.4,102.4,103.3
		,99.3,96.9,94.0,98.1,99.7,109.7,99.2
		,98.6,97.4,96.4,99.8,97.4,102.1,100.0
		,98.2,98.2,99.4,99.3,99.7,101.5,99.9
		,98.5,96.3,97.0,97.7,98.7,112.6,100.4
		,98.4,99.2,98.1,100.2,98.0,98.2,97.8
		,99.2,97.4,95.7,98.9,102.4,114.8,102.6
		,101.3,97.9,99.2,98.8,105.4,111.9,99.9
		,98.5,97.8,94.6,102.4,107.0,115.0,99.5
		,98.3,96.3,98.5,106.2,92.5,98.6,101.6
		,99.3,101.1,99.4,100.1,103.6,98.7,101.3
		,99.2,97.3,96.2,99.7,98.2,112.6,100.5
		,100.0,99.9,98.2,98.3,103.6,123.2,102.8
		,102.2,99.4,96.2,98.6,102.4,115.3,101.2
		,100.1,98.7,97.4,99.8,100.6,112.4,102.5
		,104.3,98.7,100.2,116.1,105.2,101.6,102.6
};

// Test input used for project/backProject below.
// Note: it is identical to the last training sample, so reconstruction
// after dimensionality reduction is expected to be close to it.
float Coordinates_test[DIMENTIONS]={
	104.3,98.7,100.2,116.1,105.2,101.6,102.6
};

// Node names used when serializing the PCA mean vector and the reduced
// eigenvector basis to/from config.xml via cv::FileStorage.
#define PCA_MEAN	"mean"
#define PCA_EIGEN_VECTOR	"eigen_vector"
int main()
{
//load samples
	Mat SampleSet(SAMPLE_NUM, DIMENTIONS, CV_32FC1);
	for (int i=0; i<(SAMPLE_NUM); ++i)
	{
		for (int j=0; j<DIMENTIONS; ++j)
		{
			SampleSet.at<float>(i, j) = Coordinates[i*DIMENTIONS + j];
		}
	}
//Training
	PCA *pca = new PCA(SampleSet, Mat(), CV_PCA_DATA_AS_ROW);///////////////
	cout << "eigenvalues:" <<endl << pca->eigenvalues <<endl<<endl;
	//cout << "eigenvectors" <<endl << pca->eigenvectors << endl;

	Mat input(1,DIMENTIONS, CV_32FC1);//Test input
	for (int j=0; j<DIMENTIONS; ++j)
	{
		input.at<float>(0, j) = Coordinates_test[j];
	}
	//calculate the decreased dimensions
	int index;
	float sum=0, sum0=0, ratio;
	for (int d=0; d<pca->eigenvalues.rows; ++d)
	{
		sum += pca->eigenvalues.at<float>(d,0);
	}
	for (int d=0; d<pca->eigenvalues.rows; ++d)
	{
		sum0 += pca->eigenvalues.at<float>(d,0);
		ratio = sum0/sum;
		if(ratio > 0.9){
			index = d;
			break;
		}
	}
	Mat eigenvetors_d;
	eigenvetors_d.create((index+1), DIMENTIONS, CV_32FC1);//eigen values of decreased dimension
	for (int i=0; i<(index+1); ++i)
	{
		pca->eigenvectors.row(i).copyTo(eigenvetors_d.row(i));
	}
	cout << "eigenvectors" <<endl << eigenvetors_d << endl;
	FileStorage fs_w("config.xml", FileStorage::WRITE);//write mean and eigenvalues into xml file
	fs_w << PCA_MEAN << pca->mean;
	fs_w << PCA_EIGEN_VECTOR << eigenvetors_d;
	fs_w.release();
//Encoding
	
	PCA *pca_encoding = new PCA();
	FileStorage fs_r("config.xml", FileStorage::READ);
	fs_r[PCA_MEAN] >> pca_encoding->mean;
	fs_r[PCA_EIGEN_VECTOR] >> pca_encoding->eigenvectors;
	fs_r.release();
	Mat output_encode(1, pca_encoding->eigenvectors.rows, CV_32FC1);
	pca_encoding->project(input, output_encode);
	cout << endl << "pca_encode:" << endl << output_encode;

//Decoding
	PCA *pca_decoding = new PCA();
	FileStorage fs_d("config.xml", FileStorage::READ);
	fs_d[PCA_MEAN] >> pca_decoding->mean;
	fs_d[PCA_EIGEN_VECTOR] >> pca_decoding->eigenvectors;
	fs_d.release();
	Mat output_decode(1, DIMENTIONS, CV_32FC1);
	pca_decoding->backProject(output_encode,output_decode);
	cout <<endl<< "pca_Decode:" << endl << output_decode;

	delete pca;
	delete pca_encoding;
	return 0;
}


结果为:

eigenvalues:
[43.182041; 14.599923; 9.2121401; 4.0877957; 2.8236785; 0.88751495; 0.66496396]


eigenvectors
[0.01278889, 0.03393811, -0.099844977, -0.13044992, 0.20732452, 0.96349025, -0.020049129;
  0.15659945, 0.037932698, 0.12129638, 0.89324093, 0.39454412, 0.046447847, 0.060190294;
  0.21434425, 0.018043749, -0.0012475925, -0.40428901, 0.81335503, -0.22759444, 0.2773709;
  0.43591988, -0.047541384, 0.19851086, -0.0035106051, -0.35545754, 0.10898948, 0.79376709]


pca_encode:
[-5.6273661, 17.138182, -0.078819014, 0.68144321]
pca_Decode:
[102.88557, 98.402702, 100.33086, 116.21081, 105.37261, 101.63729, 103.39891]

这个例子里最后backProject的结果跟原始输入比较起来,差别不是很大。

float Coordinates_test[DIMENTIONS]={
104.3,98.7,100.2,116.1,105.2,101.6,102.6
};


主元分析,顾名思义找出数据中最主要的信息,去除次要的,以降低数据量。

具体步骤是:

1.对每个样本提取出有用的信息组成一个向量;

2.求取出所有样本向量的平均值;

3.用每个样本向量减去向量的平均值后组成一个矩阵;

4.该矩阵乘以该矩阵的转置为协方差矩阵,这个协方差矩阵是可对角化的,对角化后对角线上的元素为特征值,每个特征值对应一个特征向量(特征向量要标准化);

5.选取最大的N个特征值(其中N即为PCA的主元(PC)数,我感觉这个主元数是PCA的核心之处,可自己选取数的多少,数越少,越降低数据量,但识别效果也越差),将这N个特征值对应的特征向量组成新的矩阵;

6.将新的矩阵转置后乘以样本向量即可得到降维后的数据(这些数据是原数据中相对较为主要的,而数据量一般也远远小于原数据量,当然这要取决于你选取的主元数)。



[1] http://blog.csdn.net/yang_xian521/article/details/7445536

[2] http://blog.csdn.net/abcjennifer/article/details/8002329

你可能感兴趣的:(OpenCV下PCA降维)