Over the past couple of days I have been looking into PCA dimensionality reduction and testing it with OpenCV, mainly following [1] and [2]. Below is the code, written up according to my understanding.
#include <opencv/cv.h>
#include <opencv/highgui.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>

using namespace cv;
using namespace std;

#define DIMENTIONS 7
#define SAMPLE_NUM 31

float Coordinates[DIMENTIONS * SAMPLE_NUM] = {
    101.5,100.4,97.0,98.7,100.8,114.2,104.2,
    100.8,93.5,95.9,100.7,106.7,104.3,106.4,
    100.8,97.4,98.2,98.2,99.5,103.6,102.4,
    99.4,96.0,98.2,97.8,99.1,98.3,104.3,
    101.8,97.7,99.0,98.1,98.4,102.0,103.7,
    101.8,96.8,96.4,92.7,99.6,101.3,103.4,
    101.3,98.2,99.4,103.7,98.7,101.4,105.3,
    101.9,100.0,98.4,96.9,102.7,100.3,102.3,
    100.3,98.9,97.2,97.4,98.1,102.1,102.3,
    99.3,97.7,97.6,101.1,96.8,110.1,100.4,
    98.7,98.4,97.0,99.6,95.6,107.2,99.8,
    99.7,97.7,98.0,99.3,97.3,104.1,102.7,
    97.6,96.5,97.6,102.5,97.2,100.6,99.9,
    98.0,98.4,97.1,100.5,101.4,103.0,99.9,
    101.1,98.6,98.7,102.4,96.9,108.2,101.7,
    100.4,98.6,98.0,100.7,99.4,102.4,103.3,
    99.3,96.9,94.0,98.1,99.7,109.7,99.2,
    98.6,97.4,96.4,99.8,97.4,102.1,100.0,
    98.2,98.2,99.4,99.3,99.7,101.5,99.9,
    98.5,96.3,97.0,97.7,98.7,112.6,100.4,
    98.4,99.2,98.1,100.2,98.0,98.2,97.8,
    99.2,97.4,95.7,98.9,102.4,114.8,102.6,
    101.3,97.9,99.2,98.8,105.4,111.9,99.9,
    98.5,97.8,94.6,102.4,107.0,115.0,99.5,
    98.3,96.3,98.5,106.2,92.5,98.6,101.6,
    99.3,101.1,99.4,100.1,103.6,98.7,101.3,
    99.2,97.3,96.2,99.7,98.2,112.6,100.5,
    100.0,99.9,98.2,98.3,103.6,123.2,102.8,
    102.2,99.4,96.2,98.6,102.4,115.3,101.2,
    100.1,98.7,97.4,99.8,100.6,112.4,102.5,
    104.3,98.7,100.2,116.1,105.2,101.6,102.6
};

float Coordinates_test[DIMENTIONS] = {
    104.3,98.7,100.2,116.1,105.2,101.6,102.6
};

#define PCA_MEAN "mean"
#define PCA_EIGEN_VECTOR "eigen_vector"

int main()
{
    // Load samples into a SAMPLE_NUM x DIMENTIONS matrix, one sample per row
    Mat SampleSet(SAMPLE_NUM, DIMENTIONS, CV_32FC1);
    for (int i = 0; i < SAMPLE_NUM; ++i) {
        for (int j = 0; j < DIMENTIONS; ++j) {
            SampleSet.at<float>(i, j) = Coordinates[i * DIMENTIONS + j];
        }
    }

    // Training: each row of SampleSet is treated as one sample
    PCA *pca = new PCA(SampleSet, Mat(), CV_PCA_DATA_AS_ROW);
    cout << "eigenvalues:" << endl << pca->eigenvalues << endl << endl;
    //cout << "eigenvectors" << endl << pca->eigenvectors << endl;

    // Test input
    Mat input(1, DIMENTIONS, CV_32FC1);
    for (int j = 0; j < DIMENTIONS; ++j) {
        input.at<float>(0, j) = Coordinates_test[j];
    }

    // Decide how many dimensions to keep: accumulate the leading
    // eigenvalues until they exceed 90% of the total variance
    int index = 0;
    float sum = 0, sum0 = 0, ratio;
    for (int d = 0; d < pca->eigenvalues.rows; ++d) {
        sum += pca->eigenvalues.at<float>(d, 0);
    }
    for (int d = 0; d < pca->eigenvalues.rows; ++d) {
        sum0 += pca->eigenvalues.at<float>(d, 0);
        ratio = sum0 / sum;
        if (ratio > 0.9) {
            index = d;
            break;
        }
    }

    // Keep only the eigenvectors of the retained dimensions
    Mat eigenvetors_d;
    eigenvetors_d.create(index + 1, DIMENTIONS, CV_32FC1);
    for (int i = 0; i < index + 1; ++i) {
        pca->eigenvectors.row(i).copyTo(eigenvetors_d.row(i));
    }
    cout << "eigenvectors" << endl << eigenvetors_d << endl;

    // Write the mean and the reduced eigenvector matrix to an XML file
    FileStorage fs_w("config.xml", FileStorage::WRITE);
    fs_w << PCA_MEAN << pca->mean;
    fs_w << PCA_EIGEN_VECTOR << eigenvetors_d;
    fs_w.release();

    // Encoding: reload the model from the XML file and project the input
    PCA *pca_encoding = new PCA();
    FileStorage fs_r("config.xml", FileStorage::READ);
    fs_r[PCA_MEAN] >> pca_encoding->mean;
    fs_r[PCA_EIGEN_VECTOR] >> pca_encoding->eigenvectors;
    fs_r.release();
    Mat output_encode(1, pca_encoding->eigenvectors.rows, CV_32FC1);
    pca_encoding->project(input, output_encode);
    cout << endl << "pca_encode:" << endl << output_encode;

    // Decoding: reconstruct the input from its projection
    PCA *pca_decoding = new PCA();
    FileStorage fs_d("config.xml", FileStorage::READ);
    fs_d[PCA_MEAN] >> pca_decoding->mean;
    fs_d[PCA_EIGEN_VECTOR] >> pca_decoding->eigenvectors;
    fs_d.release();
    Mat output_decode(1, DIMENTIONS, CV_32FC1);
    pca_decoding->backProject(output_encode, output_decode);
    cout << endl << "pca_Decode:" << endl << output_decode;

    delete pca;
    delete pca_encoding;
    delete pca_decoding;
    return 0;
}
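As an aside, the manual eigenvalue-ratio loop above can be avoided on newer OpenCV versions (2.4 and later, if I remember correctly; this overload is not used in the original code), since PCA has a constructor that takes the retained-variance fraction directly:

// Alternative: let OpenCV choose how many components to keep so that
// 90% of the variance is retained (replaces the manual ratio loop)
PCA pca_auto(SampleSet, Mat(), CV_PCA_DATA_AS_ROW, 0.9);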
The output is:
eigenvalues:
[43.182041; 14.599923; 9.2121401; 4.0877957; 2.8236785; 0.88751495; 0.66496396]
eigenvectors
[0.01278889, 0.03393811, -0.099844977, -0.13044992, 0.20732452, 0.96349025, -0.020049129;
0.15659945, 0.037932698, 0.12129638, 0.89324093, 0.39454412, 0.046447847, 0.060190294;
0.21434425, 0.018043749, -0.0012475925, -0.40428901, 0.81335503, -0.22759444, 0.2773709;
0.43591988, -0.047541384, 0.19851086, -0.0035106051, -0.35545754, 0.10898948, 0.79376709]
pca_encode:
[-5.6273661, 17.138182, -0.078819014, 0.68144321]
pca_Decode:
[102.88557, 98.402702, 100.33086, 116.21081, 105.37261, 101.63729, 103.39891]
In this example, the final backProject result looks quite close to the original input:
float Coordinates_test[DIMENTIONS]={
104.3,98.7,100.2,116.1,105.2,101.6,102.6
};
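As a quick sanity check, that closeness could be quantified instead of eyeballed. A minimal sketch, reusing the input and output_decode Mats from the program above (the relative L2 error is my own choice of metric, not part of the original code):

// Relative L2 error between the original input and the PCA reconstruction;
// a value near 0 means little information was lost by the projection
double err = norm(input, output_decode, NORM_L2) / norm(input, NORM_L2);
cout << "relative reconstruction error: " << err << endl;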
Principal component analysis, as the name suggests, finds the most important information in the data and discards the minor parts, thereby reducing the data volume.
The concrete steps (a hand-rolled sketch follows this list) are:
1. Extract the useful information from each sample into a vector;
2. Compute the mean of all the sample vectors;
3. Subtract the mean vector from every sample vector and stack the differences into a matrix;
4. Multiply that matrix by its transpose to obtain the covariance matrix. The covariance matrix is diagonalizable; after diagonalization the remaining diagonal elements are the eigenvalues, and each eigenvalue corresponds to an eigenvector (the eigenvectors must be normalized);
5. Select the N largest eigenvalues, where N is the number of principal components (PCs). I feel this number is the heart of PCA: you choose N yourself, and the smaller it is, the more the data volume shrinks, but the worse the recognition performance gets. Form a new matrix from the eigenvectors corresponding to these N eigenvalues;
6. Multiply the transpose of the new matrix by the mean-subtracted sample vector to obtain the reduced-dimension data (this data carries the relatively most important part of the original data, and its volume is usually far smaller than the original; of course, that depends on how many principal components you pick).
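To make the steps concrete, here is a minimal self-contained sketch that runs the same procedure "by hand" with cv::calcCovarMatrix and cv::eigen instead of the PCA class. The 4x3 toy data is made up purely for illustration:

#include <opencv2/core/core.hpp>
#include <iostream>

using namespace cv;
using namespace std;

int main()
{
    // Toy data: 4 samples x 3 dimensions, one sample per row (made-up numbers)
    float data[4 * 3] = { 1.0f, 2.0f, 3.0f,
                          2.0f, 3.0f, 5.0f,
                          3.0f, 5.0f, 4.0f,
                          4.0f, 4.0f, 6.0f };
    Mat X(4, 3, CV_32FC1, data);

    // Steps 2-4: mean, mean-subtracted matrix, and covariance matrix
    Mat covar, mean;
    calcCovarMatrix(X, covar, mean,
                    CV_COVAR_NORMAL | CV_COVAR_ROWS | CV_COVAR_SCALE, CV_32FC1);

    // Step 4 (continued): eigen-decomposition of the symmetric covariance
    // matrix; cv::eigen returns eigenvalues in descending order and stores
    // the (normalized) eigenvectors as rows
    Mat eigenvalues, eigenvectors;
    eigen(covar, eigenvalues, eigenvectors);

    // Step 5: keep the N leading eigenvectors (N = 2 here)
    int N = 2;
    Mat basis = eigenvectors.rowRange(0, N); // N x 3

    // Step 6: project a mean-subtracted sample onto the reduced basis
    Mat x = X.row(0) - mean; // 1 x 3, centered sample
    Mat y = x * basis.t();   // 1 x N, reduced representation
    cout << "reduced: " << y << endl;

    // Reconstruction (what backProject does): y * basis + mean
    Mat x_rec = y * basis + mean;
    cout << "reconstructed: " << x_rec << endl;
    return 0;
}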
[1] http://blog.csdn.net/yang_xian521/article/details/7445536
[2] http://blog.csdn.net/abcjennifer/article/details/8002329