核主成分分析KPCA+C代码

KPCA用非线性变换将输入数据空间映射到高维空间,使非线性问题转为线性问题,然后在高维空间中使用PCA方法提取主成分,在保持原数据信息量的基础上达到降维的目的。

常用的核函数有以下几种:

核主成分分析KPCA+C代码_第1张图片

核主成分分析KPCA+C代码_第2张图片

核函数化后的得到m*m的样本矩阵(m为样本个数)。用核函数将原始样本投射到高维空间,再用PCA进行降维。

实现步骤:

1. 将数据进行核函数化;

2. 对核矩阵样本进行归一化;归一化方法如下:

核主成分分析KPCA+C代码_第3张图片

2. 之后用PCA进行降维,我上篇博客有:点击打开链接


实现代码:(代码是参考matlab代码和按自己的理解编写的,如有错误,欢迎指正)

#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "vector"
using namespace std;


#define C 2   //常数
#define G 3   //gama值
#define S 1   //高斯核函数的方差
#define P 2   //多项式阶数
#define E 0.0000001
#define INF 99999 
#define dimNum 4     //维数
#define MAXITER 10000   //最大迭代次数


typedef vector doubleVector;
typedef vector dim2Vector;


vector getInputSample(char* File);  //获取输入样本
void PAC(vector nomalKernel, vector kernelTrain);  //主成分分析法PAC
vector normalizationSPSS(vector inputTrain);  //采用z-score法标准数据
vector calCovariation(vector inputTrain);   //计算协方差
vector Jacobi(vector Array);   //使用Jacobi计算协方差的特征值和特征矩阵
bool QueryArray(vector Array);   //检查是否满足
vector matTran(vector Array);   //矩阵转置
vector matMul(vector mat1, vector mat2);   //矩阵相乘


vector kernelLinearFun(vector inputTrain);  //使用线性核函数
vector kernelPolyFun(vector inputTrain);  //使用多项式核函数
vector kernelGuassFun(vector inputTrain);  //使用高斯径向基(RBF)核函数
vector kernelTanhFun(vector inputTrain);  //多层感知器(MLP)核函数
vector normalizationKernel(vector kernel);   //标准化核矩阵


double Input_Meam[dimNum] = {0};    //每一维的均值
double Input_Dev[dimNum] = {0};     //每一维的标准差


void main()
{
char *File = "input.txt";


vector inputTrain;
vector kernelTrain;
vector nomalKernel;


inputTrain = getInputSample(File);
inputTrain = normalizationSPSS(inputTrain);


// kernelTrain = kernelLinearFun(inputTrain);    //使用线性核函数
// kernelTrain = kernelPolyFun(inputTrain);    //使用多项式核函数
// kernelTrain = kernelGuassFun(inputTrain);  //使用高斯径向基(RBF)核函数
kernelTrain = kernelTanhFun(inputTrain);  //使用多层感知器(MLP)核函数


nomalKernel = normalizationKernel(kernelTrain);   //标准化数据



PAC(nomalKernel,  kernelTrain);  //主成分分析法PAC







//主成分分析法PAC
void PAC(vector inputTrain,  vector kernelTrain)
{
int i, j, m, n;
vector input_Cov;  //协方差
vector jacobi;   //1为特征值,2为特征矩阵
double rate;  //贡献率
double rateSum1=0;
double rateSum2=0;
doubleVector tempVector;
vector redTemp;  
vector reduce_Dim_Mat;  //降维矩阵
vector reduce_Dim_Sample;  //降维数据


input_Cov = calCovariation(inputTrain);   //计算协方差


jacobi = Jacobi(input_Cov);   //使用Jacobi计算协方差的特征值和特征矩阵


//计算贡献率
for(i=0; i{
for(j=0; jrateSum1 += jacobi[0][i][j];


for(j=0; j{
rateSum2 += jacobi[0][i][j];
rate = rateSum2/rateSum1;
if(rate>=0.85)
break;
}


//获取将维矩阵并归一化(除以sqrt(特征值))
for(m=0; m<=j; m++)
{
tempVector.clear();
for(n=0; ntempVector.push_back(jacobi[1][n][m]/sqrtf(jacobi[0][0][m]));


reduce_Dim_Mat.push_back(tempVector);
}
}


reduce_Dim_Mat = matTran(reduce_Dim_Mat);


reduce_Dim_Sample = matMul(kernelTrain, reduce_Dim_Mat);  //计算降维结果


printf("协方差为:\n");
for(i=0; i{
for(j=0; jprintf("%lf  ", input_Cov[i][j]);
printf("\n");
}


printf("\n特征值:\n");
for(i=0; i{
for(j=0; jprintf("%lf  ", jacobi[0][i][j]);
printf("\n");
}

printf("\n特征向量:\n");
for(i=0; i{
for(j=0; jprintf("%lf  ", jacobi[1][i][j]);
printf("\n");
  }




printf("\n降维矩阵:\n");
for(i=0; i{
for(j=0; jprintf("%lf  ", reduce_Dim_Mat[i][j]);
printf("\n");
}


printf("\n降维结果:\n");
for(i=0; i{
for(j=0; jprintf("%lf  ", reduce_Dim_Sample[i][j]);
printf("\n");
}

}




//计算协方差
vector calCovariation(vector inputTrain)
{
int i, j, k;
doubleVector tempDst(inputTrain[0].size(), 0);
vector dst(inputTrain.size(), tempDst);

for(i=0 ; iInput_Meam[i] = 0;

//计算均值
for(i=0; i{
for(j=0; jInput_Meam[i] += inputTrain[j][i];

Input_Meam[i] = Input_Meam[i]/inputTrain.size();
}

//计算协方差
for(i=0; ifor(j=0; j{
for(k=0; kdst[i][j] += (inputTrain[k][i]-Input_Meam[i])*(inputTrain[k][j]-Input_Meam[j]);

dst[i][j] = dst[i][j]/(inputTrain.size()-1);

}


return dst;
}






//使用Jacobi计算协方差的特征值和特征矩阵
vector Jacobi(vector Array)
{
int i, j;
int count;
bool flag = false;
vector dst;
doubleVector tempArray(Array.size(), 0);
vector charatMat(Array.size(), tempArray);   //特征向量
vector sortArray;  //排序后的特征值
vector dim2Jac;
vector dim2JacT;
vector dim3Jac;
double maxArrayNum;
int laber_j, laber_i;

double theta;


//开始迭代
count = 0;
tempArray.clear();
tempArray.resize(Array.size(), 0);
while(count{
count++;
dim2Jac.clear();
dim2Jac.resize(Array.size(), tempArray);
maxArrayNum = 0;
laber_i = laber_j = 0;


//寻找非对角元中绝对值最大的A[i][j]
for(i=0; ifor(j=0; j{
if(i==j)
continue;


if(maxArrayNum{
maxArrayNum = fabs(Array[i][j]);
laber_i = i;
laber_j = j;
}
}


theta = atanf(Array[laber_i][laber_j]*2/(Array[laber_i][laber_i]-Array[laber_j][laber_j]+E));


//构造雅克比矩阵
for(i=0; idim2Jac[i][i] = 1;


dim2Jac[laber_i][laber_i] = dim2Jac[laber_j][laber_j] = cosf(theta/2);
dim2Jac[laber_i][laber_j] = sinf(theta/2);
dim2Jac[laber_j][laber_i] = -sinf(theta/2);


dim2JacT = matTran(dim2Jac);  //矩阵转置
dim3Jac.push_back(dim2JacT);  //保存矩阵


Array = matMul(matMul(dim2Jac, Array), dim2JacT);


if(QueryArray(Array))
flag = true;

}


//初始化特征矩阵
for(i=0; icharatMat[i][i] = 1;


//计算特征矩阵
for(i=0; icharatMat = matMul(charatMat, dim3Jac[i]);


//排序
doubleVector sortA;
double tempNum;
for(i=0; isortA.push_back(Array[i][i]);


for(i=0; i{
maxArrayNum = sortA[i];
laber_j = i;


for(j=i; jif(maxArrayNum{
maxArrayNum = sortA[j];
laber_j = j;
}


tempNum = sortA[i];
sortA[i] = sortA[laber_j];
sortA[laber_j] = tempNum;


for(j=0; jtempArray[j] = charatMat[j][i];


for(j=0; jcharatMat[j][i] = charatMat[j][laber_j];


for(j=0; jcharatMat[j][laber_j] = tempArray[j]; 


}


sortArray.push_back(sortA);


dst.push_back(sortArray);
dst.push_back(charatMat);


return dst;
}




//检查是否满足
bool QueryArray(vector Array)
{
int i, j;


for(i=0; ifor(j=0; j{
if(i==j)
continue;


if(fabs(Array[i][j])>E)
return false;
}


return true;
}




//矩阵转置
vector matTran(vector Array)
{
int i, j;
doubleVector temp(Array.size(), 0);
vector dst(Array[0].size(), temp);


for(i=0; ifor(j=0; jdst[j][i] = Array[i][j];


return dst;
}


//矩阵相乘
vector matMul(vector mat1, vector mat2)
{
  int i, j, k;
  doubleVector temp(mat2[0].size(), 0);
  vector dst(mat1.size(), temp);


  for(i=0; i   for(j=0; j   for(k=0; k   dst[i][j] += mat1[i][k]*mat2[k][j];


return dst;
}




//采用z-score法标准数据
vector normalizationSPSS(vector inputTrain)
{
int i, j;
vector dst;
doubleVector tempDst;

//初始化
for(i=0 ; i{
Input_Meam[i] = 0;
Input_Dev[i] = 0;
}

//计算均值
for(i=0; i{
for(j=0; jInput_Meam[i] += inputTrain[j][i];

Input_Meam[i] = Input_Meam[i]/inputTrain.size();
}

//计算标准差
for(i=0; i{
for(j=0; jInput_Dev[i] += (inputTrain[j][i]-Input_Meam[i])*(inputTrain[j][i]-Input_Meam[i]);

Input_Dev[i] = sqrtf(Input_Dev[i]/(inputTrain.size()-1));
}

//标准化
for(i=0; i{
tempDst.clear();
for(j=0; jtempDst.push_back((inputTrain[i][j]-Input_Meam[j])/Input_Dev[j]);

dst.push_back(tempDst);
}


return dst;
}




//核矩阵标准化
vector normalizationKernel(vector kernel)
{
int i, j;
doubleVector tempKernel(kernel[0].size(), 1);
vector dstKernel(kernel.size(), tempKernel);
vector onesMat(kernel.size(), tempKernel);
vector K1, K2, K3;

K1 = matMul(onesMat, kernel);
K2 = matMul(kernel, onesMat);
K3 = matMul(matMul(onesMat, kernel), onesMat);

for(i=0; ifor(j=0; jdstKernel[i][j] = kernel[i][j]-K1[i][j]-K2[i][j]+K3[i][j];

return dstKernel;

}




//获取输入样本
vector getInputSample(char* File)
{
vector dst;
doubleVector temp;
int i;
double num;


FILE *fp = fopen(File, "r");

if(fp == NULL)
{
printf("OPEN FILE ERROR!!\n");
exit(0);
}


//从文件读取样本
i=1;
temp.clear();
dst.clear();
while(fscanf(fp, "%lf", &num)!=EOF)
{
temp.push_back(num);
if(i%dimNum==0)
{
dst.push_back(temp);
temp.clear();
}
i++;
}


return dst;
}




//多层感知器(MLP)核函数
vector kernelTanhFun(vector inputTrain)
{
int i, j, k;
doubleVector tempKernel(inputTrain.size(), 0);
vector dstKernel(inputTrain.size(), tempKernel);

for(i=0; ifor(j=0; j{
for(k=0; kdstKernel[i][j] += inputTrain[i][k]*inputTrain[j][k];

dstKernel[i][j] = tanh(G*dstKernel[i][j]+C);
}

return dstKernel;
}




//使用高斯径向基(RBF)核函数
vector kernelGuassFun(vector inputTrain)
{
int i, j, k;
double sum;
doubleVector tempKernel(inputTrain.size(), 0);
vector dstKernel(inputTrain.size(), tempKernel);

for(i=0; ifor(j=0; j{
sum = 0;
//计算向量的2范数
for(k=0; ksum += (inputTrain[i][k]-inputTrain[j][k])*(inputTrain[i][k]-inputTrain[j][k]);

//高斯径向基(RBF)核函数计算公式
dstKernel[i][j] = exp(-0.5*(sqrtf(sum)/S)*(sqrtf(sum)/S));
}

return dstKernel;
}




//使用多项式核函数
vector kernelPolyFun(vector inputTrain)
{
int i, j, k, p;
double sum;
doubleVector tempKernel(inputTrain.size(), 0);
vector dstKernel(inputTrain.size(), tempKernel);

for(i=0; ifor(j=0; j{
for(k=0; kdstKernel[i][j] += inputTrain[i][k]*inputTrain[j][k];

sum = 1;
for(p=0; psum = sum*(dstKernel[i][j]+1);

dstKernel[i][j] = sum;
}

return dstKernel;
}


//使用线性核函数
vector kernelLinearFun(vector inputTrain)
{
int i, j, k;
doubleVector tempKernel(inputTrain.size(), 0);
vector dstKernel(inputTrain.size(), tempKernel);

for(i=0; ifor(j=0; jfor(k=0; kdstKernel[i][j] += inputTrain[i][k]*inputTrain[j][k];

return dstKernel;
}



你可能感兴趣的:(模式识别算法)