KPCA用非线性变换将输入数据空间映射到高维空间,使非线性问题转为线性问题,然后在高维空间中使用PCA方法提取主成分,在保持原数据信息量的基础上达到降维的目的。
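常用的核函数有以下几种(与下文代码一一对应):
- 线性核:$k(x, y) = x^{T}y$
- 多项式核:$k(x, y) = (x^{T}y + 1)^{P}$
- 高斯径向基(RBF)核:$k(x, y) = \exp\left(-\dfrac{\lVert x - y\rVert^{2}}{2\sigma^{2}}\right)$
- 多层感知器(MLP)核:$k(x, y) = \tanh(\gamma\, x^{T}y + c)$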
核函数化后得到 m×m 的核矩阵(m 为样本个数)。也就是先用核函数将原始样本隐式映射到高维空间,再用PCA进行降维。
实现步骤:
1. 将数据进行核函数化;
2. 对核矩阵进行归一化(中心化);归一化方法如下:$\tilde{K} = K - 1_m K - K 1_m + 1_m K 1_m$,其中 $1_m$ 是所有元素均为 $1/m$ 的 $m \times m$ 矩阵;
3. 之后用PCA进行降维(PCA的实现见我的上一篇博客)。
实现代码:(代码是参考matlab代码和按自己的理解编写的,如有错误,欢迎指正)
// NOTE(review): the text following "stdio.h" on the next line is not part of an
// include directive; it looks like a statement displaced from elsewhere in the
// listing and must be removed before this compiles.
#include "stdio.h" sum = sum*(dstKernel[i][j]+1);
#include "stdlib.h"
#include "math.h"
#include "vector"
using namespace std;
#define C 2 // constant term (used by the tanh kernel)
#define G 3 // gamma value (used by the tanh kernel)
#define S 1 // variance of the Gaussian kernel function
#define P 2 // polynomial degree
// Tolerance: compared against off-diagonal magnitudes in QueryArray and added to the
// denominator of the rotation-angle formula in Jacobi.
#define E 0.0000001
#define INF 99999
#define dimNum 4 // number of dimensions
#define MAXITER 10000 // maximum number of iterations
// NOTE(review): every line below that ends right after "vector" or "(vector" is
// truncated in this listing (the template arguments and the rest of the declaration
// are missing). Presumably these are the doubleVector typedef(s) and the forward
// declarations of the functions defined later — restore them from the original source.
typedef vector
typedef vector
vector
void PAC(vector
vector
vector
vector
bool QueryArray(vector
vector
vector
vector
vector
vector
vector
vector
double Input_Meam[dimNum] = {0}; // mean of each dimension
double Input_Dev[dimNum] = {0}; // standard deviation of each dimension
void main()
{
char *File = "input.txt";
vector
vector
vector
inputTrain = getInputSample(File);
inputTrain = normalizationSPSS(inputTrain);
// kernelTrain = kernelLinearFun(inputTrain); //使用线性核函数
// kernelTrain = kernelPolyFun(inputTrain); //使用多项式核函数
// kernelTrain = kernelGuassFun(inputTrain); //使用高斯径向基(RBF)核函数
kernelTrain = kernelTanhFun(inputTrain); //使用多层感知器(MLP)核函数
nomalKernel = normalizationKernel(kernelTrain); //标准化数据
PAC(nomalKernel, kernelTrain); //主成分分析法PAC
}
// Principal component analysis (PAC).
// What the visible code does: computes a covariance matrix from inputTrain with
// calCovariation, decomposes it with Jacobi, accumulates entries of jacobi[0] until the
// running ratio rateSum2/rateSum1 reaches 0.85, builds reduce_Dim_Mat from the selected
// components, transposes it, multiplies kernelTrain by it and prints the results.
// NOTE(review): lines that end right after "vector" or after a loop variable
// (e.g. "for(i=0; i") are truncated in this listing; the parameter list, several local
// declarations and most loop headers/bodies must be restored before this compiles.
void PAC(vector
{
int i, j, m, n;
vector
vector
double rate; // contribution rate
double rateSum1=0;
double rateSum2=0;
doubleVector tempVector;
vector
vector
vector
input_Cov = calCovariation(inputTrain); // compute the covariance matrix
jacobi = Jacobi(input_Cov); // eigenvalues and eigenvector matrix of the covariance via Jacobi
// compute the contribution rate
for(i=0; i
for(j=0; j
for(j=0; j
rateSum2 += jacobi[0][i][j];
rate = rateSum2/rateSum1;
// stop as soon as the running ratio reaches 85%; j then marks the last kept component
if(rate>=0.85)
break;
}
// build the dimension-reduction matrix and normalise it (divide by sqrt(eigenvalue))
for(m=0; m<=j; m++)
{
tempVector.clear();
for(n=0; n
reduce_Dim_Mat.push_back(tempVector);
}
}
reduce_Dim_Mat = matTran(reduce_Dim_Mat);
reduce_Dim_Sample = matMul(kernelTrain, reduce_Dim_Mat); // compute the dimension-reduced result
// print the covariance matrix
printf("协方差为:\n");
for(i=0; i
for(j=0; j
printf("\n");
}
// print the eigenvalues
printf("\n特征值:\n");
for(i=0; i
for(j=0; j
printf("\n");
}
// print the eigenvectors
printf("\n特征向量:\n");
for(i=0; i
for(j=0; j
printf("\n");
}
// print the dimension-reduction matrix
printf("\n降维矩阵:\n");
for(i=0; i
for(j=0; j
printf("\n");
}
// print the dimension-reduced samples
printf("\n降维结果:\n");
for(i=0; i
for(j=0; j
printf("\n");
}
}
//计算协方差
vector
{
int i, j, k;
doubleVector tempDst(inputTrain[0].size(), 0);
vector
for(i=0 ; i
//计算均值
for(i=0; i
for(j=0; j
Input_Meam[i] = Input_Meam[i]/inputTrain.size();
}
//计算协方差
for(i=0; i
for(k=0; k
dst[i][j] = dst[i][j]/(inputTrain.size()-1);
}
return dst;
}
// Uses the Jacobi method to compute the eigenvalues and the eigenvector matrix of the
// covariance matrix.
// Returns dst with two entries: dst[0] is sortArray (holding the single row sortA) and
// dst[1] is charatMat.
// NOTE(review): lines that end right after "vector" or after a loop variable
// (e.g. "for(i=0; i", "while(count") are truncated in this listing; the parameter list,
// several declarations and most loop headers/bodies must be restored before this
// compiles. Comments below describe only what the surviving lines show.
vector
{
int i, j;
int count;
bool flag = false;
vector
doubleVector tempArray(Array.size(), 0);
vector
vector
vector
vector
vector
double maxArrayNum;
int laber_j, laber_i;
double theta;
// start iterating
count = 0;
tempArray.clear();
tempArray.resize(Array.size(), 0);
// NOTE(review): loop condition truncated; presumably bounded by MAXITER and flag — confirm
while(count
count++;
// reset the rotation matrix to Array.size() rows of zeros
dim2Jac.clear();
dim2Jac.resize(Array.size(), tempArray);
maxArrayNum = 0;
laber_i = laber_j = 0;
// find the off-diagonal element A[i][j] with the largest absolute value
for(i=0; i
if(i==j)
continue;
if(maxArrayNum
maxArrayNum = fabs(Array[i][j]);
laber_i = i;
laber_j = j;
}
}
// rotation angle; E is added to the denominator, presumably to avoid dividing by zero
// when the two diagonal entries are equal. NOTE(review): atanf/cosf/sinf work in float.
theta = atanf(Array[laber_i][laber_j]*2/(Array[laber_i][laber_i]-Array[laber_j][laber_j]+E));
// build the Jacobi rotation matrix
for(i=0; i
dim2Jac[laber_i][laber_i] = dim2Jac[laber_j][laber_j] = cosf(theta/2);
dim2Jac[laber_i][laber_j] = sinf(theta/2);
dim2Jac[laber_j][laber_i] = -sinf(theta/2);
dim2JacT = matTran(dim2Jac); // matrix transpose
dim3Jac.push_back(dim2JacT); // keep the matrix for later
// apply the rotation: Array = J * Array * J^T
Array = matMul(matMul(dim2Jac, Array), dim2JacT);
// finished once QueryArray reports all off-diagonal entries within tolerance
if(QueryArray(Array))
flag = true;
}
// initialise the eigenvector matrix
for(i=0; i
// compute the eigenvector matrix
for(i=0; i
// sort
doubleVector sortA;
double tempNum;
for(i=0; i
for(i=0; i
maxArrayNum = sortA[i];
laber_j = i;
// NOTE(review): comparison truncated; presumably selects the largest remaining entry
for(j=i; j
maxArrayNum = sortA[j];
laber_j = j;
}
// swap the selected entry into position i
tempNum = sortA[i];
sortA[i] = sortA[laber_j];
sortA[laber_j] = tempNum;
for(j=0; j
for(j=0; j
for(j=0; j
}
sortArray.push_back(sortA);
dst.push_back(sortArray);
dst.push_back(charatMat);
return dst;
}
//检查是否满足
bool QueryArray(vector
{
int i, j;
for(i=0; i
if(i==j)
continue;
if(fabs(Array[i][j])>E)
return false;
}
return true;
}
// Matrix transpose.
//
// Array: a rows x cols matrix; every row is expected to have as many entries as the
//        first row.
// Returns the cols x rows matrix dst with dst[j][i] == Array[i][j].
// An empty matrix (no rows, or rows without entries) yields an empty result instead of
// reading Array[0] out of range.
std::vector<std::vector<double> > matTran(std::vector<std::vector<double> > Array)
{
    if (Array.empty() || Array[0].empty())
        return std::vector<std::vector<double> >();

    const std::size_t rows = Array.size();
    const std::size_t cols = Array[0].size();
    std::vector<std::vector<double> > dst(cols, std::vector<double>(rows, 0.0));
    for (std::size_t i = 0; i < rows; ++i)
        for (std::size_t j = 0; j < cols; ++j)
            dst[j][i] = Array[i][j];
    return dst;
}
// Matrix multiplication.
//
// mat1: an n x k matrix, mat2: a k x m matrix (rows of each are expected to have the
//       same length as that matrix's first row).
// Returns the n x m product with dst[i][j] = sum over t of mat1[i][t] * mat2[t][j].
// Returns an empty matrix when either operand is empty or when the inner dimensions do
// not agree, rather than indexing out of range.
std::vector<std::vector<double> > matMul(std::vector<std::vector<double> > mat1,
                                         std::vector<std::vector<double> > mat2)
{
    if (mat1.empty() || mat2.empty() || mat2[0].empty())
        return std::vector<std::vector<double> >();

    const std::size_t inner = mat1[0].size();
    if (inner != mat2.size())
        return std::vector<std::vector<double> >();

    const std::size_t rows = mat1.size();
    const std::size_t cols = mat2[0].size();
    std::vector<std::vector<double> > dst(rows, std::vector<double>(cols, 0.0));
    for (std::size_t i = 0; i < rows; ++i)
        for (std::size_t j = 0; j < cols; ++j)
            for (std::size_t t = 0; t < inner; ++t)
                dst[i][j] += mat1[i][t] * mat2[t][j];
    return dst;
}
//采用z-score法标准数据
vector
{
int i, j;
vector
doubleVector tempDst;
//初始化
for(i=0 ; i
Input_Meam[i] = 0;
Input_Dev[i] = 0;
}
//计算均值
for(i=0; i
for(j=0; j
Input_Meam[i] = Input_Meam[i]/inputTrain.size();
}
//计算标准差
for(i=0; i
for(j=0; j
Input_Dev[i] = sqrtf(Input_Dev[i]/(inputTrain.size()-1));
}
//标准化
for(i=0; i
tempDst.clear();
for(j=0; j
dst.push_back(tempDst);
}
return dst;
}
// Normalises (centres) a kernel matrix in feature space:
//   K' = K - 1m*K - K*1m + 1m*K*1m,
// where 1m is the matrix whose entries are all 1/m. Written out per element this is
//   K'[i][j] = K[i][j] - colMean[j] - rowMean[i] + grandMean,
// which is what is computed here directly, in O(m^2), instead of forming three
// matrix products with a helper matrix.
//
// kernel: the kernel matrix (normally m x m); every row is expected to have as many
//         entries as the first row.
// Returns the centred matrix with the same shape; each of its rows and columns sums
// to zero. An empty input yields an empty matrix.
std::vector<std::vector<double> > normalizationKernel(std::vector<std::vector<double> > kernel)
{
    if (kernel.empty() || kernel[0].empty())
        return std::vector<std::vector<double> >();

    const std::size_t rows = kernel.size();
    const std::size_t cols = kernel[0].size();

    std::vector<double> rowMean(rows, 0.0);
    std::vector<double> colMean(cols, 0.0);
    double grandMean = 0.0;
    for (std::size_t i = 0; i < rows; ++i) {
        for (std::size_t j = 0; j < cols; ++j) {
            const double value = kernel[i][j];
            rowMean[i] += value;
            colMean[j] += value;
            grandMean += value;
        }
    }
    for (std::size_t i = 0; i < rows; ++i)
        rowMean[i] /= static_cast<double>(cols);
    for (std::size_t j = 0; j < cols; ++j)
        colMean[j] /= static_cast<double>(rows);
    grandMean /= static_cast<double>(rows) * static_cast<double>(cols);

    std::vector<std::vector<double> > dstKernel(rows, std::vector<double>(cols, 0.0));
    for (std::size_t i = 0; i < rows; ++i)
        for (std::size_t j = 0; j < cols; ++j)
            dstKernel[i][j] = kernel[i][j] - colMean[j] - rowMean[i] + grandMean;
    return dstKernel;
}
//获取输入样本
vector
{
vector
doubleVector temp;
int i;
double num;
FILE *fp = fopen(File, "r");
if(fp == NULL)
{
printf("OPEN FILE ERROR!!\n");
exit(0);
}
//从文件读取样本
i=1;
temp.clear();
dst.clear();
while(fscanf(fp, "%lf", &num)!=EOF)
{
temp.push_back(num);
if(i%dimNum==0)
{
dst.push_back(temp);
temp.clear();
}
i++;
}
return dst;
}
//多层感知器(MLP)核函数
vector
{
int i, j, k;
doubleVector tempKernel(inputTrain.size(), 0);
vector
for(i=0; i
for(k=0; k
dstKernel[i][j] = tanh(G*dstKernel[i][j]+C);
}
return dstKernel;
}
//使用高斯径向基(RBF)核函数
vector
{
int i, j, k;
double sum;
doubleVector tempKernel(inputTrain.size(), 0);
vector
for(i=0; i
sum = 0;
//计算向量的2范数
for(k=0; k
//高斯径向基(RBF)核函数计算公式
dstKernel[i][j] = exp(-0.5*(sqrtf(sum)/S)*(sqrtf(sum)/S));
}
return dstKernel;
}
//使用多项式核函数
vector
{
int i, j, k, p;
double sum;
doubleVector tempKernel(inputTrain.size(), 0);
vector
for(i=0; i
for(k=0; k
sum = 1;
for(p=0; p
dstKernel[i][j] = sum;
}
return dstKernel;
}
// Linear kernel.
//
// inputTrain: one sample per row; every row is expected to have as many entries as
//             the first row.
// Returns the m x m kernel (Gram) matrix, m = number of samples, with
//   K[i][j] = <x_i, x_j> (dot product of samples i and j).
// An empty input yields an empty matrix.
std::vector<std::vector<double> > kernelLinearFun(std::vector<std::vector<double> > inputTrain)
{
    const std::size_t count = inputTrain.size();
    const std::size_t dim = count ? inputTrain[0].size() : 0;
    std::vector<std::vector<double> > dstKernel(count, std::vector<double>(count, 0.0));
    for (std::size_t i = 0; i < count; ++i)
        for (std::size_t j = 0; j < count; ++j)
            for (std::size_t k = 0; k < dim; ++k)
                dstKernel[i][j] += inputTrain[i][k] * inputTrain[j][k];
    return dstKernel;
}