function [eigvector, eigvalue, elapse] = PCA(data, ReducedDim)
%PCA Principal Component Analysis
%
%   Usage:
%       [eigvector, eigvalue]         = PCA(data, ReducedDim)
%       [eigvector, eigvalue]         = PCA(data)
%       [eigvector, eigvalue, elapse] = PCA(data, ReducedDim)
%
%   Input:
%       data       - Data matrix (nSmp x nFea). Each row vector of data is
%                    a data point. Sparse input is converted to full.
%       ReducedDim - The dimensionality of the reduced subspace. If 0,
%                    negative, empty, or larger than the number of
%                    features, all the dimensions will be kept.
%                    Default is 0.
%
%   Output:
%       eigvector - Each column is an embedding function, for a new
%                   data point (row vector) x,  y = x*eigvector
%                   will be the embedding result of x.
%                   The data is mean-centred internally before the
%                   decomposition; the mean itself is not returned.
%       eigvalue  - Column vector with the eigenvalues of the PCA
%                   eigen-problem (of the centred scatter matrix, not
%                   divided by the number of samples), sorted in
%                   descending order.
%       elapse    - CPU time (seconds, measured with cputime) spent in the
%                   computation.
%
%   Directions whose eigenvalue is smaller than 1e-12 times the largest
%   eigenvalue are discarded, so fewer than ReducedDim columns may be
%   returned. If the data has no variance at all (every row identical),
%   eigvector is nFea-by-0 and eigvalue is empty.
%
%   Examples:
%       fea = rand(7,10);
%       [eigvector,eigvalue] = PCA(fea,4);
%       Y = fea*eigvector;
%
%
%   version 2.1 --June/2007
%   version 2.0 --May/2007
%   version 1.1 --Feb/2006
%   version 1.0 --April/2004
%
%   Written by Deng Cai (dengcai2 AT cs.uiuc.edu)
%

% An omitted or empty ReducedDim means "use the default".
if nargin < 2 || isempty(ReducedDim)
    ReducedDim = 0;
end

[nSmp, nFea] = size(data);
if (ReducedDim > nFea) || (ReducedDim <= 0)
    ReducedDim = nFea;
end

tmp_T = cputime;

if issparse(data)
    data = full(data);
end

% Centre the data: subtract the per-feature mean from every sample.
sampleMean = mean(data, 1);
data = (data - repmat(sampleMean, nSmp, 1));

% When there are clearly more features than samples it is cheaper to
% compute the eigenvectors of A*A^T (nSmp x nSmp) instead of A^T*A
% (nFea x nFea) and convert them back afterwards.
% NOTE(review): 1.0713 is the crossover ratio used by the original
% implementation; presumably chosen empirically.
useGram = nFea/nSmp > 1.0713;
if useGram
    ddata = data*data';
else
    ddata = data'*data;
end

% Force exact symmetry so the eigen-solvers see a symmetric matrix.
ddata = max(ddata, ddata');

dimMatrix = size(ddata, 2);
if dimMatrix > 1000 && ReducedDim < dimMatrix/10  % using eigs to speed up!
    option = struct('disp', 0);
    [eigvector, eigvalue] = eigs(ddata, ReducedDim, 'la', option);
    eigvalue = diag(eigvalue);
else
    [eigvector, eigvalue] = eig(ddata);
    eigvalue = diag(eigvalue);

    % eig does not guarantee descending order; sort explicitly.
    [junk, index] = sort(-eigvalue);
    eigvalue = eigvalue(index);
    eigvector = eigvector(:, index);
end
clear ddata;

% Discard numerically-zero directions (relative to the largest eigenvalue).
maxEigValue = max(abs(eigvalue));
if maxEigValue == 0
    % Zero-variance data: every direction is degenerate. Dropping them all
    % avoids the 0/0 comparison and the division by zero in the
    % normalization below, which would otherwise yield NaN eigenvectors.
    eigIdx = 1:length(eigvalue);
else
    eigIdx = find(abs(eigvalue)/maxEigValue < 1e-12);
end
eigvalue(eigIdx) = [];
eigvector(:, eigIdx) = [];

if useGram
    eigvector = data'*eigvector;                                  % Eigenvectors of A^T*A
    eigvector = eigvector*diag(1./(sum(eigvector.^2, 1).^0.5));   % Normalization
end

% Keep only the leading ReducedDim components.
if ReducedDim < length(eigvalue)
    eigvalue = eigvalue(1:ReducedDim);
    eigvector = eigvector(:, 1:ReducedDim);
end

elapse = cputime - tmp_T;
Test (example session and its printed output):
fea = rand(7,10)
[eigvector,eigvalue] = PCA(fea,4)
Y = fea*eigvector
fea =
0.0305 0.8594 0.4899 0.6820 0.7224 0.4538 0.8314 0.6280 0.3724 0.7379
0.7441 0.8055 0.1679 0.0424 0.1499 0.4324 0.8034 0.2920 0.1981 0.2691
0.5000 0.5767 0.9787 0.0714 0.6596 0.8253 0.0605 0.4317 0.4897 0.4228
0.4799 0.1829 0.7127 0.5216 0.5186 0.0835 0.3993 0.0155 0.3395 0.5479
0.9047 0.2399 0.5005 0.0967 0.9730 0.1332 0.5269 0.9841 0.9516 0.9427
0.6099 0.8865 0.4711 0.8181 0.6490 0.1734 0.4168 0.1672 0.9203 0.4177
0.6177 0.0287 0.0596 0.8175 0.8003 0.3909 0.6569 0.1062 0.0527 0.9831
eigvector =
-0.1487 0.1730 -0.3812 0.2153
-0.1381 -0.5340 0.5429 0.2571
-0.4056 -0.1441 0.0047 -0.5249
0.4681 0.1735 0.5405 -0.3343
-0.1373 0.4380 0.1915 -0.1696
-0.0795 -0.2602 -0.1359 -0.0552
0.2845 0.0474 0.1770 0.5382
-0.4609 0.2519 0.1666 0.4194
-0.5001 0.1770 0.3892 -0.0415
0.0814 0.5268 0.0462 0.0352
eigvalue =
1.5668
1.4181
0.9042
0.8643
Y =
-0.3170 0.4447 1.3333 0.3162
-0.3083 -0.0766 0.4278 0.7718
-1.0658 0.1451 0.4726 -0.2309
-0.2380 0.5501 0.5203 -0.2640
-1.1723 1.3025 0.6794 0.4791
-0.5088 0.3902 1.2730 -0.0102
0.3133 1.0587 0.5222 0.1090