function varargout = pca(X, N, method)
%PCA PRINCIPLE COMPONENTS ANALYSIS
% X是n1*n2的,n1是变量个数,n2是样本数
% 返回值p是转换坐标系 N说明投影到N个坐标系上
% p*X 才是投影结果
% Performing principal components analysis on the N1-by-N2 real-valued
% data matrix X, where N1 and N2 are the number of features (N1 变量个数)
% and observations (N2 采样个数), respectively.
%
% P = pca(X,N,method) returns the N-by-N1 matrix P of basis vectors, one
% basis vector per row, in order of decreasing corresponding eigenvalues,
% i.e. P*X retains the first N principle components(保持的前N个主成分). If N is not
% specified, all components (N=N1) are kept.
%
% Two methods are available: 'eig' (default) and 'svd' which solve the
% problem by eigenvalue decomposition and singular value decomposition,
% respectively. 'svd' is running in 'economy' mode. If there are more
% variables than samples (N1>N2), 'svd' is recommended for this code.
%
% [P, D] = pca(X,N,method) also returns all eigenvalues (normalized) in D,
% in descending order.
%
% [P, D, Y] = pca(X,N,method) further returns the N-by-N2 matrix Y = P*X,
% each column of whom is the projection of the corresponding observation
% from X onto the basis vectors contained in P.
%
% Siqing Wu, <[email protected] >
% Version: 1.1, Date: 2008-07-30
error(nargchk(1,3,nargin)) % check the number of arguments
error(nargoutchk(0,3,nargout))
[nr, nc] = size(X);
if nargin<3
method = 'eig'; % default method
end
if nargin<2
N = nr; % keep all components
elseif N < 1 || N > nr || round(N)~=N
fprintf('Input N=%g is not valid; all components will be retained./n', N)
N = nr;
end
X = X-repmat(mean(X,2),[1 nc]); % center data
switch method
case 'eig'
C = X*X.';
% should be C/(nc-1) for unbiased estimate of the covariance matix,
% but won't affect P.
[E,D] = eig(C); % D lists eigenvalues from small to large
% rearrange D and E
D = diag(D);
D = D(end:-1:1)/max(D);
E = E(:,end:-1:1)';
P = E(1:N,:);
case 'svd'
% Instead of solving X*X' = E*D*E', solve X = U*Sigma*V'
% X*X' = U*Sigma*V'*V*Sigma*U' = U*Sigma^2*U' -> P = U';
[U,sigma] = svd(X,'econ'); % "economy size" decomposition
if N > nc
fprintf('SVD -> N=%d is used./n', nc)
N = nc;
end
D = diag(sigma).^2;
D = D/max(D);
P = U(:,1:N)';
otherwise
error('Undefined method!')
end
fprintf('Top %d components are retained; cumulative eigenvalue contribution is %1.2f/n', N, sum(D(1:N))/sum(D))
switch nargout
case {0,1}
varargout = {P};
case {2}
varargout = {P, D};
case {3}
Y = P*X;
varargout = {P, D, Y};
end