奇异值分解(Singular Value Decomposition)是线性代数中一种重要的矩阵分解,和 特征值分解 有一定的关联,作用都在于将矩阵分解成 多个矩阵的乘积,从而方便进行数据的拆分,实现数据的投影或者降维。
从数学的角度来看,特征值分解 和 奇异值分解 都是给一个矩阵(线性变换)找一组特殊的基。
我们 先从特征值分解层面来引入问题:
● 特征值分解
特征值分解是针对方阵而言的:若方阵 $A$ 有特征向量 $v$,$v$ 对应的特征值为 $\lambda$,则可描述为:
$$A v = \lambda v$$
则方阵 $A$ 的特征值分解形式为:
$$A = Q \Sigma Q^{-1}$$
其中,Q 是由方阵 A 的特征向量(按列)组成的矩阵,Σ 是对角矩阵,对角线元素即为特征值(从大到小排列)。实际上可以认为 Q 的各列就是这样一组基:方阵 A 所表示的线性变换投影到这组基上之后,只剩下沿各个基方向的伸缩,而对角阵 Σ 就代表了在各个基方向上的影响指数(伸缩系数)。大的特征值影响较大,小的特征值可以忽略,这就为实现 PCA 降维方法提供了基础。
● 奇异值分解
特征值分解 有一个限制条件,那就是 矩阵A 必须为方阵,那么针对待分解矩阵 不是方阵的情况,该如何处理呢?
这就是本节要讲的适用于任意矩阵的分解方法。针对任意 $m \times n$ 矩阵 $A$,其奇异值分解为:
$$A = U \Sigma V^{T}$$
其中,U 是一个 m×m 的方阵(其列向量称为左奇异向量),V^T 是一个 n×n 的矩阵(V 的列向量称为右奇异向量),Σ 是一个 m×n 的矩阵(除了主对角线以外的元素都是 0,主对角线上的元素称为奇异值)。
从数学上看,这表示我们找到了 U 和 V 这样两组基,并且每一组基内部的向量两两正交(即 U 和 V 都是正交矩阵)。
参考代码(来自于Numerical Recipes in C):
/*******************************************************************************
Singular value decomposition program, svdcmp, from "Numerical Recipes in C"
(Cambridge Univ. Press) by W.H. Press, S.A. Teukolsky, W.T. Vetterling,
and B.P. Flannery
*******************************************************************************/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
/* Number of padding elements placed in front of every array allocated by
   dmatrix() and dvector(); free_dvector() subtracts it again to recover the
   pointer that malloc() returned. */
#define NR_END 1
/* Type a pointer is cast to before it is handed to free(). */
#define FREE_ARG char*
/* Magnitude of a with the sign of b: fabs(a) when b >= 0.0, else -fabs(a). */
#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
/* Scratch storage used by DMAX to hold its two arguments. */
static double dmaxarg1,dmaxarg2;
/* Larger of two doubles. Each argument is stored once in dmaxarg1/dmaxarg2
   before being compared; because those are shared file-scope statics the
   macro is not reentrant. */
#define DMAX(a,b) (dmaxarg1=(a),dmaxarg2=(b),(dmaxarg1) > (dmaxarg2) ? (dmaxarg1) : (dmaxarg2))
/* Scratch storage used by IMIN to hold its two arguments. */
static int iminarg1,iminarg2;
/* Smaller of two ints. Each argument is stored once in iminarg1/iminarg2
   before being compared; because those are shared file-scope statics the
   macro is not reentrant. */
#define IMIN(a,b) (iminarg1=(a),iminarg2=(b),(iminarg1) < (iminarg2) ? (iminarg1) : (iminarg2))
double **dmatrix(int nrl, int nrh, int ncl, int nch)
/* Allocate a double matrix with subscript range m[nrl..nrh][ncl..nch].

   Two blocks are obtained from malloc(): a table of row pointers and one
   contiguous block holding all elements, each with NR_END padding elements in
   front. Row i+1 starts exactly ncol elements after row i. The elements are
   not initialised.

   Returns the offset row-pointer table, or NULL when the row range is empty,
   the column range has negative length, or either allocation fails (nothing
   is leaked in that case). */
{
    int i,nrow=nrh-nrl+1,ncol=nch-ncl+1;
    double **m;
    double *cells;

    /* With no rows there is no slot m[nrl] to store the data pointer in, and a
       negative column count would wrap around when converted to size_t. */
    if (nrow < 1 || ncol < 0) return NULL;

    /* allocate pointers to rows */
    m=(double **) malloc(((size_t)nrow+NR_END)*sizeof(double*));
    if (m == NULL) return NULL;

    /* allocate the element storage; the product is formed in size_t so that
       large dimensions do not overflow int */
    cells=(double *) malloc(((size_t)nrow*(size_t)ncol+NR_END)*sizeof(double));
    if (cells == NULL) {
        free(m);
        return NULL;
    }

    /* shift the row table so that m[nrl] is its first usable slot */
    m += NR_END;
    m -= nrl;

    /* shift the first row so that m[nrl][ncl] is the first usable element,
       then chain the remaining rows behind it */
    m[nrl]=cells+NR_END-ncl;
    for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;

    /* return pointer to array of pointers to rows */
    return m;
}
double *dvector(int nl, int nh)
/* Allocate a double vector with subscript range v[nl..nh].

   The block obtained from malloc() holds nh-nl+1 elements plus NR_END padding
   elements in front; the returned pointer is offset so that v[nl] is the
   first usable element. The elements are not initialised. Release the vector
   with free_dvector() using the same nl.

   Returns NULL when the requested length is negative or malloc() fails. */
{
    int len=nh-nl+1;
    double *v;

    /* a negative length would wrap to a huge size once converted to size_t */
    if (len < 0) return NULL;

    v=(double *)malloc(((size_t)len+NR_END)*sizeof(double));
    /* never do pointer arithmetic on a failed allocation */
    if (v == NULL) return NULL;
    return v-nl+NR_END;
}
void free_dvector(double *v, int nl, int nh)
/* Free a double vector allocated with dvector().

   v  - pointer previously returned by dvector(nl, nh); NULL is accepted and
        ignored.
   nl - lower subscript that was passed to dvector(); needed to undo the
        offset applied there.
   nh - upper subscript; not used, kept so the signature mirrors dvector(). */
{
    (void)nh;
    /* offsetting a null pointer would hand free() a bogus address */
    if (v == NULL) return;
    free((FREE_ARG) (v+nl-NR_END));
}
double pythag(double a, double b)
/* compute (a2 + b2)^1/2 without destructive underflow or overflow */
{
double absa,absb;
absa=fabs(a);
absb=fabs(b);
if (absa > absb) return absa*sqrt(1.0+(absb/absa)*(absb/absa));
else return (absb == 0.0 ? 0.0 : absb*sqrt(1.0+(absa/absb)*(absa/absb)));
}
/******************************************************************************/
void svdcmp(double **a, int m, int n, double w[], double **v)
/*******************************************************************************
Given a matrix a[1..m][1..n], this routine computes its singular value
decomposition, A = U * W * V^T. The matrix U replaces a on output. The diagonal
matrix of singular values W is output as a vector w[1..n]. The matrix V (not
the transpose V^T) is output as v[1..n][1..n].

All arrays use unit-offset subscripts. The routine does not reorder w.

A work vector of n elements is obtained from dvector() and released before
returning. If it cannot be allocated, a message is written to stderr and the
routine returns without touching a, w or v. If a singular value has not
converged after 30 iterations, a message is printed to stdout and processing
continues with the values reached so far.
*******************************************************************************/
{
    int flag,i,its,j,jj,k,l,nm;
    double anorm,c,f,g,h,s,scale,x,y,z,*rv1;

    rv1=dvector(1,n);
    if (rv1 == NULL) {
        /* without the work vector nothing below can run safely */
        fprintf(stderr,"allocation failure in svdcmp\n");
        return;
    }

    /* Householder reduction to bidiagonal form. w[] receives the diagonal,
       rv1[] the super-diagonal; anorm tracks the largest |w[i]|+|rv1[i]|. */
    g=scale=anorm=0.0;
    for (i=1;i<=n;i++) {
        l=i+1;
        rv1[i]=scale*g;
        g=s=scale=0.0;
        if (i <= m) {
            /* column i, rows i..m; scaled by the column's 1-norm first */
            for (k=i;k<=m;k++) scale += fabs(a[k][i]);
            if (scale) {
                for (k=i;k<=m;k++) {
                    a[k][i] /= scale;
                    s += a[k][i]*a[k][i];
                }
                f=a[i][i];
                g = -SIGN(sqrt(s),f);
                h=f*g-s;
                a[i][i]=f-g;
                for (j=l;j<=n;j++) {
                    for (s=0.0,k=i;k<=m;k++) s += a[k][i]*a[k][j];
                    f=s/h;
                    for (k=i;k<=m;k++) a[k][j] += f*a[k][i];
                }
                for (k=i;k<=m;k++) a[k][i] *= scale;
            }
        }
        w[i]=scale *g;
        g=s=scale=0.0;
        if (i <= m && i != n) {
            /* row i, columns l..n; scaled by the row's 1-norm first */
            for (k=l;k<=n;k++) scale += fabs(a[i][k]);
            if (scale) {
                for (k=l;k<=n;k++) {
                    a[i][k] /= scale;
                    s += a[i][k]*a[i][k];
                }
                f=a[i][l];
                g = -SIGN(sqrt(s),f);
                h=f*g-s;
                a[i][l]=f-g;
                for (k=l;k<=n;k++) rv1[k]=a[i][k]/h;
                for (j=l;j<=m;j++) {
                    for (s=0.0,k=l;k<=n;k++) s += a[j][k]*a[i][k];
                    for (k=l;k<=n;k++) a[j][k] += s*rv1[k];
                }
                for (k=l;k<=n;k++) a[i][k] *= scale;
            }
        }
        anorm = DMAX(anorm,(fabs(w[i])+fabs(rv1[i])));
    }

    /* Accumulation of right-hand transformations into v. On entry g and l
       still hold the values left by the loop above; they are only read once
       i < n, after the first pass has reset them. */
    for (i=n;i>=1;i--) {
        if (i < n) {
            if (g) {
                for (j=l;j<=n;j++) /* Double division to avoid possible underflow. */
                    v[j][i]=(a[i][j]/a[i][l])/g;
                for (j=l;j<=n;j++) {
                    for (s=0.0,k=l;k<=n;k++) s += a[i][k]*v[k][j];
                    for (k=l;k<=n;k++) v[k][j] += s*v[k][i];
                }
            }
            for (j=l;j<=n;j++) v[i][j]=v[j][i]=0.0;
        }
        v[i][i]=1.0;
        g=rv1[i];
        l=i;
    }

    /* Accumulation of left-hand transformations; a is overwritten in place. */
    for (i=IMIN(m,n);i>=1;i--) {
        l=i+1;
        g=w[i];
        for (j=l;j<=n;j++) a[i][j]=0.0;
        if (g) {
            g=1.0/g;
            for (j=l;j<=n;j++) {
                for (s=0.0,k=l;k<=m;k++) s += a[k][i]*a[k][j];
                f=(s/a[i][i])*g;
                for (k=i;k<=m;k++) a[k][j] += f*a[k][i];
            }
            for (j=i;j<=m;j++) a[j][i] *= g;
        } else {
            for (j=i;j<=m;j++) a[j][i]=0.0;
        }
        ++a[i][i];
    }

    /* Diagonalization of the bidiagonal form: loop over singular values
       (k, from the last to the first) and over allowed iterations (its). */
    for (k=n;k>=1;k--) {
        for (its=1;its<=30;its++) {
            flag=1;
            /* Test for splitting. "x + anorm == anorm" is the test for x
               being negligible relative to anorm. */
            for (l=k;l>=1;l--) {
                nm=l-1; /* Note that rv1[1] is always zero, so the loop
                           breaks at l == 1 before w[0] would be read. */
                if ((double)(fabs(rv1[l])+anorm) == anorm) {
                    flag=0;
                    break;
                }
                if ((double)(fabs(w[nm])+anorm) == anorm) break;
            }
            if (flag) {
                /* Cancellation of rv1[l], if l > 1. */
                c=0.0;
                s=1.0;
                for (i=l;i<=k;i++) {
                    f=s*rv1[i];
                    rv1[i]=c*rv1[i];
                    if ((double)(fabs(f)+anorm) == anorm) break;
                    g=w[i];
                    h=pythag(f,g);
                    w[i]=h;
                    h=1.0/h;
                    c=g*h;
                    s = -f*h;
                    /* rotate columns nm and i of a */
                    for (j=1;j<=m;j++) {
                        y=a[j][nm];
                        z=a[j][i];
                        a[j][nm]=y*c+z*s;
                        a[j][i]=z*c-y*s;
                    }
                }
            }
            z=w[k];
            if (l == k) { /* Convergence. */
                if (z < 0.0) { /* Singular value is made nonnegative. */
                    w[k] = -z;
                    for (j=1;j<=n;j++) v[j][k] = -v[j][k];
                }
                break;
            }
            /* Reported only; the final sweep below still runs. */
            if (its == 30) printf("no convergence in 30 svdcmp iterations\n");

            /* Shift from bottom 2-by-2 minor. */
            x=w[l];
            nm=k-1;
            y=w[nm];
            g=rv1[nm];
            h=rv1[k];
            f=((y-z)*(y+z)+(g-h)*(g+h))/(2.0*h*y);
            g=pythag(f,1.0);
            f=((x-z)*(x+z)+h*((y/(f+SIGN(g,f)))-h))/x;

            /* Next QR transformation: */
            c=s=1.0;
            for (j=l;j<=nm;j++) {
                i=j+1;
                g=rv1[i];
                y=w[i];
                h=s*g;
                g=c*g;
                z=pythag(f,h);
                rv1[j]=z;
                c=f/z;
                s=h/z;
                f=x*c+g*s;
                g = g*c-x*s;
                h=y*s;
                y *= c;
                /* rotate columns j and i of v */
                for (jj=1;jj<=n;jj++) {
                    x=v[jj][j];
                    z=v[jj][i];
                    v[jj][j]=x*c+z*s;
                    v[jj][i]=z*c-x*s;
                }
                z=pythag(f,h);
                w[j]=z; /* Rotation can be arbitrary if z = 0. */
                if (z) {
                    z=1.0/z;
                    c=f*z;
                    s=h*z;
                }
                f=c*g+s*y;
                x=c*y-s*g;
                /* rotate columns j and i of a */
                for (jj=1;jj<=m;jj++) {
                    y=a[jj][j];
                    z=a[jj][i];
                    a[jj][j]=y*c+z*s;
                    a[jj][i]=z*c-y*s;
                }
            }
            rv1[l]=0.0;
            rv1[k]=f;
            w[k]=x;
        }
    }
    free_dvector(rv1,1,n);
}