任意 U(i) 是一个 n 维实数向量(或者复数向量),
P(i) = I - U(i) * U(i)' / H(i)
其中: Hi = (1/2) * Ui' * Ui
Hi = = H(i)等;
—————————————————————
Q = P(n-3)*P(n-2)*...*P(1)*P(0)
T = Q * A * Q'
A = inv(Q) * A * inv(Q')
_____________________________________
在计算如下值得过程中,
Pi = I - Ui * Ui' / Hi
Hi = (1/2) * Ui' * Ui
,为了实现数值稳定,Hi作为一个分母,这里在选取 U[t-1] 的时候,最大化了U[t-1]的绝对值:
通过:
U[t - 1] += get_sign(U[t - 1]) * sqrt(delta);
tridiagonal_symmetirc_matrix.cpp
#include
#include
#include
#include
#include
#define NA 5
#define PRINT_ALL 1
void print_matrix(float *A, int M, int N, int lda);
void print_vector(float *A, int len);
void cpu_gemm(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B
void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'
void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq);
int symmetric_matrix_test();
int hermite_matrix_test();
void complex_gemm(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha);
void complex_gemm_NH(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha);
float get_sign(float aij);
union np_f_i
{
float f;
int i;
};
typedef union np_f_i np_f_i;
float get_sign(float aij)
{
np_f_i x;
x.f = aij;
return (x.i >> (sizeof(float) - 1)) ? -1.0 : 1.0;
}
void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq)
{
for (int i = 0; i < n - 2; i++) // i = 0 ~ 3;
{
int t;
float H;
float delta;
float U[NA];
float P[NA * NA];
int ldp = n;
t = n - i - 1;
// 01. fill the first U
memset(U, 0x00, NA * sizeof(float));
for (int idx = 0; idx < t; idx++)
{ // U(0:t-1) = A(t, 0:t-1)
U[idx] = A[t + idx * lda];
}
// void print_vector(float *A, int len)
printf("U(%d) first =\n", i);
print_vector(U, n);
// 02. calculat delta:
delta = 0.0f;
for (int idx = 0; idx < t; idx++)
{
delta += U[idx] * U[idx];
}
printf("delta(%d)=%7.4f\n", i, delta);
// 03. fill the second U
U[t - 1] += get_sign(U[t - 1]) * sqrt(delta); // for numerical stable;
printf("U(%d) second =\n", i);
print_vector(U, n);
// 04. calculate H, a scalar
H = 0.0f;
for (int idx = 0; idx < t; idx++)
{
H += U[idx] * U[idx];
}
H *= 0.5f;
printf("H_i(%d) = %7.4f\n", i, H);
// 05. calculate P = -U*U'/H; P = I - U*U'/H;
// void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'
cpu_gemm(n, n, 1, U, n, U, 1, P, n, -1.0f / H); // not need NT
printf("P(%d) first=\n", i);
print_matrix(P, n, n, ldp);
// P = I + P = I - U*U'/H
for (int idx = 0; idx < n; idx++)
{
P[idx + idx * ldp] += 1.0f;
}
printf("P(%d) second=\n", i);
print_matrix(P, n, n, ldp);
// 06. A(i+1) = P(i)*A(i)*P(i)
cpu_gemm(n, n, n, P, ldp, A, lda, A, lda, 1.0f);
cpu_gemm(n, n, n, A, lda, P, ldp, A, lda, 1.0f);
printf("A(%d) =\n", i);
print_matrix(A, n, n, lda);
// 07. Q(i+1) = P(i)*Q(i); Q = P(n-3)*P(n-2)*...*P(0)*I
cpu_gemm(n, n, n, P, ldp, Q, ldq, Q, ldq, 1.0f);
printf("Q(%d) =\n", i);
print_matrix(Q, n, n, ldq);
}
}
int main()
{
printf("\nN-1 to 2 tridiaganal symm ...\n");
static float A_h[NA * NA] = {
10.0, 1.0, 2.0, 3.0, 4.0,
1.0, 9.0, -1.0, 2.0, -3.0,
2.0, -1.0, 7.0, 3.0, -5.0,
3.0, 2.0, 3.0, 12.0, -1.0,
4.0, -3.0, -5.0, -1.0, 15.0};
///
// i=0,1,...,n-3; t=n-i-1;
float *A = nullptr;
float *Q = nullptr;
int n = NA;
int lda = n;
int ldq = n;
// alloc A
A = (float *)malloc(n * n * sizeof(float));
memcpy(A, A_h, n * n * sizeof(float));
printf("A(0) =\n");
print_matrix(A, n, n, lda);
// alloc Q
Q = (float *)malloc(ldq * n * sizeof(float));
memset(Q, 0x00, ldq * n * sizeof(float));
for (int idx = 0; idx < n; idx++)
{
Q[idx + idx * ldq] = 1.0f;
}
// void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq)
tridiag_symm_mat_Householder(A, n, lda, Q, ldq);
/**
for (int i = 0; i < n - 2; i++)// i = 0 ~ 3;
{
int t;
float H;
float delta;
float U[NA];
float P[NA * NA];
int ldp = n;
t = n - i - 1;
// 01. fill the first U
memset(U, 0x00, NA * sizeof(float));
for (int idx = 0; idx < t; idx++)
{ // U(0:t-1) = A(t, 0:t-1)
U[idx] = A[t + idx * lda];
}
// void print_vector(float *A, int len)
printf("U(%d) first =\n", i);
print_vector(U, n);
// 02. calculat delta:
delta = 0.0f;
for (int idx = 0; idx < t; idx++)
{
delta += U[idx] * U[idx];
}
printf("delta(%d)=%7.4f\n", i, delta);
// 03. fill the second U
U[t - 1] += get_sign(U[t - 1]) * sqrt(delta); // for numerical stable;
printf("U(%d) second =\n", i);
print_vector(U, n);
// 04. calculate H, a scalar
H = 0.0f;
for (int idx = 0; idx < t; idx++)
{
H += U[idx] * U[idx];
}
H *= 0.5f;
printf("H_i(%d) = %7.4f\n", i, H);
// 05. calculate P = -U*U'/H; P = I - U*U'/H;
// void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'
cpu_gemm(n, n, 1, U, n, U, 1, P, n, -1.0f / H);// not need NT
printf("P(%d) first=\n", i);
print_matrix(P, n, n, ldp);
// P = I + P = I - U*U'/H
for (int idx = 0; idx < n; idx++)
{
P[idx + idx * ldp] += 1.0f;
}
printf("P(%d) second=\n", i);
print_matrix(P, n, n, ldp);
// 06. A(i+1) = P(i)*A(i)*P(i)
cpu_gemm(n, n, n, P, ldp, A, lda, A, lda, 1.0f);
cpu_gemm(n, n, n, A, lda, P, ldp, A, lda, 1.0f);
printf("A(%d) =\n", i);
print_matrix(A, n, n, lda);
// 07. Q(i+1) = P(i)*Q(i); Q = P(n-3)*P(n-2)*...*P(0)*I
cpu_gemm(n, n, n, P, ldp, Q, ldq, Q, ldq, 1.0f);
printf("Q(%d) =\n", i);
print_matrix(Q, n, n, ldq);
}
*/
printf("T=[ ...\n");
print_matrix(A, n, n, lda);
printf("]\nQ=[ ...\n");
print_matrix(Q, n, n, ldq);
printf("]\n");
return 0;
}
void print_matrix(float *A, int M, int N, int lda)
{
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
printf("%7.3f ", A[i + j * lda]);
}
printf("; ...\n");
}
// printf("\n");
}
void print_vector(float *A, int len)
{
for (int idx = 0; idx < len; idx++)
{
printf("%7.4f ", A[idx]);
}
printf("\n");
}
void cpu_gemm(int M, int N, int K, float *A_h, int lda, float *B_h, int ldb, float *C, int ldc, float alpha)
{
float *A = nullptr;
float *B = nullptr;
A = (float *)malloc(lda * K * sizeof(float));
B = (float *)malloc(ldb * N * sizeof(float));
memcpy(A, A_h, lda * K * sizeof(float));
memcpy(B, B_h, ldb * N * sizeof(float));
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
float sigma;
sigma = 0.0f;
for (int k = 0; k < K; k++)
{
sigma += A[i + k * lda] * B[k + j * ldb];
}
C[i + j * ldc] = alpha * sigma;
}
}
free(A);
free(B);
}
void cpu_gemm_NT(int M, int N, int K, float *A_h, int lda, float *B_h, int ldb, float *C, int ldc, float alpha)
{
float *A = nullptr;
float *B = nullptr;
A = (float *)malloc(lda * K * sizeof(float));
B = (float *)malloc(ldb * N * sizeof(float));
memcpy(A, A_h, lda * K * sizeof(float));
memcpy(B, B_h, ldb * N * sizeof(float));
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
float sigma;
sigma = 0.0f;
for (int k = 0; k < K; k++)
{
sigma += A[i + k * lda] * B[k * ldb + j];
}
C[i + j * ldc] = alpha * sigma;
}
}
free(A);
free(B);
}
void complex_gemm_NH(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha)
{
cuComplex zero_;
zero_ = make_cuFloatComplex(0.0f, 0.0f);
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
cuComplex sigma;
sigma = zero_;
for (int k = 0; k < K; k++)
{
// sigma += A[i + k*lda]*B[k+j*ldb];
sigma = cuCaddf(sigma, cuCmulf(A[i + k * lda], cuConjf(B[k * ldb + j])));
}
C[i + j * ldc] = make_cuFloatComplex(alpha * cuCrealf(sigma), alpha * cuCimagf(sigma));
}
}
}
void complex_gemm(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha)
{
cuComplex zero_;
zero_ = make_cuFloatComplex(0.0f, 0.0f);
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
cuComplex sigma;
sigma = zero_;
for (int k = 0; k < K; k++)
{
// sigma += A[i + k*lda]*B[k+j*ldb];
sigma = cuCaddf(sigma, cuCmulf(A[i + k * lda], B[k + j * ldb]));
}
C[i + j * ldc] = make_cuFloatComplex(alpha * cuCrealf(sigma), alpha * cuCimagf(sigma));
}
}
}
/**
#include
#include
void strq(double *a, int n, double *q, double* b, double* c)//int n; double a[],q[],b[],c[];
{
int i,j,k,u,v;
double h,f,g,h2;
for (i=0; i<=n-1; i++)
for (j=0; j<=n-1; j++)
{ u=i*n+j; q[u]=a[u];}
for (i=n-1; i>=1; i--)
{ h=0.0;
if (i>1)
for (k=0; k<=i-1; k++)
{ u=i*n+k; h=h+q[u]*q[u];}
if (h+1.0==1.0)
{ c[i]=0.0;
if (i==1) c[i]=q[i*n+i-1];
b[i]=0.0;
}
else
{ c[i]=sqrt(h);
u=i*n+i-1;
if (q[u]>0.0) c[i]=-c[i];
h=h-q[u]*c[i];
q[u]=q[u]-c[i];
f=0.0;
for (j=0; j<=i-1; j++)
{ q[j*n+i]=q[i*n+j]/h;
g=0.0;
for (k=0; k<=j; k++)
g=g+q[j*n+k]*q[i*n+k];
if (j+1<=i-1)
for (k=j+1; k<=i-1; k++)
g=g+q[k*n+j]*q[i*n+k];
c[j]=g/h;
f=f+g*q[j*n+i];
}
h2=f/(h+h);
for (j=0; j<=i-1; j++)
{ f=q[i*n+j];
g=c[j]-h2*f;
c[j]=g;
for (k=0; k<=j; k++)
{ u=j*n+k;
q[u]=q[u]-f*c[k]-g*q[i*n+k];
}
}
b[i]=h;
}
}
for (i=0; i<=n-2; i++) c[i]=c[i+1];
c[n-1]=0.0;
b[0]=0.0;
for (i=0; i<=n-1; i++)
{ if ((b[i]!=0.0)&&(i-1>=0))
for (j=0; j<=i-1; j++)
{ g=0.0;
for (k=0; k<=i-1; k++)
g=g+q[i*n+k]*q[k*n+j];
for (k=0; k<=i-1; k++)
{ u=k*n+j;
q[u]=q[u]-g*q[k*n+i];
}
}
u=i*n+i;
b[i]=q[u]; q[u]=1.0;
if (i-1>=0)
for (j=0; j<=i-1; j++)
{ q[i*n+j]=0.0; q[j*n+i]=0.0;}
}
return;
}
int main()
{
int i,j;
static double b[5],c[5],q[5*5];
static double a[5*5]={
10.0, 1.0, 2.0, 3.0, 4.0,
1.0, 9.0, -1.0, 2.0, -3.0,
2.0, -1.0, 7.0, 3.0, -5.0,
3.0, 2.0, 3.0, 12.0, -1.0,
4.0, -3.0, -5.0, -1.0, 15.0
};
int n = 5;
strq(a,n,q,b,c);
printf("MAT A IS:\n");
int lda = n;
int ldq = n;
for (i=0; i<=4; i++)
{
for (j=0; j<=4; j++)
printf("%7.3f ",a[i*lda + j]);
printf("\n");
}
printf("\n");
printf("MAT Q IS:\n");
for (i=0; i<=4; i++)
{
for (j=0; j<=4; j++)
printf("%7.3f ",q[i*ldq + j]);
printf("\n");
}
printf("\n");
printf("MAT B IS:\n");
for (i=0; i<=4; i++)
printf("%7.3f ",b[i]);
printf("\n\n");
printf("MAT C IS:\n");
for (i=0; i<=4; i++)
printf("%7.3f ",c[i]);
printf("\n\n");
return 0;
}
***/