/* 已知X,Y,利用最小二乘法估计回归方程Y=XB+E */ *ods trace output; proc iml; start reg; n = nrow(x); /* 观测值数目 */ k = ncol(x); /* 变量数目 */ xpx = x`*x; /* 内积 */ xpy = x`*y; xpxi = inv(xpx); /* 内积矩阵的逆矩阵 */ b = xpxi*xpy; /* 参数b的估计 */ yhat = x*b; /* y的预测值 */ resid = y-yhat; /* 真实值预测值残差 */ sse = resid`*resid; /* 残差平方和*/ dfe = n-k; /* 残差的自由度 */ mse = sse/dfe; /* 均方误差 */ rmse = sqrt(mse); /*均方标准差 */ covb = xpxi#mse; /* 估计值b的协方差 */ stdb = sqrt(vecdiag(covb)); /* 标准误差 */ t = b/stdb; /* 估计值的t检验,零假设b=0 */ probt = 1-probf(t#t,1,dfe); /* t检验有显著性的p值 */ print name b stdb t probt; s = diag(1/stdb); corrb = s*covb*s; /* 估计值的相关系数矩阵 */ print ,"Covariance of Estimates", covb[r=name c=name] , "Correlation of Estimates",corrb[r=name c=name] ; if nrow(tval)=0 then return; /* 设定t值 */ projx = x*xpxi*x`; /* hat matrix */ vresid = (i(n)-projx)*mse; /* covariance of residuals */ vpred = projx#mse; /* covariance of predicted values */ h = vecdiag(projx); /* hat leverage values */ lowerm = yhat-tval#sqrt(h*mse); /* low. conf lim for mean */ upperm = yhat+tval#sqrt(h*mse); /* upper lim. for mean */ lower = yhat-tval#sqrt(h*mse+mse); /* lower lim. for indiv*/ upper = yhat+tval#sqrt(h*mse+mse);/* upper lim. for indiv */ print ,,"Predicted Values, Residuals, and Limits" ,, y yhat resid h lowerm upperm lower upper; finish reg; /* Routine to test a linear combination of the estimates */ /* given L, this routine tests hypothesis that LB = 0. */ start test; dfn=nrow(L); Lb=L*b; vLb=L*xpxi*L`; q=Lb`*inv(vLb)*Lb /dfn; f=q/mse; prob=1-probf(f,dfn,dfe); print ,f dfn dfe prob; finish test; /* Run it on population of U.S. for decades beginning 1790 */ x= { 1 1 1, 1 2 4, 1 3 9, 1 4 16, 1 5 25, 1 6 36, 1 7 49, 1 8 64 }; y= {3.929,5.308,7.239,9.638,12.866,17.069,23.191,31.443}; name={"Intercept", "Decade", "Decade**2" }; tval=2.57; /* for 5 df at 0.025 level to get 95% conf. int. */ reset fw=7; run reg; do; print ,"TEST Coef for Linear"; L={0 1 0 }; run test; print ,"TEST Coef for Linear,Quad"; L={0 1 0,0 0 1}; run test; print ,"TEST Linear+Quad = 0"; L={0 1 1 }; run test; end;
线性代数知识梳理
1.Y=XB+E,参数B结果:
PROC IML;
x={1 2,2 3,4 3};
y={-1,-2,-2};
b=INV(x‘*x)*x‘*y;
PRINT b;
QUIT;
2.方差的最小二乘估计:
PROC IML;
x={1 2,2 3,4 3};
y={-1,-2,-2};
n=NROW(x); /*样本容量n*/
p=ROUND(TRACE(GINV(x)*x)); /*矩阵稚次*/
in=I(n);
s2=(1/(n-p))*y‘*(in-x*GINV(x‘*x)*x‘)*y;
s=SQRT(s2);
PRINT s2 s;
QUIT;
Proc IML;
a = {1 2 3 4,
5 6 7 8,
9 0 1 2};
RESET FUZZ;
CALL SVD(u,q,v,a);
upu = u‘*u;
vpv = v‘*v;
vvp = v*v‘;
uup = u*u‘;
a2 = u*DIAG(q)*v‘;
ginva=GINV(a);
m = NROW(a); n = NCOL(a);
DO i=1 TO n;
IF q[i] <= 1E-12 * q[1] then q[i] = 0;
ELSE q[i] = 1 / q[i];
END;
ga = v*DIAG(q)*u‘;
PRINT a a2„ u q v„ upu uup„ vpv vvp„ ginva ga;
QUIT;
b.)施密特正交化,CALL GSORTH()
PROC IML;
a={1 2 3,2 3 5,3 7 11};
CALL GSORTH(p,t,lindep,a);
p_inv=INV(p);
b=p*t;
PRINT a b,p p_inv,t lindep;
QUIT;
4.线性方程求解
• HOMOGEN Function: 解其次线性方程
• SOLVE Function: 解普通线性方程
• TRISOLV Function: 三角形矩阵解方程
a).Solving AX=0 for X:
PROC IML;
RESET FUZZ;
a={10 5 15, 12 6 18, 14 7 21, 16 8 24};
x=HOMOGEN(a);
zero1=a*x[,1];
zero2=a*x[,2];
PRINT a x zero1 zero2;
QUIT;
b).Solving AX=B for X: 区别:x=INV(a)*b;
PROC IML;
a={1 2 3,6 5 4,0 7 8};
b={1, 2, 3};
x=SOLVE(a,b);
ax=a*x;
PRINT a x b ax ;
QUIT;
5.特征值与特征向量
• EIGEN CALL计算特征值和特征向量为方阵
• EIGVAL Function计算特征值为一个方阵
• EIGVEC Function计算特征向量为一个方阵
• GENEIG Call 计算特征值和特征向量为一般矩阵
Proc IML;
a={ 1 0 1,
0 4 -1,
1 -1 2};
RESET FUZZ;
eigval=EIGVAL(a);
rank=ROUND(TRACE(GINV(a)*a)); /** 矩阵A的稚 **/
PRINT a , ,eigval, ,rank;
RESET FUZZ;
CALL EIGEN(eigval,eigvec,a);
ax1=a*eigvec[,1];
ax2=a*eigvec[,2];
ax3=a*eigvec[,3];
lx1=eigval[1]*eigvec[,1];
lx2=eigval[2]*eigvec[,2];
lx3=eigval[3]*eigvec[,3];
PRINT a„eigval„eigvec„ax1 lx1„ax2 lx2„ax3 lx3;
QUIT;