#######lm 编程实现
mlr<-function(Y,X){ ####编写成一个函数mlr 调用格式mlr(Y=Y1,X=X2) Y1为因变量,X1为自变量矩阵
Y=data1[,1]
X=data1[,-1]
Intercept<-rep(1,nrow(X))
X=cbind(Intercept,X)
X=as.matrix(X)
Y=as.matrix(Y)
X.T=t(X)
coefficient<-solve(X.T %*% X) %*% X.T %*% Y #回归系数
Y_predict<-X%*%coefficient #估计y值
residuals<-Y-Y_predict#残差
TSS=sum((Y-mean(Y))^2)# TSS
RSS=sum((Y-Y_predict)^2)# RSS
ESS=sum((Y_predict-mean(Y))^2)#ESS
MSE=RSS/(dim(X)[1]-dim(X)[2]-1)#求算离回归方差
R2=ESS/TSS #拟合优度R2
R2_a=1-(RSS/(dim(X)[1]-dim(X)[2]))/(TSS/(dim(X)[1]-1))#调整判决系数R2
R2_a2=1-((1-R2)*(dim(X)[1]-1)/(dim(X)[1]-dim(X)[2]))#调整判决系数R2
F=(ESS/(dim(X)[2]-1))/(RSS/(dim(X)[1]-dim(X)[2]))#F 检验值
F_p=round(1-pf(F,dim(X)[2]-1,dim(X)[1]-dim(X)[2]), digits = 6)#F检验P值
sigam2=RSS/(dim(X)[1]-dim(X)[2])#siagma2
cjj=t(diag(solve(t(X)%*%X)))#cjj
SEbeta=sqrt(sigam2*cjj)#SE(beta)
T=coefficient/t(SEbeta)#回归系数的T值
TP=2*(1-pt(abs(T),dim(X)[1]-dim(X)[2]))#T检验的P值
###估计参数的区间估计
alpha=0.05
t1=-qt(alpha/2,dim(X)[1]-dim(X)[2])
lower=coefficient-t(t1*SEbeta)#区间下界
upper=coefficient+t(t1*SEbeta)#区间上届
result<-cbind(coefficient,t(SEbeta),T,TP,lower,upper)
colnames(result)<-c('Estimate','Std.Error','t-value','P(>|t|)','95%-lower Estimate','95%-upper Estimate')
result###系数估计结果
a1=data.frame('F-statistic'=F,'P-value'=F_p)
a2=data.frame('Multiple R-squared'=R2,'Adjusted R-squared'=R2_a)
a3=data.frame('Residual standard error'=sqrt(sigam2))
Z=list(result,a1,a2,a3,Y_predict,residuals)
names(Z)<-c('result','F-test','R2','SE','fitted','residuals')
dev.new(1)
plot(residuals,col='blue',main='residuals plot')
dev.new(2)
plot(Y,Y_predict,main='Comparison of Estimated Value with Predicted Value',pch=16,col='red')
abline(a=0,b=sqrt(R2),col='orange')
text(max(Y)/2-4,max(Y)/2-4,paste('R-squared=',round(R2,3)))
dev.new(3)
plot(Y,type='b',pch=16,col='blue',lty=2)
par(new=TRUE)###在一幅图里加两个
plot(Y_predict,type='b',pch=17,col='red',lty=1,ylab='Y')
legend("topleft", inset=.05, title="Drug Type", c("Y","Y-fitted"), lty=c(2,1), pch=c(16, 17), col=c("blue",'red'))
title('Comparison of Estimated Value with Predicted Value')
print(Z)
return(Z)
}
#####运行使用求解数据
data1<-read.csv('C:\\Users\\lvdian\\Desktop\\纪录片播放量.csv')#读取数据设置好路径
data1<-data1[,-1]#删除第一列无用数据
Y=data1$y#因变量
X=data1[,-1]#自变量
L=mlr(Y=Y,X=X)#调用