####回归分析与相关分析####
#### Correlation and its measures ####
#### Pearson correlation test, Spearman rank test, Kendall test - cor.test ####
# Vector form: x and y are vectors of the same length.
cor.test(
  x, y,
  alternative = c("two.sided", "less", "greater"),
  method = c("pearson", "kendall", "spearman"),
  exact = NULL, conf.level = 0.95, continuity = FALSE, ...
)
# Formula form.
cor.test(formula, data, subset, na.action, ...)
#### Linear regression ####
# Fit the regression equation.
lm(
  formula, data, subset, weights, na.action,
  method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE,
  singular.ok = TRUE, contrasts = NULL, offset, ...
)
# Confidence intervals for the parameters:
# `object` is the fitted regression equation; `parm` names the parameters
# whose confidence intervals are wanted.
confint(object, parm, level = 0.95, ...)
# Residual analysis.
residuals(object, ...)
# Prediction.
predict(object, ...)
#### Multiple regression: variable selection and optimal regression ####
#### Stepwise regression ####
step(
  object, scope,
  scale = 0,
  direction = c("both", "backward", "forward"),
  trace = 1, keep = NULL, steps = 1000, k = 2, ...
)
#### Regression diagnostics ####
#### Residual analysis ####
# Residuals and residual plots
# Residuals.
residuals(object, ...)
resid(object, ...)
# Standardized residuals.
rstandard(
  model,
  infl = lm.influence(model, do.coef = FALSE),
  sd = sqrt(deviance(model) / df.residual(model)),
  type = c("sd.1", "predictive"), ...
)
# Studentized residuals.
rstudent(
  model,
  infl = lm.influence(model, do.coef = FALSE),
  res = infl$wt.res, ...
)
#### Influence function ####
# do.coef = TRUE also returns the regression coefficients of the model
# obtained after removing the i-th observation.
influence(model, do.coef = TRUE)
#### Cook's distance ####
cooks.distance(
  model,
  infl = lm.influence(model, do.coef = FALSE),
  res = weighted.residuals(model),
  sd = sqrt(deviance(model) / df.residual(model)),
  hat = infl$hat, ...
)
#### DFFITS criterion ####
# The `infl` and `res` defaults are written out in full (instead of being
# left as empty placeholders) so this template matches the cooks.distance
# and covratio templates in this file and can be adapted directly.
dffits(
  model,
  infl = lm.influence(model, do.coef = FALSE),
  res = weighted.residuals(model)
)
#### COVRATIO criterion ####
covratio(
  model,
  infl = lm.influence(model, do.coef = FALSE),
  res = weighted.residuals(model)
)
# All of the influence analyses above can be summarised with this function:
influence.measures(model, infl = influence(model))
#### Collinearity diagnostics ####
# If r eigenvalues are approximately 0, there are r collinearity relations,
# and the coefficient vector of each relation is the eigenvector belonging
# to the corresponding near-zero eigenvalue.
# NOTE(review): EISPACK appears to be defunct/ignored in current R - confirm.
eigen(x, symmetric, only.values = FALSE, EISPACK = FALSE)
# Rule of thumb for the kappa() value: 10-30 indicates weak collinearity,
# 30-100 moderate collinearity, above 100 strong collinearity.
# NOTE(review): confirm these cut-offs suit the matrix passed as `z`.
kappa(z, exact = FALSE)
library(DAAG)
# vif > 10 means the model has a severe collinearity problem.
vif(obj, digits = 5)
#### Logistic regression ####
# Choice of `family`:
#   gaussian              - GLM based on the normal distribution family
#   binomial              - GLM based on the binomial distribution,
#                           i.e. logistic regression
#   poisson(link = log)   - GLM based on the Poisson distribution
#   Gamma(link = inverse) - GLM based on the gamma distribution
glm(
  formula, family = gaussian, data, weights, subset,
  na.action, start = NULL, etastart, mustart, offset,
  control = list(...), model = TRUE, method = "glm.fit",
  x = FALSE, y = TRUE, singular.ok = TRUE, contrasts = NULL, ...
)