《R语言与统计分析》-回归分析与相关分析

####回归分析与相关分析####
####相关性及其度量####
####Pearson相关性检验、Spearman秩检验、Kendall检验-cor.test####
cor.test(x, y,
         alternative = c("two.sided", "less", "greater"),
         method = c("pearson", "kendall", "spearman"),
         exact = NULL, conf.level = 0.95, continuity = FALSE, ...)
#x,y是相同长度的向量

cor.test(formula, data, subset, na.action, ...)

####线性回归####
#求解回归方程
lm(formula, data, subset, weights, na.action,
   method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE,
   singular.ok = TRUE, contrasts = NULL, offset, ...)

#求参数的置信区间
confint(object, parm, level = 0.95, ...)
#object是拟合得到的回归模型对象,parm指定需要求置信区间的参数

#残差分析
residuals(object, ...)

#预测
predict (object, ...)

####多元回归:变量选择与最优回归####
####逐步回归####
step(object, scope, scale = 0,
     direction = c("both", "backward", "forward"),
     trace = 1, keep = NULL, steps = 1000, k = 2, ...)

####回归诊断####
####残差分析####
#残差及残差图
#残差
residuals(object, ...)
resid(object, ...)

#标准化残差
rstandard(model, infl = lm.influence(model, do.coef = FALSE),
          sd = sqrt(deviance(model)/df.residual(model)),
          type = c("sd.1", "predictive"), ...)

#学生化残差
rstudent(model, infl = lm.influence(model, do.coef = FALSE),
         res = infl$wt.res, ...)

####影响函数####
influence(model, do.coef = TRUE)
#do.coef = TRUE表示同时给出去掉第i个观测点后模型回归系数的变化量

####Cook距离####
cooks.distance(model, infl = lm.influence(model, do.coef = FALSE),
               res = weighted.residuals(model),
               sd = sqrt(deviance(model)/df.residual(model)),
               hat = infl$hat, ...)

####dffits准则####
dffits(model, infl = , res = )

####covratio准则####
covratio(model, infl = lm.influence(model, do.coef = FALSE),
         res = weighted.residuals(model))

#以上的影响分析可以用下面的函数概括
influence.measures(model, infl = influence(model))

####共线性诊断####
eigen(x, symmetric, only.values = FALSE, EISPACK = FALSE)
#若有r个特征值近似0,则有r个共线性关系,
#且共线性关系的系数向量就是近似于0的特征值对应的特征向量

kappa(z, exact = FALSE)
#10~30之间为弱相关性,30~100为中等相关,100以上为强相关

library(DAAG)
vif(obj, digits=5)
#vif>10,模型中有很强的共线性问题

####logistic回归####
glm(formula, family = gaussian, data, weights, subset,
    na.action, start = NULL, etastart, mustart, offset,
    control = list(...), model = TRUE, method = "glm.fit",
    x = FALSE, y = TRUE, singular.ok = TRUE, contrasts = NULL, ...)
#family=gaussian,基于正态分布族的广义线性模型
#family=binomial,基于二项分布的广义线性模型,即logistic回归
#family=poisson(link=log),基于泊松分布的广义线性模型
#family=Gamma(link=inverse),基于伽马分布的广义线性模型

你可能感兴趣的:(python,机器学习,回归)