$$Y = \beta_0 + \beta_1 X_1 + \beta_2 X_2 + \cdots + \beta_m X_m + \epsilon$$
lm() 函数
lm(formula, data, subset, weights, na.action, ...)
formula 参数为模型公式,例如 Y ~ X1 + X2
data 参数为 data.frame 格式的数据
blood <- data.frame(
X1 = c(76.0, 91.5, 85.5, 82.5, 79.0, 80.5, 74.5, 79.0, 85.0, 76.5, 82.0, 95.0, 92.5),
X2 = c(50, 20, 20, 30, 30, 50, 60, 50, 40, 55, 40, 40, 20),
Y = c(120, 141, 124, 126, 117, 125, 123, 125, 132, 123, 132, 155, 147)
)
# Fit a multiple linear regression of Y on X1 and X2 using the `blood` data.
lm_sol <- lm(formula = Y ~ X1 + X2, data = blood)

# Show the coefficient table and fit statistics for the model.
summary(lm_sol)

# Two new observations (X1, X2) at which to evaluate the fitted model.
new <- data.frame(X1 = c(75, 85), X2 = c(40, 60))

# Fitted values for the new observations together with 95% confidence bounds.
predict(lm_sol, newdata = new, interval = "confidence", level = 0.95)
$$\ln\left(\frac{P}{1-P}\right) = \beta_0 + \beta_1 X_1 + \beta_2 X_2 + \cdots + \beta_m X_m$$
glm() 函数
glm(formula, family = binomial(link = 'logit'), data, weights, subset, na.action, ...)
family 参数为拟合分布所属的分布族,取 binomial(link = 'logit') 则为 logistic 回归
library(readr)
# Load the student data, keeping only columns 2 through 4.
# NOTE(review): presumably these are qualification, GPA and ability, since the
# model below refers to those names -- confirm against student.csv.
student <- read_csv("student.csv")[, 2:4]

# Logistic regression: binomial family with a logit link.
lr_sol <- glm(
  formula = qualification ~ GPA + ability,
  family = binomial(link = "logit"),
  data = student
)
summary(lr_sol)

# Three new applicants to score with the fitted model.
new <- data.frame(GPA = c(3.0, 2.5, 3.5), ability = c(550, 420, 600))

# type = "response" returns predictions on the probability scale rather than
# the scale of the linear predictor.
prob_fit <- predict(lr_sol, newdata = new, type = "response")

# Convert probabilities to 0/1 labels: start from all zeros, then mark every
# observation whose probability is strictly above the threshold as 1.
threshold <- 0.5
new_prediction <- numeric(nrow(new))
new_prediction[which(prob_fit > threshold)] <- 1