glm 回归的结果只有一部分会由 summary() 展示;可使用 broom 包中的 tidy() 将模型的回归系数表转换为 data.frame。
# Fit a Poisson regression and collect its coefficient table as a data.frame.
# NOTE(review): `adl1` is not defined in the visible part of this file; it is
# assumed to be a data.frame with the columns outcome, x1, age and gender.
library(dplyr)
fit1 <- glm(outcome ~ x1 + age + gender, data = adl1, family = poisson())
summary(fit1)

# tidy() from broom turns the fitted model into a data.frame; it only returns
# the regression table (coefficients and P values).
# Install broom only when it is missing instead of re-installing on every run.
if (!requireNamespace("broom", quietly = TRUE)) {
  install.packages("broom")
}
library(broom)
data_frame_result <- tidy(fit1)

## RR and 95% CI
# Exponentiate the coefficients and their confidence limits.
exp(coef(fit1))
exp(confint(fit1))
万能函数出马!
glm models: output OR/PR/RR and 95% CI
# For Poisson models exp(coef) is a rate ratio (RR): output RR (95% CI) and P (the result column is still named `or`)
#' Summarise a fitted glm as exponentiated coefficients with 95% CI and P.
#'
#' Builds a table holding the coefficient (B), its standard error, the z
#' statistic, exp(coefficient), the exponentiated `confint()` limits and a
#' formatted p-value, then keeps only the requested rows.
#' The exponentiated coefficient is stored in the column `or` whatever the
#' model family is (for a Poisson model it is a rate ratio).
#'
#' @param myfit A fitted model accepted by `summary()`, `coef()` and
#'   `confint()`, e.g. the result of `glm()`.
#' @param rows Positions of the coefficient rows to keep. The default
#'   `c(2, 3)` keeps the second and third coefficients, as before.
#' @param var_labels Labels written to the `var` column, one per kept row.
#' @return A data.frame with the columns B, se, z, or, ci, or_with_ci, P and
#'   var, one row per entry of `rows`.
func_or_glm <- function(myfit, rows = c(2, 3),
                        var_labels = c("X1=1", "X1=2")) {
  stopifnot(length(rows) == length(var_labels))

  coef_table <- summary(myfit)$coefficients
  # confint() was evaluated twice before; it can be costly for glm fits, so
  # compute the limits once and reuse them.
  ci_limits <- exp(confint(myfit))

  B <- round(coef_table[, 1], 3)
  se <- round(coef_table[, 2], 3)
  z <- round(coef_table[, 3], 3)
  or <- round(exp(coef(myfit)), 2)
  pvalue <- round(coef_table[, 4], 3)
  ci <- paste0(
    "(", round(ci_limits[, 1], 2), ", ", round(ci_limits[, 2], 2), ")"
  )

  res <- data.frame(
    B, se, z, or, ci,
    or_with_ci = paste(or, ci, sep = " "),
    P = format.pval(pvalue, digits = 3, eps = 0.001)
  )

  # Keep only the rows of the exposure of interest and label them.
  res1 <- res[rows, ]
  res1$var <- var_labels
  res1
}
## Model 1: Poisson regression of outcome on x1, adjusted for age and gender.
# NOTE(review): `adl1` is not defined in the visible part of this file.
fit1 <- glm(outcome ~ x1 + age + gender, data = adl1, family = poisson())
# Show the standard model summary.
summary(fit1)
# Collect the rows kept by func_or_glm() (exponentiated coefficients with
# 95% CI and P) in a data.frame.
res_or1 <- func_or_glm(fit1)
COX models: output HR and 95% CI
# For Cox models: output HR (95% CI) and P
###HR CI
#' Summarise a fitted Cox model as hazard ratios with 95% CI and P.
#'
#' Reads the `coefficients` and `conf.int` matrices of `summary(myfit)` and
#' builds a table with the coefficient (B), its standard error, the z
#' statistic, the hazard ratio, its confidence limits and a formatted
#' p-value, then keeps only the requested rows.
#'
#' @param myfit A fitted model whose `summary()` has `coefficients` and
#'   `conf.int` matrices, e.g. the result of `coxph()`.
#' @param rows Positions of the coefficient rows to keep. The default
#'   `c(1, 2, 3)` keeps the first three coefficients, as before.
#' @param var_labels Labels written to the `var` column, one per kept row.
#' @return A data.frame with the columns B, se, z, hr, ci, hr_with_ci, P and
#'   var, one row per entry of `rows`.
func_hr_cox <- function(myfit, rows = c(1, 2, 3),
                        var_labels = c("X1=1", "X1=2", "X1=3")) {
  stopifnot(length(rows) == length(var_labels))

  fit_summary <- summary(myfit)
  coef_table <- fit_summary$coefficients
  ci_table <- fit_summary$conf.int

  # A robust-variance fit adds a "robust se" column right after "se(coef)",
  # which moves z and the p-value one column to the right. Fixed positions
  # 3/4/5 would then report the robust se as z and z as the p-value, so the
  # positions are shifted and the robust se (the one z is based on) is used.
  se_col <- if ("robust se" %in% colnames(coef_table)) 4 else 3

  B <- round(coef_table[, 1], 3)
  se <- round(coef_table[, se_col], 3)
  z <- round(coef_table[, se_col + 1], 3)
  hr <- round(coef_table[, 2], 2)
  pvalue <- round(coef_table[, se_col + 2], 3)
  # conf.int columns 3 and 4 hold the lower and upper confidence limits.
  ci <- paste0(
    "(", round(ci_table[, 3], 2), ", ", round(ci_table[, 4], 2), ")"
  )

  res <- data.frame(
    B, se, z, hr, ci,
    hr_with_ci = paste(hr, ci, sep = " "),
    P = format.pval(pvalue, digits = 3, eps = 0.001)
  )

  # Keep only the rows of the exposure of interest and label them.
  res1 <- res[rows, ]
  res1$var <- var_labels
  res1
}
## Cox model
# coxph() and Surv() below are expected to come from the survival package.
library(survival)
# NOTE(review): nothing visible here calls survminer; confirm whether it is
# still needed.
library(survminer)
# Cox regression of (time, status) on x1, adjusted for gender and age.
# NOTE(review): `ana5` is not defined in the visible part of this file.
fit2 <- coxph(Surv(time, status)~x1+gender+age, data=ana5)
# Show the standard model summary.
summary(fit2)
# Collect the rows kept by func_hr_cox() (HR with 95% CI and P) in a
# data.frame.
res_hr1 <- func_hr_cox(fit2)
批量生产:使用神奇的list(), function(), sapply(), lapply().
# Example: three outcomes, each analysed with models 1 and 2.
# Every element is a pair of column names; func_model1()/func_model2() paste
# the first name in as the first argument of Surv() and the second name as
# its second argument.
mylist <- list(
  c("ayear", "outcome1"),
  c("iyear", "outcome2"),
  c("dyear", "outcome3")
)
##functions
#' Build the model-1 Cox formula for one pair of Surv() arguments.
#'
#' @param data Character vector; its first element becomes the first
#'   argument of Surv() and its second element the second argument.
#' @return A formula `Surv(<data[1]>, <data[2]>) ~ ex + gender + age`.
func_model1 <- function(data) {
  survival_lhs <- paste0("Surv(", data[1], ",", data[2], ")")
  formula_text <- paste0(survival_lhs, "~ex+gender+age")
  as.formula(formula_text)
}
#' Build the model-2 Cox formula for one pair of Surv() arguments.
#'
#' Same as model 1 with the additional covariates x2, x3 and x4.
#'
#' @param data Character vector; its first element becomes the first
#'   argument of Surv() and its second element the second argument.
#' @return A formula
#'   `Surv(<data[1]>, <data[2]>) ~ ex + gender + age + x2 + x3 + x4`.
func_model2 <- function(data) {
  survival_lhs <- paste0("Surv(", data[1], ",", data[2], ")")
  formula_text <- paste0(survival_lhs, "~ex+gender+age+x2+x3+x4")
  as.formula(formula_text)
}
# Print the helper that turns one Cox fit into an HR (95% CI) table.
func_hr_cox # hr with ci

### Build the Cox formulas for every pair of Surv() arguments in `mylist`.
# lapply() always returns a list of formulas; sapply() only did so because
# formulas cannot be simplified into a vector.
model1_func_ex <- lapply(mylist, func_model1) # model 1 formulas
model1_func_ex
model2_func_ex <- lapply(mylist, func_model2) # model 2 formulas
model2_func_ex

##### Model 1
# NOTE(review): `ana5` is not defined in the visible part of this file.
model_cox <- lapply(model1_func_ex, function(x) {
  coxph(x, data = ana5)
})
model_cox
model_res <- lapply(model_cox, func_hr_cox) # one HR table per outcome

# Merge the HR tables of the three outcomes into one data.frame.
res_m1 <- as.data.frame(model_res[[1]], check.names = FALSE)
res_m2 <- as.data.frame(model_res[[2]], check.names = FALSE)
res_m3 <- as.data.frame(model_res[[3]], check.names = FALSE)
res_cox_model1 <- rbind(res_m1, res_m2, res_m3)
# func_hr_cox() keeps three rows per outcome by default.
res_cox_model1$name <- rep(c("D1", "D2", "D3"), each = 3)
res_cox_model1

### Model 2
model_cox1 <- lapply(model2_func_ex, function(x) {
  coxph(x, data = ana5)
})
model_cox1
model_res1 <- lapply(model_cox1, func_hr_cox) # one HR table per outcome

# Merge the HR tables of the three outcomes into one data.frame.
# Fixed: this section used to read `model_res` (the model-1 results), so
# res_cox_model2 was a copy of the model-1 table.
res_m1 <- as.data.frame(model_res1[[1]], check.names = FALSE)
res_m2 <- as.data.frame(model_res1[[2]], check.names = FALSE)
res_m3 <- as.data.frame(model_res1[[3]], check.names = FALSE)
res_cox_model2 <- rbind(res_m1, res_m2, res_m3)
res_cox_model2$name <- rep(c("D1", "D2", "D3"), each = 3)
res_cox_model2

## Output: one Excel sheet per model.
library(writexl)
# Fixed: the first sheet referred to the undefined `res_cox_model` and both
# sheets carried the same name.
sheets <- list(
  "hr_cox_model1" = res_cox_model1,
  "hr_cox_model2" = res_cox_model2
)
write_xlsx(sheets, "res_hr_cox_model_0107.xlsx")
相乘交互作用
(1)定义:若交互作用不存在,则两个(或以上)因素共同作用于某一疾病(或疾病相关变量)时,其效应等于这些因素的独立作用之积。即R11/R00=(R10/R00)(R01/R00),其中R00、R10、R01、R11分别表示无暴露、仅暴露于因素1、仅暴露于因素2及同时暴露于两因素的发病率或其他频率指标。
(2)检验相乘交互作用:检验 $R_{11}R_{00}/(R_{01}R_{10})$ 是否为1。
(3) 评价指标就是 OR或 RR 的对数值,为 logistic 回归、对数线性模型、泊松回归和 Cox 模型中交互项的回归系数,直接通过判定回归系数是否为0 就可以检验相乘交互作用。
相加交互作用
(1)定义:若交互作用不存在,则两个(或以上)因素共同作用于某一疾病(或疾病相关变量)时,其效应等于这些因素的独立作用之和。即R11-R00=(R10-R00)+(R01-R00)。
(2) 评价指标:RERI(relative excess risk due to interaction,交互作用相对超额危险度)、AP(attributable proportion due to interaction,归因比)和 S(synergy index,协同作用指数)。检验 RERI 和 AP 的可信区间是否包含 0,S 的可信区间是否包含 1。
RERI反映归因交互作用引起的相对危险度;
AP反映在两因素同时存在时,它们的总效应中归因于交互作用的部分所占的比例;
S 反映的是两因素同时存在时的效应与两因素独立效应和的比值。
(3) 假定结局和 2 个自变量均为二分类变量,如果结局和自变量为多分类变量,可以通过设置哑变量转换为均为二分类的情况。
使用epiR包:
## Additive interaction with epiR
# Install epiR only when it is missing instead of re-installing on every run.
if (!requireNamespace("epiR", quietly = TRUE)) {
  install.packages("epiR")
}
library(epiR)

# NOTE(review): `adl1` is not defined in the visible part of this file.
fit1 <- glm(outcome ~ x1 + age + gender, data = adl1, family = poisson())
summary(fit1)

# NOTE(review): this model has no interaction or joint-exposure dummy terms;
# coef = c(2, 3, 4) simply picks the 2nd to 4th coefficients. Confirm that
# these are the three exposure terms expected with param = "dummy".
a1 <- epi.interaction(model = fit1, coef = c(2, 3, 4), type = "RERI",
                      param = "dummy", conf.level = 0.95)
a2 <- epi.interaction(model = fit1, coef = c(2, 3, 4), type = "APAB",
                      param = "dummy", conf.level = 0.95)
a3 <- epi.interaction(model = fit1, coef = c(2, 3, 4), param = "dummy",
                      type = "S", conf.level = 0.95)

# Stack the three measures and format each as "est (lower~upper)".
a4 <- rbind(a1, a2, a3)
a5 <- within(a4, {
  name <- c("RERI", "APAB", "S")
  group <- c("total", "total", "total")
  inter_with_CI <- paste(round(est, 2), " (", round(lower, 2), "~",
                         round(upper, 2), ")", sep = "")
})
# Call dplyr's select() explicitly so another attached package that also
# exports select() cannot mask it.
a5 <- dplyr::select(a5, inter_with_CI, name, group)
a5
#思考:如果变量A是三分类,B是二分类,epiR用不了,怎么做相加交互作用分析?
注意:
(1) AP 和 S 是比值形式,因此,需要先取对数再计算可信区间,然后对上下限取幂后得到 AP 和 S 的可信区间。
(2) 只有当 RERI 和 AP 的可信区间不包含 0、S 的可信区间不包含 1 时,才能在排除抽样误差的影响后认为存在相加交互作用。
(3) RERI 不为0,说明存在相加交互作用,但是RERI 等于 0,也可能存在相互抵消的相加交互作用。
(4) 这三个指标适用于评价危险因素的相加交互作用,对于保护因素,赋值应以最低风险的一类为参照,否则,可能导致三个指标的结果解释方向不一致。
(5) 存在混杂因素的情况下,采用 logistic 回归、对数线性模型、Cox 模型估计的三个指标与混杂因素无关; 线性风险模型和线性比值模型中 S 的估计不因混杂因素的水平而变化,但是 RERI和 AP 的估计与混杂因素的取值有关,因此,得到的估计不唯一。