前言
本文使用随机生成的数据集进行批量单因素logistic回归分析,并提取P<0.05的变量以供后续多因素logistic回归。后续会继续分享一些R语言分析代码,欢迎大家一起讨论学习。
构建数据,Y为因变量,其他为自变量
#0. Build the data: Y is the outcome, every other column is a predictor
set.seed(1234)  # fix the RNG seed so the simulated data are reproducible
# The columns are drawn in the order listed; keep that order, because each
# draw consumes the random stream and reordering would change the data.
log_data <- data.frame(
  Y = sample(0:1, 600, replace = TRUE),      # binary outcome
  sex = sample(1:2, 600, replace = TRUE),    # two-level code
  edu = sample(1:4, 600, replace = TRUE),    # four-level code
  BMI = rnorm(600, mean = 22, sd = 3),
  白蛋白 = rnorm(600, mean = 35, sd = 6),
  随机血糖 = rnorm(600, mean = 4.75, sd = 1.2)
)
summary(log_data)
Y sex edu BMI 白蛋白 随机血糖
Min. :0.0000 Min. :1.000 Min. :1.00 Min. :13.21 Min. :16.27 Min. :1.036
1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:20.18 1st Qu.:30.94 1st Qu.:3.956
Median :1.0000 Median :1.000 Median :3.00 Median :22.17 Median :34.78 Median :4.816
Mean :0.5017 Mean :1.467 Mean :2.57 Mean :22.05 Mean :35.04 Mean :4.778
3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:4.00 3rd Qu.:23.98 3rd Qu.:39.19 3rd Qu.:5.579
Max. :1.0000 Max. :2.000 Max. :4.00 Max. :30.07 Max. :54.01 Max. :8.377
# Data preparation: turn the categorical columns into factors
VarsC <- c("Y", "sex", "edu")
# Convert all listed columns in one assignment
log_data[VarsC] <- lapply(log_data[VarsC], as.factor)
summary(log_data)
Y sex edu BMI 白蛋白 随机血糖
0:299 1:320 1:145 Min. :13.21 Min. :16.27 Min. :1.036
1:301 2:280 2:142 1st Qu.:20.18 1st Qu.:30.94 1st Qu.:3.956
3:139 Median :22.17 Median :34.78 Median :4.816
4:174 Mean :22.05 Mean :35.04 Mean :4.778
3rd Qu.:23.98 3rd Qu.:39.19 3rd Qu.:5.579
Max. :30.07 Max. :54.01 Max. :8.377
准备进行分析的自变量
#2. Predictors to analyse
# Every column except the outcome Y. Selecting by name rather than by
# position (2:6) keeps the list correct if columns are added or reordered;
# setdiff() preserves the column order of log_data.
varsU <- setdiff(names(log_data), "Y")
批量单因素logistic回归
#3. Batch univariate logistic regression
# Fits Y ~ <predictor> separately for each name in varsU and stacks the
# non-intercept coefficient rows into Result. Each row carries the
# coefficient table columns, then OR and its confidence limits, then the
# predictor name in `Characteristics`. Column order matters: later code
# picks columns from Result by position.
ResultList <- vector("list", length(varsU))  # preallocate instead of rbind-in-loop
for (i in seq_along(varsU)) {  # seq_along() yields no iterations for empty varsU
  # as.name() turns the column name into a symbol so the formula is Y ~ <var>
  fit <- glm(substitute(Y ~ x, list(x = as.name(varsU[i]))),
             data = log_data, family = binomial())
  fitSum <- summary(fit)
  # Use the full component name; `$coef` only worked through partial matching
  coefTab <- fitSum$coefficients
  OR <- exp(coefTab[, "Estimate"])
  # confint() limits are on the log-odds scale; exp() moves them to the OR scale
  result1 <- data.frame(cbind(coefTab, cbind(OR, exp(confint(fit)))))
  result1$Characteristics <- varsU[i]  # tag rows with the predictor name
  ResultList[[i]] <- result1[-1, ]     # [-1, ] drops the intercept row
}
Result <- do.call(rbind, ResultList)  # NULL when varsU is empty
提取制表所需的列并重命名,筛选有统计学意义(P<0.05)的变量
#4. Pick the reporting columns, rename them, and extract significant variables
# Columns are taken from Result by position: 1 = Estimate, 4 = p-value,
# 5 = OR, 6-7 = confidence limits, 8 = predictor name.
Uni_log <- data.frame(Result[, c(1, 4:8)])
colnames(Uni_log)[2:5] <- c("P", "OR", "CIlower", "CIupper")  # readable names
# which() discards NA comparisons, so a missing p-value cannot inject an NA
# entry into the list of significant predictors.
ExtractVar <- unique(Uni_log$Characteristics[which(Uni_log$P < 0.05)])
write.csv(Uni_log, file = "Uni_log.csv")  # export the table
Uni_log
Estimate P OR CIlower CIupper Characteristics
sex2 0.094662809 0.5631116 1.0992881 0.7975801 1.515763 sex
edu2 0.154180710 0.5141640 1.1667017 0.7342831 1.856362 edu
edu3 -0.146213397 0.5389345 0.8639733 0.5412489 1.377153 edu
edu4 0.156454546 0.4869958 1.1693576 0.7523336 1.819676 edu
BMI 0.021802997 0.4450671 1.0220424 0.9664705 1.081118 BMI
白蛋白 0.004185709 0.7496789 1.0041945 0.9786903 1.030415 白蛋白
随机血糖 -0.076032436 0.2659728 0.9267861 0.8100385 1.059365 随机血糖