# 生存分析 R语言 样本模块 (Survival analysis in R: sample module)

## External draft: cross-tabulate the `stage` column against `group`, then run
## Fisher's exact test on the same pair of columns.
## NOTE(review): in this file `mydata` is only read from the workbook further
## down, and `group` is only derived from `max_size` in the cleaning step, so
## these two calls appear to need the pipeline below to have run first --
## confirm the intended execution order.
table(mydata$stage,mydata$group)
fisher.test(mydata$stage,mydata$group)

############ survival pipeline BY JIN
# Load the data.
# The workbook directory is kept in a variable and joined with file.path()
# instead of calling setwd(): changing the working directory is a session-wide
# side effect, and no other line visible in this script depends on it.
# Edit `data_dir` to point at the folder that holds mydata.xlsx.
library(readxl)
data_dir <- "C:/Users/Administrator/Desktop/mission/survival"
mydata <- read_excel(file.path(data_dir, "mydata.xlsx"))

# Clean the data: turn continuous columns into two-level codes (1 / 2).

# Code `x` as 1 when it is below `cutoff` and 2 when it is at or above it.
# With `cutoff_in_low = TRUE` a value equal to `cutoff` is coded 1 instead.
# Entries whose comparison is NA (e.g. missing input) are left as NA.
# Returns a numeric vector the same length as `x`.
dichotomize <- function(x, cutoff, cutoff_in_low = FALSE) {
  is_low <- if (cutoff_in_low) x <= cutoff else x < cutoff
  coded <- rep(NA_real_, length(x))
  coded[which(is_low)] <- 1
  coded[which(!is_low)] <- 2
  coded
}

# Each derived column is written with a single whole-vector assignment, so the
# column never has to be indexed before it exists (which warns on tibbles).
# Any previous content of a column with the same name is replaced.
# The creation order is significant: the positional selection below picks the
# new columns by index.
mydata$afp_new <- dichotomize(mydata$before_AFP, 400)
mydata$ALB_new <- dichotomize(mydata$ALB, 35)
mydata$ALT_new <- dichotomize(mydata$ALT, 40)
mydata$AST_new <- dichotomize(mydata$AST, 40)
mydata$TBIL_new <- dichotomize(mydata$TBIL, 17.1)

# group: 1 when max_size <= 5, 2 when max_size > 5.
mydata$group <- dichotomize(mydata$max_size, 5, cutoff_in_low = TRUE)

# Keep the analysis columns, selected by position.
# NOTE(review): indices 23-28 presumably address the six columns created above,
# which would require the workbook to have exactly 22 columns -- confirm, or
# switch to selecting by name.
mydata_selected <- mydata[c(1, 2, 23, 27, 24, 9, 10, 12, 25, 26, 11, 28, 14)]

# Descriptive analysis: summary table of the selected columns, split by group.
library(tableone)

# Summarise every selected column except the 12th.
# NOTE(review): the 12th column is presumably `group`, left out because it is
# the stratifying variable -- confirm against the column order.
vars <- names(mydata_selected)[-12]

# Columns 2-10 and 13 are passed to CreateTableOne() as factorVars.
factor_vars <- names(mydata_selected)[c(2:10, 13)]

tableone <- CreateTableOne(
  vars = vars,
  strata = "group",
  factorVars = factor_vars,
  data = mydata_selected
)
print(tableone, quote = TRUE, noSpaces = TRUE)

# Test statistics to accompany the descriptive table.

# Chi-square test of each variable in `factor_vars` against `group`.
# The variable name is printed first so each test result can be matched to
# its variable in the console output.
for (i in factor_vars){
  print(i)
  print(chisq.test(mydata_selected[[i]],mydata_selected$group))
}
# Two-sample t-tests (t.test defaults) comparing group 1 with group 2,
# first for `age`, then for `max_size`.
# NOTE(review): `group` is itself derived from `max_size` in the cleaning
# step, so the second test compares the variable that defines the groups.
t.test(mydata_selected$age[mydata_selected$group==1],mydata_selected$age[mydata_selected$group==2])
t.test(mydata_selected$max_size[mydata_selected$group==1],mydata_selected$max_size[mydata_selected$group==2])

# Kaplan-Meier curves of OS / OS_status by group, drawn with a risk table and
# a p-value on the plot.
library(survival)
library(survminer)
# NOTE(review): the documented ggsurvplot() title argument is `title`; `main`
# is kept as written here -- confirm it is still honoured by the installed
# survminer version.
ggsurvplot(
  survfit(Surv(OS, OS_status) ~ group, data = mydata),
  main = "Survival curve",
  xlab = "Overall Survival(months)",
  legend.title = "group",
  legend.labs = c("小肝癌", "大肝癌"),
  break.time.by = 10,
  risk.table = TRUE,
  pval = TRUE
)

# Log-rank test for the same group comparison.
survdiff(Surv(OS, OS_status) ~ group, data = mydata)

# Coerce the categorical covariates to numeric codes (as.numeric, not factor)
# so the Cox models below treat each one as a single numeric term.
mydata_selected[factor_vars] <- lapply(mydata_selected[factor_vars], as.numeric)

# `group` is not in factor_vars, so it is converted separately.
mydata_selected$group <- as.numeric(mydata_selected$group)

# Univariate Cox regression, restricted to the rows with group == 2.
# Every column of the subset is fitted on its own against Surv(time, status);
# columns whose Wald p-value is below 0.05 are carried forward in `dat2`.

# Row mask shared by the outcome vectors and the covariate table so the three
# stay aligned row by row.
in_group2 <- mydata_selected$group == 2
time <- mydata$OS[in_group2]
status <- mydata$OS_status[in_group2]
colnames(mydata_selected)
dcl2 <- as.data.frame(mydata_selected[in_group2, ])

# One slot per column, allocated up front.
n_vars <- ncol(dcl2)
result0 <- vector("list", n_vars)  # p-value taken from anova()
HR0 <- vector("list", n_vars)      # first row of summary()$conf.int
HRCOEF0 <- vector("list", n_vars)  # first row of summary()$coefficients

# The loop stays at top level with index `i` because the model formula refers
# to `dcl2[, i]`, which anova() re-evaluates when it rebuilds the model frame.
# NOTE(review): the loop also covers the `group` column, which is constant in
# this subset; it is kept so the result table has one row per column.
for (i in seq_len(n_vars)) {
  # Fit once and reuse the fit for all three extracted quantities.
  fit_i <- coxph(Surv(time, status) ~ dcl2[, i], data = dcl2)
  fit_summary <- summary(fit_i)
  lrt_table <- anova(fit_i)

  # Locate the p-value column by its "Pr" prefix instead of relying on `$`
  # partial matching; row 2 is the row for the covariate term.
  p_col <- which(startsWith(names(lrt_table), "Pr"))[1]
  result0[[i]] <- lrt_table[[p_col]][2]
  HR0[[i]] <- fit_summary$conf.int[1, ]
  HRCOEF0[[i]] <- fit_summary$coefficients[1, ]
}

pval0 <- unlist(result0)
gene0 <- colnames(dcl2)

# Hazard ratio with its 95% interval, one row per column of dcl2.
HRR0 <- data.frame(do.call(rbind, lapply(HR0, `[`, 1:4)))
names(HRR0) <- c("exp(coef)", "exp(-coef)", "lower .95", "upper .95")
head(HRR0)

# Coefficient row: coef, exp(coef), se(coef), z, Pr(>|z|).
HRCOEFF0 <- data.frame(do.call(rbind, lapply(HRCOEF0, `[`, 1:5)))
names(HRCOEFF0) <- c("coef1", "exp(coef)1", "se(coef)1", "z1", "Pvalue1")

# Combined univariate results. data.frame() rewrites the non-syntactic column
# names (e.g. "exp(coef)" becomes "exp.coef."); "Pvalue1" is unaffected.
resultsata0 <- data.frame(gene0, pval0, HRR0, HRCOEFF0)

# Keep the covariates with Wald p < 0.05; which() drops NA p-values.
selectresult0 <- resultsata0[which(resultsata0$Pvalue1 < 0.05), ]
rownames(selectresult0) <- selectresult0[, 1]
if (nrow(selectresult0) == 0) {
  warning("No covariate reached p < 0.05 in the univariate Cox screen.",
          call. = FALSE)
}

# drop = FALSE keeps a named one-column data frame when exactly one covariate
# passes; without it the column collapses to a vector and loses its name.
selected0 <- dcl2[, rownames(selectresult0), drop = FALSE]
dat2 <- selected0  # selected data

# Multivariable Cox model using every column of `dat2` (the covariates kept
# by the univariate screen) as a predictor of Surv(time, status).
surfit <- coxph(Surv(time, status) ~ ., data = dat2)
summary(surfit)

# Stepwise model selection with step() at its default settings, then the
# summary of the model it returns.
surfit_f <- step(surfit)
summary(surfit_f)
 

# 你可能感兴趣的:(生存分析 R语言 样本模块)