TCGA数据批量运行Coxph函数

df数据框形如(原文此处为示意图):每行对应一个样本,列包括生存时间 OS.time、生存状态 OS,以及各基因的表达值(列名即基因名)。

# Run a univariate Cox proportional-hazards model for every gene in `genelist`
# and append one result row per gene to "./survival/summary_HR.csv".
#
# Args:
#   df:       Survival data (anything as.data.frame() accepts) containing the
#             columns OS.time and OS plus one expression column per gene.
#   genelist: Names of the columns of `df` to test; one model is fitted per
#             name.
#
# Returns:
#   Invisibly, a list with one character vector per gene:
#   c(gene, beta, "HR (lower-upper)", wald.test, p.value).
#
# Side effects:
#   Creates "./survival" if it does not exist and appends to
#   "./survival/summary_HR.csv". The header row is written only when the file
#   is new or empty, so repeated calls do not scatter headers through the data.
#   The working directory is NOT changed, so successive calls always write to
#   the same folder instead of nesting "./survival/survival/...".
djs.coxph <- function(df, genelist) {
  library(survival)
  # Not used below; still attached so callers that relied on this function
  # putting survminer on the search path keep working.
  library(survminer)

  # Prepared survival data frame: OS.time, OS and gene expression values.
  df <- as.data.frame(df)
  genelist <- as.character(unlist(genelist))

  # Fail early with a readable message instead of deep inside coxph().
  absent <- setdiff(c("OS.time", "OS", genelist), colnames(df))
  if (length(absent) > 0) {
    stop("Columns not found in df: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  out_dir <- "./survival"
  dir.create(out_dir, showWarnings = FALSE)
  out_file <- file.path(out_dir, "summary_HR.csv")

  # Write the header that labels the Cox results, once per file.
  if (!file.exists(out_file) || file.size(out_file) == 0) {
    colname <- c("gene", "beta", "HR (95% CI for HR)", "wald.test", "p.value")
    write.table(t(colname), file = out_file,
                sep = ",", append = TRUE, col.names = FALSE, row.names = FALSE)
  }

  # Fit one model per gene.
  results <- lapply(genelist, function(gene) {
    # Backticks keep non-syntactic gene names such as "HLA-A" from being
    # parsed as an arithmetic expression.
    univ_formula <- as.formula(paste0("Surv(OS.time, OS) ~ `", gene, "`"))
    fit_summary <- summary(coxph(univ_formula, data = df))

    p.value <- signif(fit_summary$waldtest[["pvalue"]], digits = 2)
    wald.test <- signif(fit_summary$waldtest[["test"]], digits = 2)
    # Only the first coefficient row is reported (one numeric covariate).
    beta <- signif(fit_summary$coefficients[1, "coef"], digits = 2)
    HR <- signif(fit_summary$coefficients[1, "exp(coef)"], digits = 2)
    HR.confint.lower <- signif(fit_summary$conf.int[1, "lower .95"], 2)
    HR.confint.upper <- signif(fit_summary$conf.int[1, "upper .95"], 2)
    HR <- paste0(HR, " (",
                 HR.confint.lower, "-", HR.confint.upper, ")")
    res <- c(gene, beta, HR, wald.test, p.value)

    # Append this gene's row to the summary file.
    write.table(t(res), file = out_file,
                sep = ",", append = TRUE, col.names = FALSE, row.names = FALSE)
    res
  })

  invisible(results)
}
# Draw a Kaplan-Meier plot for every gene in `genelist`, splitting samples into
# "high" and "low" expression groups, and record the log-rank p-value and the
# risk table for each gene.
#
# Args:
#   df:       Survival data (anything as.data.frame() accepts) containing the
#             columns OS.time and OS plus one expression column per gene.
#   genelist: Names of the columns of `df` to plot.
#   group:    How to split samples, one of:
#               "median"   - high if >= median, otherwise low;
#               "mean"     - high if >= mean, otherwise low;
#               "quantile" - high if >= 3rd quartile, low if <= 1st quartile,
#                            samples in between are left out.
#
# Returns:
#   NULL, invisibly. The function is called for its side effects.
#
# Side effects (all inside "./survival", created if needed; the working
# directory itself is left unchanged):
#   pvalue.of.survivaldata.csv    - one "gene,p-value" row appended per gene;
#   risktable.of.survivaldata.csv - gene name, then its risk table with header;
#   <gene>_survival.png           - the Kaplan-Meier plot.
#
# Missing expression values are ignored when computing the cut points, and the
# affected samples are excluded from that gene's analysis. A gene that does not
# yield both a "high" and a "low" group is skipped with a warning so that one
# problematic gene does not abort the whole batch.
djs.KMplot <- function(df, genelist, group) {
  library(survival)
  library(survminer)

  # Prepared survival data frame: OS.time, OS and gene expression values.
  df <- as.data.frame(df)
  genelist <- as.character(unlist(genelist))

  # Validate the grouping method up front; an unknown value used to fall
  # through every branch and silently produce nothing.
  methods <- c("median", "mean", "quantile")
  if (!is.character(group) || length(group) != 1 || !group %in% methods) {
    stop("`group` must be one of: ", paste(methods, collapse = ", "),
         call. = FALSE)
  }

  absent <- setdiff(c("OS.time", "OS", genelist), colnames(df))
  if (length(absent) > 0) {
    stop("Columns not found in df: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  out_dir <- "./survival"
  dir.create(out_dir, showWarnings = FALSE)
  pvalue_file <- file.path(out_dir, "pvalue.of.survivaldata.csv")
  risktable_file <- file.path(out_dir, "risktable.of.survivaldata.csv")

  # Label every sample according to the chosen grouping method.
  assign_group <- function(expr) {
    switch(
      group,
      median = ifelse(expr >= median(expr, na.rm = TRUE), "high", "low"),
      mean = ifelse(expr >= mean(expr, na.rm = TRUE), "high", "low"),
      quantile = {
        # 25% and 75% cut points.
        cuts <- quantile(expr, probs = c(0.25, 0.75), na.rm = TRUE)
        ifelse(expr >= cuts[[2]], "high",
               ifelse(expr <= cuts[[1]], "low", "undetermine"))
      }
    )
  }

  # Append rows to a CSV without a header line.
  append_rows <- function(rows, file) {
    write.table(rows, file = file,
                sep = ",", append = TRUE, col.names = FALSE, row.names = FALSE)
  }

  for (gene in genelist) {
    # Build a dedicated data frame so a gene or column called "group" in `df`
    # cannot be clobbered, and keep only samples with a definite label.
    plot_df <- data.frame(OS.time = df$OS.time,
                          OS = df$OS,
                          group = assign_group(df[[gene]]),
                          stringsAsFactors = FALSE)
    plot_df <- plot_df[plot_df$group %in% c("high", "low"), , drop = FALSE]

    if (length(unique(plot_df$group)) < 2) {
      warning("Skipping ", gene, ": samples do not split into both a high ",
              "and a low group.", call. = FALSE)
      next
    }

    # KM plot. `data` is passed explicitly: otherwise survminer has to
    # re-evaluate the data symbol stored in the fit's call, which is not
    # visible from outside this function.
    fit <- survfit(Surv(OS.time, OS) ~ group, data = plot_df)
    km <- ggsurvplot(fit,
                     data = plot_df,
                     pval = TRUE,
                     conf.int = TRUE,
                     pval.size = 5,
                     xlab = gene,
                     palette = c("red", "blue"),
                     # Strata are ordered alphabetically: "high", then "low".
                     legend.labs = c("High", "Low"),
                     risk.table = TRUE,
                     risk.table.height = .25)

    logrank <- surv_pvalue(fit, data = plot_df)

    # Log-rank p-value.
    append_rows(t(c(gene, logrank$pval)), pvalue_file)

    # Risk table: gene name, column header, then the table rows. The header is
    # written as an ordinary row, which yields the same file content as
    # col.names = TRUE without the "appending column names" warning.
    append_rows(gene, risktable_file)
    append_rows(t(colnames(km$data.survtable)), risktable_file)
    append_rows(km$data.survtable, risktable_file)

    # KM plot image; always close the device, even if printing fails.
    png(file.path(out_dir, paste0(gene, "_survival.png")))
    tryCatch(print(km), finally = dev.off())
  }

  invisible(NULL)
}

你可能感兴趣的:(R,R)