(大全)预后Cox 列线图Nomogram 校正曲线calibration curve 时间依赖ROC survivalROC C指数C-index 两ROC比较

# 内置包数据运行,预期结果看图  部分代码加上自己的理解

#可以直接复制到R运行

(大全)预后Cox 列线图Nomogram 校正曲线calibration curve 时间依赖ROC survivalROC C指数C-index 两ROC比较_第1张图片

(大全)预后Cox 列线图Nomogram 校正曲线calibration curve 时间依赖ROC survivalROC C指数C-index 两ROC比较_第2张图片

 (大全)预后Cox 列线图Nomogram 校正曲线calibration curve 时间依赖ROC survivalROC C指数C-index 两ROC比较_第3张图片

(大全)预后Cox 列线图Nomogram 校正曲线calibration curve 时间依赖ROC survivalROC C指数C-index 两ROC比较_第4张图片

#加载包 我用 R-3.6版本的
library(cmprsk) #已经包含在这里了library(survival)
library(riskRegression)
library(rms)  #绘制列线图 ??rms
library(timeROC)
library(survivalROC)
library(regplot)  #绘制列线图花样的

#加载数据
df<- mgus2
#查看下数据变量都是什么类型的
str(df)
table(is.na(df))
which(is.na(df),arr.ind = T)
#试验
df2 <- df[1:4,]
#fix(df2) #修改了第一个为29,第三个改45,原来数据比较小才允许修改
#查找两列中不一样的
df2[-which(df2$ptime %in% df2$futime),]  
all(df2$ptime %in% df2$futime)

#回归本次任务 两个变量是一样的
all(df$ptime%in%df$futime)
df[-which(df$ptime %in% df$futime),]  

#解释
# hgb 血红蛋白;  # creat 肌酐;
# ptime 直至发展为浆细胞恶性肿瘤(PCM)或最后一次访视的时间(以月为单位);
# pstat 出现PCM(浆细胞恶性肿瘤):0 =否,1 =是;
# futime 直到死亡或最后一次接触的时间,以月为单位;
# death 发生死亡:0 =否,1 =是;这里把PCM作为结局事件,death作为竞争事件。

#试验
#fix(df2) #修改age第二个为空,原来数据比较小才允许修改
#df2 <- na.omit(df2) ##删掉空值
df2
#查找缺失值
table(is.na(df))
df[which(is.na(df)),]
#直接删除掉整行 数据
df <- na.omit(df) ##删掉空值
#
df$time <- ifelse(df$pstat==0, df$futime, df$ptime)
df$time <- df$time*30    #转化成天
df$event <- ifelse(df$pstat==0, 2*df$death, 1)
df$event <- factor(df$event, 0:2)
df[1,]
class(df$event)
#0为结尾数据 1=PCM 2=死于其他疾病

#绘制多个时间点的ROC曲线
library(timeROC)
library(survivalROC)

#直接删除掉整行 数据
df <- na.omit(df) ##删掉空值
#这里使用 1=结局 0=结尾数据
df$time <- ifelse(df$pstat==0, df$futime, df$ptime)
df$time <- df$time*30 #转化成天
df$event <- ifelse(df$pstat==0, 0, 1)
df$event <- factor(df$event, 0:1)
df[1:6,]
table(df$event) #看一下结局事件情况
class(df$event)

time_roc <- timeROC(
  T = df$time,
  delta = df$event,
  marker = -df$hgb, #方向相反加个-
  cause = 1,
  weighting="marginal", #uses the Kaplan-Meier
  times = c(3 * 365, 5 * 365, 10 * 365),
  ROC = TRUE,
  iid = TRUE
)

#AUC是 结局=0 1的 竞争风险看AUC_2
#Let suppose that we are interested in the event δ_i=1. 
#Then, a case is defined as a subject i with T_i <=t and δ_i = 1.
time_roc[["AUC_1"]]  #这里看这个
time_roc[["AUC_2"]]
#查看置信区间
time_roc
sd=time_roc[["inference"]][["vect_sd_1"]]
sd=as.vector(sd)
sd
AUC=as.vector(time_roc[["AUC_1"]])
AUC=round(AUC,digits = 3)
a2=AUC+sd
a1=AUC-sd
ci=round(cbind(a1,a2),digit=3)
ci=cbind(ci,AUC) 
ci
#由95% CI=1.96*se,且se=SD/2得:95% CI=1.96*SD/2=

#绘图
time_ROC_df <- data.frame(
  TP_3year = time_roc[["TP"]][,1],
  FP_3year = time_roc[["FP_1"]][,1],
  TP_5year = time_roc[["TP"]][,2],
  FP_5year = time_roc[["FP_1"]][,2],
  TP_10year = time_roc[["TP"]][,3],
  FP_10year = time_roc[["FP_1"]][,3]
)

library(ggplot2)
ggplot(data = time_ROC_df) +
  geom_line(aes(x = FP_3year, y = TP_3year), size = 1, color = "#BC3C29FF") +
  geom_line(aes(x = FP_5year, y = TP_5year), size = 1, color = "#0072B5FF") +
  geom_line(aes(x = FP_10year, y = TP_10year), size = 1, color = "#E18727FF") +
  geom_abline(slope = 1, intercept = 0, color = "grey", size = 1, linetype = 2) +
  theme_bw() +
  annotate("text", x = 0.75, y = 0.25, size = 4.5,
           label = paste0("AUC at 3 years = 0.661(0.649-0.673)", 
           sprintf("%.3f", time_roc$AUC[[1]])), color = "#BC3C29FF"
  ) +
  annotate("text", x = 0.75, y = 0.15, size = 4.5,
           label = paste0("AUC at 5 years = 0.623(0.613-0.633)", 
          sprintf("%.3f", time_roc$AUC[[2]])), color = "#0072B5FF"
  ) +
  annotate("text", x = 0.75, y = 0.05, size = 4.5,
           label = paste0("AUC at 10 years = 0.613(0.606-0.620)", 
           sprintf("%.3f", time_roc$AUC[[3]])), color = "#E18727FF"
  ) +
  labs(x = "False positive rate", y = "True positive rate") +
  theme(
    axis.text = element_text(face = "bold", size = 11, color = "black"),
    axis.title.x = element_text(face = "bold", size = 14, color = "black", 
                                margin = margin(c(15, 0, 0, 0))),
    axis.title.y = element_text(face = "bold", size = 14, color = "black", 
                                margin = margin(c(0, 15, 0, 0)))
  )

#比较两个time-dependent AUC
#换另外一个指标
df <- na.omit(df) ##删掉空值
time_roc2 <- timeROC(
  T = df$time,
  delta = df$event,
  marker = df$creat,  #creat 肌酐
  cause = 1,
  weighting="marginal",
  times = c(3 * 365, 5 * 365, 10 * 365),
  ROC = TRUE,
  iid = TRUE
)

compare(time_roc, time_roc2)
compare(time_roc, time_roc2,adjusted=TRUE)


#绘制不同时间节点的AUC曲线及其置信区间,
#也可将多个ROC曲线的AUC值放在一起绘制
plotAUCcurve(time_roc, conf.int=TRUE, col="red")
plotAUCcurve(time_roc2, conf.int=TRUE, col="blue", add=TRUE)
legend("bottomright",c("mayoscore5", "mayoscore4"), col = c("red","blue"), lty=1, lwd=2)

#最佳CUTOFF值
df$hgb[which.max(time_ROC_df$TP_3year - time_ROC_df$FP_3year)]

#Nomogram  
#??crerep 
#Function to create weighted data set for competing risks analyses
library(mstate)
##加权数据 用在竞争风险
df_w <- crprep("time", "event",
               data=df, trans= c(0,1), # c(1,2) 竞争风险
               #Values of status for which weights are to be calculated.
               cens=0, id="id",
               keep=c("age","sex","hgb"))
df_w$Time<- df_w$Tstop - df_w$Tstart
table(df_w$failcode)
names(df_w)
#跑下cox模型  ??coxph
coxModle<- coxph(Surv(Time,status)~age+sex+hgb,
                data=df_w[df_w$failcode==1,],
                 weight=weight.cens,id=id)
summary(coxModle)

#本地安装R包 #缺什么在从那个网站下载相应的包
#下载地址https://cran.r-project.org/web/packages/regplot/index.html
library(regplot)
regplot(coxModle,observation=df_w[df_w$failcode==1,],
        failtime = c(120, 240), 
        prfail = T, droplines=T,
        points=T)


#1. 区分度 (discrimination)  C-index、AUC都很常用,
#此外还有IDI、NRI等。此处介绍C-index。
#上一步已经输出
#直接查看 Concordance= 0.595  (se = 0.029 )
#由95% CI=1.96*se,且se=SD/2得:95% CI=1.96*SD/2=


#一般cox
coxModle2  <- survival::coxph(Surv(time,event ==1) ~ age+sex+hgb,
                             x=T,y=T,data=df) 
summary(coxModle2)
regplot(coxModle2 ,
        failtime = rev(c(1095,1825,3650)), #rev 相反排序下
        prfail = T, droplines=T,
        points=T)
??regplot
#一般nomogram
summary(df$age)
#variable age does not have limits defined by datadist
df <- df[df$age >30, ]
df <- df[df$age <70, ]
df$age <- as.numeric(df$age)
df$sex <- as.factor(df$sex)
f <- cph(Surv(time,event==1) ~age+sex+hgb, x=T, y=T, surv=T, 
         data=df, time.inc=1095)
survival <- Survival(f)
survival1 <- list(function(x) surv(1095, x), 
           function(x) surv(1825, x), 
           function(x) surv(3650, x)) 
#数据打包
dd <- datadist(df)
options(datadist="dd")
#建立nomogram#maxscale为列线图第一行最大的分值,默认值100,
#这是文献中列线图普遍采用的最大分值;本例由于原文设定最大分值为10,
#故输入代码maxscale=10。
#可以调2、5、12生存率输出图片的坐标数目。置信区间(可以删掉连逗号)
nom <- nomogram(f, fun=survival1, 
      lp=F, funlabel=c("3-year survival", "5-year survival", "10-year survival"), 
      maxscale=10, fun.at=c(0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.6, 0.5) ,
      conf.int = c(0.05,0.95))   
#查看nomogram图片
plot(nom)
plot(nom,xfrac=0.2,cex.axis=0.9,cex.var=0.9)

#校准曲线  ??calibrate
# m=用你的数据mydata的行数除以??,4(你希望的拟合点的个数),得数取整,
f3 <- cph(Surv(time, event==1) ~  age+sex+hgb, 
          x=T, y=T, surv=T, data= df, time.inc=1095)
cal3 <- calibrate(f3, cmethod="KM", method="boot", u=1095, m=330, B=100)
plot(cal3)
plot(cal3,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
     xlab="Nomogram-Predicted Probability of 3-Year OS",
     ylab="Actual 3-Year OS(proportion)",
     col=c(rgb(192,98,83,maxColorValue=255)))
plot(cal3,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
     xlim=c(0,1),ylim=c(0,1),
     xlab="Nomogram-Predicted Probability of 3-Year OS",
     ylab="Actual 3-Year OS(proportion)",
     col=c(rgb(192,98,83,maxColorValue=255)))
lines(cal3[,c("mean.predicted","KM")],type="b",lwd=2,
      col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))


f5 <- cph(Surv(time, event==1) ~  age+sex+hgb, 
          x=T, y=T, surv=T, data= df, time.inc=1825)
cal5 <- calibrate(f5, cmethod="KM", method="boot", u=60, m=180, B=10)
plot(cal5,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
     xlim=c(0,1),ylim=c(0,1),
     xlab="Nomogram-Predicted Probability of 5-Year OS",
     ylab="Actual 5-Year OS(proportion)",
     col=c(rgb(192,98,83,maxColorValue=255)))
lines(cal5[,c("mean.predicted","KM")],type="b",lwd=2,
      col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))


f10 <- cph(Surv(Time, s) ~  age+sex+hgb, 
           x=T, y=T, surv=T, data= df, time.inc=3625)
cal1 <- calibrate(f1, cmethod="KM", method="boot", u=12, m=180, B=1000)
plot(cal1,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
     xlim=c(0,1),ylim=c(0,1),
     xlab="Nomogram-Predicted Probability of 1-Year OS",
     ylab="Actual 1-Year OS(proportion)",
     col=c(rgb(192,98,83,maxColorValue=255)))
lines(cal10[,c("mean.predicted","KM")],type="b",lwd=2,
      col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))


 

你可能感兴趣的:(r语言,统计学)