#加载包 我用 R-3.6版本的
library(cmprsk) #已经包含在这里了library(survival)
library(riskRegression)
library(rms) #绘制列线图 ??rms
library(timeROC)
library(survivalROC)
library(regplot) #绘制列线图花样的
#加载数据
df<- mgus2
#查看下数据变量都是什么类型的
str(df)
table(is.na(df))
which(is.na(df),arr.ind = T)
#试验
df2 <- df[1:4,]
#fix(df2) #修改了第一个为29,第三个改45,原来数据比较小才允许修改
#查找两列中不一样的
df2[-which(df2$ptime %in% df2$futime),]
all(df2$ptime %in% df2$futime)
#回归本次任务 两个变量是一样的
all(df$ptime%in%df$futime)
df[-which(df$ptime %in% df$futime),]
#解释
# hgb 血红蛋白; # creat 肌酐;
# ptime 直至发展为浆细胞恶性肿瘤(PCM)或最后一次访视的时间(以月为单位);
# pstat 出现PCM(浆细胞恶性肿瘤):0 =否,1 =是;
# futime 直到死亡或最后一次接触的时间,以月为单位;
# death 发生死亡:0 =否,1 =是;这里把PCM作为结局事件,death作为竞争事件。
#试验
#fix(df2) #修改age第二个为空,原来数据比较小才允许修改
#df2 <- na.omit(df2) ##删掉空值
df2
#查找缺失值
table(is.na(df))
df[which(is.na(df)),]
#直接删除掉整行 数据
df <- na.omit(df) ##删掉空值
#
df$time <- ifelse(df$pstat==0, df$futime, df$ptime)
df$time <- df$time*30 #转化成天
df$event <- ifelse(df$pstat==0, 2*df$death, 1)
df$event <- factor(df$event, 0:2)
df[1,]
class(df$event)
#0为结尾数据 1=PCM 2=死于其他疾病
#绘制多个时间点的ROC曲线
library(timeROC)
library(survivalROC)
#直接删除掉整行 数据
df <- na.omit(df) ##删掉空值
#这里使用 1=结局 0=结尾数据
df$time <- ifelse(df$pstat==0, df$futime, df$ptime)
df$time <- df$time*30 #转化成天
df$event <- ifelse(df$pstat==0, 0, 1)
df$event <- factor(df$event, 0:1)
df[1:6,]
table(df$event) #看一下结局事件情况
class(df$event)
time_roc <- timeROC(
T = df$time,
delta = df$event,
marker = -df$hgb, #方向相反加个-
cause = 1,
weighting="marginal", #uses the Kaplan-Meier
times = c(3 * 365, 5 * 365, 10 * 365),
ROC = TRUE,
iid = TRUE
)
#AUC是 结局=0 1的 竞争风险看AUC_2
#Let suppose that we are interested in the event δ_i=1.
#Then, a case is defined as a subject i with T_i <=t and δ_i = 1.
time_roc[["AUC_1"]] #这里看这个
time_roc[["AUC_2"]]
#查看置信区间
time_roc
sd=time_roc[["inference"]][["vect_sd_1"]]
sd=as.vector(sd)
sd
AUC=as.vector(time_roc[["AUC_1"]])
AUC=round(AUC,digits = 3)
a2=AUC+sd
a1=AUC-sd
ci=round(cbind(a1,a2),digit=3)
ci=cbind(ci,AUC)
ci
#由95% CI=1.96*se,且se=SD/2得:95% CI=1.96*SD/2=
#绘图
time_ROC_df <- data.frame(
TP_3year = time_roc[["TP"]][,1],
FP_3year = time_roc[["FP_1"]][,1],
TP_5year = time_roc[["TP"]][,2],
FP_5year = time_roc[["FP_1"]][,2],
TP_10year = time_roc[["TP"]][,3],
FP_10year = time_roc[["FP_1"]][,3]
)
library(ggplot2)
ggplot(data = time_ROC_df) +
geom_line(aes(x = FP_3year, y = TP_3year), size = 1, color = "#BC3C29FF") +
geom_line(aes(x = FP_5year, y = TP_5year), size = 1, color = "#0072B5FF") +
geom_line(aes(x = FP_10year, y = TP_10year), size = 1, color = "#E18727FF") +
geom_abline(slope = 1, intercept = 0, color = "grey", size = 1, linetype = 2) +
theme_bw() +
annotate("text", x = 0.75, y = 0.25, size = 4.5,
label = paste0("AUC at 3 years = 0.661(0.649-0.673)",
sprintf("%.3f", time_roc$AUC[[1]])), color = "#BC3C29FF"
) +
annotate("text", x = 0.75, y = 0.15, size = 4.5,
label = paste0("AUC at 5 years = 0.623(0.613-0.633)",
sprintf("%.3f", time_roc$AUC[[2]])), color = "#0072B5FF"
) +
annotate("text", x = 0.75, y = 0.05, size = 4.5,
label = paste0("AUC at 10 years = 0.613(0.606-0.620)",
sprintf("%.3f", time_roc$AUC[[3]])), color = "#E18727FF"
) +
labs(x = "False positive rate", y = "True positive rate") +
theme(
axis.text = element_text(face = "bold", size = 11, color = "black"),
axis.title.x = element_text(face = "bold", size = 14, color = "black",
margin = margin(c(15, 0, 0, 0))),
axis.title.y = element_text(face = "bold", size = 14, color = "black",
margin = margin(c(0, 15, 0, 0)))
)
#比较两个time-dependent AUC
#换另外一个指标
df <- na.omit(df) ##删掉空值
time_roc2 <- timeROC(
T = df$time,
delta = df$event,
marker = df$creat, #creat 肌酐
cause = 1,
weighting="marginal",
times = c(3 * 365, 5 * 365, 10 * 365),
ROC = TRUE,
iid = TRUE
)
compare(time_roc, time_roc2)
compare(time_roc, time_roc2,adjusted=TRUE)
#绘制不同时间节点的AUC曲线及其置信区间,
#也可将多个ROC曲线的AUC值放在一起绘制
plotAUCcurve(time_roc, conf.int=TRUE, col="red")
plotAUCcurve(time_roc2, conf.int=TRUE, col="blue", add=TRUE)
legend("bottomright",c("mayoscore5", "mayoscore4"), col = c("red","blue"), lty=1, lwd=2)
#最佳CUTOFF值
df$hgb[which.max(time_ROC_df$TP_3year - time_ROC_df$FP_3year)]
#Nomogram
#??crerep
#Function to create weighted data set for competing risks analyses
library(mstate)
##加权数据 用在竞争风险
df_w <- crprep("time", "event",
data=df, trans= c(0,1), # c(1,2) 竞争风险
#Values of status for which weights are to be calculated.
cens=0, id="id",
keep=c("age","sex","hgb"))
df_w$Time<- df_w$Tstop - df_w$Tstart
table(df_w$failcode)
names(df_w)
#跑下cox模型 ??coxph
coxModle<- coxph(Surv(Time,status)~age+sex+hgb,
data=df_w[df_w$failcode==1,],
weight=weight.cens,id=id)
summary(coxModle)
#本地安装R包 #缺什么在从那个网站下载相应的包
#下载地址https://cran.r-project.org/web/packages/regplot/index.html
library(regplot)
regplot(coxModle,observation=df_w[df_w$failcode==1,],
failtime = c(120, 240),
prfail = T, droplines=T,
points=T)
#1. 区分度 (discrimination) C-index、AUC都很常用,
#此外还有IDI、NRI等。此处介绍C-index。
#上一步已经输出
#直接查看 Concordance= 0.595 (se = 0.029 )
#由95% CI=1.96*se,且se=SD/2得:95% CI=1.96*SD/2=
#一般cox
coxModle2 <- survival::coxph(Surv(time,event ==1) ~ age+sex+hgb,
x=T,y=T,data=df)
summary(coxModle2)
regplot(coxModle2 ,
failtime = rev(c(1095,1825,3650)), #rev 相反排序下
prfail = T, droplines=T,
points=T)
??regplot
#一般nomogram
summary(df$age)
#variable age does not have limits defined by datadist
df <- df[df$age >30, ]
df <- df[df$age <70, ]
df$age <- as.numeric(df$age)
df$sex <- as.factor(df$sex)
f <- cph(Surv(time,event==1) ~age+sex+hgb, x=T, y=T, surv=T,
data=df, time.inc=1095)
survival <- Survival(f)
survival1 <- list(function(x) surv(1095, x),
function(x) surv(1825, x),
function(x) surv(3650, x))
#数据打包
dd <- datadist(df)
options(datadist="dd")
#建立nomogram#maxscale为列线图第一行最大的分值,默认值100,
#这是文献中列线图普遍采用的最大分值;本例由于原文设定最大分值为10,
#故输入代码maxscale=10。
#可以调2、5、12生存率输出图片的坐标数目。置信区间(可以删掉连逗号)
nom <- nomogram(f, fun=survival1,
lp=F, funlabel=c("3-year survival", "5-year survival", "10-year survival"),
maxscale=10, fun.at=c(0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.6, 0.5) ,
conf.int = c(0.05,0.95))
#查看nomogram图片
plot(nom)
plot(nom,xfrac=0.2,cex.axis=0.9,cex.var=0.9)
#校准曲线 ??calibrate
# m=用你的数据mydata的行数除以??,4(你希望的拟合点的个数),得数取整,
f3 <- cph(Surv(time, event==1) ~ age+sex+hgb,
x=T, y=T, surv=T, data= df, time.inc=1095)
cal3 <- calibrate(f3, cmethod="KM", method="boot", u=1095, m=330, B=100)
plot(cal3)
plot(cal3,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
xlab="Nomogram-Predicted Probability of 3-Year OS",
ylab="Actual 3-Year OS(proportion)",
col=c(rgb(192,98,83,maxColorValue=255)))
plot(cal3,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
xlim=c(0,1),ylim=c(0,1),
xlab="Nomogram-Predicted Probability of 3-Year OS",
ylab="Actual 3-Year OS(proportion)",
col=c(rgb(192,98,83,maxColorValue=255)))
lines(cal3[,c("mean.predicted","KM")],type="b",lwd=2,
col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))
f5 <- cph(Surv(time, event==1) ~ age+sex+hgb,
x=T, y=T, surv=T, data= df, time.inc=1825)
cal5 <- calibrate(f5, cmethod="KM", method="boot", u=60, m=180, B=10)
plot(cal5,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
xlim=c(0,1),ylim=c(0,1),
xlab="Nomogram-Predicted Probability of 5-Year OS",
ylab="Actual 5-Year OS(proportion)",
col=c(rgb(192,98,83,maxColorValue=255)))
lines(cal5[,c("mean.predicted","KM")],type="b",lwd=2,
col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))
f10 <- cph(Surv(Time, s) ~ age+sex+hgb,
x=T, y=T, surv=T, data= df, time.inc=3625)
cal1 <- calibrate(f1, cmethod="KM", method="boot", u=12, m=180, B=1000)
plot(cal1,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),
xlim=c(0,1),ylim=c(0,1),
xlab="Nomogram-Predicted Probability of 1-Year OS",
ylab="Actual 1-Year OS(proportion)",
col=c(rgb(192,98,83,maxColorValue=255)))
lines(cal10[,c("mean.predicted","KM")],type="b",lwd=2,
col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))