文章:
Adrenocortical Carcinoma Steroid Profiles: In Silico Pan-Cancer Analysis of TCGA Data Uncovers Immunotherapy Targets for Potential Improved Outcomes
Q1 IF: 6.055
2021-6
不算新,但这里面的图挺好的,拿来玩玩。
1.强调某一癌症的免疫浸润箱线图
数据来自2018年泛癌免疫图谱文章:The Immune Landscape of Cancer(Table S1)
# Load Table S1 of "The Immune Landscape of Cancer" and keep the rows that
# carry a usable leukocyte fraction.
#
# The original `rm(list = ls())` was removed: wiping the caller's workspace is
# a destructive side effect and nothing below relies on an empty environment.
# tidyverse is attached before its first use (drop_na / str_sub).
library(tidyverse)

a <- rio::import("NIHMS958212-supplement-2.xlsx")
# Turn literal "NA" strings into real missing values so the column can be
# converted to numeric without coercion warnings.
a[a == "NA"] <- NA
a$`Leukocyte Fraction` <- as.numeric(a$`Leukocyte Fraction`)
dat <- drop_na(a, `Leukocyte Fraction`)

# Keep tumor samples only, based on characters 14-15 of the barcode.
# NOTE(review): the comparison is between a string and the number 10, so R
# compares them as strings ("01" < "10"). If the barcode is shorter than 14
# characters str_sub() returns "" and every row is labelled "tumor", which
# makes this filter a no-op -- confirm the barcode format of this column.
dat$Group <- ifelse(
  str_sub(dat$`TCGA Participant Barcode`, 14, 15) < 10,
  "tumor",
  "normal"
)
dat <- dat[dat$Group == "tumor", ]
# Fill group: ACC is highlighted, every other study shares one colour.
dat$type <- ifelse(dat$`TCGA Study` == "ACC", "ACC", "other")

# X-axis order: studies sorted by ascending median leukocyte fraction.
dat2 <- arrange(
  summarise(
    group_by(dat, `TCGA Study`),
    median = median(`Leukocyte Fraction`)
  ),
  median
)
# Encode that order in the factor levels so ggplot draws the boxes in it.
dat$`TCGA Study` <- factor(dat$`TCGA Study`, levels = dat2$`TCGA Study`)
library(ggplot2)

# Axis label colours, one per study, in the same order as the x-axis levels
# (dat2 is already sorted by median). Only the ACC label is coloured.
axiscolor <- ifelse(dat2$`TCGA Study` == "ACC", "#f87669", "black")

# Box plot of leukocyte fraction per study with ACC highlighted.
# The fill colours are keyed by name so the mapping does not depend on the
# implicit alphabetical order of the values in `type`.
ggplot(dat, aes(`TCGA Study`, `Leukocyte Fraction`)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot(aes(fill = type), outlier.size = 0.5) +
  scale_fill_manual(values = c(ACC = "#f87669", other = "white")) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      color = axiscolor
    ),
    legend.position = "none"
  )
2.强调亚型的免疫浸润箱线图
第二张图也是这个数据,只是把ACC分成了两个亚型。
亚型信息也可以从xena下载,https://xenabrowser.net/datapages/?cohort=TCGA%20Pan-Cancer%20(PANCAN)&removeHub=https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443
我把分子亚型、免疫亚型和临床信息合并成了一个数据,存为了Rdata。
# Merged clinical / molecular-subtype / immune-subtype table, cached on disk.
# The merge is only rebuilt when the cache file does not exist yet.
f <- "TCGA_pancancer_ph.Rdata"
if (!file.exists(f)) {
  # Clinical information
  m <- read.delim("Survival_SupplementalTable_S1_20171025_xena_sp")
  # Molecular subtypes
  s <- read.delim("TCGASubtype.20170308.tsv.gz")
  # Immune subtypes
  i <- read.delim("Subtype_Immune_Model_Based.txt.gz")

  # Merge all three tables on the sample identifier, keeping every sample
  # that occurs in any of them.
  ph <- m %>%
    full_join(s, by = c("sample" = "sampleID")) %>%
    full_join(i, by = "sample")

  # Empty strings become missing values.
  ph[ph == ""] <- NA
  # Columns 3, 4 and 7 are renamed by position.
  # NOTE(review): this depends on the column order of the merged table.
  colnames(ph)[c(3, 4, 7)] <- c("TYPE", "age", "stage")

  # Characters 14-15 of the sample id decide tumor vs normal. The string is
  # compared with the number 10, so R performs a string comparison.
  ph$Group <- ifelse(str_sub(ph$sample, 14, 15) < 10, "tumor", "normal")

  save(ph, file = f)
}
# Brings `ph` into the workspace (also when it was just built above).
load(f)
# Re-read the immune landscape table and keep rows with a leukocyte fraction.
library(tidyverse)

a <- rio::import("NIHMS958212-supplement-2.xlsx")
# Literal "NA" strings become real missing values before the numeric cast.
a[a == "NA"] <- NA
a$`Leukocyte Fraction` <- as.numeric(a$`Leukocyte Fraction`)
dat <- drop_na(a, `Leukocyte Fraction`)

# Keep tumor samples only, judged by characters 14-15 of the barcode.
# NOTE(review): string-vs-number comparison; for barcodes shorter than 14
# characters str_sub() yields "" and the row is labelled "tumor" -- confirm.
dat$Group <- ifelse(
  str_sub(dat$`TCGA Participant Barcode`, 14, 15) < 10,
  "tumor",
  "normal"
)
dat <- dat[dat$Group == "tumor", ]
# Sanity check: number of participants present in both tables.
length(intersect(dat$`TCGA Participant Barcode`, ph$X_PATIENT))
## [1] 10368

# Restrict the phenotype table to tumor samples.
ph2 <- ph[ph$Group == "tumor", ]

# Attach the phenotype columns to every leukocyte-fraction row.
# Fix: the join now uses the tumor-only table `ph2`. The original joined the
# unfiltered `ph`, leaving `ph2` unused, so participants that also have a
# normal sample in `ph` were duplicated in `dat`.
# NOTE(review): row counts printed further down in this file were recorded
# with the unfiltered join and may no longer match exactly. A participant
# with several tumor samples in `ph2` still yields several rows.
dat <- left_join(
  dat,
  ph2,
  by = c("TCGA Participant Barcode" = "X_PATIENT")
)
# Logical mask of the ACC rows, reused below when relabelling.
k <- dat$`TCGA Study` == "ACC"
table(k)
## k
## FALSE TRUE
## 11838 79

# mRNA subtype distribution within ACC, including missing values.
table(dat[["Subtype_mRNA"]][k], useNA = "always")
##
## steroid-phenotype-high steroid-phenotype-high+proliferation
## 25 22
## steroid-phenotype-low steroid-phenotype-low+proliferation
## 27 4
## <NA>
## 1
# Replace the ACC label by its mRNA subtype; other studies keep their label.
dat$`TCGA Study` <- ifelse(k, dat$Subtype_mRNA, dat$`TCGA Study`)

# Collapse the four subtype names into high (HSP) / low (LSP) steroid
# phenotype; labels containing neither "high" nor "low" are left untouched.
dat$`TCGA Study` <- case_when(
  str_detect(dat$`TCGA Study`, "high") ~ "HSP",
  str_detect(dat$`TCGA Study`, "low") ~ "LSP",
  TRUE ~ dat$`TCGA Study`
)

table(dat$`TCGA Study` %in% c("HSP", "LSP"))
##
## FALSE TRUE
## 11839 78
table(dat$`TCGA Study`)
##
## BLCA BRCA CESC CHOL COAD ESCA GBM HNSC HSP KICH KIRC KIRP LGG LIHC LSP LUAD
## 436 1208 311 45 524 201 433 599 47 89 909 345 528 435 31 689
## LUSC MESO OV PAAD PCPG PRAD READ SARC SKCM STAD TGCT THCA UCEC UCS UVM
## 605 87 611 194 186 563 169 267 479 508 138 576 566 57 80

# Drop the rows whose label is missing (the ACC sample without a subtype).
table(is.na(dat$`TCGA Study`))
##
## FALSE TRUE
## 11916 1
dat <- drop_na(dat, `TCGA Study`)
# X-axis order: studies (with ACC split into HSP / LSP) sorted by ascending
# median leukocyte fraction.
dat2 <- dat %>%
  group_by(`TCGA Study`) %>%
  summarise(median = median(`Leukocyte Fraction`)) %>%
  arrange(median)
dat$`TCGA Study` <- factor(dat$`TCGA Study`, levels = dat2$`TCGA Study`)

# Fill group: the two ACC subtypes get their own colour, the rest is "other".
# Fix: `TCGA Study` is a factor at this point, and ifelse() returns the
# integer level codes of a factor passed as `yes`, so `type` used to contain
# strings such as "3" instead of "HSP" / "LSP". Convert to character first.
dat$type <- ifelse(
  dat$`TCGA Study` %in% c("HSP", "LSP"),
  as.character(dat$`TCGA Study`),
  "other"
)

library(ggplot2)

# One named palette drives both the box fill and the axis label colours, so
# the two can no longer disagree. The original unnamed fill vector assigned
# colours by sorted level order, which did not match the axis colours.
subtype_colors <- c(HSP = "#f87669", LSP = "#2874C5", other = "white")

# Axis label colours, one per study, in x-axis (dat2) order.
axiscolor <- ifelse(
  dat2$`TCGA Study` == "HSP",
  subtype_colors[["HSP"]],
  ifelse(dat2$`TCGA Study` == "LSP", subtype_colors[["LSP"]], "black")
)

# Box plot of leukocyte fraction per study with the ACC subtypes highlighted.
ggplot(dat, aes(`TCGA Study`, `Leukocyte Fraction`)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot(aes(fill = type), outlier.size = 0.5) +
  scale_fill_manual(values = subtype_colors) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      color = axiscolor
    ),
    legend.position = "none"
  )
如果不加强调色,这个代码会简单很多咯。