图表复现-加强调色的泛癌免疫浸润箱线图

文章:

Adrenocortical Carcinoma Steroid Profiles: In Silico Pan-Cancer Analysis of TCGA Data Uncovers Immunotherapy Targets for Potential Improved Outcomes

Q1 IF: 6.055

2021-6

不算新,但这里面的图挺好的,拿来玩玩。

1.强调某一癌症的免疫浸润箱线图

数据来自2018年泛癌免疫图谱文章 The Immune Landscape of Cancer 的补充表格 Table S1(即下面代码读取的 NIHMS958212-supplement-2.xlsx)。

# Load the supplementary table and keep rows with a usable leukocyte fraction.
rm(list = ls())
library(tidyverse)
a <- rio::import("NIHMS958212-supplement-2.xlsx")
# The sheet stores missing values as the literal string "NA"; turn them into
# real NA so the numeric conversion below is clean.
a[a == "NA"] <- NA
a[["Leukocyte Fraction"]] <- as.numeric(a[["Leukocyte Fraction"]])
dat <- a %>% drop_na(`Leukocyte Fraction`)
# Keep tumor samples only: characters 14-15 of the barcode are compared (as
# strings) against "10"; values sorting below it are labelled "tumor".
# NOTE(review): this column appears to hold participant-level barcodes (it is
# later matched against patient IDs). If those are shorter than 14 characters
# the substring is empty and every row is labelled "tumor" -- confirm.
dat <- dat %>%
  mutate(
    Group = ifelse(
      str_sub(`TCGA Participant Barcode`, 14, 15) < 10,
      "tumor",
      "normal"
    )
  )
dat <- dat[dat$Group == "tumor", ]
# X-axis order: cancer types sorted by their median leukocyte fraction.
dat2 <- arrange(
  summarise(
    group_by(dat, `TCGA Study`),
    median = median(`Leukocyte Fraction`)
  ),
  median
)
dat[["TCGA Study"]] <- factor(
  dat[["TCGA Study"]],
  levels = dat2[["TCGA Study"]]
)
# Fill group: ACC is highlighted, every other cancer type shares one group.
dat[["type"]] <- ifelse(dat[["TCGA Study"]] == "ACC", "ACC", "other")
library(ggplot2)
# Axis label colors, one per cancer type, in the same order as the factor
# levels above.
axiscolor <- ifelse(dat2[["TCGA Study"]] == "ACC", "#f87669", "black")

# Pan-cancer boxplot of leukocyte fraction with ACC highlighted.
# The fill colors are named so that each `type` value is bound to its color
# explicitly instead of relying on the alphabetical order of the values.
# `axiscolor` holds one color per x-axis label; ggplot2 may warn that
# vectorized input to element_text() is not officially supported.
ggplot(dat, aes(`TCGA Study`, `Leukocyte Fraction`)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot(aes(fill = type), outlier.size = 0.5) +
  scale_fill_manual(values = c(ACC = "#f87669", other = "white")) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      color = axiscolor
    ),
    legend.position = "none"
  )

2.强调亚型的免疫浸润箱线图

第二张图也是这个数据,只是把ACC分成了两个亚型。

亚型信息也可以从xena下载,https://xenabrowser.net/datapages/?cohort=TCGA%20Pan-Cancer%20(PANCAN)&removeHub=https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443

我把分子亚型、免疫亚型和临床信息合并成了一个数据框,并保存为Rdata文件:首次运行时生成,之后直接加载即可。

# Build (on first run) and load `ph`, the merged TCGA pan-cancer phenotype
# table: clinical data + molecular subtype + immune subtype.
# The tidyverse must be attached before this block, because full_join() and
# str_sub() are used inside it.
library(tidyverse)

f <- "TCGA_pancancer_ph.Rdata"
if (!file.exists(f)) {
  # Clinical information
  m <- read.delim("Survival_SupplementalTable_S1_20171025_xena_sp")
  # Molecular subtype
  s <- read.delim("TCGASubtype.20170308.tsv.gz")
  # Immune subtype
  i <- read.delim("Subtype_Immune_Model_Based.txt.gz")
  # Merge the three tables on the sample identifier
  ph <- full_join(m, s, by = c("sample" = "sampleID"))
  ph <- full_join(ph, i, by = "sample")
  # Empty strings are treated as missing values
  ph[ph == ""] <- NA
  # NOTE(review): columns are renamed by position; confirm that columns 3, 4
  # and 7 are still cancer type, age and stage if the input files change.
  colnames(ph)[c(3, 4, 7)] <- c("TYPE", "age", "stage")
  # Characters 14-15 of the sample ID are compared as strings: two-digit
  # codes that sort before "10" are labelled "tumor", the rest "normal".
  ph$Group <- ifelse(str_sub(ph$sample, 14, 15) < "10", "tumor", "normal")
  save(ph, file = f)
}
load(f)
# Reload the supplementary table and keep rows with a usable leukocyte
# fraction.
library(tidyverse)
a <- rio::import("NIHMS958212-supplement-2.xlsx")
# The sheet stores missing values as the literal string "NA".
a[a == "NA"] <- NA
a[["Leukocyte Fraction"]] <- as.numeric(a[["Leukocyte Fraction"]])
dat <- a %>% drop_na(`Leukocyte Fraction`)
# Keep tumor samples only: characters 14-15 of the barcode are compared (as
# strings) against "10"; values sorting below it are labelled "tumor".
# NOTE(review): this column is matched against patient IDs further down, so
# it may hold participant-level barcodes shorter than 14 characters; the
# substring would then be empty and every row labelled "tumor" -- confirm.
dat <- dat %>%
  mutate(
    Group = ifelse(
      str_sub(`TCGA Participant Barcode`, 14, 15) < 10,
      "tumor",
      "normal"
    )
  )
dat <- dat[dat$Group == "tumor", ]

# How many participant barcodes are shared between the two tables
length(intersect(dat$`TCGA Participant Barcode`, ph$X_PATIENT))
## [1] 10368
# Keep tumor samples only, then attach the phenotype columns per participant.
# The join uses the tumor-only table `ph2`; previously `ph2` was created but
# the unfiltered `ph` was joined, so normal-sample rows were attached too.
# NOTE(review): the row counts printed further down were recorded with the
# unfiltered join and will differ after this change; re-run to refresh them.
# NOTE(review): `ph2` has one row per sample, so a participant with several
# tumor samples still yields several rows after the join -- confirm that this
# duplication is acceptable for the boxplot.
ph2 <- ph[ph$Group == "tumor", ]
dat <- left_join(
  dat,
  ph2,
  by = c("TCGA Participant Barcode" = "X_PATIENT")
)
# Flag the ACC rows and count them
k <- dat$`TCGA Study` == "ACC"
table(k)
## k
## FALSE  TRUE 
## 11838    79
# Distribution of the mRNA subtype within ACC, missing values included
table(dat$Subtype_mRNA[k], useNA = "always")
## 
##               steroid-phenotype-high steroid-phenotype-high+proliferation 
##                                   25                                   22 
##               steroid-phenotype-low   steroid-phenotype-low+proliferation 
##                                   27                                    4 
##                                  
##                                    1
# For ACC rows, replace the study label with the mRNA subtype
dat$`TCGA Study` <- ifelse(k, dat$Subtype_mRNA, dat$`TCGA Study`)

# Collapse the subtypes into two labels: anything containing "high" becomes
# HSP, anything containing "low" becomes LSP; other labels are left as is.
dat <- dat %>%
  mutate(
    `TCGA Study` = case_when(
      str_detect(`TCGA Study`, "high") ~ "HSP",
      str_detect(`TCGA Study`, "low") ~ "LSP",
      TRUE ~ `TCGA Study`
    )
  )
table(dat$`TCGA Study` %in% c("HSP", "LSP"))
## 
## FALSE  TRUE 
## 11839    78
table(dat$`TCGA Study`)
## 
## BLCA BRCA CESC CHOL COAD ESCA  GBM HNSC  HSP KICH KIRC KIRP  LGG LIHC  LSP LUAD 
##  436 1208  311   45  524  201  433  599   47   89  909  345  528  435   31  689 
## LUSC MESO   OV PAAD PCPG PRAD READ SARC SKCM STAD TGCT THCA UCEC  UCS  UVM 
##  605   87  611  194  186  563  169  267  479  508  138  576  566   57   80
# Drop the rows whose label is missing (the ACC row without a subtype)
table(is.na(dat$`TCGA Study`))
## 
## FALSE  TRUE 
## 11916     1
dat <- dat %>% drop_na(`TCGA Study`)
# X-axis order: groups sorted by their median leukocyte fraction.
dat2 <- dat %>%
  group_by(`TCGA Study`) %>%
  summarise(median = median(`Leukocyte Fraction`)) %>%
  arrange(median)

dat$`TCGA Study` <- factor(dat$`TCGA Study`, levels = dat2$`TCGA Study`)

# Fill group: HSP and LSP keep their own label, everything else is "other".
# `TCGA Study` is a factor at this point and ifelse() drops factor
# attributes, so it would return the integer level codes instead of the
# labels; convert to character first.
dat$type <- ifelse(
  dat$`TCGA Study` %in% c("HSP", "LSP"),
  as.character(dat$`TCGA Study`),
  "other"
)

library(ggplot2)
# One named palette drives both the box fill and the axis label color, so a
# subtype always gets the same color in both places.
fill_colors <- c(HSP = "#f87669", LSP = "#2874C5", other = "white")
# Axis label colors, in the same order as the factor levels above.
axiscolor <- ifelse(
  dat2$`TCGA Study` == "HSP",
  fill_colors[["HSP"]],
  ifelse(dat2$`TCGA Study` == "LSP", fill_colors[["LSP"]], "black")
)

# ggplot2 may warn that vectorized input to element_text() is not officially
# supported; the per-label colors are still applied.
ggplot(dat, aes(`TCGA Study`, `Leukocyte Fraction`)) +
  stat_boxplot(geom = "errorbar") +
  geom_boxplot(aes(fill = type), outlier.size = 0.5) +
  scale_fill_manual(values = fill_colors) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      vjust = 0.5,
      color = axiscolor
    ),
    legend.position = "none"
  )

如果不加强调色,这个代码会简单很多咯。

你可能感兴趣的:(图表复现-加强调色的泛癌免疫浸润箱线图)