常见免疫治疗队列的表达矩阵

Riaz.png

生信文章中经常用到免疫治疗队列来验证构建的signature能否预测免疫治疗的疗效,常用的免疫治疗队列包括:IMvigor210 (膀胱癌)、GSE91061 (黑色素瘤)、GSE135222 (非小细胞肺癌)、GSE78220 (黑色素瘤)等。下面介绍它们的表达矩阵和临床信息的获取方法。上图中的Riaz队列就是GSE91061队列。

1.IMvigor210

首先去官网下载安装包至本地

http://research-pub.gene.com/IMvigor210CoreBiologies

IMvigor210CoreBiologies包的安装可能会出现较多问题,可以参考一些推送:IMvigor210CoreBiologies包安装指北;R包安装困难户必看教程 (平稳解决non-zero exit和版本available问题)

# Packages: the IMvigor210 data package, IOBR, and tidyverse helpers
# (rownames_to_column, inner_join, column_to_rownames, %>%).
library(IMvigor210CoreBiologies)
library(IOBR)
library(tidyverse)

# `cds` is the data set shipped with IMvigor210CoreBiologies; pull out the
# count matrix, the feature (gene) annotation and the sample annotation.
data(cds)
exp <- counts(cds)
fdata <- fData(cds)
phe <- pData(cds)

# Preview each of the three components.
head(exp)
head(fdata)
head(phe)

# Turn the count matrix into a data frame keyed by `entrez_id` and attach the
# gene annotation to it.
exp <- exp %>%
  as.data.frame() %>%
  rownames_to_column("entrez_id") %>%
  inner_join(fdata, ., by = "entrez_id")

# Drop the first five columns (presumably the annotation columns other than
# `Symbol` -- confirm against head(fdata)).
exp <- exp[, -(1:5)]

# Average the rows that share the same `Symbol`, then use it as row names.
exp2 <- aggregate(. ~ Symbol, exp, mean) %>%
  column_to_rownames("Symbol")

# Start again from the raw counts and a three-column gene annotation table
# indexed by Entrez ID.
data(cds)
expMatrix <- counts(cds)
eff_length2 <- fData(cds)[, c("entrez_id", "length", "symbol")]
rownames(eff_length2) <- eff_length2$entrez_id
head(eff_length2)

# Keep only the count rows that have an annotation entry, then reorder the
# annotation so its rows line up one-to-one with the count rows.
feature_ids <- rownames(expMatrix)
expMatrix <- expMatrix[feature_ids %in% rownames(eff_length2), ]
mm <- match(rownames(expMatrix), rownames(eff_length2))
eff_length2 <- eff_length2[mm, ]

# TPM: divide every gene (row) by its `length`, then rescale every sample
# (column) so that it sums to 1e6.
x <- expMatrix / eff_length2$length
eset <- sweep(x, 2, colSums(x), "/") * 1e6
summary(duplicated(rownames(eset)))

# Map Entrez IDs to gene symbols; rows sharing a symbol are combined with
# method = "mean".
eset <- IOBR::anno_eset(
  eset = eset,
  annotation = eff_length2,
  symbol = "symbol",
  probe = "entrez_id",
  method = "mean"
)

# log2(x + 1)-transform only when the largest value exceeds 100.
if (max(eset) > 100) {
  eset <- log2(eset + 1)
}


# Sample annotation; replace spaces in the column names with underscores so
# the columns can be referred to by plain names.
pdata <- pData(cds)
names(pdata) <- gsub(" ", "_", names(pdata))

# Keep the response, burden and survival columns, move the row names into an
# `ID` column, and give the columns short names.
pdata <- pdata[, c(
  "binaryResponse",
  "FMOne_mutation_burden_per_MB",
  "Neoantigen_burden_per_MB",
  "censOS",
  "os"
)] %>%
  rownames_to_column(var = "ID")
colnames(pdata) <- c("ID", "BOR_binary", "TMB", "TNB", "status", "time")

# Drop samples without a recorded response, then recode "CR/PR" as "R" and
# everything else as "NR".
has_response <- !is.na(pdata$BOR_binary)
pdata <- pdata[has_response, ]
pdata$BOR_binary <- ifelse(pdata$BOR_binary == "CR/PR", "R", "NR")

2.GSE91061 (Riaz队列)

先在GEO官网下载FPKM表达矩阵文件

# Read the FPKM table downloaded from GEO. `expr` keeps the table as read;
# `expMatrix` is the working copy used by the following steps.
expr <- read.csv("GSE91061_BMS038109Sample.hg19KnownGene.fpkm.csv")
expMatrix <- expr
dim(expMatrix)
# Output reported by the original author: [1] 22187   110

表达矩阵第一列的1 10 100 1000等是假探针,其实是entrez id,利用IMvigor210CoreBiologies包里的数据对它们进行注释

# The IMvigor210 data set is loaded here only for its gene annotation table.
library(IMvigor210CoreBiologies)
library(IOBR)
data(cds)
exp <- counts(cds)
fdata <- fData(cds)
phe <- pData(cds)

# Preview the three components of `cds`.
head(exp)
head(fdata)
head(phe)

# Annotation table (Entrez ID, length, symbol) indexed by Entrez ID.
eff_length2 <- fdata[, c("entrez_id", "length", "symbol")]
rownames(eff_length2) <- eff_length2$entrez_id
head(eff_length2)

# Column `X` of the FPKM table holds the gene identifiers. Keep only the genes
# found in both tables.
feature_ids <- expMatrix$X
expMatrix <- expMatrix[feature_ids %in% rownames(eff_length2), ]
eff_length2 <- eff_length2[rownames(eff_length2) %in% feature_ids, ]

# Move the identifiers into the row names and drop the first column.
rownames(expMatrix) <- expMatrix$X
expMatrix <- expMatrix[, -1]

eset <- expMatrix
summary(duplicated(rownames(eset)))

# Map Entrez IDs to gene symbols; rows sharing a symbol are combined with
# method = "mean".
eset <- IOBR::anno_eset(
  eset = eset,
  annotation = eff_length2,
  symbol = "symbol",
  probe = "entrez_id",
  method = "mean"
)

# Convert the FPKM values of one sample to TPM.
#
# Args:
#   fpkm: numeric vector with the FPKM values of a single sample (one column
#         of the expression matrix).
#
# Returns:
#   A numeric vector of the same length (names kept) whose values are
#   fpkm / sum(fpkm) * 1e6. If the FPKM values sum to zero the result is all
#   zeros instead of NaN, so a later `if (max(...) > 100)` check does not fail
#   on a missing value. NA values in the input still propagate.
fpkmToTpm <- function(fpkm) {
  total <- sum(fpkm)
  if (!is.na(total) && total == 0) {
    # 0 / 0 would give NaN; multiplying by 0 keeps the length and names.
    return(fpkm * 0)
  }
  # Direct arithmetic instead of exp(log(.)): same quantity, no -Inf - -Inf.
  fpkm / total * 1e6
}
# Convert sample by sample (columns), then log2(x + 1)-transform when the
# largest value exceeds 100.
eset_tpm <- apply(eset, MARGIN = 2, FUN = fpkmToTpm)
if (max(eset_tpm) > 100) {
  eset_tpm <- log2(eset_tpm + 1)
}

临床信息可以在文献的附件中下载:Riaz N, Havel JJ, Makarov V, Desrichard A et al. Tumor and Microenvironment Evolution during Immunotherapy with Nivolumab. Cell 2017 Nov 2;171(4):934-949.e16. PMID: 29033130.

3.GSE135222

library(tidyverse)
library(stringr)

# Keep only the part of each ID that precedes the first "." (drops the
# ".<n>" suffix).
strip_version <- function(ids) {
  unlist(lapply(ids, function(id) {
    strsplit(id, "[.]")[[1]][1]
  }))
}

# Expression table and gene annotation table, both keyed by `gene_id`.
expr <- read_tsv("GSE135222_GEO_RNA-seq_omicslab_exp.tsv")
gtf_v22 <- read_tsv(file = "gencode.gene.info.v22.tsv")

gtf_v22$gene_id <- strip_version(gtf_v22$gene_id)
table(duplicated(gtf_v22$gene_id))

expr$gene_id <- strip_version(expr$gene_id)

# Attach the annotation, drop the first column (`gene_id`, the join key), then
# drop columns 2-11 (presumably the remaining annotation columns after
# `gene_name` -- confirm against the annotation file).
expr <- gtf_v22 %>%
  inner_join(expr, by = "gene_id") %>%
  dplyr::select(-1)
expr <- expr[, -(2:11)]
table(duplicated(expr$gene_name))

# Collapse duplicated gene names by averaging, then use them as row names.
expr <- aggregate(. ~ gene_name, expr, mean) %>%
  column_to_rownames("gene_name")
# Convert the FPKM values of one sample to TPM.
#
# Args:
#   fpkm: numeric vector with the FPKM values of a single sample (one column
#         of the expression matrix).
#
# Returns:
#   A numeric vector of the same length (names kept) whose values are
#   fpkm / sum(fpkm) * 1e6. If the FPKM values sum to zero the result is all
#   zeros instead of NaN, so NaN does not leak into the log2-transformed
#   matrix. NA values in the input still propagate.
fpkmToTpm <- function(fpkm) {
  total <- sum(fpkm)
  if (!is.na(total) && total == 0) {
    # 0 / 0 would give NaN; multiplying by 0 keeps the length and names.
    return(fpkm * 0)
  }
  # Direct arithmetic instead of exp(log(.)): same quantity, no -Inf - -Inf.
  fpkm / total * 1e6
}
# Convert each sample (column) from FPKM to TPM, then take log2(x + 1).
expr <- log2(apply(expr, MARGIN = 2, FUN = fpkmToTpm) + 1)

临床信息可以在文献的附件中下载:Jung H, Kim HS, Kim JY, Sun JM et al. DNA methylation loss promotes immune evasion of tumours with high mutation and copy number load. Nat Commun 2019 Sep 19;10(1):4278. PMID: 31537801.

4.GSE78220
library(GEOquery)
library(readxl)

# Fetch the series from GEO into the current directory, without platform
# annotation, and take the sample annotation of its first element.
gset <- getGEO("GSE78220", destdir = ".", AnnotGPL = FALSE, getGPL = FALSE)
gset[[1]]
pdata <- pData(gset[[1]])

# FPKM table downloaded separately from GEO: gene names move into the row
# names and the first column is dropped.
# NOTE: `exp` also names base::exp(); calls such as exp(1) keep working
# because R skips non-function objects when resolving a function call.
exp <- read_excel("GSE78220_PatientFPKM.xlsx")
exp <- as.data.frame(exp)
rownames(exp) <- exp$Gene
exp <- exp[, -1]

# The sample columns are relabelled with the GEO sample IDs by position. Fail
# early with a clear message if the counts differ.
# NOTE(review): this also assumes the spreadsheet columns are in the same
# order as the rows of `pdata` -- verify against the sample titles in `pdata`.
stopifnot(
  "FPKM table and GEO sample annotation differ in sample count" =
    ncol(exp) == nrow(pdata)
)
colnames(exp) <- rownames(pdata)

# Convert the FPKM values of one sample to TPM.
#
# Args:
#   fpkm: numeric vector with the FPKM values of a single sample (one column
#         of the expression matrix).
#
# Returns:
#   A numeric vector of the same length (names kept) whose values are
#   fpkm / sum(fpkm) * 1e6. If the FPKM values sum to zero the result is all
#   zeros instead of NaN, so NaN does not leak into the log2-transformed
#   matrix. NA values in the input still propagate.
fpkmToTpm <- function(fpkm) {
  total <- sum(fpkm)
  if (!is.na(total) && total == 0) {
    # 0 / 0 would give NaN; multiplying by 0 keeps the length and names.
    return(fpkm * 0)
  }
  # Direct arithmetic instead of exp(log(.)): same quantity, no -Inf - -Inf.
  fpkm / total * 1e6
}
# Convert each sample (column) from FPKM to TPM, then take log2(x + 1).
exp <- log2(apply(exp, MARGIN = 2, FUN = fpkmToTpm) + 1)

还有很多其他的免疫治疗队列,如 Van Allen et al., Science 2015、Miao et al., Science 2018 等,由于下载权限、不会下载等原因我无法获得,因此无法分享,欢迎批评指正及补充。

你可能感兴趣的:(常见免疫治疗队列的表达矩阵)