# 2022新版TCGA批量下载表达矩阵及临床信息

# BiocManager::install("BioinformaticsFMRP/TCGAbiolinksGUI.data")
# BiocManager::install("BioinformaticsFMRP/TCGAbiolinks")
#' Download expression matrices and clinical data for one TCGA project
#'
#' Fetches the list of GDC projects, keeps those whose id contains "TCGA",
#' and processes the `i`-th one: queries the "STAR - Counts" gene expression
#' quantification files, downloads and merges them, fetches the clinical
#' table, and writes four tab-separated files into `out_dir`:
#' `<project>-counts.txt`, `<project>-fpkm.txt`, `<project>-tpm.txt` and
#' `<project>-clinical.txt`. The three expression files start with the
#' columns `id`, `gene_name` and `gene_type`, followed by one column per
#' sample.
#'
#' @param i Position (1-based) of the project within the filtered TCGA
#'   project list, in the order returned by `getGDCprojects()`.
#' @param out_dir Directory the output files are written to. It is created
#'   when it does not exist yet. Defaults to `"result"`.
#' @return `NULL`, invisibly. The function is called for its side effects
#'   (downloading data and writing files).
gdcdata <- function(i, out_dir = "result") {
  # Reject unusable indices before any network request is made.
  if (!is.numeric(i) || length(i) != 1L || is.na(i) || i < 1 || i != floor(i)) {
    stop("`i` must be a single positive whole number.", call. = FALSE)
  }

  # Namespace-qualified calls are used throughout so that calling this
  # function does not attach packages to the caller's search path.
  all_projects <- as.data.frame(TCGAbiolinks::getGDCprojects())
  all_ids <- all_projects[["project_id"]]
  tcga_ids <- all_ids[grepl(pattern = "TCGA", all_ids)]

  # Indexing past the end would yield NA and send a meaningless query.
  if (i > length(tcga_ids)) {
    stop(
      "`i` is ", i, " but only ", length(tcga_ids),
      " TCGA projects are available.",
      call. = FALSE
    )
  }
  project_id <- tcga_ids[i]

  ## 0. Progress information
  message(paste0("Downloading number ", i, ",project name: ", project_id))

  ## 1. Query the expression files of this project
  query_exp <- TCGAbiolinks::GDCquery(
    project = project_id,
    data.category = "Transcriptome Profiling",
    data.type = "Gene Expression Quantification",
    workflow.type = "STAR - Counts"
  )

  ## 2. Download the files
  TCGAbiolinks::GDCdownload(query_exp)

  ## 3. Merge the per-sample files into one SummarizedExperiment
  pre_exp <- TCGAbiolinks::GDCprepare(query = query_exp)

  ## 4. Extract the expression matrices
  # Assays are selected by name rather than by position (the previous code
  # used positions 1, 5 and 4) so that a change in assay order cannot
  # silently write the wrong values under the wrong file name.
  # NOTE(review): the names follow the TCGAbiolinks documentation for
  # "STAR - Counts" data; confirm against the installed version.
  assay_names <- c(
    counts = "unstranded",
    fpkm = "fpkm_unstrand",
    tpm = "tpm_unstrand"
  )
  missing_assays <- setdiff(
    assay_names,
    SummarizedExperiment::assayNames(pre_exp)
  )
  if (length(missing_assays) > 0) {
    stop(
      "Expected assay(s) not found for ", project_id, ": ",
      paste(missing_assays, collapse = ", "),
      call. = FALSE
    )
  }

  # Gene annotation columns placed in front of every expression table.
  gene_info <- SummarizedExperiment::rowData(pre_exp)
  gene_id <- data.frame(
    id = gene_info$gene_id,
    gene_name = gene_info$gene_name,
    gene_type = gene_info$gene_type
  )
  expression_tables <- lapply(
    assay_names,
    function(assay_name) {
      cbind(gene_id, SummarizedExperiment::assay(pre_exp, assay_name))
    }
  )

  # Clinical information
  clinical <- TCGAbiolinks::GDCquery_clinic(
    project = project_id,
    type = "clinical"
  )

  ## 5. Save the data
  # Create the output directory here so the function also works when the
  # caller has not prepared it.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  }
  write_tsv <- function(x, label) {
    utils::write.table(
      x,
      file.path(out_dir, paste0(project_id, "-", label, ".txt")),
      sep = "\t",
      col.names = TRUE,
      row.names = FALSE,
      quote = FALSE
    )
  }
  for (label in names(expression_tables)) {
    write_tsv(expression_tables[[label]], label)
  }
  write_tsv(clinical, "clinical")

  invisible(NULL)
}
# Batch driver: download the TCGA projects one after another.
# showWarnings = FALSE keeps re-runs quiet when "result" already exists.
dir.create("result", showWarnings = FALSE)

# Number of projects to fetch; gdcdata() selects a project by its position
# in the filtered TCGA project list.
n_tcga_projects <- 33L

# Indices of projects whose download failed, reported after the loop.
failed_projects <- integer(0)
for (i in seq_len(n_tcga_projects)) {
  succeeded <- tryCatch(
    {
      gdcdata(i)
      TRUE
    },
    error = function(e) {
      # Report the failure and carry on, so that one broken project does
      # not abort the remaining downloads.
      message("Project number ", i, " failed: ", conditionMessage(e))
      FALSE
    }
  )
  if (!succeeded) {
    failed_projects <- c(failed_projects, i)
  }
}

if (length(failed_projects) > 0) {
  message(
    "Failed project numbers: ",
    paste(failed_projects, collapse = ", ")
  )
}


# 你可能感兴趣的:(2022新版TCGA批量下载表达矩阵及临床信息)