R-clinical data

技能树TP53_BRCA example,DEseq2

# Step2 Grouping by special clinical information --------------------------

if (!file.exists( '.TCGA-BRCA.GDC_phenotype.tsv.gz' )) {
  gzfile <- "./TCGA-BRCA.GDC_phenotype.tsv.gz"
  download.file("https://gdc.xenahubs.net/download/TCGA-BRCA/Xena_Matrices/TCGA-BRCA.GDC_phenotype.tsv.gz", 
                destfile = gzfile)
  phenoData <- read.table( gzfile,
                           header = T,
                           sep = '  ',
                           quote = '' )
  save( phenoData, file = './TCGA-BRCA.GDC_phenotype.Rdata' )
}else{
  load('./TCGA-BRCA.GDC_phenotype.Rdata')
}
pheno_num <- c()
invisible(
  lapply(1:ncol(phenoData), 
         function(col_num){
           ## Assume that the classification project is between 2 and 4   #??
           if (1 < dim(table(phenoData[,4])) & 
               dim(table(phenoData[,col_num])) < 5) {
             pheno_num <<- append(pheno_num, col_num, after = length(pheno_num))
           }
         }
  )
)
View(phenoData[, pheno_num])
names(phenoData[, pheno_num])


## Category 3: TP53     
#SNV( single nucleotide variants)
if (!file.exists( './TCGA-BRCA.mutect2_snv.tsv.gz' )) {
  gzfile <- "./TCGA-BRCA.mutect2_snv.tsv.gz"
  download.file("https://gdc.xenahubs.net/download/TCGA-BRCA/Xena_Matrices/TCGA-BRCA.mutect2_snv.tsv.gz", 
                destfile = gzfile)
  mutype_file <- read.table( gzfile,
                             header = T,
                             sep = '    ',
                             quote = '' )
  save( mutype_file, file = './TCGA-BRCA.mutect2_snv.Rdata' )
}else{
  load('./TCGA-BRCA.mutect2_snv.Rdata')
}

raw_data[1:5, 1:15]

## Pick columns that contains 'tp53'
TP53 <- mutype_file[mutype_file$gene == 'tp53' | mutype_file$gene == 'TP53',]
TP53_sample <- unique( sort( TP53$Sample_ID ) )
tumor_sample <- colnames(raw_data)[substr( colnames(raw_data),14,15) < 10] #find 癌group="01" 
TP53_sample <- intersect(tumor_sample, TP53_sample)
noTP53_sample <- setdiff(tumor_sample, TP53_sample)
save(TP53_sample, noTP53_sample, file = './sample_by_TP53.Rdata')

# Step3 Filt sample ------------------------------------------------

load('./TCGA-BRCA.htseq_counts.Rdata')

tp53_sample <- c(TP53_sample, noTP53_sample)
AssayData <- raw_data[, tp53_sample]
dim(AssayData)
group_list <- c(rep('TP53', length(TP53_sample)),
                rep('NO_TP53', length(noTP53_sample)))
save(AssayData, group_list, file = './tnbc_tumor_TP53_AssayData.Rdata')

你可能感兴趣的:(R-clinical data)