# GEO提取任意基因表达量(bioconductor包注释)

## This R script (1) extracts the expression of a target gene from a GSE file
## and (2) draws box plots of it.
## Probe annotation is done with the relevant Bioconductor package.
## NOTE: the former `rm(list = ls())` was removed so that running this script
## no longer wipes whatever the caller already has in the workspace.
## ----------------
## GSE matrix file download
## Only download when the cached copy is missing; the next section loads
## 'GSE3325_eSet.Rdata', so a fresh checkout works without editing the script.
if (!file.exists("GSE3325_eSet.Rdata")) {
  suppressPackageStartupMessages(library(GEOquery))
  # getGPL = FALSE: no GPL will be downloaded
  # getGPL = TRUE:  a GPLxxx.SOFT file will be downloaded
  eSet <- getGEO("GSE3325",
    destdir = ".",
    AnnotGPL = FALSE,
    getGPL = FALSE
  )
  # The object must be saved under the name `eSet`: the next section relies on
  # load() restoring it under exactly that name.
  save(eSet, file = "GSE3325_eSet.Rdata")
}
## ----------------
## Raw expression matrix (rownames are probe ids) and phenotype preparation
load("GSE3325_eSet.Rdata") # restores `eSet`
library(stringr)

# `eSet` is indexed as a list; only its first element is used here.
b <- eSet[[1]]

# Accessors are namespace-qualified: when the download step is skipped,
# GEOquery (and therefore Biobase) is never attached, and a bare exprs()/pData()
# call would fail with "could not find function".
raw_exprSet <- Biobase::exprs(b)
raw_exprSet[1:4, 1:4] # quick preview of the matrix
phe <- Biobase::pData(b)
phe$title # inspect the sample titles the groups are derived from

# Group label = second space-separated word of each sample title, lower-cased.
group_list <- tolower(
  str_split(as.character(phe$title), " ", simplify = TRUE)[, 2]
)
# head(group_list); table(group_list)

# The group labels are only meaningful if phenotype rows and expression columns
# describe the same samples in the same order, so enforce it instead of leaving
# the check commented out.
stopifnot(identical(rownames(phe), colnames(raw_exprSet)))

# Object names matter: the next section load()s them back by these names.
save(raw_exprSet, group_list,
  file = "GSE3325_raw_exprSet.Rdata"
)
## ----------------
## Annotation and selection of the gene of interest
## NOTE: the former `rm(list = ls())` was removed; everything this section needs
## is restored by load() below, so clearing the workspace is unnecessary.
load(file = "GSE3325_raw_exprSet.Rdata") # restores `raw_exprSet`, `group_list`

# Annotation with the Bioconductor package for this array platform
suppressPackageStartupMessages(library(hgu133plus2.db))
eg2probe <- toTable(hgu133plus2SYMBOL)

# Gene symbol of interest -- change this single value to query another gene.
target_gene <- "TRAF4"
target_probes <- eg2probe[eg2probe$symbol == target_gene, ]
target_probes # show the probes annotated to the gene

raw_exprSet[1:4, 1:4] # quick preview before transformation
exprSet <- log2(raw_exprSet)

## Extract the expression of the gene of interest.
# Keep only probes that are actually present in the matrix, so a probe missing
# from the data cannot trigger a "subscript out of bounds" error.
probe_ids <- intersect(target_probes$probe_id, rownames(exprSet))
if (length(probe_ids) == 0) {
  stop("No probe for gene '", target_gene, "' found in the expression matrix",
    call. = FALSE
  )
}
# drop = FALSE keeps a matrix even when the gene has a single probe; without it
# the result collapses to a vector and rowMeans() fails.
exprSet_select <- exprSet[probe_ids, , drop = FALSE]

## Among the gene's probes, keep the one with the highest mean expression.
best_probe <- which.max(rowMeans(exprSet_select))
dat <- data.frame(
  gene = exprSet_select[best_probe, ],
  mut = group_list
)
head(dat)
#-----------------
# Visualization
# Each plotting backend is optional: requireNamespace() tests availability
# without the load-or-warn side effect of require(), and library() then
# attaches the package only when it is installed.
# Plots are wrapped in print() because ggplot objects built inside `if` blocks
# are only drawn by top-level auto-printing, which does not happen when the
# script is run through source().
if (requireNamespace("ggpubr", quietly = TRUE)) {
  library(ggpubr)
  # google search : ggpubr boxplot add p-value
  # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
  p <- ggboxplot(dat,
    x = "mut", y = "gene",
    color = "mut", palette = "jco",
    add = "jitter"
  )
  # Add p-value
  print(p + stat_compare_means())
}
if (requireNamespace("ggstatsplot", quietly = TRUE)) {
  library(ggstatsplot)
  print(ggbetweenstats(data = dat, x = mut, y = gene))
}
if (requireNamespace("ggplot2", quietly = TRUE)) {
  library(ggplot2)
  print(
    ggplot(dat, aes(x = mut, y = gene)) +
      geom_boxplot() +
      theme_bw()
  )
}

# 你可能感兴趣的:(GEO提取任意基因表达量(bioconductor包注释))