GEOquery

下载GSE

rm(list = ls())
options(stringsAsFactors = F)
library(GEOquery)
library(limma)
library(dplyr)
gset <- getGEO('GSE112790',destdir = '.',AnnotGPL = T,getGPL = T)
exprSet <- exprs(gset[[1]])
dim(exprSet)
pData <- pData(gset[[1]])
group_list=as.character(pData$geo_accession)

过滤表达矩阵

library(hgu133plus2.db)
ids <- toTable(hgu133plus2SYMBOL)
length(unique(ids$symbol))
exprSet <- exprSet[rownames(exprSet) %in% ids$probe_id, ]

删除表达量低的探针,取最大表达量

ids <- ids[match(rownames(exprSet),ids$probe_id),]
head(ids)
exprSet[1:5,1:5]
tmp <- by(exprSet,ids$symbol,function(x) rownames(x)[which.max(rowMeans(x))] )
probes <- as.character(tmp)
exprSet <- exprSet[rownames(exprSet) %in% probes,]
dim(exprSet)
head(exprSet)

探针名转为基因名

rownames(exprSet) <- ids[match(rownames(exprSet),ids$probe_id),2]
exprSet[1:5,1:5]
write.csv(exprSet,"GSE112790_matrix.csv")

制作分组矩阵

sample <- pData$geo_accession
tissue <- rep(c("normal","tumor"),c(15,183))
design_df <- data.frame(sample,tissue)

制作差异比较矩阵

cont.matrix <- makeContrasts(
  vs = TStumor-TSnormal,levels = design)

lmFit

TS <- tissue
TS <- factor(TS, levels = unique(TS))
design <- model.matrix(~0+TS)
fit <- lmFit(exprSet, design)

eBayes

fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)

topTable

tempOutput = topTable(fit2, coef=1, n=Inf)
nrDEG = na.omit(tempOutput) 
head(nrDEG)
write.csv(nrDEG,"limma_notrend.results.csv",quote = F)
results <- decideTests(fit2)
vennDiagram(results)

你可能感兴趣的:(GEOquery)