GEO分析

library(Biobase)
library(GEOquery)
library(limma)

## load series and platform data from GEO ##不需改动

gset <- getGEO("GSE66660", GSEMatrix =TRUE, AnnotGPL=TRUE)
if (length(gset) > 1) idx <- grep("GPL11532", attr(gset, "names")) else idx <- 1
gset <- gset[[idx]]

## make proper column names to match toptable   ##不需改动

fvarLabels(gset) <- make.names(fvarLabels(gset))

## group names for all samples  ##不需改动

gsms <- "11001100"
sml <- c()
for (i in 1:nchar(gsms)) { sml[i] <- substr(gsms,i,i) }

## log2 transform(RMA方法) ##采用RMA方法则不需改动,可改为mas5等方法

ex <- exprs(gset)
qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
LogC <- (qx[5] > 100) ||

          (qx[6]-qx[1] > 50 && qx[2] > 0) ||

          (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)

if (LogC) { ex[which(ex <= 0)] <- NaN

  exprs(gset) <- log2(ex) }

## set up the data and proceed with analysis

sml <- paste("G", sml, sep="")    ## set group names
fl <- as.factor(sml)
gset$description <- fl
design <- model.matrix(~ description + 0, gset)
colnames(design) <- levels(fl)
fit <- lmFit(gset, design)
cont.matrix <- makeContrasts(G1-G0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)  ##原文为eBayes(fit2,0.01)
tT <- topTable(fit2, adjust="BH", sort.by="B", number=dim(gset)[1])  ##adjust.method是P.value成为adj.P.value所采用的方法,number是tT中包含的基因数目(可自行设置,我这个就是显示所有)

tT <- subset(tT, select=c("ID","adj.P.Val","P.Value","t","B","logFC","Gene.symbol","Gene.title","Gene.ID")) ##不要改动顺序

write.csv(tT, paste("all genes(T).xls"),quote=FALSE, sep="\t") ##原文为write.table(tT, file=stdout(), row.names=F, sep="\t"),两者均可,个人喜欢改动后的,简单易懂

#######一定要注意:上面保存的是全部基因!!下面我进行差异基因筛选:p<0.05,|logFC|>2。是原文里没有的!



degup<-tT[tT[,"adj.P.Val"]<0.05,]
degup<-degup[degup[,"logFC"]>2,]   ##上调基因
write.csv(degup,paste("GSE66600DEGs(upregulate).xls"),quote=FALSE,sep="\t")


degdown<-tT[tT[,"adj.P.Val"]<0.05,]
degdown<-degdown[degdown[,"logFC"]<(-2),] ##下调基因
write.csv(degdown,paste("GSE66600DEGs(downregulate).xls"),quote=FALSE,sep="\t")


gene=as.character(tT[,1])
OrgDb=org.Hs.eg.db    P.Value


require(DOSE)
require(clusterProfiler)

ekk <- enrichKEGG(gene=gene,organism="human",pvalueCutoff=0.01)
ego <- enrichGO(gene=gene,OrgDb="org.Hs.eg.db",ont="CC",pvalueCutoff=0.01,readable=TRUE)

write.csv(summary(ekk),"KEGG-enrich.csv",row.names =F)
write.csv(summary(ego),"GO-enrich.csv",row.names =F)


ego <- enrichGO(gene          = gene,
                universe      = names(geneList),
                OrgDb         = org.Hs.eg.db,
                ont           = "CC",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
ego2 <- enrichGO(gene         = gene.df$ENSEMBL,
                OrgDb         = org.Hs.eg.db,
                keytype       = 'ENSEMBL',
                ont           = "CC",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
ego3 <- enrichGO(gene         = gene.df$SYMBOL,
                OrgDb         = “org.Hs.eg.db”,
                keytype       = 'SYMBOL',
                ont           = "CC",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
ego <- enrichGO(gene          = DEG$,
                OrgDb         = "org.Hs.eg.db",
                ont           = "CC",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
可以把 CC BP MF 改成 ALL
ego <- enrichGO(gene=gene,OrgDb="org.Hs.eg.db",ont="ALL",pvalueCutoff=0.01,readable=TRUE)

ekk <- enrichKEGG(gene = eg$ENTREZID, organism ="human",keyType ="kegg", pAdjustMethod ="BH",  pvalueCutoff =0.01, qvalueCutoff =0.05)

DO<-enrichDO(gene=DEG$Gene.ID, ont = "DO", pvalueCutoff = 0.01, pAdjustMethod = "BH",qvalueCutoff = 0.05)


eg=bitr(geneID = PP$geneID, "ENTREZID", "SYMBOL", "org.Hs.eg.db")


df.id<-bitr(df$SYMBOL, fromType ="SYMBOL", toType ="ENTREZID",OrgDb ="org.Hs.eg.db")

easy.df<-merge(df,df.id,by="SYMBOL",all=F)

sortdf<-easy.df[order(easy.df$foldChange, decreasing =T),]

gene.expr = sortdf$foldChange

names(gene.expr) <- sortdf$ENTREZID

edox <- setReadable(edo,'org.Hs.eg.db','ENTREZID')


forestplot(as.matrix(dat[,1:5]),mean = dat$V6,lower = dat$V7,upper = dat$V8,graph.pos= 5, graphwidth = unit(50,"mm"), is.summary = c(TRUE,rep(FALSE,6)),zero = 1,boxsize = 0.5,lineheight = unit(8,'mm'),colgap = unit(2,'mm'),lwd.zero = 3,lwd.ci =2,col=fpColors(box='#458B00',summary="#8B008B",lines = 'black',zero = '#7AC5CD'),xlab="The estimates",lwd.xaxis=3,lty.ci = "solid")

你可能感兴趣的:(GEO分析)