# RNA-seq寻找到差异基因后,对比GO与KEGG库进行富集分析
# ---- Load the DEG table and build the inputs for enrichment analysis ----
# The former `rm(list = ls())` was removed: wiping the caller's workspace is a
# side effect nothing below depends on.
options(stringsAsFactors = FALSE)
DEG <- read.csv("./DEGs_filter.csv", header = TRUE, row.names = 1)
stopifnot(all(c("Entrezid", "log2FoldChange") %in% colnames(DEG)))

# Gene set for the over-representation analyses: every distinct, non-missing
# Entrez ID in the table.
gene <- unique(DEG$Entrezid[!is.na(DEG$Entrezid)])

# Two-column table (Entrez ID, log2 fold change) used to build the ranked
# vector for GSEA. Column names are kept as DEG.Entrezid / DEG.log2FoldChange.
genelist <- data.frame(
  DEG.Entrezid = DEG$Entrezid,
  DEG.log2FoldChange = DEG$log2FoldChange
)
head(genelist)
dim(genelist)

# Rows without an ID or a fold change can neither be mapped nor ranked.
genelist <- genelist[complete.cases(genelist), , drop = FALSE]

# GSEA needs one value per gene: when an Entrez ID occurs more than once keep
# the entry with the largest absolute fold change. Both sorts are stable, so a
# table without NAs or duplicates is ordered exactly as before.
by_effect <- order(abs(genelist$DEG.log2FoldChange), decreasing = TRUE)
genelist <- genelist[by_effect, , drop = FALSE]
genelist <- genelist[!duplicated(genelist$DEG.Entrezid), , drop = FALSE]

# Rank genes from most up-regulated to most down-regulated.
by_rank <- order(genelist$DEG.log2FoldChange, decreasing = TRUE)
genelist <- genelist[by_rank, , drop = FALSE]
head(genelist)
dim(genelist)

# Named numeric vector: values are log2 fold changes, names are Entrez IDs.
# NOTE(review): the input file name suggests an already-filtered DEG table;
# GSEA is usually run on the full ranked gene list -- confirm this is intended.
gene_fc <- genelist$DEG.log2FoldChange
names(gene_fc) <- as.character(genelist$DEG.Entrezid)
library(clusterProfiler)
library(org.Mm.eg.db)
library(org.Hs.eg.db)
#### GO: Over-Representation Analysis ####
# Biological Process
# Over-representation test of the DEG Entrez IDs against GO Biological Process
# terms from the mouse annotation package, with BH-adjusted p-values.
ego_BP <- enrichGO(
  gene = gene,
  keyType = "ENTREZID",
  ont = "BP",
  OrgDb = org.Mm.eg.db,
  pAdjustMethod = "BH",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05,
  readable = TRUE
)
# Collapse redundant GO terms (similarity cutoff 0.7), keeping from each group
# the term with the smallest adjusted p-value.
ego_BP2 <- simplify(ego_BP, cutoff = 0.7, by = "p.adjust", select_fun = min)
write.csv(ego_BP2@result, "./GO ORA_BP.csv")

# barplot()/dotplot() on an enrichment result return plot objects; print()
# them explicitly so the PNG is not left blank when the script is source()d.
png("GO_BP_barplot.png")
print(
  barplot(ego_BP2, showCategory = 10, title = "Biological Process of TOP10")
)
dev.off()
png("GO_BP_dotplot.png")
print(
  dotplot(ego_BP2, showCategory = 10, title = "Biological Process of TOP10")
)
dev.off()
# Cellular Component
# Over-representation test of the DEG Entrez IDs against GO Cellular Component
# terms from the mouse annotation package, with BH-adjusted p-values.
ego_CC <- enrichGO(
  gene = gene,
  keyType = "ENTREZID",
  ont = "CC",
  OrgDb = org.Mm.eg.db,
  pAdjustMethod = "BH",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05,
  readable = TRUE
)
# Collapse redundant GO terms (similarity cutoff 0.7), keeping from each group
# the term with the smallest adjusted p-value.
ego_CC2 <- simplify(ego_CC, cutoff = 0.7, by = "p.adjust", select_fun = min)
write.csv(ego_CC2@result, "./GO ORA_CC.csv")

# barplot()/dotplot() on an enrichment result return plot objects; print()
# them explicitly so the PNG is not left blank when the script is source()d.
png("GO_CC_barplot.png")
print(
  barplot(ego_CC2, showCategory = 10, title = "Cellular Component of TOP10")
)
dev.off()
png("GO_CC_dotplot.png")
print(
  dotplot(ego_CC2, showCategory = 10, title = "Cellular Component of TOP10")
)
dev.off()
# Molecular Function
# Over-representation test of the DEG Entrez IDs against GO Molecular Function
# terms from the mouse annotation package, with BH-adjusted p-values.
# Fix: the ontology was "ALL", which mixed BP/CC/MF terms into the results
# that are written out and plotted as Molecular Function.
ego_MF <- enrichGO(
  gene = gene,
  keyType = "ENTREZID",
  ont = "MF",
  OrgDb = org.Mm.eg.db,
  pAdjustMethod = "BH",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05,
  readable = TRUE
)
# Collapse redundant GO terms (similarity cutoff 0.7), keeping from each group
# the term with the smallest adjusted p-value.
ego_MF2 <- simplify(ego_MF, cutoff = 0.7, by = "p.adjust", select_fun = min)
write.csv(ego_MF2@result, "./GO ORA_MF.csv")

# barplot()/dotplot() on an enrichment result return plot objects; print()
# them explicitly so the PNG is not left blank when the script is source()d.
# The figure title previously read "Molecualr Function".
png("GO_MF_barplot.png")
print(
  barplot(ego_MF2, showCategory = 10, title = "Molecular Function of TOP10")
)
dev.off()
png("GO_MF_dotplot.png")
print(
  dotplot(ego_MF2, showCategory = 10, title = "Molecular Function of TOP10")
)
dev.off()
#### GO: GSEA ####
## Biological Process
# Gene-set enrichment analysis of the ranked log2 fold-change vector against
# GO Biological Process gene sets of 10-500 genes, using the fgsea backend.
GO.GSEA_BP <- gseGO(
  geneList = gene_fc,
  ont = "BP",
  OrgDb = org.Mm.eg.db,
  keyType = "ENTREZID",
  exponent = 1,
  minGSSize = 10,
  maxGSSize = 500,
  eps = 1e-10,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  verbose = TRUE,
  seed = FALSE,
  by = "fgsea"
)
write.csv(GO.GSEA_BP@result, "./GO_GSEA_BP.csv")

# Running-score plot for one hand-picked term. Only arguments that gseaplot()
# defines are passed: base_size, rel_height, subplots, pvalue_table and ES_geom
# are gseaplot2() options that gseaplot() absorbed through `...` without using
# them, so dropping them leaves the figure unchanged.
gsea_bp_term <- "GO:0060079"
if (gsea_bp_term %in% names(GO.GSEA_BP@geneSets)) {
  # Explicit print() so the figure is drawn when the script is source()d.
  print(
    gseaplot(
      GO.GSEA_BP,
      geneSetID = gsea_bp_term,
      by = "all",
      title = "",
      color = "black",
      color.line = "green",
      color.vline = "#FA5860"
    )
  )
} else {
  # Skip instead of aborting the remaining analyses on an unknown term.
  warning(
    "Gene set ", gsea_bp_term, " not found in GO.GSEA_BP; plot skipped.",
    call. = FALSE
  )
}
### Cellular Component
# Gene-set enrichment analysis of the ranked log2 fold-change vector against
# GO Cellular Component gene sets of 10-500 genes, using the fgsea backend.
GO.GSEA_CC <- gseGO(
  geneList = gene_fc,
  ont = "CC",
  OrgDb = org.Mm.eg.db,
  keyType = "ENTREZID",
  exponent = 1,
  minGSSize = 10,
  maxGSSize = 500,
  eps = 1e-10,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  verbose = TRUE,
  seed = FALSE,
  by = "fgsea"
)
write.csv(GO.GSEA_CC@result, "./GO_GSEA_CC.csv")

# Running-score plot for one hand-picked term. Only arguments that gseaplot()
# defines are passed: base_size, rel_height, subplots, pvalue_table and ES_geom
# are gseaplot2() options that gseaplot() absorbed through `...` without using
# them, so dropping them leaves the figure unchanged.
gsea_cc_term <- "GO:0098552"
if (gsea_cc_term %in% names(GO.GSEA_CC@geneSets)) {
  # Explicit print() so the figure is drawn when the script is source()d.
  print(
    gseaplot(
      GO.GSEA_CC,
      geneSetID = gsea_cc_term,
      by = "all",
      title = "",
      color = "black",
      color.line = "green",
      color.vline = "#FA5860"
    )
  )
} else {
  # Skip instead of aborting the remaining analyses on an unknown term.
  warning(
    "Gene set ", gsea_cc_term, " not found in GO.GSEA_CC; plot skipped.",
    call. = FALSE
  )
}
### Molecular Function
# Gene-set enrichment analysis of the ranked log2 fold-change vector against
# GO Molecular Function gene sets of 10-500 genes, using the fgsea backend.
GO.GSEA_MF <- gseGO(
  geneList = gene_fc,
  ont = "MF",
  OrgDb = org.Mm.eg.db,
  keyType = "ENTREZID",
  exponent = 1,
  minGSSize = 10,
  maxGSSize = 500,
  eps = 1e-10,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  verbose = TRUE,
  seed = FALSE,
  by = "fgsea"
)
write.csv(GO.GSEA_MF@result, "./GO_GSEA_MF.csv")

# Running-score plot for one hand-picked term. Only arguments that gseaplot()
# defines are passed: base_size, rel_height, subplots, pvalue_table and ES_geom
# are gseaplot2() options that gseaplot() absorbed through `...` without using
# them, so dropping them leaves the figure unchanged.
gsea_mf_term <- "GO:0003723"
if (gsea_mf_term %in% names(GO.GSEA_MF@geneSets)) {
  # Explicit print() so the figure is drawn when the script is source()d.
  print(
    gseaplot(
      GO.GSEA_MF,
      geneSetID = gsea_mf_term,
      by = "all",
      title = "",
      color = "black",
      color.line = "green",
      color.vline = "#FA5860"
    )
  )
} else {
  # Skip instead of aborting the remaining analyses on an unknown term.
  warning(
    "Gene set ", gsea_mf_term, " not found in GO.GSEA_MF; plot skipped.",
    call. = FALSE
  )
}
### KEGG: Over-Representation Analysis ####
# Over-representation test of the DEG Entrez IDs against mouse ("mmu") KEGG
# pathways of 10-500 genes, with BH-adjusted p-values. With
# use_internal_data = FALSE the pathway data is not taken from a bundled copy.
kk <- enrichKEGG(
  gene = gene,
  organism = "mmu",
  keyType = "ncbi-geneid",
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  minGSSize = 10,
  maxGSSize = 500,
  qvalueCutoff = 0.2,
  use_internal_data = FALSE
)
write.csv(kk@result, "./KEGG_ORA.csv")

# barplot()/dotplot() on an enrichment result return plot objects; print()
# them explicitly so the PNG is not left blank when the script is source()d.
png("KEGG_ORA_barplot.png")
print(barplot(kk, showCategory = 10, title = "KEGG of TOP10"))
dev.off()
png("KEGG_ORA_dotplot.png")
print(dotplot(kk, showCategory = 10, title = "KEGG of TOP10"))
dev.off()
#### KEGG: GSEA ####
# Gene-set enrichment analysis of the ranked log2 fold-change vector against
# mouse ("mmu") KEGG pathways of 10-500 genes, using the fgsea backend.
KEGG.GSEA <- gseKEGG(
  geneList = gene_fc,
  organism = "mmu",
  keyType = "ncbi-geneid",
  exponent = 1,
  minGSSize = 10,
  maxGSSize = 500,
  eps = 1e-10,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  verbose = TRUE,
  use_internal_data = FALSE,
  seed = FALSE,
  by = "fgsea"
)
write.csv(KEGG.GSEA@result, "./KEGG_GSEA.csv")

# Running-score plot for one hand-picked pathway. Only arguments that
# gseaplot() defines are passed: base_size, rel_height, subplots, pvalue_table
# and ES_geom are gseaplot2() options that gseaplot() absorbed through `...`
# without using them, so dropping them leaves the figure unchanged.
gsea_kegg_term <- "mmu04514"
if (gsea_kegg_term %in% names(KEGG.GSEA@geneSets)) {
  # Explicit print() so the figure is drawn when the script is source()d.
  print(
    gseaplot(
      KEGG.GSEA,
      geneSetID = gsea_kegg_term,
      by = "all",
      title = "",
      color = "black",
      color.line = "green",
      color.vline = "#FA5860"
    )
  )
} else {
  # Skip instead of aborting the script on an unknown pathway.
  warning(
    "Gene set ", gsea_kegg_term, " not found in KEGG.GSEA; plot skipped.",
    call. = FALSE
  )
}
# 了解到还有Disease Ontology (DO)库与Drug相关的库,后续进行详细学习