使用 clusterProfiler 包进行 GO 与 KEGG 富集分析

通过 RNA-seq 找到差异表达基因后,对照 GO 与 KEGG 数据库进行富集分析

# ---- Load the filtered DEG table and build the enrichment inputs ----
# Produces three objects used by the sections below:
#   gene     - character vector of unique, non-missing Entrez IDs (ORA input)
#   genelist - data frame of Entrez ID / log2 fold change, sorted decreasing
#   gene_fc  - named numeric vector (names = Entrez IDs) sorted in decreasing
#              order of log2 fold change (GSEA input)
#
# NOTE: `rm(list = ls())` was removed on purpose. Wiping the caller's global
# environment is a destructive side effect; run the script in a fresh R
# session instead. Likewise `stringsAsFactors` is now passed explicitly
# rather than being changed as a global option.
DEG <- read.csv("./DEGs_filter.csv", header = TRUE, row.names = 1,
                stringsAsFactors = FALSE)

# Fail early with a clear message if the expected columns are absent.
required_cols <- c("Entrezid", "log2FoldChange")
missing_cols <- setdiff(required_cols, names(DEG))
if (length(missing_cols) > 0) {
  stop("DEGs_filter.csv is missing column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}

# Unique, non-missing IDs for the over-representation tests.
gene <- unique(as.character(DEG$Entrezid[!is.na(DEG$Entrezid)]))

# Column names are kept as `DEG.Entrezid` / `DEG.log2FoldChange`, i.e. the
# names data.frame() derived from the unnamed arguments originally.
genelist <- data.frame(DEG.Entrezid = as.character(DEG$Entrezid),
                       DEG.log2FoldChange = DEG$log2FoldChange,
                       stringsAsFactors = FALSE)
head(genelist)
dim(genelist)

# Genes without an Entrez ID or without a fold change cannot be ranked.
has_id <- !is.na(genelist$DEG.Entrezid) & nzchar(genelist$DEG.Entrezid)
has_fc <- !is.na(genelist$DEG.log2FoldChange)
genelist <- genelist[has_id & has_fc, , drop = FALSE]

# A ranked list needs unique names: when several rows map to the same Entrez
# ID, keep the one with the largest absolute log2 fold change.
by_effect <- order(abs(genelist$DEG.log2FoldChange), decreasing = TRUE)
genelist <- genelist[by_effect, , drop = FALSE]
genelist <- genelist[!duplicated(genelist$DEG.Entrezid), , drop = FALSE]

# The GSEA functions expect the ranking in decreasing order.
genelist <- genelist[order(genelist$DEG.log2FoldChange, decreasing = TRUE), ,
                     drop = FALSE]
head(genelist)
dim(genelist)

gene_fc <- genelist$DEG.log2FoldChange
names(gene_fc) <- genelist$DEG.Entrezid


library(clusterProfiler)
library(org.Mm.eg.db)
library(org.Hs.eg.db)


#### GO: Over-Representation Analysis  ####

# Biological Process
# Tests the DEG Entrez IDs in `gene` for over-represented GO "BP" terms using
# the mouse annotation package; `readable = TRUE` asks for gene symbols in the
# result instead of Entrez IDs.
ego_BP <- enrichGO(
  gene          = gene,
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  ont           = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff  = 0.01,
  qvalueCutoff  = 0.05,
  readable      = TRUE
)

# Reduce redundancy among enriched terms (similarity cutoff 0.7); within each
# group of similar terms the one with the minimum adjusted p-value is kept.
ego_BP2 <- simplify(ego_BP, cutoff = 0.7, by = "p.adjust", select_fun = min)

write.csv(ego_BP2@result, "./GO ORA_BP.csv")

# The plots are printed explicitly: without print() nothing is drawn when the
# script is executed through source(), leaving empty PNG files.
png("GO_BP_barplot.png")
print(barplot(ego_BP2, showCategory = 10,
              title = "Biological Process of TOP10"))
dev.off()

png("GO_BP_dotplot.png")
print(dotplot(ego_BP2, showCategory = 10,
              title = "Biological Process of TOP10"))
dev.off()

# Cellular Component
# Same over-representation test as for the other ontologies, restricted to
# GO "CC" terms; `readable = TRUE` asks for gene symbols in the result.
ego_CC <- enrichGO(
  gene          = gene,
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  ont           = "CC",
  pAdjustMethod = "BH",
  pvalueCutoff  = 0.01,
  qvalueCutoff  = 0.05,
  readable      = TRUE
)

# Reduce redundancy among enriched terms (similarity cutoff 0.7); within each
# group of similar terms the one with the minimum adjusted p-value is kept.
ego_CC2 <- simplify(ego_CC, cutoff = 0.7, by = "p.adjust", select_fun = min)

write.csv(ego_CC2@result, "./GO ORA_CC.csv")

# The plots are printed explicitly: without print() nothing is drawn when the
# script is executed through source(), leaving empty PNG files.
png("GO_CC_barplot.png")
print(barplot(ego_CC2, showCategory = 10,
              title = "Cellular Component of TOP10"))
dev.off()

png("GO_CC_dotplot.png")
print(dotplot(ego_CC2, showCategory = 10,
              title = "Cellular Component of TOP10"))
dev.off()


# Molecular Function
# Over-representation test restricted to GO "MF" terms. The ontology was
# previously set to "ALL", which contradicted the section label, the object
# names and the output file name (GO ORA_MF.csv); it is now "MF".
ego_MF <- enrichGO(
  gene          = gene,
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  ont           = "MF",
  pAdjustMethod = "BH",
  pvalueCutoff  = 0.01,
  qvalueCutoff  = 0.05,
  readable      = TRUE
)

# Reduce redundancy among enriched terms (similarity cutoff 0.7); within each
# group of similar terms the one with the minimum adjusted p-value is kept.
ego_MF2 <- simplify(ego_MF, cutoff = 0.7, by = "p.adjust", select_fun = min)

write.csv(ego_MF2@result, "./GO ORA_MF.csv")

# The plots are printed explicitly: without print() nothing is drawn when the
# script is executed through source(), leaving empty PNG files.
# The plot title spelling ("Molecualr") was corrected as well.
png("GO_MF_barplot.png")
print(barplot(ego_MF2, showCategory = 10,
              title = "Molecular Function of TOP10"))
dev.off()

png("GO_MF_dotplot.png")
print(dotplot(ego_MF2, showCategory = 10,
              title = "Molecular Function of TOP10"))
dev.off()


####  GO:  GSEA  ####
## Biological Process
# Gene set enrichment analysis over GO "BP" terms using the ranked vector
# `gene_fc` (log2 fold changes named by Entrez ID, sorted decreasing).
GO.GSEA_BP <- gseGO(
  geneList      = gene_fc,
  ont           = "BP",
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  exponent      = 1,
  minGSSize     = 10,
  maxGSSize     = 500,
  eps           = 1e-10,
  pvalueCutoff  = 0.05,
  pAdjustMethod = "BH",
  verbose       = TRUE,
  seed          = FALSE,
  by            = "fgsea"
)
write.csv(GO.GSEA_BP@result, "./GO_GSEA_BP.csv")

# Running-score plot for one hand-picked term. The term is only plotted when
# it is actually present in the result table, because a different input can
# easily leave it out of the significant set.
# The former extra arguments (base_size, rel_height [relative heights of the
# sub-panels], subplots, pvalue_table, ES_geom) belong to gseaplot2(), not
# gseaplot(), and had no effect here, so they were dropped.
# NOTE(review): switch to enrichplot::gseaplot2() if those options are wanted.
gsea_bp_term <- "GO:0060079"
if (gsea_bp_term %in% GO.GSEA_BP@result$ID) {
  print(gseaplot(GO.GSEA_BP,
                 geneSetID = gsea_bp_term,
                 by = "all",
                 title = "",
                 color = "black",
                 color.line = "green",
                 color.vline = "#FA5860"))
} else {
  warning("Gene set ", gsea_bp_term,
          " is not in the GO BP GSEA result; plot skipped.", call. = FALSE)
}

###  Cellular Components
# Gene set enrichment analysis over GO "CC" terms using the ranked vector
# `gene_fc` (log2 fold changes named by Entrez ID, sorted decreasing).
GO.GSEA_CC <- gseGO(
  geneList      = gene_fc,
  ont           = "CC",
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  exponent      = 1,
  minGSSize     = 10,
  maxGSSize     = 500,
  eps           = 1e-10,
  pvalueCutoff  = 0.05,
  pAdjustMethod = "BH",
  verbose       = TRUE,
  seed          = FALSE,
  by            = "fgsea"
)
write.csv(GO.GSEA_CC@result, "./GO_GSEA_CC.csv")

# Running-score plot for one hand-picked term. The term is only plotted when
# it is actually present in the result table, because a different input can
# easily leave it out of the significant set.
# The former extra arguments (base_size, rel_height [relative heights of the
# sub-panels], subplots, pvalue_table, ES_geom) belong to gseaplot2(), not
# gseaplot(), and had no effect here, so they were dropped.
# NOTE(review): switch to enrichplot::gseaplot2() if those options are wanted.
gsea_cc_term <- "GO:0098552"
if (gsea_cc_term %in% GO.GSEA_CC@result$ID) {
  print(gseaplot(GO.GSEA_CC,
                 geneSetID = gsea_cc_term,
                 by = "all",
                 title = "",
                 color = "black",
                 color.line = "green",
                 color.vline = "#FA5860"))
} else {
  warning("Gene set ", gsea_cc_term,
          " is not in the GO CC GSEA result; plot skipped.", call. = FALSE)
}

### Molecular Function
# Gene set enrichment analysis over GO "MF" terms using the ranked vector
# `gene_fc` (log2 fold changes named by Entrez ID, sorted decreasing).
GO.GSEA_MF <- gseGO(
  geneList      = gene_fc,
  ont           = "MF",
  OrgDb         = org.Mm.eg.db,
  keyType       = "ENTREZID",
  exponent      = 1,
  minGSSize     = 10,
  maxGSSize     = 500,
  eps           = 1e-10,
  pvalueCutoff  = 0.05,
  pAdjustMethod = "BH",
  verbose       = TRUE,
  seed          = FALSE,
  by            = "fgsea"
)
write.csv(GO.GSEA_MF@result, "./GO_GSEA_MF.csv")

# Running-score plot for one hand-picked term. The term is only plotted when
# it is actually present in the result table, because a different input can
# easily leave it out of the significant set.
# The former extra arguments (base_size, rel_height [relative heights of the
# sub-panels], subplots, pvalue_table, ES_geom) belong to gseaplot2(), not
# gseaplot(), and had no effect here, so they were dropped.
# NOTE(review): switch to enrichplot::gseaplot2() if those options are wanted.
gsea_mf_term <- "GO:0003723"
if (gsea_mf_term %in% GO.GSEA_MF@result$ID) {
  print(gseaplot(GO.GSEA_MF,
                 geneSetID = gsea_mf_term,
                 by = "all",
                 title = "",
                 color = "black",
                 color.line = "green",
                 color.vline = "#FA5860"))
} else {
  warning("Gene set ", gsea_mf_term,
          " is not in the GO MF GSEA result; plot skipped.", call. = FALSE)
}

### KEGG: Over-Representation Analysis   ####
# Tests the DEG Entrez IDs in `gene` for over-represented KEGG pathways of
# organism code "mmu"; with `use_internal_data = FALSE` the pathway
# annotation is not taken from a local data package.
kk <- enrichKEGG(
  gene              = gene,
  organism          = "mmu",
  keyType           = "ncbi-geneid",
  pvalueCutoff      = 0.05,
  pAdjustMethod     = "BH",
  minGSSize         = 10,
  maxGSSize         = 500,
  qvalueCutoff      = 0.2,
  use_internal_data = FALSE
)

write.csv(kk@result, "./KEGG_ORA.csv")

# The plots are printed explicitly: without print() nothing is drawn when the
# script is executed through source(), leaving empty PNG files.
png("KEGG_ORA_barplot.png")
print(barplot(kk, showCategory = 10, title = "KEGG of TOP10"))
dev.off()

png("KEGG_ORA_dotplot.png")
print(dotplot(kk, showCategory = 10, title = "KEGG of TOP10"))
dev.off()


#### KEGG: GSEA  ####

# Gene set enrichment analysis over KEGG pathways of organism code "mmu"
# using the ranked vector `gene_fc` (log2 fold changes named by Entrez ID,
# sorted decreasing).
KEGG.GSEA <- gseKEGG(
  geneList          = gene_fc,
  organism          = "mmu",
  keyType           = "ncbi-geneid",
  exponent          = 1,
  minGSSize         = 10,
  maxGSSize         = 500,
  eps               = 1e-10,
  pvalueCutoff      = 0.05,
  pAdjustMethod     = "BH",
  verbose           = TRUE,
  use_internal_data = FALSE,
  seed              = FALSE,
  by                = "fgsea"
)

write.csv(KEGG.GSEA@result, "./KEGG_GSEA.csv")

# Running-score plot for one hand-picked pathway. The pathway is only plotted
# when it is actually present in the result table, because a different input
# can easily leave it out of the significant set.
# The former extra arguments (base_size, rel_height [relative heights of the
# sub-panels], subplots, pvalue_table, ES_geom) belong to gseaplot2(), not
# gseaplot(), and had no effect here, so they were dropped.
# NOTE(review): switch to enrichplot::gseaplot2() if those options are wanted.
gsea_kegg_pathway <- "mmu04514"
if (gsea_kegg_pathway %in% KEGG.GSEA@result$ID) {
  print(gseaplot(KEGG.GSEA,
                 geneSetID = gsea_kegg_pathway,
                 by = "all",
                 title = "",
                 color = "black",
                 color.line = "green",
                 color.vline = "#FA5860"))
} else {
  warning("Pathway ", gsea_kegg_pathway,
          " is not in the KEGG GSEA result; plot skipped.", call. = FALSE)
}



了解到还有 Disease Ontology (DO) 库以及与药物 (Drug) 相关的数据库,后续再进行详细学习

你可能感兴趣的:(r语言,数据库)