如果你觉得GO
或KEGG
不够解释,或许可以试试GSEA
具体内容参考这篇
#GSEA analysis steps
#得到差异分析结果
DESeq2_DEG = as.data.frame(read.table("路径/DESeq2_result.txt", sep = "\t", header = T, row.names = 1))
#提取log2FoldChange这列
geneList <- as.data.frame(DESeq2_DEG$log2FoldChange)
GSEA
分析需要用ENTREZID
!!!
#转换名字,调用需要的R包
library(stringr)
library(clusterProfiler)
library(org.Hs.eg.db)
geneList_tr <- bitr(row.names(DESeq2_DEG),
fromType = "SYMBOL",
toType = c("ENTREZID","ENSEMBL"),
OrgDb = org.Hs.eg.db)
#将ENTREZID与差异基因进行合并
DESeq2_DEG_withsymbol <- cbind(DESeq2_DEG, row.names(DESeq2_DEG))
names(DESeq2_DEG_withsymbol )[7]<- c("SYMBOL")
new_list <- merge(DESeq2_DEG_withsymbol, geneList_tr)
new_list <- merge(new_list, geneList_tr, by = "ENSEMBL")
geneList <- new_list$log2FoldChange
names(geneList) <- geneList_tr$ENTREZID
# 最后从大到小排序,得到一个字符串
geneList <- sort(geneList,decreasing = T)
#进行GSEA富集分析
go_result <- gseGO(geneList = geneList,
ont = "BP",
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
pvalueCutoff = 0.05,
pAdjustMethod = "BH")#p值校正方法
kegg_result <- gseKEGG(
geneList,
organism = "hsa",
keyType = "kegg",
exponent = 1,
minGSSize = 10,
maxGSSize = 500,
eps = 1e-10,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
verbose = TRUE,
use_internal_data = FALSE,
seed = FALSE,
by = "fgsea")
head(kegg_result)
dim(kegg_result)
#按照enrichment score从高到低排序,便于查看富集通路
write.table(kegg_result,"路径/GSEA富集.txt",sep = "\t",quote = F,col.names = T,row.names = F)
#画GSEA富集图
library(enrichplot)
gseaplot2(
kegg_result, #gseaResult object,即GSEA结果
"hsa04727",#富集的ID编号
#标题
color = "green",#GSEA线条颜色
base_size = 11,#基础字体大小
rel_heights = c(1.5, 0.5, 1),#副图的相对高度
subplots = 1:3, #要显示哪些副图 如subplots=c(1,3) #只要第一和第三个图,subplots=1#只要第一个图
pvalue_table = FALSE, #是否添加 pvalue table
ES_geom = "line") #running enrichment score用先还是用点ES_geom = "dot"
如果想画好几条在一幅图 可以写
paths "hsa04510", "hsa04512", "hsa04974", "hsa05410")
gseaplot2(kk, paths)