#相关R包载入(还有部分R包使用前再载入):
library(dplyr)#数据清洗
library(stringr)
library(org.Hs.eg.db)#物种注释(Homo sapiens)
library(clusterProfiler)#富集分析
library(ggplot2)#个性化绘图
library(RColorBrewer)#配色调整
# Import the differential-expression result table.
# NOTE(review): the .Rdata file is expected to provide a data frame named
# `DESeq2` with `log2FoldChange` and `pvalue` columns and gene symbols as
# row names -- confirm against the file that produced it.
load("TCGA_CHOL_DESeq2.Rdata")
head(DESeq2)
# Label every gene as up-regulated, down-regulated or unchanged.
# The final catch-all also covers rows whose pvalue or log2FoldChange is NA;
# without it those rows would get an NA label and the `==` subsetting below
# would insert NA entries into the gene lists.
DESeq2$group <- case_when(
  DESeq2$log2FoldChange > 2 & DESeq2$pvalue < 0.05 ~ "up",
  DESeq2$log2FoldChange < -2 & DESeq2$pvalue < 0.05 ~ "down",
  TRUE ~ "none"
)
head(DESeq2)
# Pick the gene sets of interest (up only, down only, or both):
up <- rownames(DESeq2)[DESeq2$group == "up"] # up-regulated genes
down <- rownames(DESeq2)[DESeq2$group == "down"] # down-regulated genes
diff <- c(up, down) # all differential genes
head(up)
head(down)
# Gene identifier conversion.
# Show which identifier types the annotation package can translate between:
columns(org.Hs.eg.db)
## Convert with clusterProfiler's bitr() helper (backed by org.Hs.eg.db).
# Up-regulated genes:
up_entrez <- bitr(
  up,
  fromType = "SYMBOL", # identifier type we currently have
  toType = "ENTREZID", # identifier type we want
  OrgDb = "org.Hs.eg.db"
)
# Down-regulated genes:
down_entrez <- bitr(
  down,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = "org.Hs.eg.db"
)
# All differential genes:
diff_entrez <- bitr(
  diff,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = "org.Hs.eg.db"
)
head(diff_entrez)
# GO (Gene Ontology) enrichment analysis on all differential genes.
# The four runs below differ only in the ontology, so the shared settings
# live in one helper.
#   ont: "MF", "CC", "BP", or "ALL" to combine the three ontologies.
# Returns whatever enrichGO() returns for the Entrez IDs in `diff_entrez`.
run_go_enrichment <- function(ont) {
  enrichGO(
    gene = diff_entrez$ENTREZID, # differential genes to test
    OrgDb = org.Hs.eg.db, # annotation package for the species
    ont = ont,
    pAdjustMethod = "BH", # multiple-testing correction method
    pvalueCutoff = 0.05,
    qvalueCutoff = 0.05,
    readable = TRUE # map gene IDs to gene names in the output
  )
}
# NOTE(review): the tables below are read straight from the @result slot;
# depending on the clusterProfiler/DOSE version this slot may also hold terms
# that do not pass the cutoffs -- confirm before treating every row as
# significant.
## MF:
GO_MF_diff <- run_go_enrichment("MF")
GO_MF_result <- GO_MF_diff@result
# View() needs a data viewer, so only open it in an interactive session;
# otherwise the script would stop here when run in batch mode.
if (interactive()) {
  View(GO_MF_result)
}
## CC:
GO_CC_diff <- run_go_enrichment("CC")
GO_CC_result <- GO_CC_diff@result
## BP:
GO_BP_diff <- run_go_enrichment("BP")
GO_BP_result <- GO_BP_diff@result
## MF, CC and BP combined:
GO_all_diff <- run_go_enrichment("ALL")
GO_all_result <- GO_all_diff@result
## Save the enrichment objects:
save(GO_MF_diff, GO_CC_diff, GO_BP_diff, GO_all_diff, file = "GO_diff.Rdata")
# Bar chart of the cellular-component enrichment result.
barplot(
  GO_CC_diff,
  title = "Cellular Component enrichment barplot",
  x = "Count", # alternative: "GeneRatio"
  color = "pvalue", # alternatives: "p.adjust", "qvalue"
  showCategory = 20, # show the first 20 terms
  label_format = 30, # wrap labels longer than 30 characters
  font.size = 12
)
# Bubble chart of the cellular-component enrichment result.
dotplot(
  GO_CC_diff,
  title = "Top 20 of GO CC terms Enrichment",
  x = "GeneRatio",
  color = "p.adjust",
  showCategory = 20,
  label_format = 30
)
#### Custom visualisation with ggplot2:
# Keep the first 20 terms of each ontology under short names.
# head() returns at most 20 rows; indexing with 1:20 would pad the table with
# all-NA rows whenever an ontology has fewer than 20 terms.
MF <- head(GO_MF_result, 20)
CC <- head(GO_CC_result, 20)
BP <- head(GO_BP_result, 20)
# Shared theme tweaks: text sizes for axes and legend, plus a centred bold
# plot title. Added on top of theme_bw() by the plotting code below.
mytheme <- theme(
  plot.title = element_text(size = 14, hjust = 0.5, face = "bold"),
  axis.title = element_text(size = 13),
  axis.text = element_text(size = 11),
  legend.title = element_text(size = 13),
  legend.text = element_text(size = 11)
)
# Some MF descriptions are very long: cap them at 50 characters, with the
# overflow on the right replaced by "...".
MF2 <- MF
MF2[["Description"]] <- str_trunc(MF[["Description"]], width = 50, side = "right")
MF2$Description
# Fix the plotting order by turning the descriptions into factors whose
# levels are the descriptions in reverse row order.
MF2[["term"]] <- factor(MF2[["Description"]], levels = rev(MF2[["Description"]]))
CC[["term"]] <- factor(CC[["Description"]], levels = rev(CC[["Description"]]))
BP[["term"]] <- factor(BP[["Description"]], levels = rev(BP[["Description"]]))
# GO enrichment bar chart.
#   x: name (character string) of a data frame holding the columns `Count`,
#      `term` and `pvalue`; the same string is used as the title prefix.
# Returns a ggplot object: gene count on the x axis, one bar per term,
# bars filled by -log10(pvalue).
GO_bar <- function(x) {
  plot_data <- get(x)
  # Long term descriptions are wrapped at 50 characters on the axis.
  wrap_label <- function(lbl) str_wrap(lbl, width = 50)
  plot_title <- paste0(x, " of GO enrichment barplot")
  ggplot(
    data = plot_data,
    aes(x = Count, y = term, fill = -log10(pvalue))
  ) +
    scale_y_discrete(labels = wrap_label) +
    geom_bar(stat = "identity", width = 0.8) +
    labs(x = "Gene Number", y = "Description", title = plot_title) +
    theme_bw() +
    mytheme
}
# One bar chart per ontology, each with its own sequential palette.
# Molecular function:
p1 <- GO_bar("MF2") +
  scale_fill_distiller(palette = "Blues", direction = 1)
# Cellular component:
p2 <- GO_bar("CC") +
  scale_fill_distiller(palette = "Reds", direction = 1)
# Biological process:
p3 <- GO_bar("BP") +
  scale_fill_distiller(palette = "Oranges", direction = 1)
# GO enrichment bubble chart: first compute the rich factor
# (GeneRatio / BgRatio, rounded to 2 decimals) for each table.
# Convert ratio strings such as "12/345" to numbers without eval(parse()).
go_ratio_to_number <- function(ratio) {
  parts <- strsplit(as.character(ratio), "/", fixed = TRUE)
  vapply(
    parts,
    function(p) as.numeric(p[1]) / as.numeric(p[2]),
    numeric(1)
  )
}
# Rich factor for every row of an enrichment result table.
go_rich_factor <- function(go_table) {
  gene_ratio <- go_ratio_to_number(go_table$GeneRatio)
  bg_ratio <- go_ratio_to_number(go_table$BgRatio)
  round(gene_ratio / bg_ratio, 2)
}
# MF:
rf <- go_rich_factor(MF2)
MF2$Rich_Factor <- rf
# CC and BP need the same column, otherwise the bubble charts built from
# them fail because `Rich_Factor` is missing.
CC$Rich_Factor <- go_rich_factor(CC)
BP$Rich_Factor <- go_rich_factor(BP)
# GO enrichment bubble chart.
#   x: name (character string) of a data frame holding the columns
#      `Rich_Factor`, `term`, `Count` and `pvalue`; the same string is used
#      as the title prefix.
# Returns a ggplot object: rich factor on the x axis, one point per term,
# point size = gene count, point colour = -log10(pvalue).
GO_dot <- function(x) {
  y <- get(x)
  ggplot(
    data = y,
    aes(x = Rich_Factor, y = term)
  ) +
    geom_point(aes(size = Count, color = -log10(pvalue))) +
    # Long term descriptions are wrapped at 50 characters on the axis.
    scale_y_discrete(labels = function(y) str_wrap(y, width = 50)) +
    labs(
      x = "Rich Factor",
      y = "Description",
      # Leading space added so the title reads "<name> of GO ...".
      title = paste0(x, " of GO enrichment Dotplot"),
      size = "Gene Number"
    ) +
    theme_bw() +
    mytheme
}
# One bubble chart per ontology, each with its own sequential palette.
# Molecular function:
pp1 <- GO_dot("MF2") +
  scale_color_distiller(palette = "YlOrRd", direction = 1)
# Cellular component:
pp2 <- GO_dot("CC") +
  scale_color_distiller(palette = "YlGnBu", direction = 1)
# Biological process:
pp3 <- GO_dot("BP") +
  scale_color_distiller(palette = "YlOrBr", direction = 1)
# Pool the three ontologies and keep the 30 terms with the smallest p-value.
all_result <- arrange(GO_all_result, pvalue) # ascending by default
# head() returns at most 30 rows; indexing with 1:30 would pad the table with
# all-NA rows whenever fewer than 30 terms are available.
all <- head(all_result, 30)
# Fix the plotting order (factor levels = descriptions in reverse row order):
all$term <- factor(all$Description, levels = rev(all$Description))
# Colour for y-axis labels, chosen by ontology.
#   x: vector of ontology codes.
# Returns a character vector as long as `x`: "BP", "CC" and "MF" get their
# own colour, every other value (including NA) gets "black".
col_function <- function(x) {
  ontology_colors <- c(BP = "#fc4d26", CC = "#1792c1", MF = "#3fad5d")
  idx <- match(x, names(ontology_colors))
  col <- unname(ontology_colors[idx])
  col[is.na(idx)] <- "black"
  col
}
y_text_color <- col_function(all$ONTOLOGY)
# The y axis shows the levels of `term`, which are the rows of `all` in
# reverse order, so the per-label colours are reversed to stay aligned.
# ggplot2 may warn that vectorised input to element_text() is not officially
# supported.
y_axis_colors <- theme(axis.text.y = element_text(color = rev(y_text_color)))
# Long term descriptions are wrapped at 50 characters on the axis.
wrap_term_label <- function(lbl) str_wrap(lbl, width = 50)
# Combined bar chart: bars filled by ontology, same colours as the labels.
ppp1 <- ggplot(
  data = all,
  aes(x = Count, y = term, fill = ONTOLOGY)
) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_fill_manual(
    values = c(BP = "#fc4d26", CC = "#1792c1", MF = "#3fad5d")
  ) +
  scale_y_discrete(labels = wrap_term_label) +
  labs(
    x = "Gene Number",
    y = "Description",
    title = "Top 30 of GO enrichment barplot",
    fill = "Ontology"
  ) +
  theme_bw() +
  mytheme +
  y_axis_colors
ppp1
# Combined bubble chart: needs the rich factor (GeneRatio / BgRatio, rounded
# to 2 decimals); ratios such as "12/345" are split and divided.
go_all_ratio_value <- function(ratio) {
  vapply(
    strsplit(as.character(ratio), "/", fixed = TRUE),
    function(p) as.numeric(p[1]) / as.numeric(p[2]),
    numeric(1)
  )
}
all$Rich_Factor <- round(
  go_all_ratio_value(all$GeneRatio) / go_all_ratio_value(all$BgRatio),
  2
)
ppp2 <- ggplot(
  data = all,
  aes(x = Rich_Factor, y = term)
) +
  geom_point(aes(size = Count, color = -log10(pvalue))) +
  scale_color_distiller(palette = "YlOrRd", direction = 1) +
  scale_y_discrete(labels = wrap_term_label) +
  labs(
    x = "Rich Factor",
    y = "Description",
    title = "Top 30 of GO enrichment Dotplot",
    size = "Gene Number"
  ) +
  theme_bw() +
  mytheme +
  y_axis_colors
ppp2
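# Reader question: how should the code that generates ppp1 and ppp2 be written? I studied it for a long time and, following the code above, still could not get it to work. I am a beginner; any guidance from the experts would be appreciated. Thank you.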