求助关于GO富集分析可视化代码问题

#相关R包载入(还有部分R包使用前再载入):

library(dplyr)#数据清洗

library(stringr)

library(org.Hs.eg.db)#物种注释(Homo sapiens)

library(clusterProfiler)#富集分析

library(ggplot2)#个性化绘图

library(RColorBrewer)#配色调整

# Import the differential-expression result table (the .Rdata file is
# expected to provide an object named `DESeq2`, which is used below).
load("TCGA_CHOL_DESeq2.Rdata")
head(DESeq2)

# Label every gene as up-regulated, down-regulated or unchanged.
# Thresholds: |log2FoldChange| > 2 and pvalue < 0.05.
# The final catch-all branch assigns "none" to everything else, including
# rows whose pvalue or log2FoldChange is NA; without it those rows would get
# an NA label and later leak NA gene IDs into the up/down gene lists.
DESeq2$group <- case_when(
  DESeq2$log2FoldChange > 2 & DESeq2$pvalue < 0.05 ~ "up",
  DESeq2$log2FoldChange < -2 & DESeq2$pvalue < 0.05 ~ "down",
  TRUE ~ "none"
)
head(DESeq2)

# Select the differentially expressed genes needed for the analysis
# (up-regulated only, down-regulated only, or both).
# which() discards NA comparisons, so an NA group label can never turn into
# an NA entry in the gene vectors (plain logical indexing would keep them).
up <- rownames(DESeq2)[which(DESeq2$group == "up")] # up-regulated
down <- rownames(DESeq2)[which(DESeq2$group == "down")] # down-regulated
diff <- c(up, down) # all differential genes
head(up)
head(down)

# ID conversion.
# List the ID types that org.Hs.eg.db can translate between:
columns(org.Hs.eg.db)

# Convert gene symbols to Entrez IDs with clusterProfiler's bitr()
# (backed by org.Hs.eg.db). The same conversion is applied to each gene set.
symbol_to_entrez <- function(symbols) {
  bitr(
    symbols,
    fromType = "SYMBOL", # ID type we currently have
    toType = "ENTREZID", # ID type we want
    OrgDb = "org.Hs.eg.db"
  )
}

up_entrez <- symbol_to_entrez(up)
down_entrez <- symbol_to_entrez(down)
diff_entrez <- symbol_to_entrez(diff)
head(diff_entrez)

# GO (Gene Ontology) enrichment analysis on all differential genes.
# The four runs differ only in the ontology, so they share one helper.
#
# ontology: "MF", "CC", "BP", or "ALL" (all three combined).
# Returns the object produced by enrichGO().
run_go_enrichment <- function(ontology) {
  enrichGO(
    gene = diff_entrez$ENTREZID, # differential genes to test
    OrgDb = org.Hs.eg.db, # annotation package for the species
    ont = ontology,
    pAdjustMethod = "BH", # multiple-testing correction method
    pvalueCutoff = 0.05,
    qvalueCutoff = 0.05,
    readable = TRUE # map gene IDs to gene names
  )
}

# MF:
GO_MF_diff <- run_go_enrichment("MF")
GO_MF_result <- GO_MF_diff@result # extract the result table
# View() needs an interactive session; skip it when run as a script.
if (interactive()) {
  View(GO_MF_result)
}

# CC:
GO_CC_diff <- run_go_enrichment("CC")
GO_CC_result <- GO_CC_diff@result

# BP:
GO_BP_diff <- run_go_enrichment("BP")
GO_BP_result <- GO_BP_diff@result

# MF, CC and BP combined:
GO_all_diff <- run_go_enrichment("ALL")
GO_all_result <- GO_all_diff@result

# Save the GO enrichment objects:
save(GO_MF_diff, GO_CC_diff, GO_BP_diff, GO_all_diff, file = "GO_diff.Rdata")

# Built-in enrichment plots for the Cellular Component result.
cc_top_n <- 20 # number of terms to display
cc_label_width <- 30 # wrap term labels longer than 30 characters

# GO enrichment bar plot:
barplot(
  GO_CC_diff,
  x = "Count", # or "GeneRatio"
  color = "pvalue", # or "p.adjust" / "qvalue"
  showCategory = cc_top_n,
  font.size = 12,
  title = "Cellular Component enrichment barplot",
  label_format = cc_label_width
)

# GO enrichment bubble plot:
dotplot(
  GO_CC_diff,
  x = "GeneRatio",
  color = "p.adjust",
  title = "Top 20 of GO CC terms Enrichment",
  showCategory = cc_top_n,
  label_format = cc_label_width
)

#### Custom visualisation with ggplot2

# Keep at most the first 20 rows of each result table (the tables are assumed
# to be ordered by p-value already). head() is used instead of `[1:20, ]`
# because the latter pads the table with all-NA rows when fewer than 20 terms
# are available.
MF <- head(GO_MF_result, 20)
CC <- head(GO_CC_result, 20)
BP <- head(GO_BP_result, 20)

# Shared theme tweaks for all custom plots.
mytheme <- theme(
  axis.title = element_text(size = 13),
  axis.text = element_text(size = 11),
  plot.title = element_text(size = 14, hjust = 0.5, face = "bold"),
  legend.title = element_text(size = 13),
  legend.text = element_text(size = 11)
)

# Some MF descriptions are very long: truncate them to 50 characters
# (the tail is replaced by "...").
# make.unique() only changes labels that became identical after truncation;
# duplicated labels would otherwise make factor() fail on duplicated levels.
MF2 <- MF
MF2$Description <- make.unique(
  str_trunc(MF$Description, width = 50, side = "right")
)
MF2$Description

# Fix the plotting order by turning the descriptions into factors
# (levels reversed so the first row ends up at the top of the y axis).
MF2$term <- factor(MF2$Description, levels = rev(MF2$Description))
CC$term <- factor(CC$Description, levels = rev(CC$Description))
BP$term <- factor(BP$Description, levels = rev(BP$Description))

# GO enrichment bar chart.
#
# x: the NAME (a character string) of a data frame that has the columns
#    Count, term and pvalue; the object is looked up with get(). The name is
#    also used as the prefix of the plot title.
# Returns a ggplot object; add a fill scale to it as needed.
GO_bar <- function(x) {
  go_df <- get(x)
  # Wrap long term descriptions on the y axis at 50 characters.
  wrap_label <- function(lbl) str_wrap(lbl, width = 50)

  base_plot <- ggplot(
    data = go_df,
    aes(x = Count, y = term, fill = -log10(pvalue))
  )

  base_plot +
    geom_bar(stat = "identity", width = 0.8) +
    scale_y_discrete(labels = wrap_label) +
    labs(
      x = "Gene Number",
      y = "Description",
      title = paste0(x, " of GO enrichment barplot")
    ) +
    theme_bw() +
    mytheme
}

# One bar chart per ontology, each with its own ColorBrewer palette.
bar_with_palette <- function(df_name, palette_name) {
  GO_bar(df_name) +
    scale_fill_distiller(palette = palette_name, direction = 1)
}

p1 <- bar_with_palette("MF2", "Blues") # MF
p2 <- bar_with_palette("CC", "Reds") # CC
p3 <- bar_with_palette("BP", "Oranges") # BP

# GO enrichment bubble chart: compute the Rich Factor first.
# Rich Factor = GeneRatio / BgRatio, rounded to two decimals.

# Convert ratio strings such as "12/345" into numbers, without resorting to
# eval(parse()) on table content.
ratio_to_numeric <- function(ratio) {
  parts <- strsplit(as.character(ratio), "/", fixed = TRUE)
  vapply(
    parts,
    function(p) as.numeric(p[1]) / as.numeric(p[2]),
    numeric(1)
  )
}

# Rich Factor for every row of an enrichment result table that has the
# GeneRatio and BgRatio columns.
rich_factor <- function(go_df) {
  gene_ratio <- ratio_to_numeric(go_df$GeneRatio)
  bg_ratio <- ratio_to_numeric(go_df$BgRatio)
  round(gene_ratio / bg_ratio, 2)
}

# MF:
rf <- rich_factor(MF2)
MF2$Rich_Factor <- rf

# CC and BP: GO_dot("CC") and GO_dot("BP") below need this column as well.
CC$Rich_Factor <- rich_factor(CC)
BP$Rich_Factor <- rich_factor(BP)

# GO enrichment bubble chart.
#
# x: the NAME (a character string) of a data frame that has the columns
#    Rich_Factor, term, Count and pvalue; the object is looked up with get().
#    The name is also used as the prefix of the plot title.
# Returns a ggplot object; add a colour scale to it as needed.
GO_dot <- function(x) {
  y <- get(x)
  ggplot(
    data = y,
    aes(x = Rich_Factor, y = term)
  ) +
    # Bubble size encodes the gene count, colour encodes -log10(pvalue).
    geom_point(aes(size = Count, color = -log10(pvalue))) +
    # Wrap long term descriptions on the y axis at 50 characters.
    scale_y_discrete(labels = function(y) str_wrap(y, width = 50)) +
    labs(
      x = "Rich Factor",
      y = "Description",
      # Space added after the name: the title used to read "MF2of GO ...".
      title = paste0(x, " of GO enrichment Dotplot"),
      size = "Gene Number"
    ) +
    theme_bw() +
    mytheme
}

# One bubble chart per ontology, each with its own ColorBrewer palette.
dot_with_palette <- function(df_name, palette_name) {
  GO_dot(df_name) +
    scale_color_distiller(palette = palette_name, direction = 1)
}

pp1 <- dot_with_palette("MF2", "YlOrRd") # MF
pp2 <- dot_with_palette("CC", "YlGnBu") # CC
pp3 <- dot_with_palette("BP", "YlOrBr") # BP

# Pool the three ontologies and keep the 30 terms with the smallest p-value.
all_result <- arrange(GO_all_result, pvalue) # ascending by default

# head() instead of `[1:30, ]`: no all-NA padding rows when fewer than 30
# terms are available.
all <- head(all_result, 30)

# Fix the plotting order (convert to factor; first row at the top).
# make.unique() only alters a label if the same description occurs more than
# once, which would otherwise make factor() fail on duplicated levels.
all_term_labels <- make.unique(as.character(all$Description))
all$term <- factor(all_term_labels, levels = rev(all_term_labels))

# Colour for each y-axis label, used to tell the ontologies apart.
#
# x: vector of ontology codes.
# Returns a character vector of the same length: one fixed colour for each of
# "BP", "CC" and "MF", and "black" for anything else.
col_function <- function(x) {
  ontology_colors <- c(
    BP = "#fc4d26",
    CC = "#1792c1",
    MF = "#3fad5d"
  )
  hit <- match(x, names(ontology_colors))
  picked <- unname(ontology_colors)[hit]
  picked[is.na(hit)] <- "black"
  picked
}

y_text_color <- col_function(all$ONTOLOGY)

# Combined enrichment bar chart for the pooled top terms.
# `ppp1` was referenced without ever being created; this is one way to build
# it from the objects defined above (bars filled by ontology, y-axis labels
# coloured by ontology).

# Axis labels are drawn in factor-level order (bottom to top), so the colours
# are re-ordered to follow the levels of `term` rather than the row order.
ppp1_axis_color <- y_text_color[
  match(levels(all$term), as.character(all$term))
]

ppp1 <- ggplot(
  data = all,
  aes(x = Count, y = term, fill = ONTOLOGY)
) +
  geom_bar(stat = "identity", width = 0.8) +
  # Same colours as col_function() so bars and labels agree.
  scale_fill_manual(
    values = c(BP = "#fc4d26", CC = "#1792c1", MF = "#3fad5d")
  ) +
  scale_y_discrete(labels = function(lbl) str_wrap(lbl, width = 50)) +
  labs(
    x = "Gene Number",
    y = "Description",
    title = "Top 30 of GO enrichment barplot",
    fill = "Ontology"
  ) +
  theme_bw() +
  mytheme +
  # NOTE: a colour vector in element_text() works but is not officially
  # supported by ggplot2, so expect a warning about vectorized input.
  theme(axis.text.y = element_text(color = ppp1_axis_color))

ppp1

求助关于GO富集分析可视化代码问题_第1张图片

# Combined enrichment bubble chart for the pooled top terms.
# `ppp2` was referenced without ever being created; this is one way to build
# it from the objects defined above.

# Convert ratio strings such as "12/345" into numbers.
ppp2_ratio <- function(ratio) {
  parts <- strsplit(as.character(ratio), "/", fixed = TRUE)
  vapply(
    parts,
    function(p) as.numeric(p[1]) / as.numeric(p[2]),
    numeric(1)
  )
}

# Rich Factor = GeneRatio / BgRatio, rounded to two decimals.
all$Rich_Factor <- round(
  ppp2_ratio(all$GeneRatio) / ppp2_ratio(all$BgRatio),
  2
)

# Axis labels are drawn in factor-level order (bottom to top), so the colours
# are re-ordered to follow the levels of `term` rather than the row order.
ppp2_axis_color <- y_text_color[
  match(levels(all$term), as.character(all$term))
]

ppp2 <- ggplot(
  data = all,
  aes(x = Rich_Factor, y = term)
) +
  # Bubble size encodes the gene count, colour encodes -log10(pvalue).
  geom_point(aes(size = Count, color = -log10(pvalue))) +
  scale_color_distiller(palette = "YlOrRd", direction = 1) +
  scale_y_discrete(labels = function(lbl) str_wrap(lbl, width = 50)) +
  labs(
    x = "Rich Factor",
    y = "Description",
    title = "Top 30 of GO enrichment Dotplot",
    size = "Gene Number"
  ) +
  theme_bw() +
  mytheme +
  # NOTE: a colour vector in element_text() works but is not officially
  # supported by ggplot2, so expect a warning about vectorized input.
  theme(axis.text.y = element_text(color = ppp2_axis_color))

ppp2

求助关于GO富集分析可视化代码问题_第2张图片

请问生成这里的ppp1和ppp2的代码应该怎么编写?我参照上面的代码研究了半天,也没有成功。菜鸟一枚,请大神指教,谢谢。

你可能感兴趣的:(r语言)