DAVID富集分析及R语言可视化

DAVID富集分析

DAVID富集分析支持多种基因标识符类型,本次以gene symbol为例

DAVID:https://david.ncifcrf.gov/

进入之后跟着以下步骤操作即可



选择相应物种(以人和小鼠为例),再点击Submit



一般进行GO(BP/CC/MF)和KEGG分析,注意勾选相应的注释类别


右击Download File -> 链接另存为,即可将富集分析结果保存至本地文档

R语言可视化

```r

setwd("G:/富集分析")  # set the working directory

# Prepare DAVID enrichment results for plotting.
# This example uses the up-regulated genes; GO and KEGG results can be
# visualised separately, here GO_BP and KEGG are kept.

library(tidyverse)

# Read a tab-separated DAVID export.
# quote = "" and comment.char = "" stop read.table() from treating
# apostrophes or '#' inside term names as quoting / comment markers, which
# would otherwise merge or truncate rows.
read_david <- function(path) {
  read.table(path, sep = "\t", header = TRUE, quote = "", comment.char = "")
}

# Keep the rows of one annotation category and reduce them to the columns
# needed for plotting.
#   df       data frame with columns Category, Term, Count, PValue
#   category value of df$Category to keep
#   sep      delimiter between the term ID and the term name in df$Term
#   label    value written to the Category column of the result
# Returns a data frame with columns Category, Term, Count, PValue, where Term
# holds only the name part (the ID in front of `sep` is dropped).
extract_terms <- function(df, category, sep, label = category) {
  keep <- which(df$Category == category)
  out <- df[keep, c("Category", "Term", "Count", "PValue"), drop = FALSE]
  out$Category <- rep(label, nrow(out))
  # extra = "merge" splits at the first delimiter only, so a term name that
  # itself contains the delimiter is kept whole instead of being cut off.
  out <- separate(
    data = out, col = Term, into = c("ID", "Term"),
    sep = sep, extra = "merge"
  )
  out[, c("Category", "Term", "Count", "PValue"), drop = FALSE]
}

# Enrichment table of the up-regulated genes
DA_UP <- read_david("DAVID富集结果.txt")

# Keep terms with P < 0.05 (treated as statistically significant) and
# more than 3 genes.
DA_UP <- subset(DA_UP, PValue < 0.05 & Count > 3)

# From here on the PValue column holds -log10(P).
DA_UP$PValue <- -log10(DA_UP$PValue)

UP_KEGG <- extract_terms(DA_UP, "KEGG_PATHWAY", sep = ":")

# GO BP rows are relabelled so the category name looks nicer in the plot.
UP_BP <- extract_terms(
  DA_UP, "GOTERM_BP_DIRECT", sep = "~", label = "GOBP_PATHWAY"
)

UP <- rbind(UP_KEGG, UP_BP)

write.table(
  UP, file = "UP_enrichment.txt",
  sep = "\t", quote = FALSE, row.names = FALSE
)

# DOWN_KEGG / DOWN_BP are not created anywhere in this script. They only
# exist if the steps above were also run on the DAVID result of the
# down-regulated genes, so guard instead of failing with "object not found".
if (exists("DOWN_KEGG") && exists("DOWN_BP")) {
  DOWN <- rbind(DOWN_KEGG, DOWN_BP)
  write.table(
    DOWN, file = "DOWN_enrichment.txt",
    sep = "\t", quote = FALSE, row.names = FALSE
  )
} else {
  warning(
    "DOWN_KEGG / DOWN_BP not found: DOWN_enrichment.txt was not written. ",
    "Run extract_terms() on the down-regulated DAVID result first.",
    call. = FALSE
  )
}

## Draw the bar chart ----
# reorder() sorts the bars by the plotted value.
# To reverse the sort order, put a minus sign in front of the ordering
# variable inside reorder() (PValue here).

##########################################################
## Enrichment plot of up- / down-regulated genes
##########################################################

# NOTE(review): this directory differs from the one the enrichment tables
# were written to earlier in the script ("G:/富集分析") - confirm the files
# are actually located here.
setwd("G:/科研/毕设/富集分析")

library(ggplot2)

# Read an enrichment table written with write.table(quote = FALSE).
# quote = "" / comment.char = "" keep term names containing apostrophes or
# '#' intact.
read_enrichment <- function(path) {
  read.table(path, header = TRUE, sep = "\t", quote = "", comment.char = "")
}

# Select rows by position, ignoring positions beyond the end of the table.
# Indexing past nrow() would otherwise yield all-NA rows that end up in the
# plot.
pick_rows <- function(df, idx) {
  out_of_range <- idx[idx > nrow(df)]
  if (length(out_of_range) > 0) {
    warning(
      "Row indices beyond the table were dropped: ",
      paste(out_of_range, collapse = ", "),
      call. = FALSE
    )
  }
  df[idx[idx <= nrow(df)], , drop = FALSE]
}

# Horizontal bar chart of enrichment terms.
#   terms data frame with columns Category, Term, Count, PValue
#   title plot title
# Bars show PValue per Term, are ordered by PValue, filled and facetted by
# Category, and labelled with Count. Returns the ggplot object.
plot_enrichment <- function(terms, title) {
  ggplot(
    data = terms,
    aes(x = reorder(Term, PValue), y = PValue, fill = Category)
  ) +
    geom_bar(
      stat = "identity",
      width = 0.8,
      position = position_dodge(width = 0.9)
    ) +
    geom_text(aes(label = Count), vjust = 0.4, hjust = -0.2) +
    facet_grid(Category ~ ., scales = "free_y") +
    theme(
      panel.grid.major.x = element_line(colour = "white"),
      panel.background = element_blank(),
      axis.line.y = element_blank(),
      axis.line.x = element_line(color = "black"),
      axis.title.y = element_blank(),
      strip.text.y = element_blank()
    ) +
    ggtitle(title) +
    ylab("-log10PValue") +
    coord_flip()
}

UP <- read_enrichment("UP_enrichment.txt")

# There are too many enriched terms, so only the pathways relevant to the
# study are kept (row positions chosen by hand).
UP_END <- pick_rows(
  UP,
  c(1, 2, 4, 5, 6, 8, 9, 13, 17, 19, 20, 21, 24, 28, 32, 46, 49, 51, 53, 62)
)

# The DOWN table is not needed for the plot drawn below, so a missing file
# only produces a warning instead of stopping the script.
if (file.exists("DOWN_enrichment.txt")) {
  DOWN <- read_enrichment("DOWN_enrichment.txt")
  DOWN_END <- pick_rows(
    DOWN,
    c(1, 4, 6, 7, 10, 13, 14, 17, 20, 21, 31, 33, 34, 36, 40, 44, 45, 53, 59, 71)
  )
} else {
  warning("DOWN_enrichment.txt not found; DOWN_END was not created.",
          call. = FALSE)
}

a <- UP_END

b <- "UP_END"

# File names derived from the label; the ggsave() call below still uses the
# fixed name "UP_gene.png".
pngname <- paste0(b, ".png")

pdfname <- paste0(b, ".pdf")

p1 <- plot_enrichment(a, "UP_GENE")

# print() so the plot is also drawn when the file is run via source().
print(p1)

ggsave("UP_gene.png", p1, units = "in", dpi = 300, width = 6, height = 6,
       device = "png")

```



你可能感兴趣的:(DAVID富集分析及R语言可视化)