更新go和kegg注释

非模式物种

step1:注释
把本物种的蛋白序列用diamond到eggnog数据库比对

##/home/lx_sky6/software/eggnog-mapper-2.1.8/emapper.py -i GCF_905147795.1_ilPieRapa1.1_protein.faa -m diamond -o eggnog --cpu 60

拿到eggnog的注释结果记得吧以#开头的行全部删掉
step2:构建自己的物种数据库

eggnogmapper<- read_delim(
  file = 'eggnog.emapper.annotations2',
  "\t",
  escape_double =FALSE,
  col_names=FALSE,
  comment = "#",trim_ws = TRUE)%>%
  dplyr::select(GID=X1,
                Gene_Symbol=X9,
                GO=X10,
                KO=X12,
                Pathway=X13,
                OG=X7,
                Gene_Name=X21)
gene_info <- dplyr::select(eggnogmapper,  GID, Gene_Name) %>%
  dplyr::filter(!is.na(Gene_Name))
# 提取GO信息
gene2go <- dplyr::select(eggnogmapper, GID, GO) %>%
  separate_rows(GO, sep = ',', convert = F) %>%
  filter(!is.na(GO)) %>%
  mutate(EVIDENCE = 'IEA')
gene2go <- subset(gene2go,grepl("^.*GO.*$",GO))


# 构建 OrgDB,参数十分玄学,请按照以下格式
library(AnnotationForge)
AnnotationForge::makeOrgPackage(gene_info=gene_info,
                                               go=gene2go,
                                               maintainer='yuantao <[email protected]>',
                                               author='yuantao',
                                               outputDir="/home/lx_sky6/yt/rworkplace",
                                               tax_id=64459,
                                               genus='Pieris',                     #只改动这里
                                               species='rapae',               #和这里,进行命名
                                               goTable="go",
                                               version="1.0")
pkgbuild::build('/home/lx_sky6/yt/rworkplace/org.Prap.eg.db', dest_path = "/home/lx_sky6/yt/rworkplace/")       #这两个路径自己创建

step3:安装注释包进行注释

install.packages("./org.Prapae.eg.db", repos = NULL,type = "source")

library(org.Prapae.eg.db)
library(readr)
fuji1_gene <- read_table2("../anita/fuji1.txt")
##fuji1:254个基因
de_gene <- pull(fuji1_gene, GID)
de_ego <- enrichGO(gene = de_gene,
                   OrgDb = org.Prapae.eg.db,
                   keyType = 'GID',
                   ont = 'ALL',
                   qvalueCutoff = 0.05,
                   pvalueCutoff = 0.05)
de_ego_df <- as.data.frame(de_ego)
enrichplot::dotplot(de_ego,showCategory = 20)


##run__KEGG富集 
pathway2gene <- dplyr::select(eggnogmapper, Pathway, GID) %>%
  separate_rows(Pathway, sep = ',', convert = F) %>%
  filter(str_detect(Pathway, 'ko')) %>%
  mutate(Pathway = str_remove(Pathway, 'ko'))
library(magrittr)
get_path2name <- function(){
  keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
  keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
  colnames(keggpathid2name.df) <- c("path_id","path_name")
  return(keggpathid2name.df)
}
pathway2name <- get_path2name()
library(clusterProfiler)
de_ekp <- enricher(de_gene,
                   TERM2GENE = pathway2gene,
                   TERM2NAME = pathway2name,
                   pvalueCutoff = 0.05,
                   qvalueCutoff = 0.05)
de_ekp_df <- as.data.frame(de_ekp)
head(de_ekp_df)
enrichplot::dotplot(de_ekp, showCategory = 20)
##导出数据
write.table(de_ego_df,file = '/home/lx_sky6/yt/rworkplace/914_GO.csv',sep = ',',quote = FALSE)

write.table(de_ekp_df,file = '/home/lx_sky6/yt/rworkplace/914_Kegg.csv',sep = ',',quote = FALSE)

你可能感兴趣的:(更新go和kegg注释)