非模式物种
step1:注释
把本物种的蛋白序列用diamond到eggnog数据库比对
##/home/lx_sky6/software/eggnog-mapper-2.1.8/emapper.py -i GCF_905147795.1_ilPieRapa1.1_protein.faa -m diamond -o eggnog --cpu 60
拿到eggnog的注释结果记得吧以#开头的行全部删掉
step2:构建自己的物种数据库
eggnogmapper<- read_delim(
file = 'eggnog.emapper.annotations2',
"\t",
escape_double =FALSE,
col_names=FALSE,
comment = "#",trim_ws = TRUE)%>%
dplyr::select(GID=X1,
Gene_Symbol=X9,
GO=X10,
KO=X12,
Pathway=X13,
OG=X7,
Gene_Name=X21)
gene_info <- dplyr::select(eggnogmapper, GID, Gene_Name) %>%
dplyr::filter(!is.na(Gene_Name))
# 提取GO信息
gene2go <- dplyr::select(eggnogmapper, GID, GO) %>%
separate_rows(GO, sep = ',', convert = F) %>%
filter(!is.na(GO)) %>%
mutate(EVIDENCE = 'IEA')
gene2go <- subset(gene2go,grepl("^.*GO.*$",GO))
# 构建 OrgDB,参数十分玄学,请按照以下格式
library(AnnotationForge)
AnnotationForge::makeOrgPackage(gene_info=gene_info,
go=gene2go,
maintainer='yuantao <[email protected]>',
author='yuantao',
outputDir="/home/lx_sky6/yt/rworkplace",
tax_id=64459,
genus='Pieris', #只改动这里
species='rapae', #和这里,进行命名
goTable="go",
version="1.0")
pkgbuild::build('/home/lx_sky6/yt/rworkplace/org.Prap.eg.db', dest_path = "/home/lx_sky6/yt/rworkplace/") #这两个路径自己创建
step3:安装注释包进行注释
install.packages("./org.Prapae.eg.db", repos = NULL,type = "source")
library(org.Prapae.eg.db)
library(readr)
fuji1_gene <- read_table2("../anita/fuji1.txt")
##fuji1:254个基因
de_gene <- pull(fuji1_gene, GID)
de_ego <- enrichGO(gene = de_gene,
OrgDb = org.Prapae.eg.db,
keyType = 'GID',
ont = 'ALL',
qvalueCutoff = 0.05,
pvalueCutoff = 0.05)
de_ego_df <- as.data.frame(de_ego)
enrichplot::dotplot(de_ego,showCategory = 20)
##run__KEGG富集
pathway2gene <- dplyr::select(eggnogmapper, Pathway, GID) %>%
separate_rows(Pathway, sep = ',', convert = F) %>%
filter(str_detect(Pathway, 'ko')) %>%
mutate(Pathway = str_remove(Pathway, 'ko'))
library(magrittr)
get_path2name <- function(){
keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
colnames(keggpathid2name.df) <- c("path_id","path_name")
return(keggpathid2name.df)
}
pathway2name <- get_path2name()
library(clusterProfiler)
de_ekp <- enricher(de_gene,
TERM2GENE = pathway2gene,
TERM2NAME = pathway2name,
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
de_ekp_df <- as.data.frame(de_ekp)
head(de_ekp_df)
enrichplot::dotplot(de_ekp, showCategory = 20)
##导出数据
write.table(de_ego_df,file = '/home/lx_sky6/yt/rworkplace/914_GO.csv',sep = ',',quote = FALSE)
write.table(de_ekp_df,file = '/home/lx_sky6/yt/rworkplace/914_Kegg.csv',sep = ',',quote = FALSE)