#####安装archr包##别处复制
.libPaths(c("/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
"/home/data/t040413/R/yll/usr/local/lib/R/site-library",
"/usr/local/lib/R/library",
"/home/data/refdir/Rlib/"))
#BiocManager::install("Nebulosa")
#library("Nebulosa")
library("Seurat")
library(ggplot2)
# Work inside the project output directory.
# `recursive = TRUE` creates missing parent directories and
# `showWarnings = FALSE` keeps re-runs quiet when the directory already exists,
# so the following setwd() cannot fail just because of dir.create().
getwd()
dir.create("/home/data/t040413/ipf/All_mesenchymal",
           recursive = TRUE, showWarnings = FALSE)
setwd("/home/data/t040413/ipf/All_mesenchymal")
getwd()

# load() restores the objects stored in the file into the workspace; the code
# below expects it to provide the Seurat object `All.merge`.
load("/home/data/t040413/ipf/gse157379_young_old_blm_mice/monocle/All.merge_final.rds")

# Cell counts per cell type and group, then the labelled embedding plot.
table(All.merge$cell.type, All.merge$group)
DimPlot(All.merge, label = TRUE, repel = TRUE, label.size = 6)
# table(Idents(All.merge))
# All.merge$cell.type=Idents(All.merge)
# All.merge$cell.type=factor(All.merge$cell.type,levels = c(
#
# "Universal fibroblast",
# "Specialized fibroblast",
# "Interferon-responsive fibroblast",
# "Spp1 fibroblast",
# "Mesenchymal progenitor",
# "Hhip fibroblast",
# "Pdgfrb fibroblast",
# "Myofibroblast"
#
# ))
# table(All.merge$cell.type)
# Idents(All.merge)=All.merge$cell.type
# save(All.merge,file = "/home/data/t040413/ipf/gse157379_young_old_blm_mice/monocle/All.merge_final.rds")
# Stacked proportion bars: share of each cell identity within every `stim`
# level and within every `group` level. The per-cell metadata table of the
# Seurat object is used as the plotting data.
ggplot(All.merge@meta.data,
       aes(x = stim, fill = Idents(All.merge))) +
  geom_bar(position = "fill")
ggplot(All.merge@meta.data,
       aes(x = group, fill = Idents(All.merge))) +
  geom_bar(position = "fill")
# Differential expression inside the "Specialized fibroblast" identity:
# cells of group "Aged Fibrosis" versus cells of group "Aged".
specialized_degs <- FindMarkers(
  All.merge,
  ident.1 = "Aged Fibrosis",
  ident.2 = "Aged",
  densify = TRUE,
  subset.ident = "Specialized fibroblast",
  group.by = "group"
)
specialized_degs$gene <- rownames(specialized_degs)

# Markers of every identity class. The result is stored in
# `allmarkers_for_each`; the original script then referred to an undefined
# object `allmarkers`, which is corrected here.
allmarkers_for_each <- FindAllMarkers(All.merge, densify = TRUE)
# NOTE(review): the row names are copied into `gene`; later sections strip a
# numeric suffix from this column, so the row names presumably carry
# de-duplication suffixes such as ".1" - confirm.
allmarkers_for_each$gene <- rownames(allmarkers_for_each)

# The file name says "agedfibrosis_vs_fibrosis" although the comparison above
# is "Aged Fibrosis" vs "Aged"; it is kept because the file is read back under
# this exact name further down.
openxlsx::write.xlsx(specialized_degs, file = "specialized_degs_agedfibrosis_vs_fibrosis.xlsx")
openxlsx::write.xlsx(allmarkers_for_each, file = "allmarkers_for_each.xlsx")
print(getwd())

# Violin plots of selected genes, split by group.
VlnPlot(All.merge, features = "Inmt", split.by = "group", split.plot = TRUE)
VlnPlot(All.merge, features = c("Fn1", "Inmt", "Fap"), split.by = "group", split.plot = TRUE)
VlnPlot(All.merge, features = c("Acta2"), split.by = "group", split.plot = TRUE)
# Create the sub-directory for per-cluster enrichment output and make it the
# working directory. The trailing comment on getwd() records a path seen in an
# earlier session.
getwd()#"/home/data/t040413/ipf/All_mesenchymal/cluster_enrichments"
dir.create("./cluster_enrichments")
setwd("./cluster_enrichments/")
#
#
# allmarkers=allmarkers_for_each %>%
# dplyr::group_by(cluster) %>%dplyr::filter(cluster=="Specialized fibroblast")
#
# allmarkers_up=allmarkers[allmarkers$avg_log2FC>0,]
# allmarkers_up
#
#
# allmarkers_down=allmarkers[allmarkers$avg_log2FC<0,]
# allmarkers_down
# library(dplyr)
# special=specialized_degs %>%dplyr::filter(p_val_adj<0.05 &
# ( avg_log2FC >0.5 | avg_log2FC <(-0.5)))
#
# special_up=special[special$avg_log2FC>0,]
# special_down=special[special$avg_log2FC<0,]
# allmarkers
#
# common=intersect(special$gene,allmarkers$gene)
#
# common_up=intersect(special_up$gene,allmarkers$gene)
#
# common_down=intersect(special_down$gene,allmarkers$gene)
#
#
#
# alllist=list(common=common,common_down=common_down,common_up=common_up)
#
# a_list=alllist
# names(a_list)
#
# mergedlist=do.call(cbind, lapply(lapply(a_list, unlist), `length<-`, max(lengths(a_list))))
# # cat months dog
# mergedlist=data.frame(mergedlist)
# openxlsx::write.xlsx(mergedlist,file = "meg.xlsx")
# Re-read the two marker tables from the spreadsheets written earlier, take a
# quick look at them, then step back up one directory level.
getwd()
allmarkers_for_each <- openxlsx::read.xlsx(
  xlsxFile = "/home/data/t040413/ipf/All_mesenchymal/allmarkers_for_each.xlsx"
)
head(allmarkers_for_each)
table(allmarkers_for_each$cluster)
range(allmarkers_for_each$avg_log2FC)

specialized_degs <- openxlsx::read.xlsx(
  xlsxFile = "/home/data/t040413/ipf/All_mesenchymal/specialized_degs_agedfibrosis_vs_fibrosis.xlsx"
)
head(specialized_degs)

setwd("../")
################################################################# 1 ----------------------
########### Enrichment analysis of a plain gene list (single group enrichments) ----------
getwd()
dir.create("sing_groupgene_specialgene", showWarnings = FALSE)
setwd("sing_groupgene_specialgene")
getwd()

# Gene set to enrich: markers of the "Specialized fibroblast" cluster with
# avg_log2FC > 0.5, limited to the 100 largest fold changes (ties kept).
# All dplyr verbs are namespace-qualified so this section does not depend on
# dplyr or the pipe being attached at this point of the script.
special_markers <- allmarkers_for_each[
  allmarkers_for_each$cluster == "Specialized fibroblast",
]
special_markers <- dplyr::filter(special_markers, avg_log2FC > 0.5)
special_markers <- dplyr::slice_max(special_markers, avg_log2FC, n = 100)

# Strip a trailing numeric suffix (".1", ".2", ...) from the gene names.
# The pattern is anchored at the end of the name, so a ".1" in the middle of a
# symbol is left alone and ".10" is removed as a whole.
genes_for_enrich_special_marker <- sub("\\.[0-9]+$", "", special_markers$gene)
getwd()

# One list element per gene set; the enrichment step below iterates over it.
a_list <- list(genes_for_enrich_special_marker)
i <- 0
library(clusterProfiler)
library(org.Hs.eg.db) # human
library(org.Mm.eg.db) # mouse
library(ggplot2)

# Draw a dot plot of an enrichment result and save it as <stem>.pdf and
# <stem>.png (14 in wide, `height` in high).
#   res         enrichment result object accepted by clusterProfiler::dotplot()
#   stem        output file name without extension
#   n_terms     value passed to `showCategory`
#   label_width value passed to `label_format`
# A NULL result or one without any reported term is skipped with a message so
# that one empty category does not abort the remaining output.
save_enrich_dotplot <- function(res, stem, n_terms, label_width, height) {
  if (is.null(res) || nrow(as.data.frame(res)) == 0) {
    message("No enriched terms, skipping plot: ", stem)
    return(invisible(NULL))
  }
  p <- clusterProfiler::dotplot(object = res, showCategory = n_terms,
                                label_format = label_width) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
  for (ext in c(".pdf", ".png")) {
    ggsave(paste0(stem, ext), plot = p, width = 14, height = height,
           limitsize = FALSE)
  }
  invisible(p)
}

# GO (BP/CC/MF) and KEGG over-representation analysis for one gene set.
#   each_single_group  character vector of mouse gene symbols
#   i                  position of the gene set in the list (used in the name)
# All output is written to a sub-directory named after the first five genes
# plus `i`; the previous working directory is restored on exit, also when an
# error occurs. Returns the group name invisibly.
run_single_group_enrichment <- function(each_single_group, i) {
  group <- paste(paste(head(each_single_group, 5), collapse = "_"), i)
  print(group)
  dir.create(group, showWarnings = FALSE)
  old_wd <- setwd(group)
  on.exit(setwd(old_wd), add = TRUE)
  print(getwd())

  # Map symbols to Entrez IDs; symbols without a mapping are dropped by merge().
  sce.markers <- data.frame(gene = each_single_group)
  ids <- suppressWarnings(
    bitr(sce.markers[, 1], "SYMBOL", "ENTREZID", "org.Mm.eg.db")
  )
  sce.markers <- merge(sce.markers, ids, by.x = "gene", by.y = "SYMBOL")
  gcSample <- sce.markers$ENTREZID

  print("===========开始go= All ont===========")
  # NOTE(review): no `ont` is given here, so enrichGO() uses its default
  # ontology rather than all three, although the output is labelled "GO" -
  # confirm whether ont = "ALL" was intended.
  xx <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db", pvalueCutoff = 0.05)
  xx.BP <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db",
                    pvalueCutoff = 0.05, readable = TRUE, ont = "BP")
  xx.CC <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db",
                    pvalueCutoff = 0.05, readable = TRUE, ont = "CC")
  xx.MF <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db",
                    pvalueCutoff = 0.05, readable = TRUE, ont = "MF")

  print("===========开始 kegg All ont============")
  gg <- clusterProfiler::enrichKEGG(gene = gcSample,
                                    keyType = "kegg",
                                    organism = "mmu", # rat would be "rno"
                                    pvalueCutoff = 0.05)
  print("kegg---done------")

  # Standard figures: top 20 terms per result.
  go_results <- list(BP = xx.BP, CC = xx.CC, MF = xx.MF)
  for (ont in names(go_results)) {
    save_enrich_dotplot(go_results[[ont]],
                        paste0(group, "_degs_enrichGO-", ont, "_enrichment--3"),
                        n_terms = 20, label_width = 60, height = 40)
  }
  save_enrich_dotplot(xx, paste0(group, "_degs_enrichGO-GO_enrichment--3"),
                      n_terms = 20, label_width = 100, height = 40)
  write.csv(xx, file = paste0(group, "enrichGO-GO_enrichment.csv"))

  print(paste("保存位置", getwd(), sep = " : "))
  save_enrich_dotplot(gg, paste0(group, "_degs_enrichGO-KEGG_enrichment-2"),
                      n_terms = 20, label_width = 40, height = 25)
  openxlsx::write.xlsx(gg, file = paste0(group, "_enrichGO-KEGG_enrichment.xlsx"))
  openxlsx::write.xlsx(
    sce.markers,
    file = paste0(group, "_sce.markers_for_each_clusterfor_enrichment.xlsx")
  )

  # Enlarged figures: up to 100 terms per result.
  large_results <- c(go_results, list(KEGG = gg))
  for (label in names(large_results)) {
    save_enrich_dotplot(
      large_results[[label]],
      paste0(group, "_degs_enrichGO-", label, "_enrichment-100terms-3"),
      n_terms = 100, label_width = 60, height = 100
    )
  }

  # One table with the BP, CC and MF results stacked.
  xx.all <- do.call(rbind, list(xx.BP@result, xx.CC@result, xx.MF@result))
  openxlsx::write.xlsx(xx.all, file = paste0(group, "_enrichments_all.xlsx"))

  # Save the result objects. `merged_degs` is included only when such an
  # object exists; otherwise the fallback file name is used, as before.
  if (exists("merged_degs")) {
    save(list = c("xx.BP", "xx.CC", "xx.MF", "xx.all", "sce.markers",
                  "merged_degs", "gg"),
         file = paste0(group, "_xx.Rdata"))
    print("没有报错")
  } else {
    print(getwd())
    save(list = c("xx.BP", "xx.CC", "xx.MF", "xx.all", "sce.markers", "gg"),
         file = paste0(group, "_xx_all.Rdata"))
  }
  print(getwd())
  invisible(group)
}

# Run the analysis for every gene set. Iterating over the indices gives each
# set its own running number (a counter incremented inside the closure would
# stay at 1 for every element).
invisible(lapply(seq_along(a_list), function(i) {
  run_single_group_enrichment(a_list[[i]], i)
}))
# Load two previously written enrichment tables for inspection: the one of the
# marker gene set and the one stored under the "0.5" directory.
enrich_Res <- openxlsx::read.xlsx(
  xlsxFile = "./Inmt_Npnt_Mettl7a1_Macf1_Pcolce2 1/Inmt_Npnt_Mettl7a1_Macf1_Pcolce2 1_enrichments_all.xlsx"
)
head(enrich_Res)

degs_enrich <- openxlsx::read.xlsx(xlsxFile = "./0.5/0.5_enrichments_all.xlsx")
head(degs_enrich)
########## GSEA -----------
{
  # Tutorials this section is based on:
  #https://zhuanlan.zhihu.com/p/518144716
  #https://yulab-smu.top/biomedical-knowledge-mining-book/faq.html
  # library(org.Hs.eg.db)
  library(org.Mm.eg.db)
  library(clusterProfiler)
  library(enrichplot)
  library(tidyverse)
  library(ggstatsplot)
  {
    setwd("../")
    print(getwd())
    dir.create("cluster_special_genes_GSEA_kegg_go", showWarnings = FALSE)
    setwd("cluster_special_genes_GSEA_kegg_go")

    ## Species settings
    organism <- "mmu"        # human "hsa", mouse "mmu"
    OrgDb <- "org.Mm.eg.db"  # human "org.Hs.eg.db", mouse "org.Mm.eg.db"

    # Significant markers of the "Specialized fibroblast" cluster. A trailing
    # numeric suffix (".1", ".2", ...) is stripped from the gene names; the
    # pattern is anchored so only a suffix at the end of the name is removed.
    marker_degs <- allmarkers_for_each[
      allmarkers_for_each$cluster == "Specialized fibroblast",
    ] %>%
      dplyr::filter(p_val_adj < 0.05) %>%
      dplyr::mutate(gene = sub("\\.[0-9]+$", "", gene))
    rownames(marker_degs) <- marker_degs$gene

    #### Build the input table; another DEG table can be substituted here ####
    # Columns are selected by name instead of by position so that a changed
    # column order in the spreadsheet cannot silently pick the wrong values.
    need_DEG <- marker_degs[, c("avg_log2FC", "p_val_adj")]
    colnames(need_DEG) <- c("log2FoldChange", "pvalue")
    need_DEG$SYMBOL <- rownames(need_DEG)

    ##### Ranked gene list for GSEA: log2 fold changes named by Entrez ID,
    ##### sorted in decreasing order ####
    df <- bitr(rownames(need_DEG),
               fromType = "SYMBOL",
               toType = "ENTREZID",
               OrgDb = OrgDb)
    need_DEG <- merge(need_DEG, df, by = "SYMBOL") # attach the Entrez IDs
    geneList <- need_DEG$log2FoldChange
    names(geneList) <- need_DEG$ENTREZID
    geneList <- sort(geneList, decreasing = TRUE)

    ##### GSEA ####
    # `pvalueCutoff` is applied to the adjusted p-value and can be tuned.
    KEGG_kk_entrez <- gseKEGG(geneList = geneList,
                              organism = organism,
                              pvalueCutoff = 0.25,
                              eps = 0)
    # Convert Entrez IDs in the result back to gene symbols.
    KEGG_kk_clustergene <- DOSE::setReadable(KEGG_kk_entrez,
                                             OrgDb = OrgDb,
                                             keyType = "ENTREZID")
    GO_kk_entrez <- gseGO(geneList = geneList,
                          ont = "ALL", # "BP", "MF", "CC" or "ALL"
                          OrgDb = OrgDb,
                          keyType = "ENTREZID",
                          pvalueCutoff = 0.25,
                          eps = 0)
    GO_kk_clustergene <- DOSE::setReadable(GO_kk_entrez,
                                           OrgDb = OrgDb,
                                           keyType = "ENTREZID")
    print(getwd())
    save(KEGG_kk_entrez, GO_kk_entrez,
         KEGG_kk_clustergene, GO_kk_clustergene,
         file = "cluster_special_gene_GSEA_result.RData")

    kegg_gsea_res <- data.frame(KEGG_kk_clustergene@result)

    ## Results used for plotting
    kk_gse <- KEGG_kk_clustergene
    kk_gse_entrez <- KEGG_kk_entrez

    ### Filtering: pathways with |NES| > 1, nominal p < 0.05 and
    ### adjusted p < 0.25 are treated as significantly enriched.
    kk_gse_cut <- kk_gse[kk_gse$pvalue < 0.05 & kk_gse$p.adjust < 0.25 & abs(kk_gse$NES) > 1]
    kk_gse_cut_down <- kk_gse_cut[kk_gse_cut$NES < 0, ]
    kk_gse_cut_up <- kk_gse_cut[kk_gse_cut$NES > 0, ]

    # Keep at most 100 pathways per direction, ordered by NES.
    down_gsea <- kk_gse_cut_down[tail(order(kk_gse_cut_down$NES, decreasing = TRUE), 100), ]
    up_gsea <- kk_gse_cut_up[head(order(kk_gse_cut_up$NES, decreasing = TRUE), 100), ]
    diff_gsea <- kk_gse_cut[head(order(abs(kk_gse_cut$NES), decreasing = TRUE), 100), ]

    # Print every plot in `plots` into one PDF; the device is closed even if
    # printing fails.
    write_plots_pdf <- function(plots, file) {
      pdf(file = file, width = 12, height = 8)
      on.exit(dev.off(), add = TRUE)
      for (p in plots) {
        print(p)
      }
      invisible(file)
    }

    # Classic GSEA plot for every pathway in `pathways` (a table with `ID` and
    # `Description` columns): one PDF per pathway named
    # <Description><single_suffix> plus one PDF with all of them.
    # Returns the plots as a list named by pathway description. An empty table
    # produces no files. "/" in a description is replaced by "_" in the file
    # name because it would otherwise be read as a directory separator.
    plot_gsea_pathways <- function(gsea_res, pathways, single_suffix, combined_file) {
      plots <- list()
      for (k in seq_along(pathways$ID)) {
        desc <- pathways$Description[k]
        one_plot <- gseaplot2(gsea_res,
                              pathways$ID[k],            # gene set ID
                              title = desc,
                              color = "red",             # line colour
                              base_size = 20,            # base font size
                              rel_heights = c(1.5, 0.5, 1), # sub-plot heights
                              subplots = 1:3,            # which sub-plots to show
                              ES_geom = "line",          # "line" or "dot"
                              pvalue_table = TRUE)       # show the p-value table
        plots[[desc]] <- one_plot
        print(paste0(getwd(), desc))
        write_plots_pdf(list(one_plot),
                        paste0(gsub("/", "_", desc, fixed = TRUE), single_suffix))
      }
      if (length(plots) > 0) {
        write_plots_pdf(plots, combined_file)
      } else {
        message("No pathways to plot for ", combined_file)
      }
      plots
    }

    #### Classic GSEA plots, up-regulated pathways -----
    gselist <- plot_gsea_pathways(
      kk_gse, up_gsea,
      single_suffix = "cluster_special_gene_GSEA_up_1.pdf",
      combined_file = paste0("all_split", "cluster_GSEA_up_1.pdf")
    )

    ### Classic GSEA plots, down-regulated pathways ------
    gselist <- plot_gsea_pathways(
      kk_gse, down_gsea,
      single_suffix = "_cluster_GSEA_down_1.pdf",
      combined_file = paste0("all_split", "_cluster_GSEA_down_1.pdf")
    )

    #### All up-regulated pathways in one GSEA plot
    if (length(up_gsea$ID) > 0) {
      gseap2 <- gseaplot2(kk_gse,
                          up_gsea$ID,
                          title = "UP_GSEA_all",
                          color = "red",
                          base_size = 20,
                          rel_heights = c(1.5, 0.5, 1),
                          subplots = 1:3,
                          ES_geom = "line",
                          pvalue_table = TRUE)
      ggsave(gseap2, filename = "combined_GSEA_up_all.pdf", width = 12, height = 12)
    }

    go_gsea_res <- data.frame(GO_kk_clustergene@result)
  }
}
######### Enrichment analysis for several gene sets kept in a list ----------
lapply(a_list, head)

library(clusterProfiler)
library(org.Hs.eg.db) # human
library(org.Mm.eg.db) # mouse
library(ggplot2)

# Draw a dot plot of an enrichment result and save it as <stem>.pdf and
# <stem>.png (14 in wide, `height` in high).
#   res         enrichment result object accepted by clusterProfiler::dotplot()
#   stem        output file name without extension
#   n_terms     value passed to `showCategory`
#   label_width value passed to `label_format`
# A NULL result or one without any reported term is skipped with a message so
# that one empty category does not abort the remaining output.
save_enrich_dotplot <- function(res, stem, n_terms, label_width, height) {
  if (is.null(res) || nrow(as.data.frame(res)) == 0) {
    message("No enriched terms, skipping plot: ", stem)
    return(invisible(NULL))
  }
  p <- clusterProfiler::dotplot(object = res, showCategory = n_terms,
                                label_format = label_width) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
  for (ext in c(".pdf", ".png")) {
    ggsave(paste0(stem, ext), plot = p, width = 14, height = height,
           limitsize = FALSE)
  }
  invisible(p)
}

# GO (BP/CC/MF) and KEGG over-representation analysis for one gene set.
#   each_single_group  character vector of mouse gene symbols
#   i                  position of the gene set in the list (used in the name)
# All output is written to a sub-directory named after the first five genes
# plus `i`; the previous working directory is restored on exit, also when an
# error occurs. Returns the group name invisibly.
run_group_enrichment <- function(each_single_group, i) {
  group <- paste(paste(head(each_single_group, 5), collapse = "_"), i)
  print(group)
  dir.create(group, showWarnings = FALSE)
  old_wd <- setwd(group)
  on.exit(setwd(old_wd), add = TRUE)
  print(getwd())

  # Map symbols to Entrez IDs; symbols without a mapping are dropped by merge().
  sce.markers <- data.frame(gene = each_single_group)
  ids <- suppressWarnings(
    bitr(sce.markers[, 1], "SYMBOL", "ENTREZID", "org.Mm.eg.db")
  )
  sce.markers <- merge(sce.markers, ids, by.x = "gene", by.y = "SYMBOL")
  gcSample <- sce.markers$ENTREZID

  print("===========开始go= All ont===========")
  # NOTE(review): no `ont` is given here, so enrichGO() uses its default
  # ontology rather than all three, although the output is labelled "GO" -
  # confirm whether ont = "ALL" was intended.
  xx <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db", pvalueCutoff = 0.05)
  xx.BP <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db",
                    pvalueCutoff = 0.05, readable = TRUE, ont = "BP")
  xx.CC <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db",
                    pvalueCutoff = 0.05, readable = TRUE, ont = "CC")
  xx.MF <- enrichGO(gene = gcSample, OrgDb = "org.Mm.eg.db",
                    pvalueCutoff = 0.05, readable = TRUE, ont = "MF")

  print("===========开始 kegg All ont============")
  gg <- clusterProfiler::enrichKEGG(gene = gcSample,
                                    keyType = "kegg",
                                    organism = "mmu", # rat would be "rno"
                                    pvalueCutoff = 0.05)
  print("kegg---done------")

  # Standard figures: top 20 terms per result.
  go_results <- list(BP = xx.BP, CC = xx.CC, MF = xx.MF)
  for (ont in names(go_results)) {
    save_enrich_dotplot(go_results[[ont]],
                        paste0(group, "_degs_enrichGO-", ont, "_enrichment--3"),
                        n_terms = 20, label_width = 60, height = 40)
  }
  save_enrich_dotplot(xx, paste0(group, "_degs_enrichGO-GO_enrichment--3"),
                      n_terms = 20, label_width = 100, height = 40)
  write.csv(xx, file = paste0(group, "enrichGO-GO_enrichment.csv"))

  print(paste("保存位置", getwd(), sep = " : "))
  save_enrich_dotplot(gg, paste0(group, "_degs_enrichGO-KEGG_enrichment-2"),
                      n_terms = 20, label_width = 40, height = 25)
  openxlsx::write.xlsx(gg, file = paste0(group, "_enrichGO-KEGG_enrichment.xlsx"))
  openxlsx::write.xlsx(
    sce.markers,
    file = paste0(group, "_sce.markers_for_each_clusterfor_enrichment.xlsx")
  )

  # Enlarged figures: up to 100 terms per result.
  large_results <- c(go_results, list(KEGG = gg))
  for (label in names(large_results)) {
    save_enrich_dotplot(
      large_results[[label]],
      paste0(group, "_degs_enrichGO-", label, "_enrichment-100terms-3"),
      n_terms = 100, label_width = 60, height = 100
    )
  }

  # One table with the BP, CC and MF results stacked.
  xx.all <- do.call(rbind, list(xx.BP@result, xx.CC@result, xx.MF@result))
  openxlsx::write.xlsx(xx.all, file = paste0(group, "_enrichments_all.xlsx"))

  # Save the result objects. `merged_degs` is included only when such an
  # object exists; otherwise the fallback file name is used, as before.
  if (exists("merged_degs")) {
    save(list = c("xx.BP", "xx.CC", "xx.MF", "xx.all", "sce.markers",
                  "merged_degs", "gg"),
         file = paste0(group, "_xx.Rdata"))
    print("没有报错")
  } else {
    print(getwd())
    save(list = c("xx.BP", "xx.CC", "xx.MF", "xx.all", "sce.markers", "gg"),
         file = paste0(group, "_xx_all.Rdata"))
  }
  print(getwd())
  invisible(group)
}

# Run the analysis for every gene set. Iterating over the indices gives each
# set its own running number (a counter incremented inside the closure would
# stay at 1 for every element).
invisible(lapply(seq_along(a_list), function(i) {
  run_group_enrichment(a_list[[i]], i)
}))
##############################################################2-------------------------
{
########################## --------------------- batch enrichment analysis of FindAllMarkers results (compareCluster) ==================================================
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
# NOTE(review): `all_degs` is not defined in the visible part of this script;
# it must exist in the workspace before this section runs.
df=all_degs
## Thresholds: p < 0.05, |log2FC| > 1
p_val_adj = 0.05
# avg_log2FC = 0.1
# NOTE(review): seq(0.8) evaluates to 1 (same as 1:0.8), so the loop below runs
# once with avg_log2FC = 1 and writes into directory "1/". If a cutoff of 0.8
# was intended this should be c(0.8) - confirm.
fc=seq(0.8)
print(getwd())
#setwd("../")
# One pass per fold-change cutoff; each pass works inside a directory named
# after the cutoff and steps back up at the end of the pass.
for (avg_log2FC in fc) {
dir.create(paste0(avg_log2FC,"/"))
setwd(paste0(avg_log2FC,"/"))
print(getwd())
print(paste0("Start----",avg_log2FC))
head(all_degs)
# Label each gene as "up", "down" or "none" according to the thresholds:
df$direction <- case_when(
df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",
df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",
TRUE ~ 'none'
)
head(df)
# Keep only genes that passed the thresholds.
df=df[df$direction!="none",]
head(df)
dim(df)
# Combined label "<group>_<direction>".
df$mygroup=paste(df$group,df$direction,sep = "_")
head(df)
dim(df)
########################## ---------------------- enrichment analysis =
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
{
library(clusterProfiler)
library(org.Hs.eg.db) # human
library(org.Mm.eg.db) # mouse
library(ggplot2)
# degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)
# NOTE(review): `markers` is not defined in the visible part of this script.
head(markers)
# NOTE(review): this assignment replaces the thresholded `df` built above, so
# the direction/mygroup labelling has no effect on the enrichment input, and a
# second loop pass would start from this table instead of `all_degs` - confirm
# which input is intended.
df=markers %>%dplyr::group_by(cluster)%>%
filter(p_val_adj <0.05
)
sce.markers=df
head(sce.markers)
print(getwd())
# Map gene symbols to Entrez IDs using the mouse annotation package; warnings
# from bitr() are suppressed.
ids <- suppressWarnings(bitr(sce.markers$gene, 'SYMBOL', 'ENTREZID', 'org.Mm.eg.db'))
head(ids)
head(sce.markers)
tail(sce.markers)
dim(sce.markers)
# Inner merge: genes without an Entrez ID are dropped.
sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL')
head(sce.markers)
dim(sce.markers)
sce.markers$group=sce.markers$cluster
sce.markers=sce.markers[sce.markers$group!="none",]
dim(sce.markers)
head(sce.markers)
getwd()
# sce.markers=openxlsx::read.xlsx("/home/data/t040413/silicosis/fibroblast_myofibroblast/group_enrichments/")
#sce.markers$cluster=sce.markers$mygroup
dim(sce.markers)
head(sce.markers)
# One vector of Entrez IDs per cluster: the input format for compareCluster().
gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
library(clusterProfiler)
gcSample # entrez id , compareCluster
print("===========开始go= All ont===========")
# NOTE(review): no `ont` is passed here, so enrichGO's default ontology applies
# rather than all three, although the output files are labelled "GO" - confirm.
xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05) #organism="hsa",
# Biological Process terms; top 20 per cluster plotted, saved as PDF and PNG.
xx.BP <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="BP") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.BP,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-BP_enrichment--3.pdf'),plot = p,width = 13,height = 40,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-BP_enrichment--3.png'),plot = p,width = 13,height = 40,limitsize = F)
# Cellular Component terms, same layout.
xx.CC <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="CC") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.CC,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-CC_enrichment--3.pdf'),plot = p,width = 13,height = 40,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-CC_enrichment--3.png'),plot = p,width = 13,height = 40,limitsize = F)
# Molecular Function terms, same layout.
xx.MF <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" , #'org.Hs.eg.db',
pvalueCutoff=0.05,readable=TRUE,
ont="MF") #organism="hsa",
p=clusterProfiler::dotplot(object = xx.MF,showCategory = 20,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-MF_enrichment--3.pdf'),plot = p,width = 13,height = 40,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-MF_enrichment--3.png'),plot = p,width = 13,height = 40,limitsize = F)
print(getwd())
.libPaths()
print("===========开始 kegg All ont============")
# KEGG pathway enrichment per cluster for mouse ("mmu").
gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG", #readable=TRUE,
keyType = 'kegg', # KEGG enrichment
organism='mmu',#"rno",
pvalueCutoff = 0.05 # p-value threshold (set to 1 to output everything)
)
# Plot and export the `xx` result computed above.
p=clusterProfiler::dotplot(object = xx,showCategory = 20,
label_format =100)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-GO_enrichment--3.pdf'),plot = p,width = 13,height = 40,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-GO_enrichment--3.png'),plot = p,width = 13,height = 40,limitsize = F)
xx
write.csv(xx,file = paste0(avg_log2FC,"compareCluster-GO_enrichment.csv"))
# Plot and export the KEGG result.
p=clusterProfiler::dotplot(gg,showCategory = 20,
label_format = 40)
p4=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p4
print(paste("保存位置",getwd(),sep = " : "))
ggsave(paste0(avg_log2FC,'_degs_compareCluster-KEGG_enrichment-2.pdf'),plot = p4,width = 13,height = 25,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-KEGG_enrichment-2.png'),plot = p4,width = 13,height = 25,limitsize = F)
gg
openxlsx::write.xlsx(gg,file = paste0(avg_log2FC,"_compareCluster-KEGG_enrichment.xlsx"))
getwd()
# Export the marker table (with Entrez IDs) that was used as enrichment input.
openxlsx::write.xlsx(sce.markers,file = paste0(avg_log2FC,"_sce.markers_for_each_clusterfor_enrichment.xlsx"))
## Enlarged figures: same plots with up to 100 terms on a 100 in tall canvas
{
getwd()
#bp
p=clusterProfiler::dotplot(object = xx.BP,showCategory = 100,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-BP_enrichment-100terms-3.pdf'),plot = p,width = 13,height = 100,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-BP_enrichment-100terms-3.png'),plot = p,width = 13,height = 100,limitsize = F)
print((getwd()))
#cc
p=clusterProfiler::dotplot(object = xx.CC,showCategory = 100,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-CC_enrichment-100terms-3.pdf'),plot = p,width = 13,height = 100,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-CC_enrichment-100terms-3.png'),plot = p,width = 13,height = 100,limitsize = F)
print((getwd()))
#MF
p=clusterProfiler::dotplot(object = xx.MF,showCategory = 100,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-MF_enrichment-100terms-3.pdf'),plot = p,width = 13,height = 100,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-MF_enrichment-100terms-3.png'),plot = p,width = 13,height = 100,limitsize = F)
print((getwd()))
#KEGG
p=clusterProfiler::dotplot(object = gg,showCategory = 100,
label_format =60)
p=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p
ggsave(paste0(avg_log2FC,'_degs_compareCluster-KEGG_enrichment-100terms-3.pdf'),plot = p,width = 13,height = 100,limitsize = F)
ggsave(paste0(avg_log2FC,'_degs_compareCluster-KEGG_enrichment-100terms-3.png'),plot = p,width = 13,height = 100,limitsize = F)
print((getwd()))
}
# save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
# Tag each result table with its ontology and stack them into one table.
# NOTE(review): the column is named "oncology"; presumably "ontology" was
# meant - renaming would change the exported spreadsheet, so confirm first.
xx.BP@compareClusterResult$oncology="BP"
xx.CC@compareClusterResult$oncology="CC"
xx.MF@compareClusterResult$oncology="MF"
xx.all=do.call(rbind,list(xx.BP@compareClusterResult,
xx.CC@compareClusterResult,
xx.MF@compareClusterResult))
head(xx.all)
openxlsx::write.xlsx(xx.all,file = paste0(avg_log2FC,"_enrichments_all.xlsx"))
# save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")
# Try to save the results together with `merged_degs`; if that fails, the
# handler saves the same objects without it under a different file name.
# NOTE(review): `merged_degs` is not defined in the visible part of this script.
result <- tryCatch({
save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = paste0(avg_log2FC,"_xx.Rdata"))
# add code to run when no error occurred here
print("没有报错")
}, error = function(e) {
print(getwd()) # executed when an error occurred
# add extra code to run on error here
save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers, gg, file = paste0(avg_log2FC,"_xx_all.Rdata"))
})
# print(result)
print(getwd())
# Disabled branch (condition is F): the same analysis configured for rat.
if (F) {
# rat
print("===========开始go============")
xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db",
readable=TRUE,
ont = 'ALL', # GO ontology: BP, MF or CC, or ALL to compute all three
pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
print("===========开始 kegg============")
gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",
keyType = 'kegg', # KEGG enrichment
organism="rno",
pvalueCutoff = 0.05 # p-value threshold (set to 1 to output everything)
)
p=dotplot(xx)
p2=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p2
ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F)
xx
openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx")
#
p=dotplot(gg)
p4=p+ theme(axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust=0.5))
p4
print(paste("保存位置",getwd(),sep = " : "))
ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F)
gg
openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
}
# Leave the per-cutoff directory again.
setwd("../")
}
}
}
###############################################################################3----------
## Per-cluster differential expression between groups (input for enrichment) ---------------
# Loads the saved workspace (the code below expects it to provide `All.merge`),
# then, for every cell type, compares the "Aged Fibrosis" cells against the
# "Aged" cells with FindMarkers. The per-cluster tables are collected in `degs`,
# given a `gene` column, and stacked into `merged_degs` / `markers`.
load("/home/data/t040413/ipf/gse157379_young_old_blm_mice/monocle/All.merge_final.rds")
All.merge$cell_type <- All.merge$cell.type
subset_data <- All.merge
fibroblast <- subset_data
# The original text of this call was garbled by e-mail obfuscation
# ("[email protected]") and did not parse; restored as a meta.data preview.
head(fibroblast@meta.data)
table(fibroblast$group)
degs <- list()
# Idents(fibroblast)=fibroblast$group
# Cell-type identities are what the per-cluster subsetting relies on; the object
# is not modified inside the loop, so they are set once up front.
Idents(fibroblast) <- fibroblast$cell_type
for (cluster in unique(fibroblast$cell_type)) {
  subset_data <- base::subset(fibroblast, idents = cluster)
  for (eachgroup in c("Aged Fibrosis")) {
    # Within one cluster, switch identities to the experimental group.
    Idents(subset_data) <- subset_data$group
    deg_key <- paste0(cluster, eachgroup)
    # A failing comparison must not stop the remaining clusters, but the reason
    # is reported instead of being silently discarded.
    tryCatch({
      degs[[deg_key]] <- FindMarkers(subset_data,
        ident.1 = eachgroup, densify = TRUE,
        ident.2 = "Aged"
      )
      # NOTE(review): the label says "_vs_NS" although the reference group used
      # above is "Aged"; kept unchanged so existing outputs keep their names.
      degs[[deg_key]]$group <- paste0(cluster, "_", eachgroup, "_vs_NS")
    }, error = function(e) {
      message(
        "FindMarkers failed for cluster '", cluster, "' (", eachgroup,
        " vs Aged): ", conditionMessage(e)
      )
    })
    print(paste0(cluster, " __", eachgroup, "======done!========="))
  }
  print(paste0(" __", cluster, "======done!========="))
  # degs[[eachgroup]]$group=paste0(eachgroup,"_VS_NS")
  print(getwd())
}
head(degs)
names(degs)
# Keep the gene symbol as a real column: rbind() below rewrites the row names.
degs <- lapply(degs, function(x) {
  x$gene <- rownames(x)
  x
})
head(degs)
merged_degs <- do.call(rbind, degs)
head(merged_degs)
markers <- merged_degs
############################ Targeted override: use the specialized DEGs -------------------
# From here on the enrichment input is `specialized_degs`; this replaces the
# value that was assigned to `markers` just above.
markers <- specialized_degs
print(getwd())
# Step up one level, then create and enter the output directory for this run.
setwd("../")
print(getwd())
head(markers)
dir.create("./special_degs_for_fibrosis_vs_ns")
setwd("./special_degs_for_fibrosis_vs_ns")
print(getwd())
# Enrichment analysis of the DEG table held in `markers`, repeated for every
# |log2FC| cutoff in `various_fc`.
# For each cutoff a sub-directory named after the cutoff is created and entered;
# genes are labelled up/down, symbols are mapped to Entrez IDs, GO (BP/CC/MF)
# and KEGG compareCluster results are computed, and the plots, tables and an
# .Rdata file are written there before stepping back to the parent directory.
# Inspection-only expressions (head(), dim(), bare object names) were dropped:
# inside a brace block they are evaluated but never printed.
{
  print(getwd())
  all_degs <- markers
  df <- all_degs
  ## Significance thresholds: adjusted p < 0.05 and |log2FC| above the cutoff
  ## of the current iteration.
  p_val_adj <- 0.05
  # avg_log2FC = 0.8
  # NOTE(review): seq(0.5, 0.8) uses the default step of 1 and therefore yields
  # only 0.5. If several cutoffs are wanted, use e.g. c(0.5, 0.8) or
  # seq(0.5, 0.8, by = 0.1). Left unchanged to keep the current outputs.
  various_fc <- seq(0.5, 0.8)
  for (avg_log2FC in various_fc) {
    dir.create(paste0(avg_log2FC, "/"))
    setwd(paste0(avg_log2FC, "/"))
    # Start every cutoff from the full table; previously `df` was filtered in
    # place, so a later cutoff would only have seen the survivors of an earlier one.
    df <- all_degs
    # Label each gene as up / down / none for this cutoff.
    df$direction <- dplyr::case_when(
      df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",
      df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",
      TRUE ~ "none"
    )
    print(getwd())
    df <- df[df$direction != "none", ]
    if (nrow(df) == 0) {
      # Nothing to enrich: leave the sub-directory and move on instead of
      # failing later inside the ID conversion / enrichment calls.
      message("No DEGs pass |log2FC| > ", avg_log2FC, "; skipping enrichment.")
      setwd("../")
      next
    }
    df$mygroup <- paste(df$group, df$direction, sep = "_")
    print(getwd())
    # dir.create("./degs_enrichments1fc")
    # setwd("./degs_enrichments1fc")
    ########################## ---------------------- enrichment analysis ==
    # https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
    {
      library(clusterProfiler)
      library(org.Hs.eg.db) # human
      library(org.Mm.eg.db) # mouse
      library(ggplot2)
      # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)
      df$cluster <- df$mygroup
      # dplyr verbs are namespace-qualified so the step does not depend on
      # dplyr being attached (an unqualified filter() would resolve to stats::filter).
      df <- dplyr::filter(dplyr::group_by(df, cluster), p_val_adj < 0.05)
      sce.markers <- df
      print(getwd())
      # Map gene symbols to Entrez IDs (mouse annotation); symbols without a
      # mapping are dropped by the merge below.
      ids <- suppressWarnings(bitr(sce.markers$gene, "SYMBOL", "ENTREZID", "org.Mm.eg.db"))
      sce.markers <- merge(sce.markers, ids, by.x = "gene", by.y = "SYMBOL")
      sce.markers$group <- sce.markers$cluster
      sce.markers <- sce.markers[sce.markers$group != "none", ]
      # sce.markers=openxlsx::read.xlsx("~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/sce.markers_for_each_clusterfor_enrichment.xlsx")
      # sce.markers$cluster=sce.markers$mygroup
      # One Entrez ID vector per cluster label: the input format of compareCluster.
      gcSample <- split(sce.markers$ENTREZID, sce.markers$cluster)
      print("===========开始go============")
      print("===========开始go= All ont===========")
      # NOTE(review): no `ont` is passed here, so enrichGO's default ontology
      # applies (MF in current clusterProfiler releases) even though the message
      # above says "All ont" -- confirm whether ont = "ALL" was intended.
      xx <- compareCluster(gcSample,
        fun = "enrichGO", OrgDb = "org.Mm.eg.db", # 'org.Hs.eg.db',
        pvalueCutoff = 0.05
      ) # organism="hsa",

      # --- GO biological process ---
      xx.BP <- compareCluster(gcSample,
        fun = "enrichGO", OrgDb = "org.Mm.eg.db", # 'org.Hs.eg.db',
        pvalueCutoff = 0.05, readable = TRUE,
        ont = "BP"
      ) # organism="hsa",
      p <- clusterProfiler::dotplot(object = xx.BP, showCategory = 20, label_format = 60)
      p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-BP_enrichment--3.pdf'), plot = p, width = 13, height = 40, limitsize = FALSE)
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-BP_enrichment--3.png'), plot = p, width = 13, height = 40, limitsize = FALSE)

      # --- GO cellular component ---
      xx.CC <- compareCluster(gcSample,
        fun = "enrichGO", OrgDb = "org.Mm.eg.db", # 'org.Hs.eg.db',
        pvalueCutoff = 0.05, readable = TRUE,
        ont = "CC"
      ) # organism="hsa",
      p <- clusterProfiler::dotplot(object = xx.CC, showCategory = 20, label_format = 60)
      p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-CC_enrichment--3.pdf'), plot = p, width = 13, height = 40, limitsize = FALSE)
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-CC_enrichment--3.png'), plot = p, width = 13, height = 40, limitsize = FALSE)

      # --- GO molecular function ---
      xx.MF <- compareCluster(gcSample,
        fun = "enrichGO", OrgDb = "org.Mm.eg.db", # 'org.Hs.eg.db',
        pvalueCutoff = 0.05, readable = TRUE,
        ont = "MF"
      ) # organism="hsa",
      p <- clusterProfiler::dotplot(object = xx.MF, showCategory = 20, label_format = 60)
      p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-MF_enrichment--3.pdf'), plot = p, width = 13, height = 40, limitsize = FALSE)
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-MF_enrichment--3.png'), plot = p, width = 13, height = 40, limitsize = FALSE)
      print(getwd())

      # --- KEGG ---
      print("===========开始 kegg All ont============")
      gg <- clusterProfiler::compareCluster(gcSample,
        fun = "enrichKEGG", # readable=TRUE,
        keyType = "kegg", # KEGG enrichment
        organism = "mmu", # "rno",
        pvalueCutoff = 0.05 # p-value cutoff (set to 1 to output everything)
      )

      # Plot and export the GO result computed without an explicit `ont` (`xx`).
      p <- clusterProfiler::dotplot(object = xx, showCategory = 20, label_format = 100)
      p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-GO_enrichment--3.pdf'), plot = p, width = 13, height = 40, limitsize = FALSE)
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-GO_enrichment--3.png'), plot = p, width = 13, height = 40, limitsize = FALSE)
      write.csv(xx, file = paste0(avg_log2FC, "compareCluster-GO_enrichment.csv"))

      # Plot and export the KEGG result.
      p <- clusterProfiler::dotplot(gg, showCategory = 20, label_format = 40)
      p4 <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
      print(paste("保存位置", getwd(), sep = " : "))
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-KEGG_enrichment-2.pdf'), plot = p4, width = 13, height = 25, limitsize = FALSE)
      ggsave(paste0(avg_log2FC, '_degs_compareCluster-KEGG_enrichment-2.png'), plot = p4, width = 13, height = 25, limitsize = FALSE)
      openxlsx::write.xlsx(gg, file = paste0(avg_log2FC, "_compareCluster-KEGG_enrichment.xlsx"))
      openxlsx::write.xlsx(sce.markers, file = paste0(avg_log2FC, "_sce.markers_for_each_clusterfor_enrichment.xlsx"))

      # save(xx.BP,xx.CC,xx.MF, gg, file = "~/silicosis/spatial_transcriptomicsharmony_cluster_0.5res_gsea/xx.Rdata")
      # Tag every row with its ontology and stack the three GO tables.
      # (The column name "oncology" is kept as-is because it is written to the output file.)
      xx.BP@compareClusterResult$oncology <- "BP"
      xx.CC@compareClusterResult$oncology <- "CC"
      xx.MF@compareClusterResult$oncology <- "MF"
      xx.all <- do.call(rbind, list(
        xx.BP@compareClusterResult,
        xx.CC@compareClusterResult,
        xx.MF@compareClusterResult
      ))
      openxlsx::write.xlsx(xx.all, file = paste0(avg_log2FC, "_enrichments_all.xlsx"))

      # save(xx.BP,xx.CC,xx.MF,xx.all,sce.markers,merged_degs, gg, file = "./xx.Rdata")
      # Try to save everything including `merged_degs`; if that fails (for
      # example because `merged_degs` does not exist), save without it.
      result <- tryCatch({
        save(xx.BP, xx.CC, xx.MF, xx.all, sce.markers, merged_degs, gg, file = paste0(avg_log2FC, "_xx.Rdata"))
        # Runs only when the save above succeeded.
        print("没有报错")
      }, error = function(e) {
        print(getwd()) # report where the fallback file is written
        save(xx.BP, xx.CC, xx.MF, xx.all, sce.markers, gg, file = paste0(avg_log2FC, "_xx.Rdata"))
      })
      # print(result)

      if (FALSE) {
        # Disabled template: the same analysis for rat (org.Rn.eg.db / "rno").
        print("===========开始go============")
        xx <- clusterProfiler::compareCluster(gcSample,
          fun = "enrichGO", OrgDb = "org.Rn.eg.db",
          readable = TRUE,
          ont = "ALL", # GO ontology: BP, MF or CC; ALL computes all three
          pvalueCutoff = 0.05
        ) # organism="hsa", #'org.Hs.eg.db',
        print("===========开始 kegg============")
        gg <- clusterProfiler::compareCluster(gcSample,
          fun = "enrichKEGG",
          keyType = "kegg", # KEGG enrichment
          organism = "rno",
          pvalueCutoff = 0.05 # p-value cutoff (set to 1 to output everything)
        )
        p <- dotplot(xx)
        p2 <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
        ggsave('degs_compareCluster-GO_enrichment-2.pdf', plot = p2, width = 6, height = 20, limitsize = FALSE)
        openxlsx::write.xlsx(xx, file = "compareCluster-GO_enrichment.xlsx")
        #
        p <- dotplot(gg)
        p4 <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
        print(paste("保存位置", getwd(), sep = " : "))
        ggsave('degs_compareCluster-KEGG_enrichment-2.pdf', plot = p4, width = 6, height = 12, limitsize = FALSE)
        openxlsx::write.xlsx(gg, file = "compareCluster-KEGG_enrichment.xlsx")
      }
      # Back to the parent directory before the next cutoff.
      setwd("../")
    }
  }
}
#######gsea----
{
# Intentionally empty block: it evaluates to NULL and has no effect.
# The GSEA code itself is in the braced block after the library() calls below.
}
###############3gsea-----------
#https://zhuanlan.zhihu.com/p/518144716
#https://yulab-smu.top/biomedical-knowledge-mining-book/faq.html
# library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(clusterProfiler)
library(enrichplot)
library(tidyverse)
library(ggstatsplot)
# GSEA (KEGG and GO) on the specialized-fibroblast DEG table `specialized_degs`.
# Steps: move to a sibling output directory, build a ranked gene list
# (avg_log2FC named by Entrez ID, sorted decreasingly) from genes with
# p_val_adj < 0.05, run gseKEGG / gseGO, save the raw results, then draw one
# GSEA plot per selected KEGG pathway (up- and down-regulated separately),
# a multi-page PDF per direction, and one combined plot of the up pathways.
{
  setwd("../")
  print(getwd())
  dir.create("5.GSEA_kegg_go")
  setwd("5.GSEA_kegg_go")

  ## Species settings
  organism <- "mmu" # human 'hsa', mouse 'mmu'
  OrgDb <- "org.Mm.eg.db" # human "org.Hs.eg.db", mouse "org.Mm.eg.db"
  rownames(specialized_degs) <- specialized_degs$gene

  #### Choose the DEG table to rank ####
  need_DEG <- dplyr::filter(specialized_degs, p_val_adj < 0.05)
  # Columns are taken by name rather than by position (previously c(2, 5)), and
  # the symbol comes from the `gene` column rather than from row names, so the
  # result does not depend on column order or on filter() keeping row names.
  need_DEG <- data.frame(
    log2FoldChange = need_DEG$avg_log2FC,
    pvalue = need_DEG$p_val_adj, # adjusted p-value, despite the column name
    SYMBOL = need_DEG$gene,
    row.names = need_DEG$gene,
    stringsAsFactors = FALSE
  )

  ##### Build the GSEA geneList: log2FC named by ENTREZID, sorted decreasingly ####
  # Convert gene symbols to Entrez IDs.
  df <- bitr(need_DEG$SYMBOL,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = OrgDb
  ) # human org.Hs.eg.db, mouse org.Mm.eg.db
  need_DEG <- merge(need_DEG, df, by = "SYMBOL") # attach the annotation by SYMBOL
  # https://yulab-smu.top/biomedical-knowledge-mining-book/faq.html
  geneList <- need_DEG$log2FoldChange
  names(geneList) <- need_DEG$ENTREZID
  geneList <- sort(geneList, decreasing = TRUE)

  ##### GSEA enrichment ####
  KEGG_kk_entrez <- gseKEGG(
    geneList = geneList,
    organism = organism, # human hsa, mouse mmu
    pvalueCutoff = 0.25 # applied to the adjusted p-value; adjustable
  )
  KEGG_kk <- DOSE::setReadable(KEGG_kk_entrez,
    OrgDb = OrgDb,
    keyType = "ENTREZID"
  ) # Entrez IDs -> symbols
  GO_kk_entrez <- gseGO(
    geneList = geneList,
    ont = "ALL", # "BP", "MF", "CC" or "ALL"
    OrgDb = OrgDb, # human org.Hs.eg.db, mouse org.Mm.eg.db
    keyType = "ENTREZID",
    pvalueCutoff = 0.25 # applied to the adjusted p-value; adjustable
  )
  GO_kk <- DOSE::setReadable(GO_kk_entrez,
    OrgDb = OrgDb,
    keyType = "ENTREZID"
  ) # Entrez IDs -> symbols
  print(getwd())
  save(KEGG_kk_entrez, GO_kk_entrez, file = "dega_GSEA_result.RData")

  # Pathways are treated as significantly enriched when |NES| > 1,
  # nominal p-value < 0.05 and FDR (p.adjust) < 0.25. Up- and down-regulated
  # pathways are plotted separately; gseaplot2() is used both for single
  # pathways and for several pathways in one figure.
  kegg_gsea_res <- data.frame(KEGG_kk@result)
  ## Result object used for plotting
  kk_gse <- KEGG_kk
  kk_gse_entrez <- KEGG_kk_entrez
  ### Filter by the thresholds above
  kk_gse_cut <- kk_gse[kk_gse$pvalue < 0.05 & kk_gse$p.adjust < 0.25 & abs(kk_gse$NES) > 1]
  kk_gse_cut_down <- kk_gse_cut[kk_gse_cut$NES < 0, ]
  kk_gse_cut_up <- kk_gse_cut[kk_gse_cut$NES > 0, ]
  # Keep at most 100 pathways per set, ordered by NES.
  down_gsea <- kk_gse_cut_down[tail(order(kk_gse_cut_down$NES, decreasing = TRUE), 100), ]
  up_gsea <- kk_gse_cut_up[head(order(kk_gse_cut_up$NES, decreasing = TRUE), 100), ]
  diff_gsea <- kk_gse_cut[head(order(abs(kk_gse_cut$NES), decreasing = TRUE), 100), ]

  #### Classic GSEA plots: up-regulated pathways -----
  gselist <- list()
  # seq_along() so that an empty pathway set runs zero iterations
  # (1:length(x) would iterate over c(1, 0)).
  for (i in seq_along(up_gsea$Description)) {
    gseap1 <- gseaplot2(kk_gse,
      up_gsea$ID[i], # pathway ID
      title = up_gsea$Description[i], # title
      color = "red", # GSEA line colour
      base_size = 20, # base font size
      rel_heights = c(1.5, 0.5, 1), # relative heights of the sub-plots
      subplots = 1:3, # which sub-plots to show, e.g. c(1, 3)
      ES_geom = "line", # enrichment score as line or "dot"
      pvalue_table = TRUE # show the p-value table
    )
    gselist[[up_gsea$Description[i]]] <- gseap1
    print(paste0(getwd(), up_gsea$Description[i]))
    # "/" in a description (e.g. "Glycolysis / Gluconeogenesis") would be read
    # as a directory separator and make pdf() fail, so it is replaced in the
    # file name only; the plot title keeps the original text.
    pdf(
      file = paste0(gsub("/", "_", up_gsea$Description[i], fixed = TRUE), '_GSEA_up_1.pdf'),
      width = 12, height = 8
    )
    # Close the device even if drawing fails.
    tryCatch(print(gseap1), finally = dev.off())
  }
  # All up-regulated plots in one multi-page PDF (skipped when there are none,
  # which would otherwise leave a PDF without pages).
  if (length(gselist) > 0) {
    pdf(file = paste0("all_split", '_GSEA_up_1.pdf'), width = 12, height = 8)
    tryCatch(
      for (i in names(gselist)) {
        print(gselist[[i]])
      },
      finally = dev.off()
    )
  }

  ### Classic GSEA plots: down-regulated pathways ------
  gselist <- list()
  for (i in seq_along(down_gsea$Description)) {
    gseap1 <- gseaplot2(kk_gse,
      down_gsea$ID[i], # pathway ID
      title = down_gsea$Description[i], # title
      color = "red", # GSEA line colour
      base_size = 20, # base font size
      rel_heights = c(1.5, 0.5, 1), # relative heights of the sub-plots
      subplots = 1:3, # which sub-plots to show, e.g. c(1, 3)
      ES_geom = "line", # enrichment score as line or "dot"
      pvalue_table = TRUE # show the p-value table
    )
    gselist[[down_gsea$Description[i]]] <- gseap1
    print(paste0(getwd(), down_gsea$Description[i]))
    pdf(
      file = paste0(gsub("/", "_", down_gsea$Description[i], fixed = TRUE), '_GSEA_down_1.pdf'),
      width = 12, height = 8
    )
    tryCatch(print(gseap1), finally = dev.off())
  }
  if (length(gselist) > 0) {
    pdf(file = paste0("all_split", '_GSEA_down_1.pdf'), width = 12, height = 8)
    tryCatch(
      for (i in names(gselist)) {
        print(gselist[[i]])
      },
      finally = dev.off()
    )
  }

  #### Combined GSEA plot of all selected up-regulated pathways
  if (length(up_gsea$ID) > 0) {
    gseap2 <- gseaplot2(kk_gse,
      up_gsea$ID, # pathway IDs
      title = "UP_GSEA_all", # title
      color = "red", # GSEA line colour
      base_size = 20, # base font size
      rel_heights = c(1.5, 0.5, 1), # relative heights of the sub-plots
      subplots = 1:3, # which sub-plots to show, e.g. c(1, 3)
      ES_geom = "line", # enrichment score as line or "dot"
      pvalue_table = TRUE # show the p-value table
    )
    ggsave(gseap2, filename = "combined_GSEA_up_all.pdf", width = 12, height = 12)
  } else {
    message("No up-regulated KEGG pathway passed the filters; combined plot skipped.")
  }
  go_gsea_res <- data.frame(GO_kk@result)
}