在之前的几篇文章里,介绍了如何整合dada2的分析结果,并且已经通过MicrobiotaProcess包将其转化为phyloseq对象,接下来让我们先进行一些基础的可视化操作.
示例数据:https://pan.baidu.com/s/1aY92IEQ_sDmLvEhJ_J4vkQ 提取码: 7ixa
1.整合数据转换格式
# Packages used throughout this walkthrough.
# The original `rm(list = ls())` was dropped on purpose: it wipes the caller's
# workspace yet still does not give a clean session (attached packages and
# options survive). Restart R instead when a clean state is needed.
library(MicrobiotaProcess)
library(phyloseq)
library(tidyverse)
library(RColorBrewer)

# QIIME2 artifacts and the sample metadata table, resolved against the
# current working directory.
otu <- "otu_table.qza"
rep <- "rep-seqs.qza"
tree <- "rooted-tree.qza"
tax <- "taxonomy.qza"
sample <- "group.txt"

# Fail early with a readable message when an input file is missing, rather
# than from inside the importer.
input_files <- c(otu, rep, tree, tax, sample)
missing_files <- input_files[!file.exists(input_files)]
if (length(missing_files) > 0) {
  stop(
    "Input file(s) not found: ",
    paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# Import the feature table, taxonomy, representative sequences, metadata and
# rooted tree in a single call; the result is used as `ps_dada2` below.
ps_dada2 <- import_qiime2(
  otuqza = otu,
  taxaqza = tax,
  refseqqza = rep,
  mapfilename = sample,
  treeqza = tree
)
在绘制任何多样性图之前,让我们先探索一下数据
# Sanity check before any diversity plot: observed richness against the
# per-sample total count, coloured by the `group` metadata column.
ps <- ps_dada2
observed <- estimate_richness(ps, measures = c("Observed"))

# One row per sample: richness, total count and group label.
explore.df <- setNames(
  cbind(observed, sample_sums(ps), sample_data(ps)$group),
  c("Observed", "Sample_Sums", "group")
)

# Overall means, kept as standalone values.
observed_mean <- mean(explore.df$Observed)
sample_sum_mean <- mean(explore.df$Sample_Sums)

# Points are coloured by group; the smoother ignores the colour mapping
# (inherit.aes = FALSE) so a single trend is fitted across all samples.
ggplot(
  data = explore.df,
  mapping = aes(x = Sample_Sums, y = Observed, color = group)
) +
  geom_point() +
  geom_smooth(
    data = explore.df,
    mapping = aes(Sample_Sums, Observed),
    inherit.aes = FALSE,
    method = "auto",
    se = TRUE,
    fullrange = FALSE,
    level = 0.95
  )
2. Rarefaction(稀释曲线)可视化
# Rarefaction curves for three indices.
# theme_bw() is a complete theme: adding it AFTER theme() replaces every
# earlier theme setting, so the legend tweaks were silently discarded in the
# original order. Apply the complete theme first, then the overrides.
p_rare <- ggrarecurve(
  obj = ps_dada2,
  indexNames = c("Observe", "Chao1", "ACE"),
  chunks = 300
) +
  theme_bw() +
  theme(
    legend.spacing.y = unit(0.02, "cm"),
    legend.text = element_text(size = 4)
  )

# Display the figure; the original only assigned it.
p_rare
3. Alpha多样性可视化
# Alpha-diversity indices per sample, shown as violins split by `group`.
alphaobj <- get_alphaindex(ps_dada2)
head(as.data.frame(alphaobj))

# A manual fill scale errors at draw time when it has fewer colours than
# groups, and the UpSet section of this script lists four group labels.
# Size the palette to the data instead: with two groups (or fewer) this
# yields exactly the original two colours, with more groups it interpolates
# between them.
group_levels <- unique(as.character(sample_data(ps_dada2)$group))
group_colors <- colorRampPalette(c("#2874C5", "#EABF00"))(
  max(length(group_levels), 2)
)

p_alpha <- ggbox(alphaobj, geom = "violin", factorNames = "group") +
  scale_fill_manual(values = group_colors) +
  theme(strip.background = element_rect(colour = NA, fill = "grey"))
p_alpha
4. 物种组成分类可视化
4.1 门水平物种组成图(不分组)
# Taxonomic composition at taxonomy level 2, one bar per sample, no faceting.
phytax <- get_taxadf(obj = ps_dada2, taxlevel = 2)

# Vertical sample labels, legend on the right.
phybar <- ggbartax(obj = phytax) +
  labs(x = NULL, y = "relative abundance (%)") +
  theme(
    axis.text.x = element_text(
      face = "plain", color = "black",
      hjust = 0.8, vjust = 0.6,
      size = 9, angle = 90
    ),
    legend.position = "right"
  )
phybar
4.2 门水平物种组成图(按group分面)
# Taxonomic composition at taxonomy level 2, faceted by the `group` column.
phytax <- get_taxadf(obj = ps_dada2, taxlevel = 2)

# count = FALSE is passed explicitly, matching the relative-abundance label.
phybar <- ggbartax(obj = phytax, facetNames = "group", count = FALSE) +
  labs(x = NULL, y = "relative abundance (%)") +
  theme(
    axis.text.x = element_text(
      face = "plain", color = "black",
      hjust = 0.8, vjust = 0.6,
      size = 9, angle = 90
    ),
    legend.position = "right"
  )
phybar
4.3 属水平物种组成图
# Taxonomic composition at taxonomy level 6, faceted by the `group` column.
genustax <- get_taxadf(obj = ps_dada2, taxlevel = 6)

# Same axis styling as the level-2 plots, plus smaller facet strip labels.
genusbar <- ggbartax(obj = genustax, facetNames = "group", count = FALSE) +
  labs(x = NULL, y = "relative abundance (%)") +
  theme(
    axis.text.x = element_text(
      face = "plain", color = "black",
      hjust = 0.8, vjust = 0.6,
      size = 9, angle = 90
    ),
    strip.text.x = element_text(size = 8, color = "black", face = "plain"),
    legend.position = "right"
  )
genusbar
5. PCA分析
# PCA of the abundance table, computed with method = "hellinger".
pcares <- get_pca(obj = ps_dada2, method = "hellinger")

# A manual colour scale errors at draw time when it has fewer colours than
# groups, and the UpSet section of this script lists four group labels.
# Size the palette to the data: two groups (or fewer) give exactly the
# original two colours, more groups interpolate between them.
group_levels <- unique(as.character(sample_data(ps_dada2)$group))
group_colors <- colorRampPalette(c("#2874C5", "#EABF00"))(
  max(length(group_levels), 2)
)

# Biplot on PC1/PC2 with species annotation and one ellipse per group.
pcaplot <- ggordpoint(
  obj = pcares,
  biplot = TRUE,
  speciesannot = TRUE,
  pc = c(1, 2),
  factorNames = c("group"),
  ellipse = TRUE
) +
  scale_color_manual(values = group_colors)

# Display the figure; the original only assigned it.
pcaplot
6. PCoA分析
# PCoA with three distance methods. `pcoares` and `pcoaplot` are reused for
# each method, so every plot is displayed before the next one overwrites it.
# The original never displayed the Euclidean plot.

# Euclidean distance.
pcoares <- get_pcoa(
  obj = ps_dada2,
  distmethod = "euclidean",
  method = "hellinger"
)
# NOTE(review): this plot uses pc = c(2, 1) while the two UniFrac plots below
# use c(1, 2); kept as in the original, confirm the swap is intended.
pcoaplot <- ggordpoint(
  obj = pcoares,
  biplot = TRUE,
  speciesannot = TRUE,
  pc = c(2, 1),
  factorNames = c("group"),
  ellipse = TRUE
)
pcoaplot

# Unweighted UniFrac distance.
pcoares <- get_pcoa(
  obj = ps_dada2,
  distmethod = "Unweighted-UniFrac",
  method = "hellinger"
)
pcoaplot <- ggordpoint(
  obj = pcoares,
  biplot = TRUE,
  speciesannot = TRUE,
  pc = c(1, 2),
  factorNames = c("group"),
  ellipse = TRUE
)
pcoaplot

# Weighted UniFrac distance.
pcoares <- get_pcoa(
  obj = ps_dada2,
  distmethod = "weighted-UniFrac",
  method = "hellinger"
)
pcoaplot <- ggordpoint(
  obj = pcoares,
  biplot = TRUE,
  speciesannot = TRUE,
  pc = c(1, 2),
  factorNames = c("group"),
  ellipse = TRUE
)
pcoaplot
7. UpSet图(多分组情况下替代Venn图)
# Build the UpSet input from the `group` column and print it for inspection.
upsetda <- get_upset(ps, factorNames = "group")
upsetda

library(UpSetR)

# Group labels to intersect; these must match the set columns of `upsetda`.
group_sets <- c("CASE_AH", "CASE_BH", "CASE_CH", "CASE_DH")

# Intersections ordered by frequency, empty intersections included.
upset(
  upsetda,
  sets = group_sets,
  sets.bar.color = "#56B4E9",
  order.by = "freq",
  empty.intersections = "on"
)
8. NMDS分析
phyloseq-class experiment-level object
otu_table() OTU Table: [ 3365 taxa and 41 samples ]
sample_data() Sample Data: [ 41 samples by 2 sample variables ]
tax_table() Taxonomy Table: [ 3365 taxa by 7 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 3365 tips and 3363 internal nodes ]
8.1 根据Bray-Curtis距离和NMDS排序进行多元分析
# NMDS ordination of `ps` on Bray-Curtis distances; reused by the plots below.
carbom.ord <- ordinate(physeq = ps, method = "NMDS", distance = "bray")
8.2 绘制OTU
# Taxa view of the NMDS ordination, legend hidden.
# guides(color = FALSE) is deprecated in ggplot2 (>= 3.3.4) and warns; the
# supported spelling is "none".
# NOTE(review): `color = "class"` must match a column available for taxa in
# `ps` exactly (names are case-sensitive); confirm against the data.
plot_ordination(
  ps, carbom.ord,
  type = "taxa", color = "class", title = "OTUs"
) +
  guides(color = "none") +
  theme_bw()
8.3 绘制样本
# Sample view of the NMDS ordination with enlarged points and an untitled
# colour legend.
# NOTE(review): `color = "class"` must name a sample variable in `ps`; the
# rest of this script groups samples by `group`. Confirm against the data.
plot_ordination(
  physeq = ps,
  ordination = carbom.ord,
  type = "samples",
  color = "class",
  title = "Samples"
) +
  geom_point(size = 3) +
  labs(color = "") +
  theme_bw()
8.4 显示样本和OTU
# Split view (samples and taxa in separate panels) of the NMDS ordination,
# legend hidden.
# guides(color = FALSE) is deprecated in ggplot2 (>= 3.3.4) and warns; the
# supported spelling is "none".
# NOTE(review): `color = "class"` and `label = "station"` must exist as
# variables in `ps`; confirm against the data.
plot_ordination(
  ps, carbom.ord,
  type = "split", color = "class",
  title = "biplot", label = "station"
) +
  geom_point(size = 3) +
  guides(color = "none") +
  theme_bw()
9.典型对应分析(CCA)
# Load the packages for the CCA example, which uses the `dietswap` example
# dataset instead of the imported data.
pacman::p_load(
  tidyverse, phyloseq, MicrobiotaProcess,
  ape, microbiome, patchwork
)

data(dietswap)
pseq <- dietswap

# Correspondence analysis through phyloseq's ordinate() with method "CCA".
pseq.cca <- ordinate(physeq = pseq, method = "CCA")

# Panel A: samples coloured by nationality.
A <- plot_ordination(
  physeq = pseq,
  ordination = pseq.cca,
  type = "samples",
  color = "nationality"
) +
  geom_point(size = 4) +
  theme_bw()

# Panel B: taxa coloured by phylum.
B <- plot_ordination(
  physeq = pseq,
  ordination = pseq.cca,
  type = "taxa",
  color = "Phylum"
) +
  geom_point(size = 4) +
  theme_bw()

# Place the two panels side by side.
A | B
Split plot
# Split view of the CCA ordination: shape and label by nationality, colour
# by phylum.
plot_ordination(
  physeq = pseq,
  ordination = pseq.cca,
  type = "split",
  color = "Phylum",
  shape = "nationality",
  label = "nationality"
) +
  theme_bw()
10. 计算距离矩阵
参考:https://rdrr.io/bioc/phyloseq/man/distance.html
https://rdrr.io/bioc/MicrobiotaProcess/man/get_dist.html
# Compute three sample-by-sample distance matrices and export each as a
# tab-separated table.
# With row.names = TRUE and the default col.names, write.table() emits a
# header one field shorter than the data rows, so a spreadsheet shifts every
# column label one position to the left. col.names = NA adds the blank
# leading header cell that keeps labels aligned with their columns.
bray <- as.matrix(get_dist(ps, distmethod = "bray"))
write.table(
  bray,
  file = "bray_distance.xls",
  sep = "\t", row.names = TRUE, col.names = NA
)

uunifrac <- as.matrix(get_dist(ps, distmethod = "uunifrac"))
write.table(
  uunifrac,
  file = "uunifrac_distance.xls",
  sep = "\t", row.names = TRUE, col.names = NA
)

wunifrac <- as.matrix(get_dist(ps, distmethod = "wunifrac"))
write.table(
  wunifrac,
  file = "wunifrac_distance.xls",
  sep = "\t", row.names = TRUE, col.names = NA
)
以上通过MicrobiotaProcess与phyloseq等R包进行了一系列的分析,接下来我们将进行更加深入的相关性分析与差异分析
参考:http://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess-basics.html