微生物多样性qiime2分析流程(6) 数据可视化分析(上）

在之前的几篇文章里，介绍了如何整合dada2的分析结果，并且已经通过MicrobiotaProcess包转化为了phyloseq对象，接下来先让我们进行一些基础的可视化操作.
示例数据:https://pan.baidu.com/s/1aY92IEQ_sDmLvEhJ_J4vkQ 提取码: 7ixa

1.整合数据转换格式

rm(list = ls())
library(MicrobiotaProcess)
library(phyloseq)
library(tidyverse)
library(RColorBrewer)
otu <- "otu_table.qza"
rep <- "rep-seqs.qza"
tree <- "rooted-tree.qza"
tax <- "taxonomy.qza"
sample <- "group.txt"
ps_dada2 <- import_qiime2(otuqza=otu, taxaqza=tax,refseqqza=rep,
                          mapfilename=sample,treeqza=tree)

在进行任何多样性图之前，让我们先探讨一下数据

ps <- ps_dada2

observed <- estimate_richness(ps, measures = c('Observed'))
explore.df <- cbind(observed, sample_sums(ps),sample_data(ps)$group)
colnames(explore.df) <- c('Observed', 'Sample_Sums',"group")
observed_mean <- mean(explore.df$Observed)
sample_sum_mean <- mean(explore.df$Sample_Sums)
ggplot(data = explore.df, aes(x = Sample_Sums, y = Observed,color=group)) + 
  geom_point() +
  geom_smooth(method="auto", se=TRUE, fullrange=FALSE, level=0.95, 
              inherit.aes = F, mapping = aes(Sample_Sums, Observed),
              data = explore.df)

sample .png

2. `Rarefaction`可视化

p_rare <- ggrarecurve(obj=ps_dada2, 
                      indexNames=c("Observe","Chao1","ACE"), 
                      chunks=300) +
  theme(legend.spacing.y=unit(0.02,"cm"),
        legend.text=element_text(size=4))+
  theme_bw()

p_rare.png

3. Alpha多样性可视化

alphaobj <- get_alphaindex(ps_dada2)

head(as.data.frame(alphaobj))

p_alpha <- ggbox(alphaobj, geom="violin", factorNames="group") + 
  scale_fill_manual(values=c("#2874C5", "#EABF00"))+
  theme(strip.background = element_rect(colour=NA, fill="grey"))
p_alpha

Alpha.png

4. 物种组成分类可视化

4.1 无分组物种组成图

phytax <- get_taxadf(obj=ps_dada2, taxlevel=2)
phybar <- ggbartax(obj=phytax) +
  xlab(NULL) + ylab("relative abundance (%)")+
  theme(axis.text.x=element_text(face="plain",
                                 color="black",hjust=0.8,vjust=0.6,
                                 size=9, angle=90))+
  theme(legend.position="right")
phybar

phy1.png

4.2 门水平物种组成图

phytax <- get_taxadf(obj=ps_dada2, taxlevel=2)
phybar <- ggbartax(obj=phytax,facetNames="group", count=FALSE) +
  xlab(NULL) + ylab("relative abundance (%)")+
  theme(axis.text.x=element_text(face="plain",
                                 color="black",hjust=0.8,vjust=0.6,
                                 size=9, angle=90))+
  theme(legend.position="right")
phybar

phy2.png

4.3 属水平物种组成图

genustax <- get_taxadf(obj=ps_dada2, taxlevel=6)
genusbar <- ggbartax(obj=genustax, facetNames="group", count=FALSE) +
  xlab(NULL) + ylab("relative abundance (%)")+
  theme(axis.text.x=element_text(face="plain",
                                 color="black",hjust=0.8,vjust=0.6,
                                 size=9, angle=90))+
  theme(strip.text.x = element_text(size=8, color="black",
                                    face="plain"))+
  theme(legend.position="right")
genusbar

genus.png

5. PCA分析

pcares <- get_pca(obj=ps_dada2, method="hellinger")
pcaplot <- ggordpoint(obj=pcares, biplot=TRUE, speciesannot=TRUE,
                      pc=c(1,2),factorNames=c("group"), ellipse=TRUE) + 
  scale_color_manual(values=c("#2874C5", "#EABF00"))

PCA1-2.png

6. PCOA分析

pcoares <- get_pcoa(obj=ps_dada2, 
                    distmethod="euclidean", method="hellinger")
pcoaplot <- ggordpoint(obj=pcoares, biplot=TRUE,
                       speciesannot=TRUE,pc = c(2,1),
                       factorNames=c("group"), ellipse=T)

pcoa1.png

pcoares <- get_pcoa(obj=ps_dada2, 
                    distmethod="Unweighted-UniFrac", 
                    method="hellinger")
pcoaplot <- ggordpoint(obj=pcoares, biplot=TRUE,
                       speciesannot=TRUE,
                       pc = c(1,2),
                       factorNames=c("group"), ellipse=T)
pcoaplot

pcoa2.png

pcoares <- get_pcoa(obj=ps_dada2, 
                    distmethod="weighted-UniFrac", 
                    method="hellinger")
pcoaplot <- ggordpoint(obj=pcoares, biplot=TRUE,
                       speciesannot=TRUE,
                       pc = c(1,2),
                       factorNames=c("group"), ellipse=T)
pcoaplot

pcoa3.png

7. Venn图

upsetda <- get_upset(ps, factorNames="group")
upsetda
library(UpSetR)
upset(upsetda, sets=c("CASE_AH","CASE_BH",
                      "CASE_CH","CASE_DH"), sets.bar.color = "#56B4E9",
      order.by = "freq", empty.intersections = "on")

venn.png

8. NMDS分析

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 3365 taxa and 41 samples ]
sample_data() Sample Data:       [ 41 samples by 2 sample variables ]
tax_table()   Taxonomy Table:    [ 3365 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 3365 tips and 3363 internal nodes ]

8.1 根据Bray-Curtis距离和NMDS排序进行多元分析

carbom.ord <- ordinate(ps, "NMDS", "bray")

8.2 绘制OTU

plot_ordination(ps, carbom.ord, type="taxa", color="class", 
                title="OTUs")+guides(color=FALSE)+theme_bw()

OTU.jpeg

8.3 绘制样本

plot_ordination(ps, carbom.ord, type="samples",title="Samples",
                color="class") + labs(color = "")+
  geom_point(size=3)+theme_bw()

sample.jpeg

8.4 显示样本和OTU

plot_ordination(ps, carbom.ord, type="split", color="class",
                title="biplot", label = "station") +  
  geom_point(size=3)+guides(color=FALSE)+theme_bw()

bioplot.jpeg

9.典型对应分析（CCA）

pacman::p_load(tidyverse,phyloseq,MicrobiotaProcess,
               ape,microbiome,patchwork)
data(dietswap)
pseq <- dietswap
pseq.cca <- ordinate(pseq, "CCA")
A <- plot_ordination(pseq, pseq.cca,
                     type = "samples", color = "nationality")+
  geom_point(size = 4)+theme_bw()
B <- plot_ordination(pseq, pseq.cca,
                     type = "taxa", color = "Phylum")+
  geom_point(size = 4)+theme_bw()

A|B

CA.jpeg

Split plot

plot_ordination(pseq, pseq.cca,
                type = "split", shape = "nationality", 
                color = "Phylum", label = "nationality")+
  theme_bw()

CA2.jpeg

10. 计算距离矩阵

参考：https://rdrr.io/bioc/phyloseq/man/distance.html
https://rdrr.io/bioc/MicrobiotaProcess/man/get_dist.html

bray <- get_dist(ps,distmethod="bray") %>% as.matrix()
write.table (bray,file ="bray_distance.xls", sep ="\t", row.names = T) 

uunifrac <- get_dist(ps,distmethod="uunifrac") %>% as.matrix()
write.table (uunifrac,file ="uunifrac_distance.xls", sep ="\t", row.names = T) 

wunifrac <- get_dist(ps,distmethod="wunifrac") %>% as.matrix()
write.table (wunifrac,file ="wunifrac_distance.xls", sep ="\t", row.names = T)

以上通过MicrobiotaProcess与phyloseq等R包进行了一系列的分析，接下来我们将进行更加深入的相关性分析与与差异分析
参考：http://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess-basics.html