Introduction
This document compares the output of lefse when it is run through different applications:
- XMAS2 (R package)
- lefse-conda (command line)
- lefse-galaxy (from the galaxy platform)
In all cases, the same datasets are used: amplicon_ps and Zeybel_Gut, both of which are included in the XMAS2 package.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(XMAS2)
library(dplyr)
library(ggplot2)
library(devtools)
library(tibble)
library(tidyr)
library(magrittr)
library(readr)
library(VennDiagram)
library(purrr)
# rm(list = ls())
# Session-wide options for this report.
# Use the reserved constant FALSE (not the reassignable shortcut `F`).
options(stringsAsFactors = FALSE)
# 1000 * 1024^2 bytes (1000 MiB); `future.globals.maxSize` is an option of the
# 'future' package -- presumably needed by the XMAS2 routines, TODO confirm.
options(future.globals.maxSize = 1000 * 1024^2)
Dataset
16s genus
data("amplicon_ps")
amplicon_ps_genus <- summarize_taxa(amplicon_ps, taxa_level = "Genus")
amplicon_ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 406 taxa and 34 samples ]
## sample_data() Sample Data: [ 34 samples by 8 sample variables ]
## tax_table() Taxonomy Table: [ 406 taxa by 6 taxonomic ranks ]
metagenomics species
data("Zeybel_Gut")
Zeybel_ps_species <- summarize_taxa(Zeybel_Gut, taxa_level = "Species")
Zeybel_ps_species
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 372 taxa and 42 samples ]
## sample_data() Sample Data: [ 42 samples by 51 sample variables ]
## tax_table() Taxonomy Table: [ 372 taxa by 7 taxonomic ranks ]
Preparing for lefse galaxy and conda
1st row: class (required)
2nd row: subclass (optional)
3rd row: sampleID (required)
rownames: taxon
data format: tab-separated ("\t")
#' Prepare a phyloseq object and LEfSe input tables for a class comparison
#'
#' Keeps only the samples whose `Class` value is in `Class_names`, drops taxa
#' and samples whose total abundance is not greater than `cutoff`, and builds
#' two transposed tables (variables in rows, samples in columns): one with a
#' `Sample` row and one without it.
#'
#' @param ps A phyloseq object; its OTU table is treated as taxa-in-rows.
#' @param Class Name of the sample_data column holding the class label.
#' @param Class_names Class values to keep.
#' @param Subclass Optional name(s) of additional sample_data column(s) to
#'   carry along; `NULL` (default) for none.
#' @param cutoff Taxa (rows) and samples (columns) are kept only when their
#'   abundance sum is strictly greater than this value.
#' @return A list with `ps` (the filtered phyloseq object), `lefse` (table with
#'   rows ordered Class, Sample, Subclass, taxa) and `lefse_nosub` (same table
#'   without the Sample row).
prepare_lefse <- function(ps,
                          Class,
                          Class_names,
                          Subclass = NULL,
                          cutoff = 10) {
  sam_tab <- phyloseq::sample_data(ps) %>%
    data.frame()

  # Fail early with a readable message instead of an obscure selection error.
  if (!Class %in% colnames(sam_tab)) {
    stop("Column '", Class, "' was not found in sample_data(ps).",
         call. = FALSE)
  }
  colnames(sam_tab)[colnames(sam_tab) == Class] <- "CompClass"

  # c("CompClass", NULL) is just "CompClass", so one pipeline serves both the
  # with- and without-Subclass cases.
  sam_tab_final <- sam_tab %>%
    dplyr::select(dplyr::all_of(c("CompClass", Subclass))) %>%
    tibble::rownames_to_column("TempRowNames") %>%
    dplyr::filter(CompClass %in% Class_names) %>%
    tibble::column_to_rownames("TempRowNames")

  if (nrow(sam_tab_final) == 0) {
    stop("No samples have a '", Class, "' value in Class_names.",
         call. = FALSE)
  }

  colnames(sam_tab_final)[colnames(sam_tab_final) == "CompClass"] <- Class
  phyloseq::sample_data(ps) <- phyloseq::sample_data(sam_tab_final)

  # check.names = FALSE keeps the sample IDs (column names) exactly as stored;
  # the default would rewrite non-syntactic IDs (e.g. starting with a digit),
  # after which they no longer match the row names of the sample table.
  otu_tab <- phyloseq::otu_table(ps) %>%
    data.frame(check.names = FALSE)

  otu_tab_final <- otu_tab[rowSums(otu_tab) > cutoff,
                           colSums(otu_tab) > cutoff,
                           drop = FALSE]
  phyloseq::otu_table(ps) <- phyloseq::otu_table(
    as.matrix(otu_tab_final), taxa_are_rows = TRUE)

  # Samples in rows, taxa in columns, joined once with the sample metadata.
  # NOTE(review): data.frame() here still applies make.names() to the taxon
  # names; kept as-is so the exported row names do not change.
  otu_by_sample <- otu_tab_final %>%
    t() %>%
    data.frame() %>%
    tibble::rownames_to_column("Sample")
  joined <- sam_tab_final %>%
    tibble::rownames_to_column("Sample") %>%
    dplyr::inner_join(otu_by_sample, by = "Sample")

  lefse_data <- joined %>%
    dplyr::select(dplyr::all_of(Class), Sample, dplyr::all_of(Subclass),
                  dplyr::everything()) %>%
    t() %>%
    data.frame()

  lefse_data_nosub <- joined %>%
    dplyr::select(-Sample) %>%
    dplyr::select(dplyr::all_of(Class), dplyr::all_of(Subclass),
                  dplyr::everything()) %>%
    t() %>%
    data.frame()

  res <- list(ps = ps,
              lefse = lefse_data,
              lefse_nosub = lefse_data_nosub)
  return(res)
}
# Build the LEfSe inputs for the 16S genus-level data (gut vs tongue) and
# write them as tab-separated files without a header row or quoting.
amplicon_ps_genus_lefse <- prepare_lefse(
  ps = amplicon_ps_genus,
  Class = "SampleType",
  Class_names = c("gut", "tongue"),
  cutoff = 10)
write.table(amplicon_ps_genus_lefse$lefse, "amplicon_ps_genus_lefse.tsv",
            quote = FALSE, sep = "\t", col.names = FALSE)
write.table(amplicon_ps_genus_lefse$lefse_nosub, "amplicon_ps_genus_lefse_nosub.tsv",
            quote = FALSE, sep = "\t", col.names = FALSE)

# Same for the metagenomic species-level data (Mild vs Moderate); a much
# smaller cutoff is passed here than for the amplicon data.
Zeybel_ps_species_lefse <- prepare_lefse(
  ps = Zeybel_ps_species,
  Class = "LiverFatClass",
  Class_names = c("Mild", "Moderate"),
  cutoff = 1e-4)
write.table(Zeybel_ps_species_lefse$lefse, "Zeybel_ps_species_lefse.tsv",
            quote = FALSE, sep = "\t", col.names = FALSE)
write.table(Zeybel_ps_species_lefse$lefse_nosub, "Zeybel_ps_species_lefse_nosub.tsv",
            quote = FALSE, sep = "\t", col.names = FALSE)
Run lefse independently with the three applications (R, conda, galaxy)
Running lefse in R (XMAS2)
Perform the analysis with the run_lefse and run_lefse2
functions:
- amplicon_ps_genus
# run_lefse
amplicon_xmas2_output <- run_lefse(
ps = amplicon_ps_genus_lefse$ps,
group = "SampleType",
group_names = c("gut", "tongue"),
norm = "CPM") %>%
dplyr::mutate(app_name = "xmas_lefse") %>%
dplyr::arrange(LDA_Score)
head(amplicon_xmas2_output)
# run_lefse2
amplicon_xmas2_output2 <- run_lefse2(
ps = amplicon_ps_genus_lefse$ps,
group = "SampleType",
group_names = c("gut", "tongue"),
norm = "CPM") %>%
dplyr::mutate(app_name = "xmas_lefse2") %>%
dplyr::arrange(LDA_Score)
head(amplicon_xmas2_output2)
## TaxaID Block Enrichment LDA_Score
## 1 g__Bacteroides 8_gut vs 9_tongue gut -5.464661
## 2 g__Lachnospiraceae_unclassified 8_gut vs 9_tongue gut -4.451749
## 3 g__Lachnospira 8_gut vs 9_tongue gut -4.404415
## 4 g__Ruminococcaceae_unclassified 8_gut vs 9_tongue gut -4.394309
## 5 g__Faecalibacterium 8_gut vs 9_tongue gut -4.206251
## 6 g__Phascolarctobacterium 8_gut vs 9_tongue gut -4.183912
## EffectSize Log2FoldChange (Median)\ngut_vs_tongue Median Abundance\n(All)
## 1 5.734982 8.706188 4552.3520
## 2 2.296951 3.048798 11431.9387
## 3 6.503718 NA 0.0000
## 4 5.962548 8.833464 945.6265
## 5 5.492180 NA 0.0000
## 6 6.206589 NA 0.0000
## Median Abundance\ngut Median Abundance\ntongue
## 1 604210.78 1446.655
## 2 64739.42 7823.286
## 3 50368.43 0.000
## 4 53763.30 117.855
## 5 19194.92 0.000
## 6 18238.84 0.000
## Log2FoldChange (Mean)\ngut_vs_tongue Mean Abundance\n(All)
## 1 7.995845 277146.67
## 2 3.186478 34604.22
## 3 NA 22767.22
## 4 7.701566 26586.43
## 5 NA 13991.79
## 6 NA 13444.82
## Mean Abundance\ngut Mean Abundance\ntongue Occurrence (100%)\n(All)
## 1 586352.50 2297.0454 100.00
## 2 65446.48 7188.8734 100.00
## 3 48380.35 0.0000 47.06
## 4 56192.47 269.9452 76.47
## 5 29732.55 0.0000 47.06
## 6 28570.25 0.0000 47.06
## Occurrence (100%)\ngut Occurrence (100%)\ntongue Odds Ratio (95% CI)
## 1 100 100.00 4.7e-14 (-60;60)
## 2 100 100.00 4.4e-24 (-110;110)
## 3 100 0.00
## 4 100 55.56 7.2e-41 (-180;180)
## 5 100 0.00
## 6 100 0.00
## app_name
## 1 xmas_lefse
## 2 xmas_lefse
## 3 xmas_lefse
## 4 xmas_lefse
## 5 xmas_lefse
## 6 xmas_lefse
- Zeybel_ps_species
# run_lefse
MGS_xmas2_output <- run_lefse(
ps = Zeybel_ps_species_lefse$ps,
group = "LiverFatClass",
group_names = c("Mild", "Moderate"),
norm = "CPM") %>%
dplyr::mutate(app_name = "xmas_lefse") %>%
dplyr::arrange(LDA_Score)
head(MGS_xmas2_output)
# run_lefse2
MGS_xmas2_output2 <- run_lefse2(
ps = Zeybel_ps_species_lefse$ps,
group = "LiverFatClass",
group_names = c("Mild", "Moderate"),
norm = "CPM") %>%
dplyr::mutate(app_name = "xmas_lefse2") %>%
dplyr::arrange(LDA_Score)
head(MGS_xmas2_output2)
## TaxaID Block Enrichment LDA_Score
## 1 s__Butyricimonas_virosa 12_Mild vs 12_Moderate Moderate 2.462833
## 2 s__Bacteroides_salyersiae 12_Mild vs 12_Moderate Moderate 2.628574
## 3 s__Bacteroides_thetaiotaomicron 12_Mild vs 12_Moderate Moderate 3.177126
## 4 s__Bacteroides_clarus 12_Mild vs 12_Moderate Moderate 3.221333
## 5 s__Bacteroides_coprocola 12_Mild vs 12_Moderate Moderate 3.310338
## 6 s__Bacteroides_cellulosilyticus 12_Mild vs 12_Moderate Moderate 3.332193
## EffectSize Log2FoldChange (Median)\nMild_vs_Moderate Median Abundance\n(All)
## 1 1.668501 NA 0.8999998
## 2 1.573816 NA 0.0000000
## 3 2.775490 NA 223.8648101
## 4 2.331714 NA 0.0000000
## 5 1.367262 NA 0.0000000
## 6 3.145457 NA 82.9748375
## Median Abundance\nMild Median Abundance\nModerate
## 1 0 262.9238
## 2 0 0.0000
## 3 0 1123.0082
## 4 0 0.0000
## 5 0 0.0000
## 6 0 2707.7169
## Log2FoldChange (Mean)\nMild_vs_Moderate Mean Abundance\n(All)
## 1 -2.329681 353.2093
## 2 NA 262.0187
## 3 -2.241224 2413.3440
## 4 NA 1283.6658
## 5 NA 1704.3673
## 6 -3.325019 3170.2327
## Mean Abundance\nMild Mean Abundance\nModerate Occurrence (100%)\n(All)
## 1 117.2101 589.2084 50.00
## 2 0.0000 524.0375 16.67
## 3 842.6510 3984.0371 66.67
## 4 0.0000 2567.3315 20.83
## 5 0.0000 3408.7346 16.67
## 6 575.2844 5765.1810 62.50
## Occurrence (100%)\nMild Occurrence (100%)\nModerate Odds Ratio (95% CI)
## 1 25.00 75.00 3.9 (6.6;1.2)
## 2 0.00 33.33
## 3 41.67 91.67 3.1 (5.3;0.87)
## 4 0.00 41.67
## 5 0.00 33.33
## 6 41.67 83.33 17 (23;11)
## app_name
## 1 xmas_lefse
## 2 xmas_lefse
## 3 xmas_lefse
## 4 xmas_lefse
## 5 xmas_lefse
## 6 xmas_lefse
Running lefse-conda (command line)
lefse-conda installation and version
Note: I installed lefse following the instructions from this site
after installing conda.
## Add channels
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda config --add channels biobakery
## Install lefse
conda create -n lefse -c biobakery lefse -y
Conda and lefse versions:
conda --version
#> conda 4.12.0
conda list | grep -e "lefse"
# packages in environment at /home/samuel/miniconda3/envs/lefse:
#> lefse 1.1.2 pyhdfd78af_0 bioconda
Run lefse-conda
Generate a tabular dataset (amplicon_ps_genus_lefse or Zeybel_ps_species_lefse) compatible with lefse-conda and
lefse-galaxy using the get_dataset.R
script. Then run the script
run_lefse.sh
(linux) with the following parameters:
# In general
# ./run_lefse.sh
# in my case (Hua Zou)
## amplicon_ps_genus_lefse
./run_lefse.sh /Users/zouhua/opt/anaconda3/bin/activate lefse /usr/local/bin/R amplicon_ps_genus_lefse
## Zeybel_ps_species_lefse
./run_lefse.sh /Users/zouhua/opt/anaconda3/bin/activate lefse /usr/local/bin/R Zeybel_ps_species_lefse
Note: All script files, get_dataset.R
and run_lefse.sh
, and this rmarkdown
document must be in the same directory.
Import output from lefse-conda into R
#' Import a LEfSe result file and keep the features passing the LDA cutoff
#'
#' Reads a header-less, tab-separated file with five columns, drops rows with
#' a missing LDA score, gives a negative sign to the scores of features
#' enriched in the first class, filters on the absolute score and sorts by the
#' signed score.
#'
#' @param datres Path to the result file.
#' @param Class_names Class labels; scores of features enriched in
#'   `Class_names[1]` are negated.
#' @param name Value written to the `app_name` column.
#' @param LDA_names Final name of the LDA score column.
#' @param LDA_cutoff Minimum absolute LDA score to keep a feature.
#' @return A tibble with the columns TaxaID, log_hi_class_avg, Enrichment,
#'   the renamed LDA column, pval and app_name.
get_lefse_python <- function(datres,
                             Class_names,
                             name = "lefse_conda",
                             LDA_names = "lefse_conda_LDA",
                             LDA_cutoff = 2) {
  res_columns <- c(
    "TaxaID", "log_hi_class_avg", "Enrichment", "lefse_conda_LDA", "pval")

  raw_res <- readr::read_tsv(datres, show_col_types = FALSE, col_names = FALSE)
  colnames(raw_res) <- res_columns

  # Rows without an LDA score are dropped before any sign handling.
  scored <- dplyr::filter(raw_res, !is.na(lefse_conda_LDA))

  # First class -> negative scores, any other class -> unchanged scores.
  signed <- dplyr::mutate(
    scored,
    lefse_conda_LDA = ifelse(
      Enrichment == Class_names[1], -lefse_conda_LDA, lefse_conda_LDA),
    app_name = name)

  passing <- dplyr::filter(signed, abs(lefse_conda_LDA) >= LDA_cutoff)
  sorted_res <- dplyr::arrange(passing, lefse_conda_LDA)

  # The working column name is swapped for the caller's name only at the end.
  lda_pos <- which(colnames(sorted_res) == "lefse_conda_LDA")
  colnames(sorted_res)[lda_pos] <- LDA_names
  return(sorted_res)
}
amplicon_ps_genus_lefse_conda <- get_lefse_python(
datres = "amplicon_ps_genus_lefse.res",
Class_names = c("gut", "tongue"),
LDA_names = "lefse_conda_LDA")
head(amplicon_ps_genus_lefse_conda)
Zeybel_ps_species_lefse_conda <- get_lefse_python(
datres = "Zeybel_ps_species_lefse.res",
Class_names = c("Mild", "Moderate"),
LDA_names = "lefse_conda_LDA")
head(Zeybel_ps_species_lefse_conda)
## # A tibble: 6 × 6
## TaxaID log_hi_class_avg Enrichment lefse_conda_LDA pval app_name
##
## 1 s__Butyricimonas_v… 2.77 Moderate 2.46 0.02… lefse_c…
## 2 s__Bacteroides_sal… 2.72 Moderate 2.47 0.03… lefse_c…
## 3 s__Bacteroides_cla… 3.41 Moderate 3.07 0.01… lefse_c…
## 4 s__Bacteroides_the… 3.60 Moderate 3.26 0.01… lefse_c…
## 5 s__Bacteroides_cop… 3.53 Moderate 3.28 0.03… lefse_c…
## 6 s__Bacteroides_int… 3.54 Moderate 3.32 0.03… lefse_c…
Running lefse from galaxy
Using the amplicon_ps_genus_lefse_nosub.txt
or Zeybel_ps_species_lefse_nosub.txt
file (no subjects included) as input for lefse from the galaxy platform of the Huttenhower lab at galaxy.
The conditions were as follows:
alpha was 0.05 for both the KW and Wilcoxon tests,
and the LDA threshold was 2.0.
TSS normalization was applied as well.
The output was then converted into a comparable format:
amplicon_ps_genus_lefse_nosub.res
Zeybel_ps_species_lefse_nosub.res
# Import the lefse-galaxy results of the 16S genus-level data.
amplicon_ps_genus_lefse_galaxy <- get_lefse_python(
  datres = "amplicon_ps_genus_lefse_nosub.res",
  name = "lefse_galaxy",
  Class_names = c("gut", "tongue"),
  LDA_names = "lefse_galaxy_LDA")
head(amplicon_ps_genus_lefse_galaxy)

# Import the lefse-galaxy results of the metagenomic species-level data.
Zeybel_ps_species_lefse_galaxy <- get_lefse_python(
  datres = "Zeybel_ps_species_lefse_nosub.res",
  name = "lefse_galaxy",
  Class_names = c("Mild", "Moderate"),
  LDA_names = "lefse_galaxy_LDA")
# Preview the table that was just created (previously the amplicon table was
# printed a second time here).
head(Zeybel_ps_species_lefse_galaxy)
## # A tibble: 6 × 6
## TaxaID log_hi_class_avg Enrichment lefse_galaxy_LDA pval app_name
##
## 1 g__Bacteroides 5.77 gut -5.46 0.00… lefse_g…
## 2 g__Lachnospiracea… 4.82 gut -4.45 0.00… lefse_g…
## 3 g__Ruminococcacea… 4.75 gut -4.41 0.00… lefse_g…
## 4 g__Lachnospira 4.68 gut -4.38 0.00… lefse_g…
## 5 g__Phascolarctoba… 4.46 gut -4.18 0.00… lefse_g…
## 6 g__Faecalibacteri… 4.47 gut -4.18 0.00… lefse_g…
Extracting results from XMAS2 results
run_lefse (lefser R package)
run_lefse2 (microbiomeMarker R package)
#' Extract the significant features from an XMAS2 lefse result table
#'
#' Keeps the five core columns, tags every row with the application name,
#' filters on the absolute LDA score, sorts by the score and renames the
#' score column.
#'
#' @param datres Result table containing at least the columns TaxaID, Block,
#'   Enrichment, LDA_Score and EffectSize.
#' @param name Value written to the `app_name` column.
#' @param LDA_names Final name of the `LDA_Score` column.
#' @param LDA_cutoff Minimum absolute LDA score to keep a feature.
#' @return The filtered table, sorted by increasing LDA score.
get_lefse_R <- function(datres,
                        name = "Rrun_lefse",
                        LDA_names = "lefse_R_LDA",
                        LDA_cutoff = 2) {
  core_columns <- c(
    "TaxaID", "Block", "Enrichment", "LDA_Score", "EffectSize")

  core_res <- dplyr::select(datres, all_of(core_columns))
  tagged_res <- dplyr::mutate(core_res, app_name = name)
  passing <- dplyr::filter(tagged_res, abs(LDA_Score) >= LDA_cutoff)
  sorted_res <- dplyr::arrange(passing, LDA_Score)

  # Rename the score column last so the steps above can use its fixed name.
  score_pos <- which(colnames(sorted_res) == "LDA_Score")
  colnames(sorted_res)[score_pos] <- LDA_names
  return(sorted_res)
}
amplicon_ps_genus_lefse_R <- get_lefse_R(
datres = amplicon_xmas2_output,
name = "Rrun_lefse",
LDA_names = "lefse_R_LDA")
head(amplicon_ps_genus_lefse_R)
amplicon_ps_genus_lefse_R2 <- get_lefse_R(
datres = amplicon_xmas2_output2,
name = "Rrun_lefse2",
LDA_names = "lefse_R2_LDA")
head(amplicon_ps_genus_lefse_R2)
Zeybel_ps_species_lefse_R <- get_lefse_R(
datres = MGS_xmas2_output,
name = "Rrun_lefse",
LDA_names = "lefse_R_LDA")
head(Zeybel_ps_species_lefse_R)
Zeybel_ps_species_lefse_R2 <- get_lefse_R(
datres = MGS_xmas2_output2,
name = "Rrun_lefse2",
LDA_names = "lefse_R2_LDA")
head(Zeybel_ps_species_lefse_R2)
## TaxaID Block Enrichment
## 1 s__Butyricimonas_virosa 12_Mild vs 12_Moderate Moderate
## 2 s__Bacteroides_salyersiae 12_Mild vs 12_Moderate Moderate
## 3 s__Bacteroides_thetaiotaomicron 12_Mild vs 12_Moderate Moderate
## 4 s__Bacteroides_intestinalis 12_Mild vs 12_Moderate Moderate
## 5 s__Bacteroides_coprocola 12_Mild vs 12_Moderate Moderate
## 6 s__Bacteroides_clarus 12_Mild vs 12_Moderate Moderate
## lefse_R2_LDA EffectSize app_name
## 1 2.735285 1.668501 Rrun_lefse2
## 2 2.776971 1.573816 Rrun_lefse2
## 3 3.437617 2.775490 Rrun_lefse2
## 4 3.449263 1.184353 Rrun_lefse2
## 5 3.457030 1.367262 Rrun_lefse2
## 6 3.474700 2.331714 Rrun_lefse2
Comparison of lefse-conda with XMAS2
Number of features reported as significant
- amplicon_ps_genus_lefse
#' Bar plot of the number of significant features per application
#'
#' Only the `app_name` column of each input is used, so the inputs may carry
#' any other columns (including LDA columns with arbitrary names) without
#' affecting the result.
#'
#' @param dat1,dat2,dat3,dat4 Result tables, each with an `app_name` column
#'   and one row per significant feature.
#' @return A ggplot object with one labelled bar per `app_name` value.
plot_signif_taxa_num <- function(dat1, dat2, dat3, dat4) {
  # Bind just the column that is counted: this avoids failures caused by
  # differently named or differently typed columns in the four tables.
  app_labels <- lapply(
    list(dat1, dat2, dat3, dat4),
    function(dat) dat["app_name"])
  combined_outputs <- dplyr::bind_rows(app_labels)

  feature_counts <- dplyr::count(combined_outputs, app_name)

  pl <- ggplot2::ggplot(feature_counts, ggplot2::aes(app_name, n)) +
    ggplot2::geom_col() +
    ggplot2::geom_label(ggplot2::aes(label = n)) +
    ggplot2::ggtitle('Number of significiant features identified by the different applications using lefse')
  return(pl)
}
plot_signif_taxa_num(dat1 = amplicon_ps_genus_lefse_conda,
dat2 = amplicon_ps_genus_lefse_galaxy,
dat3 = amplicon_ps_genus_lefse_R,
dat4 = amplicon_ps_genus_lefse_R2)
- Zeybel_ps_species_lefse
plot_signif_taxa_num(dat1 = Zeybel_ps_species_lefse_conda,
dat2 = Zeybel_ps_species_lefse_galaxy,
dat3 = Zeybel_ps_species_lefse_R,
dat4 = Zeybel_ps_species_lefse_R2)
Overlap of features reported as significant
- amplicon_ps_genus_lefse
#' Draw a four-set Venn diagram of the significant features
#'
#' Takes the `TaxaID` column of each input as one set, starts a new grid page
#' and draws the diagram on the current graphics device.
#'
#' @param dat1 Result table from lefse-conda.
#' @param dat2 Result table from lefse-galaxy.
#' @param dat3 Result table from run_lefse.
#' @param dat4 Result table from run_lefse2.
plot_signif_taxa_venn <- function(dat1, dat2, dat3, dat4) {
  taxa_sets <- list(dat1$TaxaID, dat2$TaxaID, dat3$TaxaID, dat4$TaxaID)
  set_labels <- c("lefse-conda", "lefse-galaxy",
                  "run_lefse(lefser)", "run_lefse2(microbiomeMarker)")

  grid.newpage()
  # filename = NULL: the diagram is returned as an object and drawn below.
  venn_grobs <- venn.diagram(
    x = taxa_sets,
    category.names = set_labels,
    filename = NULL
  )
  grid.draw(venn_grobs)
}
plot_signif_taxa_venn(dat1 = amplicon_ps_genus_lefse_conda,
dat2 = amplicon_ps_genus_lefse_galaxy,
dat3 = amplicon_ps_genus_lefse_R,
dat4 = amplicon_ps_genus_lefse_R2)
- Zeybel_ps_species_lefse
plot_signif_taxa_venn(dat1 = Zeybel_ps_species_lefse_conda,
dat2 = Zeybel_ps_species_lefse_galaxy,
dat3 = Zeybel_ps_species_lefse_R,
dat4 = Zeybel_ps_species_lefse_R2)
LDA scores' comparison
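The LDA scores of the overlapping features (57 genera for amplicon_ps_genus_lefse and 11 species for Zeybel_ps_species_lefse, see the tables below) are similar across the applications.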
- amplicon_ps_genus_lefse
amplicon_joint_output <- purrr::reduce(
.x = list(amplicon_ps_genus_lefse_conda, amplicon_ps_genus_lefse_galaxy,
amplicon_ps_genus_lefse_R, amplicon_ps_genus_lefse_R2),
.f = ~ inner_join(.x, .y, by = "TaxaID")) %>%
dplyr::select(TaxaID, lefse_conda_LDA, lefse_galaxy_LDA,
lefse_R_LDA, lefse_R2_LDA)
amplicon_joint_output
## # A tibble: 57 × 5
## TaxaID lefse_conda_LDA lefse_galaxy_LDA lefse_R_LDA lefse_R2_LDA
##
## 1 g__Bacteroides -5.47 -5.46 -5.46 -5.77
## 2 g__Lachnospiraceae… -4.47 -4.45 -4.45 -4.77
## 3 g__Ruminococcaceae… -4.46 -4.41 -4.39 -4.74
## 4 g__Lachnospira -4.36 -4.38 -4.40 -4.69
## 5 g__Faecalibacterium -4.20 -4.18 -4.21 -4.48
## 6 g__Phascolarctobac… -4.15 -4.18 -4.18 -4.47
## 7 g__Clostridiales_u… -4.03 -4.06 -4.04 -4.38
## 8 g__Akkermansia -3.91 -4.04 -4.11 -4.23
## 9 g__Oscillospira -3.87 -3.89 -3.86 -4.19
## 10 g__Rikenellaceae_u… -3.87 -3.91 -3.86 -4.23
## # … with 47 more rows
- Zeybel_ps_species_lefse
MGS_joint_output <- purrr::reduce(
.x = list(Zeybel_ps_species_lefse_conda, Zeybel_ps_species_lefse_galaxy,
Zeybel_ps_species_lefse_R, Zeybel_ps_species_lefse_R2),
.f = ~ inner_join(.x, .y, by = "TaxaID")) %>%
dplyr::select(TaxaID, lefse_conda_LDA, lefse_galaxy_LDA,
lefse_R_LDA, lefse_R2_LDA)
MGS_joint_output
## # A tibble: 11 × 5
## TaxaID lefse_conda_LDA lefse_galaxy_LDA lefse_R_LDA lefse_R2_LDA
##
## 1 s__Butyricimonas_v… 2.46 2.82 2.46 2.74
## 2 s__Bacteroides_sal… 2.47 3.16 2.63 2.78
## 3 s__Bacteroides_cla… 3.07 3.29 3.22 3.47
## 4 s__Bacteroides_the… 3.26 3.28 3.18 3.44
## 5 s__Bacteroides_cop… 3.28 3.24 3.31 3.46
## 6 s__Bacteroides_int… 3.32 3.31 3.49 3.45
## 7 s__Bacteroides_cel… 3.41 3.36 3.33 3.65
## 8 s__Bacteroides_egg… 3.52 3.59 3.41 3.82
## 9 s__Parabacteroides… 3.59 3.68 3.55 3.96
## 10 s__Barnesiella_int… 3.68 3.72 3.63 3.96
## 11 s__Prevotella_sp_C… 4.04 4.26 4.32 4.74
XMAS2 LDA scores vs lefse-conda LDA scores
amplicon_ps_genus_lefse
- run_lefse (lefser R package) vs lefse-conda
amplicon_joint_output %>%
ggplot(aes(lefse_conda_LDA, lefse_R_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-conda and run_lefse")
- run_lefse2 (microbiomeMarker R package) vs lefse-conda
amplicon_joint_output %>%
ggplot(aes(lefse_conda_LDA, lefse_R2_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-conda and run_lefse2")
Zeybel_ps_species_lefse
- run_lefse (lefser R package) vs lefse-conda
MGS_joint_output %>%
ggplot(aes(lefse_conda_LDA, lefse_R_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-conda and run_lefse")
- run_lefse2 (microbiomeMarker R package) vs lefse-conda
MGS_joint_output %>%
ggplot(aes(lefse_conda_LDA, lefse_R2_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-conda and run_lefse2")
XMAS2 LDA scores vs lefse-galaxy LDA scores
amplicon_ps_genus_lefse
- run_lefse (lefser R package) vs lefse-galaxy
amplicon_joint_output %>%
ggplot(aes(lefse_galaxy_LDA, lefse_R_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-galaxy and run_lefse")
- run_lefse2 (microbiomeMarker R package) vs lefse-galaxy
amplicon_joint_output %>%
ggplot(aes(lefse_galaxy_LDA, lefse_R2_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-galaxy and run_lefse2")
Zeybel_ps_species_lefse
- run_lefse (lefser R package) vs lefse-galaxy
MGS_joint_output %>%
ggplot(aes(lefse_galaxy_LDA, lefse_R_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-galaxy and run_lefse")
- run_lefse2 (microbiomeMarker R package) vs lefse-galaxy
MGS_joint_output %>%
ggplot(aes(lefse_galaxy_LDA, lefse_R2_LDA)) +
geom_point(size = 3, shape = 1) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
ggtitle("Comparison of LDA scores of features reported as significant
by both lefse-galaxy and run_lefse2")
Results:
- The features shared between
run_lefse2
(microbiomeMarker R package) and lefse-conda or lefse-galaxy have similar LDA scores. However, the features shared between run_lefse
(lefser R package) and lefse-conda or lefse-galaxy seem to have slightly different LDA scores.
Differences between XMAS2 LDA scores and lefse-conda
setdiff(amplicon_ps_genus_lefse_conda$TaxaID, amplicon_ps_genus_lefse_R$TaxaID)
## [1] "g__Holdemania" "g__Alcaligenaceae_unclassified"
setdiff(amplicon_ps_genus_lefse_conda$TaxaID, amplicon_ps_genus_lefse_R2$TaxaID)
## character(0)
Data and code availability
lefse_comparison
Session info
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.1.2 (2021-11-01)
## os macOS Big Sur 10.16
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Asia/Shanghai
## date 2022-07-19
## pandoc 2.17.1.1 @ /Applications/RStudio.app/Contents/MacOS/quarto/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## ade4 1.7-18 2021-09-16 [1] CRAN (R 4.1.0)
## annotate 1.72.0 2021-10-26 [1] Bioconductor
## AnnotationDbi 1.56.2 2021-11-09 [1] Bioconductor
## ape 5.6-2 2022-03-02 [1] CRAN (R 4.1.2)
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.0)
## Biobase 2.54.0 2021-10-26 [1] Bioconductor
## BiocGenerics 0.40.0 2021-10-26 [1] Bioconductor
## BiocParallel 1.28.3 2021-12-09 [1] Bioconductor
## biomformat 1.22.0 2021-10-26 [1] Bioconductor
## Biostrings 2.62.0 2021-10-26 [1] Bioconductor
## bit 4.0.4 2020-08-04 [1] CRAN (R 4.1.0)
## bit64 4.0.5 2020-08-30 [1] CRAN (R 4.1.0)
## bitops 1.0-7 2021-04-24 [1] CRAN (R 4.1.0)
## blob 1.2.2 2021-07-23 [1] CRAN (R 4.1.0)
## brio 1.1.3 2021-11-30 [1] CRAN (R 4.1.0)
## bslib 0.3.1 2021-10-06 [1] CRAN (R 4.1.0)
## cachem 1.0.6 2021-08-19 [1] CRAN (R 4.1.0)
## callr 3.7.0 2021-04-20 [1] CRAN (R 4.1.0)
## caTools 1.18.2 2021-03-28 [1] CRAN (R 4.1.0)
## cli 3.3.0 2022-04-25 [1] CRAN (R 4.1.2)
## cluster 2.1.2 2021-04-17 [1] CRAN (R 4.1.2)
## codetools 0.2-18 2020-11-04 [1] CRAN (R 4.1.2)
## coin 1.4-2 2021-10-08 [1] CRAN (R 4.1.0)
## colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.1.2)
## crayon 1.5.0 2022-02-14 [1] CRAN (R 4.1.2)
## data.table 1.14.2 2021-09-27 [1] CRAN (R 4.1.0)
## DBI 1.1.2 2021-12-20 [1] CRAN (R 4.1.0)
## DelayedArray 0.20.0 2021-10-26 [1] Bioconductor
## desc 1.4.1 2022-03-06 [1] CRAN (R 4.1.2)
## DESeq2 1.34.0 2021-10-26 [1] Bioconductor
## devtools * 2.4.3 2021-11-30 [1] CRAN (R 4.1.0)
## digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.0)
## dplyr * 1.0.8 2022-02-08 [1] CRAN (R 4.1.2)
## edgeR 3.36.0 2021-10-26 [1] Bioconductor
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
## evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.2)
## fansi 1.0.2 2022-01-14 [1] CRAN (R 4.1.2)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.1.0)
## fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.0)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.1.2)
## formatR 1.11 2021-06-01 [1] CRAN (R 4.1.0)
## fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.0)
## futile.logger * 1.4.3 2016-07-10 [1] CRAN (R 4.1.0)
## futile.options 1.0.1 2018-04-20 [1] CRAN (R 4.1.0)
## genefilter 1.76.0 2021-10-26 [1] Bioconductor
## geneplotter 1.72.0 2021-10-26 [1] Bioconductor
## generics 0.1.2 2022-01-31 [1] CRAN (R 4.1.2)
## GenomeInfoDb 1.30.1 2022-01-30 [1] Bioconductor
## GenomeInfoDbData 1.2.7 2022-03-09 [1] Bioconductor
## GenomicRanges 1.46.1 2021-11-18 [1] Bioconductor
## ggplot2 * 3.3.5 2021-06-25 [1] CRAN (R 4.1.0)
## glmnet 4.1-3 2021-11-02 [1] CRAN (R 4.1.0)
## glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.2)
## gplots 3.1.1 2020-11-28 [1] CRAN (R 4.1.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.0)
## gtools 3.9.2 2021-06-06 [1] CRAN (R 4.1.0)
## highr 0.9 2021-04-16 [1] CRAN (R 4.1.0)
## hms 1.1.1 2021-09-26 [1] CRAN (R 4.1.0)
## htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.0)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.1.0)
## igraph 1.2.11 2022-01-04 [1] CRAN (R 4.1.2)
## IRanges 2.28.0 2021-10-26 [1] Bioconductor
## iterators 1.0.14 2022-02-05 [1] CRAN (R 4.1.2)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
## jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.1.2)
## KEGGREST 1.34.0 2021-10-26 [1] Bioconductor
## KernSmooth 2.23-20 2021-05-03 [1] CRAN (R 4.1.2)
## knitr 1.37 2021-12-16 [1] CRAN (R 4.1.0)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.1.0)
## lambda.r 1.2.4 2019-09-18 [1] CRAN (R 4.1.0)
## lattice 0.20-45 2021-09-22 [1] CRAN (R 4.1.2)
## libcoin 1.0-9 2021-09-27 [1] CRAN (R 4.1.0)
## lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.0)
## limma 3.50.1 2022-02-17 [1] Bioconductor
## locfit 1.5-9.5 2022-03-03 [1] CRAN (R 4.1.2)
## magrittr * 2.0.2 2022-01-26 [1] CRAN (R 4.1.2)
## MASS 7.3-55 2022-01-13 [1] CRAN (R 4.1.2)
## Matrix 1.4-0 2021-12-08 [1] CRAN (R 4.1.0)
## MatrixGenerics 1.6.0 2021-10-26 [1] Bioconductor
## matrixStats 0.61.0 2021-09-17 [1] CRAN (R 4.1.0)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.1.0)
## metagenomeSeq 1.36.0 2021-10-26 [1] Bioconductor
## mgcv 1.8-39 2022-02-24 [1] CRAN (R 4.1.2)
## modeltools 0.2-23 2020-03-05 [1] CRAN (R 4.1.0)
## multcomp 1.4-18 2022-01-04 [1] CRAN (R 4.1.2)
## multtest 2.50.0 2021-10-26 [1] Bioconductor
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.0)
## mvtnorm 1.1-3 2021-10-08 [1] CRAN (R 4.1.0)
## nlme 3.1-155 2022-01-13 [1] CRAN (R 4.1.2)
## permute 0.9-7 2022-01-27 [1] CRAN (R 4.1.2)
## phyloseq 1.38.0 2021-10-26 [1] Bioconductor
## pillar 1.7.0 2022-02-01 [1] CRAN (R 4.1.2)
## pkgbuild 1.3.1 2021-12-20 [1] CRAN (R 4.1.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
## pkgload 1.2.4 2021-11-30 [1] CRAN (R 4.1.0)
## plyr 1.8.6 2020-03-03 [1] CRAN (R 4.1.0)
## png 0.1-7 2013-12-03 [1] CRAN (R 4.1.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.1.0)
## processx 3.5.2 2021-04-30 [1] CRAN (R 4.1.0)
## ps 1.6.0 2021-02-28 [1] CRAN (R 4.1.0)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.0)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.0)
## RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 4.1.0)
## Rcpp 1.0.8.2 2022-03-11 [1] CRAN (R 4.1.2)
## RCurl 1.98-1.6 2022-02-08 [1] CRAN (R 4.1.2)
## readr * 2.1.2 2022-01-30 [1] CRAN (R 4.1.2)
## remotes 2.4.2 2021-11-30 [1] CRAN (R 4.1.0)
## reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.1.0)
## rhdf5 2.38.1 2022-03-10 [1] Bioconductor
## rhdf5filters 1.6.0 2021-10-26 [1] Bioconductor
## Rhdf5lib 1.16.0 2021-10-26 [1] Bioconductor
## rlang 1.0.2 2022-03-04 [1] CRAN (R 4.1.2)
## rmarkdown 2.13 2022-03-10 [1] CRAN (R 4.1.2)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.1.0)
## RSQLite 2.2.10 2022-02-17 [1] CRAN (R 4.1.2)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.0)
## S4Vectors 0.32.3 2021-11-21 [1] Bioconductor
## sandwich 3.0-1 2021-05-18 [1] CRAN (R 4.1.0)
## sass 0.4.0 2021-05-12 [1] CRAN (R 4.1.0)
## scales 1.1.1 2020-05-11 [1] CRAN (R 4.1.0)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.0)
## shape 1.4.6 2021-05-19 [1] CRAN (R 4.1.0)
## stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.0)
## stringr 1.4.0 2019-02-10 [1] CRAN (R 4.1.0)
## SummarizedExperiment 1.24.0 2021-10-26 [1] Bioconductor
## survival 3.3-1 2022-03-03 [1] CRAN (R 4.1.2)
## testthat 3.1.2 2022-01-20 [1] CRAN (R 4.1.2)
## TH.data 1.1-0 2021-09-27 [1] CRAN (R 4.1.0)
## tibble * 3.1.6 2021-11-07 [1] CRAN (R 4.1.0)
## tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.1.2)
## tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.1.2)
## tzdb 0.2.0 2021-10-27 [1] CRAN (R 4.1.0)
## usethis * 2.1.5 2021-12-09 [1] CRAN (R 4.1.0)
## utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.0)
## vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.1.0)
## vegan 2.5-7 2020-11-28 [1] CRAN (R 4.1.0)
## VennDiagram * 1.7.3 2022-04-12 [1] CRAN (R 4.1.2)
## vroom 1.5.7 2021-11-30 [1] CRAN (R 4.1.0)
## withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.2)
## Wrench 1.12.0 2021-10-26 [1] Bioconductor
## xfun 0.30 2022-03-02 [1] CRAN (R 4.1.2)
## XMAS2 * 2.1.6 2022-07-19 [1] local
## XML 3.99-0.9 2022-02-24 [1] CRAN (R 4.1.2)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.1.0)
## XVector 0.34.0 2021-10-26 [1] Bioconductor
## yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.2)
## zlibbioc 1.40.0 2021-10-26 [1] Bioconductor
## zoo 1.8-9 2021-03-09 [1] CRAN (R 4.1.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
##
## ──────────────────────────────────────────────────────────────────────────────
Reference
- lefse_comparison