Title:
- 16s多样性分析和可视化(16s diversity analysis and visualization )
Author:
- GCC
Date:
- 2019年4月
1. clean数据
加载第三方包
#setup
knitr::opts_chunk$set(warning = FALSE, message = FALSE,tidy=TRUE, tidy.opts = list(blank = TRUE, width.cutoff=23),dpi = 600)
options(stringsAsFactors = FALSE)
#加载第三方包
library(dplyr)
library(ggplot2)
library(reshape)
library(vegan)
library(tidyr)
library(formatR)
导入备注信息和种丰度
#导入数据
setwd('C:\\Users\\GCC\\FMT')
annotations <-
readxl::read_xlsx('./annotation.xlsx') %>% select(., oldID, ID)
species <- read.delim('./种丰度.txt', sep = '\t')
重新标注样本信息
#重新对样本命名
newNames <- annotations$I
names(newNames) <- annotations$oldID
newNames <- c(FeatureID = 'FeatureID', newNames)
speciesNew <- plyr::rename(species, newNames)
speciesNew <-
speciesNew[, stringr::str_detect(colnames(speciesNew), pattern = '(negativeControl)|(FeatureID)|(treat)|(wt)|(model).*')]
2. alpha多样性分析
Simpson指数:Simpson反映的是优势种在群落中的地位和作用,若一个群落中优势种占的多,其他非优势物种所占的比例则会减少,那么Simpson 指数值较大,这说明群落多样性较低。
Shannon指数:Shannon值越大,说明群落多样性越高。
#多样性分析
set.seed(0408)
H <- diversity(t(speciesNew[, -1]))
#计算Shannon-Wiener指数
shannon = diversity(t(speciesNew[,-1]), "shannon")
#计算Simpson指数
simpson = diversity(t(speciesNew[,-1]), "simpson")
#计算Inverse Simpson指数
insimpson = diversity(t(speciesNew[,-1]), "inv")
pairs(cbind(H, simpson, insimpson), pch = "+", col = "blue")
# Species richness (S) and Pielou's evenness (J):
S <- specnumber(t(speciesNew[, -1]))
J <- H / log(S)
diversitys <-
data.frame(shannon = shannon,
simpson = simpson,
insimpson = insimpson)
diversitys <-
diversitys[stringr::str_detect(rownames(diversitys), '[^(_)].*'), ]
diversitys <-
within(diversitys,
{
time <-
stringr::str_split(rownames(diversitys), '_', simplify = T)[, 2] %>% factor(., levels = c(1:10))
group <-
stringr::str_extract(rownames(diversitys), '[^(_)]*')
}) %>% gather(.,
key = 'index',
value = 'values',
c(shannon, simpson, insimpson))
3. 作图
#index-plotting, fig.align='center', fig.cap = 'the diversity index in different group'
p1 <-
diversitys %>% ggplot(., aes(time, values, fill = group)) + geom_boxplot(alpha =
0.8, show.legend = FALSE) + geom_point(
position = position_jitter(),
size = 1.5,
alpha = 0.8,
aes(color = group),
show.legend = FALSE
) + facet_grid(index ~ group, scales = 'free', labeller = as_labeller(
c(
model = 'model',
negativeControl = 'negative',
treat = 'treat',
wt = 'wild type',
shannon = 'Shannon',
simpson = 'Simpson',
insimpson = 'Insimpson'
)
)) + labs(x = '', y = 'Alpha Diversity Measure') + theme(
panel.grid = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(color = 'black', fill = NA)
) #+ ggpubr::stat_compare_means(aes(group=interaction(time,group)))
print(p1)