使用 ggplot2 绘制多个分组的箱线图时,有时为了提高可读性,会间隔性地添加阴影多边形。本以为是很简单的事情,用 geom_rect 或者 annotate(geom = 'rect') 就能解决。不曾想,却费了好一番功夫。
具体需求如下
- facet 分页显示微生物和植物多样性
- categorical variables 多个环境变量
- fill 每个分类变量包括两个植被类型
先放图
加载工具包
library(reshape2)
library(dplyr)
library(ggplot2)
library(phyloseq)
library(lmerTest)
library(lme4)
加载并整理数据
# Load the saved workspace; it is expected to provide `rarefy` (used below)
# and `physeq` (dropped immediately because it is not needed here).
load('../01_filter_and_rarefy_Bacteria/.RData')
rm(physeq)
# Show which sample variables the rarefied object carries.
sample_variables(rarefy)
# Keep samples with NP < 100, EC < 2000 and CN < 150, and exclude three
# sequencing IDs.
rarefy <- subset_samples(
  rarefy,
  NP < 100 & EC < 2000 & CN < 150 &
    (!Sequencing_ID_16s_18s %in% c('M24', 'M12', 'M30'))
)
# Sample metadata as a plain data frame, with absolute latitude and a
# two-level vegetation grouping (Forest vs. everything else).
map <- data.frame(sample_data(rarefy))
map <- mutate(
  map,
  Latitude = abs(Latitude),
  group = case_when(
    Vegetation == 'Forest' ~ 'Forest',
    TRUE ~ 'NonForest'
  )
)
map$group <- as.factor(map$group)
创建对象存储变量组合
# Environmental predictors used as fixed effects, grouped by theme.
envs <- c(
  "Latitude", "Elevation", "Slope",        # geologic
  "MAT", "AI",                             # climate
  "Plant_cover",                           # plant
  "pH", "EC", "Clay_silt", "WHC", "TP"     # soil
)
# Additional variable names kept alongside the predictors.
funs <- c(
  "BG", "PHOS", "NAG", "Rb", "PO4", "NO3", "NH4", "NPP", "ORC"
)
计算微生物物种多样性(observed richness)
# Observed richness per sample from the rarefied phyloseq object.
richness <- estimate_richness(rarefy, measures = "Observed")
# The richness values are attached to `map` by position, so the two tables
# must list samples in the same order. Warn (rather than silently continue)
# when the row names do not line up with Global_Atlas_Order.
if (!isTRUE(all(rownames(richness) == map$Global_Atlas_Order))) {
  warning(
    "Row names of `richness` do not match map$Global_Atlas_Order; ",
    "check sample order before using Bacteria_richness.",
    call. = FALSE
  )
}
map$Bacteria_richness <- richness$Observed
# The ordering column is dropped once the richness values are attached.
map$Global_Atlas_Order <- NULL
混合效应模型
# Split the sample metadata by vegetation group. `forest` and `nonforest`
# are not defined anywhere in the visible script, so they are derived here
# from the `group` factor created on `map`.
forest <- map[map$group == 'Forest', , drop = FALSE]
nonforest <- map[map$group == 'NonForest', , drop = FALSE]
# Standardise every numeric column within each group, so the model
# coefficients are on a comparable (standardised) scale.
forest_scale <- mutate_if(forest, is.numeric, scale)
nonforest_scale <- mutate_if(nonforest, is.numeric, scale)
# Model formulas: each richness response is regressed on every predictor in
# `envs`, with random intercepts for CLIMA2 and LandUse.
formular1 <- as.formula(sprintf(
  "Bacteria_richness~ %s",
  paste(c(envs, '(1|CLIMA2)', '(1|LandUse)'), collapse = "+")
))
formular2 <- as.formula(sprintf(
  "Plant_richness~ %s",
  paste(c(envs, '(1|CLIMA2)', '(1|LandUse)'), collapse = "+")
))
# One mixed model per vegetation group and per response
# (1 = bacterial richness, 2 = plant richness).
lmer_forest1 <- lmer(formula = formular1, data = forest_scale)
lmer_forest2 <- lmer(formula = formular2, data = forest_scale)
lmer_nonforest1 <- lmer(formula = formular1, data = nonforest_scale)
lmer_nonforest2 <- lmer(formula = formular2, data = nonforest_scale)
# Effects and significance tests (optional diagnostics, left disabled)
# anova(lmer_forest1,type="I")
# summary(lmer_forest1)
# vif(lmer_forest1)
# rsquared(lmer2)
# Bootstrap the fixed-effect estimates of each model with bootMer(),
# 1000 simulations per model.
# NOTE(review): parallel = "snow" presumably only takes effect when `ncpus`
# (or options(boot.ncpus = ...)) is greater than 1 -- confirm against the
# lme4 documentation.
lmer_forest_boot1 <- bootMer(
  x = lmer_forest1, FUN = fixef, nsim = 1000, parallel = "snow"
)
lmer_forest_boot2 <- bootMer(
  x = lmer_forest2, FUN = fixef, nsim = 1000, parallel = "snow"
)
lmer_nonforest_boot1 <- bootMer(
  x = lmer_nonforest1, FUN = fixef, nsim = 1000, parallel = "snow"
)
lmer_nonforest_boot2 <- bootMer(
  x = lmer_nonforest2, FUN = fixef, nsim = 1000, parallel = "snow"
)
# Stack the bootstrap replicates of all four models into one table, tagging
# each set with its vegetation group and the diversity response it models.
lmer_boot_stats <- rbind(
  data.frame(lmer_forest_boot1$t, group = 'Forest', diversity = 'Bacteria'),
  data.frame(lmer_nonforest_boot1$t, group = 'NonForest', diversity = 'Bacteria'),
  data.frame(lmer_forest_boot2$t, group = 'Forest', diversity = 'Plant'),
  data.frame(lmer_nonforest_boot2$t, group = 'NonForest', diversity = 'Plant')
)
# The intercept is not plotted, so its column is removed.
lmer_boot_stats[["X.Intercept."]] <- NULL
# Reshape to long format: one row per bootstrap replicate and predictor.
# id.vars is given explicitly so melt() does not have to guess it.
lmer_boot_stats_mlt <- melt(
  lmer_boot_stats,
  id.vars = c('group', 'diversity'),
  variable.name = 'factors',
  value.name = 'estimates'
)
# Median effect per group x diversity x predictor, sorted so that within each
# group/diversity combination the largest median comes first.
# (The name `t` is kept as in the original script; it shadows base::t only as
# a variable, function calls to t() still resolve to base.)
t <- aggregate(
  estimates ~ group * diversity * factors,
  lmer_boot_stats_mlt,
  median
) %>%
  arrange(group, diversity, desc(estimates))
# Order the predictors by their ranking in the first group/diversity block.
# unique() keeps first-appearance order, so this equals the leading rows of
# `t` without hard-coding how many predictors there are.
lmer_boot_stats_mlt$factors <- factor(
  lmer_boot_stats_mlt$factors,
  levels = unique(as.character(t$factors))
)
关键在这里,为阴影变量建立数据对象
# Positions (on the discrete y axis) of every other predictor; these rows get
# a grey background band. Derived from the number of predictor levels instead
# of a hard-coded upper bound.
odds <- seq(1, nlevels(lmer_boot_stats_mlt$factors), by = 2)
# One small data frame per facet. The `diversity` column is what lets
# facet_grid() place the bands in the right panel; ymin/ymax describe a band
# of height 1 centred on each odd position.
rect1 <- data.frame(
  diversity = 'Bacteria',
  ymin = odds - 0.5,
  ymax = odds + 0.5
)
rect2 <- data.frame(
  diversity = 'Plant',
  ymin = odds - 0.5,
  ymax = odds + 0.5
)
成图
# Box plots of the bootstrapped standardised effects, one facet per diversity
# response, with grey bands behind every other predictor row.
lmer_plot <- ggplot() +
  facet_grid(. ~ diversity, scales = 'free') +
  # Background bands are drawn first so they sit behind the boxes. The band
  # limits are passed as fixed parameters (one value per row of the data).
  geom_rect(
    data = rect1,
    xmin = -Inf, xmax = +Inf,
    ymin = odds - 0.5, ymax = odds + 0.5,
    fill = 'grey', alpha = 0.5, inherit.aes = FALSE
  ) +
  geom_rect(
    data = rect2,
    xmin = -Inf, xmax = +Inf,
    ymin = odds - 0.5, ymax = odds + 0.5,
    fill = 'grey', alpha = 0.5, inherit.aes = FALSE
  ) +
  # Dashed reference line at zero effect.
  geom_vline(xintercept = 0, lty = 2, color = 'black') +
  geom_boxplot(
    data = lmer_boot_stats_mlt,
    aes(estimates, factors, fill = group)
  ) +
  scale_fill_brewer(palette = 'Set2') +
  labs(x = 'standardized effects on diversity', y = NULL, fill = NULL) +
  theme_classic() +
  theme(
    axis.title = element_text(color = 'black'),
    axis.text = element_text(color = 'black'),
    axis.line = element_line(color = 'black'),
    axis.ticks = element_line(color = 'black'),
    panel.grid = element_blank(),
    panel.spacing.x = unit(2, 'lines'),
    strip.placement = 'outside',
    strip.background = element_blank(),
    legend.position = c(0.5, 0.95),
    legend.background = element_blank()
  )