跟着Plos Biology学作图:R语言ggplot2散点图并添加误差线和带置信区间的拟合曲线

论文

Large variation in the association between seasonal antibiotic use and resistance across multiple bacterial species and antibiotic classes

数据代码链接

https://github.com/orgs/gradlab/repositories

今天的推文重复一下论文中的 Figure 1B

image.png

论文提供的代码用到了 purrr 包的 map 系列函数(如 map_dbl()、pmap()),这类函数我之前基本没有用过,功能很强大。这里还涉及如何批量循环作图。

首先是数据集

library(readr)
library(tidyverse)

# Load the model table (path is relative to the working directory). Later
# steps read its AIC, drug_class, omega, term, estimate, ci.lower, ci.upper
# and p.value.BH columns.
use.model.params<-read_csv("use-resistance-seasonality/tables/use_model_values.csv")
# Load the deviates table. Later steps read its drug_class, month,
# seasonal_deviate and sem columns.
use.deviates <- read_csv("use-resistance-seasonality/tables/use_seasonal_deviates.csv")

准备作图配色

# Colour assigned to each antibiotic class; looked up by class name when the
# per-class panels are drawn.
colors <- c(
  Macrolides    = "#220050",
  Nitrofurans   = "#b30059",
  Penicillins   = "#0091a8",
  Quinolones    = "#359023",
  Tetracyclines = "#ffa500"
)

三个自定义函数

# 第一个
# Keep, within each group, only the row(s) with the smallest AIC.
#
# table:      data frame containing an `AIC` column plus the grouping columns.
# group_cols: character vector naming the columns that define a group.
#
# Returns the surviving rows, ungrouped, with the `AIC` column and the
# temporary `rank` helper column removed. Rows tied for the lowest AIC within
# a group are all kept, because dense_rank() gives ties the same rank.
filter_models_AIC_func <- function(table, group_cols) {
  table %>%
    # group_by(across(all_of(...))) replaces the superseded
    # group_by_at(vars(...)) form.
    group_by(across(all_of(group_cols))) %>%
    # .data$AIC forces a column lookup, so a table without an AIC column fails
    # with a clear "column not found" error instead of picking up the
    # stats::AIC function from the search path.
    mutate(rank = dense_rank(.data$AIC)) %>%
    ungroup() %>%
    filter(rank == 1) %>%
    select(-AIC, -rank)
}

# 第二个

# Evaluate the cosine curve
#   amplitude * cos(omega * (month - phase)) + intercept
# at `month`. The arithmetic is element-wise, so `month` may be a vector.
cos_func <- function(month, amplitude, phase, omega, intercept) {
  angle <- omega * (month - phase)
  wave <- amplitude * cos(angle)
  wave + intercept
}

# 第三个是画图函数
# Draw one panel for a single antibiotic class: points with error bars, the
# fitted cosine curve, and a ribbon spanning the lower/upper amplitude bounds.
#
# deviates:  data frame with `month`, `seasonal_deviate` and `sem` columns.
# class:     class name; used as the panel title and as the key into the
#            `colors` vector defined outside this function.
# amplitude, phase, omega:
#            parameters of the curve drawn with cos_func() (intercept is 0).
# a_lower, a_upper:
#            amplitudes used for the lower and upper edge of the ribbon.
# sig:       logical flag; when TRUE the title gets a trailing " *".
#
# Returns a ggplot object.
plot_use_model_func <- function(deviates, class, amplitude, phase, omega,
                                a_lower, a_upper, sig) {
  col <- colors[class]

  # isTRUE() treats an NA flag (e.g. a missing p-value) as "not significant"
  # rather than aborting with "missing value where TRUE/FALSE needed".
  title <- if (isTRUE(as.logical(sig))) paste(class, "*") else class

  # cos_func() is vectorised over month, so each ribbon edge is a single call
  # instead of an element-wise map_dbl().
  month_grid <- seq(1, 12, 0.01)
  ci <- data.frame(
    month = month_grid,
    lower_ci = cos_func(month_grid, a_lower, phase, omega, 0),
    upper_ci = cos_func(month_grid, a_upper, phase, omega, 0)
  )

  ggplot(data = deviates, aes(x = month)) +
    geom_point(aes(y = seasonal_deviate), color = col, size = 1) +
    geom_errorbar(
      aes(ymin = seasonal_deviate - sem, ymax = seasonal_deviate + sem),
      width = 0.5, color = col
    ) +
    # Arguments are spelled out in full: the previous `a = amplitude` only
    # worked through partial argument matching.
    # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` for line width;
    # `size` is kept so the code still runs on older ggplot2 versions.
    stat_function(
      fun = cos_func,
      args = list(amplitude = amplitude, phase = phase, omega = omega, intercept = 0),
      size = 0.7, color = col
    ) +
    geom_ribbon(
      data = ci,
      aes(x = month, ymin = lower_ci, ymax = upper_ci),
      fill = col, alpha = 0.3
    ) +
    scale_x_continuous(breaks = c(1, 3, 5, 7, 9, 11)) +
    ggtitle(title) +
    xlab("Month") +
    theme_classic() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 11, hjust = 0.5, face = "bold"),
      axis.text = element_text(size = 10),
      axis.title.x = element_text(size = 11),
      axis.title.y = element_blank()
    )
}

数据整合

# Keep only the lowest-AIC row(s) for each drug class.
use.model.params.fil <- filter_models_AIC_func(use.model.params, c("drug_class"))

# Reshape the amplitude/phase rows so that each drug_class/omega combination
# becomes one row with columns named <term>_<variable>
# (e.g. amplitude_estimate, phase_estimate, amplitude_ci.lower).
f1b_data_model = use.model.params.fil %>%
  filter(term %in% c("amplitude", "phase")) %>%
  select(drug_class, omega, term, estimate, ci.lower, ci.upper, p.value.BH) %>%
  # Long format: every column except the three id columns goes into
  # variable/value pairs.
  gather(variable, value, -(c("drug_class", "term", "omega"))) %>%
  # Combine term and variable into one key (unite()'s default separator is "_").
  unite(temp, term, variable) %>%
  # Back to wide format: one column per key.
  spread(temp, value) %>%
  # Flag rows whose amplitude p-value is below 0.05.
  # NOTE(review): "BH" presumably means Benjamini-Hochberg adjusted; confirm.
  mutate(sig = amplitude_p.value.BH < 0.05) 

# Attach the deviates that belong to each selected model.
f1b_data_deviates = f1b_data_model %>%
  select(drug_class, omega) %>%
  # No `by` is given, so the join uses every column name the two tables share.
  # NOTE(review): presumably drug_class and omega; confirm against
  # use.deviates.
  left_join(use.deviates) %>%
  select(drug_class, month, seasonal_deviate, sem)

作图

# Build one plot per row of f1b_data_model: attach each class's deviates as a
# list-column, call plot_use_model_func() row by row with pmap(), then pull
# the resulting list of ggplot objects out of the data frame.
f1b_plots <- f1b_data_model %>%
  left_join(
    # nest(<name> = <cols>) is the tidyr >= 1.0.0 form; the unnamed
    # nest(-drug_class) call is deprecated. Naming the list-column here also
    # removes the need for a separate rename().
    f1b_data_deviates %>%
      nest(deviates_table = -drug_class),
    by = "drug_class"
  ) %>%
  # make plots
  mutate(plot = pmap(
    .l = list(
      deviates = deviates_table,
      class = drug_class,
      amplitude = amplitude_estimate,
      phase = phase_estimate,
      omega = omega,
      a_lower = amplitude_ci.lower,
      a_upper = amplitude_ci.upper,
      sig = sig
    ),
    .f = plot_use_model_func
  )) %>%
  pull(plot)

这里作者的处理方式是:用 pmap() 对数据框的每一行调用画图函数,把得到的多个图保存到数据框的一个列表列(list-column)中,最后用 pull() 把这一列取出来,得到一个由 ggplot 对象组成的列表。

使用ggpubr包中的函数拼图

library(ggpubr)
# Arrange the panels on a 2 x 3 grid with aligned axes, add a shared rotated
# label on the left, and display the result.
f1b <- annotate_figure(
  ggarrange(plotlist = f1b_plots, nrow = 2, ncol = 3, align = "hv"),
  left = text_grob(
    "Seasonal deviates in use (mean daily claims/10,000 people)",
    size = 11,
    rot = 90
  )
)
print(f1b)
image.png

这个数据处理和作图的代码整体还相对比较复杂,如果有时间的话争取录视频逐行解释其中的代码。示例数据和代码可以在公众号后台留言20220324获取

欢迎大家关注我的公众号

小明的数据分析笔记本

小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!

你可能感兴趣的:(跟着Plos Biology学作图:R语言ggplot2散点图并添加误差线和带置信区间的拟合曲线)