R-ggplot2-箱图系列(2) 注释P值与组间比较

R-ggplot2-箱图系列(1) basic - (jianshu.com)
R-ggplot2-箱图系列(2) 注释P值与组间比较 - (jianshu.com)

组间比较分析时可能会涉及到以下的分析情况:
1、两组间比较:(1)选择有参法还是无参法;(2)能否进行配对比较
2、多组间比较:(1)多组间两两比较;(2)多组整体比较(方差分析)
ggpubr 包提供了组间比较的分析函数与可视化函数,本文主要参考自: http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/

0、加载包与示例数据

library(ggpubr)
library(patchwork)

# Group labels work best as character values; a numeric grouping column
# should be converted to a factor before testing or plotting.
ToothGrowth[["dose"]] <- factor(ToothGrowth[["dose"]])

# Per-column overview of the data set.
summary(object = ToothGrowth)
#       len        supp     dose
# Min.   : 4.20   OJ:30   0.5:20
# 1st Qu.:13.07   VC:30   1  :20
# Median :19.25           2  :20
# Mean   :18.81
# 3rd Qu.:25.27
# Max.   :33.90

# First six rows.
head(x = ToothGrowth, n = 6L)
#    len supp dose
# 1  4.2   VC  0.5
# 2 11.5   VC  0.5
# 3  7.3   VC  0.5
# 4  5.8   VC  0.5
# 5  6.4   VC  0.5
# 6 10.0   VC  0.5

1、组间分析函数

ggpubr::compare_means()

  • 两组的情况
# Two-group comparisons of tooth length (len) between supplement types (supp).

# (1) Wilcoxon test -- the default method
compare_means(len ~ supp, data = ToothGrowth)
# # A tibble: 1 x 8
# .y.   group1 group2      p p.adj p.format p.signif method
# 1 len   OJ     VC     0.0645 0.064 0.064    ns       Wilcoxon

# (2) t-test
compare_means(len ~ supp, data = ToothGrowth,
              method = "t.test")
# 1 len   OJ     VC     0.0606 0.061 0.061    ns       T-test

# (3) Change how the adjusted p-value is computed (the default is "holm")
compare_means(len ~ supp, data = ToothGrowth,
              p.adjust.method = "BH")
# 1 len   OJ     VC     0.0645 0.064 0.064    ns       Wilcoxon

# (4) Account for another variable: run the comparison within each dose level
compare_means(len ~ supp, data = ToothGrowth,
              group.by = "dose")
# # A tibble: 3 x 9
# dose  .y.   group1 group2       p p.adj p.format p.signif method
# 1 0.5   len   OJ     VC     0.0232  0.046 0.023    *        Wilcoxon
# 2 1     len   OJ     VC     0.00403 0.012 0.004    **       Wilcoxon
# 3 2     len   OJ     VC     1       1     1.000    ns       Wilcoxon

# (5) Paired analysis: the samples must be listed in the same order within
# each group so that they pair up correctly.
# Spell out TRUE: the alias T is an ordinary variable that can be reassigned.
compare_means(len ~ supp, data = ToothGrowth,
              paired = TRUE)
# 1 len   OJ     VC     0.00431 0.0043 0.0043   **       Wilcoxon

# (6) Customize the cut points and symbols used for the significance label
compare_means(len ~ supp, data = ToothGrowth,
              symnum.args = list(cutpoints = c(0, 0.01, 0.05, 1),
                                 symbols = c("***", "*", "not")))
# 1 len   OJ     VC     0.0645 0.064 0.064    not      Wilcoxon
  • 多组的情况
# Comparisons of tooth length (len) across the three dose levels.

# (1) Every pairwise comparison
compare_means(formula = len ~ dose, data = ToothGrowth)
# .y.   group1 group2            p      p.adj p.format p.signif method
# 1 len   0.5    1      0.00000702   0.000014   7.0e-06  ****     Wilcoxon
# 2 len   0.5    2      0.0000000841 0.00000025 8.4e-08  ****     Wilcoxon
# 3 len   1      2      0.000177     0.00018    0.00018  ***      Wilcoxon

# (2) Compare every group against the "0.5" reference group
compare_means(formula = len ~ dose, data = ToothGrowth, ref.group = "0.5")
# .y.   group1 group2            p      p.adj p.format p.signif method
# 1 len   0.5    1      0.00000702   0.000007   7.0e-06  ****     Wilcoxon
# 2 len   0.5    2      0.0000000841 0.00000017 8.4e-08  ****     Wilcoxon

# (3) Global test, parametric: one-way ANOVA
compare_means(formula = len ~ dose, data = ToothGrowth, method = "anova")
# .y.          p   p.adj p.format p.signif method
# 1 len   9.53e-16 9.5e-16 9.5e-16  ****     Anova

# (4) Global test, non-parametric: Kruskal-Wallis
compare_means(formula = len ~ dose, data = ToothGrowth, method = "kruskal.test")
# .y.               p        p.adj p.format p.signif method
# 1 len   0.00000000148 0.0000000015 1.5e-09  ****     Kruskal-Wallis

2、箱图可视化

2.1 两组比较
  • (1) 不同比较方法
# Base box plot of tooth length per supplement with jittered points on top.
# The available color palettes are listed in ?ggboxplot.
p <- ggboxplot(
  ToothGrowth,
  x = "supp", y = "len",
  color = "supp", palette = "aaas",
  add = "jitter"
)

# Annotate the plot with a p-value, once per test method.
p1 <- p + stat_compare_means()                   # default method: Wilcoxon
p2 <- p + stat_compare_means(method = "t.test")

# Show both variants side by side (patchwork).
p1 + p2
  • (2)标签显示格式
# Variations on how the test label is positioned and what it shows.
# `p` is the base box plot built in the preceding snippet.

# Label position, given in normalized parent coordinates; absolute data
# coordinates work too, e.g. label.x = 1.5, label.y = 40.
p1 <- p + stat_compare_means(label.x.npc = "left",
                             label.y.npc = "top")

# Label content: show the significance symbol instead of the p-value.
# label = "p.signif" is used instead of aes(label = ..p.signif..) because the
# dot-dot notation is deprecated since ggplot2 3.4.0.
p2 <- p + stat_compare_means(label = "p.signif")

# Custom cut points and symbols for the significance label
p3 <- p + stat_compare_means(label = "p.signif",
                             symnum.args = list(cutpoints = c(0, 0.01, 0.05, 1),
                                                symbols = c("***", "*", "notsig")),
                             label.x = 1.5, label.y = 40)

# Show the three variants side by side (patchwork).
p1 | p2 | p3
  • (3)配对分析
# Paired comparison: each sample must appear at the same position within
# every group, otherwise the pairs are matched up wrongly.
paired_plot <- ggpaired(
  ToothGrowth,
  x = "supp", y = "len",
  color = "supp", palette = "jco",
  line.color = "gray", line.size = 0.4
)
paired_plot + stat_compare_means(paired = TRUE)
  • (4)考虑其它分组变量的影响
# Compare supplements while accounting for a second grouping variable (dose).

# Variant 1: one facet panel per dose level.
# short.panel.labs = FALSE keeps the variable name in the panel labels.
# Spell out FALSE: the alias F is an ordinary variable that can be reassigned.
p <- ggboxplot(ToothGrowth, x = "supp", y = "len",
               color = "supp", palette = "jco",
               # add = "jitter",
               facet.by = "dose",
               short.panel.labs = FALSE)
p1 <- p + stat_compare_means(label = "p.format")
# Alternative: p + stat_compare_means(label = "p.signif", label.x = 1.5)

# Variant 2: dose on the x axis, boxes colored by supplement, and the test
# grouped by supp through the group aesthetic.
p <- ggboxplot(ToothGrowth, x = "dose", y = "len",
               color = "supp", palette = "jco")
p2 <- p + stat_compare_means(aes(group = supp))
# Alternative: p + stat_compare_means(aes(group = supp), label = "p.signif")

# Stack the two variants vertically (patchwork).
p1 / p2
2.2 多组比较
  • (1)多组间比较可视化时,默认进行 Kruskal-Wallis 检验(即无参的方差分析);也可以通过 method = "anova" 指定有参的方差分析
# Shared base plot: tooth length per dose level.
dose_box <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                      color = "dose", palette = "jco")

# Global p-value with the default method, then with one-way ANOVA.
p1 <- dose_box + stat_compare_means()
p2 <- dose_box + stat_compare_means(method = "anova")

# Show both variants side by side (patchwork).
p1 + p2
  • (2)多组间两两比较
# Shared base plot and the three dose pairs to compare.
dose_box <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                      color = "dose", palette = "jco")
dose_pairs <- list(c("0.5", "1"), c("1", "2"), c("0.5", "2"))

# Pairwise p-values with the bracket heights left to the defaults
p1 <- dose_box + stat_compare_means(comparisons = dose_pairs)

# Pairwise p-values at explicit bracket heights, plus the global test
# p-value placed above them at y = 45
p2 <- dose_box +
  stat_compare_means(comparisons = dose_pairs, label.y = c(29, 35, 40)) +
  stat_compare_means(label.y = 45)

# Show both variants side by side (patchwork).
p1 | p2
  • (3)直接指定一个参考组
# Compare every dose level against the "0.5" reference group.
dose_box <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                      color = "dose", palette = "jco")

# Global p-value from a one-way ANOVA, drawn at y = 40
global_p <- stat_compare_means(method = "anova", label.y = 40)

# Pairwise t-tests against the reference group, shown as significance symbols
vs_reference <- stat_compare_means(label = "p.signif", method = "t.test",
                                   ref.group = "0.5")

dose_box + global_p + vs_reference

你可能感兴趣的:(R-ggplot2-箱图系列(2) 注释P值与组间比较)