R-ggplot2-箱图系列(1) basic - (jianshu.com)
R-ggplot2-箱图系列(2) 注释P值与组间比较 - (jianshu.com)
组间比较分析时可能会涉及到以下的分析情况:
1、两组间比较:(1)选择有参法还是无参法;(2)能否进行配对比较
2、多组间比较:(1)多组间两两比较/多组整体比较(方差分析)
ggpubr 包提供了组间比较的分析函数与可视化函数,本文主要参考自 http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
0、加载包与示例数据
library(ggpubr)
library(patchwork)
# Grouping variables work best as character or factor columns; a numeric
# grouping column such as `dose` is converted to a factor first.
ToothGrowth[["dose"]] <- factor(ToothGrowth[["dose"]])

# Per-column overview of the example data (recorded output below).
summary(ToothGrowth)
#       len        supp         dose
#  Min.   : 4.20   OJ:30   0.5:20
#  1st Qu.:13.07   VC:30   1  :20
#  Median :19.25           2  :20
#  Mean   :18.81
#  3rd Qu.:25.27
#  Max.   :33.90

# First six rows of the example data (recorded output below).
head(ToothGrowth)
#    len supp dose
# 1  4.2   VC  0.5
# 2 11.5   VC  0.5
# 3  7.3   VC  0.5
# 4  5.8   VC  0.5
# 5  6.4   VC  0.5
# 6 10.0   VC  0.5
1、组间分析函数
ggpubr::compare_means()
- 两组的情况
# Two-group comparisons of tooth length (`len`) between the supplement types
# (`supp`) with compare_means(). The commented lines after each call are the
# output recorded when this tutorial was written.

# (1) Wilcoxon test (what compare_means() uses when no `method` is given)
compare_means(len ~ supp, data = ToothGrowth)
# # A tibble: 1 x 8
# .y. group1 group2 p p.adj p.format p.signif method
#
# 1 len OJ VC 0.0645 0.064 0.064 ns Wilcoxon

# (2) t-test
compare_means(len ~ supp, data = ToothGrowth, method = "t.test")
# 1 len OJ VC 0.0606 0.061 0.061 ns T-test

# (3) Change how the adjusted p-value is computed (the default is "holm")
compare_means(len ~ supp, data = ToothGrowth, p.adjust.method = "BH")
# 1 len OJ VC 0.0645 0.064 0.064 ns Wilcoxon

# (4) Account for another variable: run the comparison within each dose level
compare_means(len ~ supp, data = ToothGrowth, group.by = "dose")
# # A tibble: 3 x 9
# dose .y. group1 group2 p p.adj p.format p.signif method
#
# 1 0.5 len OJ VC 0.0232 0.046 0.023 * Wilcoxon
# 2 1 len OJ VC 0.00403 0.012 0.004 ** Wilcoxon
# 3 2 len OJ VC 1 1 1.000 ns Wilcoxon

# (5) Paired analysis.
# The samples must appear in the same order within each group, because
# observations are paired by position.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
# NOTE(review): newer R releases appear to reject `paired` in the formula
# interface of wilcox.test()/t.test() -- confirm that the installed ggpubr
# version still supports this call.
compare_means(len ~ supp, data = ToothGrowth, paired = TRUE)
# 1 len OJ VC 0.00431 0.0043 0.0043 ** Wilcoxon

# (6) Customise the significance thresholds and their symbols
compare_means(
  len ~ supp,
  data = ToothGrowth,
  symnum.args = list(
    cutpoints = c(0, 0.01, 0.05, 1),
    symbols = c("***", "*", "not")
  )
)
# 1 len OJ VC 0.0645 0.064 0.064 not Wilcoxon
- 多组的情况
# Comparisons of tooth length (`len`) across the three dose levels. The
# commented lines after each call are the output recorded for this tutorial.

# (1) Every pairwise comparison between dose levels
ggpubr::compare_means(
  len ~ dose,
  data = ToothGrowth
)
# .y. group1 group2 p p.adj p.format p.signif method
#
# 1 len 0.5 1 0.00000702 0.000014 7.0e-06 **** Wilcoxon
# 2 len 0.5 2 0.0000000841 0.00000025 8.4e-08 **** Wilcoxon
# 3 len 1 2 0.000177 0.00018 0.00018 *** Wilcoxon

# (2) Compare every other level against the "0.5" group only
ggpubr::compare_means(
  len ~ dose,
  data = ToothGrowth,
  ref.group = "0.5"
)
# .y. group1 group2 p p.adj p.format p.signif method
#
# 1 len 0.5 1 0.00000702 0.000007 7.0e-06 **** Wilcoxon
# 2 len 0.5 2 0.0000000841 0.00000017 8.4e-08 **** Wilcoxon

# (3) Global test across all groups, parametric: one-way ANOVA
ggpubr::compare_means(
  len ~ dose,
  data = ToothGrowth,
  method = "anova"
)
# .y. p p.adj p.format p.signif method
#
# 1 len 9.53e-16 9.5e-16 9.5e-16 **** Anova

# (4) Global test across all groups, non-parametric: Kruskal-Wallis
ggpubr::compare_means(
  len ~ dose,
  data = ToothGrowth,
  method = "kruskal.test"
)
# .y. p p.adj p.format p.signif method
#
# 1 len 0.00000000148 0.0000000015 1.5e-09 **** Kruskal-Wallis
2、箱图可视化
2.1 两组比较
- (1) 不同比较方法
# Base box plot of tooth length by supplement type, coloured by `supp`, with
# the individual observations overlaid as jittered points.
p <- ggboxplot(
  ToothGrowth,
  x = "supp",
  y = "len",
  color = "supp",
  palette = "aaas", # colour scheme; see ?ggboxplot for the options
  add = "jitter"
)

# Add the p-value of the group comparison to the plot.
p1 <- p + stat_compare_means() # Wilcoxon test (the default)
p2 <- p + stat_compare_means(method = "t.test")

# Combine both variants into one figure (patchwork).
p1 + p2
- (2)标签显示格式
# Variations of the p-value label on the box plot `p` built in the previous
# example.

# Label position: npc keywords place the label relative to the panel.
# Absolute coordinates are the alternative, e.g. label.x = 1.5, label.y = 40.
p1 <- p + stat_compare_means(label.x.npc = "left", label.y.npc = "top")

# Label content: show the significance symbol instead of the full test label.
# `label = "p.signif"` replaces `aes(label = ..p.signif..)`: the `..var..`
# notation is deprecated in ggplot2 >= 3.4.0, and the string form is what the
# other examples in this tutorial already use.
p2 <- p + stat_compare_means(label = "p.signif")

# Custom significance thresholds and symbols, with the label placed at
# explicit coordinates.
p3 <- p + stat_compare_means(
  label = "p.signif",
  symnum.args = list(
    cutpoints = c(0, 0.01, 0.05, 1),
    symbols = c("***", "*", "notsig")
  ),
  label.x = 1.5,
  label.y = 40
)

# Show the three variants side by side (patchwork).
p1 | p2 | p3
- (3)配对分析
# Paired plot of tooth length for the two supplement types, with the p-value
# of the paired test added.
# Make sure the same samples are listed in the same order in both groups.
ggpubr::ggpaired(
  ToothGrowth,
  x = "supp",
  y = "len",
  color = "supp",
  palette = "jco",
  line.color = "gray",
  line.size = 0.4
) +
  ggpubr::stat_compare_means(paired = TRUE)
- (4)考虑其它分组变量的影响
# Compare the supplement types while accounting for dose, in two layouts.

# Layout 1: one facet per dose level, OJ vs VC compared inside each facet.
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned.
p <- ggboxplot(
  ToothGrowth,
  x = "supp",
  y = "len",
  color = "supp",
  palette = "jco",
  # add = "jitter",
  facet.by = "dose",
  short.panel.labs = FALSE
)
p1 <- p + stat_compare_means(label = "p.format")
# Alternative label: p + stat_compare_means(label = "p.signif", label.x = 1.5)

# Layout 2: dose on the x axis, boxes coloured by supplement type, and the
# supplement types compared at each dose level via the `group` aesthetic.
p <- ggboxplot(
  ToothGrowth,
  x = "dose",
  y = "len",
  color = "supp",
  palette = "jco"
)
p2 <- p + stat_compare_means(aes(group = supp))
# Alternative label: p + stat_compare_means(aes(group = supp), label = "p.signif")

# Stack the two layouts vertically (patchwork).
p1 / p2
2.2 多组比较
- (1)多组间比较可视化时,stat_compare_means() 默认(default)使用 Kruskal-Wallis 检验(无参);如需有参的方差分析,需指定 method = "anova"
# Global comparison across the three dose levels, shown with two tests.

# Left: no `method` given, so stat_compare_means() picks its default test for
# more than two groups (Kruskal-Wallis, per the ggpubr documentation).
p1 <- ggboxplot(
  ToothGrowth,
  x = "dose",
  y = "len",
  color = "dose",
  palette = "jco"
) +
  stat_compare_means()

# Right: one-way ANOVA requested explicitly.
p2 <- ggboxplot(
  ToothGrowth,
  x = "dose",
  y = "len",
  color = "dose",
  palette = "jco"
) +
  stat_compare_means(method = "anova")

# Combine both plots into one figure (patchwork).
p1 + p2
- (2)多组间两两比较
# Pairwise comparisons between dose levels, drawn as brackets on the plot.

# Left: the three pairs with automatically placed brackets.
p1 <- ggboxplot(
  ToothGrowth,
  x = "dose",
  y = "len",
  color = "dose",
  palette = "jco"
) +
  stat_compare_means(
    comparisons = list(
      c("0.5", "1"),
      c("1", "2"),
      c("0.5", "2")
    )
  )

# Right: the same pairs with explicit bracket heights, plus a global p-value.
p2 <- ggboxplot(
  ToothGrowth,
  x = "dose",
  y = "len",
  color = "dose",
  palette = "jco"
) +
  stat_compare_means(
    comparisons = list(
      c("0.5", "1"),
      c("1", "2"),
      c("0.5", "2")
    ),
    label.y = c(29, 35, 40) # height of each bracket label
  ) +
  # Global test across all groups; no `method` is given, so this is the
  # default test (Kruskal-Wallis per the ggpubr documentation), not ANOVA.
  stat_compare_means(label.y = 45)

# Show the two plots side by side (patchwork).
p1 | p2
- (3)直接指定一个参考组
# Box plot by dose with a global ANOVA p-value, plus a t-test of each dose
# level against the "0.5" reference group shown as significance symbols.
ggboxplot(
  ToothGrowth,
  x = "dose",
  y = "len",
  color = "dose",
  palette = "jco"
) +
  # Global p-value
  stat_compare_means(method = "anova", label.y = 40) +
  # Pairwise comparison against the reference group
  stat_compare_means(
    label = "p.signif",
    method = "t.test",
    ref.group = "0.5"
  )