ggplot2:方差分析多重比较标注显著字母

赖江山老师在科学网分享了Francois Gillet编写的两个方差分析多重比较的函数 boxplert()和boxplerk()【来源Numerical Ecology with R (second Edition)】

我看了一下出图的部分是用boxplot函数绘制的,作为一个ggplot2的爱好者自己尝试着用ggplot2把函数boxplert()重新写了一下。在重写的过程中收获几个问题:

  • X 轴如何按照给定的数据顺序而不是系统默认的编码顺序
  • 显著性标签怎么加,关键是位置放在哪
  • 绘图的颜色如何调整
  • 在ggplot中加annotate的方法,位置与字体的设置
  • 把ggplot2封装到函数里面合适吗?我觉得不写成函数会让出来的图更加易于调整,ggplot2 本来很灵活,写到函数里面反而限制了她的妖娆。

现在我们分别来测试一下,为了演示X轴的摆列顺序我把InsectSprays数据集写出来打乱里面本来按顺序的分组信息。

rm(list=ls())
setwd("C:\\Users\\Administrator\\Desktop\\boxplot")
library(agricolae)
library(stats)
data(InsectSprays)
# InsectSprays<-InsectSprays
# getwd()
# InsectSprays<-write.csv(InsectSprays,"InsectSprays.csv")
InsectSprays<-read.csv("InsectSprays.csv")
###
library(agricolae)
library(stats)

先看看之前的函数boxplert()出图的效果:

# 检验方差分析假设
shapiro.test(resid(aov(InsectSprays$count ~ InsectSprays$spray)))
boxplert(
  InsectSprays$count,
  InsectSprays$spray,
  ylab = "yield",
  xlab = "virus",
  bcol = "orange",
  p.adj = "holm"
)

输出:

$`comparison`
       mean        se       sd min max  n group
A 16.666667 1.7936476 6.213378   9  26 12     a
B  2.083333 0.5701984 1.975225   0   7 12     b
C 15.333333 1.2329647 4.271115   7  21 12     a
D  3.500000 0.5000000 1.732051   1   6 12     b
E 14.500000 1.3623732 4.719399   7  23 12     a
F  4.916667 0.7225621 2.503028   2  12 12     b

$p.value
[1] 3.182584e-17

再看新函数gglert()的效果:

# 检验方差分析假设
shapiro.test(resid(aov(InsectSprays$count ~ InsectSprays$spray)))
gglert(InsectSprays,
       InsectSprays$count,
       InsectSprays$spray,
       yLab = "count",
       xLab = "spray",
       bcol = "bisque",
       p.adj = "holm",
       las = 1)

输出:

$`comparison`
       mean        se       sd min max median  n group
E 14.500000 1.3623732 4.719399   7  23   14.0 12     a
C 15.333333 1.2329647 4.271115   7  21   16.5 12     b
B  2.083333 0.5701984 1.975225   0   7    1.5 12     a
F  4.916667 0.7225621 2.503028   2  12    5.0 12     b
D  3.500000 0.5000000 1.732051   1   6    3.0 12     a
A 16.666667 1.7936476 6.213378   9  26   15.0 12     b

$p.value
[1] 3.182584e-17

$plot

至于非参数组间差异的Kruskal-Wallis检验出图的函数boxplerk()我就不再修改了,真的,我觉得写成函数得不偿失,不写成函数还好一些。

赖老师科学网分享的boxplert()和boxplerk()函数
#普通方差分析多重比较
boxplert <-  function(X,
                      Y,
                      main = NULL,
                      xlab = NULL,
                      ylab = NULL,
                      bcol = "bisque",
                      p.adj = "none",
                      cexy = 1,
                      varwidth = TRUE,
                      las = 1,
                      paired = FALSE)
{
  aa <- levels(as.factor(Y))
  an <- as.character(c(1:length(aa)))
  tt1 <- matrix(nrow = length(aa), ncol = 6)    
  for (i in 1:length(aa))
  {
    temp <- X[Y == aa[i]]
    tt1[i, 1] <- mean(temp, na.rm = TRUE)
    tt1[i, 2] <- sd(temp, na.rm = TRUE) / sqrt(length(temp))
    tt1[i, 3] <- sd(temp, na.rm = TRUE)
    tt1[i, 4] <- min(temp, na.rm = TRUE)
    tt1[i, 5] <- max(temp, na.rm = TRUE)
    tt1[i, 6] <- length(temp)
  }
  
  tt1 <- as.data.frame(tt1)
  row.names(tt1) <- aa
  colnames(tt1) <- c("mean", "se", "sd", "min", "max", "n")
  
  boxplot(
    X ~ Y,
    main = main,
    xlab = xlab,
    ylab = ylab,
    las = las,
    col = bcol,
    cex.axis = cexy,
    cex.lab = cexy,
    varwidth = varwidth
  )    
  require(agricolae)
  Yn <- factor(Y, labels = an)
  sig <- "ns"
  model <- aov(X ~ Yn)    
  if (paired == TRUE & length(aa) == 2)
  {
    coms <- t.test(X ~ Yn, paired = TRUE)
    pp <- coms$p.value
  }    else
  {
    pp <- anova(model)$Pr[1]
  }    
  if (pp <= 0.1)
    sig <- "."
  if (pp <= 0.05)
    sig <- "*"
  if (pp <= 0.01)
    sig <- "**"
  if (pp <= 0.001)
    sig <- "***"
  
  mtext(
    sig,
    side = 3,
    line = 0.5,
    adj = 0,
    cex = 2,
    font = 1
  )    
  if(pp <= 0.05) {
    comp <- LSD.test(model,
                     "Yn",
                     alpha = 0.05,
                     p.adj = p.adj,
                     group = TRUE)      # gror <- comp$groups[order(comp$groups$groups), ]
    # tt1$cld <- gror$M
    gror <- comp$groups[order(rownames(comp$groups)), ]
    tt1$group <- gror$groups
    mtext(
      tt1$group,
      side = 3,
      at = c(1:length(aa)),
      line = 0.5,
      cex = 1,
      font = 4
    )
  }
  list(comparison = tt1, p.value = pp)
  
}
##非参数组间差异的Kruskal-Wallis检验
boxplerk <-  function(X,
                      Y,
                      main = NULL,
                      xlab = NULL,
                      ylab = NULL,
                      bcol = "bisque",
                      p.adj = "none",
                      cexy = 1,
                      varwidth = TRUE,
                      las = 1,
                      paired = FALSE)
{
  aa <- levels(as.factor(Y))
  an <- as.character(c(1:length(aa)))
  tt1 <- matrix(nrow = length(aa), ncol = 7)    
  for (i in 1:length(aa))
  {
    temp <- X[Y == aa[i]]
    tt1[i, 1] <- mean(temp, na.rm = TRUE)
    tt1[i, 2] <- sd(temp, na.rm = TRUE) / sqrt(length(temp))
    tt1[i, 3] <- sd(temp, na.rm = TRUE)
    tt1[i, 4] <- min(temp, na.rm = TRUE)
    tt1[i, 5] <- max(temp, na.rm = TRUE)
    tt1[i, 6] <- median(temp, na.rm = TRUE)
    tt1[i, 7] <- length(temp)
  }
  
  tt1 <- as.data.frame(tt1)
  row.names(tt1) <- aa
  colnames(tt1) <- c("mean", "se", "sd", "min", "max", "median", "n")
  
  boxplot(
    X ~ Y,
    main = main,
    xlab = xlab,
    ylab = ylab,
    las = las,
    col = bcol,
    cex.axis = cexy,
    cex.lab = cexy,
    varwidth = varwidth
  )    
  require(agricolae)
  Yn <- factor(Y, labels = an)
  comp <- kruskal(X, Yn, p.adj = p.adj)
  sig <- "ns"
  
  if (paired == TRUE & length(aa) == 2)
  {
    coms <- wilcox.test(X ~ Yn, paired = TRUE)
    pp <- coms$p.value
  }    else
  {
    pp <- comp$statistics$p.chisq
  }    
  if(pp <= 0.1)
    sig <- "."
  if(pp <= 0.05)
    sig <- "*"
  if(pp <= 0.01)
    sig <- "**"
  if(pp <= 0.001)
    sig <- "***"
  
  gror <- comp$groups[order(rownames(comp$groups)), ]
  tt1$rank <- gror$X
  tt1$group <- gror$groups
  mtext(
    sig,
    side = 3,
    line = 0.5,
    adj = 0,
    cex = 2,
    font = 1
  )   
  if(pp <= 0.1)
    mtext(
      tt1$group,
      side = 3,
      at = c(1:length(aa)),
      line = 0.5,
      cex = 1,
      font = 4
    )
  
  list(comparison = tt1, p.value = pp)
  
}
我根据boxplert()修改的函数gglert()
gglert <-function(data,
                  X,
                  Y,
                  main = NULL,
                  xLab = NULL,
                  yLab = NULL,
                  bcol = "bisque",
                  p.adj = "none",
                  cexy = 1,
                  varwidth = TRUE,
                  las = 1,
                  paired = FALSE){
library(ggplot2)
#names(mydata)=c("Group","count","color")
Y = factor(Y,levels =  unique(Y))
#color = unique(as.vector(mydata$color))
#color=(palette(rainbow(100)))[1:length(unique(Y))]
color=heat.colors(length(unique(Y)))

p<-ggplot(data=data,aes(Y,X,aes(Y,X,fill=unique(Y))))+
  geom_boxplot(fill=color)+theme(axis.text.x=element_text(angle=45,hjust=1))+labs(x=xLab,y=yLab)+#
  theme_light()#+
  #stat_summary(geom = 'text', label = labdata$groups, fun.y = max , vjust = -0.5
require(agricolae)
aa <- levels(factor(Y, levels=unique(Y)))#levels(as.factor(Y))
#an <- as.character(c(1:length(aa)))

tt1 <- matrix(nrow = length(aa), ncol = 7)
for (i in 1:length(aa)) {
  temp <- X[Y == aa[i]]
  tt1[i, 1] <- mean(temp, na.rm = TRUE)
  tt1[i, 2] <- sd(temp, na.rm = TRUE)/sqrt(length(temp))
  tt1[i, 3] <- sd(temp, na.rm = TRUE)
  tt1[i, 4] <- min(temp, na.rm = TRUE)
  tt1[i, 5] <- max(temp, na.rm = TRUE)
  tt1[i, 6] <- median(temp, na.rm = TRUE)
  tt1[i, 7] <- length(temp)
}
tt1 <- as.data.frame(tt1)
row.names(tt1) <- aa
colnames(tt1) <- c("mean", "se", "sd", "min", "max", "median", "n")
Yn <- factor(Y)
model <- aov(X ~ Yn)
if (paired == TRUE & length(aa) == 2)
{
  coms <- t.test(X ~ Yn, paired = TRUE)
  pp <- coms$p.value
}    else
{
  pp <- anova(model)$Pr[1]
}    

if (pp <= 0.05) {
  comp <- LSD.test(model,
                   "Yn",
                   alpha = 0.05,
                   p.adj = p.adj,
                   group = TRUE)
  labdata<- comp$groups
  labdata$name<-row.names(labdata)
  labdata<-labdata[match(unique(Y),labdata$name),]
  gror <- comp$groups[order(rownames(comp$groups)), ]
  tt1$group <- gror$groups
  
  p=p+stat_summary(geom = 'text', label = labdata$groups, fun.y = max , vjust = -0.5)
}
p=p+annotate("text",x=-Inf,y=Inf,hjust=-0.5,vjust=max(X)*0.05,label= paste("p=",sprintf("%.3f",pp),sep = ""),fontface = "italic",family = "Arial",size = 4)

list(comparison = tt1, p.value = pp,plot=p)

}



参考
方差分析多重比较出图

你可能感兴趣的:(ggplot2:方差分析多重比较标注显著字母)