其实该功能的实现直接用EasyStat就可以,集成的包,一键出图比我这还方便得多(但是好像得手动自己选择检验方式,但它可以快速做显著性和齐性判断,也很快,统计方法的选择也很多),但我写这个代码的时候还没有发现EasyStat...否则我200%不会浪费这个时间的...
EasyStat_差异分析快速添加显著标记字母
用SPSS也是很好的,可以多选几个检验方法,但我又懒又手拙眼拙怕哪里弄错...
使用sharpiro检验正态性,使用bartlett检验方差齐性
p值界限全都是0.05,两两检验全部是two-tailed
可以选择多图分面或绘制单图
输入文件:
输出文件:图(一个.pdf,一个.tiff),还有一个用于作图的表格文件。
作图时,因函数的代码较长(因为有大量重复部分hhh),建议用RStudio运行,把包的导入和函数放到一个.R文件里,先Ctrl+Enter快速按几次跑完它,再点开跑数据的另一个修改自己信息的.R文件跑。
如果要绘制箱线图,可以参考这个链接改一下作图函数,前面一直到生成的表格都是适配的。我自己因为组内重复太少就不做箱线图了。跟着iMeta学做图|分面箱线图展示alpha多样性并用标注差异分析结果
# 带显著的柱状图的function集合
# xjy-20230510-v1
library(ggplot2)
library(ggpubr)
library(dplyr) # rename_with # 载入它会有masked,但不影响
library(agricolae) #用于方差分析
library(ggsignif)
library(agricolae) # orderPvalue
library(dunn.test) # dunn检验(Kruskal-Wallis的基础上两两比对)
library(PMCMRplus) # dunnettT3Test
# install.packages("dunn.test") # 若有没安过的,自己安一下
# 6个函数
# 这些参数和非参的两两检验都是two-tailed
tukeyHSD <- function(variable,variable_name){
#计算分组平均数,最大值,标准差
group_mean <- aggregate(x = variable, by = list(data$group), FUN = mean)%>%
rename_with(~c("group", "mean_val"), 1:2)
group_max <- aggregate(x = variable, by = list(data$group), FUN = max)%>%
rename_with(~c("group", "max"), 1:2)
group_sd <- aggregate(x = variable, by = list(data$group),FUN = sd)%>%
rename_with(~c("group", "sd"), 1:2)
# 单因素ANOVA
anova <- aov(variable~group, data = data)
#两两比较,方差齐的话用TukeyHSD
pair_comparison = TukeyHSD(anova)
pair_comparison = as.data.frame(pair_comparison$group)
#创建一个pvalue矩阵
ntr <- nrow(group_mean) # 也即处理数,有几个处理就有几个平均值
mat <- matrix(1, ncol = ntr, nrow = ntr) #建立一个全是1的矩阵以备填充
p <- pair_comparison$`p adj`
k <- 0
# 往矩阵里填充p值,变成像SPSS里两两比对结果的三角矩阵(但这里是对称矩阵)
for (i in 1:(ntr - 1)) {
for (j in (i + 1):ntr) {
k <- k + 1
mat[i, j] <- p[k]
mat[j, i] <- p[k]
}
}
#write.table(mat,file="mat.txt",sep="\t")
treatments <- as.vector(group_mean$group)
means <- as.vector(group_mean$mean_val)
max <- as.vector(group_max$max)
#sd <- as.vector(group_sd$sd) #
alpha <- 0.05 # 显著性,可以自定???
pvalue <- mat
out <- orderPvalue(treatments, means,alpha,pvalue, console = TRUE) # 排出abcd
out$group <- rownames(out)
# left_join保留第一个表格的所有
out=left_join(out, group_max, by = "group")
out=left_join(out, group_sd, by = "group")
# 提取data大表中正在计算的那列来合并
data2 <- subset(data, select = c("group", "group2"))
data3 <- cbind(data2, variable)
data1=left_join(out, data3, by = "group", multiple = "all") # 仅返回左侧数据集中每行的第一个匹配项。如果存在多个匹配项,则警告消息建议使用 multiple = "all" 参数返回所有匹配行。
data1$maxx <- max(data1$means)
data1 $type <- variable_name
colnames(data1) <- c("means", "groups", "group","max","sd","group2","variable","maxx","type")
data1$test="TukeyHSD"
return(data1)
}
dunnetttest <- function(variable,variable_name){
data$group2=as.factor(data$group2) # 把变量转化为因子的形式
#计算分组平均数,最大值,标准差
group_mean <- aggregate(x = variable, by = list(data$group2), FUN = mean)%>%
rename_with(~c("group2", "mean_val"), 1:2)
group_max <- aggregate(x = variable, by = list(data$group2), FUN = max)%>%
rename_with(~c("group2", "max"), 1:2)
group_sd <- aggregate(x = variable, by = list(data$group2),FUN = sd)%>%
rename_with(~c("group2", "sd"), 1:2)
# 方差不齐的话:dunnettTest
pair_comparison = dunnettT3Test(variable~group2, data = data)
pair_comparison = as.data.frame(pair_comparison[["p.value"]])
ntr <- nrow(group_mean) # 也即处理数,有几个处理就有几个平均值
mat <- matrix(1, ncol = ntr, nrow = ntr) #建立一个全是1的矩阵以备填充
for (i in 2:ntr) {
for (j in (1:(i-1))) {
mat[i, j] <- pair_comparison[i-1,j]
mat[j, i] <- pair_comparison[i-1,j]
}
}
#write.table(mat,file="mat.txt",sep="\t")
treatments <- as.vector(group_mean$group)
means <- as.vector(group_mean$mean_val)
alpha <- 0.05 # 显著性,可以自定
pvalue <- mat
out <- orderPvalue(treatments, means,alpha,pvalue, console = TRUE) # 排出abcd
out$group2 <- rownames(out)
# left_join保留第一个表格的所有
out=left_join(out, group_max, by = "group2")
out=left_join(out, group_sd, by = "group2")
# 提取data大表中正在计算的那列来合并
data2 <- subset(data, select = c("group", "group2"))
data3 <- cbind(data2, variable)
data1=left_join(out, data3, by = "group2", multiple = "all") # 仅返回左侧数据集中每行的第一个匹配项。如果存在多个匹配项,则警告消息建议使用 multiple = "all" 参数返回所有匹配行。
data1$maxx <- max(data1$means)
data1 $type <- variable_name
data1[, c(3, 6)] <- data1[, c(6, 3)]
colnames(data1) <- c("means", "groups", "group","max","sd","group2","variable","maxx","type")
data1$test="dunnettT3Test"
return(data1)
}
Dunn <- function(variable,variable_name){
#计算分组平均数,最大值,标准差
group_mean <- aggregate(x = variable, by = list(data$group), FUN = mean)%>%
rename_with(~c("group", "mean_val"), 1:2)
group_max <- aggregate(x = variable, by = list(data$group), FUN = max)%>%
rename_with(~c("group", "max"), 1:2)
group_sd <- aggregate(x = variable, by = list(data$group),FUN = sd)%>%
rename_with(~c("group", "sd"), 1:2)
# 非参的dunn两两检验,默认双端
mc <- dunn.test(variable,data$group,method = "holm")
p <- mc$P.adjust
#创建一个pvalue矩阵
ntr <- nrow(group_mean) # 也即处理数,有几个处理就有几个平均值
mat <- matrix(1, ncol = ntr, nrow = ntr) #建立一个全是1的矩阵以备填充
k <- 0
# 往矩阵里填充p值,变成像SPSS里两两比对结果的三角矩阵(但这里是对称矩阵)
for (i in 2:(ntr - 1)) {
for (j in (1:(i-1))) {
k <- k + 1
mat[i, j] <- p[k]
mat[j, i] <- p[k]
}
}
#write.table(mat,file="mat.txt",sep="\t")
treatments <- as.vector(group_mean$group)
means <- as.vector(group_mean$mean_val)
max <- as.vector(group_max$max)
alpha <- 0.05 # 显著性,可以自定
pvalue <- mat
out <- orderPvalue(treatments, means,alpha,pvalue, console = TRUE) # 排出abcd
out$group <- rownames(out)
# left_join保留第一个表格的所有
out=left_join(out, group_max, by = "group")
out=left_join(out, group_sd, by = "group")
# 提取data大表中正在计算的那列来合并
data2 <- subset(data, select = c("group", "group2"))
data3 <- cbind(data2, variable)
data1=left_join(out, data3, by = "group", multiple = "all") # 仅返回左侧数据集中每行的第一个匹配项。如果存在多个匹配项,则警告消息建议使用 multiple = "all" 参数返回所有匹配行。
data1$maxx <- max(data1$means)
data1 $type <- variable_name
colnames(data1) <- c("means", "groups", "group","max","sd","group2","variable","maxx","type")
data1$test="Kruskal_Wallis_Dunn_holm"
return(data1)
}
# 进行自动判断条件的两两比较(包括正态性和方差齐性)
compare <- function(variable,variable_name){
if(all(sapply(split(variable, data$group), function(x) shapiro.test(x)$p.value >= 0.05))) {
# 如果数据满足正态性假设,进一步判断方差齐性假设
if(bartlett.test(variable, data$group)$p.value >= 0.05) {
# 如果数据满足方差齐性假设,使用TukeyHSD进行两两比较
final_data <- tukeyHSD(variable,variable_name)
} else {
# 如果数据不满足方差齐性假设
final_data <- dunnetttest(variable,variable_name)
}
} else {
# 如果数据不满足正态性假设
final_data <- Dunn(variable,variable_name)
}
return(final_data)
}
# 画图函数
facet_plot<-function(file,nrow,label,width,height,output_name){
p=ggplot(file , aes(x=group,y=means,col=group,fill=group))+
# ymin=means-0.05*sd是为了把误差线的下半部分藏在bar的后面防止穿模,如果要把他全部显示,去掉0.05*,并且把加error bar的移到画bar那行的下面就好了
geom_errorbar(aes(ymin=means-0.05*sd, ymax=means+sd), width=0.4,size=0.2, color="black")+# 第一张图的size???0.5。添加误差线,也可以用ymin=means-0.98*sd, ymax=means+0.98*sd,*0.98???1.96除根???4(4是样本个???),代???1.96SEM,即置信区???
geom_bar(stat = 'summary',fun=mean,size=0.6,width=0.7)+ #size=0.7,alpha=0.8,color="black"
#geom_plot(aes(y=means,fill = group))+
#geom_jitter(aes(group , type) , size = 0.8)+
labs(y=label,title="",x="")+ #坐标标题
scale_color_manual(values=mycolors)+
scale_fill_manual(values =mycolors)+
# 纵坐标科学计数
scale_y_continuous(labels = scales::scientific,expand = expansion(mult=c(0,0.05)))+ # expansion限制柱形图的上下沿
# 添加显著性标记abc
geom_text(aes(x = group , y = means+sd+0.05*maxx , label = marker) , size = 2.5 , position = position_dodge(0.6), color="black")+
facet_wrap(.~type , #type列作为变量,分面为一行多列
scales = "free_y",nrow=nrow)+ #scales = "free_y"可以使各个分面有自己的y轴刻度,nrow=2是两列
theme_bw()+
theme(
#aspect.ratio=2.5,#图像长宽比
#plot.title = element_text(hjust=0.5,size=15,face='bold'),# 标题居中,加粗则face='bold'
axis.title.y=element_text(size=12,vjust = 1), # 纵坐标标题,vjust越大越往???
axis.ticks.x = element_blank(), # 删去x轴刻度线
axis.ticks.length=unit(0.1,"cm"), # 刻度线长度
axis.text.y = element_text(size = 9, color = "black"), #纵坐标的属性
axis.text.x=element_text(hjust=1,vjust=0.2,angle=90,size=9.5, color = x_col), # 横坐标的属性,hjust=1,向右对齐
panel.grid=element_blank(), # 去掉背景网格
legend.position = "none",
#panel.border = element_rect(linewidth=0.9)
)
ggsave(paste0(output_name,".pdf"),width = width,height = height)
ggsave(paste0(output_name,".tiff"),width = width,height = height)
write.csv(file,paste0(output_name,"_plot_file.csv"),row.names = FALSE) #可以通过表格查看用了什么检验
return(p)
}
single_plot<-function(file,label,width,height,output_name){
p=ggplot(file,aes(x=group,y=means,col=group,fill=group))+
# ymin=means-0.05*sd是为了把误差线的下半部分藏在bar的后面防止穿模,如果要把他全部显示,去掉0.05*,并且把加error bar的移到画bar那行的下面就好了
geom_errorbar(aes(ymin=means-0.05*sd, ymax=means+sd), width=0.4,size=0.4, color="black")+# 添加误差线,也可以用ymin=means-0.98*sd, ymax=means+0.98*sd,4是样本个数时,1.96SEM,即大概是置信区间
geom_bar(stat = 'summary',fun=mean,size=0.6,width=0.7)+ #size=0.7,alpha=0.8,color="black"
#(有jitter时)coord_cartesian(ylim=c(4.5,5.27)) + #限制y轴范围,需要加上coord_cartesian保留所有数据点,否则会报错无法画出柱形图
#geom_jitter(width = 0.3,size=1,color="black")+#添加抖动点,width是这些都抖动点允许左右摆的距离
# 添加显著性标记abc
geom_text(aes(x = group, y=means+sd+0.04*maxx, label = groups), color="black",size = 4, position = position_dodge(0.6))+ #添加abc
scale_color_manual(values=mycolors)+
scale_fill_manual(values =mycolors)+
scale_y_continuous(expand = expansion(mult=c(0,0.05)))+#若长度为2,如mult=(0.1,0.2),则向下扩展10%,向上扩20%
#坐标标题
labs(y=label,title="",x="")+
theme_bw()+
theme(
aspect.ratio=1,#图像长宽比
plot.title = element_text(hjust=0.5,size=15,face='bold'),# 标题居中,加粗则face='bold'
axis.title.y=element_text(size=12,vjust = 1), # 纵坐标标题
axis.ticks.x = element_blank(), # 删去x轴刻度线
axis.ticks.length=unit(0.1,"cm"), # 刻度线长度
axis.text.y = element_text(size = 10, color = "black"), #纵坐标的属性
axis.text.x=element_text(hjust=0.5,vjust=0.81,angle=30,size=9.5, color = x_col), # 横坐标的属性,hjust=0.5居中
panel.grid=element_blank(), # 去掉背景网格
legend.position = "none",
panel.border = element_rect(linewidth=0.9)
)
ggsave(paste0(output_name,".pdf"),width = width,height = height)
ggsave(paste0(output_name,".tiff"),width = width,height = height)
write.csv(file,paste0(output_name,"_plot_file.csv"),row.names = FALSE) #可以通过表格查看用了什么检验
return(p)
}
data=read.csv(file='my.csv',row.names = 1,header=T) # 列名为样本,行名为变量
data=t(data) # 转置,当然如果文件本来就是列名为变量就不用转置了
# 导入分组信息,顺序一定需要和导入的data文件相同
# 这个mapping_file需要有一列全数字的分组,不然dunnettT3Test函数不识别
map=read.table('mapping_file.txt',header = T,sep='\t') #row.names = 1,
data<-data.frame(map,data) #合并两个表格
A1=data$A1
out_1 <- compare(A1,"A1")
A2=data$A2
out_2 <- compare(A2,"A2")
A3=data$A3
out_3 <- compare(A3,"A3")
# # 要画分面图的话,按行合并上述表格
alpha_out <- rbind(out_1,out_2)%>%rename_with(~"marker" , 2)
alpha_out$type <- factor(alpha_out$type, levels = unique(alpha_out$type)) # 把type这列因子化
write.csv(alpha_out,"significance.csv") #可以通过表格查看用了什么检验
#画图参数
mycolors<-c("#392F41","#1b6393","#64acbf" ,"#edab63","#cd5347","#edab63","#edab63")
x_col=rep(c("#black","#black","#black" ,"#black","#black","#black","#black")) #设置横坐标颜色顺序
# 画图!五个参数分别是输入文件,label名,width,height,输出文件前缀
p=facet_plot(alpha_out,"my",8,5,"trytry")
p
p=single_plot(out_1,"my",8,5,"trytry")
p