出发点
源自Large-scale proteomic analysis of Alzheimer’s disease brain and cerebrospinal fluid reveals early changes in energy metabolism associated with microglia and astrocyte activation这篇文章中的分析结果图(如下所示),想尝试看下是否可以利用自身数据复现该图!
初步思考
- 初步看文章,该分析源自WGCNA,目的在于比较基因模块在不同数据集中的保守性(preservation),以便剔除保守性较差的模块。
- 通过查看WGCNA文档,的确有modulePreservation()这个函数用来计算保守性Z值。
- 查看相关教程,发现有两种方式:一是针对某一数据集,先通过WGCNA分析得到基因模块信息,然后将总数据拆分成两组数据,再用modulePreservation()计算模块在两组数据中的保守性;二是分别对两组数据进行WGCNA分析,得到模块信息,然后以其中一组为对照,同样用modulePreservation()计算对照组模块在两组数据中的保守性。下文采用的是第一种方式。
具体该如何操作?
清除环境,加载R包,载入数据
# Load WGCNA and the previously saved expression / trait objects.
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes unrelated user objects while leaving attached packages and options
# untouched, so it does not provide a clean session anyway.
library(WGCNA)
#save(datExpr,datTraits,file = "WGCNA_data.Rdata")
loadedObjects <- load(file = "WGCNA_data.Rdata")
# Fail early, with a clear message, if the file lacks the objects used below.
stopifnot(all(c("datExpr", "datTraits") %in% loadedObjects))
datExpr[1:4,1:4] # 56 samples, 5000 genes (samples in rows, genes in columns)
# ENSG00000210082 ENSG00000198712 ENSG00000198804 ENSG00000210845
# GSM1172844 78053.20 103151.73 112917.53 92808.32
# GSM1172845 96200.86 157203.85 163847.92 93501.17
# GSM1172846 18259.11 40704.97 13357.02 75183.17
# GSM1172847 33184.15 43673.63 15360.50 91278.61
datTraits[1:4,1:3] # 56 samples, 3 clinical annotations
# gsm cellline subtype
# GSM1172844 GSM1172844 184A1 Non-malignant
# GSM1172845 GSM1172845 184B5 Non-malignant
# GSM1172846 GSM1172846 21MT1 Basal
# GSM1172847 GSM1172847 21MT2 Basal
# Phenotype data is not actually needed to compute the preservation Z statistics.
确定软阈值,结果为6
# Candidate soft-thresholding powers: 1..10, then 12..20 in steps of 2.
powers <- c(1:10, seq(from = 12, to = 20, by = 2))
sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)

# Extract the plotted quantities from the fit table once, instead of
# re-indexing sft$fitIndices inside every plotting call.
fitTable <- sft$fitIndices
powerAxis <- fitTable[, 1]
signedR2 <- -sign(fitTable[, 3]) * fitTable[, 2]
meanConnectivity <- fitTable[, 5]
cex1 <- 0.9

# Write both diagnostic panels side by side into one PNG file.
png("beta-value.png", width = 800, height = 600)
par(mfrow = c(1, 2))

# Left panel: scale-free topology fit versus power, labelled by power value.
plot(
  powerAxis, signedR2,
  type = "n",
  xlab = "Soft Threshold (power)",
  ylab = "Scale Free Topology Model Fit,signed R^2",
  main = "Scale independence"
)
text(powerAxis, signedR2, labels = powers, cex = cex1, col = "red")
# Reference line at R^2 = 0.90.
abline(h = 0.90, col = "red")

# Right panel: mean connectivity versus power.
plot(
  powerAxis, meanConnectivity,
  type = "n",
  xlab = "Soft Threshold (power)",
  ylab = "Mean Connectivity",
  main = "Mean connectivity"
)
text(powerAxis, meanConnectivity, labels = powers, cex = cex1, col = "red")
dev.off()
构建加权共表达网络,得到模块信息
# Build the co-expression network and detect modules in a single call.
# pickSoftThreshold() leaves powerEstimate as NA when none of the candidate
# powers reaches its R^2 cut-off; stop with an actionable message instead of
# passing NA on to blockwiseModules().
softPower <- sft$powerEstimate
if (length(softPower) != 1 || is.na(softPower)) {
  stop("pickSoftThreshold() did not return a power estimate; ",
       "inspect beta-value.png and choose the power manually.",
       call. = FALSE)
}
net <- blockwiseModules(
  datExpr,
  power = softPower,
  # Larger than the ~5000 genes in datExpr, so all genes stay in one block.
  maxBlockSize = 6000,
  # The default network type, spelled out so that later steps (e.g. module
  # preservation) can be checked against it.
  networkType = "unsigned",
  TOMType = "unsigned", minModuleSize = 30,
  reassignThreshold = 0, mergeCutHeight = 0.25,
  numericLabels = TRUE, pamRespectsDendro = FALSE,
  saveTOMs = TRUE,
  saveTOMFileBase = "AS-green-FPKM-TOM",
  verbose = 3)
# Module sizes by numeric label.
table(net$colors)
## Normally the modules above would be merged further; that step is skipped here.
模块的可视化
# Convert the numeric module labels to colour names and tabulate module sizes.
moduleColors <- labels2colors(net$colors)
table(moduleColors)

# Gene dendrogram of the first block with its module colours drawn underneath,
# written to a PNG file.
firstBlockTree <- net$dendrograms[[1]]
firstBlockGenes <- net$blockGenes[[1]]
png("genes-modules.png", width = 800, height = 600)
plotDendroAndColors(
  firstBlockTree,
  moduleColors[firstBlockGenes],
  "Module colors",
  dendroLabels = FALSE,
  hang = 0.03,
  addGuide = TRUE,
  guideHang = 0.05
)
dev.off()
将数据拆分成两个数据集,进行保守性Z值的计算
# Split the samples into two halves and score how well the modules detected on
# the full data set are preserved between the halves.
datExpr_all <- as.data.frame(t(datExpr)) # genes in rows, samples in columns
nSamples <- ncol(datExpr_all)
stopifnot(nSamples >= 2, length(moduleColors) == nrow(datExpr_all))
# Integer division: plain n / 2 yields fractional column indices when the
# sample count is odd, which silently drops the last sample from the second half.
half <- nSamples %/% 2
dat1 <- datExpr_all[, seq_len(half), drop = FALSE]
dat2 <- datExpr_all[, (half + 1):nSamples, drop = FALSE]
# Combine dat1 and dat2 into the list format modulePreservation() expects:
# one element per data set, each holding a samples-by-genes `data` entry.
multiExpr <- list(A1 = list(data = t(dat1)), A2 = list(data = t(dat2)))
multiColor <- list(A1 = moduleColors)
## Compute the preservation Z statistics with modulePreservation().
## networkType is "unsigned" to match how the modules were detected
## (blockwiseModules() with its default unsigned network and TOMType = "unsigned");
## scoring them on a signed network would evaluate a different network.
mp <- modulePreservation(multiExpr, multiColor, referenceNetworks = 1, verbose = 3,
                         networkType = "unsigned",
                         nPermutations = 30, maxGoldModuleSize = 100, maxModuleSize = 400)
stats <- mp$preservation$Z$ref.A1$inColumnsAlsoPresentIn.A2
# Module size and Zsummary, ordered from most to least preserved.
stats[order(-stats[,2]),c(1:2)]
# Example output from the author's original run (made with networkType = "signed");
# a re-run with the settings above will give different numbers.
# moduleSize Zsummary.pres
# green 270 29.861591
# magenta 124 29.403919
# blue 355 27.080263
# turquoise 400 25.272498
# brown 305 25.124595
# pink 168 24.881998
# yellow 279 22.605801
# purple 108 22.061868
# midnightblue 84 17.624309
# grey60 67 17.286478
# tan 88 17.211929
# black 175 16.686788
# red 241 14.397391
# darkturquoise 41 12.963404
# orange 40 12.240383
# lightyellow 61 11.661585
# salmon 87 10.845303
# white 37 9.505908
# darkgrey 40 9.104845
# darkorange 38 8.615691
# darkred 47 8.612495
# darkgreen 44 8.114247
# royalblue 60 7.766209
# gold 100 7.111177
# greenyellow 101 6.883031
# cyan 84 6.503736
# lightgreen 62 4.917559
# lightcyan 77 4.261902
# grey 246 1.412561
Z值结果的可视化绘图
# Scatter plot of module size versus preservation Zsummary, one point per
# module, coloured with the module's own colour name.
stats$module <- rownames(stats)
library(ggplot2)
library(ggrepel)
# Module names are themselves R colour names. Naming the vector makes
# scale_colour_manual() match colours by module name; an unnamed vector is
# matched by position against the sorted levels, which is only correct when
# the rows of `stats` happen to be in alphabetical order.
moduleColourMap <- setNames(stats$module, stats$module)
g <- ggplot(data = stats, aes(x = moduleSize, y = Zsummary.pres, col = module)) +
  geom_point(alpha = 0.8, size = 5) +
  theme_bw(base_size = 15) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black")) +
  xlab("ModuleSize") + ylab("Zsummary.pres") +
  ggtitle("Preservation Zsummary") +
  theme(plot.title = element_text(size = 15, hjust = 0.5)) +
  scale_colour_manual(values = moduleColourMap) +
  ## Hide the legend (points are labelled directly).
  theme(legend.position = "none") +
  ## Threshold lines at Z = 2 (blue) and Z = 10 (red).
  geom_hline(yintercept = c(2, 10), lty = 4, lwd = 1, col = c("blue", "red")) +
  ## Label every point with its module name.
  geom_text_repel(aes(label = module), color = "black", alpha = 0.8)
print(g)
# ggsave() opens and closes its own graphics device, so no dev.off() follows;
# an extra dev.off() here would close the device print(g) has just drawn on.
ggsave(g, filename = "Preservation Zsummary.pdf", height = 8, width = 6)
图注说明:蓝线表示Z=2,红线表示Z=10;Z<2代表无保守性,2<Z<10代表弱至中等保守性,Z>10代表高度保守性。
往期回顾
R绘图 | 基于文献结果图的重现思考!
R绘图 | Cell-Type Enrichment分析探索
R绘图|韦恩图的常见绘制方法
R绘图|ggplot2火山图的绘制
R绘图|ggplot2散点图的绘制
R绘图|pheatmap热图绘制——基础篇
R绘图|pheatmap热图绘制——中阶篇
R绘图|pheatmap热图绘制——高阶篇