1. 挑选软阈值
- 软阈值的要求:无尺度拓扑网络系数 R^2 高于 0.85,或者或平均连接度降到 100 以下
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=1))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
sizeGrWindow(9, 5)
png("step2-beta-value.png",width = 800,height = 600)
par(mfrow = c(1,2));
cex1 = 0.9 # 图里面字体大小
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red")+
abline(h=0.70,col="red")# this line corresponds to using an R^2 cut-off of
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")+abline(h=100,col="red")
dev.off()
sft$powerEstimate # 自动评估的软阈值
-
- 如果没有合适的阈值,可以使用 R 包作者提供的经验阈值,如下
-
2. 构建网络(auto)
net = blockwiseModules(datExpr, power = 7,
TOMType = "unsigned",
minModuleSize = 30,
reassignThreshold = 0,
mergeCutHeight = 0.25,
numericLabels = TRUE,
pamRespectsDendro = FALSE,
saveTOMs = FALSE,
saveTOMFileBase = "WGCNA TOM",
verbose = 3)
# 参数mergeCutHeight是模块合并的阈值。net$Colors包含模块赋值,net$MES包含模块的模块特征(module eigengenes of the modules)。
table(net$colors)
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# 10 1215 1113 511 334 325 272 192 190 174 171 160 127 74 72 60
# 共15个模块,按照大小从大到小的顺序标记为1到15。0表示没有聚到另外15个模块里
#用于模块标识的层次聚类树形图(树)在net$Dendrogram[[1]];$中返回。
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
png("step2-genes-modules.png",width = 800,height = 500)
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs
geneTree = net$dendrograms[[1]] #用于模块标识的层次聚类树形图(树)保存在geneTree里面
save(MEs, moduleLabels, moduleColors, geneTree,
file = "WGCNA-02-networkConstruction-auto.RData")
3. 构建网络(step-by-step)
3.1. 生成邻接矩阵和 TOM 矩阵
## Co-expression similarity and adjacency
softPower = 7
# 若有最佳的power的估计值,则用“sft$powerEstimate”
adjacency = adjacency(datExpr, power = softPower)
## To minimize effects of noise and spurious associations (最小化噪声和伪关联的影响), we transform the adjacency into Topological Overlap Matrix, and calculate the corresponding dissimilarity (并计算相应的相异性):
# Turn adjacency into topological overlap
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM
3.2. 聚类
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")
# Plot the resulting clustering tree (dendrogram)
sizeGrWindow(12,9)
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
labels = FALSE, hang = 0.04);
3.3. 识别模块
# 在聚类树(树状图)中,每一片叶子,即一条短的垂直线,对应一个基因。树状图的分支群紧密相连,高度共表达基因。模块识别相当于单个分支的识别(“从树状图上切下分支”)。有几种分支切割方法;我们的标准方法是从包Dynamic Tree Cut中切割动态树。下一段代码说明了它的用途。
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
table(dynamicMods)
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
# 1 471 419 240 227 216 193 190 185 174 167 151 151 151 131 116 110 108 103 98 96 93 89
# 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
# 88 87 80 74 73 73 72 66 65 63 60 60 54 48 47 39 37 34
# 函数返回40个模块,标记为1–40从大到小。标签0是为未分配的基因保留的。上面的命令列出了模块的大小。我们现在在基因树状图下绘制模块分配
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
# Plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
3.4. 合并共表达模块
# 动态树切割可以识别其表达谱非常相似的模块。我们应当合并高度共表达的模块。为了量化整个模块的共表达相似性,我们计算它们的特征基因并根据它们的相关性对它们进行聚类:
# Calculate eigengenes
MEList = moduleEigengenes(datExpr, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
# Plot the result
sizeGrWindow(7, 6)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
# 我们选择0.25的高度切割,相当于对相互之间相关性为0.75进行合并
MEDissThres = 0.25
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
# The merged module colors
mergedColors = merge$colors;
length(table(mergedColors))
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;
sizeGrWindow(12, 9)
#pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
#dev.off()
3.5. 保存数据
# Rename to moduleColors
moduleColors = mergedColors
# Construct numerical labels corresponding to the colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;
# Save module colors and labels for use in subsequent parts
save(MEs, moduleLabels, moduleColors, geneTree, file = "WGCNA-02-networkConstruction-stepByStep.RData")
友情宣传
- 全国巡讲全球听(买一得五),第二期 ,你的生物信息学入门课
- 生信技能树的2019年终总结 ,你的生物信息学成长宝藏
- 2020学习主旋律,B站74小时免费教学视频为你领路