1. 计算 ME 值
# Record the dimensions of the expression data: rows are counted as samples,
# columns as genes.
nSamples <- nrow(datExpr)
nGenes <- ncol(datExpr)
# Recompute the module eigengenes (MEs) from the colour labels, then pass them
# through orderMEs(). Per the original note, the result is a samples x modules
# matrix of ME values.
MEs0 <- moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs <- orderMEs(MEs0)
2. 计算模块与性状的相关性并可视化
# Correlate every module eigengene with every clinical trait.
# The result has one row per column of MEs (module) and one column per column
# of datTraits (trait).
moduleTraitCor <- cor(MEs, datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
# Cell labels for the heatmap: the correlation (2 significant digits) with its
# p-value (1 significant digit) in parentheses on the next line.
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
                    signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
# The figure is written straight to a PNG file, so no on-screen graphics window
# is opened beforehand (it would stay empty and, in a non-interactive session,
# only produce a stray device).
png("step3-Module-trait-relationships.png", width = 1500, height = 1200, res = 130)
# tryCatch(..., finally = ) guarantees the PNG device is closed even when the
# plotting call fails; otherwise later plots would be drawn into this file.
tryCatch({
  par(mar = c(5, 8, 2, 2))
  # Display the correlation values within a heatmap plot
  labeledHeatmap(Matrix = moduleTraitCor,
                 xLabels = names(datTraits),
                 yLabels = names(MEs),
                 ySymbols = names(MEs),
                 colorLabels = FALSE,
                 # WGCNA warns that greenWhiteRed is unsuitable for red-green
                 # colour-blind readers and recommends blueWhiteRed instead.
                 colors = blueWhiteRed(50),
                 textMatrix = textMatrix,
                 setStdMargins = FALSE,
                 cex.text = 0.5,
                 zlim = c(-1, 1),
                 main = "Module-trait relationships")
}, finally = invisible(dev.off()))
3. 计算各基因表达量与模块 ME 和性状的关系(MM and GS)并可视化
- GS (gene significance): defined as (the absolute value of) the correlation between the gene and the trait.
- MM (module membership): defined as the correlation between the module eigengene and the gene expression profile. This allows us to quantify the similarity of all genes on the array to every module.
# Extract the "months" column of datTraits as a one-column data frame
months <- as.data.frame(datTraits$months)
names(months) <- "months"
# Module names (colours): the eigengene column names with their first two
# characters dropped
modNames <- substring(names(MEs), 3)
# Module membership (MM): correlation of each gene's expression profile with
# each module eigengene, plus the corresponding p-values
geneModuleMembership <- as.data.frame(cor(datExpr, MEs, use = "p"))
MMPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
# Prefix the column names with "MM" / "p.MM"
names(geneModuleMembership) <- paste("MM", modNames, sep = "")
names(MMPvalue) <- paste("p.MM", modNames, sep = "")
# Gene significance (GS): correlation of each gene with the trait of interest
# (change `months` here to analyse a different clinical trait)
geneTraitSignificance <- as.data.frame(cor(datExpr, months, use = "p"))
GSPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
# Prefix the column names with "GS." / "p.GS."
names(geneTraitSignificance) <- paste("GS.", names(months), sep = "")
names(GSPvalue) <- paste("p.GS.", names(months), sep = "")
# Pick the module of interest (e.g. one that is significant in the
# module-trait heatmap)
module <- "purple"
column <- match(module, modNames)
# match() returns NA for an unknown module; fail with a clear message instead
# of indexing the membership table with NA.
if (is.na(column)) {
  stop("Module '", module, "' was not found among the module eigengenes",
       call. = FALSE)
}
moduleGenes <- moduleColors == module
# The scatterplot goes straight to a PNG file, so no on-screen graphics window
# is opened beforehand.
png("step3-Module_membership-gene_significance.png", width = 800, height = 600)
# tryCatch(..., finally = ) closes the PNG device even if plotting fails.
tryCatch({
  par(mfrow = c(1, 1))
  verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                     abs(geneTraitSignificance[moduleGenes, 1]),
                     xlab = paste("Module Membership in", module, "module"),
                     ylab = "Gene significance for months",
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
}, finally = invisible(dev.off()))
-
- 显然,GS和MM是高度相关的。
- 说明与某个性状高度显著相关的基因通常也是与该性状相关的模块中最重要的(中心)元素。
4. 计算模块内连接度
# Build an adjacency matrix from the expression data (power = 7)
adjacency <- adjacency(datExpr, power = 7)
# Compute the intramodular connectivity of every gene given the module colours
Alldegrees <- intramodularConnectivity(adjacency, moduleColors)
# Example of the resulting table:
#                         kTotal   kWithin     kOut      kDiff
# ENSMUSG00000035775.2  276.6999  39.72200 236.9779 -197.25588
# ENSMUSG00000040405.9  410.7514 184.60000 226.1514  -41.55145
# ENSMUSG00000026822.10 383.6509 229.88931 153.7616   76.12766
# ENSMUSG00000033860.9  283.5453 159.45166 124.0936   35.35806
# ENSMUSG00000067144.6  361.8859 232.59891 129.2870  103.31195
# ENSMUSG00000047586.3  378.4267  46.90961 331.5171 -284.60746
# The four columns are: total connectivity (sum of a gene's connections to all
# genes), within-module connectivity, out-of-module connectivity, and the
# difference between within- and out-of-module connectivity.
# Pull out the rows belonging to the module of interest
module <- "purple"
inModule <- (moduleColors == module)
probes <- names(datExpr)
modProbes <- probes[inModule]
length(modProbes)
KIM_module <- Alldegrees[modProbes, ]
5. 创建探针信息表
# ID conversion: inspect the probe IDs, then load the probe-to-symbol annotation
names(datExpr)[1:10]
tail(names(datExpr)[moduleColors == "purple"])
annot <- read.csv(file = "anno_probe2sym.csv", row.names = 1)
dim(annot)
names(annot)
probes <- names(datExpr)
# Position of every probe in the annotation table (NA when it is not annotated)
probes2annot <- match(probes, annot$probe)
# The following is the number of probes without annotation:
sum(is.na(probes2annot))
# Should return 0.
# Build a data frame holding, for every probe: probe ID, gene symbol, type,
# module colour, gene significance for months (with p-value), and module
# membership with p-values in all modules.
# Modules are ordered by their correlation with the trait.
# Create the starting data frame (adapt the columns to your own annotation)
geneInfo0 <- data.frame(probe = probes,
                        geneSymbol = annot$symbol[probes2annot],
                        type = annot$type[probes2annot],
                        moduleColor = moduleColors,
                        geneTraitSignificance,
                        GSPvalue)
# Order modules by the absolute correlation of their eigengene with months,
# strongest first
modOrder <- order(-abs(cor(MEs, months, use = "p")))
# Add module membership information in the chosen order.
# seq_len() keeps the loop from running when there are no module columns
# (1:0 would iterate over c(1, 0)).
for (mod in seq_len(ncol(geneModuleMembership)))
{
  oldNames <- names(geneInfo0)
  geneInfo0 <- data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]],
                          MMPvalue[, modOrder[mod]])
  names(geneInfo0) <- c(oldNames, paste0("MM.", modNames[modOrder[mod]]),
                        paste0("p.MM.", modNames[modOrder[mod]]))
}
# Order the genes first by module colour, then by decreasing absolute gene
# significance for months
geneOrder <- order(geneInfo0$moduleColor, -abs(geneInfo0$GS.months))
geneInfo <- geneInfo0[geneOrder, ]
write.csv(geneInfo, file = "geneInfo.csv")
友情宣传
- 全国巡讲全球听(买一得五),第二期 ,你的生物信息学入门课
- 生信技能树的2019年终总结 ,你的生物信息学成长宝藏
- 2020学习主旋律,B站74小时免费教学视频为你领路