WGCNA分析

参考文章一
参考文章二
参考文章三
我的一个例子
使用 DESeq2 标准化(normalized)后的数据做 WGCNA,效果通常比使用 TPM 标准化的数据更好一些。
代码:


# Set-up: thresholds, WGCNA options, working folder and input tables.
# Before running, transfer the input files (the filtered expression table and
# DESeq2_info.csv) into <your_working_dir>/WGCNA/.
#
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# would also delete `your_working_dir`, which has to be defined by the user
# before this point, and the script would then fail on the next lines.
if (!exists("your_working_dir")) {
  stop("Define `your_working_dir` (path to the project folder) before running.",
       call. = FALSE)
}
FC <- 0.4   # logFC threshold; only used here to build the input file name
p <- 0.05   # padj threshold; only used here to build the input file name
suppressMessages(library(WGCNA))
options(stringsAsFactors = FALSE)
allowWGCNAThreads()
wgcna_dir <- paste0(your_working_dir, "/WGCNA/")
# showWarnings = FALSE: no warning when the folder already exists (re-runs)
dir.create(wgcna_dir, recursive = TRUE, showWarnings = FALSE)

# Prepare the expression matrix and the traits
your_RNAseq <- read.csv(
  paste0(wgcna_dir, "your_RNAseq_logFC_", as.character(FC),
         "_padj_", as.character(p), ".csv"),
  header = TRUE, row.names = 1
)
# Keep columns 3..end only.
# NOTE(review): presumably the first two columns hold DE statistics rather
# than per-sample expression values -- confirm against the input file.
your_RNAseq <- your_RNAseq[, 3:ncol(your_RNAseq)]
info <- read.csv(paste0(wgcna_dir, "DESeq2_info.csv"), header = TRUE, row.names = 1)
# One-column trait table (the "condition" column), keyed by the sample names
# stored in the "collapse" column of the info table.
traits <- as.data.frame(info[, c("condition")])
row.names(traits) <- as.character(info$collapse)

#### Clean the data
# Per-gene median absolute deviation (MAD) across the columns of the table.
gene_mad <- apply(your_RNAseq, 1, mad)
# Keep genes whose MAD is above the larger of the 25% quantile and 0.01.
mad_cutoff <- max(quantile(gene_mad, probs = seq(0, 1, 0.25))[2], 0.01)
dataExprVar <- your_RNAseq[which(gene_mad > mad_cutoff), ]
# Transpose: from here on rows are samples and columns are genes.
dataExpr <- as.data.frame(t(dataExprVar))
dim(dataExpr)
gsg <- goodSamplesGenes(dataExpr, verbose = 3)

# Remove the genes and samples flagged as bad by goodSamplesGenes()
if (!gsg$allOK) {
  bad_genes <- !gsg$goodGenes
  bad_samples <- !gsg$goodSamples
  # Report what is about to be dropped
  if (any(bad_genes)) {
    printFlush(paste("Removing genes:",
                     paste(colnames(dataExpr)[bad_genes], collapse = ",")))
  }
  if (any(bad_samples)) {
    printFlush(paste("Removing samples:",
                     paste(rownames(dataExpr)[bad_samples], collapse = ",")))
  }
  # Keep only the good samples (rows) and good genes (columns)
  dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes]
}
dim(dataExpr)
# Detect outlier samples by hierarchical clustering, then remove them.
sampleTree <- hclust(dist(dataExpr), method = "average")
sampleTree_filename <- paste0(wgcna_dir, "/sampleTree.pdf")
pdf(file = sampleTree_filename, width = 7.5, height = 5)
plot(sampleTree, main = "Sample clustering to detect outliers", sub = "", xlab = "")
dev.off()

# Inspect sampleTree.pdf and list the outlier samples here
# (NAFL_12 was removed for this data set).
outlier_samples <- c("NAFL_12")
# Filter with a logical mask rather than dataExpr[-which(...), ]: when no row
# name matches, which() returns integer(0) and the negative index would
# select zero rows, silently discarding every sample.
data <- dataExpr[!(rownames(dataExpr) %in% outlier_samples), , drop = FALSE]

# Re-cluster the remaining samples to confirm the outliers are gone.
sampleTree_deleted <- hclust(dist(data), method = "average")
sampleTree_deleted_filename <- paste0(wgcna_dir, "/sampleTree_deleted.pdf")
pdf(file = sampleTree_deleted_filename, width = 7.5, height = 5)
plot(sampleTree_deleted, main = "Sample clustering for outliers deleted samples", sub = "", xlab = "")
dev.off()


#### Select the soft threshold

# Scan the candidate powers and save the two diagnostic plots
powers <- c(1:10, seq(from = 12, to = 30, by = 2))
sft <- pickSoftThreshold(data, powerVector = powers, verbose = 5)
sft_filename <- paste0(wgcna_dir, "/SoftThreshold.pdf")
cex1 <- 0.9

# Columns of sft$fitIndices used below: 1 = power, 2 = fit R^2 (given the sign
# opposite to the slope in column 3), 5 = mean connectivity.
fit_power <- sft$fitIndices[, 1]
signed_r2 <- -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2]
mean_connectivity <- sft$fitIndices[, 5]

pdf(file = sft_filename, width = 7, height = 5)
par(mfrow = c(1, 2))
# Left panel: scale-free fit versus power, with a reference line at 0.8
plot(fit_power, signed_r2,
     type = "n",
     xlab = "Soft Threshold (power)",
     ylab = "Scale Free Topology Model Fit,signed R^2",
     main = "Scale independence")
text(fit_power, signed_r2, labels = powers, cex = cex1, col = "red")
abline(h = 0.8, col = "red")
# Right panel: mean connectivity versus power
plot(fit_power, mean_connectivity,
     type = "n",
     xlab = "Soft Threshold (power)",
     ylab = "Mean Connectivity",
     main = "Mean connectivity")
text(fit_power, mean_connectivity, labels = powers, cex = cex1, col = "red")
dev.off()

# If no power reaches R^2 > 0.8, the fallback below can be used; otherwise
# choose the lowest power at which R^2 exceeds 0.8 (or 0.9).
# nSamples<-nrow(data)
# power = sft$powerEstimate
# type="unsigned"
# if (is.na(power)){
#   power = ifelse(nSamples<20, ifelse(type == "unsigned", 9, 18),
#                  ifelse(nSamples<30, ifelse(type == "unsigned", 8, 16),
#                         ifelse(nSamples<40, ifelse(type == "unsigned", 7, 14),
#                                ifelse(type == "unsigned", 6, 12))       
#                  )
#   )
# }



###### Build the network
type <- "unsigned"
# pickSoftThreshold() reports powerEstimate as NA when none of the candidate
# powers reaches its fit cut-off. Stop with an actionable message instead of
# passing NA into blockwiseModules().
if (isTRUE(is.na(sft$powerEstimate))) {
  stop("sft$powerEstimate is NA: inspect SoftThreshold.pdf and choose a power ",
       "manually before building the network.", call. = FALSE)
}
net <- blockwiseModules(
  data,
  # Check this value against the soft-threshold plot before trusting it
  power = sft$powerEstimate,
  maxBlockSize = 6000,
  TOMType = type, minModuleSize = 30,
  reassignThreshold = 0, mergeCutHeight = 0.25,
  numericLabels = TRUE, pamRespectsDendro = FALSE,
  saveTOMs = TRUE,
  # Relative file base: the TOM files are written relative to the current
  # working directory, not to wgcna_dir.
  saveTOMFileBase = "DESeq2-TOM",
  verbose = 3,
  corType = "pearson"
)
table(net$colors)
# Review the module sizes obtained with the chosen power
mergedColors <- labels2colors(net$colors)
table(mergedColors)
module_filename <- paste0(wgcna_dir, "/Module.pdf")
pdf(file = module_filename, width = 7, height = 5)
# Only the first block's dendrogram is drawn, with the colours of its genes.
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)
dev.off()
# Check the correlation between modules (eigengene network plot).
module_cor_filename <- paste0(wgcna_dir, "/Module_correlation.pdf")
MEs <- net$MEs
MEs_col <- MEs
# Eigengene columns are named "ME<numeric label>"; strip the prefix and map
# the numeric label to its colour name. Base gsub() is used because the
# stringr package (str_replace_all) is never loaded by this script.
me_labels <- as.numeric(gsub("ME", "", colnames(MEs), fixed = TRUE))
colnames(MEs_col) <- paste0("ME", labels2colors(me_labels))
MEs_col <- orderMEs(MEs_col)
# Open the device only after the data are ready, so a failure above cannot
# leave a PDF device open.
pdf(file = module_cor_filename, width = 5, height = 7)
plotEigengeneNetworks(MEs_col, "Eigengene adjacency heatmap",
                      marDendro = c(3, 3, 2, 4),
                      marHeatmap = c(3, 4, 2, 2), plotDendrograms = TRUE,
                      xLabelsAngle = 90)
dev.off()

### TOM plot
TOM_plot_filename <- paste0(wgcna_dir, "/Module_TOM_plot.pdf")
moduleColors <- labels2colors(net$colors)
# NOTE(review): geneTree is the dendrogram of block 1 only, while dissTOM is
# computed for every gene in `data`; this assumes all genes fell into a
# single block -- confirm.
geneTree <- net$dendrograms[[1]]
# Topological-overlap dissimilarity, recomputed from the expression data
dissTOM <- 1 - TOMsimilarityFromExpr(data, power = sft$powerEstimate)
# Raise the dissimilarity to the 7th power and blank the diagonal for plotting
plotTOM <- dissTOM^7
diag(plotTOM) <- NA
pdf(file = TOM_plot_filename, width = 7, height = 7)
TOMplot(plotTOM, geneTree, moduleColors,
        main = "Network heatmap plot, all genes")
dev.off()

### Traits and module heatmap
# Trait values for the samples kept in `data`, in the same row order
group <- as.data.frame(traits[row.names(data), ])
row.names(group) <- row.names(data)
# Indicator (0/1) design matrix without intercept: one column per condition
design <- model.matrix(~0+ as.character(group[,1]))
colnames(design) <- levels(as.factor(group[, 1]))
nGenes <- ncol(data)
nSamples <- nrow(data)
# Colour label of every gene, taken from the network built earlier (`net`)
moduleColors <- labels2colors(net$colors)
# Recompute the module eigengenes using the colour labels
MEs0 <- moduleEigengenes(data, moduleColors)$eigengenes
MEs <- orderMEs(MEs0)
moduleTraitCor <- cor(MEs, design, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)

sizeGrWindow(10, 6)
# Cell text: correlation on the first line, p-value in parentheses below it
textMatrix <- paste0(signif(moduleTraitCor, 2), "\n(",
                     signif(moduleTraitPvalue, 1), ")")
dim(textMatrix) <- dim(moduleTraitCor)
trait_cor_filename <- paste0(wgcna_dir, "/Module_trait_correlation.pdf")
pdf(file = trait_cor_filename, width = 5, height = 6)
par(mar = c(8.5, 8.5, 3, 3))
# Draw the correlations as a labelled heatmap
me_names <- names(MEs)
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = colnames(design),
               yLabels = me_names,
               ySymbols = me_names,
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.8,
               zlim = c(-1, 1),
               main = "Module-trait relationships")
dev.off()

### Significant module correlation with the trait
## Module correlation with traits
# The trait of interest is the FIRST column of the design matrix (the 0/1
# indicator of the first condition level).
module <- as.data.frame(design[, 1])
# Name the column after the condition it encodes. An empty name would leave
# the trait without a meaningful label in the eigengene network plot and
# produce a bare "GS." prefix for any column names derived from it.
names(module) <- colnames(design)[1]
# Eigengenes and trait combined, then ordered together
MET <- orderMEs(cbind(MEs0, module))
sizeGrWindow(5, 5)
trait_cor_filename2 <- paste0(wgcna_dir, "/Module_trait_correlation2.pdf")
pdf(file = trait_cor_filename2, width = 6, height = 5)
par(cex = 1)
plotEigengeneNetworks(MET, "", marDendro = c(1, 5, 2, 5), marHeatmap = c(4, 5, 1, 2),
                      cex.lab = 1, xLabelsAngle = 90)
dev.off()

# Inputs for the module-membership vs. gene-significance scatter plots
# Module names = eigengene column names without their first two characters
modNames <- substring(names(MEs0), 3)

# Module membership: correlation of every gene with every module eigengene
geneModuleMembership <- as.data.frame(cor(data, MEs0, use = "p"))
MMPvalue <- as.data.frame(
  corPvalueStudent(as.matrix(geneModuleMembership), nSamples)
)
names(geneModuleMembership) <- paste0("MM", modNames)
names(MMPvalue) <- paste0("p.MM", modNames)

# Gene significance: correlation of every gene with the trait held in `module`
geneTraitSignificance <- as.data.frame(cor(data, module, use = "p"))
GSPvalue <- as.data.frame(
  corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)
)
trait_label <- names(module)
names(geneTraitSignificance) <- paste0("GS.", trait_label)
names(GSPvalue) <- paste0("p.GS.", trait_label)

# One scatter plot (module membership vs. gene significance) per module.
# Iterate over the DISTINCT colours: moduleColors has one entry per gene, so
# looping over it directly would regenerate each PDF once per gene.
# The loop variable is deliberately not called `module`, so the trait data
# frame of that name is left intact.
# No sizeGrWindow()/par() call is made before pdf(): those would act on the
# screen device rather than on the PDF being written.
for (mod_color in unique(moduleColors)) {
  column <- match(mod_color, modNames)      # column of this module in the MM table
  moduleGenes <- moduleColors == mod_color  # genes assigned to this module
  trait_module_cor_filename1 <- paste0(wgcna_dir, "/trait_module_cor_", mod_color, ".pdf")
  pdf(file = trait_module_cor_filename1, width = 6, height = 5)
  verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                     abs(geneTraitSignificance[moduleGenes, 1]),
                     xlab = paste("Module Membership in", mod_color, "module"),
                     ylab = "Gene significance for ",
                     main = paste("Module membership vs. gene significance\n"),
                     cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = mod_color)
  dev.off()
}

# Export the gene list of every module to its own CSV file.
# Iterate over the DISTINCT colours: moduleColors has one entry per gene, so
# looping over it directly would rewrite each file once per gene.
probes <- colnames(data)  # gene identifiers; identical for every module
for (mod_color in unique(moduleColors)) {
  inModule <- (moduleColors == mod_color)
  modProbes <- probes[inModule]
  write.csv(modProbes, paste0(wgcna_dir, "/module_geneList_", mod_color, ".csv"))
}

你可能感兴趣的:(WGCNA分析)