2019-09-29 来自初学者的困惑

以下是我昨天的运行代码:

# Learning the WGCNA package
## First install the packages and download the required data
# Run this section once, in a freshly started R session: a package that is
# currently loaded ("in use") cannot be reinstalled, and a fresh session also
# replaces the need to wipe the workspace with rm(list = ls()).
# Set the package mirrors
options()$repos
options()$BioC_mirror
options(BioC_mirror = "https://mirrors.ustc.edu.cn/bioc/")
options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
options()$repos
options()$BioC_mirror
# CRAN packages that WGCNA depends on. "splines" ships with R itself and must
# not be installed separately; "fastcluster" is a CRAN package, so it is
# installed here and only here.
install.packages(c(
  "matrixStats", "Hmisc", "foreach", "doParallel",
  "fastcluster", "dynamicTreeCut", "survival"
))
# BiocManager is the current Bioconductor installer; the legacy biocLite.R
# script is not sourced any more.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(
  c(
    "WGCNA", "stringr", "reshape2", "AnnotationDbi",
    "impute", "GO.db", "preprocessCore"
  ),
  ask = FALSE,
  update = FALSE
)

##  Step 1: load the package and read in the data

# Load the WGCNA package.
# library() stops with an error when WGCNA or one of its dependencies cannot
# be loaded; require() would only return FALSE and let the script continue
# until the first WGCNA function call fails. Installation is not repeated
# here: it belongs to the one-time setup section.
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# Read in the female liver data set
femData <- read.csv("LiverFemale3600.csv")
# Take a quick look at what is in the data set:
dim(femData)
names(femData)

##  Step 2: build the expression table - drop the first 8 columns, transpose,
##  convert to a data frame and attach the name information

# After transposing, each row corresponds to one of the remaining columns of
# femData and each column is labelled with the matching substanceBXH value.
datExpr0 <- as.data.frame(t(femData[, -(1:8)]))
colnames(datExpr0) <- femData$substanceBXH
rownames(datExpr0) <- names(femData)[-(1:8)]

##  Step 3: run the WGCNA quality check on genes and samples

gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK

##  Step 4: use the logical flags to drop the genes and samples that failed

if (!gsg$allOK) {
  # Optionally report the names of the genes and samples being removed.
  if (any(!gsg$goodGenes)) {
    printFlush(paste(
      "Removing genes:",
      paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")
    ))
  }
  if (any(!gsg$goodSamples)) {
    printFlush(paste(
      "Removing samples:",
      paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")
    ))
  }
  # Keep only the rows (samples) and columns (genes) flagged as good.
  datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}

##  Step 5: cluster the filtered samples and draw the resulting dendrogram

sampleTree <- hclust(d = dist(datExpr0), method = "average")
# Open a 12 x 9 inch graphics window for the sample tree.
# Change the dimensions if the window is too large or too small.
sizeGrWindow(12, 9)
# pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9)
par(cex = 0.6)
par(mar = c(0, 4, 2, 0))
plot(
  sampleTree,
  main = "Sample clustering to detect outliers",
  sub = "",
  xlab = "",
  cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 2
)

##  Step 6: "prune" the tree - remove the outlying samples

# Mark the cut height on the dendrogram drawn above.
abline(h = 15, col = "red")
# Assign samples to clusters below the cut line.
clust <- cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
# Cluster 1 holds the samples we want to keep.
keepSamples <- clust == 1
datExpr <- datExpr0[keepSamples, ]
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)

##  Step 7: read in the sample (trait) information and filter it

traitData <- read.csv("ClinicalTraits.csv")
dim(traitData)
names(traitData)

# Drop the columns that hold information we do not need.
allTraits <- traitData[, -c(31, 16)]
allTraits <- allTraits[, c(2, 11:36)]
dim(allTraits)
names(allTraits)
# Author's note: this removes unneeded columns such as "comments" and "Note";
# by the author's count 11 columns are dropped in total.

# Build a data frame of clinical traits whose rows line up with the
# expression data: look up each expression sample in the Mice column.
femaleSamples <- rownames(datExpr)
traitRows <- match(femaleSamples, allTraits$Mice)
datTraits <- allTraits[traitRows, -1]
rownames(datTraits) <- allTraits[traitRows, 1]

collectGarbage()

##  Step 8: repeat the clustering on the samples that were kept

# Re-cluster the samples.
sampleTree2 <- hclust(d = dist(datExpr), method = "average")
# Turn the traits into colors: white = low, red = high, grey = missing entry.
traitColors <- numbers2colors(datTraits, signed = FALSE)
# Draw the sample dendrogram with the trait colors underneath.
plotDendroAndColors(
  sampleTree2,
  traitColors,
  groupLabels = names(datTraits),
  main = "Sample dendrogram and trait heatmap"
)

##  Step 9: wrap up and save the data

save(list = c("datExpr", "datTraits"), file = "FemaleLiver-01-dataInput.RData")

然后输出结果为


image.png

image.png

说明代码本身是没有问题的。但是当我编织(knit)R Markdown 的时候,就出现了一系列的问题:

processing file: 9-28-学习WGCNA包.Rmd
  |...                                                              |   5%
  ordinary text without R code

  |......                                                           |   9%
label: setup (with options) 
List of 1
 $ include: logi FALSE

  |.........                                                        |  14%
  ordinary text without R code

  |............                                                     |  18%
label: unnamed-chunk-1
  |...............                                                  |  23%
  ordinary text without R code

  |..................                                               |  27%
label: unnamed-chunk-2
Loading required package: WGCNA
Loading required package: dynamicTreeCut
Failed with error:  'package 'fastcluster' required by 'WGCNA' could not be found'
此外: Warning message:
package 'WGCNA' was built under R version 3.5.3 
  |.....................                                            |  32%
  ordinary text without R code

  |........................                                         |  36%
label: unnamed-chunk-3
Loading required package: WGCNA
Failed with error:  'package 'fastcluster' required by 'WGCNA' could not be found'
此外: Warning message:
package 'WGCNA' was built under R version 3.5.3 
  |...........................                                      |  41%
  ordinary text without R code

  |..............................                                   |  45%
label: unnamed-chunk-4

Loading required package: WGCNA
Failed with error:  'package 'fastcluster' required by 'WGCNA' could not be found'
此外: Warning message:
package 'WGCNA' was built under R version 3.5.3 
Quitting from lines 63-66 (9-28-学习WGCNA包.Rmd) 
Error in goodSamplesGenes(datExpr0, verbose = 3) : 
  没有"goodSamplesGenes"这个函数
Calls:  ... handle -> withCallingHandlers -> withVisible -> eval -> eval
停止执行

我回头检查了一下,我确实已经安装了这个包,而且也安装了它的依赖包:

> BiocManager::install('fastcluster')
Bioconductor version 3.8 (BiocManager 1.30.4), R 3.5.2 (2018-12-20)
Installing package(s) 'fastcluster'
Warning: package ‘fastcluster’ is in use and will not be installed
Update old packages: 'BiocInstaller', 'boot', 'class', 'cluster', 'codetools', 'curl',
  'digest', 'ellipsis', 'foreign', 'MASS', 'Matrix', 'mgcv', 'nlme', 'pkgconfig',
  'rpart'

然后下面附上我的Rmarkdown代码,我不知道具体是哪里出了问题,有点懵逼。

---
title: "9-28"
author: "chelsea"
date: "2019年9月28日"
output: html_document
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: show the code, but keep warnings
# and messages out of the knitted output.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
```

学习WGCNA包

首先安装包和下载相应数据

# Installation code, left commented out (presumably so that packages are not
# reinstalled every time the document is knitted - confirm with the author).
# # Set the package mirrors
# rm(list = ls())
# options()$repos 
# options()$BioC_mirror
# options(BioC_mirror="https://mirrors.ustc.edu.cn/bioc/")
# options("repos" = c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
# options()$repos 
# options()$BioC_mirror
# 
# install.packages(c("matrixStats", "Hmisc", "splines", "foreach", "doParallel", "fastcluster", "dynamicTreeCut", "survival")) 
# source("https://bioconductor.org/biocLite.R")
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install(c("WGCNA", "stringr", "reshape2","AnnotationDbi", "impute","GO.db", "preprocessCore"),ask = F,update = F)

第一步 加载包读入数据

# Load the WGCNA package.
# library() aborts the knit right here, with the real reason, when WGCNA or a
# dependency such as fastcluster cannot be loaded. require() only returns
# FALSE, so the document would keep running until a WGCNA function is called.
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# Read in the female liver data set
femData <- read.csv("LiverFemale3600.csv")
# Take a quick look at what is in the data set:
dim(femData)
names(femData)

第二步 筛选表达矩阵先去掉前8列,转置后变成数据框并加上名字信息

# Only base R is used here, so WGCNA is not loaded again in this chunk.
# Drop the first 8 columns of femData, transpose, and convert to a data
# frame; columns are labelled with substanceBXH and rows with the names of
# the remaining femData columns.
datExpr0 <- as.data.frame(t(femData[, -(1:8)]))
names(datExpr0) <- femData$substanceBXH
rownames(datExpr0) <- names(femData)[-(1:8)]

第三步 设置条件过滤基因/筛选基因

# library() instead of require(): if WGCNA cannot be loaded, stop here with
# the loading error rather than with "could not find function
# goodSamplesGenes" on the next line.
library(WGCNA)
# Run the WGCNA quality check on genes and samples.
gsg <- goodSamplesGenes(datExpr0, verbose = 3)
gsg$allOK

此处用到WGCNA包中的goodSamplesGenes()函数,help后得到Description信息(解释信息来自谷歌翻译:检查数据以查找缺失的条目,权重低于阈值的条目以及零方差基因,并返回通过了最大缺失值或低权重值标准的样本和基因列表。 如有必要,将迭代过滤。)

第四步 根据逻辑值剔除未通过检查的基因和样本


# Drop the genes and samples that failed the quality check, if there are any.
if (!gsg$allOK) {
  # Optionally report the names of the genes and samples being removed.
  if (any(!gsg$goodGenes)) {
    printFlush(paste(
      "Removing genes:",
      paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")
    ))
  }
  if (any(!gsg$goodSamples)) {
    printFlush(paste(
      "Removing samples:",
      paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")
    ))
  }
  # Keep only the rows (samples) and columns (genes) flagged as good.
  datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}

第五步 继续分析并画出上面筛选数据之间的关联的一个类似进化树的图

# Cluster the samples (average linkage on the distance matrix).
sampleTree <- hclust(d = dist(datExpr0), method = "average")
# Open a 12 x 9 inch graphics window for the sample tree.
# Change the dimensions if the window is too large or too small.
# NOTE(review): when knitting, sizeGrWindow() may open its own graphics
# device instead of the one knitr records - consider the chunk options
# fig.width / fig.height instead; confirm before changing.
sizeGrWindow(12, 9)
# pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9)
par(cex = 0.6)
par(mar = c(0, 4, 2, 0))
plot(
  sampleTree,
  main = "Sample clustering to detect outliers",
  sub = "",
  xlab = "",
  cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 2
)

stats包的2个函数:hclust()(对一组相异度(距离)进行层次聚类分析。);dist()函数(计算并返回距离矩阵,该距离矩阵是通过使用指定的距离度量来计算数据矩阵的行之间的距离而得出的。)
WGCNA包的函数sizeGrWindow()(如果图形设备窗口已经打开,则将其关闭并以指定的尺寸(以英寸为单位)重新打开; 否则,将打开一个新窗口。)

第六步 ‘修剪进化树’将不符合的离群数据剔除

# Mark the cut height on the dendrogram.
abline(h = 15, col = "red")
# Assign samples to clusters below the cut line.
clust <- cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
# Cluster 1 holds the samples we want to keep.
keepSamples <- clust == 1
datExpr <- datExpr0[keepSamples, ]
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)

graphics包的abline()函数(此函数在当前图形上添加一条或多条直线。)
WGCNA包的cutreeStatic()函数(在层次聚类树状图上按固定高度进行切割来检测模块。 仅保留大小至少为minSize的分支。)

第七步 读入样本信息并做筛选

# Read in the clinical trait table and take a quick look at it.
traitData <- read.csv("ClinicalTraits.csv")
dim(traitData)
names(traitData)

# Drop the columns that hold information we do not need.
allTraits <- traitData[, -c(31, 16)]
allTraits <- allTraits[, c(2, 11:36)]
dim(allTraits)
names(allTraits)
# Author's note: this removes unneeded columns such as "comments" and "Note";
# by the author's count 11 columns are dropped in total.

# Build a data frame of clinical traits whose rows line up with the
# expression data: look up each expression sample in the Mice column.
femaleSamples <- rownames(datExpr)
traitRows <- match(femaleSamples, allTraits$Mice)
datTraits <- allTraits[traitRows, -1]
rownames(datTraits) <- allTraits[traitRows, 1]

collectGarbage()

WGCNA包的函数collectGarbage()(执行垃圾回收,直到可用内存指示器没有变化。)此处不是太理解,目的是什么?

第八步 根据样本信息筛选后的结果重新走一遍流程

# Re-cluster the samples that were kept.
sampleTree2 <- hclust(d = dist(datExpr), method = "average")
# Turn the traits into colors: white = low, red = high, grey = missing entry.
traitColors <- numbers2colors(datTraits, signed = FALSE)
# Draw the sample dendrogram with the trait colors underneath.
plotDendroAndColors(
  sampleTree2,
  traitColors,
  groupLabels = names(datTraits),
  main = "Sample dendrogram and trait heatmap"
)

WGCNA包的函数numbers2colors()(该函数把给定的数值输入转换为对应的颜色。),函数plotDendroAndColors()(该函数绘制层次聚类树状图,并在树状图下方绘制各对象的颜色注释。)。

第九步 收尾,保存数据。

# Store the cleaned expression and trait data in one .RData file.
save(list = c("datExpr", "datTraits"), file = "FemaleLiver-01-dataInput.RData")

你可能感兴趣的:(2019-09-29 来自初学者的困惑)