# LandSCENT: estimating differentiation potency with signalling entropy


# Setup: install the packages this walkthrough needs ----
# Every package is installed only when it is missing, so re-running the
# script does not download and rebuild everything again.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Attach devtools only after it is known to be installed.
library(devtools)

# Bioconductor packages: GEOquery/Biobase are used by the download step,
# AnnotationDbi/org.Hs.eg.db by the gene-identifier mapping step.
bioc_pkgs <- c("GEOquery", "Biobase", "AnnotationDbi", "org.Hs.eg.db")
is_installed <- vapply(bioc_pkgs, requireNamespace, logical(1), quietly = TRUE)
if (!all(is_installed)) {
  BiocManager::install(bioc_pkgs[!is_installed])
}

# LandSCENT is installed from its GitHub repository.
if (!requireNamespace("LandSCENT", quietly = TRUE)) {
  devtools::install_github("ChenWeiyan/LandSCENT")
}

# 2. User-defined functional gene network ----
# Attach LandSCENT, then load the `net13Jun12.m` dataset into the workspace;
# it is the network handed to DoIntegPPI() as `ppiA.m` later in this script.
library("LandSCENT")
data("net13Jun12.m")

# 3. Single-cell RNA-seq data ----
# NOTE(review): machine-specific absolute path; change `work_dir` to a
# directory that exists on your system before running.
work_dir <- "/opt/data1/long_test/LandSCENT/mouse/"
setwd(work_dir)

# library() (unlike require()) stops immediately when a package is missing.
library(LandSCENT)
library(GEOquery)
library(Biobase)

# Download the supplementary files of GEO series GSE75748; the row names of
# the returned table are used as the paths of the downloaded files.
GSE75748 <- getGEOSuppFiles("GSE75748")

# Pick the single-cell cell-type count archive by name rather than by its
# position in the listing, and only decompress when the CSV is not already
# there, so that re-running the script does not trip over an existing file.
gz_file <- grep("sc_cell_type_ec\\.csv\\.gz$", rownames(GSE75748),
                value = TRUE)
csv_file <- file.path("GSE75748", "GSE75748_sc_cell_type_ec.csv")
if (!file.exists(csv_file)) {
  stopifnot(length(gz_file) == 1L)
  gunzip(gz_file)
}

# Expression matrix; the first CSV column supplies the row names.
rawdata.m <- as.matrix(read.csv(csv_file, row.names = 1))

# Load the `phenoscChu.v` dataset.
data(phenoscChu.v)

# 3.1 Quality control ----
library(scater)

# NOTE(review): `rawExample.m` and `phenoExample.v` were used below without
# ever being created in this script. They are presumably the example datasets
# shipped with LandSCENT, so they are loaded explicitly here; confirm against
# the installed package.
data(rawExample.m)
data(phenoExample.v)

# Wrap the raw count matrix in a SingleCellExperiment.
example.sce <- SingleCellExperiment(assays = list(counts = rawExample.m))

### Detect mitochondrial genes and spike-in RNA by gene-name prefix.
# NOTE(review): "^MT" also matches every other symbol that starts with "MT"
# (e.g. MTOR); "^MT-" may be what is intended. Left unchanged so the results
# stay comparable with the sample output below; confirm against the row names.
is.mito <- grepl("^MT", rownames(example.sce))
is.spike <- grepl("^ERCC", rownames(example.sce))

# Store the counts as a sparse matrix before computing the QC metrics.
counts(example.sce) <- as(counts(example.sce), "dgCMatrix")
# NOTE(review): calculateQCMetrics() was deprecated in later scater releases
# (addPerCellQC() is its replacement); confirm against the installed version.
example.sce <- calculateQCMetrics(
  example.sce,
  feature_controls = list(Spike = is.spike, Mt = is.mito)
)

### Cell filtering with the isOutlier() function (5 MADs per criterion):
### low log library size, high mitochondrial share, high spike-in share.
libsize.drop <- isOutlier(example.sce$total_counts, nmads = 5,
                          type = "lower", log = TRUE)
mito.drop <- isOutlier(example.sce$pct_counts_Mt, nmads = 5, type = "higher")
spike.drop <- isOutlier(example.sce$pct_counts_Spike, nmads = 5,
                        type = "higher")

# Combine the three criteria once so that the expression object and the
# phenotype vector are always subset with exactly the same mask.
discard <- libsize.drop | mito.drop | spike.drop
filter_example.sce <- example.sce[, !discard]
phenoExample.v <- phenoExample.v[!discard]

# Summary of how many cells each criterion flagged and how many remain.
data.frame(ByLibSize = sum(libsize.drop), ByMito = sum(mito.drop),
           BySpike = sum(spike.drop), Remaining = ncol(filter_example.sce))
##   ByLibSize ByMito BySpike Remaining
## 1         0      1       3        96
example.sce <- filter_example.sce

# 3.2 Normalisation ----
# Set library-size-based size factors, normalise with a log offset of 1.1,
# then pull the "logcounts" assay out as an ordinary dense matrix.
sizeFactors(example.sce) <- librarySizeFactors(example.sce)
example.sce <- normalize(example.sce, log_exprs_offset = 1.1)
example.m <- as.matrix(logcounts(example.sce))

# Smallest normalised value (sample output from the original run below).
min(example.m)
## [1] 0.1375035


# 3.3 Check gene identifiers ----
# Convert the gene-symbol row names of `example.m` to Entrez gene IDs and
# collapse rows that map to the same Entrez ID into their per-cell mean.
library(AnnotationDbi)
library(org.Hs.eg.db)

# One Entrez ID per symbol (the first one when a symbol has several);
# symbols without a mapping come back as NA.
anno.v <- mapIds(org.Hs.eg.db, keys = rownames(example.m), keytype = "SYMBOL",
                 column = "ENTREZID", multiVals = "first")
unique_anno.v <- unique(anno.v)

# Drop unmapped genes with a logical mask. The previous
# `x[-which(rownames(x) %in% NA), ]` removed EVERY row whenever no NA was
# present, because `-integer(0)` selects nothing.
mapped <- !is.na(anno.v)
entrez_ids <- as.character(anno.v[mapped])

# Per-ID column sums in order of first appearance (reorder = FALSE), divided
# by the number of source rows per ID, i.e. the mean over duplicated genes.
example_New.m <- rowsum(example.m[mapped, , drop = FALSE],
                        group = entrez_ids, reorder = FALSE)
rows_per_id <- as.vector(
  table(factor(entrez_ids, levels = rownames(example_New.m)))
)
example_New.m <- example_New.m / rows_per_id

Example.m <- example_New.m


# 4. How to use the LandSCENT package ----
# 4.1 Differentiation potency estimation
# Integrate the Entrez-indexed expression matrix with the gene network, then
# print the structure of the returned object.
Integration.l <- DoIntegPPI(
  exp.m = Example.m,
  ppiA.m = net13Jun12.m
)
str(Integration.l)
 

# Related: LandSCENT: estimating differentiation potency with signalling entropy