本文介绍了CIBERSORT两种使用方法,大家可以自行选择,方法二简单些,方法一原始些
本文顺便倡议大家使用Rproject来管理代码,感谢生信技能树jimmy老师让我知道了这么方便的玩意,再也不用拼命setwd()和getwd()了,不想看这部分可以直接下滑。
CIBERSORTx是原版网站,建议大家去学习,并且学习他们发的经典文章
鸣谢:生信技能树jimmy老师和 Biomamba 生信基地 BIOMAMBA老师
# Install (only when missing) and load the packages the CIBERSORT script needs.
# If e1071 / preprocessCore cannot be installed, installing bseqsc further down
# is the fallback the author suggests, since it brings in many CIBERSORT
# dependencies.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
# parallel ships with R itself, so there is nothing to install for it.
# preprocessCore is distributed through Bioconductor, not CRAN.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("preprocessCore", quietly = TRUE)) {
  BiocManager::install("preprocessCore")
}
library(e1071)
library(preprocessCore)
library(parallel)

# devtools is only needed to install bseqsc from GitHub.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)
if (!requireNamespace("bseqsc", quietly = TRUE)) {
  devtools::install_github("shenorrlab/bseqsc")
}
library(bseqsc) # carries many CIBERSORT dependencies; use it if the installs above fail
此法使用Cibersort工具需要三个文件:
1、sourcecibersort.R
2、LM22.txt
3、genes_exp.txt
直接把下列代码新建一个script,然后保存,保存名字为sourcecibersort.R
#' CIBERSORT R script v1.03 (last updated 07-10-2015)
#' Note: Signature matrix construction is not currently available; use java version for full functionality.
#' Author: Aaron M. Newman, Stanford University ([email protected])
#' Requirements:
#' R v3.0 or later. (dependencies below might not work properly with earlier versions)
#' install.packages('e1071')
#' (the 'parallel' package ships with R itself and needs no installation)
#' install.packages('preprocessCore')
#' if preprocessCore is not available in the repositories you have selected, run the following:
#' install.packages("BiocManager")
#' BiocManager::install("preprocessCore")
#' Windows users using the R GUI may need to Run as Administrator to install or update packages.
#' This script uses 3 parallel processes. Since Windows does not support forking, this script will run
#' single-threaded in Windows.
#'
#' Usage:
#' Navigate to directory containing R script
#'
#' In R:
#' source('CIBERSORT.R')
#' results <- CIBERSORT('sig_matrix_file.txt','mixture_file.txt', perm, QN)
#'
#' Options:
#' i) perm = No. permutations; set to >=100 to calculate p-values (default = 0)
#' ii) QN = Quantile normalization of input mixture (default = TRUE)
#'
#' Input: signature matrix and mixture file, formatted as specified at http://cibersort.stanford.edu/tutorial.php
#' Output: matrix object containing all results and tabular data written to disk 'CIBERSORT-Results.txt'
#' License: http://cibersort.stanford.edu/CIBERSORT_License.txt
#' Core algorithm
#'
#' Fits one linear nu-support-vector regression of `y` on `X` for each of
#' nu = 0.25, 0.5 and 0.75. For every fit, the product of the model's `coefs`
#' and `SV` components is clipped at zero and rescaled to sum to one; these
#' weights are used to rebuild the mixture as a weighted sum of the columns of
#' `X`. The fit whose rebuilt mixture has the smallest RMSE against `y` wins.
#'
#' @param X cell-specific gene expression: numeric matrix with one column per
#'   cell type and one row per gene
#' @param y mixed expression per sample: numeric vector with one value per row
#'   of `X`
#' @return A list with `w` (1 x ncol(X) matrix of non-negative weights that sum
#'   to one), `mix_rmse` (RMSE between the rebuilt mixture and `y`) and `mix_r`
#'   (Pearson correlation between the rebuilt mixture and `y`).
#' @export
CoreAlg <- function(X, y) {
  # candidate values of nu to try
  nu_grid <- c(0.25, 0.5, 0.75)
  svn_itor <- length(nu_grid)

  fit_one <- function(i) {
    e1071::svm(X, y,
      type = "nu-regression", kernel = "linear",
      nu = nu_grid[i], scale = FALSE
    )
  }

  # mclapply() relies on forking, which Windows lacks, so run on one core there
  n_cores <- if (Sys.info()[["sysname"]] == "Windows") 1 else svn_itor
  out <- parallel::mclapply(seq_len(svn_itor), fit_one, mc.cores = n_cores)

  weight_list <- vector("list", svn_itor)
  rmses <- rep(0, svn_itor)
  corrv <- rep(0, svn_itor)

  for (i in seq_len(svn_itor)) {
    # negative coefficients are zeroed, the rest rescaled to sum to one
    raw <- t(out[[i]]$coefs) %*% out[[i]]$SV
    raw[which(raw < 0)] <- 0
    w <- raw / sum(raw)

    # rebuild the mixture as the weighted sum of the signature columns
    u <- sweep(X, MARGIN = 2, w, "*")
    k <- apply(u, 1, sum)

    rmses[i] <- sqrt(mean((k - y)^2))
    corrv[i] <- cor(k, y)
    weight_list[[i]] <- w
  }

  # keep the model with the lowest reconstruction error
  mn <- which.min(rmses)
  list("w" = weight_list[[mn]], "mix_rmse" = rmses[mn], "mix_r" = corrv[mn])
}
#' do permutations
#'
#' Builds an empirical null distribution of correlation coefficients. Each
#' permutation draws `nrow(X)` values at random (without replacement) from all
#' entries of `Y`, standardizes them to mean 0 / sd 1, runs `CoreAlg()` on that
#' random mixture and records the resulting `mix_r`.
#'
#' @param perm Number of permutations
#' @param X cell-specific gene expression
#' @param Y mixed expression for all samples (anything `data.matrix()` accepts)
#' @return A list with one element, `dist`: a numeric vector holding one
#'   correlation per permutation (length 0 when `perm` is not positive).
#' @export
doPerm <- function(perm, X, Y) {
  n_perm <- max(0L, as.integer(perm))
  n_genes <- dim(X)[1]

  # every expression value in Y is a candidate for the random mixtures
  pool <- as.numeric(data.matrix(Y))

  # preallocated instead of growing the result with rbind() on every pass
  dist <- numeric(n_perm)

  for (itor in seq_len(n_perm)) {
    # random mixture
    yr <- pool[sample(length(pool), n_genes)]

    # standardize mixture
    yr <- (yr - mean(yr)) / sd(yr)

    # run CIBERSORT core algorithm and store the correlation
    dist[itor] <- CoreAlg(X, yr)$mix_r
  }

  list("dist" = dist)
}
#' Main functions
#'
#' Reads a signature matrix and a mixture file (tab-separated, header row,
#' gene names in the first column), restricts both to their shared genes, and
#' runs `CoreAlg()` on every mixture column. Results are written to
#' 'CIBERSORT-Results.txt' in the working directory and also returned.
#'
#' @param sig_matrix file path to gene expression from isolated cells
#' @param mixture_file heterogenous mixed expression
#' @param perm Number of permutations; with 0 no null distribution is built and
#'   the P-value column holds the placeholder 9999
#' @param QN Perform quantile normalization or not (TRUE/FALSE)
#' @return Numeric matrix with one row per mixture sample and one column per
#'   column of the signature matrix, followed by "P-value", "Correlation" and
#'   "RMSE".
#' @export
CIBERSORT <- function(sig_matrix, mixture_file, perm = 0, QN = TRUE) {
  read_expr <- function(path) {
    data.matrix(
      read.table(path,
        header = TRUE, sep = "\t", row.names = 1, check.names = FALSE
      )
    )
  }

  # read in data
  X <- read_expr(sig_matrix)
  Y <- read_expr(mixture_file)

  # order by gene name; drop = FALSE keeps a one-sample mixture a matrix
  X <- X[order(rownames(X)), , drop = FALSE]
  Y <- Y[order(rownames(Y)), , drop = FALSE]

  P <- perm # number of permutations

  # anti-log if max < 50 in mixture file
  if (max(Y) < 50) {
    Y <- 2^Y
  }

  # quantile normalization of mixture file; dimnames are saved and put back
  if (QN == TRUE) {
    saved_names <- dimnames(Y)
    Y <- preprocessCore::normalize.quantiles(Y)
    dimnames(Y) <- saved_names
  }

  # intersect genes
  Y <- Y[rownames(Y) %in% rownames(X), , drop = FALSE]
  X <- X[rownames(X) %in% rownames(Y), , drop = FALSE]

  # standardize sig matrix
  X <- (X - mean(X)) / sd(as.vector(X))

  # empirical null distribution of correlation coefficients
  if (P > 0) {
    nulldist <- sort(doPerm(P, X, Y)$dist)
  }

  header <- c("Mixture", colnames(X), "P-value", "Correlation", "RMSE")
  mixtures <- ncol(Y)

  # one text row per mixture, preallocated; the values are stored as text
  # because this table is written to disk as-is
  output <- matrix("", nrow = mixtures, ncol = length(header))
  pval <- 9999

  # iterate through mixtures
  for (itor in seq_len(mixtures)) {
    y <- Y[, itor]

    # standardize mixture
    y <- (y - mean(y)) / sd(y)

    # run SVR core algorithm
    result <- CoreAlg(X, y)
    w <- result$w
    mix_r <- result$mix_r
    mix_rmse <- result$mix_rmse

    # calculate p-value from the position of the nearest null correlation
    if (P > 0) {
      pval <- 1 - (which.min(abs(nulldist - mix_r)) / length(nulldist))
    }

    output[itor, ] <- c(colnames(Y)[itor], w, pval, mix_r, mix_rmse)
  }

  # save results
  write.table(rbind(header, output),
    file = "CIBERSORT-Results.txt", sep = "\t",
    row.names = FALSE, col.names = FALSE, quote = FALSE
  )

  # return matrix object containing all results (sample-name column dropped)
  obj <- matrix(as.numeric(output[, -1, drop = FALSE]), nrow = mixtures)
  rownames(obj) <- colnames(Y)
  colnames(obj) <- c(colnames(X), "P-value", "Correlation", "RMSE")
  obj
}
# Load the function definitions. The path is relative, so sourcecibersort.R
# has to sit in the current working directory.
source("sourcecibersort.R")

# perm = 1000 permutations; QN = TRUE switches quantile normalization on.
# The two file names can be replaced with your own.
results <- CIBERSORT(
  sig_matrix = "LM22.txt",
  mixture_file = "genes_exp.txt",
  perm = 1000,
  QN = TRUE
)
# The returned matrix can be used for heatmaps and other plots.
# Install (only when missing) and load everything the package-based workflow
# needs. If e1071 / preprocessCore cannot be installed, installing bseqsc is
# the fallback the author suggests, since it brings in many CIBERSORT
# dependencies.
for (pkg in c("e1071", "devtools", "BiocManager", "ggplot2", "pheatmap")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# parallel ships with R itself, so there is nothing to install for it.
# preprocessCore and ComplexHeatmap are Bioconductor packages, not CRAN ones.
for (pkg in c("preprocessCore", "ComplexHeatmap")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    BiocManager::install(pkg)
  }
}
library(e1071)
library(preprocessCore)
library(parallel)
library(devtools)
if (!requireNamespace("bseqsc", quietly = TRUE)) {
  devtools::install_github("shenorrlab/bseqsc")
}
library(bseqsc) # carries many CIBERSORT dependencies; use it if the installs above fail

# ---- Install the CIBERSORT package ----
if (!requireNamespace("CIBERSORT", quietly = TRUE)) {
  devtools::install_github("Moonerss/CIBERSORT")
}
library(CIBERSORT)
# All packages are installed at this point.

# Plotting packages (heatmaps and the stacked bar chart)
library(ggplot2)
library(pheatmap)
library(ComplexHeatmap)
安装好以后就可以使用cibersort函数了
# Keep an LM22 TXT file at hand as well; your own input files must follow the
# same layout as that TXT.
# With the CIBERSORT package loaded, LM22 is available as a bundled dataset.
data(LM22)
# TCGA demo data; replace it with your own expression data for a real analysis.
data(mixed_expr)

# Start exploring: peek at the top-left 5 x 5 corner of each dataset.
LM22[seq_len(5), seq_len(5)]
mixed_expr[seq_len(5), seq_len(5)]

# LM22 is the signature matrix, mixed_expr the (demo) mixture matrix.
results <- cibersort(sig_matrix = LM22, mixture_file = mixed_expr)

# How to read `results`:
#   a matrix of infiltration proportions with cell types as columns and
#   samples as rows, followed by three extra columns:
#   - P-value:     significance of the deconvolution result for the sample
#                  (presumably the permutation-based value; confirm in the
#                  package documentation)
#   - Correlation: correlation on the signature genes between the reference
#                  matrix and the input matrix
#   - RMSE:        root mean squared error between the reference matrix and
#                  the input matrix on the signature genes
# Heatmaps of the estimated cell-type proportions.
# The first ncol(LM22) columns of `results` are taken as the cell-type columns.

# Scaling within each row (sample) shows which cell type dominates inside each
# sample (M2 macrophages in the author's demo run).
rowscale <- results[, seq_len(ncol(LM22))] # working copy of the cell-type columns
rowscale <- rowscale[, colSums(rowscale) > 0, drop = FALSE] # drop all-zero columns
pheatmap(rowscale,
  scale = "row", # standardize per row; unscaled absolute values are hard to read
  cluster_cols = TRUE, # cluster the columns; without it the original order is kept
  cluster_rows = FALSE, # do not cluster the rows
  angle_col = "315" # rotation of the column labels
)

# Scaling within each column (cell type) shows which samples are comparatively
# enriched for each immune cell type.
columnscale <- results[, seq_len(ncol(LM22))]
columnscale <- columnscale[, colSums(columnscale) > 0, drop = FALSE] # drop all-zero columns
pheatmap(columnscale,
  scale = "column",
  cluster_cols = FALSE,
  cluster_rows = TRUE,
  angle_col = "315"
)
# Stacked bar chart of cell-type proportions per sample
my36colors <- c(
  "#E5D2DD", "#53A85F", "#F1BB72", "#F3B1A0", "#D6E7A3", "#57C3F3", "#476D87",
  "#E95C59", "#E59CC4", "#AB3282", "#23452F", "#BD956A", "#8C549C", "#585658",
  "#9FA3A8", "#E0D4CA", "#5F3D69", "#C5DEBA", "#58A4C3", "#E4C755", "#F7F398",
  "#AA9A59", "#E63863", "#E39A35", "#C1E6F3", "#6778AE", "#91D0BE", "#B53E2B",
  "#712820", "#DCC1DD", "#CCE0F5", "#CCC9E6", "#625D9E", "#68A180", "#3A6963",
  "#968175"
)

# The first ncol(LM22) columns of `results` are taken as the cell-type columns.
cellnum <- results[, seq_len(ncol(LM22))]

# Rescale every sample to sum to one. apply() over rows returns the transpose,
# so cell.prop has cell types in rows and samples in columns.
cell.prop <- apply(cellnum, 1, function(x) {
  x / sum(x)
})

# Long format, one row per (cell type, sample) pair, built in a single step.
# as.vector() walks cell.prop column by column, which the two rep() calls match.
data4plot <- data.frame(
  proportion = as.vector(cell.prop),
  celltype = rep(rownames(cell.prop), times = ncol(cell.prop)),
  sample = rep(colnames(cell.prop), each = nrow(cell.prop)),
  stringsAsFactors = FALSE
)

ggplot(data4plot, aes(sample, proportion, fill = celltype)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = my36colors) + # custom fill colours
  ggtitle("cell proportion") +
  theme_bw() +
  theme(
    axis.ticks.length = unit(0.5, "cm"),
    axis.title.x = element_text(size = 1),
    # tilt the x-axis labels by 45 degrees
    axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5)
  ) +
  guides(fill = guide_legend(title = NULL))
LM22:
#########链接:https://pan.baidu.com/s/1eQSEekekozS5osgydwzk1w
#@####提取码:fk88
# Cross-check a downloaded LM22.csv against the LM22 dataset bundled with the
# CIBERSORT package (the one loaded by data(LM22)).
LM22read <- read.csv("LM22.csv", header = TRUE)
gene <- LM22read[, 1]
rownames(LM22read) <- gene # first column holds the gene names
LM22read <- LM22read[, -1]
data(LM22)
# Prints TRUE when both tables hold the same values. all.equal() compares with
# a numeric tolerance, so rounding from the CSV round trip is not reported as a
# mismatch; check.attributes = FALSE ignores row/column name differences.
isTRUE(all.equal(as.matrix(LM22), as.matrix(LM22read), check.attributes = FALSE))
鸣谢:生信技能树jimmy老师和 Biomamba 生信基地 BIOMAMBA老师
有疑问可以邮件联系我,会尽力帮忙:[email protected]