CIBERSORT可以从bulk 基因表达数据估计混合细胞群中成员细胞类型的丰度。
CIBERSORT.R 代码
# CIBERSORT R script v1.04 (last updated 10-24-2016)
# Note: Signature matrix construction is not currently available; use java version for full functionality.
# Author: Aaron M. Newman, Stanford University ([email protected])
# Requirements:
# R v3.0 or later. (dependencies below might not work properly with earlier versions)
# install.packages('e1071')
# install.pacakges('parallel')
# install.packages('preprocessCore')
# if preprocessCore is not available in the repositories you have selected, run the following:
# source("http://bioconductor.org/biocLite.R")
# biocLite("preprocessCore")
# Windows users using the R GUI may need to Run as Administrator to install or update packages.
# This script uses 3 parallel processes. Since Windows does not support forking, this script will run
# single-threaded in Windows.
#
# Usage:
# Navigate to directory containing R script
#
# In R:
# source('CIBERSORT.R')
# results <- CIBERSORT('sig_matrix_file.txt','mixture_file.txt', perm, QN, absolute, abs_method)
#
# Options:
# i) perm = No. permutations; set to >=100 to calculate p-values (default = 0)
# ii) QN = Quantile normalization of input mixture (default = TRUE)
# iii) absolute = Run CIBERSORT in absolute mode (default = FALSE)
# - note that cell subsets will be scaled by their absolute levels and will not be
# represented as fractions (to derive the default output, normalize absolute
# levels such that they sum to 1 for each mixture sample)
# - the sum of all cell subsets in each mixture sample will be added to the ouput
# ('Absolute score'). If LM22 is used, this score will capture total immune content.
# iv) abs_method = if absolute is set to TRUE, choose method: 'no.sumto1' or 'sig.score'
# - sig.score = for each mixture sample, define S as the median expression
# level of all genes in the signature matrix divided by the median expression
# level of all genes in the mixture. Multiple cell subset fractions by S.
# - no.sumto1 = remove sum to 1 constraint
#
# Input: signature matrix and mixture file, formatted as specified at http://cibersort.stanford.edu/tutorial.php
# Output: matrix object containing all results and tabular data written to disk 'CIBERSORT-Results.txt'
# License: http://cibersort.stanford.edu/CIBERSORT_License.txt
#Core algorithm
CoreAlg <- function(X, y, absolute, abs_method){
#try different values of nu
svn_itor <- 3
res <- function(i){
if(i==1){nus <- 0.25}
if(i==2){nus <- 0.5}
if(i==3){nus <- 0.75}
model<-svm(X,y,type="nu-regression",kernel="linear",nu=nus,scale=F)
model
}
if(Sys.info()['sysname'] == 'Windows') out <- mclapply(1:svn_itor, res, mc.cores=1) else
out <- mclapply(1:svn_itor, res, mc.cores=svn_itor)
nusvm <- rep(0,svn_itor)
corrv <- rep(0,svn_itor)
#do cibersort
t <- 1
while(t <= svn_itor) {
weights = t(out[[t]]$coefs) %*% out[[t]]$SV
weights[which(weights<0)]<-0
w<-weights/sum(weights)
u <- sweep(X,MARGIN=2,w,'*')
k <- apply(u, 1, sum)
nusvm[t] <- sqrt((mean((k - y)^2)))
corrv[t] <- cor(k, y)
t <- t + 1
}
#pick best model
rmses <- nusvm
mn <- which.min(rmses)
model <- out[[mn]]
#get and normalize coefficients
q <- t(model$coefs) %*% model$SV
q[which(q<0)]<-0
if(!absolute || abs_method == 'sig.score') w <- (q/sum(q)) #relative space (returns fractions)
if(absolute && abs_method == 'no.sumto1') w <- q #absolute space (returns scores)
mix_rmse <- rmses[mn]
mix_r <- corrv[mn]
newList <- list("w" = w, "mix_rmse" = mix_rmse, "mix_r" = mix_r)
}
#do permutations
doPerm <- function(perm, X, Y, absolute, abs_method){
itor <- 1
Ylist <- as.list(data.matrix(Y))
dist <- matrix()
while(itor <= perm){
#print(itor)
#random mixture
yr <- as.numeric(Ylist[sample(length(Ylist),dim(X)[1])])
#standardize mixture
yr <- (yr - mean(yr)) / sd(yr)
#run CIBERSORT core algorithm
result <- CoreAlg(X, yr, absolute, abs_method)
mix_r <- result$mix_r
#store correlation
if(itor == 1) {dist <- mix_r}
else {dist <- rbind(dist, mix_r)}
itor <- itor + 1
}
newList <- list("dist" = dist)
}
#main function
CIBERSORT <- function(sig_matrix, mixture_file, perm=0, QN=TRUE, absolute=FALSE, abs_method='sig.score'){
#dependencies
require(e1071)
require(parallel)
require(preprocessCore)
if(absolute && abs_method != 'no.sumto1' && abs_method != 'sig.score') stop("abs_method must be set to either 'sig.score' or 'no.sumto1'")
#read in data
X <- read.table(sig_matrix,header=T,sep="\t",row.names=1,check.names=F)
Y <- read.table(mixture_file, header=T, sep="\t",check.names=F)
#to prevent crashing on duplicated gene symbols, add unique numbers to identical names
dups <- dim(Y)[1] - length(unique(Y[,1]))
if(dups > 0) {
warning(paste(dups," duplicated gene symbol(s) found in mixture file!",sep=""))
rownames(Y) <- make.names(Y[,1], unique=TRUE)
}else {rownames(Y) <- Y[,1]}
Y <- Y[,-1]
X <- data.matrix(X)
Y <- data.matrix(Y)
#order
X <- X[order(rownames(X)),]
Y <- Y[order(rownames(Y)),]
P <- perm #number of permutations
#anti-log if max < 50 in mixture file
if(max(Y) < 50) {Y <- 2^Y}
#quantile normalization of mixture file
if(QN == TRUE){
tmpc <- colnames(Y)
tmpr <- rownames(Y)
Y <- normalize.quantiles(Y)
colnames(Y) <- tmpc
rownames(Y) <- tmpr
}
#store original mixtures
Yorig <- Y
Ymedian <- max(median(Yorig),1)
#intersect genes
Xgns <- row.names(X)
Ygns <- row.names(Y)
YintX <- Ygns %in% Xgns
Y <- Y[YintX,]
XintY <- Xgns %in% row.names(Y)
X <- X[XintY,]
#standardize sig matrix
X <- (X - mean(X)) / sd(as.vector(X))
#empirical null distribution of correlation coefficients
if(P > 0) {nulldist <- sort(doPerm(P, X, Y, absolute, abs_method)$dist)}
header <- c('Mixture',colnames(X),"P-value","Correlation","RMSE")
if(absolute) header <- c(header, paste('Absolute score (',abs_method,')',sep=""))
output <- matrix()
itor <- 1
mixtures <- dim(Y)[2]
pval <- 9999
#iterate through mixtures
while(itor <= mixtures){
y <- Y[,itor]
#standardize mixture
y <- (y - mean(y)) / sd(y)
#run SVR core algorithm
result <- CoreAlg(X, y, absolute, abs_method)
#get results
w <- result$w
mix_r <- result$mix_r
mix_rmse <- result$mix_rmse
if(absolute && abs_method == 'sig.score') {
w <- w * median(Y[,itor]) / Ymedian
}
#calculate p-value
if(P > 0) {pval <- 1 - (which.min(abs(nulldist - mix_r)) / length(nulldist))}
#print output
out <- c(colnames(Y)[itor],w,pval,mix_r,mix_rmse)
if(absolute) out <- c(out, sum(w))
if(itor == 1) {output <- out}
else {output <- rbind(output, out)}
itor <- itor + 1
}
#save results
write.table(rbind(header,output), file="CIBERSORT-Results.txt", sep="\t", row.names=F, col.names=F, quote=F)
#return matrix object containing all results
obj <- rbind(header,output)
obj <- obj[,-1]
obj <- obj[-1,]
obj <- matrix(as.numeric(unlist(obj)),nrow=nrow(obj))
rownames(obj) <- colnames(Y)
if(!absolute){colnames(obj) <- c(colnames(X),"P-value","Correlation","RMSE")}
else{colnames(obj) <- c(colnames(X),"P-value","Correlation","RMSE",paste('Absolute score (',abs_method,')',sep=""))}
obj
}
setwd("/test")
source("CIBERSORT.R") # 从文件读取R代码, 在工作目录下
如果已经log2转化了,CIBERSORT自带anti-log2可以转换回来。
表达矩阵一般要是
exp_df <- read.csv("ExampleMixtures-GEPs.csv",header=T,check.names=F)
head(exp_df)
# 整理一下行名,列名
rownames(exp_df)=exp_df[,1]
exp_df=exp_df[,2:ncol(exp_df)]
### 转化为表达矩阵
data <- as.matrix(exp_df)
#class(data)
#typeof(data)
# Transform count data to log2-counts per million (logCPM),
# estimate the mean-variance relationship and use this to compute appropriate observation-level weights.
# The data are then ready for linear modelling.
library(limma)
v <-voom(data, plot = F, save.plot = F)
out=v$E # "matrix" "array"
head(out)
class(out)
typeof(out)
write.table(out,file="query_expr_ready.txt",sep="\t")
#把准备输入CIBERSORT的数据保存一下
out=rbind(ID=colnames(out),out)
write.table(out,file="query_expr_ready.txt",sep="\t",quote=F,col.names=F)
# ## LM22.csv文件转为LM22.txt输入文件
# LM22 <- read.csv("LM22.csv")
# head(LM22[,1:3])
# rownames(LM22) <- LM22$GeneSymbol
# LM22 <- LM22[,-1]
# write.table(LM22,"LM22.txt",sep="\t")
results=CIBERSORT("LM22.txt", "query_expr_ready.txt", perm=100, QN=TRUE)
# 保存结果文件
write.csv(results,"CIBERSORT_Output.csv")
#批量调包
# pkgs <- c("matrixStats", "pheatmap", "RColorBrewer",
# "tidyverse", "cowplot","ggpubr","bslib","ggthemes")
# lapply(pkgs, library, character.only = T)
# Read in results
cibersort_raw <- read.csv("CIBERSORT_Output.csv",row.names = 1,header = T)
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(ggpubr)
# head(cibersort_raw)
# dim(cibersort_raw)
# colnames(cibersort_raw)
Composition_df <- cibersort_raw %>%
rownames_to_column("sample") %>%
pivot_longer(cols = 2:23, # 22种细胞类型
names_to = "CellType",
values_to = "Composition")
sub_comp <- Composition_df[,c(5,1,6)] #sample,P.value,Correlation,RMSE,CellType,Composition
## 免疫细胞在所有样本中的boxplot箱图
ggboxplot(
sub_comp,
x = "CellType",
y = "Composition",
color = "black",
fill = "CellType",
xlab = "",
ylab = "Cell composition",
main = "TME Cell composition") +
theme(axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 1
))
## 每个样本中免疫细胞的组成比例图
ggbarplot(
sub_comp,
x = "sample",
y = "Composition",
size = 1,
fill = "CellType",
color = "CellType") +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
vjust = 1,
size = 10
),
legend.position = "bottom"
)
参考:
CIBERSORTx数据上传格式_weixin_43364556的博客-CSDN博客_cibersortx怎么用
CIBERSORT 学习笔记_菠萝西斯的博客-CSDN博客_cibersort算法
https://github.com/zomithex/CIBERSORT