library(Seurat)
library(SeuratData)
library(hexSticker)
# Build the sticker artwork: the UMAP embedding of the annotated pbmc3k data
# set, redrawn as filled, black-outlined points coloured by cluster identity.
# DimPlot() is evaluated once and both its data and its theme are reused,
# rather than rendering the same plot twice.
# ggplot2 functions are namespace-qualified because none of the library()
# calls visible at the top of this script attaches ggplot2.
umap_plot <- DimPlot(pbmc3k.final, label = TRUE)
p <- ggplot2::ggplot(
  umap_plot$data,
  ggplot2::aes(UMAP_1, UMAP_2, fill = ident)
) +
  ggplot2::geom_point(
    shape = 21, colour = "black", stroke = 0.25, alpha = 0.8
  ) +
  umap_plot$theme +
  NoLegend() +
  theme_transparent()

# Render the hexagon sticker around the plot and write it to PseudoCell.png.
sticker(
  p,
  package = "PseudoCell", p_color = "#FFFFFF",
  url = "Seurat Weekly NO.5",
  u_size = 5, spotlight = TRUE,
  u_color = "#FFFFFF",
  p_size = 20, s_x = 1, s_y = .75,
  s_width = 1.3, s_height = 1,
  h_color = "#1881C2",
  h_fill = "pink",
  filename = "PseudoCell.png"
)
在文章《单细胞转录组中的pseudocell又是什么》中,我们介绍了pseudocell的概念,并且在文章最后贴上了sc-MCA的计算代码。作为一个Seurat的深度用户,我们不禁要想:能不能把这段代码写成可以接受Seurat对象的函数呢?而且要保留Seurat的一般风格。下面,就让我们试试吧。
首先要确定对哪个assay来计算;获得assay之后,还要确定用assay的哪个slot,以及对哪种分群来计算。确定参数后,我们来写代码:
library(purrr)
#' Pull an expression matrix out of an object (S3 generic)
#'
#' @param object Object to read from; the method is chosen by its class.
#' @param ... Additional arguments passed on to the selected method.
#' @return Whatever the dispatched method returns.
GatherData <- function(object, ...) UseMethod("GatherData")
#' Extract one slot of one assay from a Seurat object as a dense matrix
#'
#' @param object A Seurat object.
#' @param assay Name of the assay to read; `NULL` (the default) selects "RNA".
#' @param slot_use Name of the slot to read; `NULL` (the default) selects
#'   "data".
#' @param ... Unused; accepted so the signature matches the generic.
#' @return The requested assay data after conversion with `as.matrix()`.
GatherData.Seurat <- function(object,
                              assay = NULL,
                              slot_use = NULL,
                              ...) {
  # The NULL defaults let callers omit either argument. Without a default,
  # evaluating an omitted argument raises "argument is missing" before the
  # `%||%` fallback can apply.
  assay <- assay %||% "RNA"
  slot_use <- slot_use %||% "data"
  obj_data <- GetAssayData(object = object, assay = assay, slot = slot_use)
  as.matrix(obj_data)
}
这段代码用来获取指定assay和slot的表达矩阵,返回的是一个普通矩阵(matrix),而不是Seurat对象。
#' Extract one slot of one assay from a Seurat object as a dense matrix
#'
#' @param object A Seurat object.
#' @param assay Name of the assay to read; `NULL` (the default) selects "RNA".
#' @param slot_use Name of the slot to read; `NULL` (the default) selects
#'   "data".
#' @param ... Unused; accepted so the signature matches the generic.
#' @return The requested assay data after conversion with `as.matrix()`.
GatherData.Seurat <- function(object,
                              assay = NULL,
                              slot_use = NULL,
                              ...) {
  # The NULL defaults let callers omit either argument. Without a default,
  # evaluating an omitted argument raises "argument is missing" before the
  # `%||%` fallback can apply.
  assay <- assay %||% "RNA"
  slot_use <- slot_use %||% "data"
  obj_data <- GetAssayData(object = object, assay = assay, slot = slot_use)
  as.matrix(obj_data)
}
#' Average single cells into pseudocells within each cluster
#'
#' Cells are grouped by one metadata column, shuffled within each group, cut
#' into consecutive chunks and averaged gene by gene. Negative expression
#' values are set to 0 before averaging.
#'
#' @param object A Seurat object.
#' @param assay_use Assay passed to `GatherData()`; `NULL` lets it choose.
#' @param slot_use Slot passed to `GatherData()`; `NULL` lets it choose.
#' @param cluster_use One column of `object@meta.data` holding the grouping.
#'   Cells whose value in that column is `NA` are left out.
#' @param pseudocell.size Number of cells merged into one pseudocell.
#' @return The object made by `CreateSeuratObject()` from the pseudocell
#'   matrix. `@misc$idtrans` holds a data frame mapping every original cell
#'   (row names) to its pseudocell name (column `new_ids`), and
#'   `@commands$PseudoCell` holds the logged command.
PseudoCell <- function(object,
                       assay_use = NULL,
                       slot_use = NULL,
                       cluster_use = NULL,
                       pseudocell.size = NULL) {
  message(
    "tips:\n",
    "Cluster_use : one col in metadata\n",
    "pseudocell.size : how many cell will be pseudo"
  )

  meta <- object@meta.data

  # Fail early with a clear message; the NULL defaults would otherwise
  # surface as obscure errors much further down.
  if (length(cluster_use) != 1L) {
    stop("`cluster_use` must name one column of the metadata", call. = FALSE)
  }
  if (is.character(cluster_use) && !cluster_use %in% colnames(meta)) {
    stop("column '", cluster_use, "' not found in the metadata",
         call. = FALSE)
  }
  if (!is.numeric(pseudocell.size) || length(pseudocell.size) != 1L ||
        is.na(pseudocell.size) || pseudocell.size < 1) {
    stop("`pseudocell.size` must be a single number >= 1", call. = FALSE)
  }

  expr <- GatherData(object = object, assay = assay_use, slot_use = slot_use)
  expr[expr < 0] <- 0

  # split() orders groups by sorted label and drops cells with an NA label.
  cells_by_cluster <- split(rownames(meta), as.character(meta[[cluster_use]]))

  id_list <- lapply(seq_along(cells_by_cluster), function(i) {
    cluster_cells <- cells_by_cluster[[i]]
    # NOTE: positions start at 1, so chunk 0 receives pseudocell.size - 1
    # cells and every later full chunk receives pseudocell.size. This
    # historical rule is kept; the size filter below allows for it.
    chunk <- floor(seq_along(cluster_cells) / pseudocell.size)
    pseudo_ids <- paste0(names(cells_by_cluster)[i], "_Cell", chunk)
    # Random assignment: shuffle which cell lands in which chunk.
    names(pseudo_ids) <- sample(cluster_cells)
    pseudo_ids
  })
  id_vec <- unlist(id_list)
  new_ids <- data.frame(
    new_ids = id_vec,
    row.names = names(id_vec),
    stringsAsFactors = FALSE
  )

  # Per-pseudocell mean of every gene: group sums divided by group sizes.
  # rowsum() does this in one pass instead of aggregating gene by gene.
  cell_by_gene <- t(expr[, rownames(new_ids), drop = FALSE])
  sums <- rowsum(cell_by_gene, group = new_ids$new_ids)
  sizes <- table(new_ids$new_ids)
  means <- sums / as.vector(sizes[rownames(sums)])

  # Keep pseudocells with at least pseudocell.size - 1 members. A logical
  # mask is used because negative indexing with an empty which() result
  # selects nothing, which discarded every pseudocell whenever none was
  # undersized.
  keep <- names(sizes)[as.vector(sizes >= pseudocell.size - 1)]
  if (length(keep) == 0L) {
    stop("no pseudocell has at least pseudocell.size - 1 cells; ",
         "choose a smaller `pseudocell.size`", call. = FALSE)
  }

  # drop = FALSE keeps a genes x pseudocells matrix even for one survivor.
  result <- t(means)[, keep, drop = FALSE]

  newobject <- CreateSeuratObject(result)
  newobject@misc$idtrans <- new_ids
  # Store the record of this call alongside the result.
  newobject@commands$PseudoCell <- LogSeuratCommand(
    newobject,
    return.command = TRUE
  )
  newobject
}
pseudocell.size 的意思是几个单细胞变成一个pseudocell,注意不要大于最小细胞群的细胞数。
这个函数完成计算,并用LogSeuratCommand函数把本次调用的命令和参数记录在返回的Seurat对象中。下面让我们来试试吧。
library(Seurat)
# Show the first rows of the cell-level metadata of the pbmc_small object.
head(pbmc_small@meta.data)
orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups RNA_snn_res.1
ATGCCAGAACGACT SeuratProject 70 47 0 A g2 0
CATGGCCTGTGCAT SeuratProject 85 52 0 A g1 0
GAACCTGATGAACC SeuratProject 87 50 1 B g2 0
TGACTGGATTCTCA SeuratProject 127 56 0 A g2 0
AGTCAGACTGCACA SeuratProject 173 53 0 A g2 0
TCTGATACACGTGT SeuratProject 70 48 0 A g1 0
对于既想保留样本信息又想保留细胞类型信息的需求:
# Combine the sample label (groups) and the cluster label (RNA_snn_res.1)
# into a single metadata column, samcell, to use as the grouping.
pbmc_small@meta.data$samcell <- paste0(
  pbmc_small@meta.data$groups, "_", pbmc_small@meta.data$RNA_snn_res.1
)
# Print the command record stored by PseudoCell(). This needs mypbmc, which
# is assigned by the PseudoCell() call shown further down.
mypbmc@commands$PseudoCell
Command: PseudoCell(pbmc_small, "RNA", "data", "samcell", 10)
Time: 2020-12-23 15:56:53
assay_use : RNA
slot_use : data
cluster_use : samcell
pseudocell.size : 10
# Build pseudocells from pbmc_small: RNA assay, "data" slot, grouped by the
# samcell metadata column, with pseudocell.size = 10.
mypbmc <- PseudoCell(pbmc_small, "RNA","data","samcell",10)
tips:
Cluster_use : one col in metadata
pseudocell.size : how many cell will be pseudo
查看运行结果:函数返回了一个新的Seurat对象。
# Print the summary of the object returned by PseudoCell().
mypbmc
An object of class Seurat
230 features across 7 samples within 1 assay
Active assay: RNA (230 features, 0 variable features)
# Show the first rows of the metadata of the pseudocell object.
head(mypbmc@meta.data)
orig.ident nCount_RNA nFeature_RNA
g1_0_Cell0 g1 215.8404 156
g1_0_Cell1 g1 238.3377 144
g1_1_Cell0 g1 318.5675 164
g1_2_Cell0 g1 265.6965 186
g2_0_Cell0 g2 231.1276 166
g2_1_Cell0 g2 289.4952 152
新旧barcode的对应关系在mypbmc@misc$idtrans对象中。
# First rows of the table PseudoCell() stored in @misc$idtrans: row names are
# the original barcodes, new_ids is the pseudocell each one was assigned to.
head(mypbmc@misc$idtrans)
new_ids
GGCATATGCTTATC g1_0_Cell0
GCGCATCTTGCTCC g1_0_Cell0
CATGGCCTGTGCAT g1_0_Cell0
AATGTTGACAGTCA g1_0_Cell0
TTACGTACGTTCAG g1_0_Cell0
AGTCTTACTTCGGA g1_0_Cell0
注意一下pseudocell的命名规则,例如 0_Cell0:`_Cell` 之前是细胞群,`Cell` 之后是该群的第几个pseudocell(从零开始编号)。当然,你可以根据自己的心绪,自行命名。
这样,我们就为Seurat写了一个函数啦。以后想对自己的scRNA数据做什么操作,直接以函数的形式嫁接到Seurat里就可以啦。
Seurat只是一个工具吗?不,它已经变成我们的一部分了。