关于infercnv
的输入文件整理和运行参数参考
01.infercnv-区分肿瘤样本中的正常上皮细胞-infercnv运行
参考文献Therapy-Induced Evolution of Human Lung Cancer Revealed by Single-Cell RNA Sequencing
肿瘤样本中正常ep细胞的推断思路
- 采用
cutree
对infercnv
聚类树进行截断,保证插入的正常上皮细胞聚类到一个或几个大类中,把这一个或几个大类定义为肿瘤细胞
rm(list=ls())
options(stringsAsFactors = F)
library(phylogram)
library(gridExtra)
library(grid)
require(dendextend)
require(ggthemes)
library(tidyverse)
library(Seurat)
library(infercnv)
library(miscTools)
# 导入infercnv聚类树
infercnv.dend <- read.dendrogram(file = "result/ep_infercnv_output/infercnv.observations_dendrogram.txt")
# Cut tree
####设定K值使插入的正常细胞包含在一个聚类中
#可以采用以高度截断或聚类数量截断
# infercnv.labels <- cutree(infercnv.dend, h = 8,
# order_clusters_as_data = F)
infercnv.labels <- cutree(infercnv.dend,k = 34, order_clusters_as_data = FALSE)
table(infercnv.labels)
# Color labels
cb_palette <- c("#ed1299", "#09f9f5", "#246b93", "#cc8e12", "#d561dd", "#c93f00", "#ddd53e",
"#4aef7b", "#e86502", "#9ed84e", "#39ba30", "#6ad157", "#8249aa", "#99db27", "#e07233", "#ff523f",
"#ce2523", "#f7aa5d", "#cebb10", "#03827f", "#931635", "#373bbf", "#a1ce4c", "#ef3bb6", "#d66551",
"#1a918f", "#ff66fc", "#2927c4", "#7149af" ,"#57e559" ,"#8e3af4" ,"#f9a270" ,"#22547f", "#db5e92",
"#edd05e", "#6f25e8", "#0dbc21", "#280f7a", "#6373ed", "#5b910f" ,"#7b34c1" ,"#0cf29a" ,"#d80fc1",
"#dd27ce", "#07a301", "#167275", "#391c82", "#2baeb5","#925bea", "#63ff4f") #由于本研究聚类数保留较多,为后续画图定义了较大的颜色队列
the_bars <- as.data.frame(cb_palette[1:50][infercnv.labels])
colnames(the_bars) <- "inferCNV_tree"
the_bars$inferCNV_tree <- as.character(the_bars$inferCNV_tree)
##保存树状图
pdf("result/07.infercnv_cuttree.pdf",height = 10,width = 20)
infercnv.dend %>% set("labels",rep("", nobs(infercnv.dend)) ) %>% plot(main="inferCNV dendrogram") %>%
colored_bars(colors = as.data.frame(the_bars), dend = infercnv.dend, sort_by_labels_order = FALSE, add = T, y_scale=10, y_shift = 0)
dev.off()
infercnv.labels=as.data.frame(infercnv.labels)
groupFiles='data/ep-infercnv.groupFiles.txt'
meta=read.table(groupFiles,sep = '\t')
infercnv.labels$V1=rownames(infercnv.labels)
meta=merge(meta,infercnv.labels,by='V1')
table(meta[,2:3]) #查看每个聚类下各组细胞数量
###########计算cnv分数,用于其他分析可忽略
if( ! file.exists( "result/07.cnv_scores.csv")){
tmp=read.table("result/ep_infercnv_output/infercnv.references.txt", header=T)
down=mean(rowMeans(tmp)) - 2 * mean( apply(tmp, 1, sd))
up=mean(rowMeans(tmp)) + 2 * mean( apply(tmp, 1, sd))
oneCopy=up-down
oneCopy
a1= down- 2*oneCopy
a2= down- 1*oneCopy
down;up
a3= up + 1*oneCopy
a4= up + 2*oneCopy
cnv_table <- read.table("result/ep_infercnv_output/infercnv.observations.txt", header=T)
# Score cells based on their CNV scores
# Replicate the table
cnv_score_table <- as.matrix(cnv_table)
cnv_score_mat <- as.matrix(cnv_table)
# Scoring
cnv_score_table[cnv_score_mat > 0 & cnv_score_mat < a2] <- "A" #complete loss. 2pts
cnv_score_table[cnv_score_mat >= a2 & cnv_score_mat < down] <- "B" #loss of one copy. 1pts
cnv_score_table[cnv_score_mat >= down & cnv_score_mat < up ] <- "C" #Neutral. 0pts
cnv_score_table[cnv_score_mat >= up & cnv_score_mat <= a3] <- "D" #addition of one copy. 1pts
cnv_score_table[cnv_score_mat > a3 & cnv_score_mat <= a4 ] <- "E" #addition of two copies. 2pts
cnv_score_table[cnv_score_mat > a4] <- "F" #addition of more than two copies. 2pts
# Check
table(cnv_score_table[,1])
# Replace with score
cnv_score_table_pts <- cnv_table
rm(cnv_score_mat)
#
cnv_score_table_pts[cnv_score_table == "A"] <- 2
cnv_score_table_pts[cnv_score_table == "B"] <- 1
cnv_score_table_pts[cnv_score_table == "C"] <- 0
cnv_score_table_pts[cnv_score_table == "D"] <- 1
cnv_score_table_pts[cnv_score_table == "E"] <- 2
cnv_score_table_pts[cnv_score_table == "F"] <- 2
# Scores are stored in “cnv_score_table_pts”. Use colSums to add up scores for each cell and store as vector
cell_scores_CNV <- as.data.frame(colSums(cnv_score_table_pts))
colnames(cell_scores_CNV) <- "cnv_score"
head(cell_scores_CNV)
write.csv(x = cell_scores_CNV, file = "result/07.cnv_scores.csv")
}
write.table(meta,"result/07.4.infercnv_meta.txt",sep = "\t",row.names = F,col.names = T,quote = F)
head(meta)
table(meta[,2:3])
#############根据树状图聚类注释正常和肿瘤细胞
rm(list = ls())
meta2=read.table("result/07.4.infercnv_meta.txt",sep = "\t",header = T,quote = "",check.names = F)
meta2$infertype[meta2$infercnv.labels%in%c(13,33)]="normal_ep" #根据结果选取鉴定的正常ep细胞聚类编号进行注释
meta2$infertype[!meta2$infercnv.labels%in%c(13,33)]="tumor_ep"
结果可视化
结果整合至seurat对象并提取肿瘤ep细胞
#整合进seurat对象
load(file = 'data/4.ep-noann.Rdata')
# [email protected]
# phe$celltype=Idents(sce)
# head(rownames(phe))
metatype=meta2[,c(1,4)] #提取细胞名和所在聚类数
colnames(metatype)[1]="cell"
####整合tumor-ep代码
[email protected]$infer_type =metatype[match(rownames([email protected]),metatype$cell),"infertype"] #根据细胞名称匹配聚类数整合至seurat对象
[email protected]###再次确认整合结果
####取肿瘤ep细胞子集
sce=subset(sce,infer_type %in% c('tumor_ep' ))
sce
sce$orig.ident=as.factor(as.character(sce$orig.ident)) ###去掉空子集代码
[email protected]$orig.ident #再次确认
###保存提取的肿瘤ep细胞seurat对象
save(sce,file = "data/6.tumor_ep_no_cluster_noann.Rdata")
参考:
https://blog.csdn.net/qq_38774801/article/details/115435091
https://cloud.tencent.com/developer/inventory/7049/article/1737138
问题交流:
Email:[email protected]