02.infercnv-区分肿瘤样本中的正常上皮细胞-infercnv结果处理

关于infercnv的输入文件整理和运行参数参考
01.infercnv-区分肿瘤样本中的正常上皮细胞-infercnv运行


参考文献Therapy-Induced Evolution of Human Lung Cancer Revealed by Single-Cell RNA Sequencing

Therapy-Induced Evolution of Human Lung Cancer Revealed by Single-Cell RNA Sequencing

肿瘤样本中正常ep细胞的推断思路

  • 采用cutreeinfercnv聚类树进行截断,保证插入的正常上皮细胞聚类到一个或几个大类中,把这一个或几个大类定义为肿瘤细胞
rm(list=ls())
options(stringsAsFactors = F)
library(phylogram)
library(gridExtra)
library(grid)
require(dendextend)
require(ggthemes)
library(tidyverse)
library(Seurat)
library(infercnv)
library(miscTools)
#  导入infercnv聚类树
infercnv.dend <- read.dendrogram(file = "result/ep_infercnv_output/infercnv.observations_dendrogram.txt")
# Cut tree 
####设定K值使插入的正常细胞包含在一个聚类中
#可以采用以高度截断或聚类数量截断
# infercnv.labels <- cutree(infercnv.dend, h = 8, 
#                           order_clusters_as_data = F)
infercnv.labels <- cutree(infercnv.dend,k = 34, order_clusters_as_data = FALSE)
table(infercnv.labels)
# Color labels
cb_palette <- c("#ed1299", "#09f9f5", "#246b93", "#cc8e12", "#d561dd", "#c93f00", "#ddd53e",
                "#4aef7b", "#e86502", "#9ed84e", "#39ba30", "#6ad157", "#8249aa", "#99db27", "#e07233", "#ff523f",
                "#ce2523", "#f7aa5d", "#cebb10", "#03827f", "#931635", "#373bbf", "#a1ce4c", "#ef3bb6", "#d66551",
                "#1a918f", "#ff66fc", "#2927c4", "#7149af" ,"#57e559" ,"#8e3af4" ,"#f9a270" ,"#22547f", "#db5e92",
                "#edd05e", "#6f25e8", "#0dbc21", "#280f7a", "#6373ed", "#5b910f" ,"#7b34c1" ,"#0cf29a" ,"#d80fc1",
                "#dd27ce", "#07a301", "#167275", "#391c82", "#2baeb5","#925bea", "#63ff4f")  #由于本研究聚类数保留较多,为后续画图定义了较大的颜色队列
the_bars <- as.data.frame(cb_palette[1:50][infercnv.labels])
colnames(the_bars) <- "inferCNV_tree"
the_bars$inferCNV_tree <- as.character(the_bars$inferCNV_tree) 
##保存树状图
pdf("result/07.infercnv_cuttree.pdf",height = 10,width = 20)
infercnv.dend %>% set("labels",rep("", nobs(infercnv.dend)) )  %>% plot(main="inferCNV dendrogram") %>%
  colored_bars(colors = as.data.frame(the_bars), dend = infercnv.dend, sort_by_labels_order = FALSE, add = T, y_scale=10, y_shift = 0)
dev.off()

infercnv.labels=as.data.frame(infercnv.labels)
groupFiles='data/ep-infercnv.groupFiles.txt'   
meta=read.table(groupFiles,sep = '\t')
infercnv.labels$V1=rownames(infercnv.labels)
meta=merge(meta,infercnv.labels,by='V1')
table(meta[,2:3])  #查看每个聚类下各组细胞数量
###########计算cnv分数,用于其他分析可忽略
if( ! file.exists(  "result/07.cnv_scores.csv")){
  tmp=read.table("result/ep_infercnv_output/infercnv.references.txt", header=T)
  down=mean(rowMeans(tmp)) - 2 * mean( apply(tmp, 1, sd))
  up=mean(rowMeans(tmp)) + 2 * mean( apply(tmp, 1, sd))
  oneCopy=up-down
  oneCopy
  a1= down- 2*oneCopy
  a2= down- 1*oneCopy
  down;up
  a3= up +  1*oneCopy
  a4= up + 2*oneCopy 
  cnv_table <- read.table("result/ep_infercnv_output/infercnv.observations.txt", header=T)
  # Score cells based on their CNV scores 
  # Replicate the table 
  cnv_score_table <- as.matrix(cnv_table)
  cnv_score_mat <- as.matrix(cnv_table)
  # Scoring
  cnv_score_table[cnv_score_mat > 0 & cnv_score_mat < a2] <- "A" #complete loss. 2pts
  cnv_score_table[cnv_score_mat >= a2 & cnv_score_mat < down] <- "B" #loss of one copy. 1pts
  cnv_score_table[cnv_score_mat >= down & cnv_score_mat <  up ] <- "C" #Neutral. 0pts
  cnv_score_table[cnv_score_mat >= up  & cnv_score_mat <= a3] <- "D" #addition of one copy. 1pts
  cnv_score_table[cnv_score_mat > a3  & cnv_score_mat <= a4 ] <- "E" #addition of two copies. 2pts
  cnv_score_table[cnv_score_mat > a4] <- "F" #addition of more than two copies. 2pts
  # Check
  table(cnv_score_table[,1])
  # Replace with score 
  cnv_score_table_pts <- cnv_table
  rm(cnv_score_mat)
  # 
  cnv_score_table_pts[cnv_score_table == "A"] <- 2
  cnv_score_table_pts[cnv_score_table == "B"] <- 1
  cnv_score_table_pts[cnv_score_table == "C"] <- 0
  cnv_score_table_pts[cnv_score_table == "D"] <- 1
  cnv_score_table_pts[cnv_score_table == "E"] <- 2
  cnv_score_table_pts[cnv_score_table == "F"] <- 2
  # Scores are stored in “cnv_score_table_pts”. Use colSums to add up scores for each cell and store as vector 
  cell_scores_CNV <- as.data.frame(colSums(cnv_score_table_pts))
  colnames(cell_scores_CNV) <- "cnv_score"
  head(cell_scores_CNV)
  write.csv(x = cell_scores_CNV, file = "result/07.cnv_scores.csv")
} 
write.table(meta,"result/07.4.infercnv_meta.txt",sep = "\t",row.names = F,col.names = T,quote = F)
head(meta) 
table(meta[,2:3]) 
#############根据树状图聚类注释正常和肿瘤细胞
rm(list = ls())

meta2=read.table("result/07.4.infercnv_meta.txt",sep = "\t",header = T,quote = "",check.names = F)
meta2$infertype[meta2$infercnv.labels%in%c(13,33)]="normal_ep"  #根据结果选取鉴定的正常ep细胞聚类编号进行注释
meta2$infertype[!meta2$infercnv.labels%in%c(13,33)]="tumor_ep"

结果可视化

结果整合至seurat对象并提取肿瘤ep细胞

#整合进seurat对象
load(file = 'data/4.ep-noann.Rdata') 
# [email protected]
# phe$celltype=Idents(sce)
# head(rownames(phe))
metatype=meta2[,c(1,4)] #提取细胞名和所在聚类数
colnames(metatype)[1]="cell"
####整合tumor-ep代码
[email protected]$infer_type =metatype[match(rownames([email protected]),metatype$cell),"infertype"] #根据细胞名称匹配聚类数整合至seurat对象

[email protected]###再次确认整合结果
####取肿瘤ep细胞子集
sce=subset(sce,infer_type %in% c('tumor_ep' )) 
sce
sce$orig.ident=as.factor(as.character(sce$orig.ident))  ###去掉空子集代码
[email protected]$orig.ident #再次确认
###保存提取的肿瘤ep细胞seurat对象
save(sce,file = "data/6.tumor_ep_no_cluster_noann.Rdata")

参考:
https://blog.csdn.net/qq_38774801/article/details/115435091
https://cloud.tencent.com/developer/inventory/7049/article/1737138
问题交流:
Email:[email protected]

你可能感兴趣的:(02.infercnv-区分肿瘤样本中的正常上皮细胞-infercnv结果处理)