# ---- Environment setup -------------------------------------------------------
# Point R at the site-specific library trees before attaching any package.
.libPaths(c("/home/data/t040413/R/yll/usr/local/lib/R/site-library", "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local/lib/R/library"))
library(Seurat)
#BiocManager::install("monocle",force = TRUE)
#library(monocle)
#devtools::load_all("/home/data/t040413/R/yll/usr/local/lib/R/site-library/monocle/")
library(monocle)
getwd()

# Every relative output path used later in this script resolves against this
# directory. showWarnings/recursive make the call safe to re-run and able to
# create missing parent directories.
dir.create("/home/data/t040413/ipf/diseased_lung_covid20_Reclustered_RNA_monocle/",
           showWarnings = FALSE, recursive = TRUE)
setwd("/home/data/t040413/ipf/diseased_lung_covid20_Reclustered_RNA_monocle/")

# NOTE(review): the file carries an .rds extension but is read with load(), so
# it must have been written with save(); the rest of the script expects it to
# provide an object named `subset_data` -- confirm.
load("/home/data/t040413/ipf/diseased_lung_covid20/subsetdata_fibroblast.rds")

#dir.create("./fibroblast_monocle")
# This line previously started with a stray "3" (an unshifted "#"), which made
# the whole file unparseable. It is kept disabled, matching the disabled
# dir.create() just above it.
#setwd("./fibroblast_monocle")
#library(dplyr)
# ---- Inspect the loaded object, drop smooth muscle cells, recluster ----------
DimPlot(object = subset_data, label = TRUE)
head(subset_data@meta.data)
table(subset_data$cell.type)
table(Idents(subset_data))
#subset_data=RenameIdents(subset_data,"HAS1 High Fibroblasts"="PLIN2+ Fibroblasts")

# Remove every cell whose identity is "Smooth Muscle Cells".
# A keep-mask is used instead of `x[, -which(cond)]`: when no cell matches,
# `-which(cond)` is `integer(0)` and would select zero cells rather than all of
# them. `%in%` never yields NA, so cells with a missing identity are retained.
is_smooth_muscle <- Idents(subset_data) %in% "Smooth Muscle Cells"
subset_data <- subset_data[, colnames(subset_data)[!is_smooth_muscle]]

# Store the current identities in the metadata under both column names used
# later in the script (`celltype` and `cell.type`).
subset_data$celltype <- Idents(subset_data)
table(Idents(subset_data))
subset_data$cell.type <- subset_data$celltype

# Cross-tabulations and quick looks at the filtered object.
table(subset_data$stim, Idents(subset_data))
table(subset_data$cell.type)
getwd()
table(subset_data$stim)
table(subset_data$Diagnosis)
table(subset_data$Status)
dim(subset_data)
DimPlot(subset_data)
head(subset_data@meta.data)
DimPlot(subset_data, group.by = "SCT_snn_res.0.7")
DefaultAssay(subset_data)
dim(subset_data)

# Rebuild the neighbour graph on the SCT assay and cluster at resolution 0.2.
subset_data <- FindNeighbors(subset_data, assay = "SCT")
subset_data <- FindClusters(subset_data, resolution = 0.2)
DimPlot(subset_data, label = TRUE)
# ---- Marker genes per cluster ------------------------------------------------
library(tibble)

# Positive markers only, computed for every current identity class.
markers_for_eachcluster <- FindAllMarkers(
  object = subset_data,
  only.pos = TRUE
)
head(markers_for_eachcluster)
#markers_for_eachcluster =markers_for_eachcluster %>% rownames_to_column(var="gene")

# Export the marker table to the working directory.
openxlsx::write.xlsx(
  x = markers_for_eachcluster,
  file = "markers_for_eachcluster.xlsx"
)

DimPlot(object = subset_data, label = TRUE)
DotPlot(object = subset_data, features = c("HAS1", "PLIN2"))
### Manually select cells: remove the cell population lying close to the VSM cells
# CellSelector() is interactive: the cells picked on the UMAP are the ones kept.
plot <- DimPlot(object = subset_data, reduction = "umap")
select.cells <- CellSelector(plot = plot)
head(select.cells)
subset_data <- subset(x = subset_data, cells = select.cells)
DimPlot(object = subset_data, label = TRUE)

# Recluster the retained cells (SCT assay, resolution 0.21).
subset_data <- FindNeighbors(object = subset_data, assay = "SCT")
subset_data <- FindClusters(object = subset_data, resolution = 0.21)
DimPlot(object = subset_data, label = TRUE)

# Replace the numeric cluster ids with descriptive labels.
subset_data <- RenameIdents(
  subset_data,
  "0" = "HAS1 Fibroblast",
  "1" = "Myofib1",
  "2" = "Myofib2",
  "3" = "Fibroblast",
  "4" = "Mesothelial"
)
DimPlot(object = subset_data, label = TRUE)
### Note: be deliberate about whether the RNA or the SCT assay is used here
DefaultAssay(subset_data)
DefaultAssay(subset_data) <- "RNA"

# Gene and cell names are used as keys below, so check they are unique.
table(duplicated(rownames(subset_data)))
table(duplicated(colnames(subset_data)))

# Append the current identities to the cell metadata, joining on cell name.
new.metadata <- merge(subset_data@meta.data,
                      data.frame(Idents(subset_data)),
                      by = "row.names", sort = FALSE)
head(new.metadata)
rownames(new.metadata) <- new.metadata[, 1]

# merge() does not promise to keep the input row order, so realign the merged
# table to the object's own cell order and refuse to continue on a mismatch;
# writing misordered metadata back would silently mislabel cells.
new.metadata <- new.metadata[rownames(subset_data@meta.data), , drop = FALSE]
head(subset_data@meta.data)
stopifnot(identical(rownames(new.metadata), rownames(subset_data@meta.data)))
subset_data@meta.data <- new.metadata

# Raw RNA counts as a sparse matrix. The counts are coerced directly; going
# through as.matrix() first would materialise a dense genes x cells matrix.
expression_matrix <- as(subset_data@assays$RNA@counts, "sparseMatrix")
head(expression_matrix)
identical(colnames(expression_matrix), rownames(new.metadata))

# Per-cell annotation (phenoData) for the CellDataSet.
cell_metadata <- new("AnnotatedDataFrame", data = subset_data@meta.data)
head(subset_data@meta.data)
head(cell_metadata)

# Per-gene annotation (featureData); monocle expects a `gene_short_name` column.
gene_annotation <- new(
  "AnnotatedDataFrame",
  data = data.frame(gene_short_name = row.names(subset_data),
                    row.names = row.names(subset_data))
)
head(gene_annotation)

monocle_cds <- monocle::newCellDataSet(expression_matrix,
                                       phenoData = cell_metadata,
                                       featureData = gene_annotation,
                                       lowerDetectionLimit = 0.5,
                                       expressionFamily = negbinomial.size())

# NOTE(review): these three inspections were previously applied to
# `gene_annotation` before the CellDataSet existed. fData()/phenoData()/
# featureData() are expression-set accessors, so they are applied to the
# assembled CellDataSet instead -- confirm this matches the intent.
head(fData(monocle_cds))
phenoData(monocle_cds)
featureData(monocle_cds)
###################################################################################
## Normalisation ######
# Size factors first, then dispersions, starting from the freshly built CDS.
cds <- estimateSizeFactors(monocle_cds)
cds <- estimateDispersions(cds) ## Removing 110 outliers

library("BiocGenerics") # parallel computing

# `cell.type` below is a metadata column carried over from subset_data.
diff_test_res <- differentialGeneTest(
  cds,
  fullModelFormulaStr = "~ cell.type"
)

### Infer the pseudotrajectory ####################################################
# Step 1: choose the ordering genes -> setOrderingFilter()
# Genes with qval < 0.01 in the cell.type test; which() drops NA q-values.
ordering_genes <- rownames(diff_test_res)[which(diff_test_res$qval < 0.01)]
length(ordering_genes) # 6354
cds <- setOrderingFilter(cds, ordering_genes)

# Step 2: dimension reduction -> reduceDimension() with DDRTree
cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree')
#package.version(pkg = "monocle")

# Step 3: order the cells -> orderCells()
#getwd()
#source("./order_cells.R")
#unloadNamespace('monocle')
#devtools::load_all("../monocle_2.26.0 (1).tar/monocle_2.26.0 (1)/monocle/")
# A local monocle source tree is loaded right before orderCells().
# NOTE(review): presumably a patched copy needed for orderCells() -- confirm.
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")
cds <- orderCells(cds)
# ---- Trajectory plots and State summaries ------------------------------------

# Render one plot into a PDF file.
#   file            : output path, relative to the working directory.
#   trajectory_plot : a ggplot object; it is evaluated lazily, i.e. only after
#                     the device has been opened.
# The plot is print()ed explicitly because top-level auto-printing does not
# happen under source(), which would leave the PDF empty. on.exit() closes the
# device even if building or drawing the plot fails.
# Returns `file` invisibly.
save_trajectory_pdf <- function(file, trajectory_plot) {
  pdf(file)
  on.exit(dev.off(), add = TRUE)
  print(trajectory_plot)
  invisible(file)
}

# Whole trajectory, coloured by cell type, stimulation and monocle State.
save_trajectory_pdf("1.pseudutime.cell.type.pre.order.pdf",
                    plot_cell_trajectory(cds, color_by = "cell.type"))
save_trajectory_pdf("1.pseudutime.stim.pre.order.pdf",
                    plot_cell_trajectory(cds, color_by = "stim"))
save_trajectory_pdf("1.pseudutime.State.pre.order.pdf",
                    plot_cell_trajectory(cds, color_by = "State"))

###### split ########
# Same trajectory, one facet per level of the faceting variable.
save_trajectory_pdf(
  "2.split.pseudutime.Seurat.cell.type.pdf",
  plot_cell_trajectory(cds, color_by = 'cell.type') + facet_wrap(~cell.type)
)
save_trajectory_pdf(
  "2.split.pseudutime.stim.pdf",
  plot_cell_trajectory(cds, color_by = "stim") + facet_wrap(~stim)
)
save_trajectory_pdf(
  "4.split.pseudutime.Seurat.State.pdf",
  plot_cell_trajectory(cds, color_by = 'cell.type') + facet_wrap(~State)
)
save_trajectory_pdf(
  "3.split.pseudutime.Seurat.cell.type_State.pdf",
  plot_cell_trajectory(cds, color_by = 'State') + facet_wrap(~cell.type)
)

# State x cell type and State x stim contingency tables: shown on the console
# and exported. Each table is converted to a wide data frame (States as rows)
# and written with openxlsx's `colNames` / `rowNames` arguments; the lowercase
# `colnames` / `rownames` spellings used before are not the names openxlsx
# documents.
state_by_cell_type <- table(pData(cds)$State, pData(cds)$cell.type)
state_by_cell_type
openxlsx::write.xlsx(as.data.frame.matrix(state_by_cell_type),
                     "State_cellType_summary.xlsx",
                     colNames = TRUE, rowNames = TRUE)

state_by_stim <- table(pData(cds)$State, pData(cds)$stim)
state_by_stim
openxlsx::write.xlsx(as.data.frame.matrix(state_by_stim),
                     "State_Stim_summary.xlsx",
                     colNames = TRUE, rowNames = TRUE)
## ---- Root-state notes, Pseudotime plot, save/load of the CDS ----------------
##we set the state 2 as root ########state 2 with most cells in Endothelial cells
## NOTE(review): the note above mentions Endothelial cells although this script
## loads a fibroblast subset, and no root_state is actually passed to
## orderCells() in this section (the call below is commented out) -- confirm
## which State is meant to be the root.
# Which state should be set as the root here??
DimPlot(subset_data,label = T)
table(Idents(subset_data))
DefaultAssay(subset_data)
# The default assay is switched to SCT and immediately back to RNA, so RNA is
# the active assay after these two lines.
DefaultAssay(subset_data)<-"SCT"
DefaultAssay(subset_data)<-"RNA"
DimPlot(subset_data,label = T)
# Closes the currently active graphics device; no pdf() device was opened in
# this section, so this acts on whatever device the plot above was drawn on.
dev.off()
table(subset_data$cell.type)
getwd()
#ds <- orderCells(cds,root_state=1)
# Trajectory coloured by Pseudotime, written to a PDF in the working directory.
pdf("4.pseudutime.Pseudotime.pdf")
plot_cell_trajectory(cds, color_by = "Pseudotime")
dev.off()
getwd()
# save() writes `cds` under the current working directory; the load() that
# follows reads a file of the same name from the "-1/" subdirectory and
# replaces any same-named objects in the session.
# NOTE(review): confirm that loading from "-1/" right after saving is intended.
save(cds,file="cds_fibroblast_using_RNA_slot.rds")
load("/home/data/t040413/ipf/diseased_lung_covid20_Reclustered_RNA_monocle/-1/cds_fibroblast_using_RNA_slot.rds")
getwd()
##############
#################################
# Once we have a trajectory, we can use differentialGeneTest() to find genes
# that have an expression pattern that varies according to pseudotime.

# The call previously tested `HSMM[marker_genes,]`, objects that are never
# defined in this script. The test is run on `cds`, restricted to the genes
# flagged by setOrderingFilter() (the `use_for_ordering` feature column).
# NOTE(review): confirm this gene set; substitute a curated marker vector if
# that was the intent.
pseudotime_test_genes <- row.names(subset(fData(cds), use_for_ordering))
diff_test_res <- differentialGeneTest(cds[pseudotime_test_genes, ],
                                      fullModelFormulaStr = "~sm.ns(Pseudotime)",
                                      cores = 4)
sig_gene_names <- row.names(subset(diff_test_res, qval < 0.01))

library(ggplot2)

# plot_pseudotime_heatmap() draws onto the active graphics device rather than
# producing a ggplot, so ggsave() (which saves the last ggplot) would not
# capture it. The heatmap is rendered straight into a PNG device instead, at
# ggsave's defaults (inches, 300 dpi). The cluster count is capped at the
# number of genes, and at least two genes are required for row clustering.
if (length(sig_gene_names) >= 2) {
  png('heatmap-.png', width = 20, height = 30, units = "in", res = 300)
  plot_pseudotime_heatmap(cds[sig_gene_names, ],
                          num_clusters = min(5, length(sig_gene_names)),
                          cores = 4,
                          show_rownames = TRUE)
  dev.off()
} else {
  message("Fewer than two genes pass qval < 0.01; skipping 'heatmap-.png'.")
}

# Heatmap for a small hand-picked gene set. Only genes present in `cds` are
# used, and the cluster count cannot exceed the number of genes plotted (the
# previous fixed value of 5 was larger than the two genes requested).
genes_of_interest <- intersect(c('CX3CR1', "SPP1"), row.names(cds))
if (length(genes_of_interest) >= 2) {
  plot_pseudotime_heatmap(cds[genes_of_interest, ],
                          num_clusters = min(5, length(genes_of_interest)),
                          # cores = 4,
                          show_rownames = TRUE)
} else {
  message("Need at least two of CX3CR1/SPP1 in `cds` to draw the heatmap.")
}
############------------------------------
# ---- Branch heatmap and single-gene views along the trajectory ---------------
# ggsci supplies scale_color_gsea(); it is attached here, before first use
# (it was previously attached only after the first call that needs it).
#install.packages("ggsci")
library(ggsci)

load("/home/data/t040413/ipf/diseased_lung_covid20_Reclustered_RNA_monocle/-1/cds_fibroblast_using_RNA_slot.rds")

# `mycds_sub_beam` is not created anywhere in this script, so the branched
# heatmap is only drawn when the object already exists in the session.
# NOTE(review): presumably a CDS subset of branch-dependent genes -- confirm
# how it should be built.
if (exists("mycds_sub_beam")) {
  plot_genes_branched_heatmap(mycds_sub_beam, branch_point = 1,
                              num_clusters = 3, show_rownames = TRUE)
} else {
  message("`mycds_sub_beam` is not defined; skipping the branched heatmap.")
}

plot_genes_branched_pseudotime(cds["GPX3", ], color_by = "State")

# Store log2(expression + 1) of single genes as cell metadata so that the
# trajectory can be coloured by them.
pData(cds)$GPX3 <- log2(exprs(cds)["GPX3", ] + 1)
plot_cell_trajectory(cds, color_by = "GPX3") + scale_color_steps2()
plot_cell_trajectory(cds, color_by = "GPX3") + scale_color_gsea()

pData(cds)$FN1 <- log2(exprs(cds)["FN1", ] + 1)
plot_cell_trajectory(cds, color_by = "FN1") + scale_color_gsea()

# GPX3 per State, and along pseudotime.
plot_genes_violin(cds["GPX3", ], grouping = "State", color_by = "State")
plot_genes_in_pseudotime(cds["GPX3", ], color_by = "State")
################################################################################
#https://davetang.org/muse/2017/10/01/getting-started-monocle/
#
# Snippet adapted from the tutorial linked above.
# NOTE(review): `my_pseudotime_de` (a pseudotime differential-expression table
# with `qval` and `id` columns) and `my_cds_subset` (a CellDataSet) are not
# created anywhere in this script; they must exist before this section runs,
# so fail early with a clear message otherwise.
stopifnot(exists("my_pseudotime_de"), exists("my_cds_subset"))

# Six rows with the smallest q-values. dplyr verbs are namespace-qualified
# because dplyr is not attached by this script.
top_pseudotime_de <- head(dplyr::arrange(my_pseudotime_de, qval))
top_pseudotime_de
# Example console output pasted from the tutorial (kept for reference only;
# as bare text it made the file unparseable):
#   status family pval qval id gene_short_name num_cells_expressed use_for_ordering
# 1 OK negbinomial.size 0 0 ENSG00000143546 S100A8 2034 TRUE
# 2 OK negbinomial.size 0 0 ENSG00000163220 S100A9 2049 TRUE
# 3 OK negbinomial.size 0 0 ENSG00000163221 S100A12 1740 TRUE
# 4 OK negbinomial.size 0 0 ENSG00000203747 FCGR3A 358 TRUE
# 5 OK negbinomial.size 0 0 ENSG00000231389 HLA-DPA1 1671 TRUE
# 6 OK negbinomial.size 0 0 ENSG00000257764 RP11-1143G9.4 1896 TRUE

# save the top 6 genes
my_pseudotime_gene <- dplyr::select(top_pseudotime_de, id)
my_pseudotime_gene <- my_pseudotime_gene$id
plot_genes_in_pseudotime(my_cds_subset[my_pseudotime_gene, ])