# monocle2 analysis (run on the jimmy server)

.libPaths(c("/home/data/t040413/R/yll/usr/local/lib/R/site-library",  "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local/lib/R/library"))
library(Seurat)

#BiocManager::install("monocle",force = TRUE)
#library(monocle)
#devtools::load_all("/home/data/t040413/R/yll/usr/local/lib/R/site-library/monocle/")

library(monocle)

# ---- Working directory, input data and first inspection ----
# Creates (if needed) and enters the output directory, loads the Seurat object
# `subset_data`, removes the smooth muscle cells and prints a few summary
# tables/plots as a sanity check.
getwd()
out_dir <- "/home/data/t040413/ipf/diseased_lung_covid20_Reclustered_RNA_monocle/"
# showWarnings = FALSE: re-running the script must not warn when the directory
# already exists; recursive = TRUE also creates missing parent directories.
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
setwd(out_dir)

# NOTE(review): despite the .rds extension this file is read with load(), so it
# is presumably a save() image that provides `subset_data` - confirm.
load("/home/data/t040413/ipf/diseased_lung_covid20/subsetdata_fibroblast.rds")
stopifnot(exists("subset_data"))
#dir.create("./fibroblast_monocle")
# Kept disabled, like the dir.create() above (the leading `3` was a mistyped
# `#` and made the line a syntax error).
#setwd("./fibroblast_monocle")
#library(dplyr)
DimPlot(object = subset_data, label = TRUE)

# Plain head() call: no pipe operator is needed, and none is guaranteed to be
# attached at this point because library(dplyr) is commented out.
head(subset_data@meta.data)
table(subset_data$cell.type)
table(Idents(subset_data))

#subset_data=RenameIdents(subset_data,"HAS1 High Fibroblasts"="PLIN2+ Fibroblasts")
# Keep every cell that is not a smooth muscle cell. Selecting the cells to
# keep by name stays correct when no such cell exists, whereas
# x[, -which(cond)] would then select zero cells (-integer(0)).
keep_cells <- colnames(subset_data)[
  !(Idents(subset_data) %in% "Smooth Muscle Cells")
]
subset_data <- subset(subset_data, cells = keep_cells)

# Store the current identities in the metadata under both column names used
# later in the script.
subset_data$celltype <- Idents(subset_data)
table(Idents(subset_data))
subset_data$cell.type <- subset_data$celltype

table(subset_data$stim, Idents(subset_data))
table(subset_data$cell.type)
getwd()
table(subset_data$stim)
table(subset_data$Diagnosis)
table(subset_data$Status)

dim(subset_data)

DimPlot(subset_data)

head(subset_data@meta.data)
DimPlot(subset_data, group.by = "SCT_snn_res.0.7")
DefaultAssay(subset_data)
dim(subset_data)
# ---- Re-cluster on the SCT assay and export per-cluster markers ----
# The two steps are written as sequential assignments instead of a `%>%`
# chain: at this point no attached package is guaranteed to provide the pipe
# (library(tibble) is only loaded below, library(dplyr) is commented out).
subset_data <- FindNeighbors(subset_data, assay = "SCT")
subset_data <- FindClusters(subset_data, resolution = 0.2)
DimPlot(subset_data, label = TRUE)
library(tibble)
# only.pos = TRUE: report only positive markers for each cluster.
markers_for_eachcluster <- FindAllMarkers(subset_data, only.pos = TRUE)
head(markers_for_eachcluster)
#markers_for_eachcluster =markers_for_eachcluster %>% rownames_to_column(var="gene")
openxlsx::write.xlsx(markers_for_eachcluster,
                     file = "markers_for_eachcluster.xlsx")


DimPlot(subset_data, label = TRUE)
DotPlot(subset_data, features = c("HAS1", "PLIN2"))
### Manually select cells: drop the population lying close to the VSM cells
umap_plot <- DimPlot(subset_data, reduction = "umap")
select.cells <- CellSelector(plot = umap_plot)
head(select.cells)
# Keep only the hand-picked cells, then cluster them again.
subset_data <- subset(subset_data, cells = select.cells)
DimPlot(subset_data, label = TRUE)
subset_data <- FindNeighbors(subset_data, assay = "SCT")
subset_data <- FindClusters(subset_data, resolution = 0.21)
DimPlot(subset_data, label = TRUE)

# Give the numbered clusters readable names.
# NOTE(review): assumes the clustering above yields clusters "0" to "4" -
# confirm against the DimPlot before trusting the labels.
subset_data <- RenameIdents(
  subset_data,
  "0" = "HAS1 Fibroblast",
  "1" = "Myofib1",
  "2" = "Myofib2",
  "3" = "Fibroblast",
  "4" = "Mesothelial"
)



### Mind which assay is used here: RNA or SCT

DefaultAssay(subset_data)
DefaultAssay(subset_data) <- "RNA"
# Duplicated gene or cell names would break the row/column matching below.
table(duplicated(rownames(subset_data)))
table(duplicated(colnames(subset_data)))


# Append the current identities to the cell metadata (the new column is named
# `Idents.subset_data.` by data.frame()).
new.metadata <- merge(subset_data@meta.data,
                      data.frame(Idents(subset_data)),
                      by = "row.names", sort = FALSE)
head(new.metadata)
rownames(new.metadata) <- new.metadata[, 1]
# merge(sort = FALSE) leaves the row order unspecified, so realign the rows
# with the cells of the object and fail loudly if they still do not match
# (the original only printed the result of identical()).
new.metadata <- new.metadata[colnames(subset_data), , drop = FALSE]
head(subset_data@meta.data)
stopifnot(identical(rownames(new.metadata), rownames(subset_data@meta.data)))

subset_data@meta.data <- new.metadata
# The counts slot is converted directly; going through as.matrix() first
# would materialise a dense genes x cells matrix for no benefit.
expression_matrix <- as(subset_data@assays$RNA@counts, "sparseMatrix")
head(expression_matrix)
stopifnot(identical(colnames(expression_matrix), rownames(new.metadata)))


# Per-cell annotation for monocle.
cell_metadata <- new("AnnotatedDataFrame", data = subset_data@meta.data)
head(subset_data@meta.data)
head(cell_metadata)

# Per-gene annotation: gene_short_name doubles as the row name.
gene_annotation <- new(
  "AnnotatedDataFrame",
  data = data.frame(gene_short_name = row.names(subset_data),
                    row.names = row.names(subset_data))
)
head(gene_annotation)
# Inspect the annotation with the AnnotatedDataFrame accessors; fData(),
# phenoData() and featureData() are eSet accessors and do not apply to an
# AnnotatedDataFrame.
head(pData(gene_annotation))
varMetadata(gene_annotation)


monocle_cds <- monocle::newCellDataSet(expression_matrix,
                                       phenoData = cell_metadata,
                                       featureData = gene_annotation,
                                       lowerDetectionLimit = 0.5,
                                       expressionFamily = negbinomial.size())

###################################################################################

## Normalisation ######
cds <- estimateSizeFactors(monocle_cds)
cds <- estimateDispersions(cds)  # logged on the original run: "Removing 110 outliers"
library("BiocGenerics")  # original note: "parallel computing"
# `cell.type` below is a metadata column carried over from subset_data.
diff_test_res <- differentialGeneTest(
  cds,
  fullModelFormulaStr = "~ cell.type"
)

### Infer the pseudo-trajectory ###########################################
# Step 1: choose the ordering genes => setOrderingFilter()
is_ordering_gene <- !is.na(diff_test_res$qval) & diff_test_res$qval < 0.01
ordering_genes <- row.names(diff_test_res)[is_ordering_gene]
length(ordering_genes)  # 6354 on the original run
cds <- setOrderingFilter(cds, ordering_genes)
# Step 2: dimension reduction => reduceDimension() with DDRTree
cds <- reduceDimension(cds, method = "DDRTree", max_components = 2)

#package.version(pkg = "monocle")
# Step 3: order the cells => orderCells()
#getwd()
#source("./order_cells.R")
#unloadNamespace('monocle')
#devtools::load_all("../monocle_2.26.0 (1).tar/monocle_2.26.0 (1)/monocle/")
# Loads a local copy of the monocle sources before orderCells() is called.
# NOTE(review): presumably a patched build - confirm why it is needed.
devtools::load_all("/home/data/t040413/ipf/diseased_lung_covid20/monocle/")


cds <- orderCells(cds)

# Write a single plot to a PDF file.
#
# file: path of the PDF to create.
# plot: a plot object; it is print()ed explicitly because ggplot objects are
#       only drawn by auto-printing at top level, so without print() the PDF
#       would be empty when this script is run through source().
# Returns `file` invisibly. The device is closed even if drawing fails.
save_plot_pdf <- function(file, plot) {
  pdf(file)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(file)
}

# ---- Trajectory coloured by cell type, stimulation and state ----
save_plot_pdf("1.pseudutime.cell.type.pre.order.pdf",
              plot_cell_trajectory(cds, color_by = "cell.type"))

save_plot_pdf("1.pseudutime.stim.pre.order.pdf",
              plot_cell_trajectory(cds, color_by = "stim"))

save_plot_pdf("1.pseudutime.State.pre.order.pdf",
              plot_cell_trajectory(cds, color_by = "State"))
###### split ########
save_plot_pdf("2.split.pseudutime.Seurat.cell.type.pdf",
              plot_cell_trajectory(cds, color_by = "cell.type") +
                facet_wrap(~cell.type))

save_plot_pdf("2.split.pseudutime.stim.pdf",
              plot_cell_trajectory(cds, color_by = "stim") +
                facet_wrap(~stim))


save_plot_pdf("4.split.pseudutime.Seurat.State.pdf",
              plot_cell_trajectory(cds, color_by = "cell.type") +
                facet_wrap(~State))


save_plot_pdf("3.split.pseudutime.Seurat.cell.type_State.pdf",
              plot_cell_trajectory(cds, color_by = "State") +
                facet_wrap(~cell.type))

# ---- State x cell type / State x stim contingency tables ----
# The tables are converted to wide data frames (one row per State) before
# export, and the openxlsx argument names are colNames / rowNames; the
# lower-case spellings used before are not openxlsx arguments.
state_by_cell_type <- table(pData(cds)$State, pData(cds)$cell.type)
state_by_cell_type
openxlsx::write.xlsx(as.data.frame.matrix(state_by_cell_type),
                     "State_cellType_summary.xlsx",
                     colNames = TRUE, rowNames = TRUE)

state_by_stim <- table(pData(cds)$State, pData(cds)$stim)
state_by_stim
openxlsx::write.xlsx(as.data.frame.matrix(state_by_stim),
                     "State_Stim_summary.xlsx",
                     colNames = TRUE, rowNames = TRUE)
## Inherited note: "we set the state 2 as root; state 2 has most cells in
## Endothelial cells".
## NOTE(review): this script works on fibroblast data, so the note looks
## copied from another analysis - confirm which state should be the root.
# Which state should be set as the root here??
DimPlot(subset_data, label = TRUE)
table(Idents(subset_data))
DefaultAssay(subset_data)
# The assay was switched to "SCT" and straight back to "RNA"; only the final
# value has an effect, so it is set once.
DefaultAssay(subset_data) <- "RNA"
DimPlot(subset_data, label = TRUE)
# Close the device left open by the plots above, if there is one; a bare
# dev.off() fails when only the null device (number 1) is active.
if (dev.cur() > 1L) {
  dev.off()
}

table(subset_data$cell.type)
getwd()

#ds <- orderCells(cds,root_state=1)

pdf("4.pseudutime.Pseudotime.pdf")
# print() so the figure is also drawn when the script is run via source().
print(plot_cell_trajectory(cds, color_by = "Pseudotime"))
dev.off()
getwd()
# NOTE(review): written with save() despite the .rds extension, so it must be
# read back with load(), not readRDS().
save(cds, file = "cds_fibroblast_using_RNA_slot.rds")

# NOTE(review): this reloads `cds` from the "-1" sub-directory, which is not
# where the save() above writes - confirm that this is the intended file.
load("/home/data/t040413/ipf/diseased_lung_covid20_Reclustered_RNA_monocle/-1/cds_fibroblast_using_RNA_slot.rds")
getwd()
################
###################################
# Once we have a trajectory, we can use differentialGeneTest() to find genes
# that have an expression pattern that varies according to pseudotime.
#
# The vignette this was copied from tests `HSMM[marker_genes, ]`; neither name
# exists in this script. The genes flagged for ordering in `cds` are tested
# instead.
# NOTE(review): the choice of gene set is an assumption - replace it with a
# curated marker list if that was the intent.
pseudotime_test_genes <-
  row.names(fData(cds))[fData(cds)$use_for_ordering %in% TRUE]
stopifnot(length(pseudotime_test_genes) > 0)
diff_test_res <- differentialGeneTest(cds[pseudotime_test_genes, ],
                                      fullModelFormulaStr = "~sm.ns(Pseudotime)",
                                      cores = 4)
sig_gene_names <- row.names(subset(diff_test_res, qval < 0.01))

library(ggplot2)
# return_heatmap = TRUE hands back the heatmap object so that exactly this
# figure can be written to disk. ggsave() would save the last *ggplot*, and
# this heatmap is drawn with grid, not ggplot2.
pseudotime_heatmap <- plot_pseudotime_heatmap(
  cds[sig_gene_names, ],
  # never ask for more clusters than there are genes
  num_clusters = min(5, length(sig_gene_names)),
  cores = 4,
  show_rownames = TRUE,
  return_heatmap = TRUE
)
# Same size as before: 20 x 30 inches at ggsave()'s default 300 dpi.
png("heatmap-.png", width = 20, height = 30, units = "in", res = 300)
grid::grid.newpage()
grid::grid.draw(pseudotime_heatmap$gtable)
dev.off()


genes_of_interest <- c("CX3CR1", "SPP1")
plot_pseudotime_heatmap(cds[genes_of_interest, ],
                        # 5 clusters cannot be cut from a 2-gene tree
                        num_clusters = min(5, length(genes_of_interest)),
                        #  cores = 4,
                        show_rownames = TRUE)



############------------------------------
load("/home/data/t040413/ipf/diseased_lung_covid20_Reclustered_RNA_monocle/-1/cds_fibroblast_using_RNA_slot.rds")

#install.packages("ggsci")
# Attached before its first use: scale_color_gsea() below comes from ggsci.
library(ggsci)

# `mycds_sub_beam` was used without ever being defined. It is built here with
# BEAM() on the genes flagged for ordering, keeping the branch-dependent genes.
# NOTE(review): the gene set and the qval < 1e-4 cut-off are assumptions -
# adjust them to the intended analysis.
branch_test_genes <-
  row.names(fData(cds))[fData(cds)$use_for_ordering %in% TRUE]
mycds_sub <- cds[branch_test_genes, ]
beam_res <- BEAM(mycds_sub, branch_point = 1, cores = 4)
beam_res <- beam_res[order(beam_res$qval), ]
mycds_sub_beam <- mycds_sub[row.names(subset(beam_res, qval < 1e-4)), ]

plot_genes_branched_heatmap(mycds_sub_beam, branch_point = 1,
                            num_clusters = 3, show_rownames = TRUE)

plot_genes_branched_pseudotime(cds["GPX3", ], color_by = "State")

# Colour the trajectory by log2(expression + 1) of single genes.
pData(cds)$GPX3 <- log2(exprs(cds)["GPX3", ] + 1)
plot_cell_trajectory(cds, color_by = "GPX3") + scale_color_steps2()
plot_cell_trajectory(cds, color_by = "GPX3") + scale_color_gsea()

pData(cds)$FN1 <- log2(exprs(cds)["FN1", ] + 1)
plot_cell_trajectory(cds, color_by = "FN1") + scale_color_gsea()


plot_genes_violin(cds["GPX3", ], grouping = "State", color_by = "State")
plot_genes_in_pseudotime(cds["GPX3", ], color_by = "State")




################################################################################
#https://davetang.org/muse/2017/10/01/getting-started-monocle/
#
# Adapted from the tutorial above to the objects of this script: the tutorial's
# `my_pseudotime_de` / `my_cds_subset` are never created here, so the
# differentialGeneTest() result `diff_test_res` and `cds` are used instead.
# Base R ordering replaces the dplyr verbs, since dplyr is not attached.
top_pseudotime_de <- head(diff_test_res[order(diff_test_res$qval), ])
top_pseudotime_de
# Console output pasted with the tutorial snippet, kept for reference only.
# NOTE(review): presumably from the tutorial's own data set - it has an `id`
# column that this script's gene annotation does not define.
#   status           family pval qval              id gene_short_name num_cells_expressed use_for_ordering
# 1     OK negbinomial.size    0    0 ENSG00000143546          S100A8                2034             TRUE
# 2     OK negbinomial.size    0    0 ENSG00000163220          S100A9                2049             TRUE
# 3     OK negbinomial.size    0    0 ENSG00000163221         S100A12                1740             TRUE
# 4     OK negbinomial.size    0    0 ENSG00000203747          FCGR3A                 358             TRUE
# 5     OK negbinomial.size    0    0 ENSG00000231389        HLA-DPA1                1671             TRUE
# 6     OK negbinomial.size    0    0 ENSG00000257764   RP11-1143G9.4                1896             TRUE

# save the top 6 genes
# gene_short_name is used as the key: it is the only identifier column this
# script puts into the feature data, and it equals the row names of `cds`.
my_pseudotime_gene <- as.character(top_pseudotime_de$gene_short_name)

plot_genes_in_pseudotime(cds[my_pseudotime_gene, ])

# You may also be interested in: (servers, artificial intelligence, algorithms, R)