# References:
# https://github.com/JonathanShor/DoubletDetection
# https://github.com/EDePasquale/DoubletDecon/blob/master/seurat-3.0.R
# https://www.cell.com/cell-reports/fulltext/S2211-1247(19)31286-0
# https://www.biorxiv.org/content/10.1101/2020.04.23.058156v1.full
# Install the helper packages only when they are missing.
# requireNamespace() tests for availability without attaching the package;
# devtools is only ever used through the `devtools::` prefix below.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
# Guarded so that re-running the script does not contact GitHub and attempt
# a reinstall of DoubletDecon on every execution.
if (!requireNamespace("DoubletDecon", quietly = TRUE)) {
  devtools::install_github("EDePasquale/DoubletDecon")
}
library(plyr)
library(dplyr)
library(Matrix)
library(ggplot2)
library(cowplot)
library(Seurat)
library(harmony)
library(DoubletDecon)
library(clusterProfiler)
# ---- Load data and per-cell quality control --------------------------------
# Read the 10x output directory and wrap it in a Seurat object. min.cells and
# min.features are passed to CreateSeuratObject() as creation-time filters.
sce.10x <- Read10X(data.dir = "D:/HSW/HD/scRNA-seq/")
testdata_1 <- CreateSeuratObject(
  counts = sce.10x,
  project = "testdata_1",
  min.cells = 3,
  min.features = 500
)
testdata_1

# QC metrics stored as per-cell metadata:
#   percent.mt - computed over features whose names match "^MT-"
#   percent.HB - computed over the single feature "HBB"
testdata_1[["percent.mt"]] <- PercentageFeatureSet(
  testdata_1,
  pattern = "^MT-"
)
testdata_1[["percent.HB"]] <- PercentageFeatureSet(
  testdata_1,
  features = "HBB"
)

# Visual inspection of the QC metrics before filtering.
VlnPlot(
  testdata_1,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.HB"),
  ncol = 2
)
plot1 <- FeatureScatter(
  testdata_1,
  feature1 = "nCount_RNA",
  feature2 = "percent.mt"
)
plot2 <- FeatureScatter(
  testdata_1,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
)
CombinePlots(plots = list(plot1, plot2))

# Keep cells with 200 < nFeature_RNA < 4000, percent.mt < 10 and
# percent.HB < 7, then report the size of the filtered object.
testdata_1 <- subset(
  testdata_1,
  subset = nFeature_RNA > 200 &
    nFeature_RNA < 4000 &
    percent.mt < 10 &
    percent.HB < 7
)
dim(testdata_1)
testdata_1
# ---- Normalisation, dimensionality reduction and clustering ----------------
# Normalise, select 3000 variable features with the "vst" method, and scale.
testdata_1 <- testdata_1 %>%
  NormalizeData() %>%
  FindVariableFeatures(
    selection.method = "vst",
    nfeatures = 3000,
    verbose = FALSE
  ) %>%
  ScaleData(verbose = FALSE)

# PCA restricted to the variable features selected above.
testdata_1 <- RunPCA(
  testdata_1,
  features = VariableFeatures(object = testdata_1)
)

# JackStraw is run with 100 replicates over 50 dimensions; only the first 20
# are scored and plotted. The elbow plot shows 50 dimensions.
testdata_1 <- testdata_1 %>%
  JackStraw(num.replicate = 100, dims = 50) %>%
  ScoreJackStraw(dims = 1:20)
JackStrawPlot(testdata_1, dims = 1:20)
ElbowPlot(testdata_1, ndims = 50)

# Neighbour graph, clustering (resolution 0.6), t-SNE and UMAP all use the
# first 10 dimensions.
testdata_1 <- testdata_1 %>%
  FindNeighbors(dims = 1:10) %>%
  FindClusters(resolution = 0.6) %>%
  RunTSNE(dims = 1:10) %>%
  RunUMAP(dims = 1:10)
# ---- Marker genes and export of DoubletDecon input files -------------------
# Positive marker genes for every cluster; preview the top 2 per cluster
# (ranked by avg_logFC) and keep the top 50 per cluster for DoubletDecon.
testdata_1.markers <- FindAllMarkers(
  testdata_1,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
testdata_1.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
top50 <- testdata_1.markers %>%
  group_by(cluster) %>%
  top_n(n = 50, wt = avg_logFC)

# Build each path once and use it for both writing and reading, so the export
# does not depend on the current working directory being `location`.
# `location` must end with "/" because the paths are formed with paste0().
location <- "D:/HSW/HD/scRNA-seq/"
expressionFile <- paste0(location, "counts.txt")
genesFile <- paste0(location, "Top50Genes.txt")
clustersFile <- paste0(location, "Cluster.txt")

write.table(top50, file = genesFile, sep = "\t", col.names = NA)
write.table(
  x = Idents(object = testdata_1),
  file = clustersFile,
  sep = "\t",
  col.names = NA
)

# Expression values from the "data" slot of the RNA assay, fetched through
# the accessor instead of reaching into the object's slots directly.
data <- GetAssayData(testdata_1, assay = "RNA", slot = "data")
# write.table() expects a matrix or data frame; coerce explicitly so the
# export does not rely on implicit coercion of the sparse matrix.
write.table(as.matrix(data), file = expressionFile, sep = "\t", col.names = NA)

# Convert the three exported files into the inputs used by Main_Doublet_Decon.
newFiles <- Seurat_Pre_Process(expressionFile, genesFile, clustersFile)
# ---- Run DoubletDecon -------------------------------------------------------
# Inputs come from Seurat_Pre_Process(); outputs are written under `location`
# (write = TRUE) using `filename` as the label. The meaning of each tuning
# argument is defined by the DoubletDecon package; see ?Main_Doublet_Decon.
filename <- "test_example"
results <- Main_Doublet_Decon(
  # Input / output
  rawDataFile = newFiles$newExpressionFile,
  groupsFile = newFiles$newGroupsFile,
  filename = filename,
  location = location,
  fullDataFile = NULL,
  write = TRUE,
  # Algorithm settings
  removeCC = FALSE,
  species = "hsa",
  rhop = 1.1,
  PMF = TRUE,
  useFull = FALSE,
  heatmap = FALSE,
  centroids = TRUE,
  num_doubs = 100,
  only50 = FALSE,
  min_uniq = 4,
  nCores = -1
)
# ---- Keep only the cells DoubletDecon classified as non-doublets -----------
LIST <- row.names(results$Final_nondoublets_groups)
head(LIST)

# The identifiers returned by DoubletDecon use "." where the Seurat cell names
# use "-" (presumably because the expression table was re-read with
# read.table(), which sanitises column names -- TODO confirm). Map them back
# through the Seurat object's own cell names so that a name which legitimately
# contains "." is not corrupted by a blanket substitution. Identifiers with no
# match fall back to the plain "." -> "-" substitution.
seurat_cells <- colnames(testdata_1)
restored <- seurat_cells[match(LIST, make.names(seurat_cells))]
unmatched <- is.na(restored)
restored[unmatched] <- gsub(".", "-", LIST[unmatched], fixed = TRUE)
LIST <- restored

# Surface identifiers that still do not correspond to any cell instead of
# leaving the mismatch to be handled implicitly by subset().
unknown_cells <- setdiff(LIST, seurat_cells)
if (length(unknown_cells) > 0) {
  warning(
    length(unknown_cells),
    " non-doublet identifier(s) could not be matched to cells in testdata_1",
    ", e.g. ", paste(head(unknown_cells, 3), collapse = ", "),
    call. = FALSE
  )
}

testdata_1_RemoveDoublet <- subset(x = testdata_1, cells = LIST)

# Record the package versions used for this analysis.
sessionInfo()
# R version 4.0.2 (2020-06-22)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 10 x64 (build 19041)
# Matrix products: default
# locale:
# [1] LC_COLLATE=Chinese (Simplified)_China.936 LC_CTYPE=Chinese (Simplified)_China.936
# [3] LC_MONETARY=Chinese (Simplified)_China.936 LC_NUMERIC=C
# [5] LC_TIME=Chinese (Simplified)_China.936
# attached base packages:
# [1] stats4 parallel grid stats graphics grDevices utils datasets methods
# [10] base
# other attached packages:
# [1] Matrix_1.2-18 shiny_1.5.0 stringr_1.4.0 doParallel_1.0.15
# [5] iterators_1.0.12 foreach_1.5.0 R.utils_2.10.1 R.oo_1.24.0
# [9] R.methodsS3_1.8.1 tidyr_1.1.1 mygene_1.24.0 GenomicFeatures_1.40.1
# [13] AnnotationDbi_1.50.3 GenomicRanges_1.40.0 GenomeInfoDb_1.24.2 IRanges_2.22.2
# [17] S4Vectors_0.26.1 MCL_1.0 plyr_1.8.6 gplots_3.0.4
# [21] DeconRNASeq_1.30.0 pcaMethods_1.80.0 Biobase_2.48.0 BiocGenerics_0.34.0
# [25] limSolve_1.5.6 dplyr_1.0.1 clusterProfiler_3.16.1 ROCR_1.0-11
# [29] KernSmooth_2.23-17 fields_11.5 spam_2.5-1 dotCall64_1.0-0
# [33] DoubletFinder_2.0.3 harmony_1.0 Rcpp_1.0.5 Seurat_3.2.0
# [37] sctransform_0.2.1 cowplot_1.0.0 ggplot2_3.3.2 DoubletDecon_1.1.6
# loaded via a namespace (and not attached):
# [1] rappdirs_0.3.1 rtracklayer_1.48.0 knitr_1.29
# [4] bit64_4.0.5 DelayedArray_0.14.1 irlba_2.3.3
# [7] data.table_1.13.0 rpart_4.1-15 RCurl_1.98-1.2
# [10] generics_0.0.2 callr_3.4.3 usethis_1.6.1
# [13] RSQLite_2.2.0 RANN_2.6.1 europepmc_0.4
# [16] future_1.18.0 chron_2.3-56 bit_4.0.4
# [19] enrichplot_1.8.1 spatstat.data_1.4-3 xml2_1.3.2
# [22] httpuv_1.5.4 SummarizedExperiment_1.18.2 assertthat_0.2.1
# [25] viridis_0.5.1 xfun_0.16 hms_0.5.3
# [28] promises_1.1.1 fansi_0.4.1 progress_1.2.2
# [31] caTools_1.18.0 dbplyr_1.4.4 igraph_1.2.5
# [34] DBI_1.1.0 htmlwidgets_1.5.1 purrr_0.3.4
# [37] ellipsis_0.3.1 RSpectra_0.16-0 backports_1.1.8
# [40] biomaRt_2.44.1 deldir_0.1-28 vctrs_0.3.2
# [43] remotes_2.2.0 abind_1.4-5 withr_2.2.0
# [46] ggforce_0.3.2 triebeard_0.3.0 checkmate_2.0.0
# [49] GenomicAlignments_1.24.0 prettyunits_1.1.1 goftest_1.2-2
# [52] cluster_2.1.0 DOSE_3.14.0 ape_5.4-1
# [55] lazyeval_0.2.2 crayon_1.3.4 pkgconfig_2.0.3
# [58] labeling_0.3 tweenr_1.0.1 nlme_3.1-148
# [61] pkgload_1.1.0 nnet_7.3-14 devtools_2.3.1
# [64] rlang_0.4.7 globals_0.12.5 lifecycle_0.2.0
# [67] miniUI_0.1.1.1 downloader_0.4 BiocFileCache_1.12.1
# [70] rsvd_1.0.3 rprojroot_1.3-2 polyclip_1.10-0
# [73] matrixStats_0.56.0 lmtest_0.9-37 urltools_1.7.3
# [76] zoo_1.8-8 base64enc_0.1-3 ggridges_0.5.2
# [79] processx_3.4.3 png_0.1-7 viridisLite_0.3.0
# [82] bitops_1.0-6 Biostrings_2.56.0 blob_1.2.1
# [85] qvalue_2.20.0 jpeg_0.1-8.1 gridGraphics_0.5-0
# [88] scales_1.1.1 lpSolve_5.6.15 memoise_1.1.0
# [91] magrittr_1.5 ica_1.0-2 gdata_2.18.0
# [94] zlibbioc_1.34.0 compiler_4.0.2 scatterpie_0.1.5
# [97] RColorBrewer_1.1-2 fitdistrplus_1.1-1 Rsamtools_2.4.0
# [100] cli_2.0.2 XVector_0.28.0 listenv_0.8.0
# [103] patchwork_1.0.1 pbapply_1.4-2 ps_1.3.4
# [106] htmlTable_2.1.0 Formula_1.2-3 MASS_7.3-51.6
# [109] mgcv_1.8-31 tidyselect_1.1.0 stringi_1.4.6
# [112] GOSemSim_2.14.2 askpass_1.1 latticeExtra_0.6-29
# [115] ggrepel_0.8.2 fastmatch_1.1-0 tools_4.0.2
# [118] future.apply_1.6.0 rstudioapi_0.11 foreign_0.8-80
# [121] gridExtra_2.3 farver_2.0.3 Rtsne_0.15
# [124] ggraph_2.0.3 digest_0.6.25 rvcheck_0.1.8
# [127] BiocManager_1.30.10 proto_1.0.0 quadprog_1.5-8
# [130] later_1.1.0.1 RcppAnnoy_0.0.16 httr_1.4.2
# [133] colorspace_1.4-1 XML_3.99-0.5 fs_1.5.0
# [136] tensor_1.5 reticulate_1.16 splines_4.0.2
# [139] uwot_0.1.8 expm_0.999-5 spatstat.utils_1.17-0
# [142] graphlayouts_0.7.0 ggplotify_0.0.5 plotly_4.9.2.1
# [145] sessioninfo_1.1.1 xtable_1.8-4 jsonlite_1.7.0
# [148] spatstat_1.64-1 tidygraph_1.2.0 testthat_2.3.2
# [151] R6_2.4.1 Hmisc_4.4-1 gsubfn_0.7
# [154] pillar_1.4.6 htmltools_0.5.0 mime_0.9
# [157] glue_1.4.1 fastmap_1.0.1 BiocParallel_1.22.0
# [160] codetools_0.2-16 maps_3.3.0 fgsea_1.14.0
# [163] pkgbuild_1.1.0 utf8_1.1.4 lattice_0.20-41
# [166] tibble_3.0.3 sqldf_0.4-11 curl_4.3
# [169] leiden_0.3.3 gtools_3.8.2 GO.db_3.11.4
# [172] openssl_1.4.2 survival_3.2-3 limma_3.44.3
# [175] desc_1.2.0 munsell_0.5.0 DO.db_2.9
# [178] GenomeInfoDbData_1.2.3 reshape2_1.4.4 gtable_0.3.0
# 何世伟
# 复旦大学医学博士生
# 厦门大学公共卫生硕士
# 研究方向:儿科学、生物信息学、表观遗传流行病学、循证医学
# 联系方式:[email protected]