inferCNV run on ~4000 cells (4422 cells in the expression matrix)
Total run time: ~47 min
> rm(list=ls())
> options(stringsAsFactors = F)
> library(Seurat)
> library(ggplot2)
> library(infercnv)
> expFile='expFile.txt'
> groupFiles='groupFiles.txt'
> geneFile='geneFile.txt'
> infercnv_obj = CreateInfercnvObject(raw_counts_matrix=expFile,
+ annotations_file=groupFiles,
+ delim="\t",
+ gene_order_file= geneFile,
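+ ref_group_names=NULL) ## Depends on your own grouping info: set this to the reference (normal) group name(s) in the annotations file; with NULL, the mean of all cells is used as the reference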
INFO [2021-03-11 20:31:29] Parsing matrix: expFile.txt
INFO [2021-03-11 20:31:57] Parsing gene order file: geneFile.txt
INFO [2021-03-11 20:31:57] Parsing cell annotations file: groupFiles.txt
INFO [2021-03-11 20:31:57] ::order_reduce:Start.
INFO [2021-03-11 20:31:57] .order_reduce(): expr and order match.
INFO [2021-03-11 20:31:57] ::process_data:order_reduce:Reduction from positional data, new dimensions (r,c) = 17015,4422 Total=55430001 Min=0 Max=3041.
INFO [2021-03-11 20:31:57] num genes removed taking into account provided gene ordering list: 540 = 3.17367029091978% removed.
INFO [2021-03-11 20:31:57] -filtering out cells < 100 or > Inf, removing 0 % of cells
INFO [2021-03-11 20:31:59] validating infercnv_obj
>
> ## Code from the paper: # started at 14:58
> start_time <- Sys.time()
> infercnv_obj2 = infercnv::run(infercnv_obj,
+ cutoff=0.1, # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics
+ out_dir='plot_out2/' ,
+ cluster_by_groups=T, # cluster
+ hclust_method="ward.D2",
+ plot_steps=F,
+ denoise=T,
+ HMM=T)
INFO [2021-03-11 20:31:59] ::process_data:Start
INFO [2021-03-11 20:31:59] Creating output path plot_out2/
INFO [2021-03-11 20:31:59] Checking for saved results.
INFO [2021-03-11 20:31:59]
STEP 1: incoming data
INFO [2021-03-11 20:32:14]
STEP 02: Removing lowly expressed genes
INFO [2021-03-11 20:32:14] ::above_min_mean_expr_cutoff:Start
INFO [2021-03-11 20:32:14] Removing 10182 genes from matrix as below mean expr threshold: 0.1
INFO [2021-03-11 20:32:14] validating infercnv_obj
INFO [2021-03-11 20:32:14] There are 6293 genes and 4422 cells remaining in the expr matrix.
INFO [2021-03-11 20:32:16] no genes removed due to min cells/gene filter
INFO [2021-03-11 20:32:25]
STEP 03: normalization by sequencing depth
INFO [2021-03-11 20:32:25] normalizing counts matrix by depth
INFO [2021-03-11 20:32:27] Computed total sum normalization factor as median libsize: 10221.500000
INFO [2021-03-11 20:32:27] Adding h-spike
INFO [2021-03-11 20:32:27] -no normals defined, using all observation cells as proxy
INFO [2021-03-11 20:32:27] -hspike modeling of normalsToUse
INFO [2021-03-11 20:33:43] validating infercnv_obj
INFO [2021-03-11 20:33:43] normalizing counts matrix by depth
INFO [2021-03-11 20:33:43] Using specified normalization factor: 10221.500000
INFO [2021-03-11 20:33:55]
STEP 04: log transformation of data
INFO [2021-03-11 20:33:55] transforming log2xplus1()
INFO [2021-03-11 20:33:56] -mirroring for hspike
INFO [2021-03-11 20:33:56] transforming log2xplus1()
INFO [2021-03-11 20:34:09]
STEP 08: removing average of reference data (before smoothing)
INFO [2021-03-11 20:34:09] ::subtract_ref_expr_from_obs:Start inv_log=FALSE, use_bounds=TRUE
INFO [2021-03-11 20:34:09] -no reference cells specified... using mean of all cells as proxy
INFO [2021-03-11 20:34:16] -subtracting expr per gene, use_bounds=TRUE
INFO [2021-03-11 20:34:20] -mirroring for hspike
INFO [2021-03-11 20:34:20] ::subtract_ref_expr_from_obs:Start inv_log=FALSE, use_bounds=TRUE
INFO [2021-03-11 20:34:20] subtracting mean(normal) per gene per cell across all data
INFO [2021-03-11 20:34:24] -subtracting expr per gene, use_bounds=TRUE
INFO [2021-03-11 20:34:41]
STEP 09: apply max centered expression threshold: 3
INFO [2021-03-11 20:34:41] ::process_data:setting max centered expr, threshold set to: +/-: 3
INFO [2021-03-11 20:34:41] -mirroring for hspike
INFO [2021-03-11 20:34:41] ::process_data:setting max centered expr, threshold set to: +/-: 3
INFO [2021-03-11 20:34:59]
STEP 10: Smoothing data per cell by chromosome
INFO [2021-03-11 20:34:59] smooth_by_chromosome: chr: chr1
INFO [2021-03-11 20:35:04] smooth_by_chromosome: chr: chr10
INFO [2021-03-11 20:35:09] smooth_by_chromosome: chr: chr11
INFO [2021-03-11 20:35:14] smooth_by_chromosome: chr: chr12
INFO [2021-03-11 20:35:19] smooth_by_chromosome: chr: chr13
INFO [2021-03-11 20:35:23] smooth_by_chromosome: chr: chr14
INFO [2021-03-11 20:35:28] smooth_by_chromosome: chr: chr15
INFO [2021-03-11 20:35:32] smooth_by_chromosome: chr: chr16
INFO [2021-03-11 20:35:37] smooth_by_chromosome: chr: chr17
INFO [2021-03-11 20:35:43] smooth_by_chromosome: chr: chr18
INFO [2021-03-11 20:35:47] smooth_by_chromosome: chr: chr19
INFO [2021-03-11 20:35:51] smooth_by_chromosome: chr: chr2
INFO [2021-03-11 20:35:57] smooth_by_chromosome: chr: chr20
INFO [2021-03-11 20:36:01] smooth_by_chromosome: chr: chr21
INFO [2021-03-11 20:36:03] smooth_by_chromosome: chr: chr22
INFO [2021-03-11 20:36:09] smooth_by_chromosome: chr: chr3
INFO [2021-03-11 20:36:14] smooth_by_chromosome: chr: chr4
INFO [2021-03-11 20:36:19] smooth_by_chromosome: chr: chr5
INFO [2021-03-11 20:36:23] smooth_by_chromosome: chr: chr6
INFO [2021-03-11 20:36:27] smooth_by_chromosome: chr: chr7
INFO [2021-03-11 20:36:32] smooth_by_chromosome: chr: chr8
INFO [2021-03-11 20:36:36] smooth_by_chromosome: chr: chr9
INFO [2021-03-11 20:36:40] -mirroring for hspike
INFO [2021-03-11 20:36:40] smooth_by_chromosome: chr: chrA
INFO [2021-03-11 20:36:40] smooth_by_chromosome: chr: chr_0
INFO [2021-03-11 20:36:41] smooth_by_chromosome: chr: chr_B
INFO [2021-03-11 20:36:41] smooth_by_chromosome: chr: chr_0pt5
INFO [2021-03-11 20:36:41] smooth_by_chromosome: chr: chr_C
INFO [2021-03-11 20:36:41] smooth_by_chromosome: chr: chr_1pt5
INFO [2021-03-11 20:36:42] smooth_by_chromosome: chr: chr_D
INFO [2021-03-11 20:36:42] smooth_by_chromosome: chr: chr_2pt0
INFO [2021-03-11 20:36:42] smooth_by_chromosome: chr: chr_E
INFO [2021-03-11 20:36:42] smooth_by_chromosome: chr: chr_3pt0
INFO [2021-03-11 20:36:42] smooth_by_chromosome: chr: chr_F
INFO [2021-03-11 20:37:01]
STEP 11: re-centering data across chromosome after smoothing
INFO [2021-03-11 20:37:01] ::center_smooth across chromosomes per cell
INFO [2021-03-11 20:37:06] -mirroring for hspike
INFO [2021-03-11 20:37:06] ::center_smooth across chromosomes per cell
INFO [2021-03-11 20:37:26]
STEP 12: removing average of reference data (after smoothing)
INFO [2021-03-11 20:37:26] ::subtract_ref_expr_from_obs:Start inv_log=FALSE, use_bounds=TRUE
INFO [2021-03-11 20:37:26] -no reference cells specified... using mean of all cells as proxy
INFO [2021-03-11 20:37:31] -subtracting expr per gene, use_bounds=TRUE
INFO [2021-03-11 20:37:34] -mirroring for hspike
INFO [2021-03-11 20:37:34] ::subtract_ref_expr_from_obs:Start inv_log=FALSE, use_bounds=TRUE
INFO [2021-03-11 20:37:34] subtracting mean(normal) per gene per cell across all data
INFO [2021-03-11 20:37:36] -subtracting expr per gene, use_bounds=TRUE
INFO [2021-03-11 20:37:55]
STEP 14: invert log2(FC) to FC
INFO [2021-03-11 20:37:55] invert_log2(), computing 2^x
INFO [2021-03-11 20:37:58] -mirroring for hspike
INFO [2021-03-11 20:37:58] invert_log2(), computing 2^x
INFO [2021-03-11 20:38:27]
STEP 15: Clustering samples (not defining tumor subclusters)
INFO [2021-03-11 20:38:27] define_signif_tumor_subclusters(p_val=0.1
INFO [2021-03-11 20:38:27] define_signif_tumor_subclusters(), tumor: epi
INFO [2021-03-11 21:04:25] cut tree into: 1 groups
INFO [2021-03-11 21:04:25] -processing epi,epi_s1
INFO [2021-03-11 21:04:25] -mirroring for hspike
INFO [2021-03-11 21:04:25] define_signif_tumor_subclusters(p_val=0.1
INFO [2021-03-11 21:04:25] define_signif_tumor_subclusters(), tumor: spike_tumor_cell_normalsToUse
INFO [2021-03-11 21:04:26] cut tree into: 1 groups
INFO [2021-03-11 21:04:26] -processing spike_tumor_cell_normalsToUse,spike_tumor_cell_normalsToUse_s1
INFO [2021-03-11 21:04:26] define_signif_tumor_subclusters(), tumor: simnorm_cell_normalsToUse
INFO [2021-03-11 21:04:26] cut tree into: 1 groups
INFO [2021-03-11 21:04:26] -processing simnorm_cell_normalsToUse,simnorm_cell_normalsToUse_s1
INFO [2021-03-11 21:05:13] ::plot_cnv:Start
INFO [2021-03-11 21:05:13] ::plot_cnv:Current data dimensions (r,c)=6293,4422 Total=27865743.6566492 Min=0.600891118387734 Max=1.77104319006399.
INFO [2021-03-11 21:05:14] ::plot_cnv:Depending on the size of the matrix this may take a moment.
INFO [2021-03-11 21:06:38] plot_cnv(): auto thresholding at: (0.860270 , 1.142468)
INFO [2021-03-11 21:06:39] plot_cnv_observation:Start
INFO [2021-03-11 21:06:39] Observation data size: Cells= 4422 Genes= 6293
INFO [2021-03-11 21:06:40] plot_cnv_observation:Writing observation groupings/color.
INFO [2021-03-11 21:06:40] plot_cnv_observation:Done writing observation groupings/color.
INFO [2021-03-11 21:06:40] plot_cnv_observation:Writing observation heatmap thresholds.
INFO [2021-03-11 21:06:40] plot_cnv_observation:Done writing observation heatmap thresholds.
INFO [2021-03-11 21:06:45] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
INFO [2021-03-11 21:06:45] Quantiles of plotted data range: 0.860270334987169,0.967363595089174,0.999064700099595,1.03267553347211,1.14246778067301
INFO [2021-03-11 21:06:50] plot_cnv_observations:Writing observation data to plot_out2//infercnv.preliminary.observations.txt
INFO [2021-03-11 21:08:14]
STEP 17: HMM-based CNV prediction
INFO [2021-03-11 21:08:14] predict_CNV_via_HMM_on_whole_tumor_samples
INFO [2021-03-11 21:08:16] -done predicting CNV based on initial tumor subclusters
INFO [2021-03-11 21:08:24] get_predicted_CNV_regions(subcluster)
INFO [2021-03-11 21:08:24] -processing cell_group_name: epi.epi_s1, size: 4422
INFO [2021-03-11 21:09:02] -writing cell clusters file: plot_out2//17_HMM_predHMMi6.hmm_mode-samples.cell_groupings
INFO [2021-03-11 21:09:02] -writing cnv regions file: plot_out2//17_HMM_predHMMi6.hmm_mode-samples.pred_cnv_regions.dat
INFO [2021-03-11 21:09:02] -writing per-gene cnv report: plot_out2//17_HMM_predHMMi6.hmm_mode-samples.pred_cnv_genes.dat
INFO [2021-03-11 21:09:02] -writing gene ordering info: plot_out2//17_HMM_predHMMi6.hmm_mode-samples.genes_used.dat
INFO [2021-03-11 21:09:03] ::plot_cnv:Start
INFO [2021-03-11 21:09:03] ::plot_cnv:Current data dimensions (r,c)=6293,4422 Total=83482938 Min=3 Max=3.
INFO [2021-03-11 21:09:03] ::plot_cnv:Depending on the size of the matrix this may take a moment.
INFO [2021-03-11 21:09:58] plot_cnv_observation:Start
INFO [2021-03-11 21:09:58] Observation data size: Cells= 4422 Genes= 6293
INFO [2021-03-11 21:09:58] plot_cnv_observation:Writing observation groupings/color.
INFO [2021-03-11 21:09:59] plot_cnv_observation:Done writing observation groupings/color.
INFO [2021-03-11 21:09:59] plot_cnv_observation:Writing observation heatmap thresholds.
INFO [2021-03-11 21:09:59] plot_cnv_observation:Done writing observation heatmap thresholds.
INFO [2021-03-11 21:10:04] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
INFO [2021-03-11 21:10:04] Quantiles of plotted data range: 3,3,3,3,3
INFO [2021-03-11 21:10:10] plot_cnv_observations:Writing observation data to plot_out2//infercnv.17_HMM_predHMMi6.hmm_mode-samples.observations.txt
INFO [2021-03-11 21:11:05]
STEP 19: Converting HMM-based CNV states to repr expr vals
INFO [2021-03-11 21:11:17] ::plot_cnv:Start
INFO [2021-03-11 21:11:17] ::plot_cnv:Current data dimensions (r,c)=6293,4422 Total=27827646 Min=1 Max=1.
INFO [2021-03-11 21:11:17] ::plot_cnv:Depending on the size of the matrix this may take a moment.
INFO [2021-03-11 21:12:11] plot_cnv_observation:Start
INFO [2021-03-11 21:12:11] Observation data size: Cells= 4422 Genes= 6293
INFO [2021-03-11 21:12:12] plot_cnv_observation:Writing observation groupings/color.
INFO [2021-03-11 21:12:12] plot_cnv_observation:Done writing observation groupings/color.
INFO [2021-03-11 21:12:12] plot_cnv_observation:Writing observation heatmap thresholds.
INFO [2021-03-11 21:12:12] plot_cnv_observation:Done writing observation heatmap thresholds.
INFO [2021-03-11 21:12:18] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
INFO [2021-03-11 21:12:18] Quantiles of plotted data range: 1,1,1,1,1
INFO [2021-03-11 21:12:22] plot_cnv_observations:Writing observation data to plot_out2//infercnv.19_HMM_predHMMi6.hmm_mode-samples.Pnorm_0.5.repr_intensities.observations.txt
INFO [2021-03-11 21:13:15]
STEP 21: Denoising
INFO [2021-03-11 21:13:15] ::process_data:Remove noise, noise threshold defined via ref mean sd_amplifier: 1.5
INFO [2021-03-11 21:13:15] -no reference cells specified... using mean and sd of all cells as proxy for denoising
INFO [2021-03-11 21:13:17] :: **** clear_noise_via_ref_quantiles **** : removing noise between bounds: 0.924191563949242 - 1.07854655171093
INFO [2021-03-11 21:13:27] ::plot_cnv:Start
INFO [2021-03-11 21:13:27] ::plot_cnv:Current data dimensions (r,c)=6293,4422 Total=27919855.972017 Min=0.600891118387734 Max=1.77104319006399.
INFO [2021-03-11 21:13:27] ::plot_cnv:Depending on the size of the matrix this may take a moment.
INFO [2021-03-11 21:14:57] plot_cnv(): auto thresholding at: (0.864159 , 1.142468)
INFO [2021-03-11 21:14:58] plot_cnv_observation:Start
INFO [2021-03-11 21:14:58] Observation data size: Cells= 4422 Genes= 6293
INFO [2021-03-11 21:14:58] plot_cnv_observation:Writing observation groupings/color.
INFO [2021-03-11 21:14:58] plot_cnv_observation:Done writing observation groupings/color.
INFO [2021-03-11 21:14:58] plot_cnv_observation:Writing observation heatmap thresholds.
INFO [2021-03-11 21:14:58] plot_cnv_observation:Done writing observation heatmap thresholds.
INFO [2021-03-11 21:15:04] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
INFO [2021-03-11 21:15:04] Quantiles of plotted data range: 0.864159439754611,1.00136905783009,1.00136905783009,1.00136905783009,1.14246778067301
INFO [2021-03-11 21:15:09] plot_cnv_observations:Writing observation data to plot_out2//infercnv.21_denoised.observations.txt
INFO [2021-03-11 21:16:47]
Making the final infercnv heatmap
INFO [2021-03-11 21:16:48] ::plot_cnv:Start
INFO [2021-03-11 21:16:48] ::plot_cnv:Current data dimensions (r,c)=6293,4422 Total=27919855.972017 Min=0.600891118387734 Max=1.77104319006399.
INFO [2021-03-11 21:16:48] ::plot_cnv:Depending on the size of the matrix this may take a moment.
INFO [2021-03-11 21:18:16] plot_cnv(): auto thresholding at: (0.857532 , 1.142468)
INFO [2021-03-11 21:18:17] plot_cnv_observation:Start
INFO [2021-03-11 21:18:17] Observation data size: Cells= 4422 Genes= 6293
INFO [2021-03-11 21:18:18] plot_cnv_observation:Writing observation groupings/color.
INFO [2021-03-11 21:18:18] plot_cnv_observation:Done writing observation groupings/color.
INFO [2021-03-11 21:18:18] plot_cnv_observation:Writing observation heatmap thresholds.
INFO [2021-03-11 21:18:18] plot_cnv_observation:Done writing observation heatmap thresholds.
INFO [2021-03-11 21:18:24] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
INFO [2021-03-11 21:18:24] Quantiles of plotted data range: 0.857532219326993,1.00136905783009,1.00136905783009,1.00136905783009,1.14246778067301
INFO [2021-03-11 21:18:29] plot_cnv_observations:Writing observation data to plot_out2//infercnv.observations.txt
Warning messages:
1: In dir.create(out_dir) : 'plot_out2' already exists
2: In dir.create(out_dir) : 'plot_out2' already exists
3: In dir.create(out_dir) : 'plot_out2' already exists
4: In dir.create(out_dir) : 'plot_out2' already exists
5: In dir.create(out_dir) : 'plot_out2' already exists
end_time <- Sys.time()
end_time
[1] "2021-03-11 21:19:54 CST"
end_time - start_time
Time difference of 47.91619 mins