hello,大家好,今天我们来分享一个很有意思的分析,那就是根据单细胞CNV数据绘制进化树,大家常见的进化树是下面这样的。
关于CNV和进化树的分析,我之前也分享过很多,列举在这里,供大家参考:
10X单细胞(10X空间转录组)数据分析之识别肿瘤细胞的CNV分析原理
10X单细胞(10X空间转录组)肿瘤数据分析之肿瘤进化(inferCNV+UPhyloplot2)
copyKAT推断单细胞转录组肿瘤细胞CNV(自动识别肿瘤normal和tumor)
今天我们来看另外一种展示方式。
安装和加载
pip install scatrex
import scatrex
from scatrex import models
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import time
Simulate data
# One fixed seed is passed to both simulate_tree() and simulate_data().
seed = 42
sim_sca = scatrex.SCATrEx(model=models.cna, verbose=True)

# Arguments and parameters forwarded to simulate_tree() for the observed tree.
observed_tree_args = {
    'n_nodes': 4,
    'node_weights': np.array([.1, 1, 1, 1]),
}
observed_tree_params = {
    'n_regions': 30,
    'min_cn': 1,
    'min_nevents': 5,
    'max_nevents_frac': .5,
}
sim_sca.simulate_tree(
    observed_tree=None,
    n_extra_per_observed=1,
    n_genes=800,
    seed=seed,
    observed_tree_params=observed_tree_params,
    observed_tree_args=observed_tree_args,
)
sim_sca.observed_tree.create_adata()

# theta enters the node hyperparameters only through its reciprocal.
theta = 100
node_hyperparams = {
    'log_lib_size_mean': 7.1,
    'log_lib_size_std': .6,
    'num_global_noise_factors': 4,
    'global_noise_factors_precisions_shape': 5,
    'unobserved_factors_kernel_concentration': 1./theta,
    'frac_dosage': 1.,
    'baseline_shape': .7,
}
sim_sca.ntssb.reset_node_parameters(node_hyperparams=node_hyperparams)
sim_sca.simulate_data(n_cells=1000, copy=False, seed=seed)

# Heatmap of sim_sca.adata (use_raw=True) over all variables, grouped by 'node'.
sc.pl.heatmap(
    sim_sca.adata,
    groupby='node',
    var_names=sim_sca.adata.var_names,
    use_raw=True,
    vmax=50,
    figsize=(16, 8),
)
# Heatmap drawn by the observed tree itself, colour scale clipped to [0, 4].
sim_sca.observed_tree.plot_heatmap(vmax=4, vmin=0, figsize=(16, 4))
Run SCATrEx
# Model arguments for the inference run; theta is used only as 1/theta below.
theta = 100
args = {
    'global_noise_factors_precisions_shape': 100,
    'num_global_noise_factors': 4,
    'unobserved_factors_kernel_concentration': 1/theta,
    'unobserved_factors_kernel_rate': 1.,
    'unobserved_factors_root_kernel': .01,
}
sca = scatrex.SCATrEx(model=models.cna, verbose=True, model_args=args)
# NOTE(review): model_args is also assigned explicitly after construction;
# kept as in the original -- confirm whether the constructor already stores it.
sca.model_args = args

# Feed the simulated raw counts and the simulated observed tree to the new object.
sca.add_data(sim_sca.adata.raw.to_adata())
sca.set_observed_tree(sim_sca.observed_tree)
sca.normalize_data()
sca.project_data()

# Weights per tree-search move; moves with weight .0 are listed but zeroed out.
move_weights = {
    'add': 3,
    'merge': 6,
    'prune_reattach': 1,
    'pivot_reattach': 1,
    'swap': 1,
    'add_reattach_pivot': .5,
    'subtree_reattach': .5,
    'push_subtree': .5,
    'extract_pivot': .5,
    'perturb_node': .0,
    'clean_node': .0,
    'subtree_pivot_reattach': .5,
    'reset_globals': .0,
    'full': .0,
    'globals': 1,
}

# Keyword arguments handed to learn_tree() via search_kwargs.
search_kwargs = {
    'n_iters': 500,
    'n_iters_elbo': 500,
    'move_weights': move_weights,
    'local': True,
    'factor_delay': 0,
    'step_size': 0.01,
    'posterior_delay': 0,
    'mb_size': 200,
    'num_samples': 1,
    'window': 50,
    'max_nodes': 5,
    'add_rule_thres': .4,
    'joint_init': True,
    'anneal': False,
    'restart_step': 100,
}
sca.learn_tree(reset=True, search_kwargs=search_kwargs)
# Compare the simulated (ground-truth) assignments with the ones SCATrEx wrote
# to sca.adata.obs: one contingency table per pair of columns, each followed
# by a blank line.
import pandas as pd
for truth_col, inferred_col in (
    ('obs_node', 'scatrex_obs_node'),
    ('node', 'scatrex_node'),
):
    table = pd.crosstab(sim_sca.adata.obs[truth_col], sca.adata.obs[inferred_col])
    print(table)
    print()

# Plot the learned tree, then the estimated unobserved factors.
sca.ntssb.plot_tree(counts=True)
sca.plot_unobserved_parameters(
    step=5.,
    estimated=True,
    title='Estimated unobserved factors',
    name='unobserved_factors',
    figsize=(16, 4),
)
怎么样,感觉还不错吧。
生活很好,有你更好