splatter marker gene random data

https://github.com/theislab/single-cell-tutorial/blob/master/supplementary_scripts/Splatter-marker-genes-random-data.ipynb

import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb

import warnings
from rpy2.rinterface import RRuntimeWarning
from rpy2.robjects import pandas2ri

%load_ext rpy2.ipython
# ignore R warning messages
warnings.filterwarnings("ignore", category=RRuntimeWarning)

# Automatically convert rpy2 outputs to pandas dataframes
pandas2ri.activate()

plt.rcParams['figure.figsize']=(8,8) #rescale figures
sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
#sc.logging.print_versions()
%%R -o data
#suppressMessages(library(splatter))
suppressMessages(library(splatter))
n_cells <- 1000
n_genes <- 10000



params <- newSplatParams(seed=12345,
                        nGenes = n_genes,
                        batchCells = n_cells,
                        dropout.shape = 0.95,
                        de.prob = 0)


sim <- splatSimulate(params)

data <- counts(sim)
import pandas as pd 
data_df = pd.DataFrame(data)
adata = sc.AnnData(data_df.transpose())
adata.obs_names = adata.obs_names.astype(str)
adata.var_names = adata.var_names.astype(str)
#Basic QC filtering
sc.pp.filter_cells(adata, min_counts=1000)
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=20)

splatter marker gene random data_第1张图片

#Normalize the data
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
#Detect highly variable genes
sc.pp.filter_genes_dispersion(adata, flavor='cell_ranger', n_top_genes=4000, log=False, subset=False)
# Pre-process and visualize
sc.pp.pca(adata, svd_solver='arpack', use_highly_variable=True, n_comps=10)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.pca_variance_ratio(adata)

splatter marker gene random data_第2张图片

#Plot visualizations
sc.pl.pca(adata)
sc.pl.umap(adata)

splatter marker gene random data_第3张图片
splatter marker gene random data_第4张图片

#Cluster the data
sc.tl.louvain(adata, resolution=0.5, key_added='louvain')
sc.pl.pca(adata, color='louvain')#, save='_splatter_pca_louvain.pdf')
sc.pl.umap(adata, color='louvain')

splatter marker gene random data_第5张图片splatter marker gene random data_第6张图片

Marker genes

sc.tl.rank_genes_groups(adata, groupby='louvain', use_raw=False, method='t-test', key_added='rank_louvain', n_genes=9000)

sc.pl.rank_genes_groups(adata, key='rank_louvain', groups=['0','1'], fontsize=12)

splatter marker gene random data_第7张图片

P-value analysis

#Extract p-value data
pval_clust0 = [i[0] for i in adata.uns['rank_louvain']['pvals']]
pval_clust1 = [i[1] for i in adata.uns['rank_louvain']['pvals']]
#Plot expected P-value distributions
lab_size = 25
tick_size = 15

p1 = sb.distplot(pval_clust0, kde=False, bins=100)
p1.set_xlabel("P-values",fontsize=lab_size)
p1.set_ylabel("Frequency",fontsize=lab_size)
p1.tick_params(labelsize=tick_size)
f1 = p1.get_figure()
#f1.savefig('figures/hist_pvals_clust0.pdf')
plt.show()

p2 = sb.distplot(pval_clust1, kde=False, bins=100)
p2.set_xlabel("P-values",fontsize=lab_size)
p2.set_ylabel("Frequency",fontsize=lab_size)
p2.tick_params(labelsize=tick_size)
f2 = p2.get_figure()
#f2.savefig('figures/hist_pvals_clust1.pdf')
plt.show()

splatter marker gene random data_第8张图片
splatter marker gene random data_第9张图片

#Number of 'significant' marker gene after multiple testing correcting
signif_thresh = 0.05
n_signif_markers0 = np.sum([i[0] < signif_thresh for i in adata.uns['rank_louvain']['pvals_adj']])
n_signif_markers1 = np.sum([i[1] < signif_thresh for i in adata.uns['rank_louvain']['pvals_adj']])

print(n_signif_markers0)
print(n_signif_markers1)

splatter marker gene random data_第10张图片

你可能感兴趣的:(pytorch)