# scanpy画图 (plotting with scanpy)

###/data/admin/yyp/yypold/scrna/HumanCellAtlasPreviewDatasets
import numpy as np
import pandas as pd
import scanpy as sc

# 10x Genomics .h5 files can be read directly.
adata = sc.read_10x_h5(
    "/data/admin/yyp/yypold/scrna/HumanCellAtlasPreviewDatasets/ica_bone_marrow_h5.h5",
    genome=None,
    gex_only=True,
)
# De-duplicate variable (gene) names in place.
adata.var_names_make_unique()

# Plot the 20 highest-expressed genes; `save` asks scanpy to write the figure
# to disk with the given suffix/extension.
sc.pl.highest_expr_genes(adata, n_top=20, save=".pdf")

# Drop cells with fewer than 200 expressed genes. Later steps in this script
# read obs["n_genes"], which is not assigned explicitly anywhere here —
# presumably filter_cells records it as a side effect; confirm in scanpy docs.
sc.pp.filter_cells(adata, min_genes=200)
# Drop genes that are expressed in fewer than 3 cells.
sc.pp.filter_genes(adata, min_cells=3)

# Boolean mask over genes whose name starts with "MT-" (mitochondrial genes).
mito_genes = adata.var_names.str.startswith("MT-")

# For each cell compute the fraction of counts in mito genes vs. all genes.
# The .A1 is only necessary as X is sparse (to transform to a dense array
# after summing).
adata.obs["percent_mito"] = (
    np.sum(adata[:, mito_genes].X, axis=1).A1 / np.sum(adata.X, axis=1).A1
)

# Add the total counts per cell as observations-annotation to adata.
adata.obs["n_counts"] = adata.X.sum(axis=1).A1

# Violin plots of the three per-cell QC metrics, one panel per metric.
sc.pl.violin(
    adata,
    ["n_genes", "n_counts", "percent_mito"],
    jitter=0.4,
    multi_panel=True,
    save=".pdf",
)

# Scatter plots of total counts against the other two QC metrics.
# Each call gets its own `save` suffix: with the same ".pdf" suffix both
# figures would resolve to the same output file and the second would
# overwrite the first.
sc.pl.scatter(adata, x="n_counts", y="percent_mito", save="_percent_mito.pdf")
sc.pl.scatter(adata, x="n_counts", y="n_genes", save="_n_genes.pdf")

# Keep only cells with fewer than 4000 detected genes and a mitochondrial
# fraction below 0.3. The subset is materialised with .copy() because the
# following steps modify the data in place, which should not happen on a view.
keep_cells = (adata.obs["n_genes"] < 4000) & (adata.obs["percent_mito"] < 0.3)
adata = adata[keep_cells, :].copy()

# Normalise every cell to 1e4 total counts, then log1p-transform.
# NOTE(review): newer scanpy releases recommend sc.pp.normalize_total instead
# of normalize_per_cell — confirm against the installed version before switching.
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)

# Store the normalised, log-transformed AnnData object in .raw.
adata.raw = adata

# Flag highly variable genes using the given mean/dispersion thresholds.
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

sc.pl.highly_variable_genes(adata, save=".pdf")

# Restrict the data to the flagged genes. .copy() turns the subset into a
# standalone object, since the later steps in this script change it in place.
adata = adata[:, adata.var["highly_variable"]].copy()

# Regress out the total counts per cell and the mitochondrial fraction.
sc.pp.regress_out(adata, ["n_counts", "percent_mito"])

# Scale the data, clipping values at 10.
sc.pp.scale(adata, max_value=10)

# Principal component analysis.
sc.tl.pca(adata, svd_solver="arpack")
# PCA plot coloured by CST3 (shown only; no `save` argument is passed).
sc.pl.pca(adata, color="CST3")

# Variance ratio per principal component, on a log scale.
sc.pl.pca_variance_ratio(adata, log=True, save=".pdf")

# Persist the result.
adata.write("pca_results.h5ad")

# 你可能感兴趣的:(python)