Porting of Symphony R package to Python
Project description
Symphonypy
Porting of Symphony R package to Python
Usage
Preprocessing
import scanpy as sc
import symphonypy as sp
n_comps = 20
batch_key = "donor"
n_top_genes = 2000
# preprocess reference, e.g. HVG, normalize, log1p:
sc.pp.highly_variable_genes(
adata_ref,
batch_key=batch_key,
n_top_genes=n_top_genes,
flavor="seurat_v3",
)
sc.pp.normalize_total(adata_ref, target_sum=1e5)
sc.pp.log1p(adata_ref)
adata_ref.raw = adata_ref
# preprocess query in the same way as reference:
sc.pp.normalize_total(adata_query, target_sum=1e5)
sc.pp.log1p(adata_query)
# create reference embedding, e.g. PCA:
sc.pp.scale(adata_ref, zero_center=True, max_value=10)
adata_ref.X[adata_ref.X < -10] = -10 # for R Symphony-like preprocessing
sc.tl.pca(adata_ref, n_comps=n_comps, zero_center=False) # do not center again
Run symphony
harmony_kwargs = {"sigma": 0.1}
lamb = 1
# run Harmonypy or Harmony R,
# if you want to integrate the reference dataset
# (skip this if the reference does not
# need batch correction):
sp.pp.harmony_integrate(
adata_ref,
ref_basis_source="X_pca",
ref_basis_adjusted="X_pca_harmony",
ref_basis_loadings="PCs",
flavor="python", # might be "R" (will run Harmony via rpy2)
key=batch_key,
**harmony_kwargs,
)
# -> adata_ref.obsm["X_pca_harmony"]
# -> adata_ref.uns["harmony"] (needed for the next step)
# run symphonypy to map query to the reference's embedding:
sp.tl.map_embedding(
adata_query,
adata_ref,
key=batch_key, # could be list of batch keys
lamb=lamb,
use_genes_column="highly_variable",
adjusted_basis_query="X_pca_harmony",
query_basis_ref="X_pca_reference",
)
# -> adata_query.obsm["X_pca_harmony"]
Transfer labels
n_neighbours = 10
labels = ["cell_type", "cell_subtype"] # any columns from adata_ref.obs
transferred_labels = ["predicted_cell_type", "predicted_cell_subtype"]
# transfer labels via sklearn kNN
sp.tl.transfer_labels_kNN(
adata_query,
adata_ref,
labels,
# kNN args
n_neighbours,
query_labels=transferred_labels,
ref_basis="X_pca_harmony",
query_basis="X_pca_harmony",
# kNN kwargs
weights="distance",
)
# -> adata_query.obs[transferred_labels]
Map UMAP
# map query to the reference's UMAP
# build reference UMAP
sc.pp.neighbors(
adata_ref,
n_pcs=n_comps,
n_neighbors=n_neighbours,
knn=True,
use_rep="X_pca_harmony"
)
sc.tl.umap(adata_ref)
# run ingest (same as sc.tl.ingest, but expression of var_names missing from the query is set to zero)
sp.tl.ingest(adata_query=adata_query, adata_ref=adata_ref, embedding_method="umap")
# -> adata_query.obsm["X_umap"]
Map tSNE with openTSNE
# map query to the reference's tSNE
tSNE = sp.tl.tsne(adata_ref, use_rep="X_pca_harmony", return_model=True)
sp.tl.tsne(adata_query, use_model=tSNE, use_rep="X_pca_harmony")
# -> adata_query.obsm["X_tsne"]
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
symphonypy-0.1.tar.gz
(23.7 kB
view hashes)
Built Distribution
symphonypy-0.1-py3-none-any.whl
(24.3 kB
view hashes)