Source code for scsilhouette.compute

# src/scsilhouette/compute.py

from pathlib import Path
from typing import List, Optional
import pandas as pd
import anndata as ad
from sklearn.metrics import silhouette_samples
from . import viz


def _summarize_clusters(cell_scores: pd.DataFrame, label_key: str) -> pd.DataFrame:
    """Aggregate per-cell silhouette scores into one row per cluster label."""
    return (
        # observed=False pins the long-standing pandas default for categorical
        # group keys, so the output stays the same across pandas versions and
        # the "default of observed will change" FutureWarning is not emitted.
        cell_scores.groupby(label_key, observed=False)["silhouette_score"]
        .agg(["mean", "median", "count"])
        .reset_index()
        .rename(
            columns={
                "mean": "mean_silhouette_score",
                "median": "median_silhouette_score",
                "count": "n_cells",
            }
        )
    )


def run_silhouette(
    h5ad_path: str,
    label_keys: List[str],
    embedding_key: str,
    output_dir: str,
    show_obs: bool = False,
    save_scores: bool = False,
    save_cluster_summary: bool = False,
    save_csv: bool = False,
    save_plots: bool = False,
    qc_correlations: bool = False,
    log_pca_dims: bool = False,
) -> None:
    """Compute per-cell silhouette scores for one or more label columns.

    The ``.h5ad`` file is loaded with ``anndata.read_h5ad``. For every key in
    ``label_keys`` the silhouette coefficient of each cell is computed with
    ``sklearn.metrics.silhouette_samples`` from ``adata.obsm[embedding_key]``
    and ``adata.obs[label_key]``, then summarised per label value (mean,
    median and cell count). Optional outputs are written to ``output_dir``,
    which is created if it does not exist.

    Args:
        h5ad_path: Path of the ``.h5ad`` file to read.
        label_keys: Names of ``adata.obs`` columns to score, one pass each.
        embedding_key: Key into ``adata.obsm`` holding the embedding matrix.
        output_dir: Directory receiving every output file.
        show_obs: Write the first 10 rows of ``adata.obs`` to
            ``obs_preview.csv``.
        save_scores: Write ``{label_key}_scores.csv`` with the label and the
            silhouette score of every cell. The file is written without the
            index, so the ``adata.obs`` index is not part of it.
        save_cluster_summary: Write ``{label_key}_cluster_summary.csv``.
        save_csv: Write the same summary table to ``{label_key}_summary.csv``.
        save_plots: Call the score-distribution, cluster-summary and
            cluster-size-vs-score plotting helpers of the ``viz`` module.
        qc_correlations: Call ``viz.plot_qc_boxplots`` with the scores and
            ``adata.obs``.
        log_pca_dims: Write the number of columns of the embedding to
            ``embedding_dims.txt``.

    Raises:
        KeyError: If ``embedding_key`` is needed but absent from
            ``adata.obsm``, or if any label key is not a column of
            ``adata.obs``. Both are checked before any result is written.
        ValueError: Propagated from ``silhouette_samples`` when the labels are
            not usable for a silhouette computation (for example a single
            distinct label).
    """
    # Materialise once so the keys can be validated and then iterated, even
    # if the caller passed a one-shot iterable.
    label_keys = list(label_keys)

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    adata = ad.read_h5ad(h5ad_path)

    # Fail fast with an actionable message instead of a bare KeyError that
    # appears only after part of the output has already been written.
    needs_embedding = log_pca_dims or bool(label_keys)
    if needs_embedding and embedding_key not in adata.obsm:
        raise KeyError(
            f"Embedding key {embedding_key!r} not found in adata.obsm; "
            f"available keys: {list(adata.obsm.keys())}"
        )
    missing_labels = [key for key in label_keys if key not in adata.obs.columns]
    if missing_labels:
        raise KeyError(
            f"Label key(s) {missing_labels} not found in adata.obs; "
            f"available columns: {list(adata.obs.columns)}"
        )

    if show_obs:
        obs_preview = adata.obs.head(10)
        obs_preview.to_csv(output_path / "obs_preview.csv")
        print("[✓] Saved obs preview:", output_path / "obs_preview.csv")

    if log_pca_dims:
        dims = adata.obsm[embedding_key].shape[1]
        dim_path = output_path / "embedding_dims.txt"
        with open(dim_path, "w", encoding="utf-8") as f:
            f.write(f"Embedding: {embedding_key}, PCA dimensions: {dims}\n")
        print("[✓] Saved PCA dimension info:", dim_path)

    if not label_keys:
        return

    # The embedding does not depend on the label column: fetch it once.
    embedding = adata.obsm[embedding_key]

    for label_key in label_keys:
        labels = adata.obs[label_key].values
        silhouette_vals = silhouette_samples(embedding, labels)

        cell_scores = adata.obs[[label_key]].copy()
        cell_scores["silhouette_score"] = silhouette_vals

        if save_scores:
            score_path = output_path / f"{label_key}_scores.csv"
            cell_scores.to_csv(score_path, index=False)
            print(f"[✓] Saved: {score_path.name}")

        cluster_summary = _summarize_clusters(cell_scores, label_key)

        if save_cluster_summary:
            cluster_path = output_path / f"{label_key}_cluster_summary.csv"
            cluster_summary.to_csv(cluster_path, index=False)
            print(f"[✓] Saved: {cluster_path.name}")

        if save_plots:
            viz.plot_score_distribution(cell_scores, output_path, label_key)
            viz.plot_cluster_summary(cluster_summary, output_path, label_key)
            viz.plot_cluster_size_vs_score(cluster_summary, output_path, label_key)

        if qc_correlations:
            viz.plot_qc_boxplots(cell_scores, adata.obs, output_path, label_key)

        if save_csv:
            cluster_summary.to_csv(output_path / f"{label_key}_summary.csv", index=False)

        # NOTE(review): this block was recovered from a copy of the file whose
        # indentation had been lost, so the nesting of the call below is an
        # assumption: it is treated as running once per label regardless of
        # the flags. Confirm against the repository (it may belong under
        # `if save_csv:` or `if save_plots:`).
        viz.plot_all(cell_scores, cluster_summary, output_path, label_key)