From 1ad2476664a5aac448a24066964499a27a5b5ee3 Mon Sep 17 00:00:00 2001 From: yichunher Date: Sun, 15 Feb 2026 11:52:19 -0500 Subject: [PATCH] Distinguish spot-based vs single-cell platforms for annotation/deconvolution routing Add platform-aware guidance so the agent correctly routes spot-based data (Visium, Slide-seq, ST) to spatial_deconvolution and single-cell resolution data (MERFISH, Xenium, CosMx, SeqFISH) to annotation. --- spatialagent/agent/make_prompt.py | 4 ++++ spatialagent/agent/skills.py | 4 ++-- spatialagent/skill/annotation.md | 12 ++++++++++++ spatialagent/skill/spatial_deconvolution.md | 8 ++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/spatialagent/agent/make_prompt.py b/spatialagent/agent/make_prompt.py index 0aee885..4dc09ec 100644 --- a/spatialagent/agent/make_prompt.py +++ b/spatialagent/agent/make_prompt.py @@ -103,6 +103,10 @@ def SYSTEM_PROMPT(tool_details: str) -> str: - **Python code execution**: Custom analysis, visualization, exploration (stateful - variables persist) - **Bash commands**: File operations, system checks +# Platform-Aware Analysis +- **Spot-based platforms** (Visium, Slide-seq, ST): Each spot contains multiple cells. Use cell type DECONVOLUTION (Cell2location, DestVI, Stereoscope) to estimate proportions — NOT single-label annotation. +- **Single-cell platforms** (MERFISH, Xenium, CosMx, SeqFISH): Each observation is one cell. Use cell type ANNOTATION (Harmony label transfer, clustering + annotation). + # Key Principles 1. **Plan clearly**: Think about what to do before acting diff --git a/spatialagent/agent/skills.py b/spatialagent/agent/skills.py index 5d020e9..6dfe180 100644 --- a/spatialagent/agent/skills.py +++ b/spatialagent/agent/skills.py @@ -114,7 +114,7 @@ def select_skill(self, task_query: str, num_skills: int = 1) -> List[str]: - panel_design: Design gene panels through iterative database queries (CZI CELLxGENE → PanglaoDB → CellMarker2). 
Use when: "design a gene panel", "find markers for cell types", "build a panel for spatial experiment", marker gene selection for targeted spatial assays. -- annotation: Cell type and tissue niche annotation in spatial transcriptomics data. Use when: "annotate cell types", "identify tissue regions", "label clusters", "what cell types are in this spatial data", cluster annotation, niche identification. +- annotation: Cell type and tissue niche annotation for SINGLE-CELL resolution spatial data (MERFISH, Xenium, CosMx, SeqFISH, FISH-based). Use when: "annotate cell types" on single-cell resolution data, "identify tissue regions", "label clusters". NOT for spot-based data (Visium, Slide-seq, ST) — use spatial_deconvolution instead. - cell_cell_communication: Cell-cell interaction analysis comparing ligand-receptor pairs across conditions. Use when: "compare interactions between conditions", "how do cell communications change", "cross-condition CCC analysis", differential interaction analysis. @@ -136,7 +136,7 @@ def select_skill(self, task_query: str, num_skills: int = 1) -> List[str]: - multimodal_integration: Integrate multiple modalities - RNA+protein (TotalVI/CITE-seq), RNA+ATAC (MultiVI), or any combination (MOFA). Also handles batch correction with BBKNN. Use when: "integrate CITE-seq", "multiome analysis", "combine RNA and protein", "batch correction", "MOFA integration". -- spatial_deconvolution: Estimate cell type proportions in spatial spots using deep learning methods (DestVI, Cell2location, Stereoscope, gimVI). Use when: "deconvolve Visium spots", "cell type proportions", "what fraction of each cell type", "abundance estimation", bulk-to-single-cell deconvolution. +- spatial_deconvolution: Estimate cell type proportions in spatial spots using deep learning (DestVI, Cell2location, Stereoscope, gimVI). 
Use when: data is spot-based (Visium, Slide-seq, ST), "annotate cell types" on Visium, "deconvolve spots", "cell type proportions", "what fraction of each cell type", "abundance estimation". This is the correct approach for any cell type identification on spot-based platforms. - spatial_domain_detection: Identify spatial domains and tissue niches using SpaGCN (integrates histology) or GraphST (self-supervised). Use when: "find tissue regions", "spatial clustering", "identify niches", "domain detection", "tissue architecture", spatially-aware clustering. diff --git a/spatialagent/skill/annotation.md b/spatialagent/skill/annotation.md index 3395530..f6ff814 100644 --- a/spatialagent/skill/annotation.md +++ b/spatialagent/skill/annotation.md @@ -2,6 +2,18 @@ Annotate cell types and tissue niches in spatial transcriptomics data. +## Platform Applicability + +**This workflow is for single-cell resolution platforms** (MERFISH, Xenium, CosMx, SeqFISH) where each observation corresponds to one cell. + +**NOT for spot-based platforms** (Visium, Slide-seq, ST) where each spot contains multiple cells. For spot-based data, use the `spatial_deconvolution` skill instead, which estimates cell type proportions per spot via deconvolution (DestVI, Cell2location, Stereoscope). + +**How to detect platform type**: +- **Single-cell resolution**: targeted gene panel (typically a few hundred to a few thousand genes, not whole transcriptome), sub-cellular coordinates, technology names include MERFISH, Xenium, CosMx, SeqFISH +- **Spot-based**: ~18,000–33,000 genes (whole transcriptome), ~55µm spot diameter (Visium) or bead-based capture, technology names include Visium, Slide-seq, ST, 10x Spatial Gene Expression + +--- + ## Workflow Overview 1. 
**Explore dataset structure** (always do this first) diff --git a/spatialagent/skill/spatial_deconvolution.md b/spatialagent/skill/spatial_deconvolution.md index d89341e..2a5358a 100644 --- a/spatialagent/skill/spatial_deconvolution.md +++ b/spatialagent/skill/spatial_deconvolution.md @@ -2,6 +2,14 @@ Estimate cell type compositions in spatial transcriptomics spots using deep learning methods from scvi-tools and cell2location. +## Platform Applicability + +**This is the correct workflow for cell type identification on spot-based platforms** (Visium, Slide-seq, ST). Each spot contains multiple cells, so deconvolution estimates cell type **proportions** rather than assigning a single label. If a user asks to "annotate cell types" on Visium or other spot-based data, this is the appropriate skill. + +For single-cell resolution platforms (MERFISH, Xenium, CosMx, SeqFISH), use the `annotation` skill instead. + +--- + ## Prerequisites **Required**: