From 1ad2476664a5aac448a24066964499a27a5b5ee3 Mon Sep 17 00:00:00 2001
From: yichunher <yichunhe00@gmail.com>
Date: Sun, 15 Feb 2026 11:52:19 -0500
Subject: [PATCH] Distinguish spot-based vs single-cell platforms for
 annotation/deconvolution routing

Add platform-aware guidance so the agent correctly routes spot-based data
(Visium, Slide-seq, ST) to spatial_deconvolution and single-cell resolution
data (MERFISH, Xenium, CosMx, SeqFISH) to annotation.
---
 spatialagent/agent/make_prompt.py           |  4 ++++
 spatialagent/agent/skills.py                |  4 ++--
 spatialagent/skill/annotation.md            | 12 ++++++++++++
 spatialagent/skill/spatial_deconvolution.md |  8 ++++++++
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/spatialagent/agent/make_prompt.py b/spatialagent/agent/make_prompt.py
index 0aee885..4dc09ec 100644
--- a/spatialagent/agent/make_prompt.py
+++ b/spatialagent/agent/make_prompt.py
@@ -103,6 +103,10 @@ def SYSTEM_PROMPT(tool_details: str) -> str:
 - **Python code execution**: Custom analysis, visualization, exploration (stateful - variables persist)
 - **Bash commands**: File operations, system checks
 
+# Platform-Aware Analysis
+- **Spot-based platforms** (Visium, Slide-seq, ST): Each spot contains multiple cells. Use cell type DECONVOLUTION (Cell2location, DestVI, Stereoscope) to estimate proportions — NOT single-label annotation.
+- **Single-cell platforms** (MERFISH, Xenium, CosMx, SeqFISH): Each observation is one cell. Use cell type ANNOTATION (Harmony label transfer, clustering + annotation).
+
 # Key Principles
 
 1. **Plan clearly**: Think about what to do before acting
diff --git a/spatialagent/agent/skills.py b/spatialagent/agent/skills.py
index 5d020e9..6dfe180 100644
--- a/spatialagent/agent/skills.py
+++ b/spatialagent/agent/skills.py
@@ -114,7 +114,7 @@ def select_skill(self, task_query: str, num_skills: int = 1) -> List[str]:
 
 - panel_design: Design gene panels through iterative database queries (CZI CELLxGENE → PanglaoDB → CellMarker2). Use when: "design a gene panel", "find markers for cell types", "build a panel for spatial experiment", marker gene selection for targeted spatial assays.
 
-- annotation: Cell type and tissue niche annotation in spatial transcriptomics data. Use when: "annotate cell types", "identify tissue regions", "label clusters", "what cell types are in this spatial data", cluster annotation, niche identification.
+- annotation: Cell type and tissue niche annotation for SINGLE-CELL resolution spatial data (MERFISH, Xenium, CosMx, SeqFISH, FISH-based). Use when: "annotate cell types" on single-cell resolution data, "identify tissue regions", "label clusters". NOT for spot-based data (Visium, Slide-seq, ST) — use spatial_deconvolution instead.
 
 - cell_cell_communication: Cell-cell interaction analysis comparing ligand-receptor pairs across conditions. Use when: "compare interactions between conditions", "how do cell communications change", "cross-condition CCC analysis", differential interaction analysis.
 
@@ -136,7 +136,7 @@ def select_skill(self, task_query: str, num_skills: int = 1) -> List[str]:
 
 - multimodal_integration: Integrate multiple modalities - RNA+protein (TotalVI/CITE-seq), RNA+ATAC (MultiVI), or any combination (MOFA). Also handles batch correction with BBKNN. Use when: "integrate CITE-seq", "multiome analysis", "combine RNA and protein", "batch correction", "MOFA integration".
 
-- spatial_deconvolution: Estimate cell type proportions in spatial spots using deep learning methods (DestVI, Cell2location, Stereoscope, gimVI). Use when: "deconvolve Visium spots", "cell type proportions", "what fraction of each cell type", "abundance estimation", bulk-to-single-cell deconvolution.
+- spatial_deconvolution: Estimate cell type proportions in spatial spots using deep learning (DestVI, Cell2location, Stereoscope, gimVI). Use when: data is spot-based (Visium, Slide-seq, ST), "annotate cell types" on Visium, "deconvolve spots", "cell type proportions", "what fraction of each cell type", "abundance estimation". This is the correct approach for any cell type identification on spot-based platforms.
 
 - spatial_domain_detection: Identify spatial domains and tissue niches using SpaGCN (integrates histology) or GraphST (self-supervised). Use when: "find tissue regions", "spatial clustering", "identify niches", "domain detection", "tissue architecture", spatially-aware clustering.
 
diff --git a/spatialagent/skill/annotation.md b/spatialagent/skill/annotation.md
index 3395530..f6ff814 100644
--- a/spatialagent/skill/annotation.md
+++ b/spatialagent/skill/annotation.md
@@ -2,6 +2,18 @@
 
 Annotate cell types and tissue niches in spatial transcriptomics data.
 
+## Platform Applicability
+
+**This workflow is for single-cell resolution platforms** (MERFISH, Xenium, CosMx, SeqFISH) where each observation corresponds to one cell.
+
+**NOT for spot-based platforms** (Visium, Slide-seq, ST) where each spot contains multiple cells. For spot-based data, use the `spatial_deconvolution` skill instead, which estimates cell type proportions per spot via deconvolution (DestVI, Cell2location, Stereoscope).
+
+**How to detect platform type**:
+- **Single-cell resolution**: targeted gene panel (commonly ~100–1,000 genes; newer high-plex panels reach several thousand), sub-cellular coordinates, technology names include MERFISH, Xenium, CosMx, SeqFISH
+- **Spot-based**: ~18,000–33,000 genes (whole transcriptome), ~55 µm spot diameter (Visium) or bead-based capture (Slide-seq), technology names include Visium, Slide-seq, ST (the original Spatial Transcriptomics arrays), 10x Spatial Gene Expression
+
+---
+
 ## Workflow Overview
 
 1. **Explore dataset structure** (always do this first)
diff --git a/spatialagent/skill/spatial_deconvolution.md b/spatialagent/skill/spatial_deconvolution.md
index d89341e..2a5358a 100644
--- a/spatialagent/skill/spatial_deconvolution.md
+++ b/spatialagent/skill/spatial_deconvolution.md
@@ -2,6 +2,14 @@
 
 Estimate cell type compositions in spatial transcriptomics spots using deep learning methods from scvi-tools and cell2location.
 
+## Platform Applicability
+
+**This is the correct workflow for cell type identification on spot-based platforms** (Visium, Slide-seq, ST). Each spot contains multiple cells, so deconvolution estimates cell type **proportions** rather than assigning a single label. If a user asks to "annotate cell types" on Visium or other spot-based data, this is the appropriate skill.
+
+For single-cell resolution platforms (MERFISH, Xenium, CosMx, SeqFISH), use the `annotation` skill instead.
+
+---
+
 ## Prerequisites
 
 **Required**: