From 13144c3a0b58577f43b814c6f96fb5b04b3c9803 Mon Sep 17 00:00:00 2001
From: Luca Marconato
Date: Tue, 3 Feb 2026 15:05:42 +0100
Subject: [PATCH 1/2] improve handling of categoricals for feature_key in points

---
 src/spatialdata/models/models.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py
index e834ad78..d9fd0a1c 100644
--- a/src/spatialdata/models/models.py
+++ b/src/spatialdata/models/models.py
@@ -819,7 +819,7 @@ def _(
             # TODO: dask does not allow for setting divisions directly anymore. We have to decide on forcing the user.
             if feature_key is not None:
                 feature_categ = dd.from_pandas(
-                    data[feature_key].astype(str).astype("category"),
+                    data[feature_key],
                     sort=sort,
                     **kwargs,
                 )
@@ -827,11 +827,21 @@ def _(
         elif isinstance(data, dd.DataFrame):
             table = data[[coordinates[ax] for ax in axes]]
             table.columns = axes
-            if feature_key is not None:
-                if data[feature_key].dtype.name == "category":
-                    table[feature_key] = data[feature_key]
-                else:
-                    table[feature_key] = data[feature_key].astype(str).astype("category")
+
+            if feature_key is not None:
+                if data[feature_key].dtype.name == "category":
+                    table[feature_key] = data[feature_key]
+                else:
+                    logger.warning(
+                        f"The `feature_key` column {feature_key} is not categorical, converting it now. "
+                        "Please convert the column to categorical before calling `PointsModel.parse()` to "
+                        "avoid significant performance implications due to the need for dask to compute "
+                        "the categories. If you did not use `PointsModel.parse()` explicitly in your code "
+                        "(e.g. this message is coming from a reader in `spatialdata_io`), please report "
+                        "this finding."
+                    )
+                    table[feature_key] = data[feature_key].astype(str).astype("category")
+
             if instance_key is not None:
                 table[instance_key] = data[instance_key]
             for c in [X, Y, Z]:

From 9b60a239e2184e59d6f8290f22ee11272f09bcb9 Mon Sep 17 00:00:00 2001
From: Luca Marconato
Date: Tue, 3 Feb 2026 16:36:51 +0100
Subject: [PATCH 2/2] pin distributed; improve warning for categorical points

---
 pyproject.toml                   |  1 +
 src/spatialdata/models/models.py | 38 ++++++++++++++++++--------------
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 48c61d29..0e813ead 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,7 @@ dependencies = [
     "click",
     "dask-image",
     "dask>=2025.2.0,<2026.1.2",
+    "distributed<2026.1.2",
     "datashader",
     "fsspec[s3,http]",
     "geopandas>=0.14",
diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py
index d9fd0a1c..6a126b02 100644
--- a/src/spatialdata/models/models.py
+++ b/src/spatialdata/models/models.py
@@ -242,6 +242,8 @@ def parse(
         else:
             # Chunk single scale images
             if chunks is not None:
+                if isinstance(chunks, tuple):
+                    chunks = {dim: chunks[index] for index, dim in enumerate(data.dims)}
                 data = data.chunk(chunks=chunks)
             cls()._check_chunk_size_not_too_large(data)
         # recompute coordinates for (multiscale) spatial image
@@ -832,14 +834,8 @@ def _(
                 if data[feature_key].dtype.name == "category":
                     table[feature_key] = data[feature_key]
                 else:
-                    logger.warning(
-                        f"The `feature_key` column {feature_key} is not categorical, converting it now. "
-                        "Please convert the column to categorical before calling `PointsModel.parse()` to "
-                        "avoid significant performance implications due to the need for dask to compute "
-                        "the categories. If you did not use `PointsModel.parse()` explicitly in your code "
-                        "(e.g. this message is coming from a reader in `spatialdata_io`), please report "
-                        "this finding."
-                    )
+                    # this will cause the categories to be unknown and trigger the warning (and performance
+                    # slowdown) in _add_metadata_and_validate()
                     table[feature_key] = data[feature_key].astype(str).astype("category")
 
             if instance_key is not None:
                 table[instance_key] = data[instance_key]
             for c in [X, Y, Z]:
@@ -895,15 +891,20 @@ def _add_metadata_and_validate(
             assert instance_key in data.columns
             data.attrs[ATTRS_KEY][cls.INSTANCE_KEY] = instance_key
 
-        for c in data.columns:
-            # Here we are explicitly importing the categories
-            # but it is a convenient way to ensure that the categories are known.
-            # It also just changes the state of the series, so it is not a big deal.
-            if isinstance(data[c].dtype, CategoricalDtype) and not data[c].cat.known:
-                try:
-                    data[c] = data[c].cat.set_categories(data[c].compute().cat.categories)
-                except ValueError:
-                    logger.info(f"Column `{c}` contains unknown categories. Consider casting it.")
+        if (
+            feature_key is not None
+            and isinstance(data[feature_key].dtype, CategoricalDtype)
+            and not data[feature_key].cat.known
+        ):
+            logger.warning(
+                f"The `feature_key` column {feature_key} is categorical with unknown categories. "
+                "Please ensure the categories are known before calling `PointsModel.parse()` to "
+                "avoid significant performance implications due to the need for dask to compute "
+                "the categories. If you did not use `PointsModel.parse()` explicitly in your code "
+                "(e.g. this message is coming from a reader in `spatialdata_io`), please report "
+                "this finding."
+            )
+            data[feature_key] = data[feature_key].cat.set_categories(data[feature_key].compute().cat.categories)
 
         _parse_transformations(data, transformations)
         cls.validate(data)
@@ -1163,6 +1164,9 @@ def parse(
             The parsed data.
         """
         validate_table_attr_keys(adata)
+        # Convert a view to an actual copy to avoid an ImplicitModificationWarning when modifying .uns
+        if adata.is_view:
+            adata = adata.copy()
         # either all live in adata.uns or all be passed in as argument
         n_args = sum([region is not None, region_key is not None, instance_key is not None])
        if n_args == 0:
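
A short usage sketch of the behaviour targeted by these patches: how a caller can make the `feature_key` column categorical with known categories up front, so that neither the conversion in `PointsModel.parse()` nor the eager category computation in `_add_metadata_and_validate()` is triggered. The column names (`x`, `y`, `gene`) and the sample values are illustrative assumptions, not part of the patches; dask's `Series.cat.as_known()` is the standard way to materialize unknown categories.

    import dask.dataframe as dd
    import pandas as pd

    from spatialdata.models import PointsModel

    # toy points table; "gene" stands in for the feature_key column
    df = pd.DataFrame({"x": [1.0, 2.0], "y": [3.0, 4.0], "gene": ["a", "b"]})

    # pandas input: a plain categorical cast is enough, the categories are known
    df["gene"] = df["gene"].astype("category")
    points = PointsModel.parse(df, coordinates={"x": "x", "y": "y"}, feature_key="gene")

    # dask input: ensure the categories are known, otherwise the warning above is
    # emitted and the categories are computed eagerly during validation
    ddf = dd.from_pandas(df, npartitions=1)
    ddf["gene"] = ddf["gene"].cat.as_known()
    points = PointsModel.parse(ddf, coordinates={"x": "x", "y": "y"}, feature_key="gene")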
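
For the chunking change in the `@@ -242,6 +242,8` hunk, a minimal sketch (with an assumed `(c, y, x)` image and assumed chunk sizes) of what the tuple-to-dict conversion does before `DataArray.chunk()` is called:

    import numpy as np
    import xarray as xr

    data = xr.DataArray(np.zeros((3, 64, 64)), dims=("c", "y", "x"))
    chunks = (1, 32, 32)  # positional chunk sizes, one entry per dimension

    # map positional chunk sizes onto the array's dimension names
    if isinstance(chunks, tuple):
        chunks = {dim: chunks[index] for index, dim in enumerate(data.dims)}
    # chunks is now {"c": 1, "y": 32, "x": 32}
    data = data.chunk(chunks=chunks)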