From d7c1d62a24272fee60bbf0a6431f0535a798ef0e Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 16 Dec 2025 11:20:07 +0100 Subject: [PATCH 1/3] make sure array mask is propagated after applying offset to band --- mapchete_eo/io/assets.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/mapchete_eo/io/assets.py b/mapchete_eo/io/assets.py index 69d7a59..b39e96a 100644 --- a/mapchete_eo/io/assets.py +++ b/mapchete_eo/io/assets.py @@ -32,7 +32,7 @@ class STACRasterBandProperties(BaseModel): - nodata: NodataVal = None + nodata: Optional[NodataVal] = None data_type: Optional[str] = None scale: float = 1.0 offset: float = 0.0 @@ -40,9 +40,9 @@ class STACRasterBandProperties(BaseModel): @staticmethod def from_asset( asset: pystac.Asset, - nodataval: NodataVal = None, + nodataval: Optional[NodataVal] = None, ) -> STACRasterBandProperties: - if asset.extra_fields.get("raster:offset") is not None: + if asset.extra_fields.get("raster:offset", {}): properties = dict( offset=asset.extra_fields.get("raster:offset"), scale=asset.extra_fields.get("raster:scale"), @@ -87,16 +87,22 @@ def asset_to_np_array( ) logger.debug("reading asset %s and indexes %s ...", asset, indexes) - data = read_raster( + array = read_raster( inp=path, indexes=indexes, grid=grid, resampling=resampling.name, dst_nodata=band_properties.nodata, - ).data + ).masked_array() if apply_offset and band_properties.offset: - data_type = band_properties.data_type or data.dtype + logger.debug( + "apply offset %s and scale %s to asset %s", + band_properties.offset, + band_properties.scale, + asset, + ) + data_type = band_properties.data_type or array.dtype # determine value range for the target data_type clip_min, clip_max = dtype_ranges[str(data_type)] @@ -105,9 +111,9 @@ def asset_to_np_array( if clip_min == band_properties.nodata: clip_min += 1 - data[:] = ( + array[~array.mask] = ( ( - ((data * band_properties.scale) + band_properties.offset) + ((array[~array.mask] * band_properties.scale) + band_properties.offset) / band_properties.scale ) .round() @@ -115,8 +121,7 @@ def asset_to_np_array( .astype(data_type, copy=False) .data ) - - return data + return array def get_assets( From 111995dfab6814da8b4cf8ac1fbfa5af9f789f4b Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Tue, 16 Dec 2025 12:55:39 +0100 Subject: [PATCH 2/3] use ReferencedRaster.array --- mapchete_eo/io/assets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mapchete_eo/io/assets.py b/mapchete_eo/io/assets.py index b39e96a..6f421be 100644 --- a/mapchete_eo/io/assets.py +++ b/mapchete_eo/io/assets.py @@ -93,8 +93,7 @@ def asset_to_np_array( grid=grid, resampling=resampling.name, dst_nodata=band_properties.nodata, - ).masked_array() - + ).array if apply_offset and band_properties.offset: logger.debug( "apply offset %s and scale %s to asset %s", From cfeb625056190048e63dce3215a799fa2839426a Mon Sep 17 00:00:00 2001 From: Scartography Date: Thu, 8 Jan 2026 11:04:37 +0100 Subject: [PATCH 3/3] remove pytest.mark from conftest fixtures depredicated and not doing anything add patch_invalid_assets for stac_search for string assets; patch of pystac FileExtensionHooks --- mapchete_eo/search/config.py | 47 +++++++++++++++++++++++++++++++ mapchete_eo/search/stac_search.py | 20 +++++++------ tests/conftest.py | 7 ----- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/mapchete_eo/search/config.py b/mapchete_eo/search/config.py index 0f3f791..6f4ad0b 100644 --- a/mapchete_eo/search/config.py +++ b/mapchete_eo/search/config.py @@ -1,3 +1,7 @@ +import logging + + +from contextlib import contextmanager from typing import Optional, Dict, Any from mapchete.path import MPath, MPathLike @@ -61,3 +65,46 @@ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]: ), ) search_index: Optional[MPathLike] = None + + +@contextmanager +def patch_invalid_assets(): + """ + Context manager/decorator to fix pystac crash on malformed assets (strings instead of dicts). + + """ + try: + from pystac.extensions.file import FileExtensionHooks + except ImportError: # pragma: no cover + yield + return + + logger = logging.getLogger(__name__) + + _original_migrate = FileExtensionHooks.migrate + + def _safe_migrate(self, obj, version, info): + if "assets" in obj and isinstance(obj["assets"], dict): + bad_keys = [] + for key, asset in obj["assets"].items(): + if not isinstance(asset, dict): + logger.debug( + "Removing malformed asset '%s' (type %s) from item %s", + key, + type(asset), + obj.get("id", "unknown"), + ) + bad_keys.append(key) + + for key in bad_keys: + del obj["assets"][key] + + return _original_migrate(self, obj, version, info) + + # Apply patch + FileExtensionHooks.migrate = _safe_migrate + try: + yield + finally: + # Restore original + FileExtensionHooks.migrate = _original_migrate diff --git a/mapchete_eo/search/stac_search.py b/mapchete_eo/search/stac_search.py index d81ca35..21dbba9 100644 --- a/mapchete_eo/search/stac_search.py +++ b/mapchete_eo/search/stac_search.py @@ -15,7 +15,7 @@ from mapchete_eo.product import blacklist_products from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin -from mapchete_eo.search.config import StacSearchConfig +from mapchete_eo.search.config import StacSearchConfig, patch_invalid_assets from mapchete_eo.settings import mapchete_eo_settings from mapchete_eo.types import TimeRange @@ -138,14 +138,16 @@ def _search_chunks( query=query, ) - for search in _searches(): - for item in search.items(): - if item.get_self_href() in self.blacklist: # pragma: no cover - logger.debug( - "item %s found in blacklist and skipping", item.get_self_href() - ) - continue - yield item + with patch_invalid_assets(): + for search in _searches(): + for item in search.items(): + if item.get_self_href() in self.blacklist: # pragma: no cover + logger.debug( + "item %s found in blacklist and skipping", + item.get_self_href(), + ) + continue + yield item @cached_property def default_search_params(self): diff --git a/tests/conftest.py b/tests/conftest.py index 3dc82a4..2448d61 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -400,7 +400,6 @@ def s2_l2a_safe_metadata(s2_testdata_dir): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_metadata_remote(): return guess_s2metadata_from_metadata_xml( @@ -408,7 +407,6 @@ def s2_l2a_metadata_remote(): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_roda_metadata_remote(): """Same content as s2_l2a_metadata_remote, but hosted on different server.""" @@ -417,7 +415,6 @@ def s2_l2a_roda_metadata_remote(): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_roda_metadata_jp2_masks_remote(): """From about 2022 on, ahte masks are now encoded as JP2 (rasters), not as GMLs (features).""" @@ -426,7 +423,6 @@ def s2_l2a_roda_metadata_jp2_masks_remote(): ) -@pytest.mark.remote @pytest.fixture() def s2_l2a_earthsearch_xml_remote(): """Metadata used by Earth-Search V1 endpoint""" @@ -435,7 +431,6 @@ def s2_l2a_earthsearch_xml_remote(): ) -@pytest.mark.remote @pytest.fixture() def s2_l2a_earthsearch_xml_remote_broken(): """Metadata used by Earth-Search V1 endpoint""" @@ -444,14 +439,12 @@ def s2_l2a_earthsearch_xml_remote_broken(): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_earthsearch_remote(s2_l2a_earthsearch_remote_item): """Metadata used by Earth-Search V1 endpoint""" return guess_s2metadata_from_item(s2_l2a_earthsearch_remote_item) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_earthsearch_remote_item(): """Metadata used by Earth-Search V1 endpoint"""