From 430dd8f54c77d12150c75eb9d1bd7bc02425c527 Mon Sep 17 00:00:00 2001
From: Emmanuel Mathot
Date: Tue, 3 Feb 2026 20:23:03 +0100
Subject: [PATCH] Enhance DataCube loading with support for additional backend-specific parameters

---
 docs/conf.py                               |  4 +-
 docs/cookbook/sampling.md                  | 10 ++---
 examples/archive/udf/udf_modify_spatial.py |  8 ++--
 openeo/processes.py                        | 51 ++++++++++++++--------
 openeo/rest/connection.py                  |  9 ++++
 openeo/rest/datacube.py                    | 11 +++++
 openeo/udf/udf_signatures.py               |  2 +-
 tests/internal/processes/test_generator.py | 16 +++----
 tests/rest/datacube/test_datacube.py       | 41 +++++++++++++++++
 tests/rest/test_connection.py              | 35 +++++++++++++++
 10 files changed, 150 insertions(+), 37 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 641972d6a..583afb9ff 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -13,13 +13,14 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
+import datetime
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import os
 import sys
-import datetime
 
 sys.path.insert(0, os.path.abspath('.'))
 sys.path.insert(0, os.path.abspath('../'))
@@ -45,6 +46,7 @@
 ]
 
 import sphinx_autodoc_typehints
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
diff --git a/docs/cookbook/sampling.md b/docs/cookbook/sampling.md
index 93b8efc27..9fb790164 100644
--- a/docs/cookbook/sampling.md
+++ b/docs/cookbook/sampling.md
@@ -6,10 +6,10 @@
 but rather want to extract a result at specific locations.
 Examples include extracting training data for model calibration, or computing the result for
 areas where validation data is available.
 
-An important constraint is that most implementations assume that sampling is an operation
-on relatively small areas, of for instance up to 512x512 pixels (but often much smaller).
+An important constraint is that most implementations assume that sampling is an operation
+on relatively small areas, for instance up to 512x512 pixels (but often much smaller).
 When extracting polygons with larger areas, it is recommended to look into running a separate job per 'sample'.
-Some more important performance notices are mentioned later in the chapter, please read them carefully
+Further important performance notes are mentioned later in this chapter; please read them carefully
 to get best results.
 
 Sampling can be done for points or polygons:
@@ -23,9 +23,9 @@ public url, and to load it in openEO using {py:meth}`openeo.rest.connection.Conn
 
 ## Sampling at point locations
 
-To sample point locations, the `openeo.rest.datacube.DataCube.aggregate_spatial` method can be used. The reducer can be a
+To sample point locations, the `openeo.rest.datacube.DataCube.aggregate_spatial` method can be used. The reducer can be a
 commonly supported reducer like `min`, `max` or `mean` and will receive only one value as input in most cases. Note that
-in edge cases, a point can intersect with up to 4 pixels. If this is not desirable, it might be worth trying to align
+in edge cases, a point can intersect with up to 4 pixels. If this is not desirable, it might be worth trying to align
 points with pixel centers, which does require more advanced knowledge of the pixel grid of your data cube.
 More information on `aggregate_spatial` is available [here](_aggregate-spatial-evi).
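Point sampling with `aggregate_spatial` as described in this cookbook page could look roughly like the following minimal sketch (the backend URL, collection id, band names and point coordinates are placeholder assumptions, not part of this patch):

```python
import openeo

# Placeholder backend URL and collection id, for illustration only.
connection = openeo.connect("openeo.example.com").authenticate_oidc()

cube = connection.load_collection(
    "SENTINEL2_L2A",
    temporal_extent=["2023-05-01", "2023-06-01"],
    bands=["B04", "B08"],
)

# Point locations as a GeoJSON FeatureCollection (coordinates are made up).
points = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {"type": "Point", "coordinates": [5.02, 51.21]},
        },
        {
            "type": "Feature",
            "properties": {},
            "geometry": {"type": "Point", "coordinates": [5.61, 51.33]},
        },
    ],
}

# Each point typically yields a single pixel value per band and per timestep
# (up to 4 pixels in the edge cases mentioned above).
samples = cube.aggregate_spatial(geometries=points, reducer="mean")
samples.execute_batch("samples.csv", out_format="CSV")
```

Keeping the sampled output in a tabular format such as CSV makes the per-point time series easy to post-process.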
diff --git a/examples/archive/udf/udf_modify_spatial.py b/examples/archive/udf/udf_modify_spatial.py
index db0ccabe7..f0b754cb4 100644
--- a/examples/archive/udf/udf_modify_spatial.py
+++ b/examples/archive/udf/udf_modify_spatial.py
@@ -1,8 +1,10 @@
+import numpy as np
 import xarray
+
+from openeo.metadata import CollectionMetadata
 from openeo.udf import XarrayDataCube
 from openeo.udf.debug import inspect
-from openeo.metadata import CollectionMetadata
-import numpy as np
+
 
 def apply_metadata(input_metadata:CollectionMetadata, context:dict) -> CollectionMetadata:
@@ -41,4 +43,4 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
     predicted_cube = xarray.DataArray(predicted_array, dims=['bands', 'x', 'y'], coords=dict(x=coord_x, y=coord_y))
 
-    return XarrayDataCube(predicted_cube)
\ No newline at end of file
+    return XarrayDataCube(predicted_cube)
diff --git a/openeo/processes.py b/openeo/processes.py
index 7226b948b..2fe29750c 100644
--- a/openeo/processes.py
+++ b/openeo/processes.py
@@ -2879,7 +2879,8 @@ def aggregate_spatial(data, geometries, reducer, target_dimension=UNSET, context
     aggregated values (i.e. no-data). The spatial dimensions are replaced by a dimension of type
     'geometries' and if `target_dimension` is not `null`, a new dimension is added.
     """
-    return _process('aggregate_spatial',
+    return _process(
+        "aggregate_spatial",
         data=data,
         geometries=geometries,
         reducer=build_child_callback(reducer, parent_parameters=['data', 'context']),
@@ -2915,7 +2916,8 @@ def aggregate_spatial_window(data, reducer, size, boundary=UNSET, align=UNSET, c
     labels will be set to the coordinate at the center of the window. The other dimension properties
     (name, type and reference system) remain unchanged.
     """
-    return _process('aggregate_spatial_window',
+    return _process(
+        "aggregate_spatial_window",
         data=data,
         reducer=build_child_callback(reducer, parent_parameters=['data', 'context']),
         size=size,
@@ -2954,7 +2956,8 @@ def aggregate_temporal(data, intervals, reducer, labels=UNSET, dimension=UNSET,
     system and resolution) remain unchanged, except for the resolution and dimension labels of the
     given temporal dimension.
     """
-    return _process('aggregate_temporal',
+    return _process(
+        "aggregate_temporal",
         data=data,
         intervals=intervals,
         reducer=build_child_callback(reducer, parent_parameters=['data', 'context']),
@@ -3008,7 +3011,8 @@ def aggregate_temporal_period(data, period, reducer, dimension=UNSET, context=UN
     the source data cube has just one dimension label `2020-01-05`, the process returns a data cube
     with just a single dimension label (`2020-005`).
     """
-    return _process('aggregate_temporal_period',
+    return _process(
+        "aggregate_temporal_period",
         data=data,
         period=period,
         reducer=build_child_callback(reducer, parent_parameters=['data', 'context']),
@@ -3149,7 +3153,8 @@ def apply_dimension(data, process, dimension, target_dimension=UNSET, context=UN
     incrementing integers starting from zero, - the resolution changes, and - the reference system is
     undefined.
     """
-    return _process('apply_dimension',
+    return _process(
+        "apply_dimension",
         data=data,
         process=build_child_callback(process, parent_parameters=['data', 'context']),
         dimension=dimension,
@@ -3207,7 +3212,8 @@ def apply_neighborhood(data, process, size, overlap=UNSET, context=UNSET) -> Pro
     :return: A raster data cube with the newly computed values and the same dimensions. The dimension
         properties (name, type, labels, reference system and resolution) remain unchanged.
""" - return _process('apply_neighborhood', + return _process( + "apply_neighborhood", data=data, process=build_child_callback(process, parent_parameters=['data', 'context']), size=size, @@ -3236,7 +3242,8 @@ def apply_polygon(data, polygons, process, mask_value=UNSET, context=UNSET) -> P :return: A data cube with the newly computed values and the same dimensions. The dimension properties (name, type, labels, reference system and resolution) remain unchanged. """ - return _process('apply_polygon', + return _process( + "apply_polygon", data=data, polygons=polygons, process=build_child_callback(process, parent_parameters=['data', 'context']), @@ -3330,7 +3337,8 @@ def ard_normalized_radar_backscatter(data, elevation_model=UNSET, contributing_a DEM-based local incidence angles in degrees. The data returned is CARD4L compliant with corresponding metadata. """ - return _process('ard_normalized_radar_backscatter', + return _process( + "ard_normalized_radar_backscatter", data=data, elevation_model=elevation_model, contributing_area=contributing_area, @@ -3385,7 +3393,8 @@ def ard_surface_reflectance(data, atmospheric_correction_method, cloud_detection (optional): Contains coefficients used for terrain illumination correction are provided for each pixel. The data returned is CARD4L compliant with corresponding metadata. """ - return _process('ard_surface_reflectance', + return _process( + "ard_surface_reflectance", data=data, atmospheric_correction_method=atmospheric_correction_method, cloud_detection_method=cloud_detection_method, @@ -3425,7 +3434,8 @@ def array_apply(data, process, context=UNSET) -> ProcessBuilder: :return: An array with the newly computed values. The number of elements are the same as for the original array. """ - return _process('array_apply', + return _process( + "array_apply", data=data, process=build_child_callback(process, parent_parameters=['x', 'index', 'label', 'context']), context=context @@ -3515,7 +3525,8 @@ def array_filter(data, condition, context=UNSET) -> ProcessBuilder: :return: An array filtered by the specified condition. The number of elements are less than or equal compared to the original array. """ - return _process('array_filter', + return _process( + "array_filter", data=data, condition=build_child_callback(condition, parent_parameters=['x', 'index', 'label', 'context']), context=context @@ -4068,7 +4079,8 @@ def filter_labels(data, condition, dimension, context=UNSET) -> ProcessBuilder: system and resolution) remain unchanged, except that the given dimension has less (or the same) dimension labels. """ - return _process('filter_labels', + return _process( + "filter_labels", data=data, condition=build_child_callback(condition, parent_parameters=['value', 'context']), dimension=dimension, @@ -4168,7 +4180,8 @@ def fit_curve(data, parameters, function, ignore_nodata=UNSET) -> ProcessBuilder :return: An array with the optimal values for the parameters. """ - return _process('fit_curve', + return _process( + "fit_curve", data=data, parameters=parameters, function=build_child_callback(function, parent_parameters=['x', 'parameters']), @@ -4716,7 +4729,8 @@ def merge_cubes(cube1, cube2, overlap_resolver=UNSET, context=UNSET) -> ProcessB :return: The merged data cube. See the process description for details regarding the dimensions and dimension properties (name, type, labels, reference system and resolution). 
""" - return _process('merge_cubes', + return _process( + "merge_cubes", cube1=cube1, cube2=cube2, overlap_resolver=(build_child_callback(overlap_resolver, parent_parameters=['x', 'y', 'context']) if overlap_resolver not in [None, UNSET] else overlap_resolver), @@ -4915,7 +4929,8 @@ def predict_curve(parameters, function, dimension, labels=UNSET) -> ProcessBuild :return: A data cube with the predicted values with the provided dimension `dimension` having as many labels as provided through `labels`. """ - return _process('predict_curve', + return _process( + "predict_curve", parameters=parameters, function=build_child_callback(function, parent_parameters=['x', 'parameters']), dimension=dimension, @@ -5019,7 +5034,8 @@ def reduce_dimension(data, reducer, dimension, context=UNSET) -> ProcessBuilder: dimensions decreases by one. The dimension properties (name, type, labels, reference system and resolution) for all other dimensions remain unchanged. """ - return _process('reduce_dimension', + return _process( + "reduce_dimension", data=data, reducer=build_child_callback(reducer, parent_parameters=['data', 'context']), dimension=dimension, @@ -5250,7 +5266,8 @@ def sar_backscatter(data, coefficient=UNSET, elevation_model=UNSET, mask=UNSET, :return: Backscatter values corresponding to the chosen parametrization. The values are given in linear scale. """ - return _process('sar_backscatter', + return _process( + "sar_backscatter", data=data, coefficient=coefficient, elevation_model=elevation_model, diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index d4d4d5995..00132068d 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1240,6 +1240,7 @@ def load_collection( ] = None, max_cloud_cover: Optional[float] = None, fetch_metadata: bool = True, + **kwargs, ) -> DataCube: """ Load a DataCube by collection id. @@ -1262,6 +1263,9 @@ def load_collection( :param properties: limit data by collection metadata property predicates. See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates. :param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property) + :param kwargs: additional backend-specific parameters to pass to ``load_collection``. + These allow leveraging backend-specific features not covered by the standard openEO API, + for example: ``nodata`` (force specific nodata value), ``target_crs`` (output CRS), etc. :return: a datacube containing the requested data .. versionadded:: 0.13.0 @@ -1276,6 +1280,10 @@ def load_collection( .. versionchanged:: 0.37.0 Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. + + .. versionadded:: 0.48.0 + added ``**kwargs`` for additional backend-specific parameters. + """ return DataCube.load_collection( collection_id=collection_id, @@ -1286,6 +1294,7 @@ def load_collection( properties=properties, max_cloud_cover=max_cloud_cover, fetch_metadata=fetch_metadata, + **kwargs, ) # TODO: remove this #100 #134 0.4.10 diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index a976bd550..1a6dcad80 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -175,6 +175,7 @@ def load_collection( Dict[str, Union[PGNode, typing.Callable]], List[CollectionProperty], CollectionProperty, None ] = None, max_cloud_cover: Optional[float] = None, + **kwargs, ) -> DataCube: """ Create a new Raster Data cube. 
@@ -199,6 +200,9 @@
         :param properties: limit data by metadata property predicates.
             See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates.
         :param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property)
+        :param kwargs: additional backend-specific parameters to pass to ``load_collection``.
+            These allow leveraging backend-specific features not covered by the standard openEO API,
+            for example ``nodata`` (force a specific nodata value), ``target_crs`` (output CRS), etc.
 
         :return: new DataCube containing the collection
 
         .. versionchanged:: 0.13.0
@@ -213,6 +217,10 @@
 
         .. versionchanged:: 0.37.0
             Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file.
+
+        .. versionadded:: 0.48.0
+            Added ``**kwargs`` for additional backend-specific parameters.
+
         """
         if temporal_extent:
             temporal_extent = cls._get_temporal_extent(extent=temporal_extent)
@@ -255,6 +263,9 @@
         if properties is not None:
             arguments["properties"] = properties
 
+        # Add any additional backend-specific parameters
+        arguments.update(kwargs)
+
         pg = PGNode(
             process_id='load_collection',
             arguments=arguments
diff --git a/openeo/udf/udf_signatures.py b/openeo/udf/udf_signatures.py
index 2ecd49068..dafd4d4f2 100644
--- a/openeo/udf/udf_signatures.py
+++ b/openeo/udf/udf_signatures.py
@@ -6,8 +6,8 @@
 """
 # Note: this module was initially developed under the ``openeo-udf`` project (https://github.com/Open-EO/openeo-udf)
 
-from deprecated import deprecated
 import xarray
+from deprecated import deprecated
 from pandas import Series
 
 from openeo.metadata import CollectionMetadata
diff --git a/tests/internal/processes/test_generator.py b/tests/internal/processes/test_generator.py
index 840c56b2a..06064ce4a 100644
--- a/tests/internal/processes/test_generator.py
+++ b/tests/internal/processes/test_generator.py
@@ -272,8 +272,7 @@ def test_render_process_graph_callback_wrapping():
 
     renderer = PythonRenderer(optional_default="UNSET")
     src = renderer.render_process(process, width=80)
-    assert src == dedent(
-        '''\
+    assert src == dedent('''\
         def apply_dimension(data, dimension, process):
             """
             Apply
@@ -284,12 +283,11 @@ def apply_dimension(data, dimension, process):
 
             :return: Data cube
             """
-            return _process('apply_dimension',
+            return _process('apply_dimension', 
                 data=data,
                 dimension=dimension,
                 process=build_child_callback(process, parent_parameters=['data'])
-            )'''
-    )
+            )''')
 
 
 def test_render_process_graph_optional_callback():
@@ -321,8 +319,7 @@ def test_render_process_graph_optional_callback():
 
     renderer = PythonRenderer(optional_default="UNSET")
     src = renderer.render_process(process)
-    assert src == dedent(
-        '''\
+    assert src == dedent('''\
         def apply(data, process=UNSET):
             """
             Apply
@@ -332,11 +329,10 @@ def apply(data, process=UNSET):
 
             :return: Data cube
             """
-            return _process('apply',
+            return _process('apply', 
                 data=data,
                 process=(build_child_callback(process, parent_parameters=['data']) if process not in [None, UNSET] else process)
-            )'''
-    )
+            )''')
 
 
 def test_collect_processes_basic(tmp_path, test_data):
diff --git a/tests/rest/datacube/test_datacube.py b/tests/rest/datacube/test_datacube.py
index 4389d52ce..356acff1c 100644
--- a/tests/rest/datacube/test_datacube.py
+++ b/tests/rest/datacube/test_datacube.py
@@ -208,6 +208,47 @@ def test_load_collection_spatial_extent_parameter(self, dummy_backend):
             "temporal_extent": None,
         }
 
+    def test_load_collection_extra_params_connectionless(self):
+        """Test passing additional backend-specific parameters without connection."""
+        cube = DataCube.load_collection(
+            "T3",
+            spatial_extent={"west": 1, "south": 2, "east": 3, "north": 4},
+            nodata=0,
+            target_crs=32632,
+        )
+        assert cube.flat_graph() == {
+            "loadcollection1": {
+                "process_id": "load_collection",
+                "arguments": {
+                    "id": "T3",
+                    "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4},
+                    "temporal_extent": None,
+                    "nodata": 0,
+                    "target_crs": 32632,
+                },
+                "result": True,
+            }
+        }
+
+    def test_load_collection_extra_params_with_connection(self, dummy_backend):
+        """Test passing additional backend-specific parameters with connection."""
+        cube = DataCube.load_collection(
+            "S2",
+            connection=dummy_backend.connection,
+            spatial_extent={"west": 1, "south": 2, "east": 3, "north": 4},
+            temporal_extent=["2023-01-01", "2023-06-01"],
+            nodata=0,
+            tile_buffer=16,
+        )
+        cube.execute()
+        assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == {
+            "id": "S2",
+            "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4},
+            "temporal_extent": ["2023-01-01", "2023-06-01"],
+            "nodata": 0,
+            "tile_buffer": 16,
+        }
+
     def test_load_collection_connectionless_save_result(self):
         cube = DataCube.load_collection("T3").save_result(format="GTiff")
         assert cube.flat_graph() == {
diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py
index 6da731f71..b61f29950 100644
--- a/tests/rest/test_connection.py
+++ b/tests/rest/test_connection.py
@@ -3027,6 +3027,41 @@ def test_load_collection_spatial_extent_vector_cube(self, dummy_backend):
             },
         }
 
+    def test_load_collection_extra_params(self, dummy_backend):
+        """Test passing additional backend-specific parameters via **kwargs."""
+        spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4}
+        cube = dummy_backend.connection.load_collection(
+            "S2",
+            spatial_extent=spatial_extent,
+            temporal_extent=["2023-01-01", "2023-06-01"],
+            bands=["B2", "B3"],
+            nodata=0,
+            target_crs=32632,
+        )
+        cube.execute()
+        assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == {
+            "id": "S2",
+            "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4},
+            "temporal_extent": ["2023-01-01", "2023-06-01"],
+            "bands": ["B2", "B3"],
+            "nodata": 0,
+            "target_crs": 32632,
+        }
+
+    def test_load_collection_extra_params_only(self, dummy_backend):
+        """Test passing only backend-specific parameters."""
+        cube = dummy_backend.connection.load_collection(
+            "S2",
+            tile_buffer=16,
+        )
+        cube.execute()
+        assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == {
+            "id": "S2",
+            "spatial_extent": None,
+            "temporal_extent": None,
+            "tile_buffer": 16,
+        }
+
 
 def test_load_result(requests_mock):
     requests_mock.get(API_URL, json={"api_version": "1.0.0"})
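From the user's perspective, the feature added by this patch can be exercised as in the sketch below, which mirrors the tests above (the backend URL is a placeholder, and `nodata` and `tile_buffer` are examples of backend-specific parameters that only some backends will understand):

```python
import openeo

# Placeholder backend URL, for illustration only.
connection = openeo.connect("openeo.example.com")

# Standard arguments plus backend-specific extras passed via **kwargs.
cube = connection.load_collection(
    "S2",
    spatial_extent={"west": 1, "south": 2, "east": 3, "north": 4},
    temporal_extent=["2023-01-01", "2023-06-01"],
    nodata=0,
    tile_buffer=16,
)

# The extra parameters are forwarded verbatim into the "load_collection"
# node of the process graph:
print(cube.flat_graph()["loadcollection1"]["arguments"])
# {'id': 'S2', 'spatial_extent': {...}, 'temporal_extent': [...],
#  'nodata': 0, 'tile_buffer': 16}
```

Note that since `arguments.update(kwargs)` applies the extras verbatim, unsupported parameter names are only caught by the backend, not client-side.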