From 8d2e45a52f821c6eb59973d0e63bdbc9c3f2c9ff Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Wed, 30 Apr 2025 15:14:10 +1200 Subject: [PATCH 01/31] rename Squish to Squeeze --- notebooks/tutorial/Accessing_ERA5_Data.ipynb | 4 +- notebooks/tutorial/CNN_model_training.ipynb | 8 ++-- notebooks/tutorial/Data_Pipelines.ipynb | 4 +- notebooks/tutorial/Downloading_ERA5.ipynb | 2 +- .../tutorial/Interfacing_to_Data_at_NCI.ipynb | 2 +- notebooks/tutorial/MultipleSources.ipynb | 2 +- .../tutorial/Working_with_Climate_Data.ipynb | 2 +- .../pipeline/assets/ComplexPipeline.svg | 2 +- .../pipeline/assets/pipeline_example.svg | 2 +- .../docs/documentation/pipeline/available.md | 4 +- .../docs/documentation/pipeline/creating.md | 38 +++++++++++-------- .../pipeline/details/operation.md | 10 ++--- old_docs/docs/documentation/pipeline/index.md | 13 ++++--- packages/pipeline/assets/pipeline_example.svg | 2 +- .../pipeline/operations/dask/__init__.py | 2 +- .../pipeline/operations/dask/reshape.py | 8 ++-- .../pipeline/operations/numpy/__init__.py | 2 +- .../pipeline/operations/numpy/reshape.py | 10 ++--- 18 files changed, 62 insertions(+), 55 deletions(-) diff --git a/notebooks/tutorial/Accessing_ERA5_Data.ipynb b/notebooks/tutorial/Accessing_ERA5_Data.ipynb index 82186fe1..eaca8e4f 100644 --- a/notebooks/tutorial/Accessing_ERA5_Data.ipynb +++ b/notebooks/tutorial/Accessing_ERA5_Data.ipynb @@ -2332,7 +2332,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pet-tutorials", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -2346,7 +2346,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.11" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/notebooks/tutorial/CNN_model_training.ipynb b/notebooks/tutorial/CNN_model_training.ipynb index 8beff0e3..16e2b6f8 100644 --- a/notebooks/tutorial/CNN_model_training.ipynb +++ b/notebooks/tutorial/CNN_model_training.ipynb @@ -782,10 +782,10 @@ " * 
Input data: (-1, 1) tuple is used to select a single timestamp at -1hrs\n", " * Target data: (6, 1) tuple is used to select a single timestamp at +5hrs ahead. \n", "\n", - "### Aditional Steps:\n", + "### Additional Steps:\n", "1. Export to a NumPy array.\n", "2. Rearrange the axes of the NumPy array.\n", - "3. Remove dimensions of size 1 via a \"squish\" (equivalent to a NumPy \"squeeze\") operation." + "3. Remove dimensions of size 1 via a \"squeeze\" operation." ] }, { @@ -1347,7 +1347,7 @@ " ),\n", " pyearthtools.pipeline.operations.xarray.conversion.ToNumpy(),\n", " pyearthtools.pipeline.operations.numpy.reshape.Rearrange(\"c t h w -> t c h w\"),\n", - " pyearthtools.pipeline.operations.numpy.reshape.Squish(axis=0),\n", + " pyearthtools.pipeline.operations.numpy.reshape.Squeeze(axis=0),\n", ")\n", "data_preparation" ] @@ -8350,7 +8350,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/notebooks/tutorial/Data_Pipelines.ipynb b/notebooks/tutorial/Data_Pipelines.ipynb index aa97a35a..599c25f9 100644 --- a/notebooks/tutorial/Data_Pipelines.ipynb +++ b/notebooks/tutorial/Data_Pipelines.ipynb @@ -116,7 +116,7 @@ " # These methods will be explained when we create a pipeline for machine learning. 
\n", " # pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten('level'),\n", " # pyearthtools.pipeline.operations.xarray.conversion.ToNumpy(),\n", - " # pyearthtools.pipeline.operations.numpy.reshape.Squish(1),\n", + " # pyearthtools.pipeline.operations.numpy.reshape.Squeeze(1),\n", ")\n" ] }, @@ -2081,7 +2081,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/notebooks/tutorial/Downloading_ERA5.ipynb b/notebooks/tutorial/Downloading_ERA5.ipynb index 1438c9e1..5fcedd05 100644 --- a/notebooks/tutorial/Downloading_ERA5.ipynb +++ b/notebooks/tutorial/Downloading_ERA5.ipynb @@ -106,7 +106,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/notebooks/tutorial/Interfacing_to_Data_at_NCI.ipynb b/notebooks/tutorial/Interfacing_to_Data_at_NCI.ipynb index d053b530..35f789fc 100644 --- a/notebooks/tutorial/Interfacing_to_Data_at_NCI.ipynb +++ b/notebooks/tutorial/Interfacing_to_Data_at_NCI.ipynb @@ -1246,7 +1246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/notebooks/tutorial/MultipleSources.ipynb b/notebooks/tutorial/MultipleSources.ipynb index 4a80497f..5e21085b 100644 --- a/notebooks/tutorial/MultipleSources.ipynb +++ b/notebooks/tutorial/MultipleSources.ipynb @@ -4534,7 +4534,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/notebooks/tutorial/Working_with_Climate_Data.ipynb b/notebooks/tutorial/Working_with_Climate_Data.ipynb index b7ac0275..3df0395e 100644 --- a/notebooks/tutorial/Working_with_Climate_Data.ipynb +++ b/notebooks/tutorial/Working_with_Climate_Data.ipynb @@ -5483,7 +5483,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.9" } }, "nbformat": 4, diff --git a/old_docs/docs/documentation/pipeline/assets/ComplexPipeline.svg b/old_docs/docs/documentation/pipeline/assets/ComplexPipeline.svg index c03b837a..49fac8f1 100644 --- a/old_docs/docs/documentation/pipeline/assets/ComplexPipeline.svg +++ b/old_docs/docs/documentation/pipeline/assets/ComplexPipeline.svg @@ -211,7 +211,7 @@ Squish_2f9471ae-d492-4692-8c44-46bcb75a5526 -reshape.Squish +reshape.Squeeze diff --git a/old_docs/docs/documentation/pipeline/assets/pipeline_example.svg b/old_docs/docs/documentation/pipeline/assets/pipeline_example.svg index c2b4a3b7..8ca41d66 100644 --- a/old_docs/docs/documentation/pipeline/assets/pipeline_example.svg +++ b/old_docs/docs/documentation/pipeline/assets/pipeline_example.svg @@ -91,7 +91,7 @@ Squish_7b413dbf-6cbe-4cf1-a375-17fc36cc9eda -Squish +Squeeze diff --git a/old_docs/docs/documentation/pipeline/available.md b/old_docs/docs/documentation/pipeline/available.md index bbbfc157..01ce30e3 100644 --- a/old_docs/docs/documentation/pipeline/available.md +++ b/old_docs/docs/documentation/pipeline/available.md @@ -47,7 +47,7 @@ Here are the default operations included with `pyearthtools.pipeline`, accessibl | filters | Filter data when iterating | `DropAnyNan`, `DropAllNan`, `DropValue`, `Shape` | | join | Combine tuples of `np.ndarrays` | `Stack`, `VStack`, `HStack`, `Concatenate` | | normalisation | Normalise arrays | `Anomaly`, `Deviation`, `Division`, `Evaluated` | -| reshape | Reshape numpy array | `Rearrange`, `Squish`, `Expand`, `Flatten`, `SwapAxis` | +| reshape | Reshape numpy array | `Rearrange`, `Squeeze`, `Expand`, `Flatten`, `SwapAxis` | | select | Select elements from array | `Select`, `Slice` | | split | Split numpy arrays into tuples | `OnAxis`, `OnSlice`, `VSplit`, `HSplit` | | values | Modify values of arrays | `FillNan`, `MaskValue`, `ForceNormalised` | @@ -67,7 +67,7 @@ Here are 
the default operations included with `pyearthtools.pipeline`, accessibl | filters | Filter data when iterating | `DropAnyNan`, `DropAllNan`, `DropValue`, `Shape` | | join | Combine tuples of `np.ndarrays` | `Stack`, `VStack`, `HStack`, `Concatenate` | | normalisation | Normalise arrays | `Anomaly`, `Deviation`, `Division`, `Evaluated` | -| reshape | Reshape numpy array | `Rearrange`, `Squish`, `Expand`, `Flatten`, `SwapAxis` | +| reshape | Reshape numpy array | `Rearrange`, `Squeeze`, `Expand`, `Flatten`, `SwapAxis` | | select | Select elements from array | `Select`, `Slice` | | split | Split numpy arrays into tuples | `OnAxis`, `OnSlice`, `VSplit`, `HSplit` | | values | Modify values of arrays | `FillNan`, `MaskValue`, `ForceNormalised` | diff --git a/old_docs/docs/documentation/pipeline/creating.md b/old_docs/docs/documentation/pipeline/creating.md index 55035306..5c56cf51 100644 --- a/old_docs/docs/documentation/pipeline/creating.md +++ b/old_docs/docs/documentation/pipeline/creating.md @@ -48,27 +48,29 @@ pyearthtools.pipeline.Pipeline( ( pyearthtools.data.archive.ERA5(['tcwv', 'skt', 'sp']), pyearthtools.pipeline.operations.Transforms( - apply = pyearthtools.pipeline.operations.transform.AddCoordinates(('latitude', 'longitude'))), + apply=pyearthtools.pipeline.operations.transform.AddCoordinates(('latitude', 'longitude'))), pyearthtools.pipeline.operations.xarray.Sort(('var_latitude', 'var_longitude', 'tcwv', 'skt', 'sp'))), ( - pyearthtools.data.archive.ERA5(['t', 'u', 'v'], level_value = [1,50,150,250,400,600,750,900,1000]), + pyearthtools.data.archive.ERA5(['t', 'u', 'v'], level_value=[1, 50, 150, 250, 400, 600, 750, 900, 1000]), pyearthtools.pipeline.operations.xarray.Sort(('t', 'u', 'v')) ), ( ( - pyearthtools.data.archive.ERA5( - ['mtnlwrf', 'msdwswrf', 'msdwlwrf', 'mtpr', 'mslhf', 'msshf', 'mtnswrf', 'mtdwswrf', 'msnswrf', 'msnlwrf'], - transforms = pyearthtools.data.transforms.derive( - mtupswrf = 'mtnswrf - mtdwswrf', - msupswrf = 'msnswrf - 
msdwswrf', - msuplwrf = 'msnlwrf - msdwlwrf', - drop = True + pyearthtools.data.archive.ERA5( + ['mtnlwrf', 'msdwswrf', 'msdwlwrf', 'mtpr', 'mslhf', 'msshf', 'mtnswrf', 'mtdwswrf', 'msnswrf', + 'msnlwrf'], + transforms=pyearthtools.data.transforms.derive( + mtupswrf='mtnswrf - mtdwswrf', + msupswrf='msnswrf - msdwswrf', + msuplwrf='msnlwrf - msdwlwrf', + drop=True ) ), - pyearthtools.data.archive.ERA5('!accumulate[period:"6 hours"]:tp>tp_accum') + pyearthtools.data.archive.ERA5('!accumulate[period:"6 hours"]:tp>tp_accum') ), pyearthtools.pipeline.operations.xarray.Merge(), - pyearthtools.pipeline.operations.xarray.Sort(('mslhf', 'msshf', 'msuplwrf', 'msupswrf', 'mtnlwrf', 'mtpr', 'mtupswrf', 'tp_accum')) + pyearthtools.pipeline.operations.xarray.Sort( + ('mslhf', 'msshf', 'msuplwrf', 'msupswrf', 'mtnlwrf', 'mtpr', 'mtupswrf', 'tp_accum')) ), ( pyearthtools.data.archive.ERA5(['mtdwswrf', 'z_surface', 'lsm', 'ci']), @@ -78,15 +80,19 @@ pyearthtools.pipeline.Pipeline( pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten('level', skip_missing=True), pyearthtools.pipeline.operations.xarray.reshape.Dimensions(('time', 'latitude', 'longitude')), pyearthtools.pipeline.operations.Transforms( - apply = pyearthtools.data.transform.coordinates.pad(coordinates = {'latitude': 1, 'longitude': 1}, mode = 'wrap') + pyearthtools.data.transforms.values.fill(coordinates = ['latitude', 'longitude'], direction = 'forward') + pyearthtools.data.transforms.interpolation.like(pipe['2020-01-01T00'], drop_coords = 'time')), + apply=pyearthtools.data.transform.coordinates.pad(coordinates={'latitude': 1, 'longitude': 1}, + mode='wrap') + pyearthtools.data.transforms.values.fill( + coordinates=['latitude', 'longitude'], + direction='forward') + pyearthtools.data.transforms.interpolation.like(pipe['2020-01-01T00'], + drop_coords='time')), pyearthtools.pipeline.operations.xarray.Merge(), - pyearthtools.pipeline.operations.xarray.Sort(order, safe = True), + 
pyearthtools.pipeline.operations.xarray.Sort(order, safe=True), pyearthtools.pipeline.operations.xarray.conversion.ToDask(), - pyearthtools.pipeline.operations.dask.reshape.Squish(axis=1), - pyearthtools.pipeline.modifications.Cache('temp', pattern_kwargs = dict(extension = 'npy')) - pyearthtools.pipeline.modifications.TemporalRetrieval(((-6, 1), (6,1))), + pyearthtools.pipeline.operations.dask.reshape.Squeeze(axis=1), + pyearthtools.pipeline.modifications.Cache('temp', pattern_kwargs=dict(extension='npy')) +pyearthtools.pipeline.modifications.TemporalRetrieval(((-6, 1), (6, 1))), ) ``` ![Pipeline Graph](./assets/ComplexPipeline.svg) diff --git a/old_docs/docs/documentation/pipeline/details/operation.md b/old_docs/docs/documentation/pipeline/details/operation.md index ea37a116..6139863e 100644 --- a/old_docs/docs/documentation/pipeline/details/operation.md +++ b/old_docs/docs/documentation/pipeline/details/operation.md @@ -60,7 +60,7 @@ inverse = example_operation.T ## Example Implementation -Here is the implementation of `numpy.reshape.Squish`, to flatten a one element axis of an array +Here is the implementation of `numpy.reshape.Squeeze`, to flatten a one element axis of an array ```python from typing import Union, Optional, Any @@ -68,16 +68,16 @@ import numpy as np from pyearthtools.pipeline.operation import Operation -class Squish(Operation): +class Squeeze(Operation): """ - Operation to Squish one Dimensional axis at 'axis' location + Operation to Squeeze one Dimensional axis at 'axis' location """ _override_interface = ["Delayed", "Serial"] # Which parallel interfaces to use in order of priority. 
- _interface_kwargs = {"Delayed": {"name": "Squish"}} + _interface_kwargs = {"Delayed": {"name": "Squeeze"}} def __init__(self, axis: Union[tuple[int, ...], int]) -> None: - """Squish Dimension of Data + """Squeeze Dimension of Data Args: axis (Union[tuple[int, ...], int]): diff --git a/old_docs/docs/documentation/pipeline/index.md b/old_docs/docs/documentation/pipeline/index.md index eae7f475..e6b9956f 100644 --- a/old_docs/docs/documentation/pipeline/index.md +++ b/old_docs/docs/documentation/pipeline/index.md @@ -15,20 +15,21 @@ Here is an example pipeline, for use with PanguWeather. import pyearthtools.data import pyearthtools.pipeline - data_preperation = pyearthtools.pipeline.Pipeline( ( pyearthtools.data.archive.ERA5(['msl', '10u', '10v', '2t']), - pyearthtools.data.archive.ERA5(['z', 'q', 't', 'u', 'v'], level_value = [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]) + pyearthtools.data.archive.ERA5(['z', 'q', 't', 'u', 'v'], + level_value=[50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]) ), pyearthtools.pipeline.operations.xarray.Merge(), pyearthtools.pipeline.operations.xarray.Sort(['msl', '10u', '10v', '2t', 'z', 'q', 't', 'u', 'v']), pyearthtools.pipeline.operations.Transforms( - apply = pyearthtools.data.transforms.coordinates.standard_longitude(type = '0-360') + pyearthtools.data.transforms.coordinates.ReIndex(level = 'reversed') - ), - pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten(coordinate = 'level'), + apply=pyearthtools.data.transforms.coordinates.standard_longitude( + type='0-360') + pyearthtools.data.transforms.coordinates.ReIndex(level='reversed') + ), + pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten(coordinate='level'), pyearthtools.pipeline.operations.xarray.conversion.ToNumpy(), - pyearthtools.pipeline.operations.numpy.reshape.Squish(axis = 1), + pyearthtools.pipeline.operations.numpy.reshape.Squeeze(axis=1), ) ``` diff --git 
a/packages/pipeline/assets/pipeline_example.svg b/packages/pipeline/assets/pipeline_example.svg index c2b4a3b7..8ca41d66 100644 --- a/packages/pipeline/assets/pipeline_example.svg +++ b/packages/pipeline/assets/pipeline_example.svg @@ -91,7 +91,7 @@ Squish_7b413dbf-6cbe-4cf1-a375-17fc36cc9eda -Squish +Squeeze diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/dask/__init__.py b/packages/pipeline/src/pyearthtools/pipeline/operations/dask/__init__.py index 7ac0c908..7795eded 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/dask/__init__.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/dask/__init__.py @@ -24,7 +24,7 @@ | filters | Filter data when iterating | `DropAnyNan`, `DropAllNan`, `DropValue`, `Shape` | | join | Combine tuples of `np.ndarrays` | `Stack`, `VStack`, `HStack`, `Concatenate` | | normalisation | Normalise arrays | `Anomaly`, `Deviation`, `Division`, `Evaluated` | -| reshape | Reshape numpy array | `Rearrange`, `Squish`, `Expand`, `Flatten`, `SwapAxis` | +| reshape | Reshape numpy array | `Rearrange`, `Squeeze`, `Expand`, `Flatten`, `SwapAxis` | | select | Select elements from array | `Select`, `Slice` | | split | Split numpy arrays into tuples | `OnAxis`, `OnSlice`, `VSplit`, `HSplit` | | values | Modify values of arrays | `FillNan`, `MaskValue`, `ForceNormalised` | diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/dask/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/dask/reshape.py index 58165f1e..e5f0fd6e 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/dask/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/dask/reshape.py @@ -110,16 +110,16 @@ def undo_func(self, data: Union[np.ndarray, da.Array]): return self._rearrange(data, pattern) -class Squish(DaskOperation): +class Squeeze(DaskOperation): """ - Operation to Squish one Dimensional axis at 'axis' location + Operation to Squeeze one Dimensional axis at 'axis' 
location """ _override_interface = ["Serial"] - _numpy_counterpart = "reshape.Squish" + _numpy_counterpart = "reshape.Squeeze" def __init__(self, axis: Union[tuple[int, ...], int]) -> None: - """Squish Dimension of Data + """Squeeze Dimension of Data Args: axis (Union[tuple[int, ...], int]): diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/__init__.py b/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/__init__.py index 0f06e0cb..f240e6a2 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/__init__.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/__init__.py @@ -22,7 +22,7 @@ | filters | Filter data when iterating | `DropAnyNan`, `DropAllNan`, `DropValue`, `Shape` | | join | Combine tuples of `np.ndarrays` | `Stack`, `VStack`, `HStack`, `Concatenate` | | normalisation | Normalise arrays | `Anomaly`, `Deviation`, `Division`, `Evaluated` | -| reshape | Reshape numpy array | `Rearrange`, `Squish`, `Expand`, `Flatten`, `SwapAxis` | +| reshape | Reshape numpy array | `Rearrange`, `Squeeze`, `Expand`, `Flatten`, `SwapAxis` | | select | Select elements from array | `Select`, `Slice` | | split | Split numpy arrays into tuples | `OnAxis`, `OnSlice`, `VSplit`, `HSplit` | | values | Modify values of arrays | `FillNan`, `MaskValue`, `ForceNormalised` | diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/reshape.py index 794b388e..328e5de0 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/reshape.py @@ -94,21 +94,21 @@ def undo_func(self, data: np.ndarray): return self._rearrange(data, pattern) -class Squish(Operation): +class Squeeze(Operation): """ - Operation to Squish one Dimensional axis at 'axis' location + Operation to Squeeze one-Dimensional axes at 'axis' location """ _override_interface = 
["Delayed", "Serial"] - _interface_kwargs = {"Delayed": {"name": "Squish"}} + _interface_kwargs = {"Delayed": {"name": "Squeeze"}} def __init__(self, axis: Union[tuple[int, ...], int]) -> None: - """Squish Dimension of Data + """Squeeze Dimension of Data, removing dimensions of length 1. Args: axis (Union[tuple[int, ...], int]): - Axis to squish at + Axis to squeeze at """ super().__init__( split_tuples=True, From 4bcfc6e2019d7d0cae59e493ac7464a8bd17b6e2 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Wed, 23 Jul 2025 13:39:30 +1200 Subject: [PATCH 02/31] begin making tests --- .../pipeline/operations/xarray/reshape.py | 2 +- .../operations/xarray/test_xarray_reshape.py | 92 +++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 packages/pipeline/tests/operations/xarray/test_xarray_reshape.py diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index 3fc8e572..e9e43404 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -44,7 +44,7 @@ def __init__( Args: dimensions (Union[str, list[str]]): - Specified order of dimensions to tranpose dataset to + Specified order of dimensions to transpose dataset to append (bool, optional): Append extra dims, if false, prepend dims. Defaults to True. preserve_order (bool, optional): diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py new file mode 100644 index 00000000..4dfbe374 --- /dev/null +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -0,0 +1,92 @@ +# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pyearthtools.pipeline.operations.xarray import reshape + +import xarray as xr +import pytest + +SIMPLE_DA1 = xr.DataArray( + [ + [ + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], + ], + [ + [1.9, 1.0, 1.5], + [1.7, 2.4, 1.1], + [1.4, 1.5, 3.3], + ], + ], + coords=[[10, 20], [0, 1, 2], [5, 6, 7]], + dims=["height", "lat", "lon"], +) +SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) +SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) + +def test_Dimensions(): + d = reshape.Dimensions(["lat", "lon", "height"]) + output = d.apply_func(SIMPLE_DA1) + assert output.dims == ("lat", "lon", "height") + +def test_Dimensions_one_input(): + d = reshape.Dimensions(["lat"]) + output = d.apply_func(SIMPLE_DA1) + assert output.dims[0] == "lat" + +def test_Dimensions_prepend(): + d = reshape.Dimensions(["lat"], append=False) + output = d.apply_func(SIMPLE_DA1) + assert output.dims[-1] == "lat" + +def test_Dimensions_preserve_order(): + d = reshape.Dimensions(["lat"], preserve_order=True) + output = d.apply_func(SIMPLE_DA1) + reversed_output = d.undo_func(output) + assert reversed_output.dims == output.dims + + +def test_CoordinateFlatten(): + cf = reshape.CoordinateFlatten(coordinate=("height", (10, 20)), skip_missing=True) + cf.apply_func(SIMPLE_DA1) + + + + + + # def test_Flatten(): + # f1 = reshape.Flatten(flatten_dims=2) + # random_array = np.random.randn(4, 3, 5) + # output = f1.apply_func(random_array) + # undo_output = f1.undo_func(output) + # assert output.shape == (4, 3 * 5), "Flatten acts on 
the last few dimensions." + # assert np.all(undo_output == random_array), "Flatten can undo itself." + # + # def test_Flatten_1_dim(): + # f2 = reshape.Flatten(flatten_dims=1) + # random_array = np.random.randn(4, 3, 5) + # output = f2.apply_func(random_array) + # undo_output = f2.undo_func(output) # Check that the undo still works. + # assert np.all(output == random_array), "Flatten 1 dimension does nothing." + # assert np.all(undo_output == random_array), "Undo Flatten 1 dimension." + # + # def test_Flatten_all_dims(): + # f3 = reshape.Flatten() + # random_array3 = np.random.randn(6, 7, 5, 2) + # output = f3.apply_func(random_array3) + # assert output.shape == (6 * 7 * 5 * 2,) + # assert f3.undo_func(output).shape == (6, 7, 5, 2), "Undo Flatten all dimensions." + + From 7b178a1ca97c930d18dd0166ef2c3f25b8924972 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Thu, 24 Jul 2025 14:55:10 +1200 Subject: [PATCH 03/31] bug found in data.transforms.coordinates.Flatten --- .../data/transforms/coordinates.py | 4 +- .../operations/xarray/test_xarray_reshape.py | 46 ++++++------------- 2 files changed, 17 insertions(+), 33 deletions(-) diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index 05b8ff21..0644a702 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -412,13 +412,13 @@ def weak_cast_to_int(value): class Flatten(Transform): - """Flatten a coordinate in a dataset into seperate variables""" + """Flatten a coordinate in a dataset into separate variables""" def __init__( self, coordinate: Hashable | list[Hashable] | tuple[Hashable], *extra_coordinates, skip_missing: bool = False ): """ - Flatten a coordinate in a dataset with each point being made a seperate data var + Flatten a coordinate in a dataset with each point being made a separate data var Args: coordinate (Hashable | 
list[Hashable] | tuple[Hashable] | None): diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index 4dfbe374..46143dc8 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -33,6 +33,16 @@ coords=[[10, 20], [0, 1, 2], [5, 6, 7]], dims=["height", "lat", "lon"], ) + +SIMPLE_DA2 = xr.DataArray( + [ + [9.1, 2.3, 3.2], + [2.2, 1.1, 0.2] + ], + coords=[[1, 2], [3, 4, 5]], + dims=["a", "b"] +) + SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) @@ -57,36 +67,10 @@ def test_Dimensions_preserve_order(): reversed_output = d.undo_func(output) assert reversed_output.dims == output.dims +import pyearthtools.data +def test_dataFlatten(): + pyearthtools.data.transforms.coordinates.Flatten(["a", "b"], skip_missing=True)(SIMPLE_DA2) def test_CoordinateFlatten(): - cf = reshape.CoordinateFlatten(coordinate=("height", (10, 20)), skip_missing=True) - cf.apply_func(SIMPLE_DA1) - - - - - - # def test_Flatten(): - # f1 = reshape.Flatten(flatten_dims=2) - # random_array = np.random.randn(4, 3, 5) - # output = f1.apply_func(random_array) - # undo_output = f1.undo_func(output) - # assert output.shape == (4, 3 * 5), "Flatten acts on the last few dimensions." - # assert np.all(undo_output == random_array), "Flatten can undo itself." - # - # def test_Flatten_1_dim(): - # f2 = reshape.Flatten(flatten_dims=1) - # random_array = np.random.randn(4, 3, 5) - # output = f2.apply_func(random_array) - # undo_output = f2.undo_func(output) # Check that the undo still works. - # assert np.all(output == random_array), "Flatten 1 dimension does nothing." - # assert np.all(undo_output == random_array), "Undo Flatten 1 dimension." 
- # - # def test_Flatten_all_dims(): - # f3 = reshape.Flatten() - # random_array3 = np.random.randn(6, 7, 5, 2) - # output = f3.apply_func(random_array3) - # assert output.shape == (6 * 7 * 5 * 2,) - # assert f3.undo_func(output).shape == (6, 7, 5, 2), "Undo Flatten all dimensions." - - + cf = reshape.CoordinateFlatten(coordinate="a", skip_missing=True) + cf.apply_func(SIMPLE_DA2) \ No newline at end of file From 028568137cebdfbf38f0ae33e792cf791a14d6a0 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 14:03:33 +1200 Subject: [PATCH 04/31] start testing, improve docstring --- .../data/transforms/coordinates.py | 12 +++- .../tests/data/transforms/test_coordinates.py | 67 +++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 packages/data/tests/data/transforms/test_coordinates.py diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index 05b8ff21..1aa1024b 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -412,13 +412,20 @@ def weak_cast_to_int(value): class Flatten(Transform): - """Flatten a coordinate in a dataset into seperate variables""" + """Operation to flatten a coordinate in a dataset, putting the data at each value of the coordinate into a separate + data variable.""" def __init__( self, coordinate: Hashable | list[Hashable] | tuple[Hashable], *extra_coordinates, skip_missing: bool = False ): """ - Flatten a coordinate in a dataset with each point being made a seperate data var + + Flatten a coordinate in an xarray Dataset, putting the data at each value of the coordinate into a separate + data variable. + + The output data variables will be named "". 
For example, if the input + Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values + [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. Args: coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): @@ -431,6 +438,7 @@ def __init__( Raises: ValueError: If invalid number of coordinates found + """ super().__init__() self.record_initialisation() diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py new file mode 100644 index 00000000..7d738057 --- /dev/null +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -0,0 +1,67 @@ +# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pyearthtools.data.transforms import coordinates +import xarray as xr +import numpy as np + + +# Create a 2D array of integers +data = np.array([[0, 1], [0, 1]], dtype=np.int32) + +# Create a DataArray with named dimensions and coordinates +da = xr.DataArray( + data, + dims=["x", "y"], + coords={"x": [10, 11], "y": [100, 110]}, + name="example_integers" +) + + +larger_data = np.array( + [ + [ + [0, 1, 2, 3], + [0, 1, 2, 3], + [0, 1, 2, 3] + ], + [ + [0, 1, 2, 3], + [1, 16, 17, 18], + [2, 110, 111, 112] + ], + [ + [0, 1, 2, 3], + [1, 6, 7, 8], + [2, 10, 11, 12] + ], + [ + [0, 1, 2, 3], + [1, 16, 17, 18], + [2, 110, 111, 112] + ], + ], + dtype=np.int32) + +da_larger = xr.DataArray( + larger_data, + dims=["height", "lat", "lon"], + coords={"height": [10, 11, 12, 13], "lat": [100, 101, 102], "lon": [21, 22, 23, 24]}, + name="sample_data" +) + + +def test_Flatten(): + f = coordinates.Flatten(["height"]) + f.apply(da_larger) From 8eab03a796da9fb1f2dc0a330420e5d6de447af2 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 14:44:29 +1200 Subject: [PATCH 05/31] Flatten tested and debugged. --- .../data/transforms/coordinates.py | 5 +- .../tests/data/transforms/test_coordinates.py | 60 +++++++------------ 2 files changed, 26 insertions(+), 39 deletions(-) diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index 1aa1024b..c24891cf 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -427,6 +427,9 @@ def __init__( Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. + If more than one coordinate is flattened, the output data variable names will concatenate the values of each + coordinate. 
+ Args: coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): Coordinates to flatten, either str or list of candidates. @@ -466,7 +469,7 @@ def apply(self, dataset: xr.Dataset) -> xr.Dataset: ) elif len(discovered_coord) > 1: - transforms = TransformCollection(*[flatten(coord) for coord in discovered_coord]) + transforms = TransformCollection(*[Flatten(coord) for coord in discovered_coord]) return transforms(dataset) discovered_coord = str(discovered_coord[0]) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index 7d738057..2a18a77c 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -16,52 +16,36 @@ import xarray as xr import numpy as np - -# Create a 2D array of integers -data = np.array([[0, 1], [0, 1]], dtype=np.int32) - -# Create a DataArray with named dimensions and coordinates -da = xr.DataArray( - data, - dims=["x", "y"], - coords={"x": [10, 11], "y": [100, 110]}, - name="example_integers" -) - - -larger_data = np.array( +SIMPLE_DA1 = xr.DataArray( [ [ - [0, 1, 2, 3], - [0, 1, 2, 3], - [0, 1, 2, 3] + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], ], [ - [0, 1, 2, 3], - [1, 16, 17, 18], - [2, 110, 111, 112] - ], - [ - [0, 1, 2, 3], - [1, 6, 7, 8], - [2, 10, 11, 12] - ], - [ - [0, 1, 2, 3], - [1, 16, 17, 18], - [2, 110, 111, 112] + [1.9, 1.0, 1.5], + [1.7, 2.4, 1.1], + [1.4, 1.5, 3.3], ], ], - dtype=np.int32) - -da_larger = xr.DataArray( - larger_data, + coords=[[10, 20], [0, 1, 2], [5, 6, 7]], dims=["height", "lat", "lon"], - coords={"height": [10, 11, 12, 13], "lat": [100, 101, 102], "lon": [21, 22, 23, 24]}, - name="sample_data" ) - +SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) +SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) def test_Flatten(): f = coordinates.Flatten(["height"]) - f.apply(da_larger) + output = 
f.apply(SIMPLE_DS2) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: + assert vbl in variables + +def test_Flatten_2_coords(): + f = coordinates.Flatten(["height", "lon"]) + output = f.apply(SIMPLE_DS1) + variables = list(output.keys()) + for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', + 'Temperature710', 'Temperature720']: + assert vbl in variables From 52796842b53a0e0846018dbfb1a145fd1add4acb Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 15:15:00 +1200 Subject: [PATCH 06/31] test skip_missing --- .../data/transforms/coordinates.py | 3 +- .../tests/data/transforms/test_coordinates.py | 40 ++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index c24891cf..cd1d4be5 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -436,7 +436,8 @@ def __init__( *extra_coordinates (optional): Arguments form of `coordinate`. skip_missing (bool, optional): - Whether to skip data without the dims. Defaults to False + Whether to skip data that does not have any of the listed coordinates. If True, will return such data + unchanged. Defaults to False. 
Raises: ValueError: diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index 2a18a77c..dc909106 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -15,6 +15,7 @@ from pyearthtools.data.transforms import coordinates import xarray as xr import numpy as np +import pytest SIMPLE_DA1 = xr.DataArray( [ @@ -32,9 +33,23 @@ coords=[[10, 20], [0, 1, 2], [5, 6, 7]], dims=["height", "lat", "lon"], ) + +SIMPLE_DA2 = xr.DataArray( + [ + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], + ], + + coords=[[0, 1, 2], [5, 6, 7]], + dims=["lat", "lon"], +) + SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) +COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) + def test_Flatten(): f = coordinates.Flatten(["height"]) output = f.apply(SIMPLE_DS2) @@ -46,6 +61,27 @@ def test_Flatten_2_coords(): f = coordinates.Flatten(["height", "lon"]) output = f.apply(SIMPLE_DS1) variables = list(output.keys()) - for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', - 'Temperature710', 'Temperature720']: + # Note that it's hard to predict which coordinate will be processed first. 
+ try: + for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', + 'Temperature710', 'Temperature720']: + assert vbl in variables + except AssertionError: + for vbl in ['Temperature105', 'Temperature205', 'Temperature106', 'Temperature206', + 'Temperature107', 'Temperature207']: + assert vbl in variables + +def test_Flatten_complicated_dataset(): + """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" + f = coordinates.Flatten(["height"]) + output = f.apply(COMPLICATED_DS1) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "MSLP"]: assert vbl in variables + +def test_Flatten_skip_missing(): + f = coordinates.Flatten(["scrupulosity"]) + with pytest.raises(ValueError): + f.apply(SIMPLE_DS1) + f2 = coordinates.Flatten(["height"]) + f2.apply(SIMPLE_DS1) \ No newline at end of file From d3a3541c3da5b31208d44ef91c6c426a533caa6b Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 15:16:07 +1200 Subject: [PATCH 07/31] test skip_missing --- packages/data/tests/data/transforms/test_coordinates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index dc909106..c8cb6219 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -83,5 +83,5 @@ def test_Flatten_skip_missing(): f = coordinates.Flatten(["scrupulosity"]) with pytest.raises(ValueError): f.apply(SIMPLE_DS1) - f2 = coordinates.Flatten(["height"]) + f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) f2.apply(SIMPLE_DS1) \ No newline at end of file From 132470d36224ff49420bef039c51c91e1cda280c Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Tue, 29 Jul 2025 13:56:45 +1200 Subject: [PATCH 08/31] Final tidying before pull request. 
--- packages/data/tests/data/transforms/test_coordinates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index c8cb6219..1864a54a 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -84,4 +84,5 @@ def test_Flatten_skip_missing(): with pytest.raises(ValueError): f.apply(SIMPLE_DS1) f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) - f2.apply(SIMPLE_DS1) \ No newline at end of file + output2 = f2.apply(SIMPLE_DS1) + assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." \ No newline at end of file From 8db1c73d0225c3d7ecfb9b43408a8574501d8422 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 11 Aug 2025 12:42:09 +1200 Subject: [PATCH 09/31] copy Flatten functionality to CoordinateFlatten --- .../pipeline/operations/xarray/reshape.py | 80 +++++++++++++++++-- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index e9e43404..c926ebfe 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -16,8 +16,11 @@ from typing import Hashable, TypeVar, Union import xarray as xr +import numpy as np import pyearthtools.data +from pyearthtools.data.transforms.transform import TransformCollection +from pyearthtools.data.transforms.coordinates import Drop from pyearthtools.pipeline.operation import Operation T = TypeVar("T", xr.Dataset, xr.DataArray) @@ -87,15 +90,34 @@ def undo_func(self, sample: T) -> T: return sample.transpose(*self._incoming_dims, missing_dims="ignore") return sample +def weak_cast_to_int(value): + """ + 
Basically, turns integer floats to int types, otherwise + does nothing. Used in CoordinateFlatten. + """ + try: + if int(value) == value: + value = int(value) + except Exception: + pass + return value class CoordinateFlatten(Operation): - """Flatten and Expand on a coordinate""" + """Flatten a coordinate in a dataset into separate variables""" _override_interface = "Serial" - def __init__(self, coordinate: Union[Hashable, list[Hashable]], *coords: Hashable, skip_missing: bool = False): + def __init__(self, coordinate: Union[Hashable, list[Hashable]], *extra_coords: Hashable, skip_missing: bool = False): """ - Flatten and expand on coordinate/s + Flatten a coordinate in an xarray Dataset, putting the data at each value of the coordinate into a separate + data variable. + + The output data variables will be named "". For example, if the input + Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values + [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. + + If more than one coordinate is flattened, the output data variable names will concatenate the values of each + coordinate. 
Args: coordinate (Union[Hashable,list[Hashable]]): @@ -110,12 +132,56 @@ def __init__(self, coordinate: Union[Hashable, list[Hashable]], *coords: Hashabl ) self.record_initialisation() - coordinate = [coordinate, *coords] if not isinstance(coordinate, (list, tuple)) else [*coordinate, *coords] - self.coords = coordinate + coordinate = [coordinate, *extra_coords] if not isinstance(coordinate, (list, tuple)) else [*coordinate, *extra_coords] + self._coordinate = coordinate self._skip_missing = skip_missing - def apply_func(self, ds): - return pyearthtools.data.transforms.coordinates.Flatten(self.coords, skip_missing=self._skip_missing)(ds) + def apply_func(self, dataset: xr.Dataset) -> xr.Dataset: + discovered_coord = list(set(self._coordinate).intersection(set(dataset.coords))) + + if len(discovered_coord) == 0: + if self._skip_missing: + return dataset + + raise ValueError( + f"{self._coordinate} could not be found in dataset with coordinates {list(dataset.coords)}.\n" + "Set 'skip_missing' to True to skip this." 
+ ) + + elif len(discovered_coord) > 1: + transforms = TransformCollection(*[CoordinateFlatten(coord) for coord in discovered_coord]) + return transforms(dataset) + + discovered_coord = str(discovered_coord[0]) + + coords = dataset.coords + new_ds = xr.Dataset(coords={co: v for co, v in coords.items() if not co == discovered_coord}) + new_ds.attrs.update( + {f"{discovered_coord}-dtype": str(dataset[discovered_coord].encoding.get("dtype", "int32"))} + ) + + for var in dataset: + if discovered_coord not in dataset[var].coords: + new_ds[var] = dataset[var] + continue + + coord_size = dataset[var][discovered_coord].values + coord_size = coord_size if isinstance(coord_size, np.ndarray) else np.array(coord_size) + + if coord_size.size == 1 and False: + coord_val = weak_cast_to_int(dataset[var][discovered_coord].values) + new_ds[f"{var}{coord_val}"] = Drop(discovered_coord, ignore_missing=True)(dataset[var]) + + else: + for coord_val in dataset[discovered_coord]: + coord_val = weak_cast_to_int(coord_val.values.item()) + + selected = dataset[var].sel(**{discovered_coord: coord_val}) # type: ignore + selected = selected.drop_vars(discovered_coord) # type: ignore + selected.attrs.update(**{discovered_coord: coord_val}) + + new_ds[f"{var}{coord_val}"] = selected + return new_ds def undo_func(self, ds): return pyearthtools.data.transforms.coordinates.expand(self.coords)(ds) From 70e746accbfcc0ce2c58e03260168b79b7f582a4 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Tue, 12 Aug 2025 12:32:32 +1200 Subject: [PATCH 10/31] first test --- .../pipeline/tests/operations/xarray/test_xarray_reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index 46143dc8..75726457 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -69,8 
+69,8 @@ def test_Dimensions_preserve_order(): import pyearthtools.data def test_dataFlatten(): - pyearthtools.data.transforms.coordinates.Flatten(["a", "b"], skip_missing=True)(SIMPLE_DA2) + pyearthtools.data.transforms.coordinates.Flatten(["a", "b"], skip_missing=True)(SIMPLE_DS1) def test_CoordinateFlatten(): cf = reshape.CoordinateFlatten(coordinate="a", skip_missing=True) - cf.apply_func(SIMPLE_DA2) \ No newline at end of file + cf.apply_func(SIMPLE_DS1) \ No newline at end of file From c828cf09b91feb2bf6932c4bf27c4d4d2217b0e9 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Tue, 12 Aug 2025 12:51:57 +1200 Subject: [PATCH 11/31] copy old Flatten test over in preparation for reworking --- .../operations/xarray/test_xarray_reshape.py | 87 ++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index 75726457..30e4b69a 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -73,4 +73,89 @@ def test_dataFlatten(): def test_CoordinateFlatten(): cf = reshape.CoordinateFlatten(coordinate="a", skip_missing=True) - cf.apply_func(SIMPLE_DS1) \ No newline at end of file + cf.apply_func(SIMPLE_DS1) + +SIMPLE_DA1 = xr.DataArray( + [ + [ + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], + ], + [ + [1.9, 1.0, 1.5], + [1.7, 2.4, 1.1], + [1.4, 1.5, 3.3], + ], + ], + coords=[[10, 20], [0, 1, 2], [5, 6, 7]], + dims=["height", "lat", "lon"], +) + +SIMPLE_DA2 = xr.DataArray( + [ + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], + ], + coords=[[0, 1, 2], [5, 6, 7]], + dims=["lat", "lon"], +) + +SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) +SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) + +COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": 
SIMPLE_DA2}) + + +def test_Flatten(): + f = coordinates.Flatten(["height"]) + output = f.apply(SIMPLE_DS2) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: + assert vbl in variables + + +def test_Flatten_2_coords(): + f = coordinates.Flatten(["height", "lon"]) + output = f.apply(SIMPLE_DS1) + variables = list(output.keys()) + # Note that it's hard to predict which coordinate will be processed first. + try: + for vbl in [ + "Temperature510", + "Temperature520", + "Temperature610", + "Temperature620", + "Temperature710", + "Temperature720", + ]: + assert vbl in variables + except AssertionError: + for vbl in [ + "Temperature105", + "Temperature205", + "Temperature106", + "Temperature206", + "Temperature107", + "Temperature207", + ]: + assert vbl in variables + + +def test_Flatten_complicated_dataset(): + """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" + f = coordinates.Flatten(["height"]) + output = f.apply(COMPLICATED_DS1) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "MSLP"]: + assert vbl in variables + + +def test_Flatten_skip_missing(): + f = coordinates.Flatten(["scrupulosity"]) + with pytest.raises(ValueError): + f.apply(SIMPLE_DS1) + f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) + output2 = f2.apply(SIMPLE_DS1) + assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." 
\ No newline at end of file From 81dfedc769468a98d236f221c427651524b1d8b7 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Tue, 2 Sep 2025 12:06:18 +1200 Subject: [PATCH 12/31] get ready to update branch --- .../pipeline/operations/xarray/reshape.py | 67 ++++++++++++++++++- .../operations/xarray/test_xarray_reshape.py | 18 ++--- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index c926ebfe..a8eefd31 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -13,7 +13,7 @@ # limitations under the License. -from typing import Hashable, TypeVar, Union +from typing import Hashable, TypeVar, Union, Optional, Any import xarray as xr import numpy as np @@ -23,6 +23,10 @@ from pyearthtools.data.transforms.coordinates import Drop from pyearthtools.pipeline.operation import Operation +from packages.data.src.pyearthtools.data.transforms.attributes import SetType +from packages.data.src.pyearthtools.data.transforms.coordinates import Select +from packages.utils.src.pyearthtools.utils.decorators import BackwardsCompatibility + T = TypeVar("T", xr.Dataset, xr.DataArray) @@ -184,4 +188,63 @@ def apply_func(self, dataset: xr.Dataset) -> xr.Dataset: return new_ds def undo_func(self, ds): - return pyearthtools.data.transforms.coordinates.expand(self.coords)(ds) + return pyearthtools.pipeline.operations.xarray.reshape.coordinate_expand(self.coords)(ds) + +class CoordinateExpand(Operation): + """Inverse operation to `CoordinateFlatten`""" + + def __init__(self, coordinate: Union[Hashable, list[Hashable], tuple[Hashable]], *extra_coordinates): + """ + Inverse operation to [flatten][pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten] + + Will find flattened variables and regroup them upon the extra 
coordinate + + Args: + coordinate (Hashable | list[Hashable] | tuple[Hashable]): + Coordinate to unflatten. + *extra_coordinates (optional): + Argument form of `coordinate`. + """ + super().__init__() + self.record_initialisation() + + if not isinstance(coordinate, (list, tuple)): + coordinate = (coordinate,) + + coordinate = (*coordinate, *extra_coordinates) + self._coordinate = coordinate + + # @property + # def _info_(self): + # return dict(coordinate=self._coordinate) + + def apply(self, dataset: xr.Dataset) -> xr.Dataset | xr.DataArray: + dataset = type(dataset)(dataset) + + for coord in self._coordinate: + dtype = dataset.attrs.get(f"{coord}-dtype", "int32") + components = [] + for var in list(dataset.data_vars): + var_data = dataset[var] + if coord in var_data.attrs: + value = var_data.attrs.pop(coord) + var_data = ( + var_data.to_dataset(name=var.replace(str(value), "")) + .assign_coords(**{coord: [value]}) + .set_coords(coord) + ) + components.append(var_data) + + dataset = xr.combine_by_coords(components) # type: ignore + dataset = SetType(**{str(coord): dtype})(dataset) + + ## Add stored encoding if there + if f"{coord}-dtype" in dataset.attrs: + dtype = dataset.attrs.pop(f"{coord}-dtype") + dataset[coord].encoding.update(dtype=dtype) + + return dataset + + +@BackwardsCompatibility(CoordinateExpand) +def coordinate_expand(*args, **kwargs) -> Operation: ... 
diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index 30e4b69a..cf5f3a30 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -108,16 +108,16 @@ def test_CoordinateFlatten(): COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) -def test_Flatten(): - f = coordinates.Flatten(["height"]) +def test_CoordinateFlatten(): + f = reshape.CoordinateFlatten(["height"]) output = f.apply(SIMPLE_DS2) variables = list(output.keys()) for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: assert vbl in variables -def test_Flatten_2_coords(): - f = coordinates.Flatten(["height", "lon"]) +def test_CoordinateFlatten_2_coords(): + f = reshape.CoordinateFlatten(["height", "lon"]) output = f.apply(SIMPLE_DS1) variables = list(output.keys()) # Note that it's hard to predict which coordinate will be processed first. 
@@ -143,19 +143,19 @@ def test_Flatten_2_coords(): assert vbl in variables -def test_Flatten_complicated_dataset(): +def test_CoordinateFlatten_complicated_dataset(): """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" - f = coordinates.Flatten(["height"]) + f = reshape.CoordinateFlatten(["height"]) output = f.apply(COMPLICATED_DS1) variables = list(output.keys()) for vbl in ["Temperature10", "Temperature20", "MSLP"]: assert vbl in variables -def test_Flatten_skip_missing(): - f = coordinates.Flatten(["scrupulosity"]) +def test_CoordinateFlatten_skip_missing(): + f = reshape.CoordinateFlatten(["scrupulosity"]) with pytest.raises(ValueError): f.apply(SIMPLE_DS1) - f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) + f2 = reshape.CoordinateFlatten(["scrupulosity"], skip_missing=True) output2 = f2.apply(SIMPLE_DS1) assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." \ No newline at end of file From a133d9e4badc791ed8d8bc291447b9edbe277cfd Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 14:03:33 +1200 Subject: [PATCH 13/31] start testing, improve docstring, rebase with new changes from upstream --- .../data/transforms/coordinates.py | 3 - .../tests/data/transforms/test_coordinates.py | 67 +++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 packages/data/tests/data/transforms/test_coordinates.py diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index 5dda2890..fbed8add 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -426,9 +426,6 @@ def __init__( Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values [100, 200, 500], then the output Dataset will have variables 
called t100, t200 and t500. - If more than one coordinate is flattened, the output data variable names will concatenate the values of each - coordinate. - Args: coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): Coordinates to flatten, either str or list of candidates. diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py new file mode 100644 index 00000000..7d738057 --- /dev/null +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -0,0 +1,67 @@ +# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pyearthtools.data.transforms import coordinates +import xarray as xr +import numpy as np + + +# Create a 2D array of integers +data = np.array([[0, 1], [0, 1]], dtype=np.int32) + +# Create a DataArray with named dimensions and coordinates +da = xr.DataArray( + data, + dims=["x", "y"], + coords={"x": [10, 11], "y": [100, 110]}, + name="example_integers" +) + + +larger_data = np.array( + [ + [ + [0, 1, 2, 3], + [0, 1, 2, 3], + [0, 1, 2, 3] + ], + [ + [0, 1, 2, 3], + [1, 16, 17, 18], + [2, 110, 111, 112] + ], + [ + [0, 1, 2, 3], + [1, 6, 7, 8], + [2, 10, 11, 12] + ], + [ + [0, 1, 2, 3], + [1, 16, 17, 18], + [2, 110, 111, 112] + ], + ], + dtype=np.int32) + +da_larger = xr.DataArray( + larger_data, + dims=["height", "lat", "lon"], + coords={"height": [10, 11, 12, 13], "lat": [100, 101, 102], "lon": [21, 22, 23, 24]}, + name="sample_data" +) + + +def test_Flatten(): + f = coordinates.Flatten(["height"]) + f.apply(da_larger) From 5dd13a3d0cf716a13b88c6f0c36677866f4c7b21 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 14:44:29 +1200 Subject: [PATCH 14/31] Flatten tested and debugged. --- .../data/transforms/coordinates.py | 3 + .../tests/data/transforms/test_coordinates.py | 60 +++++++------------ 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index fbed8add..5dda2890 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -426,6 +426,9 @@ def __init__( Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. + If more than one coordinate is flattened, the output data variable names will concatenate the values of each + coordinate. 
+ Args: coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): Coordinates to flatten, either str or list of candidates. diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index 7d738057..2a18a77c 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -16,52 +16,36 @@ import xarray as xr import numpy as np - -# Create a 2D array of integers -data = np.array([[0, 1], [0, 1]], dtype=np.int32) - -# Create a DataArray with named dimensions and coordinates -da = xr.DataArray( - data, - dims=["x", "y"], - coords={"x": [10, 11], "y": [100, 110]}, - name="example_integers" -) - - -larger_data = np.array( +SIMPLE_DA1 = xr.DataArray( [ [ - [0, 1, 2, 3], - [0, 1, 2, 3], - [0, 1, 2, 3] + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], ], [ - [0, 1, 2, 3], - [1, 16, 17, 18], - [2, 110, 111, 112] - ], - [ - [0, 1, 2, 3], - [1, 6, 7, 8], - [2, 10, 11, 12] - ], - [ - [0, 1, 2, 3], - [1, 16, 17, 18], - [2, 110, 111, 112] + [1.9, 1.0, 1.5], + [1.7, 2.4, 1.1], + [1.4, 1.5, 3.3], ], ], - dtype=np.int32) - -da_larger = xr.DataArray( - larger_data, + coords=[[10, 20], [0, 1, 2], [5, 6, 7]], dims=["height", "lat", "lon"], - coords={"height": [10, 11, 12, 13], "lat": [100, 101, 102], "lon": [21, 22, 23, 24]}, - name="sample_data" ) - +SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) +SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) def test_Flatten(): f = coordinates.Flatten(["height"]) - f.apply(da_larger) + output = f.apply(SIMPLE_DS2) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: + assert vbl in variables + +def test_Flatten_2_coords(): + f = coordinates.Flatten(["height", "lon"]) + output = f.apply(SIMPLE_DS1) + variables = list(output.keys()) + for vbl in 
['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', + 'Temperature710', 'Temperature720']: + assert vbl in variables From 862bec079fa78b36d14837eba4e7d0bc5bbdb0a1 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 15:15:00 +1200 Subject: [PATCH 15/31] test skip_missing --- .../tests/data/transforms/test_coordinates.py | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index 2a18a77c..dc909106 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -15,6 +15,7 @@ from pyearthtools.data.transforms import coordinates import xarray as xr import numpy as np +import pytest SIMPLE_DA1 = xr.DataArray( [ @@ -32,9 +33,23 @@ coords=[[10, 20], [0, 1, 2], [5, 6, 7]], dims=["height", "lat", "lon"], ) + +SIMPLE_DA2 = xr.DataArray( + [ + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], + ], + + coords=[[0, 1, 2], [5, 6, 7]], + dims=["lat", "lon"], +) + SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) +COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) + def test_Flatten(): f = coordinates.Flatten(["height"]) output = f.apply(SIMPLE_DS2) @@ -46,6 +61,27 @@ def test_Flatten_2_coords(): f = coordinates.Flatten(["height", "lon"]) output = f.apply(SIMPLE_DS1) variables = list(output.keys()) - for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', - 'Temperature710', 'Temperature720']: + # Note that it's hard to predict which coordinate will be processed first. 
+ try: + for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', + 'Temperature710', 'Temperature720']: + assert vbl in variables + except AssertionError: + for vbl in ['Temperature105', 'Temperature205', 'Temperature106', 'Temperature206', + 'Temperature107', 'Temperature207']: + assert vbl in variables + +def test_Flatten_complicated_dataset(): + """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" + f = coordinates.Flatten(["height"]) + output = f.apply(COMPLICATED_DS1) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "MSLP"]: assert vbl in variables + +def test_Flatten_skip_missing(): + f = coordinates.Flatten(["scrupulosity"]) + with pytest.raises(ValueError): + f.apply(SIMPLE_DS1) + f2 = coordinates.Flatten(["height"]) + f2.apply(SIMPLE_DS1) \ No newline at end of file From ca0da826865f818573672eb6dfcffc537674e990 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 15:16:07 +1200 Subject: [PATCH 16/31] test skip_missing --- packages/data/tests/data/transforms/test_coordinates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index dc909106..c8cb6219 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -83,5 +83,5 @@ def test_Flatten_skip_missing(): f = coordinates.Flatten(["scrupulosity"]) with pytest.raises(ValueError): f.apply(SIMPLE_DS1) - f2 = coordinates.Flatten(["height"]) + f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) f2.apply(SIMPLE_DS1) \ No newline at end of file From 88553b923c92d75803807a4068a74de8e3d64215 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Tue, 29 Jul 2025 13:56:45 +1200 Subject: [PATCH 17/31] Final tidying before pull request. 
--- packages/data/tests/data/transforms/test_coordinates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index c8cb6219..1864a54a 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -84,4 +84,5 @@ def test_Flatten_skip_missing(): with pytest.raises(ValueError): f.apply(SIMPLE_DS1) f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) - f2.apply(SIMPLE_DS1) \ No newline at end of file + output2 = f2.apply(SIMPLE_DS1) + assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." \ No newline at end of file From b4fb81ad4fabd804de3367b372759c5dd1067920 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Thu, 4 Sep 2025 10:20:26 +1200 Subject: [PATCH 18/31] test CoordinateExpand and undo --- .../operations/xarray/test_xarray_reshape.py | 80 +++++++++++++------ 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index cf5f3a30..b91c623b 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -116,31 +116,31 @@ def test_CoordinateFlatten(): assert vbl in variables -def test_CoordinateFlatten_2_coords(): - f = reshape.CoordinateFlatten(["height", "lon"]) - output = f.apply(SIMPLE_DS1) - variables = list(output.keys()) - # Note that it's hard to predict which coordinate will be processed first. 
- try: - for vbl in [ - "Temperature510", - "Temperature520", - "Temperature610", - "Temperature620", - "Temperature710", - "Temperature720", - ]: - assert vbl in variables - except AssertionError: - for vbl in [ - "Temperature105", - "Temperature205", - "Temperature106", - "Temperature206", - "Temperature107", - "Temperature207", - ]: - assert vbl in variables +# def test_CoordinateFlatten_2_coords(): +# f = reshape.CoordinateFlatten(["height", "lon"]) +# output = f.apply(SIMPLE_DS1) +# variables = list(output.keys()) +# # Note that it's hard to predict which coordinate will be processed first. +# try: +# for vbl in [ +# "Temperature510", +# "Temperature520", +# "Temperature610", +# "Temperature620", +# "Temperature710", +# "Temperature720", +# ]: +# assert vbl in variables +# except AssertionError: +# for vbl in [ +# "Temperature105", +# "Temperature205", +# "Temperature106", +# "Temperature206", +# "Temperature107", +# "Temperature207", +# ]: +# assert vbl in variables def test_CoordinateFlatten_complicated_dataset(): @@ -158,4 +158,32 @@ def test_CoordinateFlatten_skip_missing(): f.apply(SIMPLE_DS1) f2 = reshape.CoordinateFlatten(["scrupulosity"], skip_missing=True) output2 = f2.apply(SIMPLE_DS1) - assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." \ No newline at end of file + assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." 
+ +def test_undo_CoordinateFlatten(): + f = reshape.CoordinateFlatten(["height"]) + f_output = f.apply(SIMPLE_DS2) + f_undone = f.undo(f_output) + variables = list(f_undone.keys()) + for vbl in ["Temperature", "Humidity", "WombatsPerKm2"]: + assert vbl in variables + +def test_CoordinateExpand_reverses_CoordinateFlatten(): + f = reshape.CoordinateFlatten(["height"]) + f_output = f.apply(SIMPLE_DS2) + e = reshape.CoordinateExpand(["height"]) + e_output = e.apply(f_output) + variables = list(e_output.keys()) + assert "Temperature" in variables + +def test_undo_CoordinateExpand(): + f = reshape.CoordinateFlatten(["height"]) + f_output = f.apply(SIMPLE_DS2) + e = reshape.CoordinateExpand(["height"]) + e_output = e.apply(f_output) + e_undone = e.undo(e_output) + variables = list(e_undone.keys()) + for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: + assert vbl in variables + + From 4baf5472b0ac3b601f8a45d0da10062bc2dbfd2e Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Thu, 4 Sep 2025 10:25:21 +1200 Subject: [PATCH 19/31] test CoordinateExpand and undo --- .../operations/xarray/test_xarray_reshape.py | 69 +------------------ 1 file changed, 2 insertions(+), 67 deletions(-) diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index b91c623b..99ee3b30 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -46,6 +46,8 @@ SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) +COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) + def test_Dimensions(): d = reshape.Dimensions(["lat", "lon", "height"]) output = d.apply_func(SIMPLE_DA1) @@ -67,46 +69,6 @@ def test_Dimensions_preserve_order(): 
reversed_output = d.undo_func(output) assert reversed_output.dims == output.dims -import pyearthtools.data -def test_dataFlatten(): - pyearthtools.data.transforms.coordinates.Flatten(["a", "b"], skip_missing=True)(SIMPLE_DS1) - -def test_CoordinateFlatten(): - cf = reshape.CoordinateFlatten(coordinate="a", skip_missing=True) - cf.apply_func(SIMPLE_DS1) - -SIMPLE_DA1 = xr.DataArray( - [ - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - [ - [1.9, 1.0, 1.5], - [1.7, 2.4, 1.1], - [1.4, 1.5, 3.3], - ], - ], - coords=[[10, 20], [0, 1, 2], [5, 6, 7]], - dims=["height", "lat", "lon"], -) - -SIMPLE_DA2 = xr.DataArray( - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - coords=[[0, 1, 2], [5, 6, 7]], - dims=["lat", "lon"], -) - -SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) -SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) - -COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) - def test_CoordinateFlatten(): f = reshape.CoordinateFlatten(["height"]) @@ -116,33 +78,6 @@ def test_CoordinateFlatten(): assert vbl in variables -# def test_CoordinateFlatten_2_coords(): -# f = reshape.CoordinateFlatten(["height", "lon"]) -# output = f.apply(SIMPLE_DS1) -# variables = list(output.keys()) -# # Note that it's hard to predict which coordinate will be processed first. 
-# try: -# for vbl in [ -# "Temperature510", -# "Temperature520", -# "Temperature610", -# "Temperature620", -# "Temperature710", -# "Temperature720", -# ]: -# assert vbl in variables -# except AssertionError: -# for vbl in [ -# "Temperature105", -# "Temperature205", -# "Temperature106", -# "Temperature206", -# "Temperature107", -# "Temperature207", -# ]: -# assert vbl in variables - - def test_CoordinateFlatten_complicated_dataset(): """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" f = reshape.CoordinateFlatten(["height"]) From f38731e4f6c111635c9d3e0ac31bbfd336a519d3 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Thu, 4 Sep 2025 10:49:07 +1200 Subject: [PATCH 20/31] remove multiple coordinate option from CoordinateFlatten and CoordinateExpand --- .../pipeline/operations/xarray/reshape.py | 29 ++++++++----------- .../operations/xarray/test_xarray_reshape.py | 2 -- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index a8eefd31..7160a1c6 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -111,7 +111,7 @@ class CoordinateFlatten(Operation): _override_interface = "Serial" - def __init__(self, coordinate: Union[Hashable, list[Hashable]], *extra_coords: Hashable, skip_missing: bool = False): + def __init__(self, coordinate: Hashable, skip_missing: bool = False): """ Flatten a coordinate in an xarray Dataset, putting the data at each value of the coordinate into a separate data variable. 
@@ -120,11 +120,8 @@ def __init__(self, coordinate: Union[Hashable, list[Hashable]], *extra_coords: H Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. - If more than one coordinate is flattened, the output data variable names will concatenate the values of each - coordinate. - Args: - coordinate (Union[Hashable,list[Hashable]]): + coordinate (Hashable): Coordinate to flatten and expand on. skip_missing (bool, optional): Whether to skip data without the dims. Defaults to False @@ -136,7 +133,6 @@ def __init__(self, coordinate: Union[Hashable, list[Hashable]], *extra_coords: H ) self.record_initialisation() - coordinate = [coordinate, *extra_coords] if not isinstance(coordinate, (list, tuple)) else [*coordinate, *extra_coords] self._coordinate = coordinate self._skip_missing = skip_missing @@ -188,22 +184,23 @@ def apply_func(self, dataset: xr.Dataset) -> xr.Dataset: return new_ds def undo_func(self, ds): - return pyearthtools.pipeline.operations.xarray.reshape.coordinate_expand(self.coords)(ds) + return pyearthtools.pipeline.operations.xarray.reshape.coordinate_expand(self._coordinate)(ds) + +@BackwardsCompatibility(CoordinateFlatten) +def coordinate_flatten(*args, **kwargs) -> Operation: ... class CoordinateExpand(Operation): """Inverse operation to `CoordinateFlatten`""" - def __init__(self, coordinate: Union[Hashable, list[Hashable], tuple[Hashable]], *extra_coordinates): + def __init__(self, coordinate: Hashable): """ Inverse operation to [flatten][pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten] Will find flattened variables and regroup them upon the extra coordinate Args: - coordinate (Hashable | list[Hashable] | tuple[Hashable]): + coordinate (Hashable): Coordinate to unflatten. - *extra_coordinates (optional): - Argument form of `coordinate`. 
""" super().__init__() self.record_initialisation() @@ -211,14 +208,9 @@ def __init__(self, coordinate: Union[Hashable, list[Hashable], tuple[Hashable]], if not isinstance(coordinate, (list, tuple)): coordinate = (coordinate,) - coordinate = (*coordinate, *extra_coordinates) self._coordinate = coordinate - # @property - # def _info_(self): - # return dict(coordinate=self._coordinate) - - def apply(self, dataset: xr.Dataset) -> xr.Dataset | xr.DataArray: + def apply_func(self, dataset: xr.Dataset) -> xr.Dataset | xr.DataArray: dataset = type(dataset)(dataset) for coord in self._coordinate: @@ -245,6 +237,9 @@ def apply(self, dataset: xr.Dataset) -> xr.Dataset | xr.DataArray: return dataset + def undo_func(self, ds): + return pyearthtools.pipeline.operations.xarray.reshape.coordinate_flatten(self._coordinate)(ds) + @BackwardsCompatibility(CoordinateExpand) def coordinate_expand(*args, **kwargs) -> Operation: ... diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index 99ee3b30..52515010 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -120,5 +120,3 @@ def test_undo_CoordinateExpand(): variables = list(e_undone.keys()) for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: assert vbl in variables - - From 3215dd8729e5bcc79b5313cf9100e7a3975b19e3 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Thu, 4 Sep 2025 15:44:46 +1200 Subject: [PATCH 21/31] remove data.coordinates.Flatten, data.coordinates.Expand, data.coordinates.SelectFlatten --- .../data/transforms/coordinates.py | 196 ------------------ .../transform/test_transform_coordinates.py | 35 ---- .../pipeline/operations/xarray/reshape.py | 13 +- .../operations/xarray/test_xarray_reshape.py | 9 + 4 files changed, 16 insertions(+), 237 deletions(-) diff --git 
a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index fa17fa09..713d3fb8 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -396,202 +396,6 @@ def apply(self, dataset: xr.Dataset) -> xr.Dataset: dataset = dataset.drop_vars(i) return dataset - -def weak_cast_to_int(value): - """ - Basically, turns integer floats to int types, otherwise - does nothing. - """ - try: - if int(value) == value: - value = int(value) - except Exception: - pass - return value - - -class Flatten(Transform): - """Flatten a coordinate in a dataset into separate variables""" - - def __init__( - self, coordinate: Hashable | list[Hashable] | tuple[Hashable], *extra_coordinates, skip_missing: bool = False - ): - """ - Flatten a coordinate in an xarray Dataset, putting the data at each value of the coordinate into a separate - data variable. - - The output data variables will be named "". For example, if the input - Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values - [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. - - If more than one coordinate is flattened, the output data variable names will concatenate the values of each - coordinate. - - Args: - coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): - Coordinates to flatten, either str or list of candidates. - *extra_coordinates (optional): - Arguments form of `coordinate`. - skip_missing (bool, optional): - Whether to skip data that does not have any of the listed coordinates. If True, will return such data - unchanged. Defaults to False. 
- - Raises: - ValueError: - If invalid number of coordinates found - - """ - super().__init__() - self.record_initialisation() - - coordinate = coordinate if isinstance(coordinate, (list, tuple)) else [coordinate] - coordinate = [*coordinate, *extra_coordinates] - - self._coordinate = coordinate - self._skip_missing = skip_missing - - # @property - # def _info_(self): - # return dict(coordinate=self._coordinate, skip_missing=self._skip_missing) - - def apply(self, dataset: xr.Dataset) -> xr.Dataset: - discovered_coord = list(set(self._coordinate).intersection(set(dataset.coords))) - - if len(discovered_coord) == 0: - if self._skip_missing: - return dataset - - raise ValueError( - f"{self._coordinate} could not be found in dataset with coordinates {list(dataset.coords)}.\n" - "Set 'skip_missing' to True to skip this." - ) - - elif len(discovered_coord) > 1: - transforms = TransformCollection(*[Flatten(coord) for coord in discovered_coord]) - return transforms(dataset) - - discovered_coord = str(discovered_coord[0]) - - coords = dataset.coords - new_ds = xr.Dataset(coords={co: v for co, v in coords.items() if not co == discovered_coord}) - new_ds.attrs.update( - {f"{discovered_coord}-dtype": str(dataset[discovered_coord].encoding.get("dtype", "int32"))} - ) - - for var in dataset: - if discovered_coord not in dataset[var].coords: - new_ds[var] = dataset[var] - continue - - coord_size = dataset[var][discovered_coord].values - coord_size = coord_size if isinstance(coord_size, np.ndarray) else np.array(coord_size) - - if coord_size.size == 1 and False: - coord_val = weak_cast_to_int(dataset[var][discovered_coord].values) - new_ds[f"{var}{coord_val}"] = Drop(discovered_coord, ignore_missing=True)(dataset[var]) - - else: - for coord_val in dataset[discovered_coord]: - coord_val = weak_cast_to_int(coord_val.values.item()) - - selected = dataset[var].sel(**{discovered_coord: coord_val}) # type: ignore - selected = selected.drop_vars(discovered_coord) # type: ignore - 
selected.attrs.update(**{discovered_coord: coord_val}) - - new_ds[f"{var}{coord_val}"] = selected - return new_ds - - -class Expand(Transform): - """Inverse operation to `Flatten`""" - - def __init__(self, coordinate: Hashable | list[Hashable] | tuple[Hashable], *extra_coordinates): - """ - Inverse operation to [flatten][pyearthtools.data.transforms.coordinate.Flatten] - - Will find flattened variables and regroup them upon the extra coordinate - - Args: - coordinate (Hashable | list[Hashable] | tuple[Hashable]): - Coordinate to unflatten. - *extra_coordinates (optional): - Argument form of `coordinate`. - """ - super().__init__() - self.record_initialisation() - - if not isinstance(coordinate, (list, tuple)): - coordinate = (coordinate,) - - coordinate = (*coordinate, *extra_coordinates) - self._coordinate = coordinate - - # @property - # def _info_(self): - # return dict(coordinate=self._coordinate) - - def apply(self, dataset: xr.Dataset) -> xr.Dataset | xr.DataArray: - dataset = type(dataset)(dataset) - - for coord in self._coordinate: - dtype = dataset.attrs.get(f"{coord}-dtype", "int32") - components = [] - for var in list(dataset.data_vars): - var_data = dataset[var] - if coord in var_data.attrs: - value = var_data.attrs.pop(coord) - var_data = ( - var_data.to_dataset(name=var.replace(str(value), "")) - .assign_coords(**{coord: [value]}) - .set_coords(coord) - ) - components.append(var_data) - - dataset = xr.combine_by_coords(components) # type: ignore - dataset = SetType(**{str(coord): dtype})(dataset) - - ## Add stored encoding if there - if f"{coord}-dtype" in dataset.attrs: - dtype = dataset.attrs.pop(f"{coord}-dtype") - dataset[coord].encoding.update(dtype=dtype) - - return dataset - - -@BackwardsCompatibility(Expand) -def expand(*args, **kwargs) -> Transform: ... 
- - -def SelectFlatten( - coordinates: dict[str, tuple[Any] | Any] | None = None, - tolerance: float = 0.01, - **extra_coordinates, -) -> TransformCollection: - """ - Select upon coordinates, and flatten said coordinate - - Args: - coordinates (dict[str, tuple[Any] | Any] | None, optional): - Coordinates and values to select. - Must be coordinate in data Defaults to None. - tolerance (float, optional): - tolerance of selection. Defaults to 0.01. - - Returns: - (TransformCollection): - TransformCollection to select and Flatten - """ - - if coordinates is None: - coordinates = {} - coordinates.update(extra_coordinates) - - select_trans = Select(coordinates, ignore_missing=True, tolerance=tolerance) - flatten_trans = Flatten(list(coordinates.keys())) - - return select_trans + flatten_trans - - class Assign(Transform): """Assign coordinates to object""" diff --git a/packages/data/tests/transform/test_transform_coordinates.py b/packages/data/tests/transform/test_transform_coordinates.py index 67d7ff96..c608eb69 100644 --- a/packages/data/tests/transform/test_transform_coordinates.py +++ b/packages/data/tests/transform/test_transform_coordinates.py @@ -80,33 +80,6 @@ def test_Drop(): # TODO: Assert that the dimension has been dropped -def test_Flatten(): - - tf_flatten = coordinates.Flatten("vertical") - _result = tf_flatten.apply(ds_vertical) - - # TODO: Assert the flattened data looks correct - - -def test_Expand(): - - tf_expand = coordinates.Expand("vertical") - _result = tf_expand.apply(ds_vertical) - - # TODO: Assert the expanded dataset has a vertical dimension - - -def test_SelectFlatten(): - - tf_selectflatten = coordinates.SelectFlatten({"longitude": slice(10, 120)}) - - with pytest.raises(NotImplementedError): - _result = tf_selectflatten.apply(ds_vertical) - # TODO fix the code or avoid the issue - - # TODO: Check the values of the resulting dataset - - def test_Assign(): tf_assign = coordinates.Assign({"longitude": list(range(0, 4)), "vertical": 
list(range(3, 6))}) @@ -126,11 +99,3 @@ def test_Pad(): # TODO: check the values of the result -def test_weak_cast_to_int(): - - wcti = coordinates.weak_cast_to_int - - assert wcti(5.0) == 5 - assert isinstance(wcti(5.0), int) - - assert wcti("hello") == "hello" diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index 7160a1c6..cbd0ca67 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -107,7 +107,7 @@ def weak_cast_to_int(value): return value class CoordinateFlatten(Operation): - """Flatten a coordinate in a dataset into separate variables""" + """Flatten a coordinate in a dataset into separate variables.""" _override_interface = "Serial" @@ -124,7 +124,12 @@ def __init__(self, coordinate: Hashable, skip_missing: bool = False): coordinate (Hashable): Coordinate to flatten and expand on. skip_missing (bool, optional): - Whether to skip data without the dims. Defaults to False + Whether to skip data that does not have any of the listed coordinates. If True, will return such data + unchanged. Defaults to False. + + Raises: + ValueError: + If coordinate not found in the dataset and skip_missing==False. """ super().__init__( split_tuples=True, @@ -148,10 +153,6 @@ def apply_func(self, dataset: xr.Dataset) -> xr.Dataset: "Set 'skip_missing' to True to skip this." 
) - elif len(discovered_coord) > 1: - transforms = TransformCollection(*[CoordinateFlatten(coord) for coord in discovered_coord]) - return transforms(dataset) - discovered_coord = str(discovered_coord[0]) coords = dataset.coords diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index 52515010..01242e6c 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -69,6 +69,15 @@ def test_Dimensions_preserve_order(): reversed_output = d.undo_func(output) assert reversed_output.dims == output.dims +def test_weak_cast_to_int(): + + wcti = reshape.weak_cast_to_int + + assert wcti(5.0) == 5 + assert isinstance(wcti(5.0), int) + + assert wcti("hello") == "hello" + def test_CoordinateFlatten(): f = reshape.CoordinateFlatten(["height"]) From 399fec05036cbd12cdd588979a889f3a0412e0eb Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Fri, 5 Sep 2025 09:44:43 +1200 Subject: [PATCH 22/31] commit before pulling changes --- notebooks/tutorial/CNN-Model-Training.ipynb | 721 +++--------------- notebooks/tutorial/HimawariAllBands.ipynb | 2 +- .../pipeline/operations/xarray/reshape.py | 8 +- 3 files changed, 119 insertions(+), 612 deletions(-) diff --git a/notebooks/tutorial/CNN-Model-Training.ipynb b/notebooks/tutorial/CNN-Model-Training.ipynb index 286173a0..ff7954e2 100644 --- a/notebooks/tutorial/CNN-Model-Training.ipynb +++ b/notebooks/tutorial/CNN-Model-Training.ipynb @@ -32,7 +32,80 @@ "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "A module that was compiled using NumPy 1.x cannot be run in\n", + "NumPy 2.0.1 as it may crash. To support both 1.x and 2.x\n", + "versions of NumPy, modules must be compiled with NumPy 2.0.\n", + "Some module may need to rebuild instead e.g. 
with 'pybind11>=2.12'.\n", + "\n", + "If you are a user of the module, the easiest solution will be to\n", + "downgrade to 'numpy<2' or try to upgrade the affected module.\n", + "We expect that some modules will need time to support NumPy 2.\n", + "\n", + "Traceback (most recent call last): File \"\", line 198, in _run_module_as_main\n", + " File \"\", line 88, in _run_code\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel_launcher.py\", line 18, in \n", + " app.launch_new_instance()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n", + " app.start()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelapp.py\", line 739, in start\n", + " self.io_loop.start()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/tornado/platform/asyncio.py\", line 205, in start\n", + " self.asyncio_loop.run_forever()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/asyncio/base_events.py\", line 645, in run_forever\n", + " self._run_once()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/asyncio/base_events.py\", line 1999, in _run_once\n", + " handle._run()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/asyncio/events.py\", line 88, in _run\n", + " self._context.run(self._callback, *self._args)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n", + " await self.process_one()\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n", + " await dispatch(*args)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n", + " await result\n", + " File 
\"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n", + " await super().execute_request(stream, ident, parent)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n", + " reply_content = await reply_content\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n", + " res = shell.run_cell(\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n", + " return super().run_cell(*args, **kwargs)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3098, in run_cell\n", + " result = self._run_cell(\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3153, in _run_cell\n", + " result = runner(coro)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n", + " coro.send(None)\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3365, in run_cell_async\n", + " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3610, in run_ast_nodes\n", + " if await self.run_code(code, result, async_=asy):\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3670, in run_code\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + " File \"/var/folders/1s/z56f8rw50755xx8fxp2969r477wmss/T/ipykernel_48096/575015278.py\", line 7, in \n", + " import torch\n", + " File 
\"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/__init__.py\", line 1477, in \n", + " from .functional import * # noqa: F403\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/functional.py\", line 9, in \n", + " import torch.nn.functional as F\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/__init__.py\", line 1, in \n", + " from .modules import * # noqa: F403\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/modules/__init__.py\", line 35, in \n", + " from .transformer import TransformerEncoder, TransformerDecoder, \\\n", + " File \"/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/modules/transformer.py\", line 20, in \n", + " device: torch.device = torch.device(torch._C._get_default_device()), # torch.device('cpu'),\n", + "/Users/masonge/Documents/PET/venv/lib/python3.12/site-packages/torch/nn/modules/transformer.py:20: UserWarning: Failed to initialize NumPy: _ARRAY_API not found (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/tensor_numpy.cpp:84.)\n", + " device: torch.device = torch.device(torch._C._get_default_device()), # torch.device('cpu'),\n" + ] + } + ], "source": [ "import sys\n", "from pathlib import Path\n", @@ -135,615 +208,51 @@ "output_type": "stream", "text": [ "Saving dataset, it will take at most 3.83 gigabytes of storage space.\n", - "macadamia - 2025-08-25 02:28:15,523 - pyearthtools.data.download.weatherbench - weatherbench - save_local_dataset - L123 - WARNING - Saving dataset, it will take at most 3.83 gigabytes of storage space.\n", - "Saving 2m_temperature variable under cnn_training/download/ee5f0931735d8d146214aa551175dfa34b3e33093aec3b93d20bcc78c56bcd1b/2m_temperature.zarr, it will take at most 766.31 megabytes of storage space.\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": 
"d5d6a6b09b424e338a0d46ac97fda57c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Writing: 0%| | 0/937 [00:00 instead.", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mMissingVariableFile\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/download/weatherbench.py:281\u001b[39m, in \u001b[36mWeatherBench2.__init__\u001b[39m\u001b[34m(self, dataset_url, license_url, variables, level, transforms, chunks, download_dir, license_ok, **kwargs)\u001b[39m\n\u001b[32m 280\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m281\u001b[39m ds = \u001b[43mopen_local_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdownload_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvariables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 282\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m MissingVariableFile:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/download/weatherbench.py:161\u001b[39m, in \u001b[36mopen_local_dataset\u001b[39m\u001b[34m(path, variables, level)\u001b[39m\n\u001b[32m 160\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;129;01mnot\u001b[39;00m fpath.is_dir() \u001b[38;5;28;01mfor\u001b[39;00m fpath \u001b[38;5;129;01min\u001b[39;00m filelist):\n\u001b[32m--> \u001b[39m\u001b[32m161\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m MissingVariableFile(\u001b[33m\"\u001b[39m\u001b[33mMissing .zarr folder for some variables\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 162\u001b[39m logger.debug(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mLoading \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvarname\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m 
variable from folders \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m[\u001b[38;5;28mstr\u001b[39m(p)\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mp\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39mfilelist]\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[31mMissingVariableFile\u001b[39m: Missing .zarr folder for some variables", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m data_preparation = pyearthtools.pipeline.Pipeline(\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43mpyearthtools\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdownload\u001b[49m\u001b[43m.\u001b[49m\u001b[43mweatherbench\u001b[49m\u001b[43m.\u001b[49m\u001b[43mWB2ERA5\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mvariables\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m2m_temperature\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mu\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mv\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mgeopotential\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mvorticity\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[32;43m850\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mdownload_dir\u001b[49m\u001b[43m=\u001b[49m\u001b[43mworkdir\u001b[49m\u001b[43m \u001b[49m\u001b[43m/\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdownload\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mlicense_ok\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[32m 8\u001b[39m pyearthtools.pipeline.operations.xarray.Sort(\n\u001b[32m 9\u001b[39m [\u001b[33m\"\u001b[39m\u001b[33m2m_temperature\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mu_component_of_wind\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mv_component_of_wind\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mvorticity\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mgeopotential\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 10\u001b[39m ),\n\u001b[32m 11\u001b[39m pyearthtools.data.transforms.coordinates.StandardLongitude(\u001b[38;5;28mtype\u001b[39m=\u001b[33m\"\u001b[39m\u001b[33m0-360\u001b[39m\u001b[33m\"\u001b[39m),\n\u001b[32m 12\u001b[39m pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten(\u001b[33m\"\u001b[39m\u001b[33mlevel\u001b[39m\u001b[33m\"\u001b[39m),\n\u001b[32m 13\u001b[39m \u001b[38;5;66;03m# retrieve previous/next samples, dt = 1H\u001b[39;00m\n\u001b[32m 14\u001b[39m pyearthtools.pipeline.modifications.TemporalRetrieval(\n\u001b[32m 15\u001b[39m concat=\u001b[38;5;28;01mTrue\u001b[39;00m, samples=((\u001b[32m0\u001b[39m, \u001b[32m1\u001b[39m), (\u001b[32m6\u001b[39m, \u001b[32m1\u001b[39m))\n\u001b[32m 16\u001b[39m ),\n\u001b[32m 17\u001b[39m )\n\u001b[32m 18\u001b[39m 
data_preparation\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/indexes/decorators.py:294\u001b[39m, in \u001b[36mcheck_arguments..internal_function..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 291\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 292\u001b[39m kwargs[k] = spellcheck.check_prompt(kwargs[k], valid_arg, k)\n\u001b[32m--> \u001b[39m\u001b[32m294\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/download/weatherbench.py:369\u001b[39m, in \u001b[36mWB2ERA5.__init__\u001b[39m\u001b[34m(self, resolution, **kwargs)\u001b[39m\n\u001b[32m 367\u001b[39m dataset_url = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mgs://weatherbench2/datasets/era5/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.DATASETS[resolution]\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 368\u001b[39m license_url = \u001b[33m\"\u001b[39m\u001b[33mgs://weatherbench2/datasets/era5/LICENSE\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m--> \u001b[39m\u001b[32m369\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[34;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdataset_url\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlicense_url\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 370\u001b[39m \u001b[38;5;28mself\u001b[39m.resolution = resolution\n\u001b[32m 371\u001b[39m \u001b[38;5;28mself\u001b[39m.record_initialisation()\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/utils/src/pyearthtools/utils/decorators.py:88\u001b[39m, in \u001b[36malias_arguments..internal_function..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 84\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 85\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mTwo keyword arguments for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mk\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[33m have been passed due to alias resolving. Cannot parse.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33mRecieved: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkwargs\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 86\u001b[39m )\n\u001b[32m 87\u001b[39m new_kwargs[k] = v\n\u001b[32m---> \u001b[39m\u001b[32m88\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mnew_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/modifications/decorator.py:310\u001b[39m, in \u001b[36mvariable_modifications..internal..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 308\u001b[39m modifications, stripped_variables = _get_modifications(variables)\n\u001b[32m 309\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(modifications.keys()) == \u001b[32m0\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m310\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 312\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m 
remove_variables: \u001b[38;5;66;03m# Remove variables being modified from base index\u001b[39;00m\n\u001b[32m 313\u001b[39m stripped_variables = \u001b[38;5;28mlist\u001b[39m(\u001b[38;5;28mset\u001b[39m(stripped_variables).difference(\u001b[38;5;28mlist\u001b[39m(modifications.keys())))\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/download/weatherbench.py:284\u001b[39m, in \u001b[36mWeatherBench2.__init__\u001b[39m\u001b[34m(self, dataset_url, license_url, variables, level, transforms, chunks, download_dir, license_ok, **kwargs)\u001b[39m\n\u001b[32m 282\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m MissingVariableFile:\n\u001b[32m 283\u001b[39m ds_remote = open_online_dataset()\n\u001b[32m--> \u001b[39m\u001b[32m284\u001b[39m \u001b[43msave_local_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdownload_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mds_remote\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 285\u001b[39m (download_path / \u001b[33m\"\u001b[39m\u001b[33mdataset_url\u001b[39m\u001b[33m\"\u001b[39m).write_text(dataset_url)\n\u001b[32m 286\u001b[39m ds = open_local_dataset(download_path, variables, level)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/download/weatherbench.py:139\u001b[39m, in \u001b[36msave_local_dataset\u001b[39m\u001b[34m(path, dset)\u001b[39m\n\u001b[32m 137\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mlevel\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m dset_var.dims:\n\u001b[32m 138\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m level \u001b[38;5;129;01min\u001b[39;00m dset_var.level.values:\n\u001b[32m--> \u001b[39m\u001b[32m139\u001b[39m 
\u001b[43m_save_variable\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdset_var\u001b[49m\u001b[43m.\u001b[49m\u001b[43msel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 140\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 141\u001b[39m _save_variable(dset_var, path)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/PyEarthTools/packages/data/src/pyearthtools/data/download/weatherbench.py:112\u001b[39m, in \u001b[36m_save_variable\u001b[39m\u001b[34m(darr, path)\u001b[39m\n\u001b[32m 110\u001b[39m disable_bar = logger.getEffectiveLevel() > logging.INFO\n\u001b[32m 111\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m TqdmCallback(desc=\u001b[33m\"\u001b[39m\u001b[33mWriting\u001b[39m\u001b[33m\"\u001b[39m, disable=disable_bar):\n\u001b[32m--> \u001b[39m\u001b[32m112\u001b[39m \u001b[43mdarr\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto_zarr\u001b[49m\u001b[43m(\u001b[49m\u001b[43mzarrpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mzarr_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 114\u001b[39m logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mSaving \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvarname\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m finished.\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/xarray/core/dataarray.py:4453\u001b[39m, in \u001b[36mDataArray.to_zarr\u001b[39m\u001b[34m(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, align_chunks, storage_options, zarr_version, zarr_format, write_empty_chunks, chunkmanager_store_kwargs)\u001b[39m\n\u001b[32m 4449\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 4450\u001b[39m \u001b[38;5;66;03m# No 
problems with the name - so we're fine!\u001b[39;00m\n\u001b[32m 4451\u001b[39m dataset = \u001b[38;5;28mself\u001b[39m.to_dataset()\n\u001b[32m-> \u001b[39m\u001b[32m4453\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mto_zarr\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[call-overload,misc]\u001b[39;49;00m\n\u001b[32m 4454\u001b[39m \u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4455\u001b[39m \u001b[43m \u001b[49m\u001b[43mstore\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4456\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunk_store\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunk_store\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4457\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4458\u001b[39m \u001b[43m \u001b[49m\u001b[43msynchronizer\u001b[49m\u001b[43m=\u001b[49m\u001b[43msynchronizer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4459\u001b[39m \u001b[43m \u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4460\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4461\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompute\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompute\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4462\u001b[39m \u001b[43m \u001b[49m\u001b[43mconsolidated\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconsolidated\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4463\u001b[39m \u001b[43m \u001b[49m\u001b[43mappend_dim\u001b[49m\u001b[43m=\u001b[49m\u001b[43mappend_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4464\u001b[39m \u001b[43m \u001b[49m\u001b[43mregion\u001b[49m\u001b[43m=\u001b[49m\u001b[43mregion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4465\u001b[39m \u001b[43m 
\u001b[49m\u001b[43msafe_chunks\u001b[49m\u001b[43m=\u001b[49m\u001b[43msafe_chunks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4466\u001b[39m \u001b[43m \u001b[49m\u001b[43malign_chunks\u001b[49m\u001b[43m=\u001b[49m\u001b[43malign_chunks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4467\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4468\u001b[39m \u001b[43m \u001b[49m\u001b[43mzarr_version\u001b[49m\u001b[43m=\u001b[49m\u001b[43mzarr_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4469\u001b[39m \u001b[43m \u001b[49m\u001b[43mzarr_format\u001b[49m\u001b[43m=\u001b[49m\u001b[43mzarr_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4470\u001b[39m \u001b[43m \u001b[49m\u001b[43mwrite_empty_chunks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwrite_empty_chunks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4471\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunkmanager_store_kwargs\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunkmanager_store_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4472\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/xarray/backends/api.py:2245\u001b[39m, in \u001b[36mto_zarr\u001b[39m\u001b[34m(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, align_chunks, storage_options, zarr_version, zarr_format, write_empty_chunks, chunkmanager_store_kwargs)\u001b[39m\n\u001b[32m 2243\u001b[39m writer = ArrayWriter()\n\u001b[32m 2244\u001b[39m \u001b[38;5;66;03m# TODO: figure out how to properly handle unlimited_dims\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m2245\u001b[39m \u001b[43mdump_to_store\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mzstore\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2246\u001b[39m writes = writer.sync(\n\u001b[32m 2247\u001b[39m compute=compute, chunkmanager_store_kwargs=chunkmanager_store_kwargs\n\u001b[32m 2248\u001b[39m )\n\u001b[32m 2250\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m compute:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/xarray/backends/api.py:1975\u001b[39m, in \u001b[36mdump_to_store\u001b[39m\u001b[34m(dataset, store, writer, encoder, encoding, unlimited_dims)\u001b[39m\n\u001b[32m 1972\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m encoder:\n\u001b[32m 1973\u001b[39m variables, attrs = encoder(variables, attrs)\n\u001b[32m-> \u001b[39m\u001b[32m1975\u001b[39m \u001b[43mstore\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstore\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariables\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_encoding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munlimited_dims\u001b[49m\u001b[43m=\u001b[49m\u001b[43munlimited_dims\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/xarray/backends/zarr.py:990\u001b[39m, in \u001b[36mZarrStore.store\u001b[39m\u001b[34m(self, variables, attributes, check_encoding_set, writer, unlimited_dims)\u001b[39m\n\u001b[32m 987\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 988\u001b[39m variables_to_set = variables_encoded\n\u001b[32m--> \u001b[39m\u001b[32m990\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mset_variables\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 991\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mvariables_to_set\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_encoding_set\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munlimited_dims\u001b[49m\u001b[43m=\u001b[49m\u001b[43munlimited_dims\u001b[49m\n\u001b[32m 992\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 993\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._consolidate_on_close:\n\u001b[32m 994\u001b[39m kwargs = {}\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/xarray/backends/zarr.py:1208\u001b[39m, in \u001b[36mZarrStore.set_variables\u001b[39m\u001b[34m(self, variables, check_encoding_set, writer, unlimited_dims)\u001b[39m\n\u001b[32m 1204\u001b[39m encoded_attrs[DIMENSION_KEY] = dims\n\u001b[32m 1206\u001b[39m encoding[\u001b[33m\"\u001b[39m\u001b[33moverwrite\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mself\u001b[39m._mode == \u001b[33m\"\u001b[39m\u001b[33mw\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1208\u001b[39m zarr_array = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_create_new_array\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1209\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1210\u001b[39m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1211\u001b[39m \u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[43m=\u001b[49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1212\u001b[39m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1213\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 
1214\u001b[39m \u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[43m=\u001b[49m\u001b[43mencoded_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1215\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1217\u001b[39m writer.add(v.data, zarr_array, region)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/xarray/backends/zarr.py:1070\u001b[39m, in \u001b[36mZarrStore._create_new_array\u001b[39m\u001b[34m(self, name, shape, dtype, fill_value, encoding, attrs)\u001b[39m\n\u001b[32m 1067\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m c \u001b[38;5;129;01min\u001b[39;00m encoding:\n\u001b[32m 1068\u001b[39m encoding[\u001b[33m\"\u001b[39m\u001b[33mconfig\u001b[39m\u001b[33m\"\u001b[39m][c] = encoding.pop(c)\n\u001b[32m-> \u001b[39m\u001b[32m1070\u001b[39m zarr_array = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mzarr_group\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1071\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1072\u001b[39m \u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[43m=\u001b[49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1073\u001b[39m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1074\u001b[39m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1075\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1076\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1077\u001b[39m zarr_array = _put_attrs(zarr_array, attrs)\n\u001b[32m 1078\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m zarr_array\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/group.py:2354\u001b[39m, in 
\u001b[36mGroup.create\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 2352\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcreate\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args: Any, **kwargs: Any) -> Array:\n\u001b[32m 2353\u001b[39m \u001b[38;5;66;03m# Backwards compatibility for 2.x\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m2354\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcreate_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/_compat.py:43\u001b[39m, in \u001b[36m_deprecate_positional_args.._inner_deprecate_positional_args..inner_f\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 41\u001b[39m extra_args = \u001b[38;5;28mlen\u001b[39m(args) - \u001b[38;5;28mlen\u001b[39m(all_args)\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m extra_args <= \u001b[32m0\u001b[39m:\n\u001b[32m---> \u001b[39m\u001b[32m43\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 45\u001b[39m \u001b[38;5;66;03m# extra_args > 0\u001b[39;00m\n\u001b[32m 46\u001b[39m args_msg = [\n\u001b[32m 47\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00marg\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 48\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m name, arg \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28mzip\u001b[39m(kwonly_args[:extra_args], args[-extra_args:], strict=\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[32m 49\u001b[39m ]\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/group.py:2473\u001b[39m, in \u001b[36mGroup.create_array\u001b[39m\u001b[34m(self, name, shape, dtype, chunks, shards, filters, compressors, compressor, serializer, fill_value, order, attributes, chunk_key_encoding, dimension_names, storage_options, overwrite, config)\u001b[39m\n\u001b[32m 2378\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Create an array within this group.\u001b[39;00m\n\u001b[32m 2379\u001b[39m \n\u001b[32m 2380\u001b[39m \u001b[33;03mThis method lightly wraps :func:`zarr.core.array.create_array`.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 2467\u001b[39m \u001b[33;03mAsyncArray\u001b[39;00m\n\u001b[32m 2468\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 2469\u001b[39m compressors = _parse_deprecated_compressor(\n\u001b[32m 2470\u001b[39m compressor, compressors, zarr_format=\u001b[38;5;28mself\u001b[39m.metadata.zarr_format\n\u001b[32m 2471\u001b[39m )\n\u001b[32m 2472\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m Array(\n\u001b[32m-> \u001b[39m\u001b[32m2473\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_sync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2474\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_async_group\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcreate_array\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2475\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2476\u001b[39m \u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[43m=\u001b[49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2477\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2478\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2479\u001b[39m \u001b[43m \u001b[49m\u001b[43mshards\u001b[49m\u001b[43m=\u001b[49m\u001b[43mshards\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2480\u001b[39m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2481\u001b[39m \u001b[43m \u001b[49m\u001b[43mattributes\u001b[49m\u001b[43m=\u001b[49m\u001b[43mattributes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2482\u001b[39m \u001b[43m \u001b[49m\u001b[43mchunk_key_encoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mchunk_key_encoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2483\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompressors\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompressors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2484\u001b[39m \u001b[43m \u001b[49m\u001b[43mserializer\u001b[49m\u001b[43m=\u001b[49m\u001b[43mserializer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2485\u001b[39m \u001b[43m \u001b[49m\u001b[43mdimension_names\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdimension_names\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2486\u001b[39m \u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m=\u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2487\u001b[39m \u001b[43m \u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2488\u001b[39m \u001b[43m \u001b[49m\u001b[43moverwrite\u001b[49m\u001b[43m=\u001b[49m\u001b[43moverwrite\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2489\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2490\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2491\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2492\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2493\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/sync.py:208\u001b[39m, in \u001b[36mSyncMixin._sync\u001b[39m\u001b[34m(self, coroutine)\u001b[39m\n\u001b[32m 205\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_sync\u001b[39m(\u001b[38;5;28mself\u001b[39m, coroutine: Coroutine[Any, Any, T]) -> T:\n\u001b[32m 206\u001b[39m \u001b[38;5;66;03m# TODO: refactor this to to take *args and **kwargs and pass those to the method\u001b[39;00m\n\u001b[32m 207\u001b[39m \u001b[38;5;66;03m# this should allow us to better type the sync wrapper\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m208\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msync\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 209\u001b[39m \u001b[43m \u001b[49m\u001b[43mcoroutine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 210\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43masync.timeout\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 211\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/sync.py:163\u001b[39m, in \u001b[36msync\u001b[39m\u001b[34m(coro, loop, timeout)\u001b[39m\n\u001b[32m 160\u001b[39m return_result = \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(finished)).result()\n\u001b[32m 162\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(return_result, 
\u001b[38;5;167;01mBaseException\u001b[39;00m):\n\u001b[32m--> \u001b[39m\u001b[32m163\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m return_result\n\u001b[32m 164\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 165\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m return_result\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/sync.py:119\u001b[39m, in \u001b[36m_runner\u001b[39m\u001b[34m(coro)\u001b[39m\n\u001b[32m 114\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 115\u001b[39m \u001b[33;03mAwait a coroutine and return the result of running it. If awaiting the coroutine raises an\u001b[39;00m\n\u001b[32m 116\u001b[39m \u001b[33;03mexception, the exception will be returned.\u001b[39;00m\n\u001b[32m 117\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 118\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m119\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m coro\n\u001b[32m 120\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[32m 121\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m ex\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/group.py:1102\u001b[39m, in \u001b[36mAsyncGroup.create_array\u001b[39m\u001b[34m(self, name, shape, dtype, chunks, shards, filters, compressors, compressor, serializer, fill_value, order, attributes, chunk_key_encoding, dimension_names, storage_options, overwrite, config)\u001b[39m\n\u001b[32m 1007\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Create an array within this group.\u001b[39;00m\n\u001b[32m 1008\u001b[39m \n\u001b[32m 1009\u001b[39m \u001b[33;03mThis method lightly wraps :func:`zarr.core.array.create_array`.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 1097\u001b[39m \n\u001b[32m 1098\u001b[39m 
\u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m 1099\u001b[39m compressors = _parse_deprecated_compressor(\n\u001b[32m 1100\u001b[39m compressor, compressors, zarr_format=\u001b[38;5;28mself\u001b[39m.metadata.zarr_format\n\u001b[32m 1101\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1102\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m create_array(\n\u001b[32m 1103\u001b[39m store=\u001b[38;5;28mself\u001b[39m.store_path,\n\u001b[32m 1104\u001b[39m name=name,\n\u001b[32m 1105\u001b[39m shape=shape,\n\u001b[32m 1106\u001b[39m dtype=dtype,\n\u001b[32m 1107\u001b[39m chunks=chunks,\n\u001b[32m 1108\u001b[39m shards=shards,\n\u001b[32m 1109\u001b[39m filters=filters,\n\u001b[32m 1110\u001b[39m compressors=compressors,\n\u001b[32m 1111\u001b[39m serializer=serializer,\n\u001b[32m 1112\u001b[39m fill_value=fill_value,\n\u001b[32m 1113\u001b[39m order=order,\n\u001b[32m 1114\u001b[39m zarr_format=\u001b[38;5;28mself\u001b[39m.metadata.zarr_format,\n\u001b[32m 1115\u001b[39m attributes=attributes,\n\u001b[32m 1116\u001b[39m chunk_key_encoding=chunk_key_encoding,\n\u001b[32m 1117\u001b[39m dimension_names=dimension_names,\n\u001b[32m 1118\u001b[39m storage_options=storage_options,\n\u001b[32m 1119\u001b[39m overwrite=overwrite,\n\u001b[32m 1120\u001b[39m config=config,\n\u001b[32m 1121\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/array.py:4438\u001b[39m, in \u001b[36mcreate_array\u001b[39m\u001b[34m(store, name, shape, dtype, data, chunks, shards, filters, compressors, serializer, fill_value, order, zarr_format, attributes, chunk_key_encoding, dimension_names, storage_options, overwrite, config, write_data)\u001b[39m\n\u001b[32m 4433\u001b[39m mode: Literal[\u001b[33m\"\u001b[39m\u001b[33ma\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[33m\"\u001b[39m\u001b[33ma\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 4435\u001b[39m store_path = \u001b[38;5;28;01mawait\u001b[39;00m 
make_store_path(\n\u001b[32m 4436\u001b[39m store, path=name, mode=mode, storage_options=storage_options\n\u001b[32m 4437\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m4438\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m init_array(\n\u001b[32m 4439\u001b[39m store_path=store_path,\n\u001b[32m 4440\u001b[39m shape=shape_parsed,\n\u001b[32m 4441\u001b[39m dtype=dtype_parsed,\n\u001b[32m 4442\u001b[39m chunks=chunks,\n\u001b[32m 4443\u001b[39m shards=shards,\n\u001b[32m 4444\u001b[39m filters=filters,\n\u001b[32m 4445\u001b[39m compressors=compressors,\n\u001b[32m 4446\u001b[39m serializer=serializer,\n\u001b[32m 4447\u001b[39m fill_value=fill_value,\n\u001b[32m 4448\u001b[39m order=order,\n\u001b[32m 4449\u001b[39m zarr_format=zarr_format,\n\u001b[32m 4450\u001b[39m attributes=attributes,\n\u001b[32m 4451\u001b[39m chunk_key_encoding=chunk_key_encoding,\n\u001b[32m 4452\u001b[39m dimension_names=dimension_names,\n\u001b[32m 4453\u001b[39m overwrite=overwrite,\n\u001b[32m 4454\u001b[39m config=config,\n\u001b[32m 4455\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/array.py:4229\u001b[39m, in \u001b[36minit_array\u001b[39m\u001b[34m(store_path, shape, dtype, chunks, shards, filters, compressors, serializer, fill_value, order, zarr_format, attributes, chunk_key_encoding, dimension_names, overwrite, config)\u001b[39m\n\u001b[32m 4217\u001b[39m meta = AsyncArray._create_metadata_v2(\n\u001b[32m 4218\u001b[39m shape=shape_parsed,\n\u001b[32m 4219\u001b[39m dtype=dtype_parsed,\n\u001b[32m (...)\u001b[39m\u001b[32m 4226\u001b[39m attributes=attributes,\n\u001b[32m 4227\u001b[39m )\n\u001b[32m 4228\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m4229\u001b[39m array_array, array_bytes, bytes_bytes = \u001b[43m_parse_chunk_encoding_v3\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 4230\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mcompressors\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcompressors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4231\u001b[39m \u001b[43m \u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfilters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4232\u001b[39m \u001b[43m \u001b[49m\u001b[43mserializer\u001b[49m\u001b[43m=\u001b[49m\u001b[43mserializer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4233\u001b[39m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdtype_parsed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4234\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4235\u001b[39m sub_codecs = cast(\u001b[38;5;28mtuple\u001b[39m[Codec, ...], (*array_array, array_bytes, *bytes_bytes))\n\u001b[32m 4236\u001b[39m codecs_out: \u001b[38;5;28mtuple\u001b[39m[Codec, ...]\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/array.py:4698\u001b[39m, in \u001b[36m_parse_chunk_encoding_v3\u001b[39m\u001b[34m(compressors, filters, serializer, dtype)\u001b[39m\n\u001b[32m 4695\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 4696\u001b[39m maybe_bytes_bytes = cast(Iterable[Codec | \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, JSON]], compressors)\n\u001b[32m-> \u001b[39m\u001b[32m4698\u001b[39m out_bytes_bytes = \u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m_parse_bytes_bytes_codec\u001b[49m\u001b[43m(\u001b[49m\u001b[43mc\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmaybe_bytes_bytes\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4700\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m out_array_array, out_array_bytes, out_bytes_bytes\n", + "\u001b[36mFile 
\u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/core/array.py:4698\u001b[39m, in \u001b[36m\u001b[39m\u001b[34m(.0)\u001b[39m\n\u001b[32m 4695\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 4696\u001b[39m maybe_bytes_bytes = cast(Iterable[Codec | \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, JSON]], compressors)\n\u001b[32m-> \u001b[39m\u001b[32m4698\u001b[39m out_bytes_bytes = \u001b[38;5;28mtuple\u001b[39m(\u001b[43m_parse_bytes_bytes_codec\u001b[49m\u001b[43m(\u001b[49m\u001b[43mc\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m c \u001b[38;5;129;01min\u001b[39;00m maybe_bytes_bytes)\n\u001b[32m 4700\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m out_array_array, out_array_bytes, out_bytes_bytes\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/Documents/PET/venv/lib/python3.12/site-packages/zarr/registry.py:184\u001b[39m, in \u001b[36m_parse_bytes_bytes_codec\u001b[39m\u001b[34m(data)\u001b[39m\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 183\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, BytesBytesCodec):\n\u001b[32m--> \u001b[39m\u001b[32m184\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mExpected a BytesBytesCodec. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m instead.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 185\u001b[39m result = data\n\u001b[32m 186\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", + "\u001b[31mTypeError\u001b[39m: Expected a BytesBytesCodec. Got instead." ] - }, - { - "data": { - "text/html": [ - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Pipeline\n",
-       "\tDescription                    `pyearthtools.pipeline` Data Pipeline\n",
-       "\n",
-       "\n",
-       "\tInitialisation                 \n",
-       "\t\t exceptions_to_ignore           None\n",
-       "\t\t iterator                       None\n",
-       "\t\t sampler                        None\n",
-       "\tSteps                          \n",
-       "\t\t weatherbench.WB2ERA5           {'WB2ERA5': {'download_dir': "PosixPath('cnn_training/download')", 'level': '[850]', 'license_ok': 'True', 'resolution': "'64x32'", 'variables': "['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']"}}\n",
-       "\t\t sort.Sort                      {'Sort': {'order': "['2m_temperature', 'u_component_of_wind', 'v_component_of_wind', 'vorticity', 'geopotential']", 'strict': 'False'}}\n",
-       "\t\t coordinates.StandardLongitude  {'StandardLongitude': {'longitude_name': "'longitude'", 'type': "'0-360'"}}\n",
-       "\t\t reshape.CoordinateFlatten      {'CoordinateFlatten': {'__args': '()', 'coordinate': "'level'", 'skip_missing': 'False'}}\n",
-       "\t\t idx_modification.TemporalRetrieval {'TemporalRetrieval': {'concat': 'True', 'delta_unit': 'None', 'merge_function': 'None', 'merge_kwargs': 'None', 'samples': '((0, 1), (6, 1))'}}
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "

Graph

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "WB2ERA5_44a88406-2743-4175-b4e3-6ae1042970e6\n", - "\n", - "weatherbench.WB2ERA5\n", - "\n", - "\n", - "\n", - "Sort_5b42b1b9-246a-4bb8-930b-4c48125c8d18\n", - "\n", - "sort.Sort\n", - "\n", - "\n", - "\n", - "WB2ERA5_44a88406-2743-4175-b4e3-6ae1042970e6->Sort_5b42b1b9-246a-4bb8-930b-4c48125c8d18\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "StandardLongitude_da295270-bbfc-4229-86fb-c7a77a58bb7b\n", - "\n", - "coordinates.StandardLongitude\n", - "\n", - "\n", - "\n", - "Sort_5b42b1b9-246a-4bb8-930b-4c48125c8d18->StandardLongitude_da295270-bbfc-4229-86fb-c7a77a58bb7b\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "CoordinateFlatten_f6bacd21-fccf-4326-91b3-90b9bce7590e\n", - "\n", - "reshape.CoordinateFlatten\n", - "\n", - "\n", - "\n", - "StandardLongitude_da295270-bbfc-4229-86fb-c7a77a58bb7b->CoordinateFlatten_f6bacd21-fccf-4326-91b3-90b9bce7590e\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "TemporalRetrieval_8e1e8286-9fca-485f-94d5-60ae49d9659d\n", - "\n", - "idx_modification.TemporalRetrieval\n", - "\n", - "\n", - "\n", - "CoordinateFlatten_f6bacd21-fccf-4326-91b3-90b9bce7590e->TemporalRetrieval_8e1e8286-9fca-485f-94d5-60ae49d9659d\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ @@ -6094,7 +5603,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.9" }, "nbsphinx": { "orphan": true diff --git a/notebooks/tutorial/HimawariAllBands.ipynb b/notebooks/tutorial/HimawariAllBands.ipynb index 8d0961cc..c4c3c64b 100644 --- a/notebooks/tutorial/HimawariAllBands.ipynb +++ b/notebooks/tutorial/HimawariAllBands.ipynb @@ -916,7 +916,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", - "version": "3.11.7" + "version": "3.12.9" }, "nbsphinx": { "orphan": true diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index cbd0ca67..c3935c62 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -13,19 +13,17 @@ # limitations under the License. -from typing import Hashable, TypeVar, Union, Optional, Any +from typing import Hashable, TypeVar, Union import xarray as xr import numpy as np import pyearthtools.data -from pyearthtools.data.transforms.transform import TransformCollection from pyearthtools.data.transforms.coordinates import Drop from pyearthtools.pipeline.operation import Operation -from packages.data.src.pyearthtools.data.transforms.attributes import SetType -from packages.data.src.pyearthtools.data.transforms.coordinates import Select -from packages.utils.src.pyearthtools.utils.decorators import BackwardsCompatibility +from pyearthtools.data.transforms.attributes import SetType +from pyearthtools.utils.decorators import BackwardsCompatibility T = TypeVar("T", xr.Dataset, xr.DataArray) From d1b1c1edb381acd9ce0d1938a3102b51729bfe78 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 14:03:33 +1200 Subject: [PATCH 23/31] start testing, improve docstring, rebase with new changes from upstream --- .../data/transforms/coordinates.py | 3 - .../tests/data/transforms/test_coordinates.py | 67 +++++++++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 packages/data/tests/data/transforms/test_coordinates.py diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index 5dda2890..fbed8add 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ 
b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -426,9 +426,6 @@ def __init__( Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. - If more than one coordinate is flattened, the output data variable names will concatenate the values of each - coordinate. - Args: coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): Coordinates to flatten, either str or list of candidates. diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py new file mode 100644 index 00000000..7d738057 --- /dev/null +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -0,0 +1,67 @@ +# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from pyearthtools.data.transforms import coordinates +import xarray as xr +import numpy as np + + +# Create a 2D array of integers +data = np.array([[0, 1], [0, 1]], dtype=np.int32) + +# Create a DataArray with named dimensions and coordinates +da = xr.DataArray( + data, + dims=["x", "y"], + coords={"x": [10, 11], "y": [100, 110]}, + name="example_integers" +) + + +larger_data = np.array( + [ + [ + [0, 1, 2, 3], + [0, 1, 2, 3], + [0, 1, 2, 3] + ], + [ + [0, 1, 2, 3], + [1, 16, 17, 18], + [2, 110, 111, 112] + ], + [ + [0, 1, 2, 3], + [1, 6, 7, 8], + [2, 10, 11, 12] + ], + [ + [0, 1, 2, 3], + [1, 16, 17, 18], + [2, 110, 111, 112] + ], + ], + dtype=np.int32) + +da_larger = xr.DataArray( + larger_data, + dims=["height", "lat", "lon"], + coords={"height": [10, 11, 12, 13], "lat": [100, 101, 102], "lon": [21, 22, 23, 24]}, + name="sample_data" +) + + +def test_Flatten(): + f = coordinates.Flatten(["height"]) + f.apply(da_larger) From fbb60b80fd3b3e8066903ef808c038b102468804 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 14:44:29 +1200 Subject: [PATCH 24/31] Flatten tested and debugged. --- .../data/transforms/coordinates.py | 3 + .../tests/data/transforms/test_coordinates.py | 60 +++++++------------ 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index fbed8add..5dda2890 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -426,6 +426,9 @@ def __init__( Dataset has a variable "t" and it is flattened along the coordinate "pressure_level" which has values [100, 200, 500], then the output Dataset will have variables called t100, t200 and t500. + If more than one coordinate is flattened, the output data variable names will concatenate the values of each + coordinate. 
+ Args: coordinate (Hashable | list[Hashable] | tuple[Hashable] | None): Coordinates to flatten, either str or list of candidates. diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index 7d738057..2a18a77c 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -16,52 +16,36 @@ import xarray as xr import numpy as np - -# Create a 2D array of integers -data = np.array([[0, 1], [0, 1]], dtype=np.int32) - -# Create a DataArray with named dimensions and coordinates -da = xr.DataArray( - data, - dims=["x", "y"], - coords={"x": [10, 11], "y": [100, 110]}, - name="example_integers" -) - - -larger_data = np.array( +SIMPLE_DA1 = xr.DataArray( [ [ - [0, 1, 2, 3], - [0, 1, 2, 3], - [0, 1, 2, 3] + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], ], [ - [0, 1, 2, 3], - [1, 16, 17, 18], - [2, 110, 111, 112] - ], - [ - [0, 1, 2, 3], - [1, 6, 7, 8], - [2, 10, 11, 12] - ], - [ - [0, 1, 2, 3], - [1, 16, 17, 18], - [2, 110, 111, 112] + [1.9, 1.0, 1.5], + [1.7, 2.4, 1.1], + [1.4, 1.5, 3.3], ], ], - dtype=np.int32) - -da_larger = xr.DataArray( - larger_data, + coords=[[10, 20], [0, 1, 2], [5, 6, 7]], dims=["height", "lat", "lon"], - coords={"height": [10, 11, 12, 13], "lat": [100, 101, 102], "lon": [21, 22, 23, 24]}, - name="sample_data" ) - +SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) +SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) def test_Flatten(): f = coordinates.Flatten(["height"]) - f.apply(da_larger) + output = f.apply(SIMPLE_DS2) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: + assert vbl in variables + +def test_Flatten_2_coords(): + f = coordinates.Flatten(["height", "lon"]) + output = f.apply(SIMPLE_DS1) + variables = list(output.keys()) + for vbl in 
['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', + 'Temperature710', 'Temperature720']: + assert vbl in variables From 35d3d078e80a48a9c6660ff167322ab45e7aba34 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 15:15:00 +1200 Subject: [PATCH 25/31] test skip_missing --- .../tests/data/transforms/test_coordinates.py | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index 2a18a77c..dc909106 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -15,6 +15,7 @@ from pyearthtools.data.transforms import coordinates import xarray as xr import numpy as np +import pytest SIMPLE_DA1 = xr.DataArray( [ @@ -32,9 +33,23 @@ coords=[[10, 20], [0, 1, 2], [5, 6, 7]], dims=["height", "lat", "lon"], ) + +SIMPLE_DA2 = xr.DataArray( + [ + [0.9, 0.0, 5], + [0.7, 1.4, 2.8], + [0.4, 0.5, 2.3], + ], + + coords=[[0, 1, 2], [5, 6, 7]], + dims=["lat", "lon"], +) + SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) +COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) + def test_Flatten(): f = coordinates.Flatten(["height"]) output = f.apply(SIMPLE_DS2) @@ -46,6 +61,27 @@ def test_Flatten_2_coords(): f = coordinates.Flatten(["height", "lon"]) output = f.apply(SIMPLE_DS1) variables = list(output.keys()) - for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', - 'Temperature710', 'Temperature720']: + # Note that it's hard to predict which coordinate will be processed first. 
+ try: + for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', + 'Temperature710', 'Temperature720']: + assert vbl in variables + except AssertionError: + for vbl in ['Temperature105', 'Temperature205', 'Temperature106', 'Temperature206', + 'Temperature107', 'Temperature207']: + assert vbl in variables + +def test_Flatten_complicated_dataset(): + """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" + f = coordinates.Flatten(["height"]) + output = f.apply(COMPLICATED_DS1) + variables = list(output.keys()) + for vbl in ["Temperature10", "Temperature20", "MSLP"]: assert vbl in variables + +def test_Flatten_skip_missing(): + f = coordinates.Flatten(["scrupulosity"]) + with pytest.raises(ValueError): + f.apply(SIMPLE_DS1) + f2 = coordinates.Flatten(["height"]) + f2.apply(SIMPLE_DS1) \ No newline at end of file From d6fe731db5db259ac7814e00293b4a8bd8c3ebe4 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Mon, 28 Jul 2025 15:16:07 +1200 Subject: [PATCH 26/31] test skip_missing --- packages/data/tests/data/transforms/test_coordinates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index dc909106..c8cb6219 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -83,5 +83,5 @@ def test_Flatten_skip_missing(): f = coordinates.Flatten(["scrupulosity"]) with pytest.raises(ValueError): f.apply(SIMPLE_DS1) - f2 = coordinates.Flatten(["height"]) + f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) f2.apply(SIMPLE_DS1) \ No newline at end of file From 4826a49665a05a2826d12a87dde723631f83b894 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Tue, 29 Jul 2025 13:56:45 +1200 Subject: [PATCH 27/31] Final tidying before pull request. 
--- packages/data/tests/data/transforms/test_coordinates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index c8cb6219..1864a54a 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -84,4 +84,5 @@ def test_Flatten_skip_missing(): with pytest.raises(ValueError): f.apply(SIMPLE_DS1) f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) - f2.apply(SIMPLE_DS1) \ No newline at end of file + output2 = f2.apply(SIMPLE_DS1) + assert output2.equals(SIMPLE_DS1), "When skip_missing=True, Datasets without the given coordinate pass unchanged." \ No newline at end of file From ad3ce75204607e9bfa1334166808ed4bf4cf98a3 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Wed, 1 Oct 2025 13:04:45 +1300 Subject: [PATCH 28/31] Update documentation --- docs/api/data/data_index.md | 3 - docs/api/pipeline/pipeline_index.md | 303 ++++++++++++++-------- 2 files changed, 152 insertions(+), 154 deletions(-) diff --git a/docs/api/data/data_index.md b/docs/api/data/data_index.md index bcf5a846..ec51498c 100644 --- a/docs/api/data/data_index.md +++ b/docs/api/data/data_index.md @@ -141,9 +141,6 @@ The rest of this page contains reference information for the components of the D | | | - [coordinates.StandardCoordinateNames](data_api.md#pyearthtools.data.transforms.coordinates.StandardCoordinateNames) | | | | - [coordinates.Select](data_api.md#pyearthtools.data.transforms.coordinates.Select) | | | | - [coordinates.Drop](data_api.md#pyearthtools.data.transforms.coordinates.Drop) | -| | | - [coordinates.Flatten](data_api.md#pyearthtools.data.transforms.coordinates.Flatten) | -| | | - [coordinates.Expand](data_api.md#pyearthtools.data.transforms.coordinates.Expand) | -| | | - [coordinates.SelectFlatten](data_api.md#pyearthtools.data.transforms.coordinates.SelectFlatten) | | | | - 
[coordinates.Assign](data_api.md#pyearthtools.data.transforms.coordinates.Assign) | | | | - [coordinates.Pad](data_api.md#pyearthtools.data.transforms.coordinates.Pad) | | | | - [default.get_default_transforms](data_api.md#pyearthtools.data.transforms.default.get_default_transforms) | diff --git a/docs/api/pipeline/pipeline_index.md b/docs/api/pipeline/pipeline_index.md index 21cb5511..5dc215d0 100644 --- a/docs/api/pipeline/pipeline_index.md +++ b/docs/api/pipeline/pipeline_index.md @@ -4,154 +4,155 @@ This is the Pipeline package which forms a part of the [PyEarthTools package](ht The rest of this page contains reference information for the components of the Pipeline package. The Pipeline API docs can be viewed at [Pipeline API Docs](pipeline_api.md). -| Module | Purpose | API Docs | -|----------------------|--------------------------------------|----------------| -| `pipeline` | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.Sampler) | -| | | - [Pipeline](pipeline_api.md#pyearthtools.pipeline.Pipeline) | -| | | - [Operation](pipeline_api.md#pyearthtools.pipeline.Operation) | -| | | - [PipelineException](pipeline_api.md#pyearthtools.pipeline.PipelineException) | -| | | - [PipelineFilterException](pipeline_api.md#pyearthtools.pipeline.PipelineFilterException) | -| | | - [PipelineRuntimeError](pipeline_api.md#pyearthtools.pipeline.PipelineRuntimeError) | -| | | - [PipelineTypeError](pipeline_api.md#pyearthtools.pipeline.PipelineTypeError) | -| `pipeline.branching` | | - [PipelineBranchPoint](pipeline_api.md#pyearthtools.pipeline.branching.PipelineBranchPoint) | -| | | - [Unifier](pipeline_api.md#pyearthtools.pipeline.branching.Unifier) | -| | | - [Joiner](pipeline_api.md#pyearthtools.pipeline.branching.Joiner) | -| | | - [Spliter](pipeline_api.md#pyearthtools.pipeline.branching.Spliter) | -| `pipeline.filters` | | - [Filter](pipeline_api.md#pyearthtools.pipeline.filters.Filter) | -| | | - [FilterCheck](pipeline_api.md#pyearthtools.pipeline.filters.FilterCheck) 
| -| | | - [FilterWarningContext](pipeline_api.md#pyearthtools.pipeline.filters.FilterWarningContext) | -| | | - [TypeFilter](pipeline_api.md#pyearthtools.pipeline.filters.TypeFilter) | -| `pipeline.iterators` | | - [Iterator](pipeline_api.md#pyearthtools.pipeline.iterators.Iterator) | -| | | - [Range](pipeline_api.md#pyearthtools.pipeline.iterators.Range) | -| | | - [Predefined](pipeline_api.md#pyearthtools.pipeline.iterators.Predefined) | -| | | - [File](pipeline_api.md#pyearthtools.pipeline.iterators.File) | -| | | - [DateRange](pipeline_api.md#pyearthtools.pipeline.iterators.DateRange) | -| | | - [DateRangeLimit](pipeline_api.md#pyearthtools.pipeline.iterators.DateRangeLimit) | -| | | - [Randomise](pipeline_api.md#pyearthtools.pipeline.iterators.Randomise) | -| | | - [SuperIterator](pipeline_api.md#pyearthtools.pipeline.iterators.SuperIterator) | -| | | - [IterateResults](pipeline_api.md#pyearthtools.pipeline.iterators.IterateResults) | -| `pipeline.modifications` | | - [Cache](pipeline_api.md#pyearthtools.pipeline.modifications.Cache) | -| | | - [StaticCache](pipeline_api.md#pyearthtools.pipeline.modifications.StaticCache) | -| | | - [MemCache](pipeline_api.md#pyearthtools.pipeline.modifications.MemCache) | -| | | - [IdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.IdxModifier) | -| | | - [IdxOverride](pipeline_api.md#pyearthtools.pipeline.modifications.IdxOverride) | -| | | - [TimeIdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.TimeIdxModifier) | -| | | - [SequenceRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.SequenceRetrieval) | -| | | - [TemporalRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalRetrieval) | -| | | - [idx_modification](pipeline_api.md#pyearthtools.pipeline.modifications.idx_modification) | -| `pipeline.operations` | | - [Transforms](pipeline_api.md#pyearthtools.pipeline.operations.Transforms) | -| `pipeline.operations.xarray` | | - 
[Compute](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Compute) | -| | | - [Merge](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Merge) | -| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Concatenate) | -| | | - [Sort](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Sort) | -| | | - [Chunk](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Chunk) | -| | | - [RecodeCalendar](pipeline_api.md#pyearthtools.pipeline.operations.xarray.RecodeCalendar) | -| | | - [AlignDates](pipeline_api.md#pyearthtools.pipeline.operations.xarray.AlignDates) | -| `pipeline.operations.xarray.conversion` | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToNumpy) | -| | | - [ToDask](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToDask) | -| `pipeline.operations.xarray.filters` | | - [XarrayFilter](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.XarrayFilter) | -| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAnyNan) | -| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAllNan) | -| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropValue) | -| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.Shape) | -| `pipeline.operations.xarray.reshape` | | - [Dimensions](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.Dimensions) | -| | | - [CoordinateFlatten](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten) | -| `pipeline.operations.xarray.select` | | - [SelectDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SelectDataset) | -| | | - [DropDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.DropDataset) | -| | | - [SliceDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SliceDataset) | -| 
`pipeline.operations.xarray.split` | | - [OnVariables](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnVariables) | -| | | - [OnCoordinate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnCoordinate) | -| `pipeline.operations.xarray.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.FillNan) | -| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.MaskValue) | -| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.ForceNormalised) | -| | | - [Derive](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.Derive) | -| `pipeline.operations.xarray.metadata` | | - [Rename](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Rename) | -| | | - [Encoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Encoding) | -| | | - [MaintainEncoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainEncoding) | -| | | - [Attributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Attributes) | -| | | - [MaintainAttributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainAttributes) | -| `pipeline.operations.xarray.normalisation` | | - [xarrayNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.xarrayNormalisation) | -| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Anomaly) | -| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Deviation) | -| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Division) | -| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Evaluated) | -| `pipeline.operations.xarray.remapping` | | - [HEALPix](pipeline_api.md#pyearthtools.pipeline.operations.xarray.remapping.HEALPix) | -| `pipeline.samplers` | | - 
[EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) | -| | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | -| | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | -| | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | -| | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | -| | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | -| | | - [RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) | -| `pipeline.operations.dask` | | - [Stack](pipeline_api.md#pyearthtools.pipeline.operations.dask.Stack) | -| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.dask.Concatenate) | -| | | - [VStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.VStack) | -| | | - [HStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.HStack) | -| | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.dask.Compute) | -| `pipeline.operations.dask.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Rotate) | -| | | - [Flip](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Flip) | -| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Transform) | -| `pipeline.operations.dask.filters` | | - [daskFilter](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.daskFilter) | -| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAnyNan) | -| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAllNan) | -| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropValue) | -| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.Shape) | -| `pipeline.operations.dask.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Rearrange) | -| | | - 
[Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Squeeze) | -| | | - [Flattener](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flattener) | -| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flatten) | -| | | - [SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.SwapAxis) | -| `pipeline.operations.dask.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Select) | -| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Slice) | -| `pipeline.operations.dask.split` | | - [OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnAxis) | -| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnSlice) | -| `pipeline.operations.dask.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.FillNan) | -| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.MaskValue) | -| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.ForceNormalised) | -| `pipeline.operations.dask.normalisation` | | - [daskNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.daskNormalisation) | -| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Anomaly) | -| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Deviation) | -| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Division) | -| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Evaluated) | -| `pipeline.operations.dask.conversion` | | - [ToXarray](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToXarray) | -| | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToXarray) | -| `pipeline.operations.numpy` | | - 
[Stack](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Stack) | -| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Concatenate) | -| `pipeline.operations.numpy.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Rotate) | -| | | - [Flip](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Flip) | -| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Transform) | -| `pipeline.operations.numpy.filters` | | - [NumpyFilter](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.NumpyFilter) | -| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAnyNan) | -| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAllNan) | -| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropValue) | -| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.Shape) | -| `pipeline.operations.numpy.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Rearrange) | -| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Squeeze) | -| | | - [Expand](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Expand) | -| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Squeeze) | -| | | - [Flattener](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flattener) | -| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flatten) | -| | | - [SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.SwapAxis) | -| `pipeline.operations.numpy.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Select) | -| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Slice) | -| `pipeline.operations.numpy.split` | | - 
[OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnAxis) | -| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnSlice) | -| | | - [VSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.VSplit) | -| | | - [HSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.HSplit) | -| `pipeline.operations.numpy.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.FillNan) | -| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.MaskValue) | -| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.ForceNormalised) | -| `pipeline.operations.numpy.normalisation` | | - [numpyNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.numpyNormalisation) | -| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Anomaly) | -| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Deviation) | -| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Division) | -| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Evaluated) | -| `pipeline.operations.transform` | | - [TimeOfYear](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.TimeOfYear) | -| | | - [AddCoordinates](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.AddCoordinates) | -| `pipeline.samplers` | | - [EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) | -| `pipeline.samplers` | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | -| `pipeline.samplers` | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | -| `pipeline.samplers` | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | -| `pipeline.samplers` | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | 
-| `pipeline.samplers` | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | -| `pipeline.samplers` | | - [RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) | +| Module | Purpose | API Docs | +|----------------------|--------------------------------------|--------------------------------------------------------------------------------------------------------------------| +| `pipeline` | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.Sampler) | +| | | - [Pipeline](pipeline_api.md#pyearthtools.pipeline.Pipeline) | +| | | - [Operation](pipeline_api.md#pyearthtools.pipeline.Operation) | +| | | - [PipelineException](pipeline_api.md#pyearthtools.pipeline.PipelineException) | +| | | - [PipelineFilterException](pipeline_api.md#pyearthtools.pipeline.PipelineFilterException) | +| | | - [PipelineRuntimeError](pipeline_api.md#pyearthtools.pipeline.PipelineRuntimeError) | +| | | - [PipelineTypeError](pipeline_api.md#pyearthtools.pipeline.PipelineTypeError) | +| `pipeline.branching` | | - [PipelineBranchPoint](pipeline_api.md#pyearthtools.pipeline.branching.PipelineBranchPoint) | +| | | - [Unifier](pipeline_api.md#pyearthtools.pipeline.branching.Unifier) | +| | | - [Joiner](pipeline_api.md#pyearthtools.pipeline.branching.Joiner) | +| | | - [Spliter](pipeline_api.md#pyearthtools.pipeline.branching.Spliter) | +| `pipeline.filters` | | - [Filter](pipeline_api.md#pyearthtools.pipeline.filters.Filter) | +| | | - [FilterCheck](pipeline_api.md#pyearthtools.pipeline.filters.FilterCheck) | +| | | - [FilterWarningContext](pipeline_api.md#pyearthtools.pipeline.filters.FilterWarningContext) | +| | | - [TypeFilter](pipeline_api.md#pyearthtools.pipeline.filters.TypeFilter) | +| `pipeline.iterators` | | - [Iterator](pipeline_api.md#pyearthtools.pipeline.iterators.Iterator) | +| | | - [Range](pipeline_api.md#pyearthtools.pipeline.iterators.Range) | +| | | - [Predefined](pipeline_api.md#pyearthtools.pipeline.iterators.Predefined) | +| | | - 
[File](pipeline_api.md#pyearthtools.pipeline.iterators.File) | +| | | - [DateRange](pipeline_api.md#pyearthtools.pipeline.iterators.DateRange) | +| | | - [DateRangeLimit](pipeline_api.md#pyearthtools.pipeline.iterators.DateRangeLimit) | +| | | - [Randomise](pipeline_api.md#pyearthtools.pipeline.iterators.Randomise) | +| | | - [SuperIterator](pipeline_api.md#pyearthtools.pipeline.iterators.SuperIterator) | +| | | - [IterateResults](pipeline_api.md#pyearthtools.pipeline.iterators.IterateResults) | +| `pipeline.modifications` | | - [Cache](pipeline_api.md#pyearthtools.pipeline.modifications.Cache) | +| | | - [StaticCache](pipeline_api.md#pyearthtools.pipeline.modifications.StaticCache) | +| | | - [MemCache](pipeline_api.md#pyearthtools.pipeline.modifications.MemCache) | +| | | - [IdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.IdxModifier) | +| | | - [IdxOverride](pipeline_api.md#pyearthtools.pipeline.modifications.IdxOverride) | +| | | - [TimeIdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.TimeIdxModifier) | +| | | - [SequenceRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.SequenceRetrieval) | +| | | - [TemporalRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalRetrieval) | +| | | - [idx_modification](pipeline_api.md#pyearthtools.pipeline.modifications.idx_modification) | +| `pipeline.operations` | | - [Transforms](pipeline_api.md#pyearthtools.pipeline.operations.Transforms) | +| `pipeline.operations.xarray` | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Compute) | +| | | - [Merge](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Merge) | +| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Concatenate) | +| | | - [Sort](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Sort) | +| | | - [Chunk](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Chunk) | +| | | - 
[RecodeCalendar](pipeline_api.md#pyearthtools.pipeline.operations.xarray.RecodeCalendar) | +| | | - [AlignDates](pipeline_api.md#pyearthtools.pipeline.operations.xarray.AlignDates) | +| `pipeline.operations.xarray.conversion` | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToNumpy) | +| | | - [ToDask](pipeline_api.md#pyearthtools.pipeline.operations.xarray.conversion.ToDask) | +| `pipeline.operations.xarray.filters` | | - [XarrayFilter](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.XarrayFilter) | +| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAnyNan) | +| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropAllNan) | +| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.DropValue) | +| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.xarray.filters.Shape) | +| `pipeline.operations.xarray.reshape` | | - [Dimensions](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.Dimensions) | +| | | - [CoordinateFlatten](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.CoordinateFlatten) | +| | | - [CoordinateExpand](pipeline_api.md#pyearthtools.pipeline.operations.xarray.reshape.CoordinateExpand) | +| `pipeline.operations.xarray.select` | | - [SelectDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SelectDataset) | +| | | - [DropDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.DropDataset) | +| | | - [SliceDataset](pipeline_api.md#pyearthtools.pipeline.operations.xarray.select.SliceDataset) | +| `pipeline.operations.xarray.split` | | - [OnVariables](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnVariables) | +| | | - [OnCoordinate](pipeline_api.md#pyearthtools.pipeline.operations.xarray.split.OnCoordinate) | +| `pipeline.operations.xarray.values` | | - 
[FillNan](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.FillNan) | +| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.MaskValue) | +| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.ForceNormalised) | +| | | - [Derive](pipeline_api.md#pyearthtools.pipeline.operations.xarray.values.Derive) | +| `pipeline.operations.xarray.metadata` | | - [Rename](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Rename) | +| | | - [Encoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Encoding) | +| | | - [MaintainEncoding](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainEncoding) | +| | | - [Attributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.Attributes) | +| | | - [MaintainAttributes](pipeline_api.md#pyearthtools.pipeline.operations.xarray.metadata.MaintainAttributes) | +| `pipeline.operations.xarray.normalisation` | | - [xarrayNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.xarrayNormalisation) | +| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Anomaly) | +| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Deviation) | +| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Division) | +| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.xarray.normalisation.Evaluated) | +| `pipeline.operations.xarray.remapping` | | - [HEALPix](pipeline_api.md#pyearthtools.pipeline.operations.xarray.remapping.HEALPix) | +| `pipeline.samplers` | | - [EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) | +| | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | +| | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | +| | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | 
+| | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | +| | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | +| | | - [RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) | +| `pipeline.operations.dask` | | - [Stack](pipeline_api.md#pyearthtools.pipeline.operations.dask.Stack) | +| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.dask.Concatenate) | +| | | - [VStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.VStack) | +| | | - [HStack](pipeline_api.md#pyearthtools.pipeline.operations.dask.HStack) | +| | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.dask.Compute) | +| `pipeline.operations.dask.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Rotate) | +| | | - [Flip](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Flip) | +| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.dask.augment.Transform) | +| `pipeline.operations.dask.filters` | | - [daskFilter](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.daskFilter) | +| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAnyNan) | +| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropAllNan) | +| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.DropValue) | +| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.dask.filters.Shape) | +| `pipeline.operations.dask.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Rearrange) | +| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Squeeze) | +| | | - [Flattener](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flattener) | +| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.Flatten) | +| | | - 
[SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.reshape.SwapAxis) | +| `pipeline.operations.dask.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Select) | +| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.dask.select.Slice) | +| `pipeline.operations.dask.split` | | - [OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnAxis) | +| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.dask.split.OnSlice) | +| `pipeline.operations.dask.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.FillNan) | +| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.MaskValue) | +| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.dask.values.ForceNormalised) | +| `pipeline.operations.dask.normalisation` | | - [daskNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.daskNormalisation) | +| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Anomaly) | +| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Deviation) | +| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Division) | +| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.dask.normalisation.Evaluated) | +| `pipeline.operations.dask.conversion` | | - [ToXarray](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToXarray) | +| | | - [ToNumpy](pipeline_api.md#pyearthtools.pipeline.operations.dask.conversion.ToNumpy) | +| `pipeline.operations.numpy` | | - [Stack](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Stack) | +| | | - [Concatenate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.Concatenate) | +| `pipeline.operations.numpy.augment` | | - [Rotate](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Rotate) | +| | | - 
[Flip](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Flip) | +| | | - [Transform](pipeline_api.md#pyearthtools.pipeline.operations.numpy.augment.Transform) | +| `pipeline.operations.numpy.filters` | | - [NumpyFilter](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.NumpyFilter) | +| | | - [DropAnyNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAnyNan) | +| | | - [DropAllNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropAllNan) | +| | | - [DropValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.DropValue) | +| | | - [Shape](pipeline_api.md#pyearthtools.pipeline.operations.numpy.filters.Shape) | +| `pipeline.operations.numpy.reshape` | | - [Rearrange](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Rearrange) | +| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Squeeze) | +| | | - [Expand](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Expand) | +| | | - [Squeeze](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Squeeze) | +| | | - [Flattener](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flattener) | +| | | - [Flatten](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.Flatten) | +| | | - [SwapAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.reshape.SwapAxis) | +| `pipeline.operations.numpy.select` | | - [Select](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Select) | +| | | - [Slice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.select.Slice) | +| `pipeline.operations.numpy.split` | | - [OnAxis](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnAxis) | +| | | - [OnSlice](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.OnSlice) | +| | | - [VSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.VSplit) | +| | | - [HSplit](pipeline_api.md#pyearthtools.pipeline.operations.numpy.split.HSplit) | +| 
`pipeline.operations.numpy.values` | | - [FillNan](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.FillNan) | +| | | - [MaskValue](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.MaskValue) | +| | | - [ForceNormalised](pipeline_api.md#pyearthtools.pipeline.operations.numpy.values.ForceNormalised) | +| `pipeline.operations.numpy.normalisation` | | - [numpyNormalisation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.numpyNormalisation) | +| | | - [Anomaly](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Anomaly) | +| | | - [Deviation](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Deviation) | +| | | - [Division](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Division) | +| | | - [Evaluated](pipeline_api.md#pyearthtools.pipeline.operations.numpy.normalisation.Evaluated) | +| `pipeline.operations.transform` | | - [TimeOfYear](pipeline_api.md#pyearthtools.pipeline.operations.transform.TimeOfYear) | +| | | - [AddCoordinates](pipeline_api.md#pyearthtools.pipeline.operations.transform.AddCoordinates) | +| `pipeline.samplers` | | - [EmptyObject](pipeline_api.md#pyearthtools.pipeline.samplers.EmptyObject) | +| `pipeline.samplers` | | - [Sampler](pipeline_api.md#pyearthtools.pipeline.samplers.Sampler) | +| `pipeline.samplers` | | - [Default](pipeline_api.md#pyearthtools.pipeline.samplers.Default) | +| `pipeline.samplers` | | - [SuperSampler](pipeline_api.md#pyearthtools.pipeline.samplers.SuperSampler) | +| `pipeline.samplers` | | - [Random](pipeline_api.md#pyearthtools.pipeline.samplers.Random) | +| `pipeline.samplers` | | - [DropOut](pipeline_api.md#pyearthtools.pipeline.samplers.DropOut) | +| `pipeline.samplers` | | - [RandomDropOut](pipeline_api.md#pyearthtools.pipeline.samplers.RandomDropOut) |
[PATCH 29/31] Ran code reformatter --- .../data/transforms/coordinates.py | 1 + .../tests/data/transforms/test_coordinates.py | 27 ++++++++++++++----- .../transform/test_transform_coordinates.py | 2 -- .../pipeline/operations/xarray/reshape.py | 5 +++- .../operations/xarray/test_xarray_reshape.py | 17 ++++++------ 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index 2cdb70c7..087f1e05 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -395,6 +395,7 @@ def apply(self, dataset: xr.Dataset) -> xr.Dataset: dataset = dataset.drop_vars(i) return dataset + class Assign(Transform): """Assign coordinates to object""" diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index 1864a54a..d1c5060b 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -40,7 +40,6 @@ [0.7, 1.4, 2.8], [0.4, 0.5, 2.3], ], - coords=[[0, 1, 2], [5, 6, 7]], dims=["lat", "lon"], ) @@ -50,6 +49,7 @@ COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) + def test_Flatten(): f = coordinates.Flatten(["height"]) output = f.apply(SIMPLE_DS2) @@ -57,20 +57,34 @@ def test_Flatten(): for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: assert vbl in variables + def test_Flatten_2_coords(): f = coordinates.Flatten(["height", "lon"]) output = f.apply(SIMPLE_DS1) variables = list(output.keys()) # Note that it's hard to predict which coordinate will be processed first. 
try: - for vbl in ['Temperature510', 'Temperature520', 'Temperature610', 'Temperature620', - 'Temperature710', 'Temperature720']: + for vbl in [ + "Temperature510", + "Temperature520", + "Temperature610", + "Temperature620", + "Temperature710", + "Temperature720", + ]: assert vbl in variables except AssertionError: - for vbl in ['Temperature105', 'Temperature205', 'Temperature106', 'Temperature206', - 'Temperature107', 'Temperature207']: + for vbl in [ + "Temperature105", + "Temperature205", + "Temperature106", + "Temperature206", + "Temperature107", + "Temperature207", + ]: assert vbl in variables + def test_Flatten_complicated_dataset(): """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" f = coordinates.Flatten(["height"]) @@ -79,10 +93,11 @@ def test_Flatten_complicated_dataset(): for vbl in ["Temperature10", "Temperature20", "MSLP"]: assert vbl in variables + def test_Flatten_skip_missing(): f = coordinates.Flatten(["scrupulosity"]) with pytest.raises(ValueError): f.apply(SIMPLE_DS1) f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) output2 = f2.apply(SIMPLE_DS1) - assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." \ No newline at end of file + assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." 
diff --git a/packages/data/tests/transform/test_transform_coordinates.py b/packages/data/tests/transform/test_transform_coordinates.py index d5ba3a76..56a3cfbd 100644 --- a/packages/data/tests/transform/test_transform_coordinates.py +++ b/packages/data/tests/transform/test_transform_coordinates.py @@ -107,5 +107,3 @@ def test_Assign(ds_vertical): # # with pytest.raises(AssertionError): # assert not np.isnan(_result.sel(longitude=-1, vertical=1).temperature.values) - - diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py index bf4dc1ce..bcab81a8 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/reshape.py @@ -92,6 +92,7 @@ def undo_func(self, sample: T) -> T: return sample.transpose(*self._incoming_dims, missing_dims="ignore") return sample + def weak_cast_to_int(value): """ Basically, turns integer floats to int types, otherwise @@ -104,6 +105,7 @@ def weak_cast_to_int(value): pass return value + class CoordinateFlatten(Operation): """Flatten a coordinate in a dataset into separate variables.""" @@ -185,9 +187,11 @@ def apply_func(self, dataset: xr.Dataset) -> xr.Dataset: def undo_func(self, ds): return pyearthtools.pipeline.operations.xarray.reshape.coordinate_expand(self._coordinate)(ds) + @BackwardsCompatibility(CoordinateFlatten) def coordinate_flatten(*args, **kwargs) -> Operation: ... + class CoordinateExpand(Operation): """Inverse operation to `CoordinateFlatten`""" @@ -242,4 +246,3 @@ def undo_func(self, ds): @BackwardsCompatibility(CoordinateExpand) def coordinate_expand(*args, **kwargs) -> Operation: ... 
- diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py index 01242e6c..48dd8d57 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_reshape.py @@ -34,41 +34,39 @@ dims=["height", "lat", "lon"], ) -SIMPLE_DA2 = xr.DataArray( - [ - [9.1, 2.3, 3.2], - [2.2, 1.1, 0.2] - ], - coords=[[1, 2], [3, 4, 5]], - dims=["a", "b"] -) +SIMPLE_DA2 = xr.DataArray([[9.1, 2.3, 3.2], [2.2, 1.1, 0.2]], coords=[[1, 2], [3, 4, 5]], dims=["a", "b"]) SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) + def test_Dimensions(): d = reshape.Dimensions(["lat", "lon", "height"]) output = d.apply_func(SIMPLE_DA1) assert output.dims == ("lat", "lon", "height") + def test_Dimensions_one_input(): d = reshape.Dimensions(["lat"]) output = d.apply_func(SIMPLE_DA1) assert output.dims[0] == "lat" + def test_Dimensions_prepend(): d = reshape.Dimensions(["lat"], append=False) output = d.apply_func(SIMPLE_DA1) assert output.dims[-1] == "lat" + def test_Dimensions_preserve_order(): d = reshape.Dimensions(["lat"], preserve_order=True) output = d.apply_func(SIMPLE_DA1) reversed_output = d.undo_func(output) assert reversed_output.dims == output.dims + def test_weak_cast_to_int(): wcti = reshape.weak_cast_to_int @@ -104,6 +102,7 @@ def test_CoordinateFlatten_skip_missing(): output2 = f2.apply(SIMPLE_DS1) assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." 
+ def test_undo_CoordinateFlatten(): f = reshape.CoordinateFlatten(["height"]) f_output = f.apply(SIMPLE_DS2) @@ -112,6 +111,7 @@ def test_undo_CoordinateFlatten(): for vbl in ["Temperature", "Humidity", "WombatsPerKm2"]: assert vbl in variables + def test_CoordinateExpand_reverses_CoordinateFlatten(): f = reshape.CoordinateFlatten(["height"]) f_output = f.apply(SIMPLE_DS2) @@ -120,6 +120,7 @@ def test_CoordinateExpand_reverses_CoordinateFlatten(): variables = list(e_output.keys()) assert "Temperature" in variables + def test_undo_CoordinateExpand(): f = reshape.CoordinateFlatten(["height"]) f_output = f.apply(SIMPLE_DS2) From 8f842755f927103b8ee94760603d65736d3f0e45 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 16 Oct 2025 14:51:42 +1100 Subject: [PATCH 30/31] Remove unused imports reported by ruff check --- packages/data/src/pyearthtools/data/transforms/coordinates.py | 3 +-- packages/data/tests/data/transforms/test_coordinates.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/data/src/pyearthtools/data/transforms/coordinates.py b/packages/data/src/pyearthtools/data/transforms/coordinates.py index 087f1e05..5e4fdf3d 100644 --- a/packages/data/src/pyearthtools/data/transforms/coordinates.py +++ b/packages/data/src/pyearthtools/data/transforms/coordinates.py @@ -24,8 +24,7 @@ import pandas as pd -from pyearthtools.data.transforms.transform import Transform, TransformCollection -from pyearthtools.data.transforms.attributes import SetType +from pyearthtools.data.transforms.transform import Transform from pyearthtools.data.warnings import pyearthtoolsDataWarning from pyearthtools.data.exceptions import DataNotFoundError diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py index d1c5060b..ee01f7ff 100644 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ b/packages/data/tests/data/transforms/test_coordinates.py @@ -14,7 +14,6 @@ from 
pyearthtools.data.transforms import coordinates import xarray as xr -import numpy as np import pytest SIMPLE_DA1 = xr.DataArray( From 559be85bf41b0dd25a701126383a259e87e1e707 Mon Sep 17 00:00:00 2001 From: Gemma Mason Date: Fri, 17 Oct 2025 14:20:44 +1300 Subject: [PATCH 31/31] resolve conflicts when pulling --- .../tests/data/transforms/test_coordinates.py | 102 ------------------ .../data/transforms/test_data_coordinates.py | 102 ------------------ 2 files changed, 204 deletions(-) delete mode 100644 packages/data/tests/data/transforms/test_coordinates.py delete mode 100644 packages/data/tests/data/transforms/test_data_coordinates.py diff --git a/packages/data/tests/data/transforms/test_coordinates.py b/packages/data/tests/data/transforms/test_coordinates.py deleted file mode 100644 index ee01f7ff..00000000 --- a/packages/data/tests/data/transforms/test_coordinates.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from pyearthtools.data.transforms import coordinates -import xarray as xr -import pytest - -SIMPLE_DA1 = xr.DataArray( - [ - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - [ - [1.9, 1.0, 1.5], - [1.7, 2.4, 1.1], - [1.4, 1.5, 3.3], - ], - ], - coords=[[10, 20], [0, 1, 2], [5, 6, 7]], - dims=["height", "lat", "lon"], -) - -SIMPLE_DA2 = xr.DataArray( - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - coords=[[0, 1, 2], [5, 6, 7]], - dims=["lat", "lon"], -) - -SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) -SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) - -COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) - - -def test_Flatten(): - f = coordinates.Flatten(["height"]) - output = f.apply(SIMPLE_DS2) - variables = list(output.keys()) - for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: - assert vbl in variables - - -def test_Flatten_2_coords(): - f = coordinates.Flatten(["height", "lon"]) - output = f.apply(SIMPLE_DS1) - variables = list(output.keys()) - # Note that it's hard to predict which coordinate will be processed first. 
- try: - for vbl in [ - "Temperature510", - "Temperature520", - "Temperature610", - "Temperature620", - "Temperature710", - "Temperature720", - ]: - assert vbl in variables - except AssertionError: - for vbl in [ - "Temperature105", - "Temperature205", - "Temperature106", - "Temperature206", - "Temperature107", - "Temperature207", - ]: - assert vbl in variables - - -def test_Flatten_complicated_dataset(): - """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" - f = coordinates.Flatten(["height"]) - output = f.apply(COMPLICATED_DS1) - variables = list(output.keys()) - for vbl in ["Temperature10", "Temperature20", "MSLP"]: - assert vbl in variables - - -def test_Flatten_skip_missing(): - f = coordinates.Flatten(["scrupulosity"]) - with pytest.raises(ValueError): - f.apply(SIMPLE_DS1) - f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) - output2 = f2.apply(SIMPLE_DS1) - assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged." diff --git a/packages/data/tests/data/transforms/test_data_coordinates.py b/packages/data/tests/data/transforms/test_data_coordinates.py deleted file mode 100644 index ee01f7ff..00000000 --- a/packages/data/tests/data/transforms/test_data_coordinates.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright Commonwealth of Australia, Bureau of Meteorology 2025. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from pyearthtools.data.transforms import coordinates -import xarray as xr -import pytest - -SIMPLE_DA1 = xr.DataArray( - [ - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - [ - [1.9, 1.0, 1.5], - [1.7, 2.4, 1.1], - [1.4, 1.5, 3.3], - ], - ], - coords=[[10, 20], [0, 1, 2], [5, 6, 7]], - dims=["height", "lat", "lon"], -) - -SIMPLE_DA2 = xr.DataArray( - [ - [0.9, 0.0, 5], - [0.7, 1.4, 2.8], - [0.4, 0.5, 2.3], - ], - coords=[[0, 1, 2], [5, 6, 7]], - dims=["lat", "lon"], -) - -SIMPLE_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1}) -SIMPLE_DS2 = xr.Dataset({"Humidity": SIMPLE_DA1, "Temperature": SIMPLE_DA1, "WombatsPerKm2": SIMPLE_DA1}) - -COMPLICATED_DS1 = xr.Dataset({"Temperature": SIMPLE_DA1, "MSLP": SIMPLE_DA2}) - - -def test_Flatten(): - f = coordinates.Flatten(["height"]) - output = f.apply(SIMPLE_DS2) - variables = list(output.keys()) - for vbl in ["Temperature10", "Temperature20", "Humidity10", "Humidity20", "WombatsPerKm210", "WombatsPerKm220"]: - assert vbl in variables - - -def test_Flatten_2_coords(): - f = coordinates.Flatten(["height", "lon"]) - output = f.apply(SIMPLE_DS1) - variables = list(output.keys()) - # Note that it's hard to predict which coordinate will be processed first. 
- try: - for vbl in [ - "Temperature510", - "Temperature520", - "Temperature610", - "Temperature620", - "Temperature710", - "Temperature720", - ]: - assert vbl in variables - except AssertionError: - for vbl in [ - "Temperature105", - "Temperature205", - "Temperature106", - "Temperature206", - "Temperature107", - "Temperature207", - ]: - assert vbl in variables - - -def test_Flatten_complicated_dataset(): - """Check that Flatten still works when the coordinate being flattened does not exist for all variables.""" - f = coordinates.Flatten(["height"]) - output = f.apply(COMPLICATED_DS1) - variables = list(output.keys()) - for vbl in ["Temperature10", "Temperature20", "MSLP"]: - assert vbl in variables - - -def test_Flatten_skip_missing(): - f = coordinates.Flatten(["scrupulosity"]) - with pytest.raises(ValueError): - f.apply(SIMPLE_DS1) - f2 = coordinates.Flatten(["scrupulosity"], skip_missing=True) - output2 = f2.apply(SIMPLE_DS1) - assert output2 == SIMPLE_DS1, "When skip_missing=True, Datasets without the given coordinate pass unchanged."