From 42997cea947efe67dc2ceaafa5e43348db327d13 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Wed, 29 Oct 2025 21:59:30 +1100 Subject: [PATCH 1/7] Update zenodo.json to reflect contribution rates Update documentation to make it clearer that evaluation is 'coming soon' rather than being done already --- .zenodo.json | 172 ++++++++++++++++++++++++------------------------ README.md | 37 +++++++---- docs/index.md | 35 ++++++---- docs/newuser.md | 4 +- 4 files changed, 134 insertions(+), 114 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index a46f295e..0c95b258 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,89 +1,87 @@ { - "creators": [ - { - "orcid": "https://orcid.org/0009-0009-3207-4876", - "affiliation": "Work undertaken while at the Bureau of Meteorology, Australia", - "name": "Cook, Harrison" - }, - { - "orcid": "https://orcid.org/0009-0008-2024-1967", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Leeuwenburg, Tennessee" - }, - { - "orcid": "https://orcid.org/0000-0003-2553-0721", - "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", - "name": "Rio, Maxime" - }, - { - "orcid": "https://orcid.org/0000-0002-3148-4496", - "affiliation": "Met Office, United Kingdom", - "name": "Miller, Joel" - }, - { - "orcid": "https://orcid.org/0000-0002-6569-1178", - "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", - "name": "Mason, Gemma" - }, - { - "orcid": "https://orcid.org/0009-0002-7406-7438", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Ramanathan, Nikeeth" - }, - { - "orcid": "https://orcid.org/0009-0001-6250-3987", - "affiliation": "Met Office, United Kingdom", - "name": "Pill, John" - }, - { - "orcid": "https://orcid.org/0000-0003-3430-2456", - "affiliation": "Met Office, United Kingdom", - "name": "Haddad, Stephen" - }, - { - "orcid": "https://orcid.org/0000-0002-1975-0042", - "affiliation": "Bureau of Meteorology, 
Australia", - "name": "de Burgh-Day, Catherine" - }, - { - "orcid": "https://orcid.org/0009-0000-5048-4240", - "affiliation": "Met Office, United Kingdom", - "name": "Sullivan, Ben" - }, - { - "orcid": "https://orcid.org/0000-0001-6825-3854", - "affiliation": "University of New South Wales Sydney, Australia", - "name": "Hobeichi, Sanaa" - - }, - { - "orcid": "https://orcid.org/0000-0002-6799-9109", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Holmes, Ryan" - }, - { - "orcid": "https://orcid.org/0009-0009-8268-8358", - "affiliation": "Independent researcher", - "name": "Potokina, Margarita" - }, - { - "orcid": "https://orcid.org/0009-0001-4486-0120", - "affiliation": "Independent researcher, Vietnam", - "name": "Bogacheva, Jenya" - }, - { - "orcid": "https://orcid.org/0009-0004-9553-8387", - "affiliation": "Independent researcher, Australia", - "name": "James, Matthew" - }, - { - "orcid": "https://orcid.org/0000-0002-5407-4297", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Stassen, Christian" - } - ], - "license": "Apache-2.0", - "title": "PyEarthTools: Machine learning for Earth system science", - "keywords": ["modelling", "geoscience", "earth system science"] - + "creators": [ + { + "orcid": "https://orcid.org/0009-0008-2024-1967", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Leeuwenburg, Tennessee" + }, + { + "orcid": "https://orcid.org/0009-0009-3207-4876", + "affiliation": "Work undertaken while at the Bureau of Meteorology, Australia", + "name": "Cook, Harrison" + }, + { + "orcid": "https://orcid.org/0000-0003-2553-0721", + "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", + "name": "Rio, Maxime" + }, + { + "orcid": "https://orcid.org/0000-0001-6825-3854", + "affiliation": "University of New South Wales Sydney, Australia", + "name": "Hobeichi, Sanaa" + }, + { + "orcid": "https://orcid.org/0000-0002-3148-4496", + "affiliation": "Met Office, United Kingdom", + 
"name": "Miller, Joel" + }, + { + "orcid": "https://orcid.org/0000-0002-6569-1178", + "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", + "name": "Mason, Gemma" + }, + { + "orcid": "https://orcid.org/0009-0002-7406-7438", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Ramanathan, Nikeeth" + }, + { + "orcid": "https://orcid.org/0009-0001-6250-3987", + "affiliation": "Met Office, United Kingdom", + "name": "Pill, John" + }, + { + "orcid": "https://orcid.org/0000-0003-3430-2456", + "affiliation": "Met Office, United Kingdom", + "name": "Haddad, Stephen" + }, + { + "orcid": "https://orcid.org/0000-0002-5407-4297", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Stassen, Christian" + }, + { + "orcid": "https://orcid.org/0000-0002-1975-0042", + "affiliation": "Bureau of Meteorology, Australia", + "name": "de Burgh-Day, Catherine" + }, + { + "orcid": "https://orcid.org/0000-0002-6799-9109", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Holmes, Ryan" + }, + { + "orcid": "https://orcid.org/0009-0009-8268-8358", + "affiliation": "Independent researcher", + "name": "Potokina, Margarita" + }, + { + "orcid": "https://orcid.org/0009-0001-4486-0120", + "affiliation": "Independent researcher, Vietnam", + "name": "Bogacheva, Jenya" + }, + { + "orcid": "https://orcid.org/0009-0004-9553-8387", + "affiliation": "Independent researcher, Australia", + "name": "James, Matthew" + }, + { + "orcid": "https://orcid.org/0009-0000-5048-4240", + "affiliation": "Met Office, United Kingdom", + "name": "Sullivan, Ben" + } + ], + "license": "Apache-2.0", + "title": "PyEarthTools: Machine learning for Earth system science", + "keywords": ["modelling", "geoscience", "earth system science"] } diff --git a/README.md b/README.md index 6f69ed9e..475d6fb5 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,12 @@ 
|![](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_demo_FourCastNeXt_Inference_9_1.png)
A weather prediction from a model trained with PyEarthTools.|![](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_Working_with_Climate_Data_14_2.svg)
A data processing flow composed for working with climate data.| |:-:|:-:| -Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) -Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) -Tutorial Gallery: [available here](https://pyearthtools.readthedocs.io/en/latest/notebooks/Gallery.html) -New Users Guide: [available here](https://pyearthtools.readthedocs.io/en/latest/newuser.html) +Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) +Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) +Tutorial Gallery: [available here](https://pyearthtools.readthedocs.io/en/latest/notebooks/Gallery.html) +New Users Guide: [available here](https://pyearthtools.readthedocs.io/en/latest/newuser.html) + +**If you use `PyEarthTools` for your work or a publication, [please cite our work](https://pyearthtools.readthedocs.io/en/latest/#acknowleging-or-citing-pyearthtools).** ## Installation @@ -53,7 +55,7 @@ PyEarthTools is a Python framework containing modules for: - defining machine learning (ML) models; - training ML models and managing experiments; - performing inference with ML models; - - and evaluating ML models. + - and evaluating ML models (coming soon). ## Overview of the Packages within PyEarthTools @@ -74,11 +76,11 @@ PyEarthTools comprises multiple sub-packages which can be used individually or t If you use PyEarthTools for your work, we would appreciate you citing our software as below: -Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., & de Burgh-Day, C. (2025). PyEarthTools: Machine learning for Earth system science (0.1.1). Zenodo. 
https://doi.org/10.5281/zenodo.15760769 +Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., de Burgh-Day, C., Sullivan, B., Hobeichi, S., Holmes, R., Potokina, M., Bogacheva, J., James, M., & Stassen, C. (2025). PyEarthTools: Machine learning for Earth system science (0.4.0). Zenodo. https://doi.org/10.5281/zenodo.17429589 BibTeX: ``` -@software{cook_2025_15760769, +@software{cook_2025_17429589, author = {Cook, Harrison and Leeuwenburg, Tennessee and Rio, Maxime and @@ -87,13 +89,22 @@ BibTeX: Ramanathan, Nikeeth and Pill, John and Haddad, Stephen and - de Burgh-Day, Catherine}, - title = {{PyEarthTools: Machine learning for Earth system science}}, - month = jun, + de Burgh-Day, Catherine and + Sullivan, Ben and + Hobeichi, Sanaa and + Holmes, Ryan and + Potokina, Margarita and + Bogacheva, Jenya and + James, Matthew and + Stassen, Christian}, + title = {PyEarthTools: Machine learning for Earth system + science + }, + month = oct, year = 2025, publisher = {Zenodo}, - version = {0.1.1}, - doi = {10.5281/zenodo.15760769}, - url = {https://doi.org/10.5281/zenodo.15760769} + version = {0.4.0}, + doi = {10.5281/zenodo.17429589}, + url = {https://doi.org/10.5281/zenodo.17429589}, } ``` diff --git a/docs/index.md b/docs/index.md index 41dd16bf..10dd9ff7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,9 +22,11 @@
A data processing flow composed for working with climate data.
-Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) -Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) -Tutorial Gallery: [available here](./notebooks/Gallery) +Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) +Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) +Tutorial Gallery: [available here](./notebooks/Gallery) + +**If you use `PyEarthTools` for your work or a publication, [please cite our work](https://pyearthtools.readthedocs.io/en/latest/#acknowleging-or-citing-pyearthtools).** ## Installation @@ -71,7 +73,7 @@ PyEarthTools is a Python framework containing modules for: - defining machine learning (ML) models; - training ML models and managing experiments; - performing inference with ML models; - - and evaluating ML models. + - and evaluating ML models (coming soon). ## Overview of the Packages within PyEarthTools @@ -95,11 +97,11 @@ If you use PyEarthTools for your work, we would appreciate you citing our softwa :::::{tab-set} ::::{tab-item} APA -Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., & de Burgh-Day, C. (2025). PyEarthTools: Machine learning for Earth system science (0.1.1). Zenodo. https://doi.org/10.5281/zenodo.15760769 +Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., de Burgh-Day, C., Sullivan, B., Hobeichi, S., Holmes, R., Potokina, M., Bogacheva, J., James, M., & Stassen, C. (2025). PyEarthTools: Machine learning for Earth system science (0.4.0). Zenodo. 
https://doi.org/10.5281/zenodo.17429589 :::: ::::{tab-item} BibTeX ``` -@software{cook_2025_15760769, +@software{cook_2025_17429589, author = {Cook, Harrison and Leeuwenburg, Tennessee and Rio, Maxime and @@ -108,14 +110,23 @@ Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, Ramanathan, Nikeeth and Pill, John and Haddad, Stephen and - de Burgh-Day, Catherine}, - title = {{PyEarthTools: Machine learning for Earth system science}}, - month = jun, + de Burgh-Day, Catherine and + Sullivan, Ben and + Hobeichi, Sanaa and + Holmes, Ryan and + Potokina, Margarita and + Bogacheva, Jenya and + James, Matthew and + Stassen, Christian}, + title = {PyEarthTools: Machine learning for Earth system + science + }, + month = oct, year = 2025, publisher = {Zenodo}, - version = {0.1.1}, - doi = {10.5281/zenodo.15760769}, - url = {https://doi.org/10.5281/zenodo.15760769} + version = {0.4.0}, + doi = {10.5281/zenodo.17429589}, + url = {https://doi.org/10.5281/zenodo.17429589}, } ``` :::: diff --git a/docs/newuser.md b/docs/newuser.md index 4cb2b681..ceb17550 100644 --- a/docs/newuser.md +++ b/docs/newuser.md @@ -1,6 +1,6 @@ # New Users Guide -Welcome new user! This document will continue to be updated based on user feedback. This table quickly explains how to get things done in `PyEarthTools` +Welcome new user! This document will continue to be updated based on user feedback. This table quickly explains how to get things done in `PyEarthTools` | Step | Without PyEarthTools | With PyEarthTools | |----------------|-----------------------------|----------------------| | Obtaining and loading data | Manual download or open from disk + data cleaning | Use or adapt in-built fetchers and openers (no cleaning) | @@ -8,7 +8,7 @@ Welcome new user! 
This document will continue to be updated based on user feedba | Present your data to PyTorch or another framework as a Python iterator | Custom code to iterate over data | Pipelines are iterators | | Define a machine learning model | Clone someone's repo or define your own | Use a 'bundled model' or write your own | | Denormalise model outputs | Manual code | Pipelines are reversible | -| Model evaluation | Use a separate framework | Use pre-defined evaluation to generate standard scorecards | +| Model evaluation | Use a separate framework | (coming soon) Use pre-defined evaluation to generate standard scorecards | This approach also allows your research to be more targetted into varying only the part of the end-to-end process which you are investigating. By starting with a baseline implementation to provide a strong basis for comparison and modifying only the relevant step, you can undertake a more controlled investigative process, confidently generating results from experimentation along the way. 
From 9c9892fa4e154fa903467db58367b41a972f7db8 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Wed, 13 Aug 2025 17:46:56 +1000 Subject: [PATCH 2/7] Design outline for the temporal window pipeline step --- notebooks/TemporalWindowDemo.ipynb | 1356 ++++++++++++++++++++++++++++ 1 file changed, 1356 insertions(+) create mode 100644 notebooks/TemporalWindowDemo.ipynb diff --git a/notebooks/TemporalWindowDemo.ipynb b/notebooks/TemporalWindowDemo.ipynb new file mode 100644 index 00000000..4d7075b7 --- /dev/null +++ b/notebooks/TemporalWindowDemo.ipynb @@ -0,0 +1,1356 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "074883b8-caf1-43eb-a2d5-fd3ca05f3b8e", + "metadata": {}, + "outputs": [], + "source": [ + "import functools\n", + "import xarray as xr\n", + "import pyearthtools.pipeline\n", + "class TemporalWindow(pyearthtools.pipeline.controller.PipelineIndex):\n", + " '''\n", + " The purpose of this class is to provide the ability to perform\n", + " sequence-to-sequence modelling from a data accessor or pipeline\n", + " that was designed to produce single time steps (i.e. single samples).\n", + "\n", + " The temporal window allows the specification of the 'back window'\n", + " and the 'forward window', and will produce a binary branch.\n", + "\n", + " For example, if the time steps are hourly, and the base pipeline\n", + " can produce hours 1, 2, 3 ... 
10; then this Temporal Window can\n", + " be used to produce sequence pairs like:\n", + " [1,2,3], [4], \n", + " [2,3,4], [5],\n", + " ...\n", + " [7,8,9], [10]\n", + "\n", + " or like:\n", + " [1,2], [3,4,5],\n", + " [2,3], [4,5,6],\n", + " ...\n", + " [6,7], [8,9,10]\n", + "\n", + " This provides a simpler interface than the TemporalRetrieval which\n", + " is a more general alternative.\n", + "\n", + " The window offsets are calculated not using positional indexing, but \n", + " using calculated date-times based on the reference time and the specified\n", + " timedelta to calculate each required index exactly. The handling of missing\n", + " data is left to the underlying pipeline response to the retrieval of the\n", + " calculated datetime.\n", + "\n", + " The resultant sequences may be left unmerged (i.e. a list of retrieved\n", + " results, one per time step) or merged (e.g. into an xarray object along the\n", + " time dimension). By default (merge_method=None) they are left unmerged.\n", + "\n", + " A custom merge method may be specified.\n", + " '''\n", + "\n", + " def __init__(self,\n", + " *,\n", + " a_indexes,\n", + " b_indexes,\n", + " timedelta,\n", + " merge_method=None\n", + " ):\n", + " '''\n", + " Args:\n", + " a_indexes: Multiplied by the timedelta then applied to the reference date\n", + " b_indexes: Multiplied by the timedelta then applied to the reference date\n", + " timedelta: Typically the time step of the underlying data\n", + " merge_method: How to merge samples into a combined object\n", + " '''\n", + " self.a_indexes = a_indexes\n", + " self.b_indexes = b_indexes\n", + " self.timedelta = timedelta\n", + " self.merge_method = merge_method\n", + "\n", + " def __getitem__(self, date_of_interest):\n", + "\n", + " date_of_interest = pyearthtools.data.time.Petdt(date_of_interest)\n", + "\n", + " head_i = [i * self.timedelta for i in self.a_indexes]\n", + " tail_i = [i * self.timedelta for i in self.b_indexes]\n", + "\n", + " head = 
[self.parent_pipeline()[str(date_of_interest + delta)] for delta in head_i]\n", + " tail = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in tail_i]\n", + "\n", + " if self.merge_method:\n", + " head = self.merge_method(head)\n", + " tail = self.merge_method(tail)\n", + "\n", + " return head, tail\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "eea30b9f-58c9-4c20-968f-0f0635f07a43", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Most users should change this to the current directory.\n", + "os.environ['PETPROJECT'] = os.path.expanduser(\"~\") + '/dev/proj/petcache'\n", + "workdir = os.environ['PETPROJECT']\n", + "# print(workdir)\n", + "\n", + "import pathlib\n", + "import xarray as xr\n", + "from pathlib import Path\n", + "import time\n", + "\n", + "import pyearthtools.data.archive\n", + "import pyearthtools.tutorial\n", + "import pyearthtools.pipeline\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4eff742e-2737-42ac-acad-695cf21166cf", + "metadata": {}, + "outputs": [], + "source": [ + "file_location = workdir + '/mini.nc'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ec9aca47-1321-4e74-88e2-44415440726a", + "metadata": {}, + "outputs": [], + "source": [ + "accessor = pyearthtools.tutorial.ERA5DataClass.ERA5LowResDemoIndex([\n", + " '10m_u_component_of_wind', \n", + " '10m_v_component_of_wind', \n", + " 'mean_sea_level_pressure',\n", + " '2m_temperature' \n", + "],\n", + "filename_override=file_location)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9af60306-0f99-44eb-8912-e320cf11a394", + "metadata": {}, + "outputs": [], + "source": [ + "sample = accessor['2010-01-01T00']" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6697bc3a-82e5-438f-83b9-3f8101509d38", + "metadata": {}, + "outputs": [], + "source": [ + "timedelta = pyearthtools.data.time.TimeDelta((1, \"day\"))\n", + "\n", + 
"merge_method = functools.partial(xr.concat, dim='time')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4043efc2-1ebe-4ff4-9ff5-cc0c2a664f29", + "metadata": {}, + "outputs": [], + "source": [ + "data_pipeline = pyearthtools.pipeline.Pipeline(\n", + " accessor,\n", + " pyearthtools.data.transforms.coordinates.StandardLongitude(type=\"-180-180\"), \n", + " # pyearthtools.pipeline.modifications.TemporalRetrieval(\n", + " # concat=True, samples=((0, 1), (6, 1, 6)) # Input = 1 sample from time T=0 hours. Output = T+6,+12,+18,+24\n", + " # ), \n", + " TemporalWindow(a_indexes=[-3,-2,-1], b_indexes=[0], timedelta=timedelta, merge_method=merge_method),\n", + " sampler=pyearthtools.pipeline.samplers.Default(),\n", + " iterator=pyearthtools.pipeline.iterators.DateRange(1980, 2016, interval='6 hours')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "45d00667-d6b4-4290-8f4c-36f1650cde42", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/munin/dev/proj/PyEarthTools/packages/data/src/pyearthtools/data/indexes/_indexes.py:809: IndexWarning: Data requested at a higher resolution than available. minute > hour\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "doi = '20100101T0000'\n", + "sample = data_pipeline[doi]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "da5c1493-2f1b-432a-80d0-13bfe1f5a794", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(sample)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "32c25070-535b-4a61-91ed-6c9f475d37cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 99kB\n",
+       "Dimensions:                  (time: 3, longitude: 64, latitude: 32)\n",
+       "Coordinates:\n",
+       "  * latitude                 (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+       "  * time                     (time) datetime64[ns] 24B 2009-12-29 ... 2009-12-31\n",
+       "  * longitude                (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+       "Data variables:\n",
+       "    10m_u_component_of_wind  (time, longitude, latitude) float32 25kB 0.01296...\n",
+       "    10m_v_component_of_wind  (time, longitude, latitude) float32 25kB -0.1656...\n",
+       "    2m_temperature           (time, longitude, latitude) float32 25kB 251.8 ....\n",
+       "    mean_sea_level_pressure  (time, longitude, latitude) float32 25kB 9.964e+...
" + ], + "text/plain": [ + " Size: 99kB\n", + "Dimensions: (time: 3, longitude: 64, latitude: 32)\n", + "Coordinates:\n", + " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n", + " * time (time) datetime64[ns] 24B 2009-12-29 ... 2009-12-31\n", + " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n", + "Data variables:\n", + " 10m_u_component_of_wind (time, longitude, latitude) float32 25kB 0.01296...\n", + " 10m_v_component_of_wind (time, longitude, latitude) float32 25kB -0.1656...\n", + " 2m_temperature (time, longitude, latitude) float32 25kB 251.8 ....\n", + " mean_sea_level_pressure (time, longitude, latitude) float32 25kB 9.964e+..." + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b17f6eb9-19d7-4413-a63f-2b1f60351584", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 34kB\n",
+       "Dimensions:                  (time: 1, longitude: 64, latitude: 32)\n",
+       "Coordinates:\n",
+       "  * latitude                 (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+       "  * time                     (time) datetime64[ns] 8B 2010-01-01\n",
+       "  * longitude                (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+       "Data variables:\n",
+       "    10m_u_component_of_wind  (time, longitude, latitude) float32 8kB 0.9633 ....\n",
+       "    10m_v_component_of_wind  (time, longitude, latitude) float32 8kB 2.515 .....\n",
+       "    2m_temperature           (time, longitude, latitude) float32 8kB 249.6 .....\n",
+       "    mean_sea_level_pressure  (time, longitude, latitude) float32 8kB 1.007e+0...
" + ], + "text/plain": [ + " Size: 34kB\n", + "Dimensions: (time: 1, longitude: 64, latitude: 32)\n", + "Coordinates:\n", + " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n", + " * time (time) datetime64[ns] 8B 2010-01-01\n", + " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n", + "Data variables:\n", + " 10m_u_component_of_wind (time, longitude, latitude) float32 8kB 0.9633 ....\n", + " 10m_v_component_of_wind (time, longitude, latitude) float32 8kB 2.515 .....\n", + " 2m_temperature (time, longitude, latitude) float32 8kB 249.6 .....\n", + " mean_sea_level_pressure (time, longitude, latitude) float32 8kB 1.007e+0..." + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50e45ec6-c10a-4a55-a471-bf0f7f3aacbf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From da37311a9e0fe8f7e40a92f276522b518cd79658 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Wed, 13 Aug 2025 21:22:57 +1000 Subject: [PATCH 3/7] Ran black. Ruff and black had a minor disagreement, this should clear CI, I will circle back to our ruff/black config later. 
--- .../data/src/pyearthtools/data/indexes/utilities/fileload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py b/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py index eee5cf2f..0e9180a1 100644 --- a/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py +++ b/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py @@ -215,7 +215,7 @@ def open_files( if not isinstance(files, (tuple, list, dict)): raise TypeError( - f"Could not load files from input: {files!r}.\n" "Supported types are (str, Path, tuple, list, dict)" + f"Could not load files from input: {files!r}.\nSupported types are (str, Path, tuple, list, dict)" ) files_to_load = [value for _, value in files.items()] if isinstance(files, dict) else list(files) From e2a076b227db883fbe2eab812a79391d3cb945c1 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 30 Oct 2025 09:35:58 +1100 Subject: [PATCH 4/7] Add the TemporalWindow class to the available index modifiers --- .../modifications/idx_modification.py | 85 ++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py index 8773191f..99c2c33b 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py @@ -215,7 +215,6 @@ def _get_tuple(self, idx, mod: tuple[Any, ...], layer: int) -> Union[tuple[Any], return samples def __getitem__(self, idx: Any): - if not isinstance(self._modification, tuple): return self.parent_pipeline()[idx + self._modification] @@ -452,7 +451,12 @@ def __init__( delta_unit: e.g. 
"month" or "hour" concat: whether to contact or merge """ - super().__init__(samples, merge_function=merge_function, concat=concat, merge_kwargs=merge_kwargs) + super().__init__( + samples, + merge_function=merge_function, + concat=concat, + merge_kwargs=merge_kwargs, + ) def map_to_tuple(mod): if isinstance(mod, tuple): @@ -469,3 +473,80 @@ def __getitem__(self, idx: Any): idx = pyearthtools.data.Petdt(idx) return super().__getitem__(idx) + + +class TemporalWindow(pyearthtools.pipeline.controller.PipelineIndex): + """ + The purpose of this class is to provide the ability to perform + sequence-to-sequence modelling from a data accessor or pipeline + that was designed to produce single time steps (i.e. single samples). + + The temporal window allows the specification of the 'back window' + and the 'forward window', and will produce a binary branch. + + For example, if the time steps are hourly, and the base pipeline + can produce hours 1, 2, 3 ... 10; then this Temporal Window can + be used to produce sequence pairs like: + [1,2,3], [4], + [2,3,4], [5], + ... + [7,8,9], [10] + + or like: + [1,2], [3,4,5], + [2,3], [4,5,6], + ... + [6,7], [8,9,10] + + This provides a simpler interface than the TemporalRetrieval which + is a more general alternative. + + The window offsets are calculated not using positional indexing, but + using calculated date-times based on the reference time and the specified + timedelta to calculate each required index exactly. The handling of missing + data is left to the underlying pipeline response to the retrieval of the + calculated datetime. + + The resultant sequences may be left unmerged (i.e. a list of retrieved + results, one per time step) or merged (e.g. into an xarray object along the + time dimension). By default (merge_method=None) they are left unmerged. + + A custom merge method may be specified. 
+ """ + + def __init__(self, *, prior_indexes, posterior_indexes, timedelta, merge_method=None): + """ + Args: + prior_indexes: Multiplied by the timedelta then applied to the reference date + posterior_indexes: Multiplied by the timedelta then applied to the reference date + timedelta: Typically the time step of the underlying data + merge_method: How to merge samples into a combined object + + + Examples: + + >>> TemporalWindow(prior_indexes=[-3,-2,-1], posterior_indexes=[0], timedelta=timedelta, merge_method=merge_method) + + (assuming xarray data) will result in a tuple of two datasets, the first with a time coordinate dimension of 3 time steps and the + second with a time coordinate dimension of 1 time step. + + """ + self.prior_indexes = prior_indexes + self.posterior_indexes = posterior_indexes + self.timedelta = timedelta + self.merge_method = merge_method + + def __getitem__(self, date_of_interest): + date_of_interest = pyearthtools.data.time.Petdt(date_of_interest) + + prior_i = [i * self.timedelta for i in self.prior_indexes] + posterior_i = [i * self.timedelta for i in self.posterior_indexes] + + prior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in prior_i] + posterior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in posterior_i] + + if self.merge_method: + prior = self.merge_method(prior) + posterior = self.merge_method(posterior) + + return prior, posterior From aee378d8d3ffd4927265adfbb8cdb224724196af Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 30 Oct 2025 09:51:44 +1100 Subject: [PATCH 5/7] Add temporal window to docs --- docs/api/pipeline/pipeline_api.md | 2 + docs/api/pipeline/pipeline_index.md | 1 + .../pipeline/modifications/__init__.py | 1 + .../modifications/idx_modification.py | 47 ++++++++++++++----- 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/docs/api/pipeline/pipeline_api.md b/docs/api/pipeline/pipeline_api.md index f1ef7d34..4e2f19f7 100644 --- 
a/docs/api/pipeline/pipeline_api.md +++ b/docs/api/pipeline/pipeline_api.md @@ -91,6 +91,8 @@ :members: .. autoclass:: pyearthtools.pipeline.modifications.TemporalRetrieval :members: +.. autoclass:: pyearthtools.pipeline.modifications.TemporalWindow + :members: .. autoclass:: pyearthtools.pipeline.modifications.idx_modification :members: ``` diff --git a/docs/api/pipeline/pipeline_index.md b/docs/api/pipeline/pipeline_index.md index f053df43..479ac10c 100644 --- a/docs/api/pipeline/pipeline_index.md +++ b/docs/api/pipeline/pipeline_index.md @@ -39,6 +39,7 @@ The rest of this page contains reference information for the components of the P | | | - [TimeIdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.TimeIdxModifier) | | | | - [SequenceRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.SequenceRetrieval) | | | | - [TemporalRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalRetrieval) | +| | | - [TemporalWindow](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalWindow) | | | | - [idx_modification](pipeline_api.md#pyearthtools.pipeline.modifications.idx_modification) | | `pipeline.operations` | | - [Transforms](pipeline_api.md#pyearthtools.pipeline.operations.Transforms) | | `pipeline.operations.xarray` | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Compute) | diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py index 22ef8896..896672eb 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py @@ -34,5 +34,6 @@ "TimeIdxModifier", "SequenceRetrieval", "TemporalRetrieval", + "TemporalWindow", "idx_modification", ] diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py index 
99c2c33b..0cd657ad 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py @@ -122,9 +122,13 @@ def __init__( self._modification = modification if concat: - MERGE_FUNCTIONS[np.ndarray] = lambda x, **k: np.concatenate(x, **k) if len(x) > 1 else x[0] + MERGE_FUNCTIONS[np.ndarray] = ( + lambda x, **k: np.concatenate(x, **k) if len(x) > 1 else x[0] + ) if DASK_IMPORTED: - MERGE_FUNCTIONS[da.Array] = lambda x, **k: da.concatenate(x, **k) if len(x) > 1 else x[0] + MERGE_FUNCTIONS[da.Array] = ( + lambda x, **k: da.concatenate(x, **k) if len(x) > 1 else x[0] + ) merge = int(merge) if isinstance(merge, bool) else int(merge) @@ -188,7 +192,9 @@ def trim(s): result = merge_function(sample, **self._merge_kwargs) return result - def _get_tuple(self, idx, mod: tuple[Any, ...], layer: int) -> Union[tuple[Any], Any]: + def _get_tuple( + self, idx, mod: tuple[Any, ...], layer: int + ) -> Union[tuple[Any], Any]: """ Collect all elements from tuple of modification @@ -199,7 +205,9 @@ def _get_tuple(self, idx, mod: tuple[Any, ...], layer: int) -> Union[tuple[Any], samples = [] for m in mod: if isinstance(m, tuple): - samples.append(self.parallel_interface.submit(self._get_tuple, idx, m, layer + 1)) + samples.append( + self.parallel_interface.submit(self._get_tuple, idx, m, layer + 1) + ) else: samples.append(self.parallel_interface.submit(super_get, idx + m)) @@ -249,7 +257,9 @@ def __init__( """ def can_map(mod): - return isinstance(mod, tuple) # and len(mod) > 0 a#nd isinstance(mod[0], tuple) + return isinstance( + mod, tuple + ) # and len(mod) > 0 a#nd isinstance(mod[0], tuple) def map_to_time(mod): """Map elements to `TimeDelta`""" @@ -277,7 +287,9 @@ class SequenceSpecification: def convert(self, pos): pos += self.offset total = self.num_samples * self.interval - return tuple(range(pos, pos + total, self.interval)), pos + total - (1 * self.interval) + return 
tuple(range(pos, pos + total, self.interval)), pos + total - ( + 1 * self.interval + ) class SequenceRetrieval(IdxModifier): @@ -369,7 +381,9 @@ def __init__( ) self.record_initialisation() - def _convert(self, samples: tuple[SequenceSpecification, ...]) -> tuple[tuple[int, ...], ...]: + def _convert( + self, samples: tuple[SequenceSpecification, ...] + ) -> tuple[tuple[int, ...], ...]: """ Convert `samples` from `_parse_samples` ready for `IdxModifier`. """ @@ -410,7 +424,9 @@ def parse_int(specification): return parse_int(samples) elif isinstance(samples, Iterable): - if len(samples) in [2, 3] and all(map(lambda x: not isinstance(x, Iterable), samples)): + if len(samples) in [2, 3] and all( + map(lambda x: not isinstance(x, Iterable), samples) + ): return (SequenceSpecification(*samples),) # type: ignore specs = [] @@ -475,7 +491,7 @@ def __getitem__(self, idx: Any): return super().__getitem__(idx) -class TemporalWindow(pyearthtools.pipeline.controller.PipelineIndex): +class TemporalWindow(PipelineIndex): """ The purpose of this class is to provide the ability to perform sequence-to-sequence modelling from a data accessor or pipeline @@ -514,7 +530,9 @@ class TemporalWindow(pyearthtools.pipeline.controller.PipelineIndex): A custom merge method may be specified. 
""" - def __init__(self, *, prior_indexes, posterior_indexes, timedelta, merge_method=None): + def __init__( + self, *, prior_indexes, posterior_indexes, timedelta, merge_method=None + ): """ Args: prior_indexes: Multiplied by the timedelta then applied to the reference date @@ -542,8 +560,13 @@ def __getitem__(self, date_of_interest): prior_i = [i * self.timedelta for i in self.prior_indexes] posterior_i = [i * self.timedelta for i in self.posterior_indexes] - prior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in prior_i] - posterior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in posterior_i] + prior = [ + self.parent_pipeline()[str(date_of_interest + delta)] for delta in prior_i + ] + posterior = [ + self.parent_pipeline()[str(date_of_interest + delta)] + for delta in posterior_i + ] if self.merge_method: prior = self.merge_method(prior) From 8ed4b7e1af50d63eea4864cf589f45f901448afc Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 30 Oct 2025 10:29:04 +1100 Subject: [PATCH 6/7] Remove doc entries that were causing errors (classes have moved) Fix up docs for TemporalWindow --- docs/api/data/data_api.md | 5 ----- docs/api/data/data_index.md | 1 - .../pipeline/modifications/__init__.py | 9 ++++----- .../pipeline/modifications/idx_modification.py | 14 +++++++------- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/docs/api/data/data_api.md b/docs/api/data/data_api.md index a87400bf..051a6603 100644 --- a/docs/api/data/data_api.md +++ b/docs/api/data/data_api.md @@ -329,7 +329,6 @@ :members: .. autofunction:: pyearthtools.data.transforms.coordinates.get_longitude -.. autofunction:: pyearthtools.data.transforms.coordinates.weak_cast_to_int .. autoclass:: pyearthtools.data.transforms.coordinates.StandardLongitude :members: @@ -341,12 +340,8 @@ :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Drop :members: -.. 
autoclass:: pyearthtools.data.transforms.coordinates.Flatten - :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Expand :members: -.. autoclass:: pyearthtools.data.transforms.coordinates.SelectFlatten - :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Assign :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Pad diff --git a/docs/api/data/data_index.md b/docs/api/data/data_index.md index ec51498c..c0bf9bae 100644 --- a/docs/api/data/data_index.md +++ b/docs/api/data/data_index.md @@ -135,7 +135,6 @@ The rest of this page contains reference information for the components of the D | | | - [attributes.SetType](data_api.md#pyearthtools.data.transforms.attributes.SetType) | | | | - [attributes.Rename](data_api.md#pyearthtools.data.transforms.attributes.Rename) | | | | - [coordinates.get_longitude](data_api.md#pyearthtools.data.transforms.coordinates.get_longitude) | -| | | - [coordinates.weak_cast_to_int](data_api.md#pyearthtools.data.transforms.coordinates.weak_cast_to_int) | | | | - [coordinates.StandardLongitude](data_api.md#pyearthtools.data.transforms.coordinates.StandardLongitude) | | | | - [coordinates.ReIndex](data_api.md#pyearthtools.data.transforms.coordinates.ReIndex) | | | | - [coordinates.StandardCoordinateNames](data_api.md#pyearthtools.data.transforms.coordinates.StandardCoordinateNames) | diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py index 896672eb..d033a651 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py @@ -13,17 +13,16 @@ # limitations under the License. 
+from pyearthtools.pipeline.modifications import idx_modification +from pyearthtools.pipeline.modifications.cache import Cache, MemCache, StaticCache from pyearthtools.pipeline.modifications.idx_modification import ( IdxModifier, IdxOverride, - TimeIdxModifier, SequenceRetrieval, TemporalRetrieval, + TemporalWindow, + TimeIdxModifier, ) -from pyearthtools.pipeline.modifications.cache import Cache, StaticCache, MemCache - - -from pyearthtools.pipeline.modifications import idx_modification __all__ = [ "Cache", diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py index 0cd657ad..b6e00532 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py @@ -17,15 +17,13 @@ from __future__ import annotations +import warnings from dataclasses import dataclass from typing import Any, Callable, Iterable, Optional, Type, Union -import warnings -import xarray as xr import numpy as np - import pyearthtools.data - +import xarray as xr from pyearthtools.pipeline.controller import PipelineIndex from pyearthtools.pipeline.parallel import ParallelEnabledMixin @@ -33,8 +31,8 @@ DASK_IMPORTED = True try: - from dask.delayed import Delayed, delayed import dask.array as da + from dask.delayed import Delayed, delayed except (ImportError, ModuleNotFoundError) as _: DASK_IMPORTED = False @@ -502,13 +500,15 @@ class TemporalWindow(PipelineIndex): For example, if the time steps are hourly, and the base pipeline can produce hours 1, 2, 3 ... 10; then this Temporal Window can - be used to produce sequence pairs like: + be used to produce sequence pairs like:: + [1,2,3], [4], [2,3,4], [5], ... [7,8,9], [10] - or like: + or like:: + [1,2], [3,4,5], [2,3], [4,5,6], ... 
From e2fc35d6be631171aa375a17b4fb5935ddb39834 Mon Sep 17 00:00:00 2001 From: Tennessee Leeuwenburg Date: Thu, 30 Oct 2025 10:30:01 +1100 Subject: [PATCH 7/7] Code reformat --- .../modifications/idx_modification.py | 45 +++++-------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py index b6e00532..d35608f8 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py @@ -120,13 +120,9 @@ def __init__( self._modification = modification if concat: - MERGE_FUNCTIONS[np.ndarray] = ( - lambda x, **k: np.concatenate(x, **k) if len(x) > 1 else x[0] - ) + MERGE_FUNCTIONS[np.ndarray] = lambda x, **k: np.concatenate(x, **k) if len(x) > 1 else x[0] if DASK_IMPORTED: - MERGE_FUNCTIONS[da.Array] = ( - lambda x, **k: da.concatenate(x, **k) if len(x) > 1 else x[0] - ) + MERGE_FUNCTIONS[da.Array] = lambda x, **k: da.concatenate(x, **k) if len(x) > 1 else x[0] merge = int(merge) if isinstance(merge, bool) else int(merge) @@ -190,9 +186,7 @@ def trim(s): result = merge_function(sample, **self._merge_kwargs) return result - def _get_tuple( - self, idx, mod: tuple[Any, ...], layer: int - ) -> Union[tuple[Any], Any]: + def _get_tuple(self, idx, mod: tuple[Any, ...], layer: int) -> Union[tuple[Any], Any]: """ Collect all elements from tuple of modification @@ -203,9 +197,7 @@ def _get_tuple( samples = [] for m in mod: if isinstance(m, tuple): - samples.append( - self.parallel_interface.submit(self._get_tuple, idx, m, layer + 1) - ) + samples.append(self.parallel_interface.submit(self._get_tuple, idx, m, layer + 1)) else: samples.append(self.parallel_interface.submit(super_get, idx + m)) @@ -255,9 +247,7 @@ def __init__( """ def can_map(mod): - return isinstance( - mod, tuple - ) # and len(mod) > 0 a#nd 
isinstance(mod[0], tuple) + return isinstance(mod, tuple) # and len(mod) > 0 a#nd isinstance(mod[0], tuple) def map_to_time(mod): """Map elements to `TimeDelta`""" @@ -285,9 +275,7 @@ class SequenceSpecification: def convert(self, pos): pos += self.offset total = self.num_samples * self.interval - return tuple(range(pos, pos + total, self.interval)), pos + total - ( - 1 * self.interval - ) + return tuple(range(pos, pos + total, self.interval)), pos + total - (1 * self.interval) class SequenceRetrieval(IdxModifier): @@ -379,9 +367,7 @@ def __init__( ) self.record_initialisation() - def _convert( - self, samples: tuple[SequenceSpecification, ...] - ) -> tuple[tuple[int, ...], ...]: + def _convert(self, samples: tuple[SequenceSpecification, ...]) -> tuple[tuple[int, ...], ...]: """ Convert `samples` from `_parse_samples` ready for `IdxModifier`. """ @@ -422,9 +408,7 @@ def parse_int(specification): return parse_int(samples) elif isinstance(samples, Iterable): - if len(samples) in [2, 3] and all( - map(lambda x: not isinstance(x, Iterable), samples) - ): + if len(samples) in [2, 3] and all(map(lambda x: not isinstance(x, Iterable), samples)): return (SequenceSpecification(*samples),) # type: ignore specs = [] @@ -530,9 +514,7 @@ class TemporalWindow(PipelineIndex): A custom merge method may be specified. 
""" - def __init__( - self, *, prior_indexes, posterior_indexes, timedelta, merge_method=None - ): + def __init__(self, *, prior_indexes, posterior_indexes, timedelta, merge_method=None): """ Args: prior_indexes: Multiplied by the timedelta then applied to the reference date @@ -560,13 +542,8 @@ def __getitem__(self, date_of_interest): prior_i = [i * self.timedelta for i in self.prior_indexes] posterior_i = [i * self.timedelta for i in self.posterior_indexes] - prior = [ - self.parent_pipeline()[str(date_of_interest + delta)] for delta in prior_i - ] - posterior = [ - self.parent_pipeline()[str(date_of_interest + delta)] - for delta in posterior_i - ] + prior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in prior_i] + posterior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in posterior_i] if self.merge_method: prior = self.merge_method(prior)