diff --git a/.zenodo.json b/.zenodo.json index a46f295e..0c95b258 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,89 +1,87 @@ { - "creators": [ - { - "orcid": "https://orcid.org/0009-0009-3207-4876", - "affiliation": "Work undertaken while at the Bureau of Meteorology, Australia", - "name": "Cook, Harrison" - }, - { - "orcid": "https://orcid.org/0009-0008-2024-1967", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Leeuwenburg, Tennessee" - }, - { - "orcid": "https://orcid.org/0000-0003-2553-0721", - "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", - "name": "Rio, Maxime" - }, - { - "orcid": "https://orcid.org/0000-0002-3148-4496", - "affiliation": "Met Office, United Kingdom", - "name": "Miller, Joel" - }, - { - "orcid": "https://orcid.org/0000-0002-6569-1178", - "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", - "name": "Mason, Gemma" - }, - { - "orcid": "https://orcid.org/0009-0002-7406-7438", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Ramanathan, Nikeeth" - }, - { - "orcid": "https://orcid.org/0009-0001-6250-3987", - "affiliation": "Met Office, United Kingdom", - "name": "Pill, John" - }, - { - "orcid": "https://orcid.org/0000-0003-3430-2456", - "affiliation": "Met Office, United Kingdom", - "name": "Haddad, Stephen" - }, - { - "orcid": "https://orcid.org/0000-0002-1975-0042", - "affiliation": "Bureau of Meteorology, Australia", - "name": "de Burgh-Day, Catherine" - }, - { - "orcid": "https://orcid.org/0009-0000-5048-4240", - "affiliation": "Met Office, United Kingdom", - "name": "Sullivan, Ben" - }, - { - "orcid": "https://orcid.org/0000-0001-6825-3854", - "affiliation": "University of New South Wales Sydney, Australia", - "name": "Hobeichi, Sanaa" - - }, - { - "orcid": "https://orcid.org/0000-0002-6799-9109", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Holmes, Ryan" - }, - { - "orcid": 
"https://orcid.org/0009-0009-8268-8358", - "affiliation": "Independent researcher", - "name": "Potokina, Margarita" - }, - { - "orcid": "https://orcid.org/0009-0001-4486-0120", - "affiliation": "Independent researcher, Vietnam", - "name": "Bogacheva, Jenya" - }, - { - "orcid": "https://orcid.org/0009-0004-9553-8387", - "affiliation": "Independent researcher, Australia", - "name": "James, Matthew" - }, - { - "orcid": "https://orcid.org/0000-0002-5407-4297", - "affiliation": "Bureau of Meteorology, Australia", - "name": "Stassen, Christian" - } - ], - "license": "Apache-2.0", - "title": "PyEarthTools: Machine learning for Earth system science", - "keywords": ["modelling", "geoscience", "earth system science"] - + "creators": [ + { + "orcid": "https://orcid.org/0009-0008-2024-1967", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Leeuwenburg, Tennessee" + }, + { + "orcid": "https://orcid.org/0009-0009-3207-4876", + "affiliation": "Work undertaken while at the Bureau of Meteorology, Australia", + "name": "Cook, Harrison" + }, + { + "orcid": "https://orcid.org/0000-0003-2553-0721", + "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", + "name": "Rio, Maxime" + }, + { + "orcid": "https://orcid.org/0000-0001-6825-3854", + "affiliation": "University of New South Wales Sydney, Australia", + "name": "Hobeichi, Sanaa" + }, + { + "orcid": "https://orcid.org/0000-0002-3148-4496", + "affiliation": "Met Office, United Kingdom", + "name": "Miller, Joel" + }, + { + "orcid": "https://orcid.org/0000-0002-6569-1178", + "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand", + "name": "Mason, Gemma" + }, + { + "orcid": "https://orcid.org/0009-0002-7406-7438", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Ramanathan, Nikeeth" + }, + { + "orcid": "https://orcid.org/0009-0001-6250-3987", + "affiliation": "Met Office, United Kingdom", + "name": "Pill, John" + }, + { + 
"orcid": "https://orcid.org/0000-0003-3430-2456", + "affiliation": "Met Office, United Kingdom", + "name": "Haddad, Stephen" + }, + { + "orcid": "https://orcid.org/0000-0002-5407-4297", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Stassen, Christian" + }, + { + "orcid": "https://orcid.org/0000-0002-1975-0042", + "affiliation": "Bureau of Meteorology, Australia", + "name": "de Burgh-Day, Catherine" + }, + { + "orcid": "https://orcid.org/0000-0002-6799-9109", + "affiliation": "Bureau of Meteorology, Australia", + "name": "Holmes, Ryan" + }, + { + "orcid": "https://orcid.org/0009-0009-8268-8358", + "affiliation": "Independent researcher", + "name": "Potokina, Margarita" + }, + { + "orcid": "https://orcid.org/0009-0001-4486-0120", + "affiliation": "Independent researcher, Vietnam", + "name": "Bogacheva, Jenya" + }, + { + "orcid": "https://orcid.org/0009-0004-9553-8387", + "affiliation": "Independent researcher, Australia", + "name": "James, Matthew" + }, + { + "orcid": "https://orcid.org/0009-0000-5048-4240", + "affiliation": "Met Office, United Kingdom", + "name": "Sullivan, Ben" + } + ], + "license": "Apache-2.0", + "title": "PyEarthTools: Machine learning for Earth system science", + "keywords": ["modelling", "geoscience", "earth system science"] } diff --git a/README.md b/README.md index 6f69ed9e..475d6fb5 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,12 @@ |![](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_demo_FourCastNeXt_Inference_9_1.png)
A weather prediction from a model trained with PyEarthTools.|![](https://pyearthtools.readthedocs.io/en/latest/_images/notebooks_tutorial_Working_with_Climate_Data_14_2.svg)
A data processing flow composed for working with climate data.| |:-:|:-:| -Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) -Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) -Tutorial Gallery: [available here](https://pyearthtools.readthedocs.io/en/latest/notebooks/Gallery.html) -New Users Guide: [available here](https://pyearthtools.readthedocs.io/en/latest/newuser.html) +Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) +Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) +Tutorial Gallery: [available here](https://pyearthtools.readthedocs.io/en/latest/notebooks/Gallery.html) +New Users Guide: [available here](https://pyearthtools.readthedocs.io/en/latest/newuser.html) + +**If you use `PyEarthTools` for your work or a publication, [please cite our work](https://pyearthtools.readthedocs.io/en/latest/#acknowleging-or-citing-pyearthtools).** ## Installation @@ -53,7 +55,7 @@ PyEarthTools is a Python framework containing modules for: - defining machine learning (ML) models; - training ML models and managing experiments; - performing inference with ML models; - - and evaluating ML models. + - and evaluating ML models (coming soon). ## Overview of the Packages within PyEarthTools @@ -74,11 +76,11 @@ PyEarthTools comprises multiple sub-packages which can be used individually or t If you use PyEarthTools for your work, we would appreciate you citing our software as below: -Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., & de Burgh-Day, C. (2025). PyEarthTools: Machine learning for Earth system science (0.1.1). Zenodo. 
https://doi.org/10.5281/zenodo.15760769 +Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., de Burgh-Day, C., Sullivan, B., Hobeichi, S., Holmes, R., Potokina, M., Bogacheva, J., James, M., & Stassen, C. (2025). PyEarthTools: Machine learning for Earth system science (0.4.0). Zenodo. https://doi.org/10.5281/zenodo.17429589 BibTeX: ``` -@software{cook_2025_15760769, +@software{cook_2025_17429589, author = {Cook, Harrison and Leeuwenburg, Tennessee and Rio, Maxime and @@ -87,13 +89,22 @@ BibTeX: Ramanathan, Nikeeth and Pill, John and Haddad, Stephen and - de Burgh-Day, Catherine}, - title = {{PyEarthTools: Machine learning for Earth system science}}, - month = jun, + de Burgh-Day, Catherine and + Sullivan, Ben and + Hobeichi, Sanaa and + Holmes, Ryan and + Potokina, Margarita and + Bogacheva, Jenya and + James, Matthew and + Stassen, Christian}, + title = {PyEarthTools: Machine learning for Earth system + science + }, + month = oct, year = 2025, publisher = {Zenodo}, - version = {0.1.1}, - doi = {10.5281/zenodo.15760769}, - url = {https://doi.org/10.5281/zenodo.15760769} + version = {0.4.0}, + doi = {10.5281/zenodo.17429589}, + url = {https://doi.org/10.5281/zenodo.17429589}, } ``` diff --git a/docs/api/data/data_api.md b/docs/api/data/data_api.md index a87400bf..051a6603 100644 --- a/docs/api/data/data_api.md +++ b/docs/api/data/data_api.md @@ -329,7 +329,6 @@ :members: .. autofunction:: pyearthtools.data.transforms.coordinates.get_longitude -.. autofunction:: pyearthtools.data.transforms.coordinates.weak_cast_to_int .. autoclass:: pyearthtools.data.transforms.coordinates.StandardLongitude :members: @@ -341,12 +340,8 @@ :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Drop :members: -.. autoclass:: pyearthtools.data.transforms.coordinates.Flatten - :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Expand :members: -.. 
autoclass:: pyearthtools.data.transforms.coordinates.SelectFlatten - :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Assign :members: .. autoclass:: pyearthtools.data.transforms.coordinates.Pad diff --git a/docs/api/data/data_index.md b/docs/api/data/data_index.md index ec51498c..c0bf9bae 100644 --- a/docs/api/data/data_index.md +++ b/docs/api/data/data_index.md @@ -135,7 +135,6 @@ The rest of this page contains reference information for the components of the D | | | - [attributes.SetType](data_api.md#pyearthtools.data.transforms.attributes.SetType) | | | | - [attributes.Rename](data_api.md#pyearthtools.data.transforms.attributes.Rename) | | | | - [coordinates.get_longitude](data_api.md#pyearthtools.data.transforms.coordinates.get_longitude) | -| | | - [coordinates.weak_cast_to_int](data_api.md#pyearthtools.data.transforms.coordinates.weak_cast_to_int) | | | | - [coordinates.StandardLongitude](data_api.md#pyearthtools.data.transforms.coordinates.StandardLongitude) | | | | - [coordinates.ReIndex](data_api.md#pyearthtools.data.transforms.coordinates.ReIndex) | | | | - [coordinates.StandardCoordinateNames](data_api.md#pyearthtools.data.transforms.coordinates.StandardCoordinateNames) | diff --git a/docs/api/pipeline/pipeline_api.md b/docs/api/pipeline/pipeline_api.md index f1ef7d34..4e2f19f7 100644 --- a/docs/api/pipeline/pipeline_api.md +++ b/docs/api/pipeline/pipeline_api.md @@ -91,6 +91,8 @@ :members: .. autoclass:: pyearthtools.pipeline.modifications.TemporalRetrieval :members: +.. autoclass:: pyearthtools.pipeline.modifications.TemporalWindow + :members: .. 
autoclass:: pyearthtools.pipeline.modifications.idx_modification :members: ``` diff --git a/docs/api/pipeline/pipeline_index.md b/docs/api/pipeline/pipeline_index.md index f053df43..479ac10c 100644 --- a/docs/api/pipeline/pipeline_index.md +++ b/docs/api/pipeline/pipeline_index.md @@ -39,6 +39,7 @@ The rest of this page contains reference information for the components of the P | | | - [TimeIdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.TimeIdxModifier) | | | | - [SequenceRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.SequenceRetrieval) | | | | - [TemporalRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalRetrieval) | +| | | - [TemporalWindow](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalWindow) | | | | - [idx_modification](pipeline_api.md#pyearthtools.pipeline.modifications.idx_modification) | | `pipeline.operations` | | - [Transforms](pipeline_api.md#pyearthtools.pipeline.operations.Transforms) | | `pipeline.operations.xarray` | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Compute) | diff --git a/docs/index.md b/docs/index.md index 41dd16bf..10dd9ff7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,9 +22,11 @@
A data processing flow composed for working with climate data.
-Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) -Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) -Tutorial Gallery: [available here](./notebooks/Gallery) +Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools) +Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io) +Tutorial Gallery: [available here](./notebooks/Gallery) + +**If you use `PyEarthTools` for your work or a publication, [please cite our work](https://pyearthtools.readthedocs.io/en/latest/#acknowleging-or-citing-pyearthtools).** ## Installation @@ -71,7 +73,7 @@ PyEarthTools is a Python framework containing modules for: - defining machine learning (ML) models; - training ML models and managing experiments; - performing inference with ML models; - - and evaluating ML models. + - and evaluating ML models (coming soon). ## Overview of the Packages within PyEarthTools @@ -95,11 +97,11 @@ If you use PyEarthTools for your work, we would appreciate you citing our softwa :::::{tab-set} ::::{tab-item} APA -Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., & de Burgh-Day, C. (2025). PyEarthTools: Machine learning for Earth system science (0.1.1). Zenodo. https://doi.org/10.5281/zenodo.15760769 +Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., de Burgh-Day, C., Sullivan, B., Hobeichi, S., Holmes, R., Potokina, M., Bogacheva, J., James, M., & Stassen, C. (2025). PyEarthTools: Machine learning for Earth system science (0.4.0). Zenodo. 
https://doi.org/10.5281/zenodo.17429589 :::: ::::{tab-item} BibTeX ``` -@software{cook_2025_15760769, +@software{cook_2025_17429589, author = {Cook, Harrison and Leeuwenburg, Tennessee and Rio, Maxime and @@ -108,14 +110,23 @@ Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, Ramanathan, Nikeeth and Pill, John and Haddad, Stephen and - de Burgh-Day, Catherine}, - title = {{PyEarthTools: Machine learning for Earth system science}}, - month = jun, + de Burgh-Day, Catherine and + Sullivan, Ben and + Hobeichi, Sanaa and + Holmes, Ryan and + Potokina, Margarita and + Bogacheva, Jenya and + James, Matthew and + Stassen, Christian}, + title = {PyEarthTools: Machine learning for Earth system + science + }, + month = oct, year = 2025, publisher = {Zenodo}, - version = {0.1.1}, - doi = {10.5281/zenodo.15760769}, - url = {https://doi.org/10.5281/zenodo.15760769} + version = {0.4.0}, + doi = {10.5281/zenodo.17429589}, + url = {https://doi.org/10.5281/zenodo.17429589}, } ``` :::: diff --git a/docs/newuser.md b/docs/newuser.md index 4cb2b681..ceb17550 100644 --- a/docs/newuser.md +++ b/docs/newuser.md @@ -1,6 +1,6 @@ # New Users Guide -Welcome new user! This document will continue to be updated based on user feedback. This table quickly explains how to get things done in `PyEarthTools` +Welcome new user! This document will continue to be updated based on user feedback. This table quickly explains how to get things done in `PyEarthTools` | Step | Without PyEarthTools | With PyEarthTools | |----------------|-----------------------------|----------------------| | Obtaining and loading data | Manual download or open from disk + data cleaning | Use or adapt in-built fetchers and openers (no cleaning) | @@ -8,7 +8,7 @@ Welcome new user! 
This document will continue to be updated based on user feedba | Present your data to PyTorch or another framework as a Python iterator | Custom code to iterate over data | Pipelines are iterators | | Define a machine learning model | Clone someone's repo or define your own | Use a 'bundled model' or write your own | | Denormalise model outputs | Manual code | Pipelines are reversible | -| Model evaluation | Use a separate framework | Use pre-defined evaluation to generate standard scorecards | +| Model evaluation | Use a separate framework | (coming soon) Use pre-defined evaluation to generate standard scorecards | This approach also allows your research to be more targetted into varying only the part of the end-to-end process which you are investigating. By starting with a baseline implementation to provide a strong basis for comparison and modifying only the relevant step, you can undertake a more controlled investigative process, confidently generating results from experimentation along the way. diff --git a/notebooks/TemporalWindowDemo.ipynb b/notebooks/TemporalWindowDemo.ipynb new file mode 100644 index 00000000..4d7075b7 --- /dev/null +++ b/notebooks/TemporalWindowDemo.ipynb @@ -0,0 +1,1356 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "074883b8-caf1-43eb-a2d5-fd3ca05f3b8e", + "metadata": {}, + "outputs": [], + "source": [ + "import functools\n", + "import xarray as xr\n", + "import pyearthtools.pipeline\n", + "class TemporalWindow(pyearthtools.pipeline.controller.PipelineIndex):\n", + " '''\n", + " The purpose of this class is to provide the ability to perform\n", + " sequence-to-sequence modelling from an data accessor or pipeline\n", + " that was designed to produce single time steps (i.e. 
single samples).\n", + "\n", + " The temporal window allows the specification of the 'back window'\n", + " and the 'forward window', and will produce a binary branch.\n", + "\n", + " For example, if the time steps are hourly, and the base pipeline\n", + " can produce hours 1, 2, 3 ... 10; then this Temporal Window can\n", + " be used to produce sequence pairs like:\n", + " [1,2,3], [4], \n", + " [2,3,4], [5],\n", + " ...\n", + " [7,8,9], [10]\n", + "\n", + " or like:\n", + " [1,2], [3,4,5],\n", + " [2,3], [4,5,6],\n", + " ...\n", + " [6,7], [8,9,10]\n", + "\n", + " This provides a simpler interface than the TemporalRetrieval which\n", + " is a more general alternative.\n", + "\n", + " The window offsets are calculated not using positional indexing, but \n", + " using calculated date-times based on the reference time and the specified\n", + " timedelta to calculate each required index exactly. The handling of missing\n", + " data is left to the underlying pipeline response to the retrieval of the\n", + " calculated datetime.\n", + "\n", + " The resultant sequences may be left unmerged (i.e. a list of retrieved\n", + " results for each datetime) or merged (e.g. into an xarray along the time\n", + " dimension). 
The default behaviour is to merge along the time dimension.\n", + "\n", + " A custom merge method may be specified.\n", + " '''\n", + "\n", + " def __init__(self,\n", + " *,\n", + " a_indexes,\n", + " b_indexes,\n", + " timedelta,\n", + " merge_method=None\n", + " ):\n", + " '''\n", + " Args:\n", + " a_indexes: Multiplied by the timedelta then applied to the reference date\n", + " b_indexes: Multiplied by the timedelta then applied to the reference date\n", + " timedelta: Typically the time step of the underlying data\n", + " merge_method: How to merge samples into a combined object\n", + " '''\n", + " self.a_indexes = a_indexes\n", + " self.b_indexes = b_indexes\n", + " self.timedelta = timedelta\n", + " self.merge_method = merge_method\n", + "\n", + " def __getitem__(self, date_of_interest):\n", + "\n", + " date_of_interest = pyearthtools.data.time.Petdt(date_of_interest)\n", + "\n", + " head_i = [i * self.timedelta for i in self.a_indexes]\n", + " tail_i = [i * self.timedelta for i in self.b_indexes]\n", + "\n", + " head = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in head_i]\n", + " tail = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in tail_i]\n", + "\n", + " if self.merge_method:\n", + " head = self.merge_method(head)\n", + " tail = self.merge_method(tail)\n", + "\n", + " return head, tail\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "eea30b9f-58c9-4c20-968f-0f0635f07a43", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Most users should change this to the current directory.\n", + "os.environ['PETPROJECT'] = os.path.expanduser(\"~\") + '/dev/proj/petcache'\n", + "workdir = os.environ['PETPROJECT']\n", + "# print(workdir)\n", + "\n", + "import pathlib\n", + "import xarray as xr\n", + "from pathlib import Path\n", + "import time\n", + "\n", + "import pyearthtools.data.archive\n", + "import pyearthtools.tutorial\n", + "import pyearthtools.pipeline\n" + ] + 
}, + { + "cell_type": "code", + "execution_count": 3, + "id": "4eff742e-2737-42ac-acad-695cf21166cf", + "metadata": {}, + "outputs": [], + "source": [ + "file_location = workdir + '/mini.nc'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ec9aca47-1321-4e74-88e2-44415440726a", + "metadata": {}, + "outputs": [], + "source": [ + "accessor = pyearthtools.tutorial.ERA5DataClass.ERA5LowResDemoIndex([\n", + " '10m_u_component_of_wind', \n", + " '10m_v_component_of_wind', \n", + " 'mean_sea_level_pressure',\n", + " '2m_temperature' \n", + "],\n", + "filename_override=file_location)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9af60306-0f99-44eb-8912-e320cf11a394", + "metadata": {}, + "outputs": [], + "source": [ + "sample = accessor['2010-01-01T00']" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6697bc3a-82e5-438f-83b9-3f8101509d38", + "metadata": {}, + "outputs": [], + "source": [ + "timedelta = pyearthtools.data.time.TimeDelta((1, \"day\"))\n", + "\n", + "merge_method = functools.partial(xr.concat, dim='time')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4043efc2-1ebe-4ff4-9ff5-cc0c2a664f29", + "metadata": {}, + "outputs": [], + "source": [ + "data_pipeline = pyearthtools.pipeline.Pipeline(\n", + " accessor,\n", + " pyearthtools.data.transforms.coordinates.StandardLongitude(type=\"-180-180\"), \n", + " # pyearthtools.pipeline.modifications.TemporalRetrieval(\n", + " # concat=True, samples=((0, 1), (6, 1, 6)) # Input = 1 sample from time T=0 hours. 
Output = T+6,+12,+18,+24\n", + " # ), \n", + " TemporalWindow(a_indexes=[-3,-2,-1], b_indexes=[0], timedelta=timedelta, merge_method=merge_method),\n", + " sampler=pyearthtools.pipeline.samplers.Default(),\n", + " iterator=pyearthtools.pipeline.iterators.DateRange(1980, 2016, interval='6 hours')\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "45d00667-d6b4-4290-8f4c-36f1650cde42", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/munin/dev/proj/PyEarthTools/packages/data/src/pyearthtools/data/indexes/_indexes.py:809: IndexWarning: Data requested at a higher resolution than available. minute > hour\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "doi = '20100101T0000'\n", + "sample = data_pipeline[doi]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "da5c1493-2f1b-432a-80d0-13bfe1f5a794", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(sample)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "32c25070-535b-4a61-91ed-6c9f475d37cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 99kB\n",
+       "Dimensions:                  (time: 3, longitude: 64, latitude: 32)\n",
+       "Coordinates:\n",
+       "  * latitude                 (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+       "  * time                     (time) datetime64[ns] 24B 2009-12-29 ... 2009-12-31\n",
+       "  * longitude                (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+       "Data variables:\n",
+       "    10m_u_component_of_wind  (time, longitude, latitude) float32 25kB 0.01296...\n",
+       "    10m_v_component_of_wind  (time, longitude, latitude) float32 25kB -0.1656...\n",
+       "    2m_temperature           (time, longitude, latitude) float32 25kB 251.8 ....\n",
+       "    mean_sea_level_pressure  (time, longitude, latitude) float32 25kB 9.964e+...
" + ], + "text/plain": [ + " Size: 99kB\n", + "Dimensions: (time: 3, longitude: 64, latitude: 32)\n", + "Coordinates:\n", + " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n", + " * time (time) datetime64[ns] 24B 2009-12-29 ... 2009-12-31\n", + " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n", + "Data variables:\n", + " 10m_u_component_of_wind (time, longitude, latitude) float32 25kB 0.01296...\n", + " 10m_v_component_of_wind (time, longitude, latitude) float32 25kB -0.1656...\n", + " 2m_temperature (time, longitude, latitude) float32 25kB 251.8 ....\n", + " mean_sea_level_pressure (time, longitude, latitude) float32 25kB 9.964e+..." + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b17f6eb9-19d7-4413-a63f-2b1f60351584", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 34kB\n",
+       "Dimensions:                  (time: 1, longitude: 64, latitude: 32)\n",
+       "Coordinates:\n",
+       "  * latitude                 (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+       "  * time                     (time) datetime64[ns] 8B 2010-01-01\n",
+       "  * longitude                (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+       "Data variables:\n",
+       "    10m_u_component_of_wind  (time, longitude, latitude) float32 8kB 0.9633 ....\n",
+       "    10m_v_component_of_wind  (time, longitude, latitude) float32 8kB 2.515 .....\n",
+       "    2m_temperature           (time, longitude, latitude) float32 8kB 249.6 .....\n",
+       "    mean_sea_level_pressure  (time, longitude, latitude) float32 8kB 1.007e+0...
" + ], + "text/plain": [ + " Size: 34kB\n", + "Dimensions: (time: 1, longitude: 64, latitude: 32)\n", + "Coordinates:\n", + " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n", + " * time (time) datetime64[ns] 8B 2010-01-01\n", + " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n", + "Data variables:\n", + " 10m_u_component_of_wind (time, longitude, latitude) float32 8kB 0.9633 ....\n", + " 10m_v_component_of_wind (time, longitude, latitude) float32 8kB 2.515 .....\n", + " 2m_temperature (time, longitude, latitude) float32 8kB 249.6 .....\n", + " mean_sea_level_pressure (time, longitude, latitude) float32 8kB 1.007e+0..." + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50e45ec6-c10a-4a55-a471-bf0f7f3aacbf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py b/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py index eee5cf2f..0e9180a1 100644 --- a/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py +++ b/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py @@ -215,7 +215,7 @@ def open_files( if not isinstance(files, (tuple, list, dict)): raise TypeError( - f"Could not load files from input: {files!r}.\n" "Supported types are (str, Path, tuple, list, dict)" + f"Could not load files from input: {files!r}.\nSupported types are (str, Path, tuple, list, 
dict)" ) files_to_load = [value for _, value in files.items()] if isinstance(files, dict) else list(files) diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py index 22ef8896..d033a651 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py @@ -13,17 +13,16 @@ # limitations under the License. +from pyearthtools.pipeline.modifications import idx_modification +from pyearthtools.pipeline.modifications.cache import Cache, MemCache, StaticCache from pyearthtools.pipeline.modifications.idx_modification import ( IdxModifier, IdxOverride, - TimeIdxModifier, SequenceRetrieval, TemporalRetrieval, + TemporalWindow, + TimeIdxModifier, ) -from pyearthtools.pipeline.modifications.cache import Cache, StaticCache, MemCache - - -from pyearthtools.pipeline.modifications import idx_modification __all__ = [ "Cache", @@ -34,5 +33,6 @@ "TimeIdxModifier", "SequenceRetrieval", "TemporalRetrieval", + "TemporalWindow", "idx_modification", ] diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py index 8773191f..d35608f8 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py +++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py @@ -17,15 +17,13 @@ from __future__ import annotations +import warnings from dataclasses import dataclass from typing import Any, Callable, Iterable, Optional, Type, Union -import warnings -import xarray as xr import numpy as np - import pyearthtools.data - +import xarray as xr from pyearthtools.pipeline.controller import PipelineIndex from pyearthtools.pipeline.parallel import ParallelEnabledMixin @@ -33,8 +31,8 @@ DASK_IMPORTED = True try: - from dask.delayed import Delayed, delayed 
import dask.array as da + from dask.delayed import Delayed, delayed except (ImportError, ModuleNotFoundError) as _: DASK_IMPORTED = False @@ -215,7 +213,6 @@ def _get_tuple(self, idx, mod: tuple[Any, ...], layer: int) -> Union[tuple[Any], return samples def __getitem__(self, idx: Any): - if not isinstance(self._modification, tuple): return self.parent_pipeline()[idx + self._modification] @@ -452,7 +449,12 @@ def __init__( delta_unit: e.g. "month" or "hour" concat: whether to contact or merge """ - super().__init__(samples, merge_function=merge_function, concat=concat, merge_kwargs=merge_kwargs) + super().__init__( + samples, + merge_function=merge_function, + concat=concat, + merge_kwargs=merge_kwargs, + ) def map_to_tuple(mod): if isinstance(mod, tuple): @@ -469,3 +471,82 @@ def __getitem__(self, idx: Any): idx = pyearthtools.data.Petdt(idx) return super().__getitem__(idx) + + +class TemporalWindow(PipelineIndex): + """ + The purpose of this class is to provide the ability to perform + sequence-to-sequence modelling from a data accessor or pipeline + that was designed to produce single time steps (i.e. single samples). + + The temporal window allows the specification of the 'back window' + and the 'forward window', and will produce a binary branch. + + For example, if the time steps are hourly, and the base pipeline + can produce hours 1, 2, 3 ... 10; then this Temporal Window can + be used to produce sequence pairs like:: + + [1,2,3], [4], + [2,3,4], [5], + ... + [7,8,9], [10] + + or like:: + + [1,2], [3,4,5], + [2,3], [4,5,6], + ... + [6,7], [8,9,10] + + This provides a simpler interface than the TemporalRetrieval which + is a more general alternative. + + The window offsets are calculated not using positional indexing, but + using calculated date-times based on the reference time and the specified + timedelta to calculate each required index exactly. 
The handling of missing + data is left to the underlying pipeline response to the retrieval of the + calculated datetime. + + The resultant sequences may be left unmerged (i.e. a list of retrieved + results for each datetime) or merged (e.g. into an xarray along the time + dimension). By default (``merge_method=None``) the sequences are left unmerged. + + A custom merge method may be specified. + """ + + def __init__(self, *, prior_indexes, posterior_indexes, timedelta, merge_method=None): + """ + Args: + prior_indexes: Multiplied by the timedelta then applied to the reference date + posterior_indexes: Multiplied by the timedelta then applied to the reference date + timedelta: Typically the time step of the underlying data + merge_method: How to merge samples into a combined object + + + Examples: + + >>> TemporalWindow(prior_indexes=[-3,-2,-1], posterior_indexes=[0], timedelta=timedelta, merge_method=merge_method) + + (assuming xarray data) will result in a tuple of two datasets, the first with a time coordinate dimension of 3 time steps and the + second with a time coordinate dimension of 1 time step. + + """ + self.prior_indexes = prior_indexes + self.posterior_indexes = posterior_indexes + self.timedelta = timedelta + self.merge_method = merge_method + + def __getitem__(self, date_of_interest): + date_of_interest = pyearthtools.data.time.Petdt(date_of_interest) + + prior_i = [i * self.timedelta for i in self.prior_indexes] + posterior_i = [i * self.timedelta for i in self.posterior_indexes] + + prior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in prior_i] + posterior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in posterior_i] + + if self.merge_method: + prior = self.merge_method(prior) + posterior = self.merge_method(posterior) + + return prior, posterior