diff --git a/.zenodo.json b/.zenodo.json
index a46f295e..0c95b258 100644
--- a/.zenodo.json
+++ b/.zenodo.json
@@ -1,89 +1,87 @@
{
- "creators": [
- {
- "orcid": "https://orcid.org/0009-0009-3207-4876",
- "affiliation": "Work undertaken while at the Bureau of Meteorology, Australia",
- "name": "Cook, Harrison"
- },
- {
- "orcid": "https://orcid.org/0009-0008-2024-1967",
- "affiliation": "Bureau of Meteorology, Australia",
- "name": "Leeuwenburg, Tennessee"
- },
- {
- "orcid": "https://orcid.org/0000-0003-2553-0721",
- "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand",
- "name": "Rio, Maxime"
- },
- {
- "orcid": "https://orcid.org/0000-0002-3148-4496",
- "affiliation": "Met Office, United Kingdom",
- "name": "Miller, Joel"
- },
- {
- "orcid": "https://orcid.org/0000-0002-6569-1178",
- "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand",
- "name": "Mason, Gemma"
- },
- {
- "orcid": "https://orcid.org/0009-0002-7406-7438",
- "affiliation": "Bureau of Meteorology, Australia",
- "name": "Ramanathan, Nikeeth"
- },
- {
- "orcid": "https://orcid.org/0009-0001-6250-3987",
- "affiliation": "Met Office, United Kingdom",
- "name": "Pill, John"
- },
- {
- "orcid": "https://orcid.org/0000-0003-3430-2456",
- "affiliation": "Met Office, United Kingdom",
- "name": "Haddad, Stephen"
- },
- {
- "orcid": "https://orcid.org/0000-0002-1975-0042",
- "affiliation": "Bureau of Meteorology, Australia",
- "name": "de Burgh-Day, Catherine"
- },
- {
- "orcid": "https://orcid.org/0009-0000-5048-4240",
- "affiliation": "Met Office, United Kingdom",
- "name": "Sullivan, Ben"
- },
- {
- "orcid": "https://orcid.org/0000-0001-6825-3854",
- "affiliation": "University of New South Wales Sydney, Australia",
- "name": "Hobeichi, Sanaa"
-
- },
- {
- "orcid": "https://orcid.org/0000-0002-6799-9109",
- "affiliation": "Bureau of Meteorology, Australia",
- "name": "Holmes, Ryan"
- },
- {
- "orcid": "https://orcid.org/0009-0009-8268-8358",
- "affiliation": "Independent researcher",
- "name": "Potokina, Margarita"
- },
- {
- "orcid": "https://orcid.org/0009-0001-4486-0120",
- "affiliation": "Independent researcher, Vietnam",
- "name": "Bogacheva, Jenya"
- },
- {
- "orcid": "https://orcid.org/0009-0004-9553-8387",
- "affiliation": "Independent researcher, Australia",
- "name": "James, Matthew"
- },
- {
- "orcid": "https://orcid.org/0000-0002-5407-4297",
- "affiliation": "Bureau of Meteorology, Australia",
- "name": "Stassen, Christian"
- }
- ],
- "license": "Apache-2.0",
- "title": "PyEarthTools: Machine learning for Earth system science",
- "keywords": ["modelling", "geoscience", "earth system science"]
-
+ "creators": [
+ {
+ "orcid": "https://orcid.org/0009-0008-2024-1967",
+ "affiliation": "Bureau of Meteorology, Australia",
+ "name": "Leeuwenburg, Tennessee"
+ },
+ {
+ "orcid": "https://orcid.org/0009-0009-3207-4876",
+ "affiliation": "Work undertaken while at the Bureau of Meteorology, Australia",
+ "name": "Cook, Harrison"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0003-2553-0721",
+ "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand",
+ "name": "Rio, Maxime"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0001-6825-3854",
+ "affiliation": "University of New South Wales Sydney, Australia",
+ "name": "Hobeichi, Sanaa"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0002-3148-4496",
+ "affiliation": "Met Office, United Kingdom",
+ "name": "Miller, Joel"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0002-6569-1178",
+ "affiliation": "NIWA - The National Institute of Water and Atmospheric Research, New Zealand",
+ "name": "Mason, Gemma"
+ },
+ {
+ "orcid": "https://orcid.org/0009-0002-7406-7438",
+ "affiliation": "Bureau of Meteorology, Australia",
+ "name": "Ramanathan, Nikeeth"
+ },
+ {
+ "orcid": "https://orcid.org/0009-0001-6250-3987",
+ "affiliation": "Met Office, United Kingdom",
+ "name": "Pill, John"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0003-3430-2456",
+ "affiliation": "Met Office, United Kingdom",
+ "name": "Haddad, Stephen"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0002-5407-4297",
+ "affiliation": "Bureau of Meteorology, Australia",
+ "name": "Stassen, Christian"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0002-1975-0042",
+ "affiliation": "Bureau of Meteorology, Australia",
+ "name": "de Burgh-Day, Catherine"
+ },
+ {
+ "orcid": "https://orcid.org/0000-0002-6799-9109",
+ "affiliation": "Bureau of Meteorology, Australia",
+ "name": "Holmes, Ryan"
+ },
+ {
+ "orcid": "https://orcid.org/0009-0009-8268-8358",
+ "affiliation": "Independent researcher",
+ "name": "Potokina, Margarita"
+ },
+ {
+ "orcid": "https://orcid.org/0009-0001-4486-0120",
+ "affiliation": "Independent researcher, Vietnam",
+ "name": "Bogacheva, Jenya"
+ },
+ {
+ "orcid": "https://orcid.org/0009-0004-9553-8387",
+ "affiliation": "Independent researcher, Australia",
+ "name": "James, Matthew"
+ },
+ {
+ "orcid": "https://orcid.org/0009-0000-5048-4240",
+ "affiliation": "Met Office, United Kingdom",
+ "name": "Sullivan, Ben"
+ }
+ ],
+ "license": "Apache-2.0",
+ "title": "PyEarthTools: Machine learning for Earth system science",
+ "keywords": ["modelling", "geoscience", "earth system science"]
}
diff --git a/README.md b/README.md
index 6f69ed9e..475d6fb5 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,12 @@
| A weather prediction from a model trained with PyEarthTools.| A data processing flow composed for working with climate data.|
|:-:|:-:|
-Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools)
-Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io)
-Tutorial Gallery: [available here](https://pyearthtools.readthedocs.io/en/latest/notebooks/Gallery.html)
-New Users Guide: [available here](https://pyearthtools.readthedocs.io/en/latest/newuser.html)
+Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools)
+Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io)
+Tutorial Gallery: [available here](https://pyearthtools.readthedocs.io/en/latest/notebooks/Gallery.html)
+New Users Guide: [available here](https://pyearthtools.readthedocs.io/en/latest/newuser.html)
+
+**If you use `PyEarthTools` for your work or a publication, [please cite our work](https://pyearthtools.readthedocs.io/en/latest/#acknowleging-or-citing-pyearthtools).**
## Installation
@@ -53,7 +55,7 @@ PyEarthTools is a Python framework containing modules for:
- defining machine learning (ML) models;
- training ML models and managing experiments;
- performing inference with ML models;
- - and evaluating ML models.
+ - and evaluating ML models (coming soon).
## Overview of the Packages within PyEarthTools
@@ -74,11 +76,11 @@ PyEarthTools comprises multiple sub-packages which can be used individually or t
If you use PyEarthTools for your work, we would appreciate you citing our software as below:
-Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., & de Burgh-Day, C. (2025). PyEarthTools: Machine learning for Earth system science (0.1.1). Zenodo. https://doi.org/10.5281/zenodo.15760769
+Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., de Burgh-Day, C., Sullivan, B., Hobeichi, S., Holmes, R., Potokina, M., Bogacheva, J., James, M., & Stassen, C. (2025). PyEarthTools: Machine learning for Earth system science (0.4.0). Zenodo. https://doi.org/10.5281/zenodo.17429589
BibTeX:
```
-@software{cook_2025_15760769,
+@software{cook_2025_17429589,
author = {Cook, Harrison and
Leeuwenburg, Tennessee and
Rio, Maxime and
@@ -87,13 +89,22 @@ BibTeX:
Ramanathan, Nikeeth and
Pill, John and
Haddad, Stephen and
- de Burgh-Day, Catherine},
- title = {{PyEarthTools: Machine learning for Earth system science}},
- month = jun,
+ de Burgh-Day, Catherine and
+ Sullivan, Ben and
+ Hobeichi, Sanaa and
+ Holmes, Ryan and
+ Potokina, Margarita and
+ Bogacheva, Jenya and
+ James, Matthew and
+ Stassen, Christian},
+ title = {PyEarthTools: Machine learning for Earth system
+ science
+ },
+ month = oct,
year = 2025,
publisher = {Zenodo},
- version = {0.1.1},
- doi = {10.5281/zenodo.15760769},
- url = {https://doi.org/10.5281/zenodo.15760769}
+ version = {0.4.0},
+ doi = {10.5281/zenodo.17429589},
+ url = {https://doi.org/10.5281/zenodo.17429589},
}
```
diff --git a/docs/api/data/data_api.md b/docs/api/data/data_api.md
index a87400bf..051a6603 100644
--- a/docs/api/data/data_api.md
+++ b/docs/api/data/data_api.md
@@ -329,7 +329,6 @@
:members:
.. autofunction:: pyearthtools.data.transforms.coordinates.get_longitude
-.. autofunction:: pyearthtools.data.transforms.coordinates.weak_cast_to_int
.. autoclass:: pyearthtools.data.transforms.coordinates.StandardLongitude
:members:
@@ -341,12 +340,8 @@
:members:
.. autoclass:: pyearthtools.data.transforms.coordinates.Drop
:members:
-.. autoclass:: pyearthtools.data.transforms.coordinates.Flatten
- :members:
.. autoclass:: pyearthtools.data.transforms.coordinates.Expand
:members:
-.. autoclass:: pyearthtools.data.transforms.coordinates.SelectFlatten
- :members:
.. autoclass:: pyearthtools.data.transforms.coordinates.Assign
:members:
.. autoclass:: pyearthtools.data.transforms.coordinates.Pad
diff --git a/docs/api/data/data_index.md b/docs/api/data/data_index.md
index ec51498c..c0bf9bae 100644
--- a/docs/api/data/data_index.md
+++ b/docs/api/data/data_index.md
@@ -135,7 +135,6 @@ The rest of this page contains reference information for the components of the D
| | | - [attributes.SetType](data_api.md#pyearthtools.data.transforms.attributes.SetType) |
| | | - [attributes.Rename](data_api.md#pyearthtools.data.transforms.attributes.Rename) |
| | | - [coordinates.get_longitude](data_api.md#pyearthtools.data.transforms.coordinates.get_longitude) |
-| | | - [coordinates.weak_cast_to_int](data_api.md#pyearthtools.data.transforms.coordinates.weak_cast_to_int) |
| | | - [coordinates.StandardLongitude](data_api.md#pyearthtools.data.transforms.coordinates.StandardLongitude) |
| | | - [coordinates.ReIndex](data_api.md#pyearthtools.data.transforms.coordinates.ReIndex) |
| | | - [coordinates.StandardCoordinateNames](data_api.md#pyearthtools.data.transforms.coordinates.StandardCoordinateNames) |
diff --git a/docs/api/pipeline/pipeline_api.md b/docs/api/pipeline/pipeline_api.md
index f1ef7d34..4e2f19f7 100644
--- a/docs/api/pipeline/pipeline_api.md
+++ b/docs/api/pipeline/pipeline_api.md
@@ -91,6 +91,8 @@
:members:
.. autoclass:: pyearthtools.pipeline.modifications.TemporalRetrieval
:members:
+.. autoclass:: pyearthtools.pipeline.modifications.TemporalWindow
+ :members:
.. autoclass:: pyearthtools.pipeline.modifications.idx_modification
:members:
```
diff --git a/docs/api/pipeline/pipeline_index.md b/docs/api/pipeline/pipeline_index.md
index f053df43..479ac10c 100644
--- a/docs/api/pipeline/pipeline_index.md
+++ b/docs/api/pipeline/pipeline_index.md
@@ -39,6 +39,7 @@ The rest of this page contains reference information for the components of the P
| | | - [TimeIdxModifier](pipeline_api.md#pyearthtools.pipeline.modifications.TimeIdxModifier) |
| | | - [SequenceRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.SequenceRetrieval) |
| | | - [TemporalRetrieval](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalRetrieval) |
+| | | - [TemporalWindow](pipeline_api.md#pyearthtools.pipeline.modifications.TemporalWindow) |
| | | - [idx_modification](pipeline_api.md#pyearthtools.pipeline.modifications.idx_modification) |
| `pipeline.operations` | | - [Transforms](pipeline_api.md#pyearthtools.pipeline.operations.Transforms) |
| `pipeline.operations.xarray` | | - [Compute](pipeline_api.md#pyearthtools.pipeline.operations.xarray.Compute) |
diff --git a/docs/index.md b/docs/index.md
index 41dd16bf..10dd9ff7 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -22,9 +22,11 @@
A data processing flow composed for working with climate data.
-Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools)
-Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io)
-Tutorial Gallery: [available here](./notebooks/Gallery)
+Source Code: [github.com/ACCESS-Community-Hub/PyEarthTools](https://github.com/ACCESS-Community-Hub/PyEarthTools)
+Documentation: [pyearthtools.readthedocs.io](https://pyearthtools.readthedocs.io)
+Tutorial Gallery: [available here](./notebooks/Gallery)
+
+**If you use `PyEarthTools` for your work or a publication, [please cite our work](https://pyearthtools.readthedocs.io/en/latest/#acknowleging-or-citing-pyearthtools).**
## Installation
@@ -71,7 +73,7 @@ PyEarthTools is a Python framework containing modules for:
- defining machine learning (ML) models;
- training ML models and managing experiments;
- performing inference with ML models;
- - and evaluating ML models.
+ - and evaluating ML models (coming soon).
## Overview of the Packages within PyEarthTools
@@ -95,11 +97,11 @@ If you use PyEarthTools for your work, we would appreciate you citing our softwa
:::::{tab-set}
::::{tab-item} APA
-Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., & de Burgh-Day, C. (2025). PyEarthTools: Machine learning for Earth system science (0.1.1). Zenodo. https://doi.org/10.5281/zenodo.15760769
+Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill, J., Haddad, S., de Burgh-Day, C., Sullivan, B., Hobeichi, S., Holmes, R., Potokina, M., Bogacheva, J., James, M., & Stassen, C. (2025). PyEarthTools: Machine learning for Earth system science (0.4.0). Zenodo. https://doi.org/10.5281/zenodo.17429589
::::
::::{tab-item} BibTeX
```
-@software{cook_2025_15760769,
+@software{cook_2025_17429589,
author = {Cook, Harrison and
Leeuwenburg, Tennessee and
Rio, Maxime and
@@ -108,14 +110,23 @@ Cook, H., Leeuwenburg, T., Rio, M., Miller, J., Mason, G., Ramanathan, N., Pill,
Ramanathan, Nikeeth and
Pill, John and
Haddad, Stephen and
- de Burgh-Day, Catherine},
- title = {{PyEarthTools: Machine learning for Earth system science}},
- month = jun,
+ de Burgh-Day, Catherine and
+ Sullivan, Ben and
+ Hobeichi, Sanaa and
+ Holmes, Ryan and
+ Potokina, Margarita and
+ Bogacheva, Jenya and
+ James, Matthew and
+ Stassen, Christian},
+ title = {PyEarthTools: Machine learning for Earth system
+ science
+ },
+ month = oct,
year = 2025,
publisher = {Zenodo},
- version = {0.1.1},
- doi = {10.5281/zenodo.15760769},
- url = {https://doi.org/10.5281/zenodo.15760769}
+ version = {0.4.0},
+ doi = {10.5281/zenodo.17429589},
+ url = {https://doi.org/10.5281/zenodo.17429589},
}
```
::::
diff --git a/docs/newuser.md b/docs/newuser.md
index 4cb2b681..ceb17550 100644
--- a/docs/newuser.md
+++ b/docs/newuser.md
@@ -1,6 +1,6 @@
# New Users Guide
-Welcome new user! This document will continue to be updated based on user feedback. This table quickly explains how to get things done in `PyEarthTools`
+Welcome new user! This document will continue to be updated based on user feedback. This table quickly explains how to get things done in `PyEarthTools`
| Step | Without PyEarthTools | With PyEarthTools |
|----------------|-----------------------------|----------------------|
| Obtaining and loading data | Manual download or open from disk + data cleaning | Use or adapt in-built fetchers and openers (no cleaning) |
@@ -8,7 +8,7 @@ Welcome new user! This document will continue to be updated based on user feedba
| Present your data to PyTorch or another framework as a Python iterator | Custom code to iterate over data | Pipelines are iterators |
| Define a machine learning model | Clone someone's repo or define your own | Use a 'bundled model' or write your own |
| Denormalise model outputs | Manual code | Pipelines are reversible |
-| Model evaluation | Use a separate framework | Use pre-defined evaluation to generate standard scorecards |
+| Model evaluation | Use a separate framework | (coming soon) Use pre-defined evaluation to generate standard scorecards |
This approach also allows your research to be more targetted into varying only the part of the end-to-end process which you are investigating. By starting with a baseline implementation to provide a strong basis for comparison and modifying only the relevant step, you can undertake a more controlled investigative process, confidently generating results from experimentation along the way.
diff --git a/notebooks/TemporalWindowDemo.ipynb b/notebooks/TemporalWindowDemo.ipynb
new file mode 100644
index 00000000..4d7075b7
--- /dev/null
+++ b/notebooks/TemporalWindowDemo.ipynb
@@ -0,0 +1,1356 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "074883b8-caf1-43eb-a2d5-fd3ca05f3b8e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import functools\n",
+ "import xarray as xr\n",
+ "import pyearthtools.pipeline\n",
+ "class TemporalWindow(pyearthtools.pipeline.controller.PipelineIndex):\n",
+ " '''\n",
+ " The purpose of this class is to provide the ability to perform\n",
+ " sequence-to-sequence modelling from a data accessor or pipeline\n",
+ " that was designed to produce single time steps (i.e. single samples).\n",
+ "\n",
+ " The temporal window allows the specification of the 'back window'\n",
+ " and the 'forward window', and will produce a binary branch.\n",
+ "\n",
+ " For example, if the time steps are hourly, and the base pipeline\n",
+ " can produce hours 1, 2, 3 ... 10; then this Temporal Window can\n",
+ " be used to produce sequence pairs like:\n",
+ " [1,2,3], [4], \n",
+ " [2,3,4], [5],\n",
+ " ...\n",
+ " [7,8,9], [10]\n",
+ "\n",
+ " or like:\n",
+ " [1,2], [3,4,5],\n",
+ " [2,3], [4,5,6],\n",
+ " ...\n",
+ " [6,7], [8,9,10]\n",
+ "\n",
+ " This provides a simpler interface than the TemporalRetrieval which\n",
+ " is a more general alternative.\n",
+ "\n",
+ " The window offsets are calculated not using positional indexing, but \n",
+ " using calculated date-times based on the reference time and the specified\n",
+ " timedelta to calculate each required index exactly. The handling of missing\n",
+ " data is left to the underlying pipeline response to the retrieval of the\n",
+ " calculated datetime.\n",
+ "\n",
+ " The resultant sequences may be left unmerged (i.e. a list of retrieved\n",
+ " results for each datetime) or merged (e.g. into an xarray along the time\n",
+ " dimension). The default behaviour is to merge along the time dimension.\n",
+ "\n",
+ " A custom merge method may be specified.\n",
+ " '''\n",
+ "\n",
+ " def __init__(self,\n",
+ " *,\n",
+ " a_indexes,\n",
+ " b_indexes,\n",
+ " timedelta,\n",
+ " merge_method=None\n",
+ " ):\n",
+ " '''\n",
+ " Args:\n",
+ " a_indexes: Multiplied by the timedelta then applied to the reference date\n",
+ " b_indexes: Multiplied by the timedelta then applied to the reference date\n",
+ " timedelta: Typically the time step of the underlying data\n",
+ " merge_method: How to merge samples into a combined object\n",
+ " '''\n",
+ " self.a_indexes = a_indexes\n",
+ " self.b_indexes = b_indexes\n",
+ " self.timedelta = timedelta\n",
+ " self.merge_method = merge_method\n",
+ "\n",
+ " def __getitem__(self, date_of_interest):\n",
+ "\n",
+ " date_of_interest = pyearthtools.data.time.Petdt(date_of_interest)\n",
+ "\n",
+ " head_i = [i * self.timedelta for i in self.a_indexes]\n",
+ " tail_i = [i * self.timedelta for i in self.b_indexes]\n",
+ "\n",
+ " head = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in head_i]\n",
+ " tail = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in tail_i]\n",
+ "\n",
+ " if self.merge_method:\n",
+ " head = self.merge_method(head)\n",
+ " tail = self.merge_method(tail)\n",
+ "\n",
+ " return head, tail\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "eea30b9f-58c9-4c20-968f-0f0635f07a43",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "# Most users should change this to the current directory.\n",
+ "os.environ['PETPROJECT'] = os.path.expanduser(\"~\") + '/dev/proj/petcache'\n",
+ "workdir = os.environ['PETPROJECT']\n",
+ "# print(workdir)\n",
+ "\n",
+ "import pathlib\n",
+ "import xarray as xr\n",
+ "from pathlib import Path\n",
+ "import time\n",
+ "\n",
+ "import pyearthtools.data.archive\n",
+ "import pyearthtools.tutorial\n",
+ "import pyearthtools.pipeline\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "4eff742e-2737-42ac-acad-695cf21166cf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "file_location = workdir + '/mini.nc'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ec9aca47-1321-4e74-88e2-44415440726a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "accessor = pyearthtools.tutorial.ERA5DataClass.ERA5LowResDemoIndex([\n",
+ " '10m_u_component_of_wind', \n",
+ " '10m_v_component_of_wind', \n",
+ " 'mean_sea_level_pressure',\n",
+ " '2m_temperature' \n",
+ "],\n",
+ "filename_override=file_location)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "9af60306-0f99-44eb-8912-e320cf11a394",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample = accessor['2010-01-01T00']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "6697bc3a-82e5-438f-83b9-3f8101509d38",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "timedelta = pyearthtools.data.time.TimeDelta((1, \"day\"))\n",
+ "\n",
+ "merge_method = functools.partial(xr.concat, dim='time')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "4043efc2-1ebe-4ff4-9ff5-cc0c2a664f29",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_pipeline = pyearthtools.pipeline.Pipeline(\n",
+ " accessor,\n",
+ " pyearthtools.data.transforms.coordinates.StandardLongitude(type=\"-180-180\"), \n",
+ " # pyearthtools.pipeline.modifications.TemporalRetrieval(\n",
+ " # concat=True, samples=((0, 1), (6, 1, 6)) # Input = 1 sample from time T=0 hours. Output = T+6,+12,+18,+24\n",
+ " # ), \n",
+ " TemporalWindow(a_indexes=[-3,-2,-1], b_indexes=[0], timedelta=timedelta, merge_method=merge_method),\n",
+ " sampler=pyearthtools.pipeline.samplers.Default(),\n",
+ " iterator=pyearthtools.pipeline.iterators.DateRange(1980, 2016, interval='6 hours')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "45d00667-d6b4-4290-8f4c-36f1650cde42",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/munin/dev/proj/PyEarthTools/packages/data/src/pyearthtools/data/indexes/_indexes.py:809: IndexWarning: Data requested at a higher resolution than available. minute > hour\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "doi = '20100101T0000'\n",
+ "sample = data_pipeline[doi]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "da5c1493-2f1b-432a-80d0-13bfe1f5a794",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(sample)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "32c25070-535b-4a61-91ed-6c9f475d37cf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "
<xarray.Dataset> Size: 99kB\n",
+ "Dimensions: (time: 3, longitude: 64, latitude: 32)\n",
+ "Coordinates:\n",
+ " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+ " * time (time) datetime64[ns] 24B 2009-12-29 ... 2009-12-31\n",
+ " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+ "Data variables:\n",
+ " 10m_u_component_of_wind (time, longitude, latitude) float32 25kB 0.01296...\n",
+ " 10m_v_component_of_wind (time, longitude, latitude) float32 25kB -0.1656...\n",
+ " 2m_temperature (time, longitude, latitude) float32 25kB 251.8 ....\n",
+ " mean_sea_level_pressure (time, longitude, latitude) float32 25kB 9.964e+... Dimensions: time : 3longitude : 64latitude : 32
Coordinates: (3)
latitude
(latitude)
float64
-87.19 -81.56 ... 81.56 87.19
array([-87.1875, -81.5625, -75.9375, -70.3125, -64.6875, -59.0625, -53.4375,\n",
+ " -47.8125, -42.1875, -36.5625, -30.9375, -25.3125, -19.6875, -14.0625,\n",
+ " -8.4375, -2.8125, 2.8125, 8.4375, 14.0625, 19.6875, 25.3125,\n",
+ " 30.9375, 36.5625, 42.1875, 47.8125, 53.4375, 59.0625, 64.6875,\n",
+ " 70.3125, 75.9375, 81.5625, 87.1875]) time
(time)
datetime64[ns]
2009-12-29 2009-12-30 2009-12-31
array(['2009-12-29T00:00:00.000000000', '2009-12-30T00:00:00.000000000',\n",
+ " '2009-12-31T00:00:00.000000000'], dtype='datetime64[ns]') longitude
(longitude)
float64
-180.0 -174.4 ... 168.8 174.4
array([-180. , -174.375, -168.75 , -163.125, -157.5 , -151.875, -146.25 ,\n",
+ " -140.625, -135. , -129.375, -123.75 , -118.125, -112.5 , -106.875,\n",
+ " -101.25 , -95.625, -90. , -84.375, -78.75 , -73.125, -67.5 ,\n",
+ " -61.875, -56.25 , -50.625, -45. , -39.375, -33.75 , -28.125,\n",
+ " -22.5 , -16.875, -11.25 , -5.625, 0. , 5.625, 11.25 ,\n",
+ " 16.875, 22.5 , 28.125, 33.75 , 39.375, 45. , 50.625,\n",
+ " 56.25 , 61.875, 67.5 , 73.125, 78.75 , 84.375, 90. ,\n",
+ " 95.625, 101.25 , 106.875, 112.5 , 118.125, 123.75 , 129.375,\n",
+ " 135. , 140.625, 146.25 , 151.875, 157.5 , 163.125, 168.75 ,\n",
+ " 174.375]) Data variables: (4)
10m_u_component_of_wind
(time, longitude, latitude)
float32
0.01296 -2.391 ... 2.367 6.094
long_name : 10 metre U wind component short_name : u10 units : m s**-1 array([[[ 0.01295517, -2.3912122 , 2.9354267 , ..., -7.9532948 ,\n",
+ " 0.25357214, 6.0169716 ],\n",
+ " [-0.52722347, -3.2151184 , 2.5158815 , ..., -7.51472 ,\n",
+ " 0.47033545, 6.272334 ],\n",
+ " [-0.580429 , -2.7676497 , 2.0751765 , ..., -7.0506973 ,\n",
+ " 0.92148596, 6.6449776 ],\n",
+ " ...,\n",
+ " [-0.07509121, 0.8475576 , 1.1629357 , ..., -8.298301 ,\n",
+ " 0.8962459 , 5.364363 ],\n",
+ " [-0.3950262 , -0.7227352 , 1.5612326 , ..., -8.285944 ,\n",
+ " 0.32740173, 5.6214347 ],\n",
+ " [-0.5349231 , -1.4303305 , 1.9131229 , ..., -7.968339 ,\n",
+ " 0.17176305, 5.8434205 ]],\n",
+ "\n",
+ " [[-0.39811176, 1.7230395 , 0.27908617, ..., -4.969552 ,\n",
+ " 0.5818986 , 5.3759785 ],\n",
+ " [-0.50838226, 1.9183471 , 0.74721265, ..., -5.4273696 ,\n",
+ " 0.9969079 , 5.325866 ],\n",
+ " [ 0.2063406 , 2.3328078 , 2.807473 , ..., -5.4888024 ,\n",
+ " 1.5664698 , 5.251631 ],\n",
+ "...\n",
+ " [ 0.07154416, 0.43923795, 1.0447255 , ..., -3.051137 ,\n",
+ " 0.3713494 , 5.5305843 ],\n",
+ " [-0.55116624, 1.1838893 , 0.45468566, ..., -3.3963523 ,\n",
+ " 0.30960777, 5.434267 ],\n",
+ " [-1.1435357 , 1.4078368 , -0.27678475, ..., -4.0079236 ,\n",
+ " 0.36916453, 5.408901 ]],\n",
+ "\n",
+ " [[-2.0001998 , 2.128516 , -1.7078512 , ..., -3.6868687 ,\n",
+ " 2.3377357 , 6.360939 ],\n",
+ " [-2.211365 , 3.4102862 , -2.0384774 , ..., -3.0071027 ,\n",
+ " 2.4915304 , 6.639659 ],\n",
+ " [-2.382363 , 3.368487 , -2.376643 , ..., -2.9129133 ,\n",
+ " 2.6026807 , 6.803876 ],\n",
+ " ...,\n",
+ " [-0.49716297, 1.150571 , 1.2389406 , ..., -5.873276 ,\n",
+ " 2.7889493 , 5.8885164 ],\n",
+ " [-1.0885781 , 1.4702489 , -0.09348243, ..., -4.7808895 ,\n",
+ " 2.5889966 , 5.951209 ],\n",
+ " [-2.394913 , 2.015512 , -0.45130092, ..., -4.174971 ,\n",
+ " 2.3670063 , 6.094167 ]]], dtype=float32) 10m_v_component_of_wind
(time, longitude, latitude)
float32
-0.1656 3.359 ... -0.8335 1.215
long_name : 10 metre V wind component short_name : v10 units : m s**-1 array([[[-0.16557042, 3.358766 , 3.9969978 , ..., -1.2642678 ,\n",
+ " 1.1131968 , 5.6573677 ],\n",
+ " [-0.09152302, 1.2762302 , 0.13379075, ..., -1.3670216 ,\n",
+ " 0.34614825, 4.880553 ],\n",
+ " [ 0.18972367, 0.30770206, -1.8509722 , ..., -1.3079865 ,\n",
+ " -0.44029537, 4.0179176 ],\n",
+ " ...,\n",
+ " [-0.04006498, 1.8271967 , 0.20499082, ..., -2.9372787 ,\n",
+ " 2.288085 , 7.3871984 ],\n",
+ " [-0.08938481, 3.0897412 , 1.6591011 , ..., -1.863201 ,\n",
+ " 2.1047406 , 6.9135156 ],\n",
+ " [-0.5623997 , 1.9263891 , 2.8747828 , ..., -1.4309009 ,\n",
+ " 1.7482747 , 6.3307033 ]],\n",
+ "\n",
+ " [[ 2.2340987 , 3.4786212 , 4.1956396 , ..., 0.5354468 ,\n",
+ " -0.5842197 , -3.335201 ],\n",
+ " [ 2.2327716 , 4.382334 , 3.2637868 , ..., 1.4966141 ,\n",
+ " -0.06261355, -3.2567768 ],\n",
+ " [ 2.1854079 , 5.154851 , 2.3214054 , ..., 1.8796144 ,\n",
+ " 0.58342403, -3.2612605 ],\n",
+ "...\n",
+ " [ 2.5729852 , -0.415482 , 0.09965239, ..., -2.2066414 ,\n",
+ " -1.8052534 , -3.4664524 ],\n",
+ " [ 2.047807 , 1.4169827 , 0.35017806, ..., -2.0707934 ,\n",
+ " -1.4196922 , -3.4500935 ],\n",
+ " [ 1.4159832 , 1.9486716 , 3.7002926 , ..., -0.888266 ,\n",
+ " -1.0308341 , -3.4204154 ]],\n",
+ "\n",
+ " [[ 3.8281336 , -1.3502898 , 0.6728385 , ..., -4.540028 ,\n",
+ " -1.9200238 , 0.6156175 ],\n",
+ " [ 3.8099473 , 0.6798635 , 2.831329 , ..., -4.6986117 ,\n",
+ " -3.0414934 , -0.5036043 ],\n",
+ " [ 3.9214299 , -0.6622708 , 1.153243 , ..., -5.044118 ,\n",
+ " -4.1522155 , -1.952265 ],\n",
+ " ...,\n",
+ " [ 4.741663 , -1.1170123 , -1.2198285 , ..., -3.0964751 ,\n",
+ " 0.4758325 , 0.85219425],\n",
+ " [ 3.8926513 , -1.9493812 , -3.4722123 , ..., -3.5516295 ,\n",
+ " 0.03120308, 1.1980363 ],\n",
+ " [ 3.2614179 , -2.2967741 , -1.8544527 , ..., -3.9371238 ,\n",
+ " -0.83350736, 1.2145286 ]]], dtype=float32) 2m_temperature
(time, longitude, latitude)
float32
251.8 268.6 270.5 ... 242.5 242.8
long_name : 2 metre temperature short_name : t2m units : K array([[[251.75461, 268.57892, 270.52295, ..., 252.00554, 246.37273,\n",
+ " 244.6797 ],\n",
+ " [252.89363, 268.81277, 271.33176, ..., 251.3174 , 246.78493,\n",
+ " 244.28052],\n",
+ " [255.17232, 268.71286, 272.06757, ..., 250.90524, 247.08192,\n",
+ " 243.9517 ],\n",
+ " ...,\n",
+ " [248.42833, 267.15964, 264.36105, ..., 250.04218, 245.12329,\n",
+ " 246.46027],\n",
+ " [250.51184, 269.25613, 271.25787, ..., 250.89912, 245.68736,\n",
+ " 245.78036],\n",
+ " [250.20346, 268.76248, 270.58005, ..., 251.75238, 246.0445 ,\n",
+ " 245.18864]],\n",
+ "\n",
+ " [[250.86383, 268.97903, 270.5385 , ..., 253.38641, 245.78412,\n",
+ " 244.58537],\n",
+ " [252.12962, 268.93677, 271.06766, ..., 251.77567, 245.98862,\n",
+ " 244.49927],\n",
+ " [254.64372, 268.78174, 271.4692 , ..., 251.04315, 246.34904,\n",
+ " 244.36734],\n",
+ "...\n",
+ " [248.39932, 265.68417, 264.04004, ..., 252.12802, 245.57913,\n",
+ " 244.43724],\n",
+ " [250.06033, 268.02518, 271.32306, ..., 252.28748, 245.71614,\n",
+ " 244.52553],\n",
+ " [249.38322, 268.6531 , 270.72855, ..., 253.5123 , 245.55733,\n",
+ " 244.55751]],\n",
+ "\n",
+ " [[250.3586 , 269.97635, 271.46622, ..., 250.19849, 242.02625,\n",
+ " 242.04263],\n",
+ " [251.79771, 269.72617, 271.69028, ..., 249.84404, 241.82443,\n",
+ " 241.47148],\n",
+ " [253.62262, 269.3288 , 273.1044 , ..., 249.37828, 241.87207,\n",
+ " 241.20848],\n",
+ " ...,\n",
+ " [247.30255, 265.77625, 264.9217 , ..., 251.32233, 244.26706,\n",
+ " 244.58209],\n",
+ " [248.99608, 268.63773, 271.84576, ..., 250.93294, 243.2644 ,\n",
+ " 243.77107],\n",
+ " [248.63269, 269.8876 , 271.78638, ..., 250.66096, 242.4893 ,\n",
+ " 242.80858]]], dtype=float32) mean_sea_level_pressure
(time, longitude, latitude)
float32
9.964e+04 9.929e+04 ... 1.032e+05
long_name : Mean sea level pressure short_name : msl standard_name : air_pressure_at_mean_sea_level units : Pa array([[[ 99638.27 , 99287.805, 99156.375, ..., 103967.2 ,\n",
+ " 104778.83 , 103657.516],\n",
+ " [ 99643.19 , 99258.984, 99078.74 , ..., 104039.305,\n",
+ " 104778.266, 103702.03 ],\n",
+ " [ 99680.914, 99283.555, 99120.1 , ..., 104113.8 ,\n",
+ " 104761.91 , 103736.484],\n",
+ " ...,\n",
+ " [ 99768.266, 99474.414, 99385.016, ..., 103817.67 ,\n",
+ " 104686.984, 103473.734],\n",
+ " [ 99722.37 , 99376.14 , 99309.18 , ..., 103842.22 ,\n",
+ " 104728.12 , 103542.73 ],\n",
+ " [ 99660.875, 99343.9 , 99265.984, ..., 103898.16 ,\n",
+ " 104761.19 , 103604.914]],\n",
+ "\n",
+ " [[ 99641.85 , 99322.89 , 99444.31 , ..., 103391.484,\n",
+ " 104022.89 , 103427.375],\n",
+ " [ 99631.93 , 99211.67 , 99349.82 , ..., 103492.79 ,\n",
+ " 104010.18 , 103368.9 ],\n",
+ " [ 99609.87 , 99064.54 , 99276.76 , ..., 103627.06 ,\n",
+ " 104001.695, 103310.58 ],\n",
+ "...\n",
+ " [ 99814.05 , 99531.016, 99617.52 , ..., 103414.54 ,\n",
+ " 104101.375, 103609.26 ],\n",
+ " [ 99764.984, 99472.99 , 99574.83 , ..., 103365.15 ,\n",
+ " 104067.51 , 103547.37 ],\n",
+ " [ 99666.734, 99407.336, 99545.07 , ..., 103340.48 ,\n",
+ " 104041.17 , 103487.02 ]],\n",
+ "\n",
+ " [[ 99874.97 , 99370.08 , 99699.35 , ..., 103685.7 ,\n",
+ " 104216.17 , 103178.83 ],\n",
+ " [ 99844.88 , 99333.63 , 99669.41 , ..., 103541.984,\n",
+ " 104097.28 , 103152.46 ],\n",
+ " [ 99805.43 , 99269.266, 99615.55 , ..., 103389.67 ,\n",
+ " 103946.68 , 103102.98 ],\n",
+ " ...,\n",
+ " [100122.94 , 99314.26 , 99564.84 , ..., 103939.74 ,\n",
+ " 104408.56 , 103217.04 ],\n",
+ " [100018.11 , 99291.234, 99554.28 , ..., 103896.445,\n",
+ " 104364.53 , 103201.6 ],\n",
+ " [ 99889.375, 99325.36 , 99652.06 , ..., 103809.055,\n",
+ " 104303.016, 103191.82 ]]], dtype=float32) Indexes: (3)
PandasIndex
PandasIndex(Index([ -87.18750000000003, -81.56250000000001, -75.9375,\n",
+ " -70.31249999999999, -64.68750000000001, -59.0625,\n",
+ " -53.4375, -47.8125, -42.1875,\n",
+ " -36.5625, -30.937499999999996, -25.312500000000004,\n",
+ " -19.687499999999996, -14.062499999999991, -8.437499999999996,\n",
+ " -2.812500000000003, 2.812500000000003, 8.437500000000009,\n",
+ " 14.062500000000004, 19.687499999999996, 25.312500000000004,\n",
+ " 30.93750000000001, 36.562499999999986, 42.1875,\n",
+ " 47.8125, 53.4375, 59.062500000000014,\n",
+ " 64.68750000000001, 70.3125, 75.9375,\n",
+ " 81.56249999999997, 87.18750000000003],\n",
+ " dtype='float64', name='latitude')) PandasIndex
PandasIndex(DatetimeIndex(['2009-12-29', '2009-12-30', '2009-12-31'], dtype='datetime64[ns]', name='time', freq=None)) PandasIndex
PandasIndex(Index([ -180.0, -174.375, -168.75, -163.125, -157.5, -151.875, -146.25,\n",
+ " -140.625, -135.0, -129.375, -123.75, -118.125, -112.5, -106.875,\n",
+ " -101.25, -95.625, -90.0, -84.375, -78.75, -73.125, -67.5,\n",
+ " -61.875, -56.25, -50.625, -45.0, -39.375, -33.75, -28.125,\n",
+ " -22.5, -16.875, -11.25, -5.625, 0.0, 5.625, 11.25,\n",
+ " 16.875, 22.5, 28.125, 33.75, 39.375, 45.0, 50.625,\n",
+ " 56.25, 61.875, 67.5, 73.125, 78.75, 84.375, 90.0,\n",
+ " 95.625, 101.25, 106.875, 112.5, 118.125, 123.75, 129.375,\n",
+ " 135.0, 140.625, 146.25, 151.875, 157.5, 163.125, 168.75,\n",
+ " 174.375],\n",
+ " dtype='float64', name='longitude')) Attributes: (0)
"
+ ],
+ "text/plain": [
+ " Size: 99kB\n",
+ "Dimensions: (time: 3, longitude: 64, latitude: 32)\n",
+ "Coordinates:\n",
+ " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+ " * time (time) datetime64[ns] 24B 2009-12-29 ... 2009-12-31\n",
+ " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+ "Data variables:\n",
+ " 10m_u_component_of_wind (time, longitude, latitude) float32 25kB 0.01296...\n",
+ " 10m_v_component_of_wind (time, longitude, latitude) float32 25kB -0.1656...\n",
+ " 2m_temperature (time, longitude, latitude) float32 25kB 251.8 ....\n",
+ " mean_sea_level_pressure (time, longitude, latitude) float32 25kB 9.964e+..."
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sample[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "b17f6eb9-19d7-4413-a63f-2b1f60351584",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "
<xarray.Dataset> Size: 34kB\n",
+ "Dimensions: (time: 1, longitude: 64, latitude: 32)\n",
+ "Coordinates:\n",
+ " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+ " * time (time) datetime64[ns] 8B 2010-01-01\n",
+ " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+ "Data variables:\n",
+ " 10m_u_component_of_wind (time, longitude, latitude) float32 8kB 0.9633 ....\n",
+ " 10m_v_component_of_wind (time, longitude, latitude) float32 8kB 2.515 .....\n",
+ " 2m_temperature (time, longitude, latitude) float32 8kB 249.6 .....\n",
+ " mean_sea_level_pressure (time, longitude, latitude) float32 8kB 1.007e+0... Dimensions: time : 1longitude : 64latitude : 32
Coordinates: (3)
Data variables: (4)
10m_u_component_of_wind
(time, longitude, latitude)
float32
0.9633 0.611 -6.142 ... 11.24 4.895
long_name : 10 metre U wind component short_name : u10 units : m s**-1 array([[[ 0.9632556 , 0.6109847 , -6.142399 , ..., 3.172378 ,\n",
+ " 11.177181 , 3.2300625 ],\n",
+ " [ 0.8614009 , 1.7282428 , -6.519317 , ..., 6.116332 ,\n",
+ " 9.922595 , 2.8010194 ],\n",
+ " [ 1.224251 , 2.48009 , -5.390903 , ..., 8.361196 ,\n",
+ " 8.080846 , 2.6937218 ],\n",
+ " ...,\n",
+ " [ 1.2538012 , 0.604716 , 0.6994493 , ..., -3.5058506 ,\n",
+ " 9.553845 , 8.014986 ],\n",
+ " [ 0.73623514, 0.11170814, -0.779397 , ..., -1.8638835 ,\n",
+ " 10.509155 , 6.6528363 ],\n",
+ " [ 0.43471932, -0.21207632, -3.8454657 , ..., 0.25836125,\n",
+ " 11.240351 , 4.894728 ]]], dtype=float32) 10m_v_component_of_wind
(time, longitude, latitude)
float32
2.515 -0.6715 ... -6.244 -6.613
long_name : 10 metre V wind component short_name : v10 units : m s**-1 array([[[ 2.514789 , -0.67149574, 4.4828005 , ..., -1.7482406 ,\n",
+ " -7.6362443 , -6.0685515 ],\n",
+ " [ 2.6404576 , 1.6303428 , 3.8826547 , ..., -2.8417091 ,\n",
+ " -8.442919 , -5.818139 ],\n",
+ " [ 2.712953 , 3.7668324 , 3.0043316 , ..., -3.5103695 ,\n",
+ " -8.965655 , -5.829777 ],\n",
+ " ...,\n",
+ " [ 3.2032475 , 0.33325508, 0.2818415 , ..., -0.26982972,\n",
+ " -3.5305 , -5.935523 ],\n",
+ " [ 2.825357 , -0.8672254 , 1.6980164 , ..., -0.37393594,\n",
+ " -4.7857823 , -6.531855 ],\n",
+ " [ 2.1568015 , -2.019828 , 3.952327 , ..., -0.86477417,\n",
+ " -6.243577 , -6.612936 ]]], dtype=float32) 2m_temperature
(time, longitude, latitude)
float32
249.6 270.5 272.6 ... 251.7 252.4
long_name : 2 metre temperature short_name : t2m units : K array([[[249.55533, 270.50128, 272.5765 , ..., 248.91371, 252.38441,\n",
+ " 250.18143],\n",
+ " [250.88121, 270.56384, 272.6508 , ..., 248.37007, 252.52696,\n",
+ " 247.81578],\n",
+ " [252.75655, 269.80878, 273.44815, ..., 248.28246, 251.72719,\n",
+ " 246.31178],\n",
+ " ...,\n",
+ " [246.35754, 265.28418, 265.13263, ..., 249.90196, 250.17097,\n",
+ " 254.51762],\n",
+ " [248.20985, 268.07693, 272.33966, ..., 248.94827, 250.88016,\n",
+ " 253.7525 ],\n",
+ " [247.94386, 270.0292 , 272.8252 , ..., 248.62856, 251.66301,\n",
+ " 252.43272]]], dtype=float32) mean_sea_level_pressure
(time, longitude, latitude)
float32
1.007e+05 1.006e+05 ... 1.018e+05
long_name : Mean sea level pressure short_name : msl standard_name : air_pressure_at_mean_sea_level units : Pa array([[[100734.19 , 100588.47 , 100218.32 , ..., 104016.234,\n",
+ " 102640.94 , 101745.05 ],\n",
+ " [100720.27 , 100578.92 , 100162.05 , ..., 103815.23 ,\n",
+ " 102250.586, 101671.68 ],\n",
+ " [100709.59 , 100509.086, 100155.125, ..., 103499.875,\n",
+ " 101861.86 , 101593.69 ],\n",
+ " ...,\n",
+ " [100963.52 , 100556.555, 100545.8 , ..., 104083.75 ,\n",
+ " 103556.9 , 102052.45 ],\n",
+ " [100880.85 , 100517.28 , 100479.05 , ..., 104124.516,\n",
+ " 103309.59 , 101934.445],\n",
+ " [100752.82 , 100551.96 , 100356.97 , ..., 104108.125,\n",
+ " 103002.51 , 101829.41 ]]], dtype=float32) Indexes: (3)
PandasIndex
PandasIndex(Index([ -87.18750000000003, -81.56250000000001, -75.9375,\n",
+ " -70.31249999999999, -64.68750000000001, -59.0625,\n",
+ " -53.4375, -47.8125, -42.1875,\n",
+ " -36.5625, -30.937499999999996, -25.312500000000004,\n",
+ " -19.687499999999996, -14.062499999999991, -8.437499999999996,\n",
+ " -2.812500000000003, 2.812500000000003, 8.437500000000009,\n",
+ " 14.062500000000004, 19.687499999999996, 25.312500000000004,\n",
+ " 30.93750000000001, 36.562499999999986, 42.1875,\n",
+ " 47.8125, 53.4375, 59.062500000000014,\n",
+ " 64.68750000000001, 70.3125, 75.9375,\n",
+ " 81.56249999999997, 87.18750000000003],\n",
+ " dtype='float64', name='latitude')) PandasIndex
PandasIndex(DatetimeIndex(['2010-01-01'], dtype='datetime64[ns]', name='time', freq=None)) PandasIndex
PandasIndex(Index([ -180.0, -174.375, -168.75, -163.125, -157.5, -151.875, -146.25,\n",
+ " -140.625, -135.0, -129.375, -123.75, -118.125, -112.5, -106.875,\n",
+ " -101.25, -95.625, -90.0, -84.375, -78.75, -73.125, -67.5,\n",
+ " -61.875, -56.25, -50.625, -45.0, -39.375, -33.75, -28.125,\n",
+ " -22.5, -16.875, -11.25, -5.625, 0.0, 5.625, 11.25,\n",
+ " 16.875, 22.5, 28.125, 33.75, 39.375, 45.0, 50.625,\n",
+ " 56.25, 61.875, 67.5, 73.125, 78.75, 84.375, 90.0,\n",
+ " 95.625, 101.25, 106.875, 112.5, 118.125, 123.75, 129.375,\n",
+ " 135.0, 140.625, 146.25, 151.875, 157.5, 163.125, 168.75,\n",
+ " 174.375],\n",
+ " dtype='float64', name='longitude')) Attributes: (0)
"
+ ],
+ "text/plain": [
+ " Size: 34kB\n",
+ "Dimensions: (time: 1, longitude: 64, latitude: 32)\n",
+ "Coordinates:\n",
+ " * latitude (latitude) float64 256B -87.19 -81.56 ... 87.19\n",
+ " * time (time) datetime64[ns] 8B 2010-01-01\n",
+ " * longitude (longitude) float64 512B -180.0 -174.4 ... 174.4\n",
+ "Data variables:\n",
+ " 10m_u_component_of_wind (time, longitude, latitude) float32 8kB 0.9633 ....\n",
+ " 10m_v_component_of_wind (time, longitude, latitude) float32 8kB 2.515 .....\n",
+ " 2m_temperature (time, longitude, latitude) float32 8kB 249.6 .....\n",
+ " mean_sea_level_pressure (time, longitude, latitude) float32 8kB 1.007e+0..."
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sample[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "50e45ec6-c10a-4a55-a471-bf0f7f3aacbf",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py b/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py
index eee5cf2f..0e9180a1 100644
--- a/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py
+++ b/packages/data/src/pyearthtools/data/indexes/utilities/fileload.py
@@ -215,7 +215,7 @@ def open_files(
if not isinstance(files, (tuple, list, dict)):
raise TypeError(
- f"Could not load files from input: {files!r}.\n" "Supported types are (str, Path, tuple, list, dict)"
+ f"Could not load files from input: {files!r}.\nSupported types are (str, Path, tuple, list, dict)"
)
files_to_load = [value for _, value in files.items()] if isinstance(files, dict) else list(files)
diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py
index 22ef8896..d033a651 100644
--- a/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py
+++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/__init__.py
@@ -13,17 +13,16 @@
# limitations under the License.
+from pyearthtools.pipeline.modifications import idx_modification
+from pyearthtools.pipeline.modifications.cache import Cache, MemCache, StaticCache
from pyearthtools.pipeline.modifications.idx_modification import (
IdxModifier,
IdxOverride,
- TimeIdxModifier,
SequenceRetrieval,
TemporalRetrieval,
+ TemporalWindow,
+ TimeIdxModifier,
)
-from pyearthtools.pipeline.modifications.cache import Cache, StaticCache, MemCache
-
-
-from pyearthtools.pipeline.modifications import idx_modification
__all__ = [
"Cache",
@@ -34,5 +33,6 @@
"TimeIdxModifier",
"SequenceRetrieval",
"TemporalRetrieval",
+ "TemporalWindow",
"idx_modification",
]
diff --git a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py
index 8773191f..d35608f8 100644
--- a/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py
+++ b/packages/pipeline/src/pyearthtools/pipeline/modifications/idx_modification.py
@@ -17,15 +17,13 @@
from __future__ import annotations
+import warnings
from dataclasses import dataclass
from typing import Any, Callable, Iterable, Optional, Type, Union
-import warnings
-import xarray as xr
import numpy as np
-
import pyearthtools.data
-
+import xarray as xr
from pyearthtools.pipeline.controller import PipelineIndex
from pyearthtools.pipeline.parallel import ParallelEnabledMixin
@@ -33,8 +31,8 @@
DASK_IMPORTED = True
try:
- from dask.delayed import Delayed, delayed
import dask.array as da
+ from dask.delayed import Delayed, delayed
except (ImportError, ModuleNotFoundError) as _:
DASK_IMPORTED = False
@@ -215,7 +213,6 @@ def _get_tuple(self, idx, mod: tuple[Any, ...], layer: int) -> Union[tuple[Any],
return samples
def __getitem__(self, idx: Any):
-
if not isinstance(self._modification, tuple):
return self.parent_pipeline()[idx + self._modification]
@@ -452,7 +449,12 @@ def __init__(
delta_unit: e.g. "month" or "hour"
concat: whether to contact or merge
"""
- super().__init__(samples, merge_function=merge_function, concat=concat, merge_kwargs=merge_kwargs)
+ super().__init__(
+ samples,
+ merge_function=merge_function,
+ concat=concat,
+ merge_kwargs=merge_kwargs,
+ )
def map_to_tuple(mod):
if isinstance(mod, tuple):
@@ -469,3 +471,82 @@ def __getitem__(self, idx: Any):
idx = pyearthtools.data.Petdt(idx)
return super().__getitem__(idx)
+
+
+class TemporalWindow(PipelineIndex):
+ """
+ The purpose of this class is to provide the ability to perform
+ sequence-to-sequence modelling from a data accessor or pipeline
+ that was designed to produce single time steps (i.e. single samples).
+
+ The temporal window allows the specification of the 'back window'
+ and the 'forward window', and will produce a binary branch: a ``(prior, posterior)`` pair.
+
+ For example, if the time steps are hourly, and the base pipeline
+ can produce hours 1, 2, 3 ... 10; then this Temporal Window can
+ be used to produce sequence pairs like::
+
+ [1,2,3], [4],
+ [2,3,4], [5],
+ ...
+ [7,8,9], [10]
+
+ or like::
+
+ [1,2], [3,4,5],
+ [2,3], [4,5,6],
+ ...
+ [6,7], [8,9,10]
+
+ This provides a simpler interface than the TemporalRetrieval which
+ is a more general alternative.
+
+ The window offsets are not positional: each required index is calculated
+ exactly, as the reference time plus a multiple of the specified timedelta,
+ and the resulting date-time is used to index the parent pipeline. The
+ handling of missing data is left to the underlying pipeline's response to
+ the retrieval of the calculated datetime.
+
+ The resultant sequences may be left unmerged (i.e. a list of retrieved
+ results for each time step) or merged (e.g. into an xarray object along the
+ time dimension). By default (``merge_method=None``) they are left unmerged.
+
+ Pass a custom ``merge_method`` to combine each sequence into a single object.
+ """
+
+ def __init__(self, *, prior_indexes, posterior_indexes, timedelta, merge_method=None):
+ """
+ Args:
+ prior_indexes: Offsets for the first sequence; each is multiplied by the timedelta then applied to the reference date
+ posterior_indexes: Offsets for the second sequence; each is multiplied by the timedelta then applied to the reference date
+ timedelta: Typically the time step of the underlying data
+ merge_method: Optional callable applied to each list of retrieved samples; if None (default) the lists are returned unmerged
+
+ Examples:
+
+ >>> TemporalWindow(prior_indexes=[-3,-2,-1], posterior_indexes=[0], timedelta=timedelta, merge_method=merge_method)
+
+ (assuming xarray data) will result in a tuple of two datasets, the first with a time coordinate dimension of 3 time steps and the
+ second with a time coordinate dimension of 1 time step.
+ """
+ # NOTE(review): PipelineIndex.__init__ is not called here, unlike the
+ # super().__init__(...) call made elsewhere in this file - confirm this is intentional.
+ self.prior_indexes = prior_indexes
+ self.posterior_indexes = posterior_indexes
+ self.timedelta = timedelta
+ self.merge_method = merge_method
+
+ def __getitem__(self, date_of_interest):
+ """Return the ``(prior, posterior)`` samples retrieved around ``date_of_interest``."""
+ date_of_interest = pyearthtools.data.time.Petdt(date_of_interest)
+ prior_i = [i * self.timedelta for i in self.prior_indexes]
+ posterior_i = [i * self.timedelta for i in self.posterior_indexes]
+ # The parent pipeline is indexed with the string form of each calculated datetime.
+ prior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in prior_i]
+ posterior = [self.parent_pipeline()[str(date_of_interest + delta)] for delta in posterior_i]
+
+ if self.merge_method:
+ prior = self.merge_method(prior)
+ posterior = self.merge_method(posterior)
+
+ return prior, posterior