Skip to content
Merged
5 changes: 5 additions & 0 deletions .zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@
"orcid": "https://orcid.org/0009-0004-9553-8387",
"affiliation": "Independent researcher, Australia",
"name": "James, Matthew"
},
{
"orcid": "https://orcid.org/0000-0002-5407-4297",
"affiliation": "Bureau of Meteorology, Australia",
"name": "Stassen, Christian"
}
],
"license": "Apache-2.0",
Expand Down
1 change: 1 addition & 0 deletions notebooks/Gallery.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
"| **Data access at NCI** | Shows how to access NCI (Australia) data collections | [Data Access at NCI](./tutorial/Interfacing_to_Data_at_NCI.ipynb) | 18 Aug 2025 |\n",
"| **Downloading ERA5** | How to download a copy of ERA5 for yourself | [Downloading ERA5 Data](./tutorial/Downloading_ERA5.ipynb) | 18 Aug 2025 |\n",
"| **Accessing ERA5** | Shows how to load ERA5 with PyEarthTools | [Accessing ERA5 Data](./tutorial/Accessing_ERA5_Data.ipynb) | 18 Aug 2025 |\n",
"| **Accessing BARRA-R2** | Shows how to load BARRA-R2 with PyEarthTools | [Accessing BARRA-R2 Data](./tutorial/accessing_barra2_and_matching_era5.ipynb) | 10 Oct 2025 |\n",
"| **Introduction to Pipelines** | Introduces the concept of a Pipeline | [Introduction to Pipelines](./tutorial/Data_Pipelines.ipynb) | 18 Aug 2025 |\n",
"| **Multiple data sources** | Shows how to take a wide variety of different geospatial data sources and join them into a single data structure for use in machine learning | [Working with Multiple Data Sources](./tutorial/MultipleSources.ipynb) | 18 Aug 2025 |\n",
"| **Working with climate data** | Shows how to load and work with climate data, which uses non-standard date time libraries. Note, because of the date-time differences, it is not easy to work with climate data and weather data at the same time. | [Working with Climate Data](./tutorial/Working_with_Climate_Data.ipynb) | 18 Aug 2025 |"
Expand Down
9,878 changes: 9,878 additions & 0 deletions notebooks/tutorial/accessing_barra2_and_matching_era5.ipynb

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions packages/data/src/pyearthtools/data/transforms/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,22 @@ def apply(self, dataset: xr.Dataset) -> xr.Dataset:
class Drop(Transform):
"""Drop dataset variables"""

def __init__(self, variables: list[str] | str, *extra_variables):
def __init__(self, variables: list[str] | str, *extra_variables, **kwargs):
"""
Drop variables from dataset

Args:
variables (list[str] | str):
List of vars to drop
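kwargs (dict):
Extra keyword arguments forwarded to `xarray.Dataset.drop_vars`,
e.g. `errors="ignore"` to skip variables that are not present.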
"""
super().__init__()
self.record_initialisation()

variables = variables if isinstance(variables, (list, tuple)) else [variables]
self._variables = [*variables, *extra_variables]
self.kwargs = kwargs

def apply(self, dataset: xr.Dataset) -> xr.Dataset:
if self._variables is None:
Expand All @@ -84,8 +87,7 @@ def apply(self, dataset: xr.Dataset) -> xr.Dataset:
# if not var_included:
# return dataset
# return dataset[var_included]

return dataset.drop_vars(self._variables)
return dataset.drop_vars(self._variables, **self.kwargs)


class Select(Transform):
Expand Down
94 changes: 51 additions & 43 deletions packages/nci_site_archive/src/site_archive_nci/BARPA.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,30 @@
from site_archive_nci.utilities import check_project

"""
Structure order
Structure order is taken from the CORDEX-CMIP6 archiving specs:
https://zenodo.org/records/15047096

order:
- activity
- product
- domain
- institution
- gcm_model_name
- experiment_name
- ensemble_member
- rcm_model_name
- version
- project_id
- activity_id
- domain_id
- institution_id
- driving_source_id
- driving_experiment_id
- driving_variant_label
- source_id
- version_realisation
- frequency
- variable

- variables
- version
"""

BARPA_DIR_STRUCTURE = "{project}/{MIP}/{activity}/{domain}/{institution}/{driving_source}/{experiment}/{variant}/{source}/{version_realisation}/{frequency}/"
BARPA_DIR_STRUCTURE = "{nature}/{project_id}/{activity_id}/{domain_id}/{institution_id}/{driving_source_id}/{driving_experiment_id}/{driving_variant_label}/{source_id}/{version_realisation}/{frequency}/"

VARIABLE_DEFAULT = Type[VariableDefault]


@register_archive("BARPA", sample_kwargs={"variables": "CAPE", "driving_source": "ERA5", "frequency": "1hr"})
@register_archive("BARPA", sample_kwargs={"variables": "CAPE", "driving_source_id": "ERA5", "frequency": "1hr"})
class BARPA(ArchiveIndex):
"""Index into Bureau of Meteorology Atmospheric Regional Projections for Australia"""

Expand All @@ -60,19 +61,19 @@ class BARPA(ArchiveIndex):
def __init__(
self,
variables: list[str] | str,
driving_source: str,
driving_source_id: str,
frequency: str,
driving_experiment_id: str,
*,
project: str | VARIABLE_DEFAULT = VariableDefault,
MIP: str | VARIABLE_DEFAULT = VariableDefault,
activity: str | VARIABLE_DEFAULT = VariableDefault,
domain: str | VARIABLE_DEFAULT = VariableDefault,
institution: str | VARIABLE_DEFAULT = VariableDefault,
experiment: str | VARIABLE_DEFAULT = VariableDefault,
variant: str | VARIABLE_DEFAULT = VariableDefault,
source: str | VARIABLE_DEFAULT = VariableDefault,
nature: str | VARIABLE_DEFAULT = VariableDefault,
project_id: str | VARIABLE_DEFAULT = VariableDefault,
activity_id: str | VARIABLE_DEFAULT = VariableDefault,
domain_id: str | VARIABLE_DEFAULT = VariableDefault,
institution_id: str | VARIABLE_DEFAULT = VariableDefault,
driving_variant_label: str | VARIABLE_DEFAULT = VariableDefault,
source_id: str | VARIABLE_DEFAULT = VariableDefault,
version_realisation: str | VARIABLE_DEFAULT = VariableDefault,
version: str | VARIABLE_DEFAULT = "v20231001", # VariableDefault,
version: str | VARIABLE_DEFAULT = "latest", # VariableDefault,
transforms: Transform | TransformCollection | None = None,
):
"""
Expand All @@ -87,34 +88,31 @@ def __init__(
Args:
variables (list[str] | str):
Variables to retrieve.
Based upon https://docs.google.com/spreadsheets/d/1qUauozwXkq7r1g-L4ALMIkCNINIhhCPx/pyearthtools#gid=1672965248
driving_source (str):
Based upon https://opus.nci.org.au/spaces/NDP/pages/338002650/BARPA+Parameter+Descriptions
driving_source_id (str):
Global Coupled Model. The models selected are:
ERA5, ACCESS-CM2, ACCESS-ESM1-5, NorESM2-MM, EC-Earth3, CESM2, CMCC-ESM2, MPI-ESM1-2-HR
Must be only one.
frequency (str):
Temporal Frequency. 1hr (1-hourly), 3hr, 6hr, day (daily), mon (monthly), fx
transforms (Transform | TransformCollection, optional):
Transforms to apply to the data. Defaults to TransformCollection().

project (str | VARIABLE_DEFAULT, optional):
nature of data or project_id is output or CORDEX for data for CORDEX-CMIP6.
MIP (str | VARIABLE_DEFAULT, optional):
MIP-era is the cycle of CMIP that defines experiment and data specifications. BARPA uses CMIP6.
activity (str | VARIABLE_DEFAULT, optional):
project_id (str | VARIABLE_DEFAULT, optional):
nature of data or project_id is output or CORDEX-CMIP6.
activity_id (str | VARIABLE_DEFAULT, optional):
DD for dynamical downscaling.
domain (str | VARIABLE_DEFAULT, optional):
Spatial domain and grid resolution of the data, namely AUS-15, AUS-04.
institution (str | VARIABLE_DEFAULT, optional):
domain_id (str | VARIABLE_DEFAULT, optional):
Spatial domain and grid resolution of the data, namely AUS-15, AUST-15, AUST-04, AUS-20i.
institution_id (str | VARIABLE_DEFAULT, optional):
RCM-institution is BOM
experiment (str | VARIABLE_DEFAULT, optional):
Evaluation (for ERA5), historical or ssp126, ssp370 for CMIP6 experiments.
variant (str | VARIABLE_DEFAULT, optional):
driving_experiment_id (str):
One of evaluation (for ERA5), historical, ssp126, ssp370 or ssp585 (ssp585 only for ACCESS-ESM1-5 and EC-Earth3) for CMIP6 experiments.
driving_variant_label (str | VARIABLE_DEFAULT, optional):
Labels the ensemble member of the CMIP6 simulation that produced forcing data.
source (str | VARIABLE_DEFAULT, optional):
source_id (str | VARIABLE_DEFAULT, optional):
Either BARPA-R or BARPA-C.
version_realisation (str | VARIABLE_DEFAULT, optional):
Identifies the modelling version (TBC on identifying data version)
Identifies the modelling version (v1-r1)
version (str | VARIABLE_DEFAULT, optional):
Denotes the date of data generation or date of data release
"""
Expand All @@ -126,6 +124,7 @@ def __init__(

self.variables = variables
self.version = str(version)
self.source_id = source_id

super().__init__(transforms=(transforms or TransformCollection()))
self.record_initialisation()
Expand All @@ -138,11 +137,20 @@ def filesystem(

discovered_paths = {}

querytime_year = Petdt(querytime).at_resolution("year")

for variable in self.variables:
dir_path = BARPA_HOME / self.dir / variable / self.version
paths = list(dir_path.glob(f"*{querytime_year.year}01-{querytime_year.year}12*.nc"))
if self.source_id == "BARPA-R":
querytime_year = Petdt(querytime).at_resolution("year")
filetmpl = f"*{querytime_year.year}01-{querytime_year.year}12*.nc"
elif self.source_id == "BARPA-C":
querytime_year = Petdt(querytime).at_resolution("month")
filetmpl = (
f"*{querytime_year.year}{querytime_year.month:02}-{querytime_year.year}{querytime_year.month:02}.nc"
)
else:
raise DataNotFoundError(f"Could not find source_id of {self.source_id}")

paths = list(dir_path.glob(filetmpl))
if len(paths) == 0:
raise DataNotFoundError(f"Could not find data at {dir_path!r} at time {querytime!r}")
discovered_paths[variable] = paths[0]
Expand Down
51 changes: 26 additions & 25 deletions packages/nci_site_archive/src/site_archive_nci/BARRAV2.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _desc_(self):
"Documentation": "https://dx.doi.org/10.25914/1x6g-2v48",
}

DIR_STRUCTURE = "{nature}/{activity}/{domain}/{institution}/{driving_source}/{experiment}/{variant}/{source}/{version_realisation}/{frequency}/"
DIR_STRUCTURE = "{nature}/{activity_id}/{domain_id}/{institution_id}/{driving_source_id}/{driving_experiment_id}/{driving_variant_label}/{source_id}/{version_realisation}/{frequency}/"
GLOB_TEMPLATE = "{variable}/{version}/{variable}_*%Y%m-%Y%m.nc"

@decorators.alias_arguments(variables=["variable"])
Expand All @@ -61,13 +61,13 @@ def __init__(
frequency: str,
*,
nature: str | VARIABLE_DEFAULT = VariableDefault,
activity: str | VARIABLE_DEFAULT = VariableDefault,
domain: str | VARIABLE_DEFAULT = VariableDefault,
institution: str | VARIABLE_DEFAULT = VariableDefault,
driving_source: str | VARIABLE_DEFAULT = VariableDefault,
experiment: str | VARIABLE_DEFAULT = VariableDefault,
variant: str | VARIABLE_DEFAULT = VariableDefault,
source: str | VARIABLE_DEFAULT = VariableDefault,
activity_id: str | VARIABLE_DEFAULT = VariableDefault,
domain_id: str | VARIABLE_DEFAULT = VariableDefault,
institution_id: str | VARIABLE_DEFAULT = VariableDefault,
driving_source_id: str | VARIABLE_DEFAULT = VariableDefault,
driving_experiment_id: str | VARIABLE_DEFAULT = VariableDefault,
driving_variant_label: str | VARIABLE_DEFAULT = VariableDefault,
source_id: str | VARIABLE_DEFAULT = VariableDefault,
version_realisation: str | VARIABLE_DEFAULT = VariableDefault,
version: str | VARIABLE_DEFAULT = "latest",
transforms: Transform | TransformCollection | None = None,
Expand All @@ -87,27 +87,28 @@ def __init__(
Args:
variables (list[str] | str):
Variables to retrieve.
Mostly based on https://docs.google.com/spreadsheets/d/1qUauozwXkq7r1g-L4ALMIkCNINIhhCPx/pyearthtools#gid=1672965248
Mostly based on https://opus.nci.org.au/spaces/NDP/pages/338002591/BARRA2+Parameter+Descriptions and
structure order is taken from the CORDEX-CMIP6 archiving specs: https://zenodo.org/records/15047096
frequency (str):
Temporal Frequency. '1hr' (1-hourly), '3hr', '6hr', 'day' (daily), 'mon' (monthly), 'fx'
transforms (Transform | TransformCollection, optional):
Transforms to apply to the data. Defaults to TransformCollection().

nature (str | VARIABLE_DEFAULT, optional):
'output'
activity (str | VARIABLE_DEFAULT, optional):
activity_id (str | VARIABLE_DEFAULT, optional):
'reanalysis'
domain (str | VARIABLE_DEFAULT, optional):
Spatial domain and grid resolution of the data, namely 'AUS-11', 'AUS-22', 'AUS-04'.
institution (str | VARIABLE_DEFAULT, optional):
domain_id (str | VARIABLE_DEFAULT, optional):
Spatial domain and grid resolution of the data, namely 'AUS-11', 'AUST-11', 'AUS-22', 'AUST-22', 'AUST-04'.
institution_id (str | VARIABLE_DEFAULT, optional):
'BOM', RCM-institution
driving_source (str| VARIABLE_DEFAULT, optional):
driving_source_id (str | VARIABLE_DEFAULT, optional):
'ERA5', global model that drives BARRA2 at the lateral boundary
experiment (str | VARIABLE_DEFAULT, optional):
driving_experiment_id (str | VARIABLE_DEFAULT, optional):
'historical'
variant (str | VARIABLE_DEFAULT, optional):
driving_variant_label (str | VARIABLE_DEFAULT, optional):
labels the nature of ERA5 data used, either 'hres' or 'eda'
source (str | VARIABLE_DEFAULT, optional):
source_id (str | VARIABLE_DEFAULT, optional):
BARRA-R2, BARRA-RE2, or BARRA-C2
version_realisation (str | VARIABLE_DEFAULT, optional):
identifies the modelling version of BARRA2 (TBC on identifying data version)
Expand All @@ -122,7 +123,7 @@ def __init__(
self.GLOB_TEMPLATE = "{variable}/{version}/{variable}_*.nc"

transforms = transforms or TransformCollection()
transforms += pyearthtools.data.transforms.variables.Drop("time_bnds")
transforms += pyearthtools.data.transforms.variables.Drop("time_bnds", errors="ignore")

variables = [variables] if isinstance(variables, str) else variables
new_vars = []
Expand All @@ -148,13 +149,13 @@ def __init__(
config_vars=dict(
frequency=frequency,
nature=nature,
activity=activity,
domain=domain,
institution=institution,
driving_source=driving_source,
experiment=experiment,
variant=variant,
source=source,
activity_id=activity_id,
domain_id=domain_id,
institution_id=institution_id,
driving_source_id=driving_source_id,
driving_experiment_id=driving_experiment_id,
driving_variant_label=driving_variant_label,
source_id=source_id,
version_realisation=version_realisation,
version=version,
),
Expand Down
Loading