From a12aa9fa9b579749e644d4d16add7463be6235ed Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 28 Aug 2025 17:49:12 -0400 Subject: [PATCH 1/3] infer dimension ordering within nc_specs using cf-xarray --- src/ravenpy/config/commands.py | 6 +++++- src/ravenpy/config/utils.py | 32 +++++++++++++++++++++++++++++++- tests/test_utils.py | 25 +++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/ravenpy/config/commands.py b/src/ravenpy/config/commands.py index 8af9bb58..123f56b8 100644 --- a/src/ravenpy/config/commands.py +++ b/src/ravenpy/config/commands.py @@ -883,7 +883,11 @@ class ReadFromNetCDF(FlatCommand): @field_validator("dim_names_nc") @classmethod def reorder_time(cls, v): - """TODO: Return dimensions as x, y, t. Currently only puts time at the end.""" + """Return dimensions as x, y, t. + + This is a fail safe because if input files are CF-compliant, dimensions should already + have been ordered by `nc_specs`. + """ dims = list(v) for time_dim in ("t", "time"): if time_dim in dims: diff --git a/src/ravenpy/config/utils.py b/src/ravenpy/config/utils.py index 20aadd75..246f713c 100644 --- a/src/ravenpy/config/utils.py +++ b/src/ravenpy/config/utils.py @@ -79,7 +79,7 @@ def nc_specs( if v in ds.data_vars: nc_var = ds[v] attrs["var_name_nc"] = v - attrs["dim_names_nc"] = nc_var.dims + attrs["dim_names_nc"] = infer_dim_names(nc_var) attrs["_time_dim_name_nc"] = ds.cf["time"].name attrs["_dim_size_nc"] = dict(zip(nc_var.dims, nc_var.shape)) attrs["units"] = nc_var.attrs.get("units") @@ -189,3 +189,33 @@ def get_annotations(a): yield from get_annotations(arg) else: yield arg + + +def infer_dim_names(da: xr.DataArray) -> tuple: + """Return names of dimensions in dataset in order expected by Raven. + + If 3D, return X, Y, T axes names if they can be inferred from CF conventions. 
+ If 2D, return STATION, T + """ + try: + if da.ndim == 1: + dims = da.cf.axes["T"] + if len(dims) != 1: + raise ValueError("Should have exactly 1 dimension.") + + elif da.ndim == 2: + dims = list(da.dims) + tdim = da.cf.axes["T"][0] + dims.remove(tdim) + dims.append(tdim) + + elif da.ndim == 3: + dims = da.cf.axes["X"] + da.cf.axes["Y"] + da.cf.axes["T"] + if len(dims) != 3: + raise ValueError("Should have exactly 3 dimensions.") + + except Exception: + # In case CF inference fails, return the original dims. + return da.dims + + return tuple(dims) diff --git a/tests/test_utils.py b/tests/test_utils.py index 9e8ff7aa..85e42949 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,11 +4,36 @@ def test_nc_specs(get_local_testdata): + # 1D f = get_local_testdata( "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc" ) attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) assert "file_name_nc" in attrs + assert attrs["dim_names_nc"] == ("time",) + + # 2D with station dimension + f = get_local_testdata( + "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc" + ) + attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) + assert attrs["dim_names_nc"] == ( + "region", + "time", + ) + + # 3D - Since this file is not CF compliant, nc_specs cannot infer the correct dimension order + f = get_local_testdata( + "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc" + ) + attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) + assert attrs["dim_names_nc"] == ("time", "lon", "lat") + + f = get_local_testdata( + "cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc" + ) + attrs = nc_specs(f, "TEMP_AVE", station_idx=1, engine="netcdf4") + assert attrs["dim_names_nc"] == ("lon", "lat", "time") def test_nc_specs_bad(bad_netcdf): From a4a0724f5fb4c1b8aed97efb20e9e467fcf595e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 
8 Sep 2025 18:47:08 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/ravenpy/config/commands.py | 3 ++- src/ravenpy/config/utils.py | 3 ++- tests/test_utils.py | 12 +++--------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/ravenpy/config/commands.py b/src/ravenpy/config/commands.py index 78d5e977..d94c6f42 100644 --- a/src/ravenpy/config/commands.py +++ b/src/ravenpy/config/commands.py @@ -853,7 +853,8 @@ class ReadFromNetCDF(FlatCommand): @field_validator("dim_names_nc") @classmethod def reorder_time(cls, v): - """Return dimensions as x, y, t. + """ + Return dimensions as x, y, t. This is a fail safe because if input files are CF-compliant, dimensions should already have been ordered by `nc_specs`. diff --git a/src/ravenpy/config/utils.py b/src/ravenpy/config/utils.py index f980d283..259fb4ff 100644 --- a/src/ravenpy/config/utils.py +++ b/src/ravenpy/config/utils.py @@ -188,7 +188,8 @@ def get_annotations(a): def infer_dim_names(da: xr.DataArray) -> tuple: - """Return names of dimensions in dataset in order expected by Raven. + """ + Return names of dimensions in dataset in order expected by Raven. If 3D, return X, Y, T axes names if they can be inferred from CF conventions. 
If 2D, return STATION, T diff --git a/tests/test_utils.py b/tests/test_utils.py index ccb7c75c..f0bd9040 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -17,9 +17,7 @@ def test_nc_specs(yangtze): assert attrs["dim_names_nc"] == ("time",) # 2D with station dimension - f = get_local_testdata( - "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc" - ) + f = get_local_testdata("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc") attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) assert attrs["dim_names_nc"] == ( "region", @@ -27,15 +25,11 @@ def test_nc_specs(yangtze): ) # 3D - Since this file is not CF compliant, nc_specs cannot infer the correct dimension order - f = get_local_testdata( - "raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc" - ) + f = get_local_testdata("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc") attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) assert attrs["dim_names_nc"] == ("time", "lon", "lat") - f = get_local_testdata( - "cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc" - ) + f = get_local_testdata("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc") attrs = nc_specs(f, "TEMP_AVE", station_idx=1, engine="netcdf4") assert attrs["dim_names_nc"] == ("lon", "lat", "time") From ee6b6aa5d6f4bbdb1a04b9b1408aa49aa97de445 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 11 Sep 2025 09:57:03 -0400 Subject: [PATCH 3/3] Update CHANGELOG --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e1764343..25623c94 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,6 +8,7 @@ v0.18.3 (unreleased) Bug fixes ^^^^^^^^^ * Fixed bug affecting `GriddedForcing`, where `station_idx` in the call to ``nc_specs`` was set to ``1`` instead of ``None``. (PR #501) +* In `nc_specs`, set `dim_names_nc` in the order expected by Raven (x, y, t). 
Previously, we only made sure that `time` was the last dimension, but did not ensure x and y were in the right order. (PR #533) Internal changes ^^^^^^^^^^^^^^^^