From 7850de340c3df2d0927e1d8b49e754667737cb25 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:20:55 -0400 Subject: [PATCH 1/9] shorten notebook example --- ...e_change_impact_study_on_a_watershed.ipynb | 56 +++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb index 4414cd52..07e3a134 100644 --- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb +++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb @@ -158,15 +158,15 @@ "outputs": [], "source": [ "# Reference period that will be used for ERA5 and climate model data for the reference period.\n", - "# Here let's focus on a 10-year period to keep running times lower.\n", + "# Here let's focus on a 5-year period to keep running times lower.\n", "reference_start_day = dt.datetime(1980, 12, 31)\n", - "reference_end_day = dt.datetime(1990, 12, 31)\n", - "# Notice we are using one day before the desired period of 1981-01-01 to 1990-12-31.\n", + "reference_end_day = dt.datetime(1984, 12, 30)\n", + "# Notice we are using one day before the desired period of 1981-01-01 to 1985-12-31.\n", "# This is to account for any UTC shifts that might require getting data in a previous or later time.\n", "\n", "# Same process for the future period, 100 years later.\n", "future_start_day = dt.datetime(2080, 12, 31)\n", - "future_end_day = dt.datetime(2090, 12, 31)" + "future_end_day = dt.datetime(2084, 12, 31)" ] }, { @@ -538,6 +538,16 @@ "Here is where we perform the bias-correction to the reference and future climate data in order to remove biases as seen between the reference and historical data. The future dataset is then corrected with the same adjustment factors as those in the reference period. Feel free to modify the bias-correction method, quantiles, etc." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(ERA5_weather.pr)\n", + "# display(historical_pr)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -549,13 +559,12 @@ "# Use xsdba utilities to give information on the type of window used for the bias correction.\n", "\n", "xsdba.DetrendedQuantileMapping._allow_diff_training_times = True\n", - "# xsdba.DetrendedQuantileMapping._allow_diff_calendars = True\n", + "xsdba.DetrendedQuantileMapping._allow_diff_calendars = True\n", "\n", "group_month_window = xsdba.utils.Grouper(\"time.dayofyear\", window=15)\n", "\n", "# This is an adjusting function.\n", "# It builds the tool that will perform the corrections.\n", - "\n", "Adjustment = xsdba.DetrendedQuantileMapping.train(\n", " ref=ERA5_weather.pr,\n", " hist=historical_pr,\n", @@ -624,28 +633,31 @@ "source": [ "# Convert the reference corrected data into netCDF file.\n", "# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n", - "ref_dataset = xr.merge(\n", + "\n", + "with ProgressBar():\n", + " # Convert the reference corrected data into netCDF file.\n", + " ref_dataset = xr.merge(\n", " [\n", " corrected_ref_precip.to_dataset(name=\"pr\"),\n", " corrected_ref_tasmax.to_dataset(name=\"tasmax\"),\n", " corrected_ref_tasmin.to_dataset(name=\"tasmin\"),\n", " ]\n", - ")\n", + " )\n", "\n", - "# Write to temporary folder.\n", - "fn_tmp_ref = tmp / \"reference_dataset_tmp.nc\"\n", - "ref_dataset.to_netcdf(fn_tmp_ref)\n", + " # Write to temporary folder.\n", + " fn_tmp_ref = tmp / \"reference_dataset_tmp.nc\"\n", + " ref_dataset.to_netcdf(fn_tmp_ref)\n", "\n", - "# Convert the future corrected data into netCDF file.\n", - "fut_dataset = xr.merge(\n", - " [\n", + " # Convert the future corrected data into netCDF file.\n", + " fut_dataset = xr.merge(\n", + " [\n", " corrected_fut_precip.to_dataset(name=\"pr\"),\n", " corrected_fut_tasmax.to_dataset(name=\"tasmax\"),\n", " corrected_fut_tasmin.to_dataset(name=\"tasmin\"),\n", - " ]\n", - ")\n", - "# Write to temporary folder.\n", - "with ProgressBar():\n", + " ]\n", + " )\n", + "\n", + " # Write to temporary folder.\n", " fn_tmp_fut = tmp / \"future_dataset_tmp.nc\"\n", " fut_dataset.to_netcdf(fn_tmp_fut)\n", "\n", @@ -688,8 +700,8 @@ "\n", "# Establish the start date for the calibration.\n", "# This is set in the model configuration, so the calibrator will simply execute the model which has been pre-configured to run on this period.\n", - "start_date = dt.datetime(1981, 1, 1)\n", - "end_date = dt.datetime(1985, 12, 31)\n", + "start_date = dt.datetime(1980, 12, 31)\n", + "end_date = dt.datetime(1984, 12, 31)\n", "\n", "# The data types available in the forcing netcdf file from ERA5, as per the tutorials.\n", "data_type = [\"TEMP_MAX\", \"TEMP_MIN\", \"PRECIP\"]\n", @@ -840,8 +852,8 @@ "source": [ "# Copy the configuration of the previous model that we will modify for our validation:\n", "model_validation = model_config.duplicate(\n", - " StartDate=dt.datetime(1986, 1, 1),\n", - " EndDate=dt.datetime(1990, 12, 31),\n", + " StartDate=dt.datetime(1983, 1, 1),\n", + " EndDate=dt.datetime(1984, 12, 31),\n", " SuppressOutput=False,\n", ").set_params(optimized_parameters)\n", "\n", From 8502f3994c6717b28824af4f41769b840b4223bd Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:21:15 -0400 Subject: [PATCH 2/9] fix broken test --- 
tests/test_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index f0bd9040..ff7acd3e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -17,7 +17,7 @@ def test_nc_specs(yangtze): assert attrs["dim_names_nc"] == ("time",) # 2D with station dimension - f = get_local_testdata("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc") + f = yangtze.fetch("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc") attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) assert attrs["dim_names_nc"] == ( "region", @@ -25,11 +25,11 @@ def test_nc_specs(yangtze): ) # 3D - Since this file is not CF compliant, nc_specs cannot infer the correct dimension order - f = get_local_testdata("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc") + f = yangtze.fetch("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc") attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) assert attrs["dim_names_nc"] == ("time", "lon", "lat") - f = get_local_testdata("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc") + f = yangtze.fetch("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc") attrs = nc_specs(f, "TEMP_AVE", station_idx=1, engine="netcdf4") assert attrs["dim_names_nc"] == ("lon", "lat", "time") From 7dc0caee24b0a2d26797671fac9aea233d9163af Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:22:57 -0400 Subject: [PATCH 3/9] remove test cell --- ..._a_climate_change_impact_study_on_a_watershed.ipynb | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb index 07e3a134..21a86892 100644 --- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb +++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb @@ -538,16 +538,6 @@ "Here is where we perform the bias-correction to the reference and future climate data in order to remove biases as seen between the reference and historical data. The future dataset is then corrected with the same adjustment factors as those in the reference period. Feel free to modify the bias-correction method, quantiles, etc." 
] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(ERA5_weather.pr)\n", - "# display(historical_pr)" - ] - }, { "cell_type": "code", "execution_count": null, From 99d6eeca1e7caf0359bb6ae3a3e64d6738aefeb8 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:54:43 -0400 Subject: [PATCH 4/9] add missing dependencies for fsspec and xarray with h5netcdf backend, pin blackdoc higher --- environment-dev.yml | 2 +- pyproject.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index e6ea1e4c..63fc59a6 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -43,7 +43,7 @@ dependencies: # Dev tools and testing - pip >=25.0 - black ==25.1.0 - - blackdoc ==0.3.9 + - blackdoc ==0.4.1 - bump-my-version >=1.2.0 - click >=8.2.0 - coverage >=7.5.0 diff --git a/pyproject.toml b/pyproject.toml index 61164e3b..56495b79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ dependencies = [ [project.optional-dependencies] dev = [ # Dev tools and testing + "aiohttp", # needed for HTTPFileSystem in fsspec "black ==25.1.0", "blackdoc ==0.4.1", "bump-my-version >=1.2.0", @@ -73,6 +74,7 @@ dev = [ "flake8 >=7.2.0", "flake8-rst-docstrings >=0.3.0", "flit >=3.11.0,<4.0", + "fsspec", # needed for HTTPFileSystem in fsspec "holoviews", "hvplot", "mypy >=1.14.1", From 67cb49522cc7573f9d26e6b13aa264b2762a5699 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:13:45 -0400 Subject: [PATCH 5/9] use pydap as remote fetch engine by default --- pyproject.toml | 2 -- src/ravenpy/extractors/forecasts.py | 10 ++++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 56495b79..61164e3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,6 @@ dependencies = [ [project.optional-dependencies] dev = [ # Dev tools and testing - "aiohttp", # needed for HTTPFileSystem in fsspec "black ==25.1.0", "blackdoc ==0.4.1", "bump-my-version >=1.2.0", @@ -74,7 +73,6 @@ dev = [ "flake8 >=7.2.0", "flake8-rst-docstrings >=0.3.0", "flit >=3.11.0,<4.0", - "fsspec", # needed for HTTPFileSystem in fsspec "holoviews", "hvplot", "mypy >=1.14.1", diff --git a/src/ravenpy/extractors/forecasts.py b/src/ravenpy/extractors/forecasts.py index 2f52389d..c90c835f 100644 --- a/src/ravenpy/extractors/forecasts.py +++ b/src/ravenpy/extractors/forecasts.py @@ -59,6 +59,7 @@ def get_CASPAR_dataset( # noqa: N802 date: dt.datetime, thredds: str = THREDDS_URL, directory: str = "dodsC/birdhouse/disk2/caspar/daily/", + engine: str = "pydap", ) -> tuple[xr.Dataset, list[Union[Union[DatetimeIndex, Series, Timestamp, Timestamp], Any]]]: """ Return CASPAR dataset. @@ -73,6 +74,8 @@ def get_CASPAR_dataset( # noqa: N802 The thredds server url. Default: "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/". directory : str The directory on the thredds server where the data is stored. Default: "dodsC/birdhouse/disk2/caspar/daily/". + engine : str + The xarray engine to use to open the dataset. Default: "pydap". 
Returns ------- @@ -87,7 +90,7 @@ def get_CASPAR_dataset( # noqa: N802 d = dt.datetime.strftime(date, "%Y%m%d") file_location = urljoin(directory, f"GEPS_{d}.nc") file_url = urljoin(thredds, file_location) - ds = xr.open_dataset(file_url) + ds = xr.open_dataset(file_url, engine=engine) # Here we also extract the times at 6-hour intervals as Raven must have # constant timesteps and GEPS goes to 6 hours start = pd.to_datetime(ds.time[0].values) @@ -108,6 +111,7 @@ def get_ECCC_dataset( # noqa: N802 climate_model: str, thredds: str = THREDDS_URL, directory: str = "dodsC/datasets/forecasts/eccc_geps/", + engine: str = "pydap", ) -> tuple[Dataset, list[Union[Union[DatetimeIndex, Series, Timestamp, Timestamp], Any]]]: """ Return latest GEPS forecast dataset. @@ -120,6 +124,8 @@ def get_ECCC_dataset( # noqa: N802 The thredds server url. Default: "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/". directory : str The directory on the thredds server where the data is stored. Default: "dodsC/datasets/forecasts/eccc_geps/". + engine : str + The xarray engine to use to open the dataset. Default: "pydap". Returns ------- @@ -134,7 +140,7 @@ def get_ECCC_dataset( # noqa: N802 # Eventually the file will find a permanent home, until then let's use the test folder. file_location = urljoin(directory, "GEPS_latest.ncml") file_url = urljoin(thredds, file_location) - ds = xr.open_dataset(file_url) + ds = xr.open_dataset(file_url, engine=engine) # Here we also extract the times at 6-hour intervals as Raven must have # constant timesteps and GEPS goes to 6 hours start = pd.to_datetime(ds.time[0].values) From e699225dee71f0a02940d088934f5ff172dda025 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:32:58 -0400 Subject: [PATCH 6/9] change engine --- src/ravenpy/extractors/forecasts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ravenpy/extractors/forecasts.py b/src/ravenpy/extractors/forecasts.py index c90c835f..9e8b9717 100644 --- a/src/ravenpy/extractors/forecasts.py +++ b/src/ravenpy/extractors/forecasts.py @@ -59,7 +59,7 @@ def get_CASPAR_dataset( # noqa: N802 date: dt.datetime, thredds: str = THREDDS_URL, directory: str = "dodsC/birdhouse/disk2/caspar/daily/", - engine: str = "pydap", + engine: str = "netcdf4", ) -> tuple[xr.Dataset, list[Union[Union[DatetimeIndex, Series, Timestamp, Timestamp], Any]]]: """ Return CASPAR dataset. @@ -111,7 +111,7 @@ def get_ECCC_dataset( # noqa: N802 climate_model: str, thredds: str = THREDDS_URL, directory: str = "dodsC/datasets/forecasts/eccc_geps/", - engine: str = "pydap", + engine: str = "netcdf4", ) -> tuple[Dataset, list[Union[Union[DatetimeIndex, Series, Timestamp, Timestamp], Any]]]: """ Return latest GEPS forecast dataset. 
From e2445ab377d6635ca202ab521d016f86e1eae5eb Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 6 Oct 2025 13:42:11 -0400 Subject: [PATCH 7/9] update documentation surrounding example --- ..._climate_change_impact_study_on_a_watershed.ipynb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb index 21a86892..c416fd2c 100644 --- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb +++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb @@ -19,10 +19,9 @@ "\n", "We will also setup a hydrological model, calibrate it, and use it in evaluating the impacts of climate change on the hydrology of a catchment. We will be using the Mistassini river as the test-case for this example, but you can substitute the data for any catchment of your liking. We provide:\n", "\n", - "1- Streamflow observations (Water Survey Canada station 02RD003)\n", - "\n", - "2- Watershed boundaries in the form of shapefiles (all shape files .shp, .shx, .prj, .dbf, etc. zipped into a single file. The platform will detect and unzip the file to extract the required data)\n", + " 1 - Streamflow observations (Water Survey Canada station 02RD003).\n", "\n", + " 2 - Watershed boundaries in the form of shapefiles (all shape files .shp, .shx, .prj, .dbf, etc. zipped into a single file. The platform will detect and unzip the file to extract the required data).\n", "\n", "The rest will be done by PAVICS-Hydro, including getting meteorological information from our ERA5 reanalysis database and climate change model data from CMIP hosted by PanGEO.\n", "\n", @@ -158,7 +157,7 @@ "outputs": [], "source": [ "# Reference period that will be used for ERA5 and climate model data for the reference period.\n", - "# Here let's focus on a 5-year period to keep running times lower.\n", + "# Here let's focus on a 4-year period to keep running times lower.\n", "reference_start_day = dt.datetime(1980, 12, 31)\n", "reference_end_day = dt.datetime(1984, 12, 30)\n", "# Notice we are using one day before the desired period of 1981-01-01 to 1985-12-31.\n", @@ -970,7 +969,10 @@ "- The reference period flows;\n", "- The future period flows.\n", "\n", - "Results cannot be compared on a day-to-day basis because climate models do not reflect actual weather data. Therefore, we will compare the mean annual hydrographs to see changes in long-term flow patterns. Note that this test only uses 10 years (5 for the validation period) which is insufficient. Operational tests should include more years (ideally 30 or more) to reflect the climatology of the various periods.\n" + "Results cannot be compared on a day-to-day basis because climate models do not reflect actual weather data. Therefore, we will compare the mean annual hydrographs to see changes in long-term flow patterns.\n", + "\n", + "> [!IMPORTANT]\n", + "> This test only uses 5 years (2 for the validation period) which is insufficient. Operational tests should include more years (ideally 30 or more) to reflect the climatology of the various periods." 
] }, { From 68e22d3e0747c7f5f87887b3ebc5362715bd7c28 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 6 Oct 2025 13:44:08 -0400 Subject: [PATCH 8/9] update documentation surrounding example --- ...rform_a_climate_change_impact_study_on_a_watershed.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb index c416fd2c..ca1489e8 100644 --- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb +++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb @@ -969,10 +969,12 @@ "- The reference period flows;\n", "- The future period flows.\n", "\n", - "Results cannot be compared on a day-to-day basis because climate models do not reflect actual weather data. Therefore, we will compare the mean annual hydrographs to see changes in long-term flow patterns.\n", + "Results cannot be compared on a day-to-day basis because climate models do not reflect actual weather data.\n", + "Therefore, we will compare the mean annual hydrographs to see changes in long-term flow patterns.\n", "\n", "> [!IMPORTANT]\n", - "> This test only uses 5 years (2 for the validation period) which is insufficient. Operational tests should include more years (ideally 30 or more) to reflect the climatology of the various periods." + "> This test only uses 4 years of observations (2 years for the validation period) which is insufficient.\n", + "> Operational tests should include more years (ideally 30 or more) to reflect the climatology of the various periods." ] }, { From 5cc405388738591cb28a2b1fac811488f2a295af Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 6 Oct 2025 14:05:23 -0400 Subject: [PATCH 9/9] update CHANGELOG.rst --- CHANGELOG.rst | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3813fdc6..3505224c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,20 +2,16 @@ Changelog ========= -.. - `Unreleased `_ (latest) - ------------------------------------------------------------- +`Unreleased `_ (latest) +------------------------------------------------------------- - Contributors: David Huard (:user:`huard`). - - Changes - ^^^^^^^ - * No change. - - Fixes - ^^^^^ - * In `nc_specs`, set `dim_names_nc` in the order expected by Raven (x, y, t). Previously, we only made sure that `time` was the last dimension, but did not ensure x and y were in the right order. (PR #533) +Contributors: David Huard (:user:`huard`), Trevor James Smith (:user:`Zeitsperre`). +Fixes +^^^^^ +* In `nc_specs`, set `dim_names_nc` in the order expected by Raven (x, y, t). Previously, we only made sure that `time` was the last dimension, but did not ensure x and y were in the right order. (PR #533) +* Adjusted the `Perform_a_climate_change_impact_study_on_a_watershed.ipynb` notebook to reduce the number of years in the simulation to speed up execution time. (PR #535) +* Adjusted a broken test that was overlooked in the previous release (from changes in PR #513). (PR #535) .. _changes_0.19.1:
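
As an illustrative aside (not part of the patch series above): the `engine` keyword that PATCH 5/9 introduces and whose default PATCH 6/9 then changes to "netcdf4" might be exercised as sketched below. The signatures of `get_CASPAR_dataset` and `get_ECCC_dataset`, their tuple return values, and the module path src/ravenpy/extractors/forecasts.py are taken from the diffs; the "GEPS" model label, the example date, the availability of the "pydap" backend, and network access to the default PAVICS THREDDS server are assumptions made only for this sketch.

    # Hypothetical usage of the new `engine` keyword (assumptions noted above).
    import datetime as dt

    from ravenpy.extractors.forecasts import get_CASPAR_dataset, get_ECCC_dataset

    # Omitting `engine` now uses xarray's "netcdf4" backend (PATCH 6/9);
    # "pydap" can still be requested explicitly, as in the second call.
    caspar_ds, caspar_times = get_CASPAR_dataset(date=dt.datetime(2018, 8, 31))
    geps_ds, geps_times = get_ECCC_dataset(climate_model="GEPS", engine="pydap")

Each call returns the opened dataset together with the list of 6-hourly timesteps extracted from it, as described in the docstrings added in PATCH 5/9.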