diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3813fdc6..3505224c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,20 +2,16 @@ Changelog ========= -.. - `Unreleased `_ (latest) - ------------------------------------------------------------- +`Unreleased `_ (latest) +------------------------------------------------------------- - Contributors: David Huard (:user:`huard`). - - Changes - ^^^^^^^ - * No change. - - Fixes - ^^^^^ - * In `nc_specs`, set `dim_names_nc` in the order expected by Raven (x, y, t). Previously, we only made sure that `time` was the last dimension, but did not ensure x and y were in the right order. (PR #533) +Contributors: David Huard (:user:`huard`), Trevor James Smith (:user:`Zeitsperre`). +Fixes +^^^^^ +* In `nc_specs`, set `dim_names_nc` in the order expected by Raven (x, y, t). Previously, we only made sure that `time` was the last dimension, but did not ensure x and y were in the right order. (PR #533) +* Adjusted the `Perform_a_climate_change_impact_study_on_a_watershed.ipynb` notebook to reduce the number of years in the simulation to speed up execution time. (PR #535) +* Adjusted a broken test that was overlooked in the previous release (from changes in PR #513). (PR #535) .. _changes_0.19.1: diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb index 4414cd52..ca1489e8 100644 --- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb +++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb @@ -19,10 +19,9 @@ "\n", "We will also setup a hydrological model, calibrate it, and use it in evaluating the impacts of climate change on the hydrology of a catchment. We will be using the Mistassini river as the test-case for this example, but you can substitute the data for any catchment of your liking. 
We provide:\n", "\n", - "1- Streamflow observations (Water Survey Canada station 02RD003)\n", - "\n", - "2- Watershed boundaries in the form of shapefiles (all shape files .shp, .shx, .prj, .dbf, etc. zipped into a single file. The platform will detect and unzip the file to extract the required data)\n", + " 1 - Streamflow observations (Water Survey Canada station 02RD003).\n", "\n", + " 2 - Watershed boundaries in the form of shapefiles (all shape files .shp, .shx, .prj, .dbf, etc. zipped into a single file. The platform will detect and unzip the file to extract the required data).\n", "\n", "The rest will be done by PAVICS-Hydro, including getting meteorological information from our ERA5 reanalysis database and climate change model data from CMIP hosted by PanGEO.\n", "\n", @@ -158,15 +157,15 @@ "outputs": [], "source": [ "# Reference period that will be used for ERA5 and climate model data for the reference period.\n", - "# Here let's focus on a 10-year period to keep running times lower.\n", + "# Here let's focus on a 4-year period to keep running times lower.\n", "reference_start_day = dt.datetime(1980, 12, 31)\n", - "reference_end_day = dt.datetime(1990, 12, 31)\n", - "# Notice we are using one day before the desired period of 1981-01-01 to 1990-12-31.\n", + "reference_end_day = dt.datetime(1984, 12, 30)\n", + "# Notice we are using one day before the desired period of 1981-01-01 to 1985-12-31.\n", "# This is to account for any UTC shifts that might require getting data in a previous or later time.\n", "\n", "# Same process for the future period, 100 years later.\n", "future_start_day = dt.datetime(2080, 12, 31)\n", - "future_end_day = dt.datetime(2090, 12, 31)" + "future_end_day = dt.datetime(2084, 12, 31)" ] }, { @@ -549,13 +548,12 @@ "# Use xsdba utilities to give information on the type of window used for the bias correction.\n", "\n", "xsdba.DetrendedQuantileMapping._allow_diff_training_times = True\n", - "# 
xsdba.DetrendedQuantileMapping._allow_diff_calendars = True\n", + "xsdba.DetrendedQuantileMapping._allow_diff_calendars = True\n", "\n", "group_month_window = xsdba.utils.Grouper(\"time.dayofyear\", window=15)\n", "\n", "# This is an adjusting function.\n", "# It builds the tool that will perform the corrections.\n", - "\n", "Adjustment = xsdba.DetrendedQuantileMapping.train(\n", " ref=ERA5_weather.pr,\n", " hist=historical_pr,\n", @@ -624,28 +622,31 @@ "source": [ "# Convert the reference corrected data into netCDF file.\n", "# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n", - "ref_dataset = xr.merge(\n", + "\n", + "with ProgressBar():\n", + " # Convert the reference corrected data into netCDF file.\n", + " ref_dataset = xr.merge(\n", " [\n", " corrected_ref_precip.to_dataset(name=\"pr\"),\n", " corrected_ref_tasmax.to_dataset(name=\"tasmax\"),\n", " corrected_ref_tasmin.to_dataset(name=\"tasmin\"),\n", " ]\n", - ")\n", + " )\n", "\n", - "# Write to temporary folder.\n", - "fn_tmp_ref = tmp / \"reference_dataset_tmp.nc\"\n", - "ref_dataset.to_netcdf(fn_tmp_ref)\n", + " # Write to temporary folder.\n", + " fn_tmp_ref = tmp / \"reference_dataset_tmp.nc\"\n", + " ref_dataset.to_netcdf(fn_tmp_ref)\n", "\n", - "# Convert the future corrected data into netCDF file.\n", - "fut_dataset = xr.merge(\n", - " [\n", + " # Convert the future corrected data into netCDF file.\n", + " fut_dataset = xr.merge(\n", + " [\n", " corrected_fut_precip.to_dataset(name=\"pr\"),\n", " corrected_fut_tasmax.to_dataset(name=\"tasmax\"),\n", " corrected_fut_tasmin.to_dataset(name=\"tasmin\"),\n", - " ]\n", - ")\n", - "# Write to temporary folder.\n", - "with ProgressBar():\n", + " ]\n", + " )\n", + "\n", + " # Write to temporary folder.\n", " fn_tmp_fut = tmp / \"future_dataset_tmp.nc\"\n", " fut_dataset.to_netcdf(fn_tmp_fut)\n", "\n", @@ -688,8 +689,8 @@ "\n", "# Establish the start date for the calibration.\n", "# This is 
set in the model configuration, so the calibrator will simply execute the model which has been pre-configured to run on this period.\n", - "start_date = dt.datetime(1981, 1, 1)\n", - "end_date = dt.datetime(1985, 12, 31)\n", + "start_date = dt.datetime(1980, 12, 31)\n", + "end_date = dt.datetime(1984, 12, 31)\n", "\n", "# The data types available in the forcing netcdf file from ERA5, as per the tutorials.\n", "data_type = [\"TEMP_MAX\", \"TEMP_MIN\", \"PRECIP\"]\n", @@ -840,8 +841,8 @@ "source": [ "# Copy the configuration of the previous model that we will modify for our validation:\n", "model_validation = model_config.duplicate(\n", - " StartDate=dt.datetime(1986, 1, 1),\n", - " EndDate=dt.datetime(1990, 12, 31),\n", + " StartDate=dt.datetime(1983, 1, 1),\n", + " EndDate=dt.datetime(1984, 12, 31),\n", " SuppressOutput=False,\n", ").set_params(optimized_parameters)\n", "\n", @@ -968,7 +969,12 @@ "- The reference period flows;\n", "- The future period flows.\n", "\n", - "Results cannot be compared on a day-to-day basis because climate models do not reflect actual weather data. Therefore, we will compare the mean annual hydrographs to see changes in long-term flow patterns. Note that this test only uses 10 years (5 for the validation period) which is insufficient. Operational tests should include more years (ideally 30 or more) to reflect the climatology of the various periods.\n" + "Results cannot be compared on a day-to-day basis because climate models do not reflect actual weather data.\n", + "Therefore, we will compare the mean annual hydrographs to see changes in long-term flow patterns.\n", + "\n", + "> [!IMPORTANT]\n", + "> This test only uses 4 years of observations (2 years for the validation period) which is insufficient.\n", + "> Operational tests should include more years (ideally 30 or more) to reflect the climatology of the various periods." 
] }, { diff --git a/environment-dev.yml b/environment-dev.yml index e6ea1e4c..63fc59a6 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -43,7 +43,7 @@ dependencies: # Dev tools and testing - pip >=25.0 - black ==25.1.0 - - blackdoc ==0.3.9 + - blackdoc ==0.4.1 - bump-my-version >=1.2.0 - click >=8.2.0 - coverage >=7.5.0 diff --git a/src/ravenpy/extractors/forecasts.py b/src/ravenpy/extractors/forecasts.py index 2f52389d..9e8b9717 100644 --- a/src/ravenpy/extractors/forecasts.py +++ b/src/ravenpy/extractors/forecasts.py @@ -59,6 +59,7 @@ def get_CASPAR_dataset( # noqa: N802 date: dt.datetime, thredds: str = THREDDS_URL, directory: str = "dodsC/birdhouse/disk2/caspar/daily/", + engine: str = "netcdf4", ) -> tuple[xr.Dataset, list[Union[Union[DatetimeIndex, Series, Timestamp, Timestamp], Any]]]: """ Return CASPAR dataset. @@ -73,6 +74,8 @@ def get_CASPAR_dataset( # noqa: N802 The thredds server url. Default: "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/". directory : str The directory on the thredds server where the data is stored. Default: "dodsC/birdhouse/disk2/caspar/daily/". + engine : str + The xarray engine to use to open the dataset. Default: "netcdf4". Returns ------- @@ -87,7 +90,7 @@ def get_CASPAR_dataset( # noqa: N802 d = dt.datetime.strftime(date, "%Y%m%d") file_location = urljoin(directory, f"GEPS_{d}.nc") file_url = urljoin(thredds, file_location) - ds = xr.open_dataset(file_url) + ds = xr.open_dataset(file_url, engine=engine) # Here we also extract the times at 6-hour intervals as Raven must have # constant timesteps and GEPS goes to 6 hours start = pd.to_datetime(ds.time[0].values) @@ -108,6 +111,7 @@ def get_ECCC_dataset( # noqa: N802 climate_model: str, thredds: str = THREDDS_URL, directory: str = "dodsC/datasets/forecasts/eccc_geps/", + engine: str = "netcdf4", ) -> tuple[Dataset, list[Union[Union[DatetimeIndex, Series, Timestamp, Timestamp], Any]]]: """ Return latest GEPS forecast dataset. 
@@ -120,6 +124,8 @@ def get_ECCC_dataset( # noqa: N802 The thredds server url. Default: "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/". directory : str The directory on the thredds server where the data is stored. Default: "dodsC/datasets/forecasts/eccc_geps/". + engine : str + The xarray engine to use to open the dataset. Default: "netcdf4". Returns ------- @@ -134,7 +140,7 @@ def get_ECCC_dataset( # noqa: N802 # Eventually the file will find a permanent home, until then let's use the test folder. file_location = urljoin(directory, "GEPS_latest.ncml") file_url = urljoin(thredds, file_location) - ds = xr.open_dataset(file_url) + ds = xr.open_dataset(file_url, engine=engine) # Here we also extract the times at 6-hour intervals as Raven must have # constant timesteps and GEPS goes to 6 hours start = pd.to_datetime(ds.time[0].values) diff --git a/tests/test_utils.py b/tests/test_utils.py index f0bd9040..ff7acd3e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -17,7 +17,7 @@ def test_nc_specs(yangtze): assert attrs["dim_names_nc"] == ("time",) # 2D with station dimension - f = get_local_testdata("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc") + f = yangtze.fetch("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_2d.nc") attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) assert attrs["dim_names_nc"] == ( "region", @@ -25,11 +25,11 @@ def test_nc_specs(yangtze): ) # 3D - Since this file is not CF compliant, nc_specs cannot infer the correct dimension order - f = get_local_testdata("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc") + f = yangtze.fetch("raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily_3d.nc") attrs = nc_specs(f, "PRECIP", station_idx=1, alt_names=("rain",)) assert attrs["dim_names_nc"] == ("time", "lon", "lat") - f = 
yangtze.fetch("cmip5/tas_Amon_CanESM2_rcp85_r1i1p1_200601-210012_subset.nc") attrs = nc_specs(f, "TEMP_AVE", station_idx=1, engine="netcdf4") assert attrs["dim_names_nc"] == ("lon", "lat", "time")