diff --git a/notebooks/tutorial/Accessing_ERA5_Data.ipynb b/notebooks/tutorial/Accessing_ERA5_Data.ipynb index fb7ca3f6..82186fe1 100644 --- a/notebooks/tutorial/Accessing_ERA5_Data.ipynb +++ b/notebooks/tutorial/Accessing_ERA5_Data.ipynb @@ -7,6 +7,10 @@ "source": [ "# Accessing ERA5 Data\n", "\n", + "* NCI, NIWA and the Met Office have all downloaded a local copy of ERA5 5.625 degree resolution data.\n", + "* The locations of the data are presented below; uncomment the line for the organisation you have access to. \n", + "* If you work outside these organisations, follow the instructions in the `Downloading_ERA5.ipynb` notebook to download a local copy. \n", + "\n", "Assumptions:\n", "\n", "1. You should already have downloaded a copy or partial copy of ERA5 5.625 degree resolution\n", @@ -17,12 +21,12 @@ "This notebook will present two things:\n", "\n", "1. The quick install-and-use demo\n", - "2. How it was done slowly and carefully so you can do it on new data\n" + "2. How it was done slowly and carefully so you can do it on new data" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "0282bc19-9110-43fe-896a-c9263873e782", "metadata": {}, "outputs": [ @@ -30,23 +34,49 @@ "name": "stdout", "output_type": "stream", "text": [ - "10m_u_component_of_wind potential_vorticity\t total_cloud_cover\n", - "10m_v_component_of_wind relative_humidity\t total_precipitation\n", - "2m_temperature\t\t specific_humidity\t u_component_of_wind\n", - "constants\t\t temperature\t\t v_component_of_wind\n", - "geopotential\t\t toa_incident_solar_radiation vorticity\n" + "10m_u_component_of_wind geopotential_500\t total_cloud_cover\n", + "10m_v_component_of_wind potential_vorticity\t total_precipitation\n", + "2m_temperature\t\t relative_humidity\t u_component_of_wind\n", + "baselines\t\t specific_humidity\t v_component_of_wind\n", + "constants\t\t temperature\t\t vorticity\n", + "geopotential\t\t temperature_850\n", + "Geopotential\t\t toa_incident_solar_radiation\n" ] } ], 
"source": [ - "wbench_data_dir = '/g/data/wb00/NCI-Weatherbench/5.625deg' # on NCI\n", - "# wbench_data_dir = \"/nesi/nobackup/niwa00004/riom/weatherbench/5.625deg/\" # on NIWA HPC\n", + "# Uncomment the organisation you have access to.\n", + "\n", + "# wbench_data_dir = '/g/data/wb00/NCI-Weatherbench/5.625deg' # NCI\n", + "# wbench_data_dir = \"/nesi/nobackup/niwa00004/riom/weatherbench/5.625deg/\" # NIWA HPC\n", + "wbench_data_dir = \"/data/users/infolab/weatherbench/5.625deg\" # Met Office\n", + "\n", + "# List the contents of the data directory.\n", "!ls $wbench_data_dir" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, + "id": "be592b09-9267-4be9-9876-d7727602bb85", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2m_temperature_1979_5.625deg.nc\n" + ] + } + ], + "source": [ + "# Show the name of the first file in the 2m_temperature directory.\n", + "!ls $wbench_data_dir/2m_temperature/ | head -n 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "654cfbe5", "metadata": {}, "outputs": [ @@ -54,21 +84,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1980_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1981_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1982_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1983_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1984_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1985_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1986_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1987_5.625deg.nc\n", - 
"/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1988_5.625deg.nc\n", - "/g/data/wb00/NCI-Weatherbench/5.625deg/total_cloud_cover/total_cloud_cover_1989_5.625deg.nc\n" + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1980_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1981_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1982_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1983_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1984_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1985_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1986_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1987_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1988_5.625deg.nc\n", + "/data/users/infolab/weatherbench/5.625deg/2m_temperature/2m_temperature_1989_5.625deg.nc\n" ] } ], "source": [ - "!ls $wbench_data_dir/total_cloud_cover/*198*" + "# List every 2m_temperature file from the 1980s by matching the '*198*' wildcard pattern.\n", + "!ls $wbench_data_dir/2m_temperature/*198*" ] }, { @@ -82,7 +113,7 @@ "\n", "The challenge for PyEarthTools is to understand this directory structure, figure out the shorthand variables names that are actually present in the files (e.g. total_cloud_cover is called \"tcc\" inside the netcdf file), and work out how to index the whole thing by shorthand-variable-name and date, including interpreting things like the unit of the variable and handling any variable renaming that might be needed for standardisation of naming conventions.\n", "\n", - "This requires some configuration, and a lot of PyEarthTools' code is about this kind of data set comprehension. 
\n", + "This requires some configuration, and a lot of PyEarthTools' code is about this kind of dataset comprehension. \n", "\n", "Let's summarise the easy way.\n", "\n", @@ -95,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "40f49133", "metadata": {}, "outputs": [ @@ -103,72 +134,73 @@ "name": "stdout", "output_type": "stream", "text": [ - "env: ERA5LOWRES=/g/data/wb00/NCI-Weatherbench/5.625deg\n" + "env: ERA5LOWRES=/data/users/infolab/weatherbench/5.625deg\n" ] } ], "source": [ + "# Create an environment variable to store the path to the ERA5 data.\n", "%env ERA5LOWRES=$wbench_data_dir" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "b44fd368-3ab5-4a85-990d-331a749e84b1", "metadata": {}, "outputs": [], "source": [ + "# Import pyearthtools packages.\n", "import pyearthtools.data\n", "import pyearthtools.tutorial" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "9c5d26bb-6c8e-4c5c-a4d9-10e8250fe355", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "\u001b[0;31mInit signature:\u001b[0m\n", - "\u001b[0mpyearthtools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marchive\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mera5lowres\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mvariables\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'list[str] | str'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mlevel_value\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'int | float | list[int | float] | tuple[list | int, ...] 
| None'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mtransforms\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'Transform | TransformCollection | None'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mDocstring:\u001b[0m ECWMF ReAnalysis v5\n", - "\u001b[0;31mInit docstring:\u001b[0m\n", - "Setup ERA5 Low-Res Indexer\n", - "\n", - "Args:\n", - " variables (list[str] | str):\n", - " Data variables to retrieve\n", - " resolution (Literal[ERA_RES], optional):\n", - " Resolution of data, must be one of 'monthly-averaged','monthly-averaged-by-hour', 'reanalysis'.\n", - " Defaults to 'reanalysis'.\n", - " level_value: (int, optional):\n", - " Level value to select if data contains levels. Defaults to None.\n", - " transforms (Transform | TransformCollection, optional):\n", - " Base Transforms to apply.\n", - " Defaults to TransformCollection().\n", - "\u001b[0;31mFile:\u001b[0m /g/data/kd24/tjl/src/PyEarthTools/packages/tutorial/src/pyearthtools/tutorial/ERA5DataClass.py\n", - "\u001b[0;31mType:\u001b[0m ABCMeta\n", - "\u001b[0;31mSubclasses:\u001b[0m " - ] - }, - "metadata": {}, - "output_type": "display_data" + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31mInit signature:\u001b[39m\n", + "pyearthtools.data.archive.era5lowres(\n", + " variables: \u001b[33m'list[str] | str'\u001b[39m,\n", + " *,\n", + " level_value: \u001b[33m'int | float | list[int | float] | tuple[list | int, ...] 
| None'\u001b[39m = \u001b[38;5;28;01mNone\u001b[39;00m,\n", + " transforms: \u001b[33m'Transform | TransformCollection | None'\u001b[39m = \u001b[38;5;28;01mNone\u001b[39;00m,\n", + ")\n", + "\u001b[31mDocstring:\u001b[39m ECWMF ReAnalysis v5\n", + "\u001b[31mInit docstring:\u001b[39m\n", + "Setup ERA5 Low-Res Indexer\n", + "\n", + "Args:\n", + " variables (list[str] | str):\n", + " Data variables to retrieve\n", + " resolution (Literal[ERA_RES], optional):\n", + " Resolution of data, must be one of 'monthly-averaged','monthly-averaged-by-hour', 'reanalysis'.\n", + " Defaults to 'reanalysis'.\n", + " level_value: (int, optional):\n", + " Level value to select if data contains levels. Defaults to None.\n", + " transforms (Transform | TransformCollection, optional):\n", + " Base Transforms to apply.\n", + " Defaults to TransformCollection().\n", + "\u001b[31mFile:\u001b[39m ~/Projects/PyEarthTools/packages/tutorial/src/pyearthtools/tutorial/ERA5DataClass.py\n", + "\u001b[31mType:\u001b[39m ABCMeta\n", + "\u001b[31mSubclasses:\u001b[39m " + ] } ], "source": [ + "# Display information about era5lowres.\n", "pyearthtools.data.archive.era5lowres?" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 27, "id": "ef87fd0f-6e20-4e3b-9b47-a6b99e2390ea", "metadata": {}, "outputs": [ @@ -558,7 +590,6 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "\n", "
ERA5LowResIndex\n",
        "\tDescription                    ECWMF ReAnalysis v5\n",
        "\t\t range                          '1970-current'\n",
@@ -570,7 +601,7 @@
        "\t\t variables                      ['u', 'v']\n",
        "\tTransforms                     \n",
        "\t\t StandardCoordinateNames        {'latitude': "['lat', 'Latitude', 'yt_ocean', 'yt']", 'longitude': "['lon', 'Longitude', 'xt_ocean', 'xt']", 'replacement_dictionary': 'None', 'time': "['Time']"}\n",
-       "\t\t Rename                         {'names': {'t2m': "'2t'", 'u10': "'10u'", 'v10': "'10v'", 'siconc': "'ci'"}}
" + "\t\t Rename {'names': {'t2m': "'2t'", 'u10': "'10u'", 'v10': "'10v'", 'siconc': "'ci'"}}" ], "text/plain": [ "ERA5LowResIndex\n", @@ -587,12 +618,13 @@ "\t\t Rename {'names': {'t2m': \"'2t'\", 'u10': \"'10u'\", 'v10': \"'10v'\", 'siconc': \"'ci'\"}}" ] }, - "execution_count": 6, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "\n", "var = ['u', 'v'] # Note - there is no really straightforward way to just list the variables in the archive\n", " # However, mismatches will cause PyEarthTools to list what's available with a \"did you mean\" prompt\n", " # A specific listing function should be added in future.\n", @@ -602,7 +634,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "48c4a5b1-90a2-4ccc-a324-ce820cb73de4", "metadata": {}, "outputs": [ @@ -980,18 +1012,29 @@ " fill: currentColor;\n", "}\n", "
<xarray.Dataset> Size: 5MB\n",
-       "Dimensions:    (time: 24, level: 13, latitude: 32, longitude: 64)\n",
+       "Dimensions:    (latitude: 32, longitude: 64, level: 13, time: 24)\n",
        "Coordinates:\n",
-       "  * level      (level) int32 52B 50 100 150 200 250 300 ... 600 700 850 925 1000\n",
-       "  * time       (time) datetime64[ns] 192B 1984-01-01 ... 1984-01-01T23:00:00\n",
        "  * latitude   (latitude) float64 256B -87.19 -81.56 -75.94 ... 81.56 87.19\n",
        "  * longitude  (longitude) float64 512B 0.0 5.625 11.25 ... 343.1 348.8 354.4\n",
+       "  * level      (level) int32 52B 50 100 150 200 250 300 ... 600 700 850 925 1000\n",
+       "  * time       (time) datetime64[ns] 192B 1984-01-01 ... 1984-01-01T23:00:00\n",
        "Data variables:\n",
        "    u          (time, level, latitude, longitude) float32 3MB dask.array<chunksize=(24, 8, 19, 39), meta=np.ndarray>\n",
        "    v          (time, level, latitude, longitude) float32 3MB dask.array<chunksize=(24, 8, 19, 39), meta=np.ndarray>\n",
        "Attributes:\n",
-       "    regrid_method:  bilinear