diff --git a/notebooks/Project_config.ipynb b/notebooks/Project_config.ipynb
new file mode 100644
index 00000000..565587a0
--- /dev/null
+++ b/notebooks/Project_config.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Set Up The Project Paths:** User input required"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Replace the PROJECT_HOME and ERA5LOWRES paths with your actual paths\n",
+ "ERA5LOWRES = Path(\"/path/to/your/data\") # Replace with the path to your ERA5LOWRES data here. Example: Path(\"/path/to/your/ERA5LOWRES\")\n",
+ "PROJECT_HOME = None # Replace with the path to your PROJECT_HOME here, or set to None to default to current directory. Example: Path(\"/path/to/your/PROJECT_HOME\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### **Path Setting Logic:** Can be ignored by the user"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Ensure PROJECT_HOME is set to a valid Path object\n",
+ "try:\n",
+ " if PROJECT_HOME is None:\n",
+ " PROJECT_HOME = Path.cwd()\n",
+ " elif not isinstance(PROJECT_HOME, Path):\n",
+ " raise TypeError(f\"PROJECT_HOME must be a Path object or None. Got: {type(PROJECT_HOME).__name__}\")\n",
+ "except Exception as e:\n",
+ " raise ValueError(f\"Error setting PROJECT_HOME: {e}\")\n",
+ "\n",
+ "# Validate the PROJECT_HOME path\n",
+ "try:\n",
+ " if not PROJECT_HOME.is_dir():\n",
+ " raise ValueError(f\"The provided PROJECT_HOME path '{PROJECT_HOME}' is not a valid directory. Please update it.\")\n",
+ "except ValueError as e:\n",
+ " raise ValueError(f\"Error validating PROJECT_HOME: {e}\")\n",
+ "\n",
+ "# Ensure ERA5LOWRES is set to a valid Path object\n",
+ "try:\n",
+ " if not isinstance(ERA5LOWRES, Path):\n",
+ " raise TypeError(f\"ERA5LOWRES must be a Path object. Got: {type(ERA5LOWRES).__name__}\")\n",
+ "except Exception as e:\n",
+ " raise ValueError(f\"Error setting ERA5LOWRES: {e}\")\n",
+ "\n",
+ "# Validate the ERA5LOWRES path\n",
+ "try:\n",
+ " if not ERA5LOWRES.is_dir():\n",
+ " raise ValueError(f\"The provided ERA5LOWRES path '{ERA5LOWRES}' does not exist or is not a directory. Please update it.\")\n",
+ "except ValueError as e:\n",
+ " raise ValueError(f\"Error validating ERA5LOWRES: {e}\")\n",
+ "\n",
+ "\n",
+ "# Set the environment variable if ERA5LOWRES is valid\n",
+ "if ERA5LOWRES:\n",
+ " os.environ[\"ERA5LOWRES\"] = str(ERA5LOWRES)\n",
+ "else:\n",
+ " print(\"ERA5LOWRES is not set. Please check your configuration.\")\n",
+ "\n",
+ "# Define paths in PROJECT_HOME for stats and caching\n",
+ "default_root_dir = PROJECT_HOME / \"cnn_training\" # Folder to save the trained models and other project outputs\n",
+ "stats_folder = PROJECT_HOME / \"cnn_training/stats\" # Folder to save estimated mean & standard deviation of fields\n",
+ "cache_folder = PROJECT_HOME / \"cnn_training/cache\" # Folders used to cache dataset processed by the pipeline\n",
+ "\n",
+ "# Ensure that the required directories exist or create them\n",
+ "stats_folder.mkdir(parents=True, exist_ok=True)\n",
+ "cache_folder.mkdir(parents=True, exist_ok=True)\n",
+ "default_root_dir.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "# Print paths for debugging\n",
+ "print(f\"Project Home: {PROJECT_HOME}\")\n",
+ "print(f\"Stats folder: {stats_folder}\")\n",
+ "print(f\"Cache folder: {cache_folder}\")\n",
+ "print(f\"Default root directory: {default_root_dir}\")\n",
+ "print(f\"ERA5LOWRES: {ERA5LOWRES}\")\n",
+ "\n",
+ "# Show contents of the ERA5LOWRES directory\n",
+ "if ERA5LOWRES:\n",
+ " print(f\"Contents of ERA5LOWRES:\")\n",
+ " !ls -l {ERA5LOWRES}"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "PET_tutorial",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/tutorial/CNN_model_training.ipynb b/notebooks/tutorial/CNN_model_training.ipynb
index dbce8bec..a5e056bc 100644
--- a/notebooks/tutorial/CNN_model_training.ipynb
+++ b/notebooks/tutorial/CNN_model_training.ipynb
@@ -27,44 +27,23 @@
"source": [
"## Set the path to the ERA5lowres archive as an environment variable\n",
"\n",
- "Make sure to set the `ERA5LOWRES` environment variable to make the ERA5 low-resolution archive findable on your system.\n",
+ "Make sure to set the `ERA5LOWRES` environment variable to make the ERA5 low-resolution archive findable on your system. If you don't have the dataset downloaded already, you will need to download the 5.625deg ERA5 dataset from Weatherbench. Instructions to do so can be found in the `Downloading_ERA5.ipynb` notebook.\n",
"\n",
- "If you are at one of the following organisations, you can modify the following cell as follows:\n",
+ "Using the `%run` magic command, we can run `Project_config.ipynb` from within our current notebook and access its variables. The following code will run the `Project_config.ipynb` notebook, where the user should have already set the ERA5LOWRES environment variable and the PROJECT_HOME variable. If you haven't done this, do it now before running this notebook.\n",
"\n",
- "* for NCI (Australia): `%env ERA5LOWRES=/g/data/wb00/NCI-Weatherbench/5.625deg`\n",
- "\n",
- "* for NIWA (New Zealand): `%env ERA5LOWRES=/nesi/nobackup/niwa00004/riom/weatherbench/5.625deg`\n",
- "\n",
- "* for the Met Office (UK): `%env ERA5LOWRES=/data/users/infolab/weatherbench/5.625deg`\n",
- "\n",
- "If you are at another facility, you will need to download the 5.625deg ERA5 dataset from Weatherbench. Instructions to do so can be found in the `Downloading_ERA5.ipynb` notebook. Then set the environment variable to the location of your local copy. "
+ "- The `ERA5LOWRES` variable is used to set the path to the ERA5 data files\n",
+ "- The `PROJECT_HOME` variable is used to set the path to the project directory"
]
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "613f5e9f-0378-4c74-93ee-c49083bee302",
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "env: ERA5LOWRES=/g/data/wb00/NCI-Weatherbench/5.625deg\n"
- ]
- }
- ],
+ "execution_count": null,
+ "id": "1777333a",
+ "metadata": {},
+ "outputs": [],
"source": [
- "# NCI: \n",
- "%env ERA5LOWRES=/g/data/wb00/NCI-Weatherbench/5.625deg\n",
- "\n",
- "# NIWA:\n",
- "# %env ERA5LOWRES=/nesi/nobackup/niwa00004/riom/weatherbench/5.625deg\n",
- "\n",
- "# Met Office:\n",
- "# %env ERA5LOWRES=/data/users/infolab/weatherbench/5.625deg"
+ "# If this fails, explicitly set the path to the config notebook, e.g. /some/place/where/things/are/notebooks/Project_config.ipynb\n",
+ "%run ../Project_config.ipynb"
]
},
{
@@ -77,7 +56,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "0cd1e8b6-acc3-4074-92db-536c0e8ea112",
"metadata": {
"tags": []
@@ -106,7 +85,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "e6cd2afc-fdc1-418c-a4a0-59d8cd080de5",
"metadata": {
"tags": []
@@ -127,18 +106,12 @@
"\n",
"# number of samples to estimate mean & standard deviation of fields\n",
"n_samples = 200\n",
- "# folder to save estimated mean & standard deviation of fields\n",
- "stats_folder = \"/g/data/kd24/temp/cnn_training/stats\"\n",
- "\n",
- "# folders used to cache dataset processed by the pipeline\n",
- "cache_folder = \"/g/data/kd24/temp/cnn_training/cache\"\n",
"\n",
"# data loader parameters\n",
"batch_size = 1\n",
"n_workers = 2\n",
"\n",
"# trainer parameters\n",
- "default_root_dir = \"cnn_training\"\n",
"max_epochs = 10"
]
},
@@ -184,509 +157,10 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "8eb82b2d",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
Pipeline\n",
- "\tDescription `pyearthtools.pipeline` Data Pipeline\n",
- "\n",
- "\n",
- "\tInitialisation \n",
- "\t\t exceptions_to_ignore None\n",
- "\t\t iterator None\n",
- "\t\t sampler None\n",
- "\tSteps \n",
- "\t\t ERA5DataClass.ERA5LowResIndex {'ERA5LowResIndex': {'level_value': 'None', 'variables': "['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']"}}\n",
- "\t\t sort.Sort {'Sort': {'order': "['t2', 'u', 'v', 'vorticity', 'geopotential']", 'strict': 'False'}}\n",
- "\t\t coordinates.StandardLongitude {'StandardLongitude': {'longitude_name': "'longitude'", 'type': "'0-360'"}}\n",
- "\t\t reshape.CoordinateFlatten {'CoordinateFlatten': {'__args': '()', 'coordinate': "'level'", 'skip_missing': 'False'}}\n",
- "\t\t idx_modification.TemporalRetrieval {'TemporalRetrieval': {'concat': 'True', 'delta_unit': 'None', 'merge_function': 'None', 'merge_kwargs': 'None', 'samples': '((-1, 1), (6, 1))'}} Initialisation:
exceptions_to_ignore
None
None
iterator
None
None
sampler
None
None
Steps:
ERA5DataClass.ERA5LowResIndex
{'ERA5LowResIndex': {'level_value': None, 'variables': ['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']}}
ERA5LowResIndex : {'level_value': None, 'variables': ['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']}
sort.Sort
{'Sort': {'order': ['t2', 'u', 'v', 'vorticity', 'geopotential'], 'strict': False}}
Sort : {'order': ['t2', 'u', 'v', 'vorticity', 'geopotential'], 'strict': False}
coordinates.StandardLongitude
{'StandardLongitude': {'longitude_name': 'longitude', 'type': '0-360'}}
StandardLongitude : {'longitude_name': 'longitude', 'type': '0-360'}
reshape.CoordinateFlatten
{'CoordinateFlatten': {'__args': (), 'coordinate': 'level', 'skip_missing': False}}
CoordinateFlatten : {'__args': (), 'coordinate': 'level', 'skip_missing': False}
idx_modification.TemporalRetrieval
{'TemporalRetrieval': {'concat': True, 'delta_unit': None, 'merge_function': None, 'merge_kwargs': None, 'samples': ((-1, 1), (6, 1))}}
TemporalRetrieval : {'concat': True, 'delta_unit': None, 'merge_function': None, 'merge_kwargs': None, 'samples': ((-1, 1), (6, 1))}
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Graph "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "%3 \n",
- " \n",
- "\n",
- "\n",
- "ERA5LowResIndex_12777dcb-d0de-41fc-8c74-5624212551d9 \n",
- " \n",
- "ERA5DataClass.ERA5LowResIndex \n",
- " \n",
- "\n",
- "\n",
- "Sort_8852d690-a323-4691-8858-205b2a3963f1 \n",
- "\n",
- "sort.Sort \n",
- " \n",
- "\n",
- "\n",
- "ERA5LowResIndex_12777dcb-d0de-41fc-8c74-5624212551d9->Sort_8852d690-a323-4691-8858-205b2a3963f1 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "StandardLongitude_57e364f0-638f-400f-be58-06da9a084be0 \n",
- "\n",
- "coordinates.StandardLongitude \n",
- " \n",
- "\n",
- "\n",
- "Sort_8852d690-a323-4691-8858-205b2a3963f1->StandardLongitude_57e364f0-638f-400f-be58-06da9a084be0 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "CoordinateFlatten_7a49bdd7-7503-41c7-a87c-37466a2cf3ac \n",
- "\n",
- "reshape.CoordinateFlatten \n",
- " \n",
- "\n",
- "\n",
- "StandardLongitude_57e364f0-638f-400f-be58-06da9a084be0->CoordinateFlatten_7a49bdd7-7503-41c7-a87c-37466a2cf3ac \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "TemporalRetrieval_75beac88-ac04-4810-8f31-7bac6523d501 \n",
- "\n",
- "idx_modification.TemporalRetrieval \n",
- " \n",
- "\n",
- "\n",
- "CoordinateFlatten_7a49bdd7-7503-41c7-a87c-37466a2cf3ac->TemporalRetrieval_75beac88-ac04-4810-8f31-7bac6523d501 \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"data_preparation = pyearthtools.pipeline.Pipeline(\n",
" pyearthtools.data.archive.era5lowres([\"2m_temperature\", \"u\", \"v\", \"geopotential\", \"vorticity\"]),\n",
@@ -738,20 +212,10 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "d1ca459e",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Train start: 2015-01-01T00\n",
- "Input sample: ['2014-12-31T23:00:00.000000000']\n",
- "Target sample: ['2015-01-01T05:00:00.000000000']\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# This code shows how we can pass a date to the data_preparation pipeline and get the corresponding input and target samples.\n",
"# We can then index on this object to get the input and target samples using [0] for input samples and [1] for target samples.\n",
@@ -767,20 +231,10 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "39f2eae8",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Train start: 2015-01-01T00\n",
- "X_sample: ['2014-12-31T23:00:00.000000000']\n",
- "y_sample: ['2015-01-01T05:00:00.000000000']\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# We could also split the data into X Input data and y Target data as follows:\n",
"print(\"Train start:\", train_start)\n",
@@ -816,548 +270,10 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "e0d6eb27",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
Pipeline\n",
- "\tDescription `pyearthtools.pipeline` Data Pipeline\n",
- "\n",
- "\n",
- "\tInitialisation \n",
- "\t\t exceptions_to_ignore None\n",
- "\t\t iterator None\n",
- "\t\t sampler None\n",
- "\tSteps \n",
- "\t\t ERA5DataClass.ERA5LowResIndex {'ERA5LowResIndex': {'level_value': 'None', 'variables': "['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']"}}\n",
- "\t\t sort.Sort {'Sort': {'order': "['t2', 'u', 'v', '2t', 'geopotential', 'vorticity']", 'strict': 'False'}}\n",
- "\t\t coordinates.StandardLongitude {'StandardLongitude': {'longitude_name': "'longitude'", 'type': "'0-360'"}}\n",
- "\t\t reshape.CoordinateFlatten {'CoordinateFlatten': {'__args': '()', 'coordinate': "'level'", 'skip_missing': 'False'}}\n",
- "\t\t idx_modification.TemporalRetrieval {'TemporalRetrieval': {'concat': 'True', 'delta_unit': 'None', 'merge_function': 'None', 'merge_kwargs': 'None', 'samples': '((-1, 1), (6, 1))'}}\n",
- "\t\t conversion.ToNumpy {'ToNumpy': {'reference_dataset': 'None', 'run_parallel': 'False', 'saved_records': 'None', 'warn': 'True'}}\n",
- "\t\t reshape.Rearrange {'Rearrange': {'rearrange': "'c t h w -> t c h w'", 'rearrange_kwargs': 'None', 'reverse_rearrange': 'None', 'skip': 'False'}}\n",
- "\t\t reshape.Squish {'Squish': {'axis': '0'}} Initialisation:
exceptions_to_ignore
None
None
iterator
None
None
sampler
None
None
Steps:
ERA5DataClass.ERA5LowResIndex
{'ERA5LowResIndex': {'level_value': None, 'variables': ['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']}}
ERA5LowResIndex : {'level_value': None, 'variables': ['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']}
sort.Sort
{'Sort': {'order': ['t2', 'u', 'v', '2t', 'geopotential', 'vorticity'], 'strict': False}}
Sort : {'order': ['t2', 'u', 'v', '2t', 'geopotential', 'vorticity'], 'strict': False}
coordinates.StandardLongitude
{'StandardLongitude': {'longitude_name': 'longitude', 'type': '0-360'}}
StandardLongitude : {'longitude_name': 'longitude', 'type': '0-360'}
reshape.CoordinateFlatten
{'CoordinateFlatten': {'__args': (), 'coordinate': 'level', 'skip_missing': False}}
CoordinateFlatten : {'__args': (), 'coordinate': 'level', 'skip_missing': False}
idx_modification.TemporalRetrieval
{'TemporalRetrieval': {'concat': True, 'delta_unit': None, 'merge_function': None, 'merge_kwargs': None, 'samples': ((-1, 1), (6, 1))}}
TemporalRetrieval : {'concat': True, 'delta_unit': None, 'merge_function': None, 'merge_kwargs': None, 'samples': ((-1, 1), (6, 1))}
conversion.ToNumpy
{'ToNumpy': {'reference_dataset': None, 'run_parallel': False, 'saved_records': None, 'warn': True}}
ToNumpy : {'reference_dataset': None, 'run_parallel': False, 'saved_records': None, 'warn': True}
reshape.Rearrange
{'Rearrange': {'rearrange': 'c t h w -> t c h w', 'rearrange_kwargs': None, 'reverse_rearrange': None, 'skip': False}}
Rearrange : {'rearrange': 'c t h w -> t c h w', 'rearrange_kwargs': None, 'reverse_rearrange': None, 'skip': False}
reshape.Squish
{'Squish': {'axis': 0}}
Squish : {'axis': 0}
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Graph "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "%3 \n",
- " \n",
- "\n",
- "\n",
- "ERA5LowResIndex_daa4b95a-9595-4b80-90b7-f394b481a7af \n",
- " \n",
- "ERA5DataClass.ERA5LowResIndex \n",
- " \n",
- "\n",
- "\n",
- "Sort_40512330-3edf-48e4-a8b9-6f31273b3f9c \n",
- "\n",
- "sort.Sort \n",
- " \n",
- "\n",
- "\n",
- "ERA5LowResIndex_daa4b95a-9595-4b80-90b7-f394b481a7af->Sort_40512330-3edf-48e4-a8b9-6f31273b3f9c \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "StandardLongitude_2348acd8-c32d-4fc6-9e66-9089bc42f095 \n",
- "\n",
- "coordinates.StandardLongitude \n",
- " \n",
- "\n",
- "\n",
- "Sort_40512330-3edf-48e4-a8b9-6f31273b3f9c->StandardLongitude_2348acd8-c32d-4fc6-9e66-9089bc42f095 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "CoordinateFlatten_704ca047-e14a-4a85-81cf-6be352e78e1c \n",
- "\n",
- "reshape.CoordinateFlatten \n",
- " \n",
- "\n",
- "\n",
- "StandardLongitude_2348acd8-c32d-4fc6-9e66-9089bc42f095->CoordinateFlatten_704ca047-e14a-4a85-81cf-6be352e78e1c \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "TemporalRetrieval_3039e587-45a5-4904-a3fd-857ca80c0840 \n",
- "\n",
- "idx_modification.TemporalRetrieval \n",
- " \n",
- "\n",
- "\n",
- "CoordinateFlatten_704ca047-e14a-4a85-81cf-6be352e78e1c->TemporalRetrieval_3039e587-45a5-4904-a3fd-857ca80c0840 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "ToNumpy_9b244b21-931f-4e18-9385-fd487a28d354 \n",
- "\n",
- "conversion.ToNumpy \n",
- " \n",
- "\n",
- "\n",
- "TemporalRetrieval_3039e587-45a5-4904-a3fd-857ca80c0840->ToNumpy_9b244b21-931f-4e18-9385-fd487a28d354 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Rearrange_c4b89996-6b35-4673-9d9a-0f31e655f9eb \n",
- "\n",
- "reshape.Rearrange \n",
- " \n",
- "\n",
- "\n",
- "ToNumpy_9b244b21-931f-4e18-9385-fd487a28d354->Rearrange_c4b89996-6b35-4673-9d9a-0f31e655f9eb \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Squish_8706048a-8424-405f-9baf-002d0cb4a4f7 \n",
- "\n",
- "reshape.Squish \n",
- " \n",
- "\n",
- "\n",
- "Rearrange_c4b89996-6b35-4673-9d9a-0f31e655f9eb->Squish_8706048a-8424-405f-9baf-002d0cb4a4f7 \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"data_preparation = pyearthtools.pipeline.Pipeline(\n",
" pyearthtools.data.archive.era5lowres([\"2m_temperature\", \"u\", \"v\", \"geopotential\", \"vorticity\"]),\n",
@@ -1391,22 +307,12 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "7e4c4ff2-78ac-4ab8-bce2-ffbcadb202e3",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Number of samples: 2\n",
- "Input data shape: (53, 32, 64)\n",
- "Target data shape: (53, 32, 64)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Create a sample by passing the data_preparation a datetime. \n",
"\n",
@@ -1430,415 +336,12 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "54c4baca-afe0-4450-9371-7aa57fb9e39b",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
Randomise\n",
- "\tInitialisation Wrap around another `Iterator` and randomly sample\n",
- "\t\t iterator {'DateRange': {'end': "'2015-01-12T00'", 'interval': "'1h'", 'start': "'2015-01-01T00'"}}\n",
- "\t\t seed 42 "
- ],
- "text/plain": [
- "Randomise\n",
- "\tInitialisation Wrap around another `Iterator` and randomly sample\n",
- "\t\t iterator {'DateRange': {'end': \"'2015-01-12T00'\", 'interval': \"'1h'\", 'start': \"'2015-01-01T00'\"}}\n",
- "\t\t seed 42"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"train_split = pyearthtools.pipeline.iterators.DateRange(train_start, train_end, interval=\"1h\").randomise(seed=42)\n",
"train_split"
@@ -1854,415 +357,10 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "78343163",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
DateRange\n",
- "\tInitialisation DateRange Iterator\n",
- "\t\t end '2016-01-12T00'\n",
- "\t\t interval '1h'\n",
- "\t\t start '2016-01-01T00' "
- ],
- "text/plain": [
- "DateRange\n",
- "\tInitialisation DateRange Iterator\n",
- "\t\t end '2016-01-12T00'\n",
- "\t\t interval '1h'\n",
- "\t\t start '2016-01-01T00'"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"val_split = pyearthtools.pipeline.iterators.DateRange(val_start, val_end, interval=\"1h\")\n",
"val_split"
@@ -2270,21 +368,12 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "927ca606-6031-44b4-a646-4b88528f3f1a",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Train Splits: (Petdt('2015-01-01T23'), Petdt('2015-01-09T12'), Petdt('2015-01-08T04'), Petdt('2015-01-05T19'), Petdt('2015-01-05T17'))\n",
- "Val Splits: (Petdt('2016-01-01T00'), Petdt('2016-01-01T01'), Petdt('2016-01-01T02'), Petdt('2016-01-01T03'), Petdt('2016-01-01T04'))\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"print(\"Train Splits:\", train_split[:5])\n",
"print(\"Val Splits: \", val_split[:5])"
@@ -2302,21 +391,12 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "e6b69b1c-780f-4890-bf00-8409026205bd",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 4min 1s, sys: 30 s, total: 4min 31s\n",
- "Wall time: 4min 13s\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"stats_folder = Path(stats_folder)\n",
@@ -2326,7 +406,7 @@
"std_path = stats_folder / \"std.npy\"\n",
"\n",
"# Flag to control whether to recompute mean and std\n",
- "recompute_stats = True # Set to True if you want to recompute even if files exist (useful if variables have changed)\n",
+ "recompute_stats = False # Set to True if you want to recompute even if files exist (useful if variables have changed)\n",
"\n",
"# Compute mean/std only if files are missing or if recompute_stats is True\n",
"if recompute_stats or not mean_path.is_file() or not std_path.is_file():\n",
@@ -2340,7 +420,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "d97307a5",
"metadata": {},
"outputs": [],
@@ -2355,7 +435,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "53d89dcd",
"metadata": {},
"outputs": [],
@@ -2370,7 +450,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"id": "bf352aa8",
"metadata": {},
"outputs": [],
@@ -2385,576 +465,12 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "e3d6e689-f5c7-4b6b-a88e-d7d12c8997fa",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
Pipeline\n",
- "\tDescription `pyearthtools.pipeline` Data Pipeline\n",
- "\n",
- "\n",
- "\tInitialisation \n",
- "\t\t exceptions_to_ignore None\n",
- "\t\t iterator None\n",
- "\t\t sampler None\n",
- "\tSteps \n",
- "\t\t ERA5DataClass.ERA5LowResIndex {'ERA5LowResIndex': {'level_value': 'None', 'variables': "['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']"}}\n",
- "\t\t sort.Sort {'Sort': {'order': "['t2', 'u', 'v', '2t', 'geopotential', 'vorticity']", 'strict': 'False'}}\n",
- "\t\t coordinates.StandardLongitude {'StandardLongitude': {'longitude_name': "'longitude'", 'type': "'0-360'"}}\n",
- "\t\t reshape.CoordinateFlatten {'CoordinateFlatten': {'__args': '()', 'coordinate': "'level'", 'skip_missing': 'False'}}\n",
- "\t\t idx_modification.TemporalRetrieval {'TemporalRetrieval': {'concat': 'True', 'delta_unit': 'None', 'merge_function': 'None', 'merge_kwargs': 'None', 'samples': '((-1, 1), (6, 1))'}}\n",
- "\t\t conversion.ToNumpy {'ToNumpy': {'reference_dataset': 'None', 'run_parallel': 'False', 'saved_records': 'None', 'warn': 'True'}}\n",
- "\t\t reshape.Rearrange {'Rearrange': {'rearrange': "'c t h w -> t c h w'", 'rearrange_kwargs': 'None', 'reverse_rearrange': 'None', 'skip': 'False'}}\n",
- "\t\t reshape.Squish {'Squish': {'axis': '0'}}\n",
- "\t\t normalisation.Deviation {'Deviation': {'deviation': "PosixPath('/g/data/kd24/temp/cnn_training/stats/std.npy')", 'expand': 'False', 'mean': "PosixPath('/g/data/kd24/temp/cnn_training/stats/mean.npy')"}}\n",
- "\t\t cache.Cache {'Cache': {'cache': "'/g/data/kd24/temp/cnn_training/cache'", 'cache_validity': "'warn'", 'pattern': 'None', 'pattern_kwargs': {'extension': "'npy'"}, 'save_kwargs': 'None'}} Initialisation:
exceptions_to_ignore
None
None
iterator
None
None
sampler
None
None
Steps:
ERA5DataClass.ERA5LowResIndex
{'ERA5LowResIndex': {'level_value': None, 'variables': ['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']}}
ERA5LowResIndex : {'level_value': None, 'variables': ['2m_temperature', 'u', 'v', 'geopotential', 'vorticity']}
sort.Sort
{'Sort': {'order': ['t2', 'u', 'v', '2t', 'geopotential', 'vorticity'], 'strict': False}}
Sort : {'order': ['t2', 'u', 'v', '2t', 'geopotential', 'vorticity'], 'strict': False}
coordinates.StandardLongitude
{'StandardLongitude': {'longitude_name': 'longitude', 'type': '0-360'}}
StandardLongitude : {'longitude_name': 'longitude', 'type': '0-360'}
reshape.CoordinateFlatten
{'CoordinateFlatten': {'__args': (), 'coordinate': 'level', 'skip_missing': False}}
CoordinateFlatten : {'__args': (), 'coordinate': 'level', 'skip_missing': False}
idx_modification.TemporalRetrieval
{'TemporalRetrieval': {'concat': True, 'delta_unit': None, 'merge_function': None, 'merge_kwargs': None, 'samples': ((-1, 1), (6, 1))}}
TemporalRetrieval : {'concat': True, 'delta_unit': None, 'merge_function': None, 'merge_kwargs': None, 'samples': ((-1, 1), (6, 1))}
conversion.ToNumpy
{'ToNumpy': {'reference_dataset': None, 'run_parallel': False, 'saved_records': None, 'warn': True}}
ToNumpy : {'reference_dataset': None, 'run_parallel': False, 'saved_records': None, 'warn': True}
reshape.Rearrange
{'Rearrange': {'rearrange': 'c t h w -> t c h w', 'rearrange_kwargs': None, 'reverse_rearrange': None, 'skip': False}}
Rearrange : {'rearrange': 'c t h w -> t c h w', 'rearrange_kwargs': None, 'reverse_rearrange': None, 'skip': False}
reshape.Squish
{'Squish': {'axis': 0}}
Squish : {'axis': 0}
normalisation.Deviation
{'Deviation': {'deviation': PosixPath('/g/data/kd24/temp/cnn_training/stats/std.npy'), 'expand': False, 'mean': PosixPath('/g/data/kd24/temp/cnn_training/stats/mean.npy')}}
Deviation : {'deviation': PosixPath('/g/data/kd24/temp/cnn_training/stats/std.npy'), 'expand': False, 'mean': PosixPath('/g/data/kd24/temp/cnn_training/stats/mean.npy')}
cache.Cache
{'Cache': {'cache': '/g/data/kd24/temp/cnn_training/cache', 'cache_validity': 'warn', 'pattern': None, 'pattern_kwargs': {'extension': 'npy'}, 'save_kwargs': None}}
Cache : {'cache': '/g/data/kd24/temp/cnn_training/cache', 'cache_validity': 'warn', 'pattern': None, 'pattern_kwargs': {'extension': 'npy'}, 'save_kwargs': None}
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Graph "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "%3 \n",
- " \n",
- "\n",
- "\n",
- "ERA5LowResIndex_cd13d44d-215f-47f2-b5dd-67ff94959d37 \n",
- " \n",
- "ERA5DataClass.ERA5LowResIndex \n",
- " \n",
- "\n",
- "\n",
- "Sort_21e48ee9-06c0-4ac0-8331-e57634dcf289 \n",
- "\n",
- "sort.Sort \n",
- " \n",
- "\n",
- "\n",
- "ERA5LowResIndex_cd13d44d-215f-47f2-b5dd-67ff94959d37->Sort_21e48ee9-06c0-4ac0-8331-e57634dcf289 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "StandardLongitude_c54f1430-02c7-4f12-abae-4b873448f732 \n",
- "\n",
- "coordinates.StandardLongitude \n",
- " \n",
- "\n",
- "\n",
- "Sort_21e48ee9-06c0-4ac0-8331-e57634dcf289->StandardLongitude_c54f1430-02c7-4f12-abae-4b873448f732 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "CoordinateFlatten_0dcaef6e-44b2-48ec-9905-fc7b970fa8bf \n",
- "\n",
- "reshape.CoordinateFlatten \n",
- " \n",
- "\n",
- "\n",
- "StandardLongitude_c54f1430-02c7-4f12-abae-4b873448f732->CoordinateFlatten_0dcaef6e-44b2-48ec-9905-fc7b970fa8bf \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "TemporalRetrieval_734b15a8-a9c3-4be7-bfd0-3504835dd2f3 \n",
- "\n",
- "idx_modification.TemporalRetrieval \n",
- " \n",
- "\n",
- "\n",
- "CoordinateFlatten_0dcaef6e-44b2-48ec-9905-fc7b970fa8bf->TemporalRetrieval_734b15a8-a9c3-4be7-bfd0-3504835dd2f3 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "ToNumpy_de6238d3-8f06-4e4d-a009-fd3059b2d06f \n",
- "\n",
- "conversion.ToNumpy \n",
- " \n",
- "\n",
- "\n",
- "TemporalRetrieval_734b15a8-a9c3-4be7-bfd0-3504835dd2f3->ToNumpy_de6238d3-8f06-4e4d-a009-fd3059b2d06f \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Rearrange_7f2111f2-02c3-4713-a42d-434cbe3f968a \n",
- "\n",
- "reshape.Rearrange \n",
- " \n",
- "\n",
- "\n",
- "ToNumpy_de6238d3-8f06-4e4d-a009-fd3059b2d06f->Rearrange_7f2111f2-02c3-4713-a42d-434cbe3f968a \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Squish_2e7fd87a-c727-4d58-81d4-6581b592b74a \n",
- "\n",
- "reshape.Squish \n",
- " \n",
- "\n",
- "\n",
- "Rearrange_7f2111f2-02c3-4713-a42d-434cbe3f968a->Squish_2e7fd87a-c727-4d58-81d4-6581b592b74a \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Deviation_cd710809-afdd-4fcb-bba3-7ac5abd3f360 \n",
- "\n",
- "normalisation.Deviation \n",
- " \n",
- "\n",
- "\n",
- "Squish_2e7fd87a-c727-4d58-81d4-6581b592b74a->Deviation_cd710809-afdd-4fcb-bba3-7ac5abd3f360 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Cache_d7318eed-f67b-4397-b8e6-83eb65c27692 \n",
- "\n",
- "cache.Cache \n",
- " \n",
- "\n",
- "\n",
- "Deviation_cd710809-afdd-4fcb-bba3-7ac5abd3f360->Cache_d7318eed-f67b-4397-b8e6-83eb65c27692 \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"data_preparation_normed"
]
@@ -2969,7 +485,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "0d1a4965-b224-472f-9675-4c17f83f548f",
"metadata": {
"tags": []
@@ -3035,19 +551,10 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "ab29d950",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(53, 32, 64)\n",
- "Number of features: 53\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# Print the shape of the normalised training data for the given start date\n",
"print(data_preparation_normed[train_start][0].shape)\n",
@@ -3059,7 +566,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "3dd973c3",
"metadata": {},
"outputs": [],
@@ -3078,40 +585,19 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "90fa9f5a-b2b1-4952-a066-831b66d28917",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "CNN(\n",
- " (cnn): Sequential(\n",
- " (0): Conv2d(53, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (1): ReLU()\n",
- " (2): Dropout(p=0.6, inplace=False)\n",
- " (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " (4): ReLU()\n",
- " (5): Dropout(p=0.6, inplace=False)\n",
- " (6): Conv2d(64, 53, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
- " )\n",
- ")"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"model"
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"id": "51aa6230-b381-4c04-b231-9cf432148079",
"metadata": {
"tags": []
@@ -3146,7 +632,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "615389bf-2e4d-4cac-abe0-06637d699792",
"metadata": {
"tags": []
@@ -3167,486 +653,19 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"id": "d54e16c5-dfe4-4c4b-bea7-ab7646591b6a",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
PipelineLightningDataModule\n",
- "\tInitialisation Pytorch Lightning DataModule.\n",
- "\t\t batch_size 1\n",
- "\t\t iterator_dataset False\n",
- "\t\t multiprocessing_context 'forkserver'\n",
- "\t\t num_workers 2\n",
- "\t\t persistent_workers True\n",
- "\t\t pipelines {'Pipeline': {'__args': '(Pipeline\\n\\tDescription `pyearthtools.pipeline` Data Pipeline\\n\\n\\n\\tInitialisation \\n\\t\\t exceptions_to_ignore None\\n\\t\\t iterator None\\n\\t\\t sampler None\\n\\tSteps \\n\\t\\t ERA5DataClass.ERA5LowResIndex {\\'ERA5LowResIndex\\': {\\'level_value\\': \\'None\\', \\'variables\\': "[\\'2m_temperature\\', \\'u\\', \\'v\\', \\'geopotential\\', \\'vorticity\\']"}}\\n\\t\\t sort.Sort {\\'Sort\\': {\\'order\\': "[\\'t2\\', \\'u\\', \\'v\\', \\'2t\\', \\'geopotential\\', \\'vorticity\\']", \\'strict\\': \\'False\\'}}\\n\\t\\t coordinates.StandardLongitude {\\'StandardLongitude\\': {\\'longitude_name\\': "\\'longitude\\'", \\'type\\': "\\'0-360\\'"}}\\n\\t\\t reshape.CoordinateFlatten {\\'CoordinateFlatten\\': {\\'__args\\': \\'()\\', \\'coordinate\\': "\\'level\\'", \\'skip_missing\\': \\'False\\'}}\\n\\t\\t idx_modification.TemporalRetrieval {\\'TemporalRetrieval\\': {\\'concat\\': \\'True\\', \\'delta_unit\\': \\'None\\', \\'merge_function\\': \\'None\\', \\'merge_kwargs\\': \\'None\\', \\'samples\\': \\'((-1, 1), (6, 1))\\'}}\\n\\t\\t conversion.ToNumpy {\\'ToNumpy\\': {\\'reference_dataset\\': \\'None\\', \\'run_parallel\\': \\'False\\', \\'saved_records\\': \\'None\\', \\'warn\\': \\'True\\'}}\\n\\t\\t reshape.Rearrange {\\'Rearrange\\': {\\'rearrange\\': "\\'c t h w -> t c h w\\'", \\'rearrange_kwargs\\': \\'None\\', \\'reverse_rearrange\\': \\'None\\', \\'skip\\': \\'False\\'}}\\n\\t\\t reshape.Squish {\\'Squish\\': {\\'axis\\': \\'0\\'}}, Deviation\\n\\tInitialisation Deviation Normalisation\\n\\t\\t deviation PosixPath(\\'/g/data/kd24/temp/cnn_training/stats/std.npy\\')\\n\\t\\t expand False\\n\\t\\t mean PosixPath(\\'/g/data/kd24/temp/cnn_training/stats/mean.npy\\'), Cache\\n\\tInitialisation An `pyearthtools.pipeline` implementation of the `CachingIndex` from `pyearthtools.data`.\\n\\t\\t cache \\'/g/data/kd24/temp/cnn_training/cache\\'\\n\\t\\t cache_validity \\'warn\\'\\n\\t\\t pattern None\\n\\t\\t 
pattern_kwargs {\\'extension\\': "\\'npy\\'"}\\n\\t\\t save_kwargs None)', 'exceptions_to_ignore': 'None', 'iterator': 'None', 'sampler': 'None'}}\n",
- "\t\t train_split {'Randomise': {'iterator': {'DateRange': {'end': "'2015-01-12T00'", 'interval': "'1h'", 'start': "'2015-01-01T00'"}}, 'seed': '42'}}\n",
- "\t\t valid_split {'DateRange': {'end': "'2016-01-12T00'", 'interval': "'1h'", 'start': "'2016-01-01T00'"}} "
- ],
- "text/plain": [
- "PipelineLightningDataModule\n",
- "\tInitialisation Pytorch Lightning DataModule.\n",
- "\t\t batch_size 1\n",
- "\t\t iterator_dataset False\n",
- "\t\t multiprocessing_context 'forkserver'\n",
- "\t\t num_workers 2\n",
- "\t\t persistent_workers True\n",
- "\t\t pipelines {'Pipeline': {'__args': '(Pipeline\\n\\tDescription `pyearthtools.pipeline` Data Pipeline\\n\\n\\n\\tInitialisation \\n\\t\\t exceptions_to_ignore None\\n\\t\\t iterator None\\n\\t\\t sampler None\\n\\tSteps \\n\\t\\t ERA5DataClass.ERA5LowResIndex {\\'ERA5LowResIndex\\': {\\'level_value\\': \\'None\\', \\'variables\\': \"[\\'2m_temperature\\', \\'u\\', \\'v\\', \\'geopotential\\', \\'vorticity\\']\"}}\\n\\t\\t sort.Sort {\\'Sort\\': {\\'order\\': \"[\\'t2\\', \\'u\\', \\'v\\', \\'2t\\', \\'geopotential\\', \\'vorticity\\']\", \\'strict\\': \\'False\\'}}\\n\\t\\t coordinates.StandardLongitude {\\'StandardLongitude\\': {\\'longitude_name\\': \"\\'longitude\\'\", \\'type\\': \"\\'0-360\\'\"}}\\n\\t\\t reshape.CoordinateFlatten {\\'CoordinateFlatten\\': {\\'__args\\': \\'()\\', \\'coordinate\\': \"\\'level\\'\", \\'skip_missing\\': \\'False\\'}}\\n\\t\\t idx_modification.TemporalRetrieval {\\'TemporalRetrieval\\': {\\'concat\\': \\'True\\', \\'delta_unit\\': \\'None\\', \\'merge_function\\': \\'None\\', \\'merge_kwargs\\': \\'None\\', \\'samples\\': \\'((-1, 1), (6, 1))\\'}}\\n\\t\\t conversion.ToNumpy {\\'ToNumpy\\': {\\'reference_dataset\\': \\'None\\', \\'run_parallel\\': \\'False\\', \\'saved_records\\': \\'None\\', \\'warn\\': \\'True\\'}}\\n\\t\\t reshape.Rearrange {\\'Rearrange\\': {\\'rearrange\\': \"\\'c t h w -> t c h w\\'\", \\'rearrange_kwargs\\': \\'None\\', \\'reverse_rearrange\\': \\'None\\', \\'skip\\': \\'False\\'}}\\n\\t\\t reshape.Squish {\\'Squish\\': {\\'axis\\': \\'0\\'}}, Deviation\\n\\tInitialisation Deviation Normalisation\\n\\t\\t deviation PosixPath(\\'/g/data/kd24/temp/cnn_training/stats/std.npy\\')\\n\\t\\t expand False\\n\\t\\t mean PosixPath(\\'/g/data/kd24/temp/cnn_training/stats/mean.npy\\'), Cache\\n\\tInitialisation An `pyearthtools.pipeline` implementation of the `CachingIndex` from `pyearthtools.data`.\\n\\t\\t cache \\'/g/data/kd24/temp/cnn_training/cache\\'\\n\\t\\t cache_validity \\'warn\\'\\n\\t\\t pattern 
None\\n\\t\\t pattern_kwargs {\\'extension\\': \"\\'npy\\'\"}\\n\\t\\t save_kwargs None)', 'exceptions_to_ignore': 'None', 'iterator': 'None', 'sampler': 'None'}}\n",
- "\t\t train_split {'Randomise': {'iterator': {'DateRange': {'end': \"'2015-01-12T00'\", 'interval': \"'1h'\", 'start': \"'2015-01-01T00'\"}}, 'seed': '42'}}\n",
- "\t\t valid_split {'DateRange': {'end': \"'2016-01-12T00'\", 'interval': \"'1h'\", 'start': \"'2016-01-01T00'\"}}"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_module"
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "e2b41f4f-a13d-45e2-bc25-5b8b6bd6555c",
"metadata": {
"tags": []
@@ -3658,109 +677,12 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"id": "cf979eb0-7966-4a78-b19e-1f434981803f",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/envs/pet/lib/python3.11/site-packages/torch/cuda/__init__.py:734: UserWarning: Can't initialize NVML\n",
- " warnings.warn(\"Can't initialize NVML\")\n",
- "GPU available: False, used: False\n",
- "TPU available: False, using: 0 TPU cores\n",
- "HPU available: False, using: 0 HPUs\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "┏━━━┳━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n",
- "┃ ┃ Name ┃ Type ┃ Params ┃ Mode ┃\n",
- "┡━━━╇━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n",
- "│ 0 │ cnn │ Sequential │ 98.1 K │ train │\n",
- "└───┴──────┴────────────┴────────┴───────┘\n",
- " \n"
- ],
- "text/plain": [
- "┏━━━┳━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n",
- "┃\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mName\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mType \u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mParams\u001b[0m\u001b[1;35m \u001b[0m┃\u001b[1;35m \u001b[0m\u001b[1;35mMode \u001b[0m\u001b[1;35m \u001b[0m┃\n",
- "┡━━━╇━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n",
- "│\u001b[2m \u001b[0m\u001b[2m0\u001b[0m\u001b[2m \u001b[0m│ cnn │ Sequential │ 98.1 K │ train │\n",
- "└───┴──────┴────────────┴────────┴───────┘\n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Trainable params : 98.1 K \n",
- "Non-trainable params : 0 \n",
- "Total params : 98.1 K \n",
- "Total estimated model params size (MB) : 0 \n",
- "Modules in train mode : 8 \n",
- "Modules in eval mode : 0 \n",
- " \n"
- ],
- "text/plain": [
- "\u001b[1mTrainable params\u001b[0m: 98.1 K \n",
- "\u001b[1mNon-trainable params\u001b[0m: 0 \n",
- "\u001b[1mTotal params\u001b[0m: 98.1 K \n",
- "\u001b[1mTotal estimated model params size (MB)\u001b[0m: 0 \n",
- "\u001b[1mModules in train mode\u001b[0m: 8 \n",
- "\u001b[1mModules in eval mode\u001b[0m: 0 \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "1b26c8dadf9c4846b8e88e68e5245a72",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Output()"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/envs/pet/lib/python3.11/site-packages/torch/cuda/__init__.py:734: UserWarning: Can't initialize NVML\n",
- " warnings.warn(\"Can't initialize NVML\")\n",
- "`Trainer.fit` stopped: `max_epochs=10` reached.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- " \n"
- ],
- "text/plain": []
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 2min, sys: 11.9 s, total: 2min 12s\n",
- "Wall time: 7min 31s\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time\n",
"# Initialise the trainer with the specified parameters\n",
@@ -3786,7 +708,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"id": "b1af821b-0d56-4c05-8d25-04403712f580",
"metadata": {
"tags": []
@@ -3811,7 +733,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"id": "5868ae48-4be0-4f87-a429-60cc5a2d5ce1",
"metadata": {
"tags": []
@@ -3835,505 +757,19 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"id": "e32d829b-df99-4f6e-bf14-750ee17e22bb",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
ReversedPipeline\n",
- "\tDescription `pyearthtools.pipeline` Data Pipeline\n",
- "\n",
- "\n",
- "\tInitialisation \n",
- "\t\t exceptions_to_ignore None\n",
- "\t\t iterator None\n",
- "\t\t sampler None\n",
- "\tSteps \n",
- "\t\t conversion.ToNumpy {'ToNumpy': {'reference_dataset': 'None', 'run_parallel': 'False', 'saved_records': 'None', 'warn': 'True'}}\n",
- "\t\t reshape.Rearrange {'Rearrange': {'rearrange': "'c t h w -> t c h w'", 'rearrange_kwargs': 'None', 'reverse_rearrange': 'None', 'skip': 'False'}}\n",
- "\t\t reshape.Squish {'Squish': {'axis': '0'}}\n",
- "\t\t normalisation.Deviation {'Deviation': {'deviation': "PosixPath('/g/data/kd24/temp/cnn_training/stats/std.npy')", 'expand': 'False', 'mean': "PosixPath('/g/data/kd24/temp/cnn_training/stats/mean.npy')"}} Initialisation:
exceptions_to_ignore
None
None
iterator
None
None
sampler
None
None
Steps:
conversion.ToNumpy
{'ToNumpy': {'reference_dataset': None, 'run_parallel': False, 'saved_records': None, 'warn': True}}
ToNumpy : {'reference_dataset': None, 'run_parallel': False, 'saved_records': None, 'warn': True}
reshape.Rearrange
{'Rearrange': {'rearrange': 'c t h w -> t c h w', 'rearrange_kwargs': None, 'reverse_rearrange': None, 'skip': False}}
Rearrange : {'rearrange': 'c t h w -> t c h w', 'rearrange_kwargs': None, 'reverse_rearrange': None, 'skip': False}
reshape.Squish
{'Squish': {'axis': 0}}
Squish : {'axis': 0}
normalisation.Deviation
{'Deviation': {'deviation': PosixPath('/g/data/kd24/temp/cnn_training/stats/std.npy'), 'expand': False, 'mean': PosixPath('/g/data/kd24/temp/cnn_training/stats/mean.npy')}}
Deviation : {'deviation': PosixPath('/g/data/kd24/temp/cnn_training/stats/std.npy'), 'expand': False, 'mean': PosixPath('/g/data/kd24/temp/cnn_training/stats/mean.npy')}
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "Graph "
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "image/svg+xml": [
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "%3 \n",
- " \n",
- "\n",
- "\n",
- "ToNumpy_77601bce-c45e-4552-b18f-a44d923b5160 \n",
- "\n",
- "conversion.ToNumpy \n",
- " \n",
- "\n",
- "\n",
- "Rearrange_47afc0ba-fb12-4ef6-ac07-7b7b49824971 \n",
- "\n",
- "reshape.Rearrange \n",
- " \n",
- "\n",
- "\n",
- "ToNumpy_77601bce-c45e-4552-b18f-a44d923b5160->Rearrange_47afc0ba-fb12-4ef6-ac07-7b7b49824971 \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Squish_0deff7d9-aa36-4eda-b8bc-57af987ace3b \n",
- "\n",
- "reshape.Squish \n",
- " \n",
- "\n",
- "\n",
- "Rearrange_47afc0ba-fb12-4ef6-ac07-7b7b49824971->Squish_0deff7d9-aa36-4eda-b8bc-57af987ace3b \n",
- " \n",
- " \n",
- " \n",
- "\n",
- "\n",
- "Deviation_8ea5831c-06ad-416b-b20e-eaaa7a2f53d4 \n",
- "\n",
- "normalisation.Deviation \n",
- " \n",
- "\n",
- "\n",
- "Squish_0deff7d9-aa36-4eda-b8bc-57af987ace3b->Deviation_8ea5831c-06ad-416b-b20e-eaaa7a2f53d4 \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"reverse_pipeline"
]
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"id": "072cbd6c-8a82-4213-99fc-811623ad8c41",
"metadata": {
"tags": []
@@ -4355,105 +791,10 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"id": "b4be6a52",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[[ 2.3695787e-02, 3.4472156e-02, 3.8872182e-02, ...,\n",
- " -1.6505459e-01, -7.1043633e-02, -7.3589510e-03],\n",
- " [-1.1685666e+00, -1.2352436e+00, -1.0288321e+00, ...,\n",
- " -1.2402556e+00, -1.1273944e+00, -1.1250806e+00],\n",
- " [ 4.7418383e-01, 5.6828344e-01, 5.2670640e-01, ...,\n",
- " -1.3003880e+00, -8.0562055e-01, -3.4530771e-01],\n",
- " ...,\n",
- " [ 2.9770502e-01, -2.1889141e-01, -6.8689555e-01, ...,\n",
- " 1.6709738e+00, 1.1905416e+00, 6.9149756e-01],\n",
- " [-9.0035774e-02, -4.1601011e-01, -6.1029559e-01, ...,\n",
- " 7.3746735e-01, 4.9150047e-01, 2.6068068e-01],\n",
- " [ 7.2759598e-01, 6.8387878e-01, 6.3957113e-01, ...,\n",
- " 8.8256121e-01, 8.2623446e-01, 7.7443182e-01]],\n",
- "\n",
- " [[ 2.2614898e-01, 2.0084491e-01, 1.6215383e-01, ...,\n",
- " 2.1369857e-01, 2.3551507e-01, 2.3804154e-01],\n",
- " [ 8.8026071e-01, 1.0657024e+00, 1.0752757e+00, ...,\n",
- " 3.9153094e-03, 2.1829823e-01, 5.3814679e-01],\n",
- " [ 9.7337114e-03, 9.0758377e-01, 1.3127176e+00, ...,\n",
- " -5.3510386e-01, -8.2094312e-01, -3.3512720e-01],\n",
- " ...,\n",
- " [ 5.8833838e-01, 2.1870729e-01, -1.6291448e-01, ...,\n",
- " 2.6878192e+00, 1.6849070e+00, 1.0852201e+00],\n",
- " [ 1.0361502e-01, -3.3136120e-01, -6.6939932e-01, ...,\n",
- " 1.0300777e+00, 8.1800663e-01, 4.4499493e-01],\n",
- " [ 2.7806267e-01, 1.4875136e-01, 3.4395169e-02, ...,\n",
- " 7.1838278e-01, 5.6235814e-01, 4.1677654e-01]],\n",
- "\n",
- " [[ 6.9511491e-01, 6.7035401e-01, 6.2954497e-01, ...,\n",
- " 5.9594870e-01, 6.6468102e-01, 6.9551498e-01],\n",
- " [ 6.5359187e-01, 8.8116461e-01, 1.0682575e+00, ...,\n",
- " 1.9818132e-01, 2.5561711e-01, 3.9381081e-01],\n",
- " [-1.8842438e-02, 2.4793606e-01, 3.8589108e-01, ...,\n",
- " -1.1467899e+00, -1.5513562e-01, 5.3157166e-02],\n",
- " ...,\n",
- " [ 9.2546499e-01, 4.9285889e-01, 1.2044082e-01, ...,\n",
- " 3.0710783e+00, 2.2187176e+00, 1.4915732e+00],\n",
- " [-2.3683874e-01, -7.5362957e-01, -1.3611470e+00, ...,\n",
- " 1.2287025e+00, 7.8522444e-01, 2.5963935e-01],\n",
- " [-4.0350714e-01, -5.9461677e-01, -7.5036871e-01, ...,\n",
- " 3.5771328e-01, 7.8715011e-02, -1.7835616e-01]],\n",
- "\n",
- " ...,\n",
- "\n",
- " [[ 1.1560211e+00, 1.2228196e+00, 1.2746993e+00, ...,\n",
- " 9.6942943e-01, 1.0412282e+00, 1.0984427e+00],\n",
- " [ 1.2728928e+00, 1.4944965e+00, 1.6380012e+00, ...,\n",
- " 7.5697541e-01, 9.2720622e-01, 1.0713543e+00],\n",
- " [ 1.2968856e+00, 1.7629174e+00, 2.0246139e+00, ...,\n",
- " 4.8912856e-01, 7.3104161e-01, 1.1360239e+00],\n",
- " ...,\n",
- " [ 2.0898745e+00, 1.7627238e+00, 1.3818336e+00, ...,\n",
- " 2.1537223e+00, 2.3092527e+00, 2.2930501e+00],\n",
- " [ 2.5414321e+00, 2.4272087e+00, 2.3223903e+00, ...,\n",
- " 2.4267261e+00, 2.5933301e+00, 2.6183462e+00],\n",
- " [ 2.5096502e+00, 2.5829365e+00, 2.6497037e+00, ...,\n",
- " 2.2820547e+00, 2.3603778e+00, 2.4377646e+00]],\n",
- "\n",
- " [[ 1.1300322e+00, 1.2002773e+00, 1.2489322e+00, ...,\n",
- " 9.3466324e-01, 1.0140271e+00, 1.0727222e+00],\n",
- " [ 1.1983809e+00, 1.4319460e+00, 1.5859385e+00, ...,\n",
- " 6.6535771e-01, 8.7529796e-01, 1.0020041e+00],\n",
- " [ 1.1545411e+00, 1.6534176e+00, 1.9289250e+00, ...,\n",
- " 2.3586062e-01, 5.8267975e-01, 1.0458156e+00],\n",
- " ...,\n",
- " [ 2.2465320e+00, 1.9288920e+00, 1.5423485e+00, ...,\n",
- " 2.2304432e+00, 2.4246225e+00, 2.4243124e+00],\n",
- " [ 2.7472579e+00, 2.5950105e+00, 2.4726503e+00, ...,\n",
- " 2.7235341e+00, 2.9525659e+00, 2.8836329e+00],\n",
- " [ 2.5010371e+00, 2.5503972e+00, 2.5974314e+00, ...,\n",
- " 2.3345232e+00, 2.3930345e+00, 2.4478638e+00]],\n",
- "\n",
- " [[ 1.1018562e+00, 1.1745058e+00, 1.2204952e+00, ...,\n",
- " 8.9758617e-01, 9.8510605e-01, 1.0447645e+00],\n",
- " [ 1.1246872e+00, 1.3683009e+00, 1.5318618e+00, ...,\n",
- " 5.7839805e-01, 8.2582539e-01, 9.3571031e-01],\n",
- " [ 1.0209082e+00, 1.5452837e+00, 1.8333354e+00, ...,\n",
- " -3.1051515e-03, 4.4984731e-01, 9.6108818e-01],\n",
- " ...,\n",
- " [ 2.3885703e+00, 2.0640602e+00, 1.6813680e+00, ...,\n",
- " 2.2750089e+00, 2.4787307e+00, 2.5486567e+00],\n",
- " [ 2.8522756e+00, 2.6466005e+00, 2.4811199e+00, ...,\n",
- " 2.8704958e+00, 3.1878827e+00, 3.0978796e+00],\n",
- " [ 2.6645415e+00, 2.6863616e+00, 2.7021067e+00, ...,\n",
- " 2.5457606e+00, 2.5938289e+00, 2.6327112e+00]]], dtype=float32)"
- ]
- },
- "execution_count": 30,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"data_preparation_normed[test_start][1]"
]
@@ -4465,39 +806,7 @@
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "09cfd88fb1a044a7950246fae880828b",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Output()"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "GPU available: False, used: False\n",
- "TPU available: False, using: 0 TPU cores\n",
- "HPU available: False, using: 0 HPUs\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 3min 44s, sys: 31 s, total: 4min 15s\n",
- "Wall time: 6min\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%%time \n",
"\n",
@@ -4617,19 +926,11 @@
"metadata": {},
"outputs": [],
"source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ffefd656-355c-428f-b99c-55784d7018f0",
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "PET_tutorial",
"language": "python",
"name": "python3"
},
@@ -4643,7 +944,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.7"
+ "version": "3.11.11"
}
},
"nbformat": 4,