From 0884afeefb0ad100aa295721100681419f0e3f88 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 11 Oct 2024 10:44:31 +0200 Subject: [PATCH 1/6] Allow grouping cubes by data for fix_metadata --- esmvalcore/cmor/_fixes/fix.py | 10 +++++++++ esmvalcore/cmor/fix.py | 42 ++++++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 5aa41f6486..d93dc0de35 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -40,6 +40,16 @@ class Fix: """Base class for dataset fixes.""" + GROUP_CUBES_BY_DATE = False + """Flag for grouping cubes for fix_metadata. + + Fixes are applied to each group element individually. + + If ``False`` (default), group cubes by file. If ``True``, group cubes by + date. + + """ + def __init__( self, vardef: VariableInfo, diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 2e3209897d..d509980fc9 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -10,15 +10,16 @@ import logging import warnings from collections import defaultdict -from collections.abc import Sequence +from collections.abc import Iterable, Sequence from pathlib import Path -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Optional from iris.cube import Cube, CubeList from esmvalcore.cmor._fixes.fix import Fix from esmvalcore.cmor.check import CheckLevels, _get_cmor_checker from esmvalcore.exceptions import ESMValCoreDeprecationWarning +from esmvalcore.local import _get_start_end_date if TYPE_CHECKING: from ..config import Session @@ -102,6 +103,27 @@ def fix_file( return file +def _group_cubes(fixes: Iterable[Fix], cubes: CubeList) -> dict[Any, CubeList]: + """Group cubes for fix_metadata; each group is processed individually.""" + grouped_cubes: dict[Any, CubeList] = defaultdict(CubeList) + + # Group by date + if any(fix.GROUP_CUBES_BY_DATE for fix in fixes): + for cube in cubes: + if "source_file" in cube.attributes: + dates = _get_start_end_date(cube.attributes["source_file"]) + else: + dates = None + grouped_cubes[dates].append(cube) + + # Group by file name + else: + for cube in cubes: + grouped_cubes[cube.attributes.get("source_file", "")].append(cube) + + return grouped_cubes + + def fix_metadata( cubes: Sequence[Cube], short_name: str, @@ -192,14 +214,14 @@ def fix_metadata( ) fixed_cubes = CubeList() - # Group cubes by input file and apply all fixes to each group element - # (i.e., each file) individually - by_file = defaultdict(list) - for cube in cubes: - by_file[cube.attributes.get("source_file", "")].append(cube) - - for cube_list in by_file.values(): - cube_list = CubeList(cube_list) + # Group cubes and apply all fixes to each group element individually. There + # are two options for grouping: + # (1) By input file name (default). + # (2) By time range (can be enabled by setting the attribute + # GROUP_CUBES_BY_DATE=True for the fix class; see + # _fixes.native6.era5.Rsut for an example). + grouped_cubes = _group_cubes(fixes, cubes) + for cube_list in grouped_cubes.values(): for fix in fixes: cube_list = fix.fix_metadata(cube_list) From 168c9f2a107ad7786f07f06a49774e5ab9a31a16 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 11 Oct 2024 10:44:40 +0200 Subject: [PATCH 2/6] Added ERA5 fix for rsut --- esmvalcore/cmor/_fixes/native6/era5.py | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/esmvalcore/cmor/_fixes/native6/era5.py b/esmvalcore/cmor/_fixes/native6/era5.py index aa4beae984..3d25adf191 100644 --- a/esmvalcore/cmor/_fixes/native6/era5.py +++ b/esmvalcore/cmor/_fixes/native6/era5.py @@ -304,6 +304,44 @@ def fix_metadata(self, cubes): return cubes +class Rsut(Fix): + """Fixes for rsut.""" + + # Enable grouping cubes by date for fix_metadata since multiple variables + # from multiple files are needed + GROUP_CUBES_BY_DATE = True + + def fix_metadata(self, cubes): + """Fix metadata. + + Derive rsut as + + rsut = rsdt - rsnt + + with + + rsut = TOA Outgoing Shortwave Radiation + rsdt = TOA Incoming Shortwave Radiation + rsnt = TOA Net Incoming Shortwave Radiation + + """ + rsdt_cube = cubes.extract_cube( + iris.NameConstraint(long_name="TOA incident solar radiation") + ) + rsnt_cube = cubes.extract_cube( + iris.NameConstraint( + long_name="Mean top net short-wave radiation flux" + ) + ) + rsdt_cube = Rsdt(None).fix_metadata([rsdt_cube])[0] + rsdt_cube.convert_units(self.vardef.units) + + rsdt_cube.data = rsdt_cube.core_data() - rsnt_cube.core_data() + rsdt_cube.attributes["positive"] = "up" + + return iris.cube.CubeList([rsdt_cube]) + + class Rss(Fix): """Fixes for Rss.""" From 73450fa33b902053418c1bff37953a9780289fa0 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 11 Oct 2024 11:41:21 +0200 Subject: [PATCH 3/6] Fix test --- tests/unit/cmor/test_fix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/cmor/test_fix.py b/tests/unit/cmor/test_fix.py index 01038d2786..c51073a2fb 100644 --- a/tests/unit/cmor/test_fix.py +++ b/tests/unit/cmor/test_fix.py @@ -121,6 +121,7 @@ def setUp(self): self.fixed_cube = self._create_mock_cube() self.mock_fix = Mock() self.mock_fix.fix_metadata.return_value = [self.intermediate_cube] + self.mock_fix.GROUP_CUBES_BY_DATE = False self.checker = Mock() self.check_metadata = self.checker.return_value.check_metadata self.expected_get_fixes_call = { From 80273040480a08a8d05fcd16938505d4bf20a759 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 28 May 2025 13:02:23 +0200 Subject: [PATCH 4/6] Safe fixes --- esmvalcore/cmor/_fixes/fix.py | 35 +++++++++++++++----------- esmvalcore/cmor/_fixes/native6/era5.py | 25 ++++++------------ esmvalcore/cmor/fix.py | 9 +++---- 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index d535a9b010..25ca2e42df 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -2,11 +2,11 @@ from __future__ import annotations -import contextlib import importlib import inspect import logging import tempfile +from collections.abc import Sequence from pathlib import Path from typing import TYPE_CHECKING @@ -30,8 +30,6 @@ from esmvalcore.iris_helpers import has_unstructured_grid, safe_convert_units if TYPE_CHECKING: - from collections.abc import Sequence - from esmvalcore.cmor.table import CoordinateInfo, VariableInfo from esmvalcore.config import Session @@ -167,8 +165,7 @@ def get_cube_from_list( for cube in cubes: if cube.var_name == short_name: return cube - msg = f'Cube for variable "{short_name}" not found' - raise ValueError(msg) + raise ValueError(f'Cube for variable "{short_name}" not found') def fix_data(self, cube: Cube) -> Cube: """Apply fixes to the data of the cube. @@ -262,33 +259,37 @@ def get_fixes( if project == "cordex": driver = extra_facets["driver"].replace("-", "_").lower() extra_facets["dataset"] = dataset - with contextlib.suppress(ImportError): + try: fixes_modules.append( importlib.import_module( f"esmvalcore.cmor._fixes.{project}.{driver}.{dataset}", ), ) + except ImportError: + pass fixes_modules.append( importlib.import_module( "esmvalcore.cmor._fixes.cordex.cordex_fixes", ), ) else: - with contextlib.suppress(ImportError): + try: fixes_modules.append( importlib.import_module( f"esmvalcore.cmor._fixes.{project}.{dataset}", ), ) + except ImportError: + pass for fixes_module in fixes_modules: - classes = { - name.lower(): value + classes = dict( + (name.lower(), value) for (name, value) in inspect.getmembers( fixes_module, inspect.isclass, ) - } + ) for fix_name in (short_name, mip.lower(), "allvars"): if fix_name in classes: fixes.append( @@ -710,7 +711,7 @@ def _fix_longitude_0_360( cube_coord: Coord, ) -> tuple[Cube, Coord]: """Fix longitude coordinate to be in [0, 360].""" - if cube_coord.standard_name != "longitude": + if not cube_coord.standard_name == "longitude": return (cube, cube_coord) points = cube_coord.core_points() @@ -854,21 +855,25 @@ def _fix_time_units(self, cube: Cube, cube_coord: Coord) -> None: branch_parent = "branch_time_in_parent" if branch_parent in attrs: - # Overflow happens when the time is very large. - with contextlib.suppress(OverflowError): + try: attrs[branch_parent] = parent_units.convert( attrs[branch_parent], cube_coord.units, ) + except OverflowError: + # This happens when the time is very large. + pass branch_child = "branch_time_in_child" if branch_child in attrs: - # Overflow happens when the time is very large. - with contextlib.suppress(OverflowError): + try: attrs[branch_child] = old_units.convert( attrs[branch_child], cube_coord.units, ) + except OverflowError: + # This happens when the time is very large. + pass def _fix_time_bounds(self, cube: Cube, cube_coord: Coord) -> None: """Fix time bounds.""" diff --git a/esmvalcore/cmor/_fixes/native6/era5.py b/esmvalcore/cmor/_fixes/native6/era5.py index 814aa9b6d4..08861da220 100644 --- a/esmvalcore/cmor/_fixes/native6/era5.py +++ b/esmvalcore/cmor/_fixes/native6/era5.py @@ -33,12 +33,9 @@ def get_frequency(cube): "Percentage of the Grid Cell Occupied by Land (Including Lakes)", ) if cube.long_name not in acceptable_long_names: - msg = ( - "Unable to infer frequency of cube " - f"with length 1 time dimension: {cube}" - ) raise ValueError( - msg, + "Unable to infer frequency of cube " + f"with length 1 time dimension: {cube}", ) return "fx" @@ -66,12 +63,9 @@ def fix_accumulated_units(cube): elif get_frequency(cube) == "hourly": cube.units = cube.units * "h-1" elif get_frequency(cube) == "daily": - msg = ( - f"Fixing of accumulated units of cube " - f"{cube.summary(shorten=True)} is not implemented for daily data" - ) raise NotImplementedError( - msg, + f"Fixing of accumulated units of cube " + f"{cube.summary(shorten=True)} is not implemented for daily data", ) return cube @@ -436,12 +430,12 @@ def fix_metadata(self, cubes): """ rsdt_cube = cubes.extract_cube( - iris.NameConstraint(long_name="TOA incident solar radiation") + iris.NameConstraint(long_name="TOA incident solar radiation"), ) rsnt_cube = cubes.extract_cube( iris.NameConstraint( - long_name="Mean top net short-wave radiation flux" - ) + long_name="Mean top net short-wave radiation flux", + ), ) rsdt_cube = Rsdt(None).fix_metadata([rsdt_cube])[0] rsdt_cube.convert_units(self.vardef.units) @@ -535,10 +529,7 @@ def fix_metadata(self, cubes): class AllVars(Fix): """Fixes for all variables.""" - def _fix_coordinates( # noqa: C901 - self, - cube, - ): + def _fix_coordinates(self, cube): """Fix coordinates.""" # Add scalar height coordinates if "height2m" in self.vardef.dimensions: diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 2c73d6c621..e9448e06e4 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -11,7 +11,7 @@ from collections import defaultdict from collections.abc import Iterable, Sequence from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any from iris.cube import Cube, CubeList @@ -19,15 +19,12 @@ from esmvalcore.local import _get_start_end_date if TYPE_CHECKING: - from collections.abc import Sequence - from pathlib import Path - - from esmvalcore.config import Session + from ..config import Session logger = logging.getLogger(__name__) -def fix_file( # noqa: PLR0913 +def fix_file( file: Path, short_name: str, project: str, From 7e41997b37411f5c7641cd95ac0f9ba487fc6603 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 28 May 2025 13:02:42 +0200 Subject: [PATCH 5/6] Unsafe fixes --- esmvalcore/cmor/_fixes/fix.py | 23 +++++++++++------------ esmvalcore/cmor/_fixes/native6/era5.py | 14 ++++++++++---- esmvalcore/cmor/fix.py | 7 ++++--- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 25ca2e42df..dc93c65506 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -2,11 +2,11 @@ from __future__ import annotations +import contextlib import importlib import inspect import logging import tempfile -from collections.abc import Sequence from pathlib import Path from typing import TYPE_CHECKING @@ -30,6 +30,8 @@ from esmvalcore.iris_helpers import has_unstructured_grid, safe_convert_units if TYPE_CHECKING: + from collections.abc import Sequence + from esmvalcore.cmor.table import CoordinateInfo, VariableInfo from esmvalcore.config import Session @@ -165,7 +167,8 @@ def get_cube_from_list( for cube in cubes: if cube.var_name == short_name: return cube - raise ValueError(f'Cube for variable "{short_name}" not found') + msg = f'Cube for variable "{short_name}" not found' + raise ValueError(msg) def fix_data(self, cube: Cube) -> Cube: """Apply fixes to the data of the cube. @@ -259,37 +262,33 @@ def get_fixes( if project == "cordex": driver = extra_facets["driver"].replace("-", "_").lower() extra_facets["dataset"] = dataset - try: + with contextlib.suppress(ImportError): fixes_modules.append( importlib.import_module( f"esmvalcore.cmor._fixes.{project}.{driver}.{dataset}", ), ) - except ImportError: - pass fixes_modules.append( importlib.import_module( "esmvalcore.cmor._fixes.cordex.cordex_fixes", ), ) else: - try: + with contextlib.suppress(ImportError): fixes_modules.append( importlib.import_module( f"esmvalcore.cmor._fixes.{project}.{dataset}", ), ) - except ImportError: - pass for fixes_module in fixes_modules: - classes = dict( - (name.lower(), value) + classes = { + name.lower(): value for (name, value) in inspect.getmembers( fixes_module, inspect.isclass, ) - ) + } for fix_name in (short_name, mip.lower(), "allvars"): if fix_name in classes: fixes.append( @@ -711,7 +710,7 @@ def _fix_longitude_0_360( cube_coord: Coord, ) -> tuple[Cube, Coord]: """Fix longitude coordinate to be in [0, 360].""" - if not cube_coord.standard_name == "longitude": + if cube_coord.standard_name != "longitude": return (cube, cube_coord) points = cube_coord.core_points() diff --git a/esmvalcore/cmor/_fixes/native6/era5.py b/esmvalcore/cmor/_fixes/native6/era5.py index 08861da220..100185d0e9 100644 --- a/esmvalcore/cmor/_fixes/native6/era5.py +++ b/esmvalcore/cmor/_fixes/native6/era5.py @@ -33,9 +33,12 @@ def get_frequency(cube): "Percentage of the Grid Cell Occupied by Land (Including Lakes)", ) if cube.long_name not in acceptable_long_names: - raise ValueError( + msg = ( "Unable to infer frequency of cube " - f"with length 1 time dimension: {cube}", + f"with length 1 time dimension: {cube}" + ) + raise ValueError( + msg, ) return "fx" @@ -63,9 +66,12 @@ def fix_accumulated_units(cube): elif get_frequency(cube) == "hourly": cube.units = cube.units * "h-1" elif get_frequency(cube) == "daily": - raise NotImplementedError( + msg = ( f"Fixing of accumulated units of cube " - f"{cube.summary(shorten=True)} is not implemented for daily data", + f"{cube.summary(shorten=True)} is not implemented for daily data" + ) + raise NotImplementedError( + msg, ) return cube diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index e9448e06e4..3fc77d0418 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -9,8 +9,6 @@ import logging from collections import defaultdict -from collections.abc import Iterable, Sequence -from pathlib import Path from typing import TYPE_CHECKING, Any from iris.cube import Cube, CubeList @@ -19,7 +17,10 @@ from esmvalcore.local import _get_start_end_date if TYPE_CHECKING: - from ..config import Session + from collections.abc import Iterable, Sequence + from pathlib import Path + + from esmvalcore.config import Session logger = logging.getLogger(__name__) From eae214ee75f1fc5d32c25d7b756febbd7c5e0e24 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 28 May 2025 13:05:32 +0200 Subject: [PATCH 6/6] Manual fixes --- esmvalcore/cmor/_fixes/fix.py | 12 ++++-------- esmvalcore/cmor/_fixes/native6/era5.py | 2 +- esmvalcore/cmor/fix.py | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index dc93c65506..d535a9b010 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -854,25 +854,21 @@ def _fix_time_units(self, cube: Cube, cube_coord: Coord) -> None: branch_parent = "branch_time_in_parent" if branch_parent in attrs: - try: + # Overflow happens when the time is very large. + with contextlib.suppress(OverflowError): attrs[branch_parent] = parent_units.convert( attrs[branch_parent], cube_coord.units, ) - except OverflowError: - # This happens when the time is very large. - pass branch_child = "branch_time_in_child" if branch_child in attrs: - try: + # Overflow happens when the time is very large. + with contextlib.suppress(OverflowError): attrs[branch_child] = old_units.convert( attrs[branch_child], cube_coord.units, ) - except OverflowError: - # This happens when the time is very large. - pass def _fix_time_bounds(self, cube: Cube, cube_coord: Coord) -> None: """Fix time bounds.""" diff --git a/esmvalcore/cmor/_fixes/native6/era5.py b/esmvalcore/cmor/_fixes/native6/era5.py index 100185d0e9..09bf4bc321 100644 --- a/esmvalcore/cmor/_fixes/native6/era5.py +++ b/esmvalcore/cmor/_fixes/native6/era5.py @@ -535,7 +535,7 @@ def fix_metadata(self, cubes): class AllVars(Fix): """Fixes for all variables.""" - def _fix_coordinates(self, cube): + def _fix_coordinates(self, cube): # noqa: C901 """Fix coordinates.""" # Add scalar height coordinates if "height2m" in self.vardef.dimensions: diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index 3fc77d0418..7a8cad1b1d 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -def fix_file( +def fix_file( # noqa: PLR0913 file: Path, short_name: str, project: str,