diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8eb16a4c6..663f087a0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -80,6 +80,7 @@ If upgrading from v2.x, see the [v3.0.0 release notes](https://github.com/flixOp
 - **Color terminology**: Standardized terminology from "colormap" to "colorscale" throughout the codebase for consistency with Plotly conventions
 - **Default colorscales**: Changed default sequential colorscale from 'viridis' to 'turbo' for better perceptual uniformity; qualitative colorscale now defaults to 'plotly'
 - **Aggregation plotting**: `Aggregation.plot()` now respects `CONFIG.Plotting.default_qualitative_colorscale` and uses `process_colors()` for consistent color handling
+- **netcdf engine**: Following the xarray revert in `xarray==2025.09.2` and subsequent benchmarking, we switch back to the netcdf4 engine

 ### 🗑️ Deprecated

diff --git a/flixopt/calculation.py b/flixopt/calculation.py
index 9d2164e1e..f744c5247 100644
--- a/flixopt/calculation.py
+++ b/flixopt/calculation.py
@@ -22,7 +22,6 @@ import yaml

 from . import io as fx_io
-from . import utils as utils
 from .aggregation import Aggregation, AggregationModel, AggregationParameters
 from .components import Storage
 from .config import CONFIG
@@ -144,7 +143,7 @@ def main_results(self) -> dict[str, Scalar | dict]:
             ],
         }

-        return utils.round_nested_floats(main_results)
+        return fx_io.round_nested_floats(main_results)

     @property
     def summary(self):
@@ -253,7 +252,7 @@ def solve(
         logger.info(
             f'{" Main Results ":#^80}\n'
             + yaml.dump(
-                utils.round_nested_floats(self.main_results),
+                self.main_results,
                 default_flow_style=False,
                 sort_keys=False,
                 allow_unicode=True,
diff --git a/flixopt/config.py b/flixopt/config.py
index b7162e55f..670f86da2 100644
--- a/flixopt/config.py
+++ b/flixopt/config.py
@@ -8,7 +8,6 @@ from types import MappingProxyType
 from typing import Literal

-import yaml
 from rich.console import Console
 from rich.logging import RichHandler
 from rich.style import Style
@@ -299,13 +298,15 @@ def load_from_file(cls, config_file: str | Path):
         Raises:
             FileNotFoundError: If the config file does not exist.
         """
+        # Import here to avoid circular import
+        from . import io as fx_io
+
         config_path = Path(config_file)
         if not config_path.exists():
             raise FileNotFoundError(f'Config file not found: {config_file}')

-        with config_path.open() as file:
-            config_dict = yaml.safe_load(file) or {}
-            cls._apply_config_dict(config_dict)
+        config_dict = fx_io.load_yaml(config_path)
+        cls._apply_config_dict(config_dict)

         cls.apply()
diff --git a/flixopt/io.py b/flixopt/io.py
index 53d3d8e8a..059670ddd 100644
--- a/flixopt/io.py
+++ b/flixopt/io.py
@@ -1,13 +1,13 @@
 from __future__ import annotations

-import importlib.util
 import json
 import logging
 import pathlib
 import re
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Any

+import numpy as np
 import xarray as xr
 import yaml

@@ -34,7 +34,235 @@ def remove_none_and_empty(obj):
     return obj


-def _save_to_yaml(data, output_file='formatted_output.yaml'):
+def round_nested_floats(obj: dict | list | float | int | Any, decimals: int = 2) -> dict | list | float | int | Any:
+    """Recursively round floating point numbers in nested data structures, converting NumPy and xarray values to Python native types.
+
+    This function traverses nested data structures (dictionaries, lists) and rounds
+    any floating point numbers to the specified number of decimal places. It handles
+    various data types including NumPy arrays and xarray DataArrays by converting
+    them to lists with rounded values.
+
+    Args:
+        obj: The object to process. Can be a dict, list, float, int, numpy.ndarray,
+            xarray.DataArray, or any other type.
+        decimals (int, optional): Number of decimal places to round to. Defaults to 2.
+
+    Returns:
+        The processed object with the same structure as the input, but with all
+        floating point numbers rounded to the specified precision. NumPy arrays
+        and xarray DataArrays are converted to lists.
+
+    Examples:
+        >>> data = {'a': 3.14159, 'b': [1.234, 2.678]}
+        >>> round_nested_floats(data, decimals=2)
+        {'a': 3.14, 'b': [1.23, 2.68]}
+
+        >>> import numpy as np
+        >>> arr = np.array([1.234, 5.678])
+        >>> round_nested_floats(arr, decimals=1)
+        [1.2, 5.7]
+    """
+    if isinstance(obj, dict):
+        return {k: round_nested_floats(v, decimals) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [round_nested_floats(v, decimals) for v in obj]
+    elif isinstance(obj, np.floating):
+        return round(float(obj), decimals)
+    elif isinstance(obj, np.integer):
+        return int(obj)
+    elif isinstance(obj, np.bool_):
+        return bool(obj)
+    elif isinstance(obj, float):
+        return round(obj, decimals)
+    elif isinstance(obj, int):
+        return obj
+    elif isinstance(obj, np.ndarray):
+        return np.round(obj, decimals).tolist()
+    elif isinstance(obj, xr.DataArray):
+        return obj.round(decimals).values.tolist()
+    return obj
+
+
+# ============================================================================
+# Centralized JSON and YAML I/O Functions
+# ============================================================================
+
+
+def load_json(path: str | pathlib.Path) -> dict | list:
+    """
+    Load data from a JSON file.
+
+    Args:
+        path: Path to the JSON file.
+
+    Returns:
+        Loaded data (typically dict or list).
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        json.JSONDecodeError: If the file is not valid JSON.
+    """
+    path = pathlib.Path(path)
+    with open(path, encoding='utf-8') as f:
+        return json.load(f)
+
+
+def save_json(
+    data: dict | list,
+    path: str | pathlib.Path,
+    indent: int = 4,
+    ensure_ascii: bool = False,
+    **kwargs,
+) -> None:
+    """
+    Save data to a JSON file with consistent formatting.
+
+    Args:
+        data: Data to save (dict or list).
+        path: Path to save the JSON file.
+        indent: Number of spaces for indentation (default: 4).
+        ensure_ascii: If False, allow Unicode characters (default: False).
+        **kwargs: Additional arguments to pass to json.dump().
+    """
+    path = pathlib.Path(path)
+    with open(path, 'w', encoding='utf-8') as f:
+        json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
+
+
+def load_yaml(path: str | pathlib.Path) -> dict | list:
+    """
+    Load data from a YAML file.
+
+    Args:
+        path: Path to the YAML file.
+
+    Returns:
+        Loaded data (typically dict or list), or an empty dict if the file is empty.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        yaml.YAMLError: If the file is not valid YAML.
+
+    Note:
+        Returns {} for empty YAML files instead of None.
+    """
+    path = pathlib.Path(path)
+    with open(path, encoding='utf-8') as f:
+        return yaml.safe_load(f) or {}
+
+
+def _load_yaml_unsafe(path: str | pathlib.Path) -> dict | list:
+    """
+    INTERNAL: Load YAML allowing arbitrary tags. Do not use on untrusted input.
+
+    This function exists only for loading internally-generated files that may
+    contain custom YAML tags. Never use this on user-provided files.
+
+    Args:
+        path: Path to the YAML file.
+
+    Returns:
+        Loaded data (typically dict or list), or an empty dict if the file is empty.
+    """
+    path = pathlib.Path(path)
+    with open(path, encoding='utf-8') as f:
+        return yaml.unsafe_load(f) or {}
+
+
+def save_yaml(
+    data: dict | list,
+    path: str | pathlib.Path,
+    indent: int = 4,
+    width: int = 1000,
+    allow_unicode: bool = True,
+    sort_keys: bool = False,
+    **kwargs,
+) -> None:
+    """
+    Save data to a YAML file with consistent formatting.
+
+    Args:
+        data: Data to save (dict or list).
+        path: Path to save the YAML file.
+        indent: Number of spaces for indentation (default: 4).
+        width: Maximum line width (default: 1000).
+        allow_unicode: If True, allow Unicode characters (default: True).
+        sort_keys: If True, sort dictionary keys (default: False).
+        **kwargs: Additional arguments to pass to yaml.safe_dump().
+    """
+    path = pathlib.Path(path)
+    with open(path, 'w', encoding='utf-8') as f:
+        yaml.safe_dump(
+            data,
+            f,
+            indent=indent,
+            width=width,
+            allow_unicode=allow_unicode,
+            sort_keys=sort_keys,
+            default_flow_style=False,
+            **kwargs,
+        )
+
+
+def load_config_file(path: str | pathlib.Path) -> dict:
+    """
+    Load a configuration file, automatically detecting JSON or YAML format.
+
+    The parser is chosen from the file extension, with a fallback to the
+    other format if the primary one fails.
+
+    Supported extensions:
+        - .json: Tries JSON first, falls back to YAML
+        - .yaml, .yml: Tries YAML first, falls back to JSON
+        - Others: Tries YAML, then JSON
+
+    Args:
+        path: Path to the configuration file.
+
+    Returns:
+        Loaded configuration as a dictionary.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        ValueError: If neither JSON nor YAML parsing succeeds.
+    """
+    path = pathlib.Path(path)
+
+    if not path.exists():
+        raise FileNotFoundError(f'Configuration file not found: {path}')
+
+    # Try based on file extension
+    # Normalize extension to lowercase for case-insensitive matching
+    suffix = path.suffix.lower()
+
+    if suffix == '.json':
+        try:
+            return load_json(path)
+        except json.JSONDecodeError:
+            logger.warning(f'Failed to parse {path} as JSON, trying YAML')
+            try:
+                return load_yaml(path)
+            except yaml.YAMLError as e:
+                raise ValueError(f'Failed to parse {path} as JSON or YAML') from e
+
+    elif suffix in ['.yaml', '.yml']:
+        try:
+            return load_yaml(path)
+        except yaml.YAMLError:
+            logger.warning(f'Failed to parse {path} as YAML, trying JSON')
+            try:
+                return load_json(path)
+            except json.JSONDecodeError as e:
+                raise ValueError(f'Failed to parse {path} as YAML or JSON') from e
+
+    else:
+        # Unknown extension, try YAML first (more common for config)
+        try:
+            return load_yaml(path)
+        except yaml.YAMLError:
+            try:
+                return load_json(path)
+            except json.JSONDecodeError as e:
+                raise ValueError(f'Failed to parse {path} as YAML or JSON') from e
+
+
+def _save_yaml_multiline(data, output_file='formatted_output.yaml'):
     """
     Save dictionary data to YAML with proper multi-line string formatting.

     Handles complex string patterns including backticks, special characters,
@@ -62,14 +290,14 @@ def represent_str(dumper, data):
         # Use plain style for simple strings
         return dumper.represent_scalar('tag:yaml.org,2002:str', data)

-    # Add the string representer to SafeDumper
-    yaml.add_representer(str, represent_str, Dumper=yaml.SafeDumper)
-
     # Configure dumper options for better formatting
     class CustomDumper(yaml.SafeDumper):
         def increase_indent(self, flow=False, indentless=False):
             return super().increase_indent(flow, False)

+    # Bind representer locally to CustomDumper to avoid global side effects
+    CustomDumper.add_representer(str, represent_str)
+
     # Write to file with settings that ensure proper formatting
     with open(output_file, 'w', encoding='utf-8') as file:
         yaml.dump(
@@ -80,7 +308,7 @@ def increase_indent(self, flow=False, indentless=False):
             default_flow_style=False,  # Use block style for mappings
             width=1000,  # Set a reasonable line width
             allow_unicode=True,  # Support Unicode characters
-            indent=2,  # Set consistent indentation
+            indent=4,  # Set consistent indentation
@@ -190,7 +418,7 @@ def document_linopy_model(model: linopy.Model, path: pathlib.Path | None = None)
     if path is not None:
         if path.suffix not in ['.yaml', '.yml']:
             raise ValueError(f'Invalid file extension for path {path}. Only .yaml and .yml are supported')
-        _save_to_yaml(documentation, str(path))
+        _save_yaml_multiline(documentation, str(path))

     return documentation

@@ -199,7 +427,6 @@ def save_dataset_to_netcdf(
     ds: xr.Dataset,
     path: str | pathlib.Path,
     compression: int = 0,
-    engine: Literal['netcdf4', 'scipy', 'h5netcdf'] = 'h5netcdf',
 ) -> None:
     """
     Save a dataset to a netcdf file. Store all attrs as JSON strings in 'attrs' attributes.
@@ -216,16 +443,6 @@
     if path.suffix not in ['.nc', '.nc4']:
         raise ValueError(f'Invalid file extension for path {path}. Only .nc and .nc4 are supported')

-    apply_encoding = False
-    if compression != 0:
-        if importlib.util.find_spec(engine) is not None:
-            apply_encoding = True
-        else:
-            logger.warning(
-                f'Dataset was exported without compression due to missing dependency "{engine}".'
-                f'Install {engine} via `pip install {engine}`.'
-            )
-
     ds = ds.copy(deep=True)
     ds.attrs = {'attrs': json.dumps(ds.attrs)}

@@ -242,9 +459,9 @@
     ds.to_netcdf(
         path,
         encoding=None
-        if not apply_encoding
+        if compression == 0
         else {data_var: {'zlib': True, 'complevel': compression} for data_var in ds.data_vars},
-        engine=engine,
+        engine='netcdf4',
     )

@@ -258,7 +475,7 @@ def load_dataset_from_netcdf(path: str | pathlib.Path) -> xr.Dataset:
     Returns:
         Dataset: Loaded dataset with restored attrs.
     """
-    ds = xr.load_dataset(str(path), engine='h5netcdf')
+    ds = xr.load_dataset(str(path), engine='netcdf4')

     # Restore Dataset attrs
     if 'attrs' in ds.attrs:
diff --git a/flixopt/results.py b/flixopt/results.py
index 847ee5a7f..950570df3 100644
--- a/flixopt/results.py
+++ b/flixopt/results.py
@@ -2,7 +2,6 @@

 import copy
 import datetime
-import json
 import logging
 import pathlib
 import warnings
@@ -12,7 +11,6 @@
 import numpy as np
 import pandas as pd
 import xarray as xr
-import yaml

 from . import io as fx_io
 from . import plotting
@@ -46,41 +44,7 @@ def load_mapping_from_file(path: pathlib.Path) -> dict[str, str | list[str]]:
     Raises:
         ValueError: If file cannot be loaded as JSON or YAML
     """
-    suffix = path.suffix.lower()
-
-    if suffix == '.json':
-        # Try JSON first, fallback to YAML
-        try:
-            with open(path) as f:
-                return json.load(f)
-        except json.JSONDecodeError:
-            try:
-                with open(path) as f:
-                    return yaml.safe_load(f)
-            except Exception:
-                raise ValueError(f'Could not load config from {path}') from None
-    elif suffix in {'.yaml', '.yml'}:
-        # Try YAML first, fallback to JSON
-        try:
-            with open(path) as f:
-                return yaml.safe_load(f)
-        except yaml.YAMLError:
-            try:
-                with open(path) as f:
-                    return json.load(f)
-            except Exception:
-                raise ValueError(f'Could not load config from {path}') from None
-    else:
-        # Unknown extension, try both starting with JSON
-        try:
-            with open(path) as f:
-                return json.load(f)
-        except json.JSONDecodeError:
-            try:
-                with open(path) as f:
-                    return yaml.safe_load(f)
-            except Exception:
-                raise ValueError(f'Could not load config from {path}') from None
+    return fx_io.load_config_file(path)


 class _FlowSystemRestorationError(Exception):
@@ -205,8 +169,7 @@ def from_file(cls, folder: str | pathlib.Path, name: str) -> CalculationResults:
         except Exception as e:
             logger.critical(f'Could not load the linopy model "{name}" from file ("{paths.linopy_model}"): {e}')

-        with open(paths.summary, encoding='utf-8') as f:
-            summary = yaml.load(f, Loader=yaml.FullLoader)
+        summary = fx_io.load_yaml(paths.summary)

         return cls(
             solution=fx_io.load_dataset_from_netcdf(paths.solution),
@@ -1093,14 +1056,13 @@ def to_file(
         fx_io.save_dataset_to_netcdf(self.solution, paths.solution, compression=compression)
         fx_io.save_dataset_to_netcdf(self.flow_system_data, paths.flow_system, compression=compression)

-        with open(paths.summary, 'w', encoding='utf-8') as f:
-            yaml.dump(self.summary, f, allow_unicode=True, sort_keys=False, indent=4, width=1000)
+        fx_io.save_yaml(self.summary, paths.summary)

         if save_linopy_model:
             if self.model is None:
                 logger.critical('No model in the CalculationResults. Saving the model is not possible.')
             else:
-                self.model.to_netcdf(paths.linopy_model, engine='h5netcdf')
+                self.model.to_netcdf(paths.linopy_model, engine='netcdf4')

         if document_model:
             if self.model is None:
@@ -2085,8 +2047,7 @@ def from_file(cls, folder: str | pathlib.Path, name: str) -> SegmentedCalculatio
         folder = pathlib.Path(folder)
         path = folder / name
         logger.info(f'loading calculation "{name}" from file ("{path.with_suffix(".nc4")}")')
-        with open(path.with_suffix('.json'), encoding='utf-8') as f:
-            meta_data = json.load(f)
+        meta_data = fx_io.load_json(path.with_suffix('.json'))
         return cls(
             [CalculationResults.from_file(folder, sub_name) for sub_name in meta_data['sub_calculations']],
             all_timesteps=pd.DatetimeIndex(
@@ -2330,8 +2291,7 @@ def to_file(self, folder: str | pathlib.Path | None = None, name: str | None = N
         for segment in self.segment_results:
             segment.to_file(folder=folder, name=segment.name, compression=compression)

-        with open(path.with_suffix('.json'), 'w', encoding='utf-8') as f:
-            json.dump(self.meta_data, f, indent=4, ensure_ascii=False)
+        fx_io.save_json(self.meta_data, path.with_suffix('.json'))

         logger.info(f'Saved calculation "{name}" to {path}')
diff --git a/flixopt/structure.py b/flixopt/structure.py
index 07c558eee..6ea618454 100644
--- a/flixopt/structure.py
+++ b/flixopt/structure.py
@@ -6,7 +6,6 @@
 from __future__ import annotations

 import inspect
-import json
 import logging
 from dataclasses import dataclass
 from io import StringIO
@@ -788,8 +787,7 @@ def to_json(self, path: str | pathlib.Path):
         try:
             # Use the stats mode for JSON export (cleaner output)
             data = self.get_structure(clean=True, stats=True)
-            with open(path, 'w', encoding='utf-8') as f:
-                json.dump(data, f, indent=4, ensure_ascii=False)
+            fx_io.save_json(data, path)
         except Exception as e:
             raise OSError(f'Failed to save {self.__class__.__name__} to JSON file {path}: {e}') from e
diff --git a/flixopt/utils.py b/flixopt/utils.py
deleted file mode 100644
index dd1f93d64..000000000
--- a/flixopt/utils.py
+++ /dev/null
@@ -1,86 +0,0 @@
-"""
-This module contains several utility functions used throughout the flixopt framework.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Any, Literal
-
-import numpy as np
-import xarray as xr
-
-logger = logging.getLogger('flixopt')
-
-
-def round_nested_floats(obj: dict | list | float | int | Any, decimals: int = 2) -> dict | list | float | int | Any:
-    """Recursively round floating point numbers in nested data structures.
-
-    This function traverses nested data structures (dictionaries, lists) and rounds
-    any floating point numbers to the specified number of decimal places. It handles
-    various data types including NumPy arrays and xarray DataArrays by converting
-    them to lists with rounded values.
-
-    Args:
-        obj: The object to process. Can be a dict, list, float, int, numpy.ndarray,
-            xarray.DataArray, or any other type.
-        decimals (int, optional): Number of decimal places to round to. Defaults to 2.
-
-    Returns:
-        The processed object with the same structure as the input, but with all floating point numbers rounded to the specified precision. NumPy arrays and xarray DataArrays are converted to lists.
-
-    Examples:
-        >>> data = {'a': 3.14159, 'b': [1.234, 2.678]}
-        >>> round_nested_floats(data, decimals=2)
-        {'a': 3.14, 'b': [1.23, 2.68]}
-
-        >>> import numpy as np
-        >>> arr = np.array([1.234, 5.678])
-        >>> round_nested_floats(arr, decimals=1)
-        [1.2, 5.7]
-    """
-    if isinstance(obj, dict):
-        return {k: round_nested_floats(v, decimals) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [round_nested_floats(v, decimals) for v in obj]
-    elif isinstance(obj, float):
-        return round(obj, decimals)
-    elif isinstance(obj, int):
-        return obj
-    elif isinstance(obj, np.ndarray):
-        return np.round(obj, decimals).tolist()
-    elif isinstance(obj, xr.DataArray):
-        return obj.round(decimals).values.tolist()
-    return obj
-
-
-def convert_dataarray(
-    data: xr.DataArray, mode: Literal['py', 'numpy', 'xarray', 'structure']
-) -> list | np.ndarray | xr.DataArray | str:
-    """
-    Convert a DataArray to a different format.
-
-    Args:
-        data: The DataArray to convert.
-        mode: The mode to convert to.
-            - 'py': Convert to python native types (for json)
-            - 'numpy': Convert to numpy array
-            - 'xarray': Convert to xarray.DataArray
-            - 'structure': Convert to strings (for structure, storing variable names)
-
-    Returns:
-        The converted data.
-
-    Raises:
-        ValueError: If the mode is unknown.
-    """
-    if mode == 'numpy':
-        return data.values
-    elif mode == 'py':
-        return data.values.tolist()
-    elif mode == 'xarray':
-        return data
-    elif mode == 'structure':
-        return f':::{data.name}'
-    else:
-        raise ValueError(f'Unknown mode {mode}')
diff --git a/pyproject.toml b/pyproject.toml
index 227eca49e..8c44d025f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
     "xarray >= 2024.2.0, < 2026.0",  # CalVer: allow through next calendar year
     # Optimization and data handling
     "linopy >= 0.5.1, < 0.6",  # Widened from patch pin to minor range
-    "h5netcdf>=1.0.0, < 2",
+    "netcdf4 >= 1.6.1, < 2",
     # Utilities
     "pyyaml >= 6.0.0, < 7",
     "rich >= 13.0.0, < 15",
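
---

A quick usage sketch of the centralized I/O helpers this PR introduces (`round_nested_floats`, `save_yaml`/`load_yaml`, `save_json`/`load_json`, `load_config_file`). The data and file names below are illustrative only, not part of the diff:

```python
# Sketch only: exercises the new helpers in flixopt/io.py; paths are made up.
from flixopt import io as fx_io

results = {'objective': 1234.56789, 'penalties': [0.00123, 4.5678]}

# Rounds floats recursively and converts NumPy/xarray values to Python natives
summary = fx_io.round_nested_floats(results, decimals=2)

# YAML round-trip (yaml.safe_dump / yaml.safe_load under the hood)
fx_io.save_yaml(summary, 'summary.yaml')
assert fx_io.load_yaml('summary.yaml') == summary

# JSON round-trip with the same consistent formatting
fx_io.save_json(summary, 'summary.json')
assert fx_io.load_json('summary.json') == summary

# Extension-aware loading with cross-format fallback (.json <-> .yaml/.yml)
config = fx_io.load_config_file('summary.yaml')
```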
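And a sketch of the netCDF round-trip now that the engine is pinned to netcdf4 (requires the `netcdf4` package from the updated pyproject.toml; the dataset and path are made up, and the attrs round-trip assumes the restore branch shown at the end of the io.py hunk):

```python
# Sketch only: netCDF save/load via the netcdf4 engine; data is illustrative.
import numpy as np
import xarray as xr

from flixopt import io as fx_io

ds = xr.Dataset(
    {'flow_rate': ('time', np.linspace(0.0, 100.0, 24))},
    attrs={'unit': 'MW', 'scenario': 'base'},
)

# attrs are serialized to a JSON string on save and restored on load;
# compression > 0 enables zlib encoding per data variable
fx_io.save_dataset_to_netcdf(ds, 'flows.nc', compression=5)
restored = fx_io.load_dataset_from_netcdf('flows.nc')
assert restored.attrs == ds.attrs
```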