From 86232e8bf1402647a7fa9acc10d964ea934d155e Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 21:02:40 +0100 Subject: [PATCH 01/19] perf: use Polars streaming engine for LP file writing Extract _format_and_write() helper that uses lazy().collect(engine="streaming") with automatic fallback, replacing 7 instances of df.select(concat_str(...)).write_csv(...). --- linopy/io.py | 63 +++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/linopy/io.py b/linopy/io.py index 56fe033d..8b53fea1 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -54,6 +54,29 @@ def clean_name(name: str) -> str: coord_sanitizer = str.maketrans("[,]", "(,)", " ") +def _format_and_write( + df: pl.DataFrame, columns: list[pl.Expr], f: BufferedWriter +) -> None: + """ + Format columns via concat_str and write to file. + + Uses Polars streaming engine for better performance when available, + with automatic fallback to eager evaluation. + """ + kwargs: Any = dict( + separator=" ", null_value="", quote_style="never", include_header=False + ) + try: + formatted = ( + df.lazy() + .select(pl.concat_str(columns, ignore_nulls=True)) + .collect(engine="streaming") + ) + except Exception: + formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) + formatted.write_csv(f, **kwargs) + + def signed_number(expr: pl.Expr) -> tuple[pl.Expr, pl.Expr]: """ Return polars expressions for a signed number string, handling -0.0 correctly. @@ -155,10 +178,7 @@ def objective_write_linear_terms( *signed_number(pl.col("coeffs")), *print_variable(pl.col("vars")), ] - df = df.select(pl.concat_str(cols, ignore_nulls=True)) - df.write_csv( - f, separator=" ", null_value="", quote_style="never", include_header=False - ) + _format_and_write(df, cols, f) def objective_write_quadratic_terms( @@ -171,10 +191,7 @@ def objective_write_quadratic_terms( *print_variable(pl.col("vars2")), ] f.write(b"+ [\n") - df = df.select(pl.concat_str(cols, ignore_nulls=True)) - df.write_csv( - f, separator=" ", null_value="", quote_style="never", include_header=False - ) + _format_and_write(df, cols, f) f.write(b"] / 2\n") @@ -254,11 +271,7 @@ def bounds_to_file( *signed_number(pl.col("upper")), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def binaries_to_file( @@ -296,11 +309,7 @@ def binaries_to_file( *print_variable(pl.col("labels")), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def integers_to_file( @@ -339,11 +348,7 @@ def integers_to_file( *print_variable(pl.col("labels")), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def sos_to_file( @@ -399,11 +404,7 @@ def sos_to_file( pl.col("var_weights"), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def constraints_to_file( @@ -487,11 +488,7 
@@ def constraints_to_file( pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) # in the future, we could use lazy dataframes when they support appending # tp existent files From b1e9864592374957f58696da0a6d215efb542de0 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 22:07:47 +0100 Subject: [PATCH 02/19] fix: log warning with traceback when Polars streaming fallback triggers --- linopy/io.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/linopy/io.py b/linopy/io.py index 8b53fea1..cd83f6b8 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -73,6 +73,11 @@ def _format_and_write( .collect(engine="streaming") ) except Exception: + logger.warning( + "Polars streaming engine failed, falling back to eager evaluation. " + "Please report this at https://github.com/PyPSA/linopy/issues", + exc_info=True, + ) formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) formatted.write_csv(f, **kwargs) From d30914d1b619ac5e668706a3062be0973313e1ea Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 17:24:23 +0100 Subject: [PATCH 03/19] perf: speed up LP constraint writing by replacing concat+sort with join Replace the vertical concat + sort approach in Constraint.to_polars() with an inner join, so every row has all columns populated. This removes the need for the group_by validation step in constraints_to_file() and simplifies the formatting expressions by eliminating null checks on coeffs/vars columns. 
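For illustration only (editor's sketch, not part of the patch, toy values assumed): the inner join on "labels" broadcasts each constraint's sign and rhs onto every one of its term rows, so the joined frame has no nulls and the LP formatting expressions need no per-column null checks. Column names match those used by to_polars().

    import polars as pl

    # one row per (constraint, term)
    long = pl.DataFrame(
        {"labels": [0, 0, 1], "coeffs": [1.0, -1.0, 2.0], "vars": [3, 4, 3]}
    )
    # one row per constraint
    short = pl.DataFrame({"labels": [0, 1], "sign": [">=", "<="], "rhs": [0.0, 5.0]})

    # every term row now carries its constraint's sign and rhs
    df = long.join(short, on="labels", how="inner")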
--- linopy/constraints.py | 6 +----- linopy/io.py | 37 +++++++------------------------------ 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index 291beb1d..e6216dba 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -632,11 +632,7 @@ def to_polars(self) -> pl.DataFrame: short = filter_nulls_polars(short) check_has_nulls_polars(short, name=f"{self.type} {self.name}") - df = pl.concat([short, long], how="diagonal_relaxed").sort(["labels", "rhs"]) - # delete subsequent non-null rhs (happens is all vars per label are -1) - is_non_null = df["rhs"].is_not_null() - prev_non_is_null = is_non_null.shift(1).fill_null(False) - df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null) + df = long.join(short, on="labels", how="inner") return df[["labels", "coeffs", "vars", "sign", "rhs"]] # Wrapped function which would convert variable to dataarray diff --git a/linopy/io.py b/linopy/io.py index cd83f6b8..f6cac315 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -446,48 +446,25 @@ def constraints_to_file( if df.height == 0: continue - # Ensure each constraint has both coefficient and RHS terms - analysis = df.group_by("labels").agg( - [ - pl.col("coeffs").is_not_null().sum().alias("coeff_rows"), - pl.col("sign").is_not_null().sum().alias("rhs_rows"), - ] - ) - - valid = analysis.filter( - (pl.col("coeff_rows") > 0) & (pl.col("rhs_rows") > 0) - ) - - if valid.height == 0: - continue - - # Keep only constraints that have both parts - df = df.join(valid.select("labels"), on="labels", how="inner") - # Sort by labels and mark first/last occurrences df = df.sort("labels").with_columns( [ - pl.when(pl.col("labels").is_first_distinct()) - .then(pl.col("labels")) - .otherwise(pl.lit(None)) - .alias("labels_first"), + pl.col("labels").is_first_distinct().alias("is_first_in_group"), (pl.col("labels") != pl.col("labels").shift(-1)) .fill_null(True) .alias("is_last_in_group"), ] ) - row_labels = print_constraint(pl.col("labels_first")) + row_labels = print_constraint(pl.col("labels")) col_labels = print_variable(pl.col("vars")) columns = [ - pl.when(pl.col("labels_first").is_not_null()).then(row_labels[0]), - pl.when(pl.col("labels_first").is_not_null()).then(row_labels[1]), - pl.when(pl.col("labels_first").is_not_null()) - .then(pl.lit(":\n")) - .alias(":"), + pl.when(pl.col("is_first_in_group")).then(row_labels[0]), + pl.when(pl.col("is_first_in_group")).then(row_labels[1]), + pl.when(pl.col("is_first_in_group")).then(pl.lit(":\n")).alias(":"), *signed_number(pl.col("coeffs")), - pl.when(pl.col("vars").is_not_null()).then(col_labels[0]), - pl.when(pl.col("vars").is_not_null()).then(col_labels[1]), + col_labels[0], + col_labels[1], pl.when(pl.col("is_last_in_group")).then(pl.col("sign")), pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")), pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)), From d15ff4055a4f704d27b76829a9344b92f1b4b0aa Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 17:34:11 +0100 Subject: [PATCH 04/19] fix: missing space in lp file --- linopy/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/linopy/io.py b/linopy/io.py index f6cac315..fa83e022 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -465,6 +465,7 @@ def constraints_to_file( *signed_number(pl.col("coeffs")), col_labels[0], col_labels[1], + pl.when(pl.col("is_last_in_group")).then(pl.lit("\n")), pl.when(pl.col("is_last_in_group")).then(pl.col("sign")), 
pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")), pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)), From 96a2e85816319279414c986fb66231912ce61210 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 18:04:07 +0100 Subject: [PATCH 05/19] perf: skip group_terms when unnecessary and avoid xarray broadcast for short DataFrame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Skip group_terms_polars when _term dim size is 1 (no duplicate vars) - Build the short DataFrame (labels, rhs, sign) directly with numpy instead of going through xarray.broadcast + to_polars - Add sign column via pl.lit when uniform (common case), avoiding costly numpy string array → polars conversion Co-Authored-By: Claude Opus 4.5 --- linopy/constraints.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index e6216dba..c239be60 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -42,7 +42,6 @@ get_label_position, group_terms_polars, has_optimized_model, - infer_schema_polars, iterate_slices, maybe_replace_signs, print_coord, @@ -622,14 +621,30 @@ def to_polars(self) -> pl.DataFrame: long = to_polars(ds[keys]) long = filter_nulls_polars(long) - long = group_terms_polars(long) + if ds.sizes.get("_term", 1) > 1: + long = group_terms_polars(long) check_has_nulls_polars(long, name=f"{self.type} {self.name}") - short_ds = ds[[k for k in ds if "_term" not in ds[k].dims]] - schema = infer_schema_polars(short_ds) - schema["sign"] = pl.Enum(["=", "<=", ">="]) - short = to_polars(short_ds, schema=schema) + # Build short DataFrame (labels, rhs) without xarray broadcast. + # Add sign separately to avoid costly numpy string→polars conversion. + labels_flat = ds["labels"].values.reshape(-1) + rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1) + short = pl.DataFrame({"labels": labels_flat, "rhs": rhs_flat}) short = filter_nulls_polars(short) + + sign_values = ds["sign"].values + unique_signs = np.unique(sign_values) + if len(unique_signs) == 1: + short = short.with_columns( + pl.lit(unique_signs[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign") + ) + else: + sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) + # Apply same mask as filter_nulls (labels != -1) + sign_flat = sign_flat[labels_flat != -1] + short = short.with_columns( + pl.Series("sign", sign_flat, dtype=pl.Enum(["=", "<=", ">="])) + ) check_has_nulls_polars(short, name=f"{self.type} {self.name}") df = long.join(short, on="labels", how="inner") From 95cdec7668a78391d96f13fa1396aafe1eff72ac Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 18:05:11 +0100 Subject: [PATCH 06/19] perf: skip group_terms in LinearExpression.to_polars when no duplicate vars Check n_unique before running the expensive group_by+sum. When all variable references are unique (common case for objectives), this saves ~31ms per 320k terms. 
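For illustration only (editor's sketch, not part of the patch, toy frame assumed): the check compares n_unique against the frame height and only groups when a variable reference repeats; the patch below delegates the duplicate case to group_terms_polars, so the group_by here only stands in for that call.

    import polars as pl

    df = pl.DataFrame({"vars": [3, 4, 3], "coeffs": [1.0, 2.0, 0.5]})

    if df["vars"].n_unique() < df.height:  # duplicate variable references
        df = df.group_by("vars", maintain_order=True).agg(pl.col("coeffs").sum())
    # otherwise the group_by + sum is skipped entirely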
Co-Authored-By: Claude Opus 4.5 --- linopy/expressions.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/linopy/expressions.py b/linopy/expressions.py index 10e243de..7550f2d5 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1463,7 +1463,13 @@ def to_polars(self) -> pl.DataFrame: df = to_polars(self.data) df = filter_nulls_polars(df) - df = group_terms_polars(df) + if df["vars"].n_unique() < df.height: + df = group_terms_polars(df) + else: + # Match column order of group_terms (group-by keys, coeffs, rest) + varcols = [c for c in df.columns if c.startswith("vars")] + rest = [c for c in df.columns if c not in varcols and c != "coeffs"] + df = df.select(varcols + ["coeffs"] + rest) check_has_nulls_polars(df, name=self.type) return df From 489f04d2e845fa3d232731ff9cb5a9b2666d4812 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 18:58:53 +0100 Subject: [PATCH 07/19] perf: reduce per-constraint overhead in Constraint.to_polars() Replace np.unique with faster numpy equality check for sign uniformity. Eliminate redundant filter_nulls_polars and check_has_nulls_polars on the short DataFrame by applying the labels mask directly during construction. Co-Authored-By: Claude Opus 4.5 --- linopy/constraints.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index c239be60..0bd124f0 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -625,27 +625,32 @@ def to_polars(self) -> pl.DataFrame: long = group_terms_polars(long) check_has_nulls_polars(long, name=f"{self.type} {self.name}") - # Build short DataFrame (labels, rhs) without xarray broadcast. - # Add sign separately to avoid costly numpy string→polars conversion. + # Build short DataFrame (labels, rhs, sign) without xarray broadcast. + # Apply labels mask directly instead of filter_nulls_polars. 
labels_flat = ds["labels"].values.reshape(-1) + mask = labels_flat != -1 + labels_masked = labels_flat[mask] rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1) - short = pl.DataFrame({"labels": labels_flat, "rhs": rhs_flat}) - short = filter_nulls_polars(short) sign_values = ds["sign"].values - unique_signs = np.unique(sign_values) - if len(unique_signs) == 1: - short = short.with_columns( - pl.lit(unique_signs[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign") + sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) + all_same_sign = ( + sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all() + ) + + short_data: dict = { + "labels": labels_masked, + "rhs": rhs_flat[mask], + } + if all_same_sign: + short = pl.DataFrame(short_data).with_columns( + pl.lit(sign_flat[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign") ) else: - sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) - # Apply same mask as filter_nulls (labels != -1) - sign_flat = sign_flat[labels_flat != -1] - short = short.with_columns( - pl.Series("sign", sign_flat, dtype=pl.Enum(["=", "<=", ">="])) + short_data["sign"] = pl.Series( + "sign", sign_flat[mask], dtype=pl.Enum(["=", "<=", ">="]) ) - check_has_nulls_polars(short, name=f"{self.type} {self.name}") + short = pl.DataFrame(short_data) df = long.join(short, on="labels", how="inner") return df[["labels", "coeffs", "vars", "sign", "rhs"]] From 0b413ddb269df74bc05339fba0d2e89d5c4995ea Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 21:04:38 +0100 Subject: [PATCH 08/19] fix: handle empty constraint slices in sign_flat check Guard against IndexError when sign_flat is empty (no valid labels) by checking len(sign_flat) > 0 before accessing sign_flat[0]. 
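For illustration only (editor's sketch, not part of the patch, empty array and dtype assumed): the short-circuit keeps the uniformity check safe on an empty constraint slice, because the `and` never reaches sign_flat[0] when there are no rows.

    import numpy as np

    sign_flat = np.array([], dtype="<U2")  # slice with no valid labels
    all_same_sign = len(sign_flat) > 0 and (
        sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all()
    )
    assert all_same_sign is False  # no IndexError raised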
Co-Authored-By: Claude Opus 4.5 --- linopy/constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index 0bd124f0..3cea9f36 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -634,7 +634,7 @@ def to_polars(self) -> pl.DataFrame: sign_values = ds["sign"].values sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) - all_same_sign = ( + all_same_sign = len(sign_flat) > 0 and ( sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all() ) From 9f35550d9af6d29f95f469e7ad36697f46b0a65c Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 22:17:59 +0100 Subject: [PATCH 09/19] docs: add LP write speed improvement to release notes Co-Authored-By: Claude Opus 4.5 --- doc/release_notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index b727c22d..cdca2be1 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -6,6 +6,7 @@ Upcoming Version * Fix docs (pick highs solver) * Add the `sphinx-copybutton` to the documentation +* Speed up LP file writing by 2-2.7x on large models through Polars streaming engine, join-based constraint assembly, and reduced per-constraint overhead Version 0.6.1 -------------- From 1896eeef5be6f64e1e3b985b1ebfa213d2ca797a Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 22:50:53 +0100 Subject: [PATCH 10/19] bench: add LP write benchmark script with plotting --- dev-scripts/benchmark_lp_writer.py | 335 +++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 dev-scripts/benchmark_lp_writer.py diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py new file mode 100644 index 00000000..63eeffaf --- /dev/null +++ b/dev-scripts/benchmark_lp_writer.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python3 +""" +Benchmark script for LP file writing performance. 
+ +Usage: + # Run benchmark and save results to JSON: + python dev-scripts/benchmark_lp_writer.py --output results.json [--label "my branch"] + + # Plot comparison of two result files: + python dev-scripts/benchmark_lp_writer.py --plot master.json this_pr.json +""" + +from __future__ import annotations + +import argparse +import json +import tempfile +import time +from pathlib import Path + +import numpy as np +from numpy.random import default_rng + +from linopy import Model + +rng = default_rng(125) + + +def basic_model(n: int) -> Model: + """Create a basic model with 2*n^2 variables and 2*n^2 constraints.""" + m = Model() + N = np.arange(n) + x = m.add_variables(coords=[N, N], name="x") + y = m.add_variables(coords=[N, N], name="y") + m.add_constraints(x - y >= N, name="c1") + m.add_constraints(x + y >= 0, name="c2") + m.add_objective((2 * x).sum() + y.sum()) + return m + + +def knapsack_model(n: int) -> Model: + """Create a knapsack model with n binary variables and 1 constraint.""" + m = Model() + packages = m.add_variables(coords=[np.arange(n)], binary=True) + weight = rng.integers(1, 100, size=n) + value = rng.integers(1, 100, size=n) + m.add_constraints((weight * packages).sum() <= 200) + m.add_objective(-(value * packages).sum()) + return m + + +def pypsa_model(snapshots: int | None = None) -> Model | None: + """Create a model from the PyPSA SciGrid-DE example network.""" + try: + import pandas as pd + import pypsa + except ImportError: + return None + n = pypsa.examples.scigrid_de() + if snapshots is not None and snapshots > len(n.snapshots): + orig = n.snapshots + repeats = -(-snapshots // len(orig)) + new_index = pd.date_range(orig[0], periods=len(orig) * repeats, freq=orig.freq) + new_index = new_index[:snapshots] + n.set_snapshots(new_index) + n.optimize.create_model() + return n.model + + +def benchmark_model( + label: str, m: Model, iterations: int = 10, io_api: str | None = None +) -> dict: + """Benchmark LP file writing. Returns dict with results.""" + to_file_kwargs: dict = dict(progress=False) + if io_api is not None: + to_file_kwargs["io_api"] = io_api + with tempfile.TemporaryDirectory() as tmpdir: + m.to_file(Path(tmpdir) / "warmup.lp", **to_file_kwargs) + times = [] + for i in range(iterations): + fn = Path(tmpdir) / f"bench_{i}.lp" + start = time.perf_counter() + m.to_file(fn, **to_file_kwargs) + times.append(time.perf_counter() - start) + + avg = float(np.mean(times)) + std = float(np.std(times)) + nvars = int(m.nvars) + ncons = int(m.ncons) + print( + f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " + f"{avg * 1000:7.1f}ms ± {std * 1000:5.1f}ms" + ) + return { + "label": label, + "nvars": nvars, + "ncons": ncons, + "mean_s": avg, + "std_s": std, + "times_s": times, + } + + +def run_benchmarks( + io_api: str | None = None, + iterations: int = 10, + model_type: str = "basic", +) -> list[dict]: + """ + Run benchmarks for a single model type across sizes. + + Parameters + ---------- + model_type : str + "basic" (default) — N from 5 to 1000, giving 50 to 2M vars. + "pypsa" — PyPSA SciGrid-DE with varying snapshot counts. 
+ """ + results = [] + + if model_type == "basic": + print("\nbasic_model (2 x N^2 vars, 2 x N^2 constraints):") + for n in [5, 10, 20, 30, 50, 75, 100, 150, 200, 300, 500, 750, 1000]: + r = benchmark_model( + f"basic N={n}", basic_model(n), iterations, io_api=io_api + ) + r["model"] = "basic" + r["param"] = n + results.append(r) + + elif model_type == "pypsa": + print("\nPyPSA SciGrid-DE (realistic power system model):") + for snaps in [24, 50, 100, 200, 500, 1000]: + m = pypsa_model(snapshots=snaps) + if m is not None: + r = benchmark_model( + f"pypsa {snaps} snaps", m, iterations, io_api=io_api + ) + r["model"] = "pypsa" + r["param"] = snaps + results.append(r) + else: + print(" (skipped, pypsa not installed)") + break + else: + raise ValueError(f"Unknown model_type: {model_type!r}") + + return results + + +def plot_comparison(file_old: str, file_new: str) -> None: + """Create 4-panel comparison plot from two JSON result files.""" + import matplotlib.pyplot as plt + + with open(file_old) as f: + data_old = json.load(f) + with open(file_new) as f: + data_new = json.load(f) + + label_old = data_old.get("label", Path(file_old).stem) + label_new = data_new.get("label", Path(file_new).stem) + + nv_old = [r["nvars"] for r in data_old["results"]] + ms_old = [r["mean_s"] * 1000 for r in data_old["results"]] + std_old = [r["std_s"] * 1000 for r in data_old["results"]] + nv_new = [r["nvars"] for r in data_new["results"]] + ms_new = [r["mean_s"] * 1000 for r in data_new["results"]] + std_new = [r["std_s"] * 1000 for r in data_new["results"]] + + color_old, color_new = "#1f77b4", "#ff7f0e" + + fig, axes = plt.subplots(2, 2, figsize=(14, 10)) + fig.suptitle(f"LP Write Performance: {label_old} vs {label_new}", fontsize=14) + + # Panel 1: All data, log-log + ax = axes[0, 0] + ax.errorbar( + nv_old, + ms_old, + yerr=std_old, + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + capsize=3, + ) + ax.errorbar( + nv_new, + ms_new, + yerr=std_new, + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + capsize=3, + ) + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel("Write time (ms)") + ax.set_title("IO time vs problem size (log-log)") + ax.legend() + ax.grid(True, alpha=0.3) + + # Panel 2: Speedup ratio (old/new) + ax = axes[0, 1] + if len(nv_old) == len(nv_new): + speedup = [o / n for o, n in zip(ms_old, ms_new)] + ax.plot(nv_old, speedup, marker="o", color="#2ca02c") + ax.fill_between(nv_old, 1.0, speedup, alpha=0.15, color="#2ca02c") + ax.axhline(1.0, color="gray", linestyle="--", alpha=0.5) + ax.set_xscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel(f"Speedup ({label_old} / {label_new})") + ax.set_title("Speedup vs problem size") + ax.grid(True, alpha=0.3) + + # Panel 3: Small models (nvars <= 25000) + ax = axes[1, 0] + cutoff = 25000 + idx_old = [i for i, n in enumerate(nv_old) if n <= cutoff] + idx_new = [i for i, n in enumerate(nv_new) if n <= cutoff] + ax.errorbar( + [nv_old[i] for i in idx_old], + [ms_old[i] for i in idx_old], + yerr=[std_old[i] for i in idx_old], + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + capsize=3, + ) + ax.errorbar( + [nv_new[i] for i in idx_new], + [ms_new[i] for i in idx_new], + yerr=[std_new[i] for i in idx_new], + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + capsize=3, + ) + ax.set_xlabel("Number of variables") + ax.set_ylabel("Write time (ms)") + ax.set_ylim(bottom=0) + 
ax.set_title(f"Small models (≤ {cutoff:,} vars)") + ax.legend() + ax.grid(True, alpha=0.3) + + # Panel 4: Large models (nvars > 25000) + ax = axes[1, 1] + idx_old = [i for i, n in enumerate(nv_old) if n > cutoff] + idx_new = [i for i, n in enumerate(nv_new) if n > cutoff] + ax.errorbar( + [nv_old[i] for i in idx_old], + [ms_old[i] for i in idx_old], + yerr=[std_old[i] for i in idx_old], + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + capsize=3, + ) + ax.errorbar( + [nv_new[i] for i in idx_new], + [ms_new[i] for i in idx_new], + yerr=[std_new[i] for i in idx_new], + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + capsize=3, + ) + ax.set_xscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel("Write time (ms)") + ax.set_title(f"Large models (> {cutoff:,} vars)") + ax.legend() + ax.grid(True, alpha=0.3) + + plt.tight_layout() + out_path = "dev-scripts/benchmark_lp_comparison.png" + plt.savefig(out_path, dpi=150, bbox_inches="tight") + print(f"\nPlot saved to {out_path}") + plt.close() + + +def main() -> None: + parser = argparse.ArgumentParser(description="LP write benchmark") + parser.add_argument("--output", "-o", help="Save results to JSON file") + parser.add_argument("--label", default=None, help="Label for this run") + parser.add_argument("--io-api", default=None, help="io_api to pass to to_file()") + parser.add_argument( + "--model", + default="basic", + choices=["basic", "pypsa"], + help="Model type to benchmark (default: basic)", + ) + parser.add_argument( + "--plot", + nargs=2, + metavar=("OLD", "NEW"), + help="Plot comparison from two JSON files", + ) + args = parser.parse_args() + + if args.plot: + plot_comparison(args.plot[0], args.plot[1]) + return + + iterations = 10 + label = args.label or "benchmark" + print(f"LP file writing benchmark ({iterations} iterations, label={label!r})") + print("=" * 90) + + results = run_benchmarks( + io_api=args.io_api, iterations=iterations, model_type=args.model + ) + + output = {"label": label, "results": results} + if args.output: + with open(args.output, "w") as f: + json.dump(output, f, indent=2) + print(f"\nResults saved to {args.output}") + else: + print("\n(use --output FILE to save results for later plotting)") + + +if __name__ == "__main__": + main() From 68f1adc8fddb66b20ea57da50ff515112bb1e30f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 23:06:49 +0100 Subject: [PATCH 11/19] bench: larger model --- dev-scripts/benchmark_lp_writer.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py index 63eeffaf..df0b4b5b 100644 --- a/dev-scripts/benchmark_lp_writer.py +++ b/dev-scripts/benchmark_lp_writer.py @@ -119,7 +119,23 @@ def run_benchmarks( if model_type == "basic": print("\nbasic_model (2 x N^2 vars, 2 x N^2 constraints):") - for n in [5, 10, 20, 30, 50, 75, 100, 150, 200, 300, 500, 750, 1000]: + for n in [ + 5, + 10, + 20, + 30, + 50, + 75, + 100, + 150, + 200, + 300, + 500, + 750, + 1000, + 1500, + 2000, + ]: r = benchmark_model( f"basic N={n}", basic_model(n), iterations, io_api=io_api ) From a293b6471b9bd21393307d02d002da179dbc4cac Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 23:30:20 +0100 Subject: [PATCH 12/19] perf: Add maybe_group_terms_polars() helper in common.py that checks for duplicate (labels, vars) pairs before 
calling group_terms_polars. Use it in both Constraint.to_polars() and LinearExpression.to_polars() to avoid expensive group_by when terms already reference distinct variables --- linopy/common.py | 19 +++++++++++++++++++ linopy/constraints.py | 4 ++-- linopy/expressions.py | 9 ++------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index 7dd97b65..e6eef583 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -449,6 +449,25 @@ def group_terms_polars(df: pl.DataFrame) -> pl.DataFrame: return df +def maybe_group_terms_polars(df: pl.DataFrame) -> pl.DataFrame: + """ + Group terms only if there are duplicate (labels, vars) pairs. + + This avoids the expensive group_by operation when terms already + reference distinct variables (e.g. ``x - y`` has ``_term=2`` but + no duplicates). When skipping, columns are reordered to match the + output of ``group_terms_polars``. + """ + varcols = [c for c in df.columns if c.startswith("vars")] + keys = [c for c in ["labels"] + varcols if c in df.columns] + key_count = df.select(pl.struct(keys).n_unique()).item() + if key_count < df.height: + return group_terms_polars(df) + # Match column order of group_terms (group-by keys, coeffs, rest) + rest = [c for c in df.columns if c not in keys and c != "coeffs"] + return df.select(keys + ["coeffs"] + rest) + + def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset: """ Join multiple xarray Dataarray's to a Dataset and warn if coordinates are not equal. diff --git a/linopy/constraints.py b/linopy/constraints.py index 3cea9f36..d3ebef19 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -40,9 +40,9 @@ generate_indices_for_printout, get_dims_with_index_levels, get_label_position, - group_terms_polars, has_optimized_model, iterate_slices, + maybe_group_terms_polars, maybe_replace_signs, print_coord, print_single_constraint, @@ -622,7 +622,7 @@ def to_polars(self) -> pl.DataFrame: long = filter_nulls_polars(long) if ds.sizes.get("_term", 1) > 1: - long = group_terms_polars(long) + long = maybe_group_terms_polars(long) check_has_nulls_polars(long, name=f"{self.type} {self.name}") # Build short DataFrame (labels, rhs, sign) without xarray broadcast. 
diff --git a/linopy/expressions.py b/linopy/expressions.py index 7550f2d5..cf37b937 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -60,6 +60,7 @@ has_optimized_model, is_constant, iterate_slices, + maybe_group_terms_polars, print_coord, print_single_expression, to_dataframe, @@ -1463,13 +1464,7 @@ def to_polars(self) -> pl.DataFrame: df = to_polars(self.data) df = filter_nulls_polars(df) - if df["vars"].n_unique() < df.height: - df = group_terms_polars(df) - else: - # Match column order of group_terms (group-by keys, coeffs, rest) - varcols = [c for c in df.columns if c.startswith("vars")] - rest = [c for c in df.columns if c not in varcols and c != "coeffs"] - df = df.select(varcols + ["coeffs"] + rest) + df = maybe_group_terms_polars(df) check_has_nulls_polars(df, name=self.type) return df From 04c4beadd3ed977f42855161e107fe351f9d9d0d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 1 Feb 2026 00:07:53 +0100 Subject: [PATCH 13/19] Add variance to plot --- dev-scripts/benchmark_lp_writer.py | 123 +++++++++++++++++++---------- 1 file changed, 80 insertions(+), 43 deletions(-) diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py index df0b4b5b..ec45c904 100644 --- a/dev-scripts/benchmark_lp_writer.py +++ b/dev-scripts/benchmark_lp_writer.py @@ -84,19 +84,23 @@ def benchmark_model( times.append(time.perf_counter() - start) avg = float(np.mean(times)) - std = float(np.std(times)) + med = float(np.median(times)) + q25 = float(np.percentile(times, 25)) + q75 = float(np.percentile(times, 75)) nvars = int(m.nvars) ncons = int(m.ncons) print( f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " - f"{avg * 1000:7.1f}ms ± {std * 1000:5.1f}ms" + f"{med * 1000:7.1f}ms (IQR {q25 * 1000:.1f}–{q75 * 1000:.1f}ms)" ) return { "label": label, "nvars": nvars, "ncons": ncons, "mean_s": avg, - "std_s": std, + "median_s": med, + "q25_s": q25, + "q75_s": q75, "times_s": times, } @@ -136,9 +140,9 @@ def run_benchmarks( 1500, 2000, ]: - r = benchmark_model( - f"basic N={n}", basic_model(n), iterations, io_api=io_api - ) + # More iterations for small models to reduce noise + iters = iterations * 5 if n <= 100 else iterations + r = benchmark_model(f"basic N={n}", basic_model(n), iters, io_api=io_api) r["model"] = "basic" r["param"] = n results.append(r) @@ -175,56 +179,85 @@ def plot_comparison(file_old: str, file_new: str) -> None: label_old = data_old.get("label", Path(file_old).stem) label_new = data_new.get("label", Path(file_new).stem) - nv_old = [r["nvars"] for r in data_old["results"]] - ms_old = [r["mean_s"] * 1000 for r in data_old["results"]] - std_old = [r["std_s"] * 1000 for r in data_old["results"]] - nv_new = [r["nvars"] for r in data_new["results"]] - ms_new = [r["mean_s"] * 1000 for r in data_new["results"]] - std_new = [r["std_s"] * 1000 for r in data_new["results"]] + def get_stats(data): + """Extract median and IQR from results, falling back to mean/std.""" + nv = [r["nvars"] for r in data["results"]] + if "median_s" in data["results"][0]: + med = [r["median_s"] * 1000 for r in data["results"]] + lo = [r["q25_s"] * 1000 for r in data["results"]] + hi = [r["q75_s"] * 1000 for r in data["results"]] + else: + med = [r["mean_s"] * 1000 for r in data["results"]] + std = [r["std_s"] * 1000 for r in data["results"]] + lo = [m - s for m, s in zip(med, std)] + hi = [m + s for m, s in zip(med, std)] + return nv, med, lo, hi + + nv_old, med_old, lo_old, hi_old = get_stats(data_old) + nv_new, med_new, lo_new, 
hi_new = get_stats(data_new) color_old, color_new = "#1f77b4", "#ff7f0e" fig, axes = plt.subplots(2, 2, figsize=(14, 10)) fig.suptitle(f"LP Write Performance: {label_old} vs {label_new}", fontsize=14) + def plot_errorbar(ax, nv, med, lo, hi, **kwargs): + yerr_lo = [m - l for m, l in zip(med, lo)] + yerr_hi = [h - m for m, h in zip(med, hi)] + ax.errorbar(nv, med, yerr=[yerr_lo, yerr_hi], capsize=3, **kwargs) + # Panel 1: All data, log-log ax = axes[0, 0] - ax.errorbar( + plot_errorbar( + ax, nv_old, - ms_old, - yerr=std_old, + med_old, + lo_old, + hi_old, marker="o", color=color_old, linestyle="--", label=label_old, alpha=0.8, - capsize=3, ) - ax.errorbar( + plot_errorbar( + ax, nv_new, - ms_new, - yerr=std_new, + med_new, + lo_new, + hi_new, marker="s", color=color_new, linestyle="-", label=label_new, alpha=0.8, - capsize=3, ) ax.set_xscale("log") ax.set_yscale("log") ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms)") + ax.set_ylabel("Write time (ms, median)") ax.set_title("IO time vs problem size (log-log)") ax.legend() ax.grid(True, alpha=0.3) - # Panel 2: Speedup ratio (old/new) + # Panel 2: Speedup ratio (old/new) with IQR-based bounds ax = axes[0, 1] if len(nv_old) == len(nv_new): - speedup = [o / n for o, n in zip(ms_old, ms_new)] - ax.plot(nv_old, speedup, marker="o", color="#2ca02c") - ax.fill_between(nv_old, 1.0, speedup, alpha=0.15, color="#2ca02c") + speedup = [o / n for o, n in zip(med_old, med_new)] + # Conservative bounds: best case = hi_old/lo_new, worst = lo_old/hi_new + speedup_lo = [l / h for l, h in zip(lo_old, hi_new)] + speedup_hi = [h / l for h, l in zip(hi_old, lo_new)] + yerr_lo = [s - sl for s, sl in zip(speedup, speedup_lo)] + yerr_hi = [sh - s for s, sh in zip(speedup, speedup_hi)] + ax.errorbar( + nv_old, + speedup, + yerr=[yerr_lo, yerr_hi], + marker="o", + color="#2ca02c", + capsize=3, + ) + ax.fill_between(nv_old, speedup_lo, speedup_hi, alpha=0.15, color="#2ca02c") ax.axhline(1.0, color="gray", linestyle="--", alpha=0.5) ax.set_xscale("log") ax.set_xlabel("Number of variables") @@ -237,30 +270,32 @@ def plot_comparison(file_old: str, file_new: str) -> None: cutoff = 25000 idx_old = [i for i, n in enumerate(nv_old) if n <= cutoff] idx_new = [i for i, n in enumerate(nv_new) if n <= cutoff] - ax.errorbar( + plot_errorbar( + ax, [nv_old[i] for i in idx_old], - [ms_old[i] for i in idx_old], - yerr=[std_old[i] for i in idx_old], + [med_old[i] for i in idx_old], + [lo_old[i] for i in idx_old], + [hi_old[i] for i in idx_old], marker="o", color=color_old, linestyle="--", label=label_old, alpha=0.8, - capsize=3, ) - ax.errorbar( + plot_errorbar( + ax, [nv_new[i] for i in idx_new], - [ms_new[i] for i in idx_new], - yerr=[std_new[i] for i in idx_new], + [med_new[i] for i in idx_new], + [lo_new[i] for i in idx_new], + [hi_new[i] for i in idx_new], marker="s", color=color_new, linestyle="-", label=label_new, alpha=0.8, - capsize=3, ) ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms)") + ax.set_ylabel("Write time (ms, median)") ax.set_ylim(bottom=0) ax.set_title(f"Small models (≤ {cutoff:,} vars)") ax.legend() @@ -270,31 +305,33 @@ def plot_comparison(file_old: str, file_new: str) -> None: ax = axes[1, 1] idx_old = [i for i, n in enumerate(nv_old) if n > cutoff] idx_new = [i for i, n in enumerate(nv_new) if n > cutoff] - ax.errorbar( + plot_errorbar( + ax, [nv_old[i] for i in idx_old], - [ms_old[i] for i in idx_old], - yerr=[std_old[i] for i in idx_old], + [med_old[i] for i in idx_old], + [lo_old[i] for i in idx_old], + [hi_old[i] 
for i in idx_old], marker="o", color=color_old, linestyle="--", label=label_old, alpha=0.8, - capsize=3, ) - ax.errorbar( + plot_errorbar( + ax, [nv_new[i] for i in idx_new], - [ms_new[i] for i in idx_new], - yerr=[std_new[i] for i in idx_new], + [med_new[i] for i in idx_new], + [lo_new[i] for i in idx_new], + [hi_new[i] for i in idx_new], marker="s", color=color_new, linestyle="-", label=label_new, alpha=0.8, - capsize=3, ) ax.set_xscale("log") ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms)") + ax.set_ylabel("Write time (ms, median)") ax.set_title(f"Large models (> {cutoff:,} vars)") ax.legend() ax.grid(True, alpha=0.3) From 3f52fef973dad6be9016b31b666b88c8f0512a29 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 1 Feb 2026 00:43:49 +0100 Subject: [PATCH 14/19] test: add coverage for streaming fallback and maybe_group_terms_polars --- test/test_common.py | 18 +++++++++++++++++ test/test_io.py | 48 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/test/test_common.py b/test/test_common.py index db218375..19c169a3 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -23,6 +23,7 @@ get_dims_with_index_levels, is_constant, iterate_slices, + maybe_group_terms_polars, ) from linopy.testing import assert_linequal, assert_varequal @@ -737,3 +738,20 @@ def test_is_constant() -> None: ] for cv in constant_values: assert is_constant(cv) + + +def test_maybe_group_terms_polars_no_duplicates(): + """Fast path: distinct (labels, vars) pairs skip group_by.""" + df = pl.DataFrame({"labels": [0, 0], "vars": [1, 2], "coeffs": [3.0, 4.0]}) + result = maybe_group_terms_polars(df) + assert result.shape == (2, 3) + assert result.columns == ["labels", "vars", "coeffs"] + assert result["coeffs"].to_list() == [3.0, 4.0] + + +def test_maybe_group_terms_polars_with_duplicates(): + """Slow path: duplicate (labels, vars) pairs trigger group_by.""" + df = pl.DataFrame({"labels": [0, 0], "vars": [1, 1], "coeffs": [3.0, 4.0]}) + result = maybe_group_terms_polars(df) + assert result.shape == (1, 3) + assert result["coeffs"].to_list() == [7.0] diff --git a/test/test_io.py b/test/test_io.py index 4336f29d..401b8359 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -7,6 +7,7 @@ import pickle from pathlib import Path +from unittest.mock import patch import numpy as np import pandas as pd @@ -15,7 +16,7 @@ import xarray as xr from linopy import LESS_EQUAL, Model, available_solvers, read_netcdf -from linopy.io import signed_number +from linopy.io import _format_and_write, signed_number from linopy.testing import assert_model_equal @@ -336,3 +337,48 @@ def test_to_file_lp_with_negative_zero_coefficients(tmp_path: Path) -> None: # Verify Gurobi can read it without errors gurobipy.read(str(fn)) + + +def test_format_and_write_streaming_fallback(tmp_path): + """Test that _format_and_write falls back to eager when streaming fails.""" + df = pl.DataFrame({"a": ["x", "y"], "b": ["1", "2"]}) + columns = [pl.col("a"), pl.lit(" "), pl.col("b")] + + # Normal path + fn1 = tmp_path / "normal.lp" + with open(fn1, "wb") as f: + _format_and_write(df, columns, f) + content_normal = fn1.read_text() + + # Force streaming to fail + original_collect = pl.LazyFrame.collect + + def failing_collect(self, *args, **kwargs): + if kwargs.get("engine") == "streaming": + raise RuntimeError("simulated streaming failure") + return original_collect(self, *args, **kwargs) + + fn2 = tmp_path / "fallback.lp" + with 
patch.object(pl.LazyFrame, "collect", failing_collect): + with open(fn2, "wb") as f: + _format_and_write(df, columns, f) + content_fallback = fn2.read_text() + + assert content_normal == content_fallback + + +def test_to_file_lp_same_sign_constraints(tmp_path): + """Test LP writing when all constraints have the same sign operator.""" + m = Model() + N = np.arange(5) + x = m.add_variables(coords=[N], name="x") + # All constraints use <= + m.add_constraints(x <= 10, name="upper") + m.add_constraints(x <= 20, name="upper2") + m.add_objective(x.sum()) + + fn = tmp_path / "same_sign.lp" + m.to_file(fn) + content = fn.read_text() + assert "s.t." in content + assert "<=" in content From 3d4a8159c5eb68b6373d1d0811cd21988fecdf14 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 2 Feb 2026 10:47:35 +0100 Subject: [PATCH 15/19] fix: mypy --- test/test_common.py | 4 ++-- test/test_io.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/test/test_common.py b/test/test_common.py index 19c169a3..c3500155 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -740,7 +740,7 @@ def test_is_constant() -> None: assert is_constant(cv) -def test_maybe_group_terms_polars_no_duplicates(): +def test_maybe_group_terms_polars_no_duplicates() -> None: """Fast path: distinct (labels, vars) pairs skip group_by.""" df = pl.DataFrame({"labels": [0, 0], "vars": [1, 2], "coeffs": [3.0, 4.0]}) result = maybe_group_terms_polars(df) @@ -749,7 +749,7 @@ def test_maybe_group_terms_polars_no_duplicates(): assert result["coeffs"].to_list() == [3.0, 4.0] -def test_maybe_group_terms_polars_with_duplicates(): +def test_maybe_group_terms_polars_with_duplicates() -> None: """Slow path: duplicate (labels, vars) pairs trigger group_by.""" df = pl.DataFrame({"labels": [0, 0], "vars": [1, 1], "coeffs": [3.0, 4.0]}) result = maybe_group_terms_polars(df) diff --git a/test/test_io.py b/test/test_io.py index 401b8359..c9ce5956 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -7,6 +7,7 @@ import pickle from pathlib import Path +from typing import Any from unittest.mock import patch import numpy as np @@ -339,7 +340,7 @@ def test_to_file_lp_with_negative_zero_coefficients(tmp_path: Path) -> None: gurobipy.read(str(fn)) -def test_format_and_write_streaming_fallback(tmp_path): +def test_format_and_write_streaming_fallback(tmp_path: Path) -> None: """Test that _format_and_write falls back to eager when streaming fails.""" df = pl.DataFrame({"a": ["x", "y"], "b": ["1", "2"]}) columns = [pl.col("a"), pl.lit(" "), pl.col("b")] @@ -353,7 +354,7 @@ def test_format_and_write_streaming_fallback(tmp_path): # Force streaming to fail original_collect = pl.LazyFrame.collect - def failing_collect(self, *args, **kwargs): + def failing_collect(self: pl.LazyFrame, *args: Any, **kwargs: Any) -> pl.DataFrame: if kwargs.get("engine") == "streaming": raise RuntimeError("simulated streaming failure") return original_collect(self, *args, **kwargs) @@ -367,7 +368,7 @@ def failing_collect(self, *args, **kwargs): assert content_normal == content_fallback -def test_to_file_lp_same_sign_constraints(tmp_path): +def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None: """Test LP writing when all constraints have the same sign operator.""" m = Model() N = np.arange(5) From 0dbe488395e32683b892aecbcdf981be1d815306 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 2 Feb 2026 13:24:16 +0100 Subject: [PATCH 16/19] fix: mypy --- 
test/test_constraint.py | 14 ++++++++++++++ test/test_io.py | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/test/test_constraint.py b/test/test_constraint.py index 35f49ea2..bfd29a6e 100644 --- a/test/test_constraint.py +++ b/test/test_constraint.py @@ -437,6 +437,20 @@ def test_constraint_to_polars(c: linopy.constraints.Constraint) -> None: assert isinstance(c.to_polars(), pl.DataFrame) +def test_constraint_to_polars_mixed_signs(m: Model, x: linopy.Variable) -> None: + """Test to_polars when a constraint has mixed sign values across dims.""" + # Create a constraint, then manually patch the sign to have mixed values + m.add_constraints(x >= 0, name="mixed") + con = m.constraints["mixed"] + # Replace sign data with mixed signs across the first dimension + n = con.data.sizes["first"] + signs = np.array(["<=" if i % 2 == 0 else ">=" for i in range(n)]) + con.data["sign"] = xr.DataArray(signs, dims=con.data["sign"].dims) + df = con.to_polars() + assert isinstance(df, pl.DataFrame) + assert set(df["sign"].to_list()) == {"<=", ">="} + + def test_constraint_assignment_with_anonymous_constraints( m: Model, x: linopy.Variable, y: linopy.Variable ) -> None: diff --git a/test/test_io.py b/test/test_io.py index c9ce5956..bcd70897 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -383,3 +383,23 @@ def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None: content = fn.read_text() assert "s.t." in content assert "<=" in content + + +def test_to_file_lp_mixed_sign_constraints(tmp_path: Path) -> None: + """Test LP writing when constraints have different sign operators.""" + m = Model() + N = np.arange(5) + x = m.add_variables(coords=[N], name="x") + # Mix of <= and >= constraints in the same container + m.add_constraints(x <= 10, name="upper") + m.add_constraints(x >= 1, name="lower") + m.add_constraints(2 * x == 8, name="eq") + m.add_objective(x.sum()) + + fn = tmp_path / "mixed_sign.lp" + m.to_file(fn) + content = fn.read_text() + assert "s.t." in content + assert "<=" in content + assert ">=" in content + assert "=" in content From a12c8241c9f44b01efcbdbc844c20cbf5a70b0cb Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:09:36 +0100 Subject: [PATCH 17/19] Move kwargs into method for readability --- linopy/io.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/linopy/io.py b/linopy/io.py index fa83e022..decb395c 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -63,9 +63,6 @@ def _format_and_write( Uses Polars streaming engine for better performance when available, with automatic fallback to eager evaluation. 
""" - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) try: formatted = ( df.lazy() @@ -79,7 +76,9 @@ def _format_and_write( exc_info=True, ) formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + formatted.write_csv( + f, separator=" ", null_value="", quote_style="never", include_header=False + ) def signed_number(expr: pl.Expr) -> tuple[pl.Expr, pl.Expr]: From f76d6c7b82c1bf039102325244cc4d03af93cb01 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:34:47 +0100 Subject: [PATCH 18/19] Remove fallback and pin polars >=1.31 --- linopy/io.py | 20 ++++---------------- pyproject.toml | 2 +- test/test_io.py | 32 +------------------------------- 3 files changed, 6 insertions(+), 48 deletions(-) diff --git a/linopy/io.py b/linopy/io.py index decb395c..b23ef10c 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -60,23 +60,11 @@ def _format_and_write( """ Format columns via concat_str and write to file. - Uses Polars streaming engine for better performance when available, - with automatic fallback to eager evaluation. + Uses Polars streaming engine for better memory efficiency. """ - try: - formatted = ( - df.lazy() - .select(pl.concat_str(columns, ignore_nulls=True)) - .collect(engine="streaming") - ) - except Exception: - logger.warning( - "Polars streaming engine failed, falling back to eager evaluation. " - "Please report this at https://github.com/PyPSA/linopy/issues", - exc_info=True, - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv( + df.lazy().select(pl.concat_str(columns, ignore_nulls=True)).collect( + engine="streaming" + ).write_csv( f, separator=" ", null_value="", quote_style="never", include_header=False ) diff --git a/pyproject.toml b/pyproject.toml index 52d5e3d5..621a2d6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "numexpr", "xarray>=2024.2.0", "dask>=0.18.0", - "polars", + "polars>=1.31", "tqdm", "deprecation", "packaging", diff --git a/test/test_io.py b/test/test_io.py index bcd70897..e8ded144 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -7,8 +7,6 @@ import pickle from pathlib import Path -from typing import Any -from unittest.mock import patch import numpy as np import pandas as pd @@ -17,7 +15,7 @@ import xarray as xr from linopy import LESS_EQUAL, Model, available_solvers, read_netcdf -from linopy.io import _format_and_write, signed_number +from linopy.io import signed_number from linopy.testing import assert_model_equal @@ -340,34 +338,6 @@ def test_to_file_lp_with_negative_zero_coefficients(tmp_path: Path) -> None: gurobipy.read(str(fn)) -def test_format_and_write_streaming_fallback(tmp_path: Path) -> None: - """Test that _format_and_write falls back to eager when streaming fails.""" - df = pl.DataFrame({"a": ["x", "y"], "b": ["1", "2"]}) - columns = [pl.col("a"), pl.lit(" "), pl.col("b")] - - # Normal path - fn1 = tmp_path / "normal.lp" - with open(fn1, "wb") as f: - _format_and_write(df, columns, f) - content_normal = fn1.read_text() - - # Force streaming to fail - original_collect = pl.LazyFrame.collect - - def failing_collect(self: pl.LazyFrame, *args: Any, **kwargs: Any) -> pl.DataFrame: - if kwargs.get("engine") == "streaming": - raise RuntimeError("simulated streaming failure") - return original_collect(self, *args, **kwargs) - - fn2 = tmp_path / "fallback.lp" - with patch.object(pl.LazyFrame, "collect", failing_collect): - 
with open(fn2, "wb") as f: - _format_and_write(df, columns, f) - content_fallback = fn2.read_text() - - assert content_normal == content_fallback - - def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None: """Test LP writing when all constraints have the same sign operator.""" m = Model() From ee889a30cd1e7a65b160bbba5917a77b87c24c16 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:50:03 +0100 Subject: [PATCH 19/19] Remove the benchmark_lp_writer.py --- dev-scripts/benchmark_lp_writer.py | 388 ----------------------------- 1 file changed, 388 deletions(-) delete mode 100644 dev-scripts/benchmark_lp_writer.py diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py deleted file mode 100644 index ec45c904..00000000 --- a/dev-scripts/benchmark_lp_writer.py +++ /dev/null @@ -1,388 +0,0 @@ -#!/usr/bin/env python3 -""" -Benchmark script for LP file writing performance. - -Usage: - # Run benchmark and save results to JSON: - python dev-scripts/benchmark_lp_writer.py --output results.json [--label "my branch"] - - # Plot comparison of two result files: - python dev-scripts/benchmark_lp_writer.py --plot master.json this_pr.json -""" - -from __future__ import annotations - -import argparse -import json -import tempfile -import time -from pathlib import Path - -import numpy as np -from numpy.random import default_rng - -from linopy import Model - -rng = default_rng(125) - - -def basic_model(n: int) -> Model: - """Create a basic model with 2*n^2 variables and 2*n^2 constraints.""" - m = Model() - N = np.arange(n) - x = m.add_variables(coords=[N, N], name="x") - y = m.add_variables(coords=[N, N], name="y") - m.add_constraints(x - y >= N, name="c1") - m.add_constraints(x + y >= 0, name="c2") - m.add_objective((2 * x).sum() + y.sum()) - return m - - -def knapsack_model(n: int) -> Model: - """Create a knapsack model with n binary variables and 1 constraint.""" - m = Model() - packages = m.add_variables(coords=[np.arange(n)], binary=True) - weight = rng.integers(1, 100, size=n) - value = rng.integers(1, 100, size=n) - m.add_constraints((weight * packages).sum() <= 200) - m.add_objective(-(value * packages).sum()) - return m - - -def pypsa_model(snapshots: int | None = None) -> Model | None: - """Create a model from the PyPSA SciGrid-DE example network.""" - try: - import pandas as pd - import pypsa - except ImportError: - return None - n = pypsa.examples.scigrid_de() - if snapshots is not None and snapshots > len(n.snapshots): - orig = n.snapshots - repeats = -(-snapshots // len(orig)) - new_index = pd.date_range(orig[0], periods=len(orig) * repeats, freq=orig.freq) - new_index = new_index[:snapshots] - n.set_snapshots(new_index) - n.optimize.create_model() - return n.model - - -def benchmark_model( - label: str, m: Model, iterations: int = 10, io_api: str | None = None -) -> dict: - """Benchmark LP file writing. 
Returns dict with results.""" - to_file_kwargs: dict = dict(progress=False) - if io_api is not None: - to_file_kwargs["io_api"] = io_api - with tempfile.TemporaryDirectory() as tmpdir: - m.to_file(Path(tmpdir) / "warmup.lp", **to_file_kwargs) - times = [] - for i in range(iterations): - fn = Path(tmpdir) / f"bench_{i}.lp" - start = time.perf_counter() - m.to_file(fn, **to_file_kwargs) - times.append(time.perf_counter() - start) - - avg = float(np.mean(times)) - med = float(np.median(times)) - q25 = float(np.percentile(times, 25)) - q75 = float(np.percentile(times, 75)) - nvars = int(m.nvars) - ncons = int(m.ncons) - print( - f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " - f"{med * 1000:7.1f}ms (IQR {q25 * 1000:.1f}–{q75 * 1000:.1f}ms)" - ) - return { - "label": label, - "nvars": nvars, - "ncons": ncons, - "mean_s": avg, - "median_s": med, - "q25_s": q25, - "q75_s": q75, - "times_s": times, - } - - -def run_benchmarks( - io_api: str | None = None, - iterations: int = 10, - model_type: str = "basic", -) -> list[dict]: - """ - Run benchmarks for a single model type across sizes. - - Parameters - ---------- - model_type : str - "basic" (default) — N from 5 to 1000, giving 50 to 2M vars. - "pypsa" — PyPSA SciGrid-DE with varying snapshot counts. - """ - results = [] - - if model_type == "basic": - print("\nbasic_model (2 x N^2 vars, 2 x N^2 constraints):") - for n in [ - 5, - 10, - 20, - 30, - 50, - 75, - 100, - 150, - 200, - 300, - 500, - 750, - 1000, - 1500, - 2000, - ]: - # More iterations for small models to reduce noise - iters = iterations * 5 if n <= 100 else iterations - r = benchmark_model(f"basic N={n}", basic_model(n), iters, io_api=io_api) - r["model"] = "basic" - r["param"] = n - results.append(r) - - elif model_type == "pypsa": - print("\nPyPSA SciGrid-DE (realistic power system model):") - for snaps in [24, 50, 100, 200, 500, 1000]: - m = pypsa_model(snapshots=snaps) - if m is not None: - r = benchmark_model( - f"pypsa {snaps} snaps", m, iterations, io_api=io_api - ) - r["model"] = "pypsa" - r["param"] = snaps - results.append(r) - else: - print(" (skipped, pypsa not installed)") - break - else: - raise ValueError(f"Unknown model_type: {model_type!r}") - - return results - - -def plot_comparison(file_old: str, file_new: str) -> None: - """Create 4-panel comparison plot from two JSON result files.""" - import matplotlib.pyplot as plt - - with open(file_old) as f: - data_old = json.load(f) - with open(file_new) as f: - data_new = json.load(f) - - label_old = data_old.get("label", Path(file_old).stem) - label_new = data_new.get("label", Path(file_new).stem) - - def get_stats(data): - """Extract median and IQR from results, falling back to mean/std.""" - nv = [r["nvars"] for r in data["results"]] - if "median_s" in data["results"][0]: - med = [r["median_s"] * 1000 for r in data["results"]] - lo = [r["q25_s"] * 1000 for r in data["results"]] - hi = [r["q75_s"] * 1000 for r in data["results"]] - else: - med = [r["mean_s"] * 1000 for r in data["results"]] - std = [r["std_s"] * 1000 for r in data["results"]] - lo = [m - s for m, s in zip(med, std)] - hi = [m + s for m, s in zip(med, std)] - return nv, med, lo, hi - - nv_old, med_old, lo_old, hi_old = get_stats(data_old) - nv_new, med_new, lo_new, hi_new = get_stats(data_new) - - color_old, color_new = "#1f77b4", "#ff7f0e" - - fig, axes = plt.subplots(2, 2, figsize=(14, 10)) - fig.suptitle(f"LP Write Performance: {label_old} vs {label_new}", fontsize=14) - - def plot_errorbar(ax, nv, med, lo, hi, **kwargs): - yerr_lo = [m - l for 
m, l in zip(med, lo)] - yerr_hi = [h - m for m, h in zip(med, hi)] - ax.errorbar(nv, med, yerr=[yerr_lo, yerr_hi], capsize=3, **kwargs) - - # Panel 1: All data, log-log - ax = axes[0, 0] - plot_errorbar( - ax, - nv_old, - med_old, - lo_old, - hi_old, - marker="o", - color=color_old, - linestyle="--", - label=label_old, - alpha=0.8, - ) - plot_errorbar( - ax, - nv_new, - med_new, - lo_new, - hi_new, - marker="s", - color=color_new, - linestyle="-", - label=label_new, - alpha=0.8, - ) - ax.set_xscale("log") - ax.set_yscale("log") - ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms, median)") - ax.set_title("IO time vs problem size (log-log)") - ax.legend() - ax.grid(True, alpha=0.3) - - # Panel 2: Speedup ratio (old/new) with IQR-based bounds - ax = axes[0, 1] - if len(nv_old) == len(nv_new): - speedup = [o / n for o, n in zip(med_old, med_new)] - # Conservative bounds: best case = hi_old/lo_new, worst = lo_old/hi_new - speedup_lo = [l / h for l, h in zip(lo_old, hi_new)] - speedup_hi = [h / l for h, l in zip(hi_old, lo_new)] - yerr_lo = [s - sl for s, sl in zip(speedup, speedup_lo)] - yerr_hi = [sh - s for s, sh in zip(speedup, speedup_hi)] - ax.errorbar( - nv_old, - speedup, - yerr=[yerr_lo, yerr_hi], - marker="o", - color="#2ca02c", - capsize=3, - ) - ax.fill_between(nv_old, speedup_lo, speedup_hi, alpha=0.15, color="#2ca02c") - ax.axhline(1.0, color="gray", linestyle="--", alpha=0.5) - ax.set_xscale("log") - ax.set_xlabel("Number of variables") - ax.set_ylabel(f"Speedup ({label_old} / {label_new})") - ax.set_title("Speedup vs problem size") - ax.grid(True, alpha=0.3) - - # Panel 3: Small models (nvars <= 25000) - ax = axes[1, 0] - cutoff = 25000 - idx_old = [i for i, n in enumerate(nv_old) if n <= cutoff] - idx_new = [i for i, n in enumerate(nv_new) if n <= cutoff] - plot_errorbar( - ax, - [nv_old[i] for i in idx_old], - [med_old[i] for i in idx_old], - [lo_old[i] for i in idx_old], - [hi_old[i] for i in idx_old], - marker="o", - color=color_old, - linestyle="--", - label=label_old, - alpha=0.8, - ) - plot_errorbar( - ax, - [nv_new[i] for i in idx_new], - [med_new[i] for i in idx_new], - [lo_new[i] for i in idx_new], - [hi_new[i] for i in idx_new], - marker="s", - color=color_new, - linestyle="-", - label=label_new, - alpha=0.8, - ) - ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms, median)") - ax.set_ylim(bottom=0) - ax.set_title(f"Small models (≤ {cutoff:,} vars)") - ax.legend() - ax.grid(True, alpha=0.3) - - # Panel 4: Large models (nvars > 25000) - ax = axes[1, 1] - idx_old = [i for i, n in enumerate(nv_old) if n > cutoff] - idx_new = [i for i, n in enumerate(nv_new) if n > cutoff] - plot_errorbar( - ax, - [nv_old[i] for i in idx_old], - [med_old[i] for i in idx_old], - [lo_old[i] for i in idx_old], - [hi_old[i] for i in idx_old], - marker="o", - color=color_old, - linestyle="--", - label=label_old, - alpha=0.8, - ) - plot_errorbar( - ax, - [nv_new[i] for i in idx_new], - [med_new[i] for i in idx_new], - [lo_new[i] for i in idx_new], - [hi_new[i] for i in idx_new], - marker="s", - color=color_new, - linestyle="-", - label=label_new, - alpha=0.8, - ) - ax.set_xscale("log") - ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms, median)") - ax.set_title(f"Large models (> {cutoff:,} vars)") - ax.legend() - ax.grid(True, alpha=0.3) - - plt.tight_layout() - out_path = "dev-scripts/benchmark_lp_comparison.png" - plt.savefig(out_path, dpi=150, bbox_inches="tight") - print(f"\nPlot saved to {out_path}") - plt.close() - - -def main() -> 
None: - parser = argparse.ArgumentParser(description="LP write benchmark") - parser.add_argument("--output", "-o", help="Save results to JSON file") - parser.add_argument("--label", default=None, help="Label for this run") - parser.add_argument("--io-api", default=None, help="io_api to pass to to_file()") - parser.add_argument( - "--model", - default="basic", - choices=["basic", "pypsa"], - help="Model type to benchmark (default: basic)", - ) - parser.add_argument( - "--plot", - nargs=2, - metavar=("OLD", "NEW"), - help="Plot comparison from two JSON files", - ) - args = parser.parse_args() - - if args.plot: - plot_comparison(args.plot[0], args.plot[1]) - return - - iterations = 10 - label = args.label or "benchmark" - print(f"LP file writing benchmark ({iterations} iterations, label={label!r})") - print("=" * 90) - - results = run_benchmarks( - io_api=args.io_api, iterations=iterations, model_type=args.model - ) - - output = {"label": label, "results": results} - if args.output: - with open(args.output, "w") as f: - json.dump(output, f, indent=2) - print(f"\nResults saved to {args.output}") - else: - print("\n(use --output FILE to save results for later plotting)") - - -if __name__ == "__main__": - main()