From 86232e8bf1402647a7fa9acc10d964ea934d155e Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 21:02:40 +0100 Subject: [PATCH 01/19] perf: use Polars streaming engine for LP file writing Extract _format_and_write() helper that uses lazy().collect(engine="streaming") with automatic fallback, replacing 7 instances of df.select(concat_str(...)).write_csv(...). --- linopy/io.py | 63 +++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/linopy/io.py b/linopy/io.py index 56fe033d..8b53fea1 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -54,6 +54,29 @@ def clean_name(name: str) -> str: coord_sanitizer = str.maketrans("[,]", "(,)", " ") +def _format_and_write( + df: pl.DataFrame, columns: list[pl.Expr], f: BufferedWriter +) -> None: + """ + Format columns via concat_str and write to file. + + Uses Polars streaming engine for better performance when available, + with automatic fallback to eager evaluation. + """ + kwargs: Any = dict( + separator=" ", null_value="", quote_style="never", include_header=False + ) + try: + formatted = ( + df.lazy() + .select(pl.concat_str(columns, ignore_nulls=True)) + .collect(engine="streaming") + ) + except Exception: + formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) + formatted.write_csv(f, **kwargs) + + def signed_number(expr: pl.Expr) -> tuple[pl.Expr, pl.Expr]: """ Return polars expressions for a signed number string, handling -0.0 correctly. @@ -155,10 +178,7 @@ def objective_write_linear_terms( *signed_number(pl.col("coeffs")), *print_variable(pl.col("vars")), ] - df = df.select(pl.concat_str(cols, ignore_nulls=True)) - df.write_csv( - f, separator=" ", null_value="", quote_style="never", include_header=False - ) + _format_and_write(df, cols, f) def objective_write_quadratic_terms( @@ -171,10 +191,7 @@ def objective_write_quadratic_terms( *print_variable(pl.col("vars2")), ] f.write(b"+ [\n") - df = df.select(pl.concat_str(cols, ignore_nulls=True)) - df.write_csv( - f, separator=" ", null_value="", quote_style="never", include_header=False - ) + _format_and_write(df, cols, f) f.write(b"] / 2\n") @@ -254,11 +271,7 @@ def bounds_to_file( *signed_number(pl.col("upper")), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def binaries_to_file( @@ -296,11 +309,7 @@ def binaries_to_file( *print_variable(pl.col("labels")), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def integers_to_file( @@ -339,11 +348,7 @@ def integers_to_file( *print_variable(pl.col("labels")), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def sos_to_file( @@ -399,11 +404,7 @@ def sos_to_file( pl.col("var_weights"), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) def constraints_to_file( @@ -487,11 +488,7 
@@ def constraints_to_file( pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)), ] - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + _format_and_write(df, columns, f) # in the future, we could use lazy dataframes when they support appending # tp existent files From b1e9864592374957f58696da0a6d215efb542de0 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 22:07:47 +0100 Subject: [PATCH 02/19] fix: log warning with traceback when Polars streaming fallback triggers --- linopy/io.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/linopy/io.py b/linopy/io.py index 8b53fea1..cd83f6b8 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -73,6 +73,11 @@ def _format_and_write( .collect(engine="streaming") ) except Exception: + logger.warning( + "Polars streaming engine failed, falling back to eager evaluation. " + "Please report this at https://github.com/PyPSA/linopy/issues", + exc_info=True, + ) formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) formatted.write_csv(f, **kwargs) From d30914d1b619ac5e668706a3062be0973313e1ea Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 17:24:23 +0100 Subject: [PATCH 03/19] perf: speed up LP constraint writing by replacing concat+sort with join Replace the vertical concat + sort approach in Constraint.to_polars() with an inner join, so every row has all columns populated. This removes the need for the group_by validation step in constraints_to_file() and simplifies the formatting expressions by eliminating null checks on coeffs/vars columns. 
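For illustration only (editor's sketch, not part of the patch, toy values assumed): the inner join on "labels" broadcasts each constraint's sign and rhs onto every one of its term rows, so the joined frame has no nulls and the LP formatting expressions need no per-column null checks. Column names match those used by to_polars().

    import polars as pl

    # one row per (constraint, term)
    long = pl.DataFrame(
        {"labels": [0, 0, 1], "coeffs": [1.0, -1.0, 2.0], "vars": [3, 4, 3]}
    )
    # one row per constraint
    short = pl.DataFrame({"labels": [0, 1], "sign": [">=", "<="], "rhs": [0.0, 5.0]})

    # every term row now carries its constraint's sign and rhs
    df = long.join(short, on="labels", how="inner")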
--- linopy/constraints.py | 6 +----- linopy/io.py | 37 +++++++------------------------------ 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index 291beb1d..e6216dba 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -632,11 +632,7 @@ def to_polars(self) -> pl.DataFrame: short = filter_nulls_polars(short) check_has_nulls_polars(short, name=f"{self.type} {self.name}") - df = pl.concat([short, long], how="diagonal_relaxed").sort(["labels", "rhs"]) - # delete subsequent non-null rhs (happens is all vars per label are -1) - is_non_null = df["rhs"].is_not_null() - prev_non_is_null = is_non_null.shift(1).fill_null(False) - df = df.filter(is_non_null & ~prev_non_is_null | ~is_non_null) + df = long.join(short, on="labels", how="inner") return df[["labels", "coeffs", "vars", "sign", "rhs"]] # Wrapped function which would convert variable to dataarray diff --git a/linopy/io.py b/linopy/io.py index cd83f6b8..f6cac315 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -446,48 +446,25 @@ def constraints_to_file( if df.height == 0: continue - # Ensure each constraint has both coefficient and RHS terms - analysis = df.group_by("labels").agg( - [ - pl.col("coeffs").is_not_null().sum().alias("coeff_rows"), - pl.col("sign").is_not_null().sum().alias("rhs_rows"), - ] - ) - - valid = analysis.filter( - (pl.col("coeff_rows") > 0) & (pl.col("rhs_rows") > 0) - ) - - if valid.height == 0: - continue - - # Keep only constraints that have both parts - df = df.join(valid.select("labels"), on="labels", how="inner") - # Sort by labels and mark first/last occurrences df = df.sort("labels").with_columns( [ - pl.when(pl.col("labels").is_first_distinct()) - .then(pl.col("labels")) - .otherwise(pl.lit(None)) - .alias("labels_first"), + pl.col("labels").is_first_distinct().alias("is_first_in_group"), (pl.col("labels") != pl.col("labels").shift(-1)) .fill_null(True) .alias("is_last_in_group"), ] ) - row_labels = print_constraint(pl.col("labels_first")) + row_labels = print_constraint(pl.col("labels")) col_labels = print_variable(pl.col("vars")) columns = [ - pl.when(pl.col("labels_first").is_not_null()).then(row_labels[0]), - pl.when(pl.col("labels_first").is_not_null()).then(row_labels[1]), - pl.when(pl.col("labels_first").is_not_null()) - .then(pl.lit(":\n")) - .alias(":"), + pl.when(pl.col("is_first_in_group")).then(row_labels[0]), + pl.when(pl.col("is_first_in_group")).then(row_labels[1]), + pl.when(pl.col("is_first_in_group")).then(pl.lit(":\n")).alias(":"), *signed_number(pl.col("coeffs")), - pl.when(pl.col("vars").is_not_null()).then(col_labels[0]), - pl.when(pl.col("vars").is_not_null()).then(col_labels[1]), + col_labels[0], + col_labels[1], pl.when(pl.col("is_last_in_group")).then(pl.col("sign")), pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")), pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)), From d15ff4055a4f704d27b76829a9344b92f1b4b0aa Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 17:34:11 +0100 Subject: [PATCH 04/19] fix: missing space in lp file --- linopy/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/linopy/io.py b/linopy/io.py index f6cac315..fa83e022 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -465,6 +465,7 @@ def constraints_to_file( *signed_number(pl.col("coeffs")), col_labels[0], col_labels[1], + pl.when(pl.col("is_last_in_group")).then(pl.lit("\n")), pl.when(pl.col("is_last_in_group")).then(pl.col("sign")), 
pl.when(pl.col("is_last_in_group")).then(pl.lit(" ")), pl.when(pl.col("is_last_in_group")).then(pl.col("rhs").cast(pl.String)), From 96a2e85816319279414c986fb66231912ce61210 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 18:04:07 +0100 Subject: [PATCH 05/19] perf: skip group_terms when unnecessary and avoid xarray broadcast for short DataFrame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Skip group_terms_polars when _term dim size is 1 (no duplicate vars) - Build the short DataFrame (labels, rhs, sign) directly with numpy instead of going through xarray.broadcast + to_polars - Add sign column via pl.lit when uniform (common case), avoiding costly numpy string array → polars conversion Co-Authored-By: Claude Opus 4.5 --- linopy/constraints.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index e6216dba..c239be60 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -42,7 +42,6 @@ get_label_position, group_terms_polars, has_optimized_model, - infer_schema_polars, iterate_slices, maybe_replace_signs, print_coord, @@ -622,14 +621,30 @@ def to_polars(self) -> pl.DataFrame: long = to_polars(ds[keys]) long = filter_nulls_polars(long) - long = group_terms_polars(long) + if ds.sizes.get("_term", 1) > 1: + long = group_terms_polars(long) check_has_nulls_polars(long, name=f"{self.type} {self.name}") - short_ds = ds[[k for k in ds if "_term" not in ds[k].dims]] - schema = infer_schema_polars(short_ds) - schema["sign"] = pl.Enum(["=", "<=", ">="]) - short = to_polars(short_ds, schema=schema) + # Build short DataFrame (labels, rhs) without xarray broadcast. + # Add sign separately to avoid costly numpy string→polars conversion. + labels_flat = ds["labels"].values.reshape(-1) + rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1) + short = pl.DataFrame({"labels": labels_flat, "rhs": rhs_flat}) short = filter_nulls_polars(short) + + sign_values = ds["sign"].values + unique_signs = np.unique(sign_values) + if len(unique_signs) == 1: + short = short.with_columns( + pl.lit(unique_signs[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign") + ) + else: + sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) + # Apply same mask as filter_nulls (labels != -1) + sign_flat = sign_flat[labels_flat != -1] + short = short.with_columns( + pl.Series("sign", sign_flat, dtype=pl.Enum(["=", "<=", ">="])) + ) check_has_nulls_polars(short, name=f"{self.type} {self.name}") df = long.join(short, on="labels", how="inner") From 95cdec7668a78391d96f13fa1396aafe1eff72ac Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 18:05:11 +0100 Subject: [PATCH 06/19] perf: skip group_terms in LinearExpression.to_polars when no duplicate vars Check n_unique before running the expensive group_by+sum. When all variable references are unique (common case for objectives), this saves ~31ms per 320k terms. 
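For illustration only (editor's sketch, not part of the patch, toy frame assumed): the check compares n_unique against the frame height and only groups when a variable reference repeats; the patch below delegates the duplicate case to group_terms_polars, so the group_by here only stands in for that call.

    import polars as pl

    df = pl.DataFrame({"vars": [3, 4, 3], "coeffs": [1.0, 2.0, 0.5]})

    if df["vars"].n_unique() < df.height:  # duplicate variable references
        df = df.group_by("vars", maintain_order=True).agg(pl.col("coeffs").sum())
    # otherwise the group_by + sum is skipped entirely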
Co-Authored-By: Claude Opus 4.5 --- linopy/expressions.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/linopy/expressions.py b/linopy/expressions.py index 10e243de..7550f2d5 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1463,7 +1463,13 @@ def to_polars(self) -> pl.DataFrame: df = to_polars(self.data) df = filter_nulls_polars(df) - df = group_terms_polars(df) + if df["vars"].n_unique() < df.height: + df = group_terms_polars(df) + else: + # Match column order of group_terms (group-by keys, coeffs, rest) + varcols = [c for c in df.columns if c.startswith("vars")] + rest = [c for c in df.columns if c not in varcols and c != "coeffs"] + df = df.select(varcols + ["coeffs"] + rest) check_has_nulls_polars(df, name=self.type) return df From 489f04d2e845fa3d232731ff9cb5a9b2666d4812 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 18:58:53 +0100 Subject: [PATCH 07/19] perf: reduce per-constraint overhead in Constraint.to_polars() Replace np.unique with faster numpy equality check for sign uniformity. Eliminate redundant filter_nulls_polars and check_has_nulls_polars on the short DataFrame by applying the labels mask directly during construction. Co-Authored-By: Claude Opus 4.5 --- linopy/constraints.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index c239be60..0bd124f0 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -625,27 +625,32 @@ def to_polars(self) -> pl.DataFrame: long = group_terms_polars(long) check_has_nulls_polars(long, name=f"{self.type} {self.name}") - # Build short DataFrame (labels, rhs) without xarray broadcast. - # Add sign separately to avoid costly numpy string→polars conversion. + # Build short DataFrame (labels, rhs, sign) without xarray broadcast. + # Apply labels mask directly instead of filter_nulls_polars. 
labels_flat = ds["labels"].values.reshape(-1) + mask = labels_flat != -1 + labels_masked = labels_flat[mask] rhs_flat = np.broadcast_to(ds["rhs"].values, ds["labels"].shape).reshape(-1) - short = pl.DataFrame({"labels": labels_flat, "rhs": rhs_flat}) - short = filter_nulls_polars(short) sign_values = ds["sign"].values - unique_signs = np.unique(sign_values) - if len(unique_signs) == 1: - short = short.with_columns( - pl.lit(unique_signs[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign") + sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) + all_same_sign = ( + sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all() + ) + + short_data: dict = { + "labels": labels_masked, + "rhs": rhs_flat[mask], + } + if all_same_sign: + short = pl.DataFrame(short_data).with_columns( + pl.lit(sign_flat[0]).cast(pl.Enum(["=", "<=", ">="])).alias("sign") ) else: - sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) - # Apply same mask as filter_nulls (labels != -1) - sign_flat = sign_flat[labels_flat != -1] - short = short.with_columns( - pl.Series("sign", sign_flat, dtype=pl.Enum(["=", "<=", ">="])) + short_data["sign"] = pl.Series( + "sign", sign_flat[mask], dtype=pl.Enum(["=", "<=", ">="]) ) - check_has_nulls_polars(short, name=f"{self.type} {self.name}") + short = pl.DataFrame(short_data) df = long.join(short, on="labels", how="inner") return df[["labels", "coeffs", "vars", "sign", "rhs"]] From 0b413ddb269df74bc05339fba0d2e89d5c4995ea Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 21:04:38 +0100 Subject: [PATCH 08/19] fix: handle empty constraint slices in sign_flat check Guard against IndexError when sign_flat is empty (no valid labels) by checking len(sign_flat) > 0 before accessing sign_flat[0]. 
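For illustration only (editor's sketch, not part of the patch, empty array and dtype assumed): the short-circuit keeps the uniformity check safe on an empty constraint slice, because the `and` never reaches sign_flat[0] when there are no rows.

    import numpy as np

    sign_flat = np.array([], dtype="<U2")  # slice with no valid labels
    all_same_sign = len(sign_flat) > 0 and (
        sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all()
    )
    assert all_same_sign is False  # no IndexError raised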
Co-Authored-By: Claude Opus 4.5 --- linopy/constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linopy/constraints.py b/linopy/constraints.py index 0bd124f0..3cea9f36 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -634,7 +634,7 @@ def to_polars(self) -> pl.DataFrame: sign_values = ds["sign"].values sign_flat = np.broadcast_to(sign_values, ds["labels"].shape).reshape(-1) - all_same_sign = ( + all_same_sign = len(sign_flat) > 0 and ( sign_flat[0] == sign_flat[-1] and (sign_flat[0] == sign_flat).all() ) From 9f35550d9af6d29f95f469e7ad36697f46b0a65c Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 22:17:59 +0100 Subject: [PATCH 09/19] docs: add LP write speed improvement to release notes Co-Authored-By: Claude Opus 4.5 --- doc/release_notes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index b727c22d..cdca2be1 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -6,6 +6,7 @@ Upcoming Version * Fix docs (pick highs solver) * Add the `sphinx-copybutton` to the documentation +* Speed up LP file writing by 2-2.7x on large models through Polars streaming engine, join-based constraint assembly, and reduced per-constraint overhead Version 0.6.1 -------------- From 1896eeef5be6f64e1e3b985b1ebfa213d2ca797a Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 22:50:53 +0100 Subject: [PATCH 10/19] bench: add LP write benchmark script with plotting --- dev-scripts/benchmark_lp_writer.py | 335 +++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 dev-scripts/benchmark_lp_writer.py diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py new file mode 100644 index 00000000..63eeffaf --- /dev/null +++ b/dev-scripts/benchmark_lp_writer.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python3 +""" +Benchmark script for LP file writing performance. 
+ +Usage: + # Run benchmark and save results to JSON: + python dev-scripts/benchmark_lp_writer.py --output results.json [--label "my branch"] + + # Plot comparison of two result files: + python dev-scripts/benchmark_lp_writer.py --plot master.json this_pr.json +""" + +from __future__ import annotations + +import argparse +import json +import tempfile +import time +from pathlib import Path + +import numpy as np +from numpy.random import default_rng + +from linopy import Model + +rng = default_rng(125) + + +def basic_model(n: int) -> Model: + """Create a basic model with 2*n^2 variables and 2*n^2 constraints.""" + m = Model() + N = np.arange(n) + x = m.add_variables(coords=[N, N], name="x") + y = m.add_variables(coords=[N, N], name="y") + m.add_constraints(x - y >= N, name="c1") + m.add_constraints(x + y >= 0, name="c2") + m.add_objective((2 * x).sum() + y.sum()) + return m + + +def knapsack_model(n: int) -> Model: + """Create a knapsack model with n binary variables and 1 constraint.""" + m = Model() + packages = m.add_variables(coords=[np.arange(n)], binary=True) + weight = rng.integers(1, 100, size=n) + value = rng.integers(1, 100, size=n) + m.add_constraints((weight * packages).sum() <= 200) + m.add_objective(-(value * packages).sum()) + return m + + +def pypsa_model(snapshots: int | None = None) -> Model | None: + """Create a model from the PyPSA SciGrid-DE example network.""" + try: + import pandas as pd + import pypsa + except ImportError: + return None + n = pypsa.examples.scigrid_de() + if snapshots is not None and snapshots > len(n.snapshots): + orig = n.snapshots + repeats = -(-snapshots // len(orig)) + new_index = pd.date_range(orig[0], periods=len(orig) * repeats, freq=orig.freq) + new_index = new_index[:snapshots] + n.set_snapshots(new_index) + n.optimize.create_model() + return n.model + + +def benchmark_model( + label: str, m: Model, iterations: int = 10, io_api: str | None = None +) -> dict: + """Benchmark LP file writing. Returns dict with results.""" + to_file_kwargs: dict = dict(progress=False) + if io_api is not None: + to_file_kwargs["io_api"] = io_api + with tempfile.TemporaryDirectory() as tmpdir: + m.to_file(Path(tmpdir) / "warmup.lp", **to_file_kwargs) + times = [] + for i in range(iterations): + fn = Path(tmpdir) / f"bench_{i}.lp" + start = time.perf_counter() + m.to_file(fn, **to_file_kwargs) + times.append(time.perf_counter() - start) + + avg = float(np.mean(times)) + std = float(np.std(times)) + nvars = int(m.nvars) + ncons = int(m.ncons) + print( + f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " + f"{avg * 1000:7.1f}ms ± {std * 1000:5.1f}ms" + ) + return { + "label": label, + "nvars": nvars, + "ncons": ncons, + "mean_s": avg, + "std_s": std, + "times_s": times, + } + + +def run_benchmarks( + io_api: str | None = None, + iterations: int = 10, + model_type: str = "basic", +) -> list[dict]: + """ + Run benchmarks for a single model type across sizes. + + Parameters + ---------- + model_type : str + "basic" (default) — N from 5 to 1000, giving 50 to 2M vars. + "pypsa" — PyPSA SciGrid-DE with varying snapshot counts. 
+ """ + results = [] + + if model_type == "basic": + print("\nbasic_model (2 x N^2 vars, 2 x N^2 constraints):") + for n in [5, 10, 20, 30, 50, 75, 100, 150, 200, 300, 500, 750, 1000]: + r = benchmark_model( + f"basic N={n}", basic_model(n), iterations, io_api=io_api + ) + r["model"] = "basic" + r["param"] = n + results.append(r) + + elif model_type == "pypsa": + print("\nPyPSA SciGrid-DE (realistic power system model):") + for snaps in [24, 50, 100, 200, 500, 1000]: + m = pypsa_model(snapshots=snaps) + if m is not None: + r = benchmark_model( + f"pypsa {snaps} snaps", m, iterations, io_api=io_api + ) + r["model"] = "pypsa" + r["param"] = snaps + results.append(r) + else: + print(" (skipped, pypsa not installed)") + break + else: + raise ValueError(f"Unknown model_type: {model_type!r}") + + return results + + +def plot_comparison(file_old: str, file_new: str) -> None: + """Create 4-panel comparison plot from two JSON result files.""" + import matplotlib.pyplot as plt + + with open(file_old) as f: + data_old = json.load(f) + with open(file_new) as f: + data_new = json.load(f) + + label_old = data_old.get("label", Path(file_old).stem) + label_new = data_new.get("label", Path(file_new).stem) + + nv_old = [r["nvars"] for r in data_old["results"]] + ms_old = [r["mean_s"] * 1000 for r in data_old["results"]] + std_old = [r["std_s"] * 1000 for r in data_old["results"]] + nv_new = [r["nvars"] for r in data_new["results"]] + ms_new = [r["mean_s"] * 1000 for r in data_new["results"]] + std_new = [r["std_s"] * 1000 for r in data_new["results"]] + + color_old, color_new = "#1f77b4", "#ff7f0e" + + fig, axes = plt.subplots(2, 2, figsize=(14, 10)) + fig.suptitle(f"LP Write Performance: {label_old} vs {label_new}", fontsize=14) + + # Panel 1: All data, log-log + ax = axes[0, 0] + ax.errorbar( + nv_old, + ms_old, + yerr=std_old, + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + capsize=3, + ) + ax.errorbar( + nv_new, + ms_new, + yerr=std_new, + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + capsize=3, + ) + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel("Write time (ms)") + ax.set_title("IO time vs problem size (log-log)") + ax.legend() + ax.grid(True, alpha=0.3) + + # Panel 2: Speedup ratio (old/new) + ax = axes[0, 1] + if len(nv_old) == len(nv_new): + speedup = [o / n for o, n in zip(ms_old, ms_new)] + ax.plot(nv_old, speedup, marker="o", color="#2ca02c") + ax.fill_between(nv_old, 1.0, speedup, alpha=0.15, color="#2ca02c") + ax.axhline(1.0, color="gray", linestyle="--", alpha=0.5) + ax.set_xscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel(f"Speedup ({label_old} / {label_new})") + ax.set_title("Speedup vs problem size") + ax.grid(True, alpha=0.3) + + # Panel 3: Small models (nvars <= 25000) + ax = axes[1, 0] + cutoff = 25000 + idx_old = [i for i, n in enumerate(nv_old) if n <= cutoff] + idx_new = [i for i, n in enumerate(nv_new) if n <= cutoff] + ax.errorbar( + [nv_old[i] for i in idx_old], + [ms_old[i] for i in idx_old], + yerr=[std_old[i] for i in idx_old], + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + capsize=3, + ) + ax.errorbar( + [nv_new[i] for i in idx_new], + [ms_new[i] for i in idx_new], + yerr=[std_new[i] for i in idx_new], + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + capsize=3, + ) + ax.set_xlabel("Number of variables") + ax.set_ylabel("Write time (ms)") + ax.set_ylim(bottom=0) + 
ax.set_title(f"Small models (≤ {cutoff:,} vars)") + ax.legend() + ax.grid(True, alpha=0.3) + + # Panel 4: Large models (nvars > 25000) + ax = axes[1, 1] + idx_old = [i for i, n in enumerate(nv_old) if n > cutoff] + idx_new = [i for i, n in enumerate(nv_new) if n > cutoff] + ax.errorbar( + [nv_old[i] for i in idx_old], + [ms_old[i] for i in idx_old], + yerr=[std_old[i] for i in idx_old], + marker="o", + color=color_old, + linestyle="--", + label=label_old, + alpha=0.8, + capsize=3, + ) + ax.errorbar( + [nv_new[i] for i in idx_new], + [ms_new[i] for i in idx_new], + yerr=[std_new[i] for i in idx_new], + marker="s", + color=color_new, + linestyle="-", + label=label_new, + alpha=0.8, + capsize=3, + ) + ax.set_xscale("log") + ax.set_xlabel("Number of variables") + ax.set_ylabel("Write time (ms)") + ax.set_title(f"Large models (> {cutoff:,} vars)") + ax.legend() + ax.grid(True, alpha=0.3) + + plt.tight_layout() + out_path = "dev-scripts/benchmark_lp_comparison.png" + plt.savefig(out_path, dpi=150, bbox_inches="tight") + print(f"\nPlot saved to {out_path}") + plt.close() + + +def main() -> None: + parser = argparse.ArgumentParser(description="LP write benchmark") + parser.add_argument("--output", "-o", help="Save results to JSON file") + parser.add_argument("--label", default=None, help="Label for this run") + parser.add_argument("--io-api", default=None, help="io_api to pass to to_file()") + parser.add_argument( + "--model", + default="basic", + choices=["basic", "pypsa"], + help="Model type to benchmark (default: basic)", + ) + parser.add_argument( + "--plot", + nargs=2, + metavar=("OLD", "NEW"), + help="Plot comparison from two JSON files", + ) + args = parser.parse_args() + + if args.plot: + plot_comparison(args.plot[0], args.plot[1]) + return + + iterations = 10 + label = args.label or "benchmark" + print(f"LP file writing benchmark ({iterations} iterations, label={label!r})") + print("=" * 90) + + results = run_benchmarks( + io_api=args.io_api, iterations=iterations, model_type=args.model + ) + + output = {"label": label, "results": results} + if args.output: + with open(args.output, "w") as f: + json.dump(output, f, indent=2) + print(f"\nResults saved to {args.output}") + else: + print("\n(use --output FILE to save results for later plotting)") + + +if __name__ == "__main__": + main() From 68f1adc8fddb66b20ea57da50ff515112bb1e30f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 23:06:49 +0100 Subject: [PATCH 11/19] bench: larger model --- dev-scripts/benchmark_lp_writer.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py index 63eeffaf..df0b4b5b 100644 --- a/dev-scripts/benchmark_lp_writer.py +++ b/dev-scripts/benchmark_lp_writer.py @@ -119,7 +119,23 @@ def run_benchmarks( if model_type == "basic": print("\nbasic_model (2 x N^2 vars, 2 x N^2 constraints):") - for n in [5, 10, 20, 30, 50, 75, 100, 150, 200, 300, 500, 750, 1000]: + for n in [ + 5, + 10, + 20, + 30, + 50, + 75, + 100, + 150, + 200, + 300, + 500, + 750, + 1000, + 1500, + 2000, + ]: r = benchmark_model( f"basic N={n}", basic_model(n), iterations, io_api=io_api ) From a293b6471b9bd21393307d02d002da179dbc4cac Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sat, 31 Jan 2026 23:30:20 +0100 Subject: [PATCH 12/19] perf: Add maybe_group_terms_polars() helper in common.py that checks for duplicate (labels, vars) pairs before 
calling group_terms_polars. Use it in both Constraint.to_polars() and LinearExpression.to_polars() to avoid expensive group_by when terms already reference distinct variables --- linopy/common.py | 19 +++++++++++++++++++ linopy/constraints.py | 4 ++-- linopy/expressions.py | 9 ++------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/linopy/common.py b/linopy/common.py index 7dd97b65..e6eef583 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -449,6 +449,25 @@ def group_terms_polars(df: pl.DataFrame) -> pl.DataFrame: return df +def maybe_group_terms_polars(df: pl.DataFrame) -> pl.DataFrame: + """ + Group terms only if there are duplicate (labels, vars) pairs. + + This avoids the expensive group_by operation when terms already + reference distinct variables (e.g. ``x - y`` has ``_term=2`` but + no duplicates). When skipping, columns are reordered to match the + output of ``group_terms_polars``. + """ + varcols = [c for c in df.columns if c.startswith("vars")] + keys = [c for c in ["labels"] + varcols if c in df.columns] + key_count = df.select(pl.struct(keys).n_unique()).item() + if key_count < df.height: + return group_terms_polars(df) + # Match column order of group_terms (group-by keys, coeffs, rest) + rest = [c for c in df.columns if c not in keys and c != "coeffs"] + return df.select(keys + ["coeffs"] + rest) + + def save_join(*dataarrays: DataArray, integer_dtype: bool = False) -> Dataset: """ Join multiple xarray Dataarray's to a Dataset and warn if coordinates are not equal. diff --git a/linopy/constraints.py b/linopy/constraints.py index 3cea9f36..d3ebef19 100644 --- a/linopy/constraints.py +++ b/linopy/constraints.py @@ -40,9 +40,9 @@ generate_indices_for_printout, get_dims_with_index_levels, get_label_position, - group_terms_polars, has_optimized_model, iterate_slices, + maybe_group_terms_polars, maybe_replace_signs, print_coord, print_single_constraint, @@ -622,7 +622,7 @@ def to_polars(self) -> pl.DataFrame: long = filter_nulls_polars(long) if ds.sizes.get("_term", 1) > 1: - long = group_terms_polars(long) + long = maybe_group_terms_polars(long) check_has_nulls_polars(long, name=f"{self.type} {self.name}") # Build short DataFrame (labels, rhs, sign) without xarray broadcast. 
diff --git a/linopy/expressions.py b/linopy/expressions.py index 7550f2d5..cf37b937 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -60,6 +60,7 @@ has_optimized_model, is_constant, iterate_slices, + maybe_group_terms_polars, print_coord, print_single_expression, to_dataframe, @@ -1463,13 +1464,7 @@ def to_polars(self) -> pl.DataFrame: df = to_polars(self.data) df = filter_nulls_polars(df) - if df["vars"].n_unique() < df.height: - df = group_terms_polars(df) - else: - # Match column order of group_terms (group-by keys, coeffs, rest) - varcols = [c for c in df.columns if c.startswith("vars")] - rest = [c for c in df.columns if c not in varcols and c != "coeffs"] - df = df.select(varcols + ["coeffs"] + rest) + df = maybe_group_terms_polars(df) check_has_nulls_polars(df, name=self.type) return df From 04c4beadd3ed977f42855161e107fe351f9d9d0d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 1 Feb 2026 00:07:53 +0100 Subject: [PATCH 13/19] Add variance to plot --- dev-scripts/benchmark_lp_writer.py | 123 +++++++++++++++++++---------- 1 file changed, 80 insertions(+), 43 deletions(-) diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py index df0b4b5b..ec45c904 100644 --- a/dev-scripts/benchmark_lp_writer.py +++ b/dev-scripts/benchmark_lp_writer.py @@ -84,19 +84,23 @@ def benchmark_model( times.append(time.perf_counter() - start) avg = float(np.mean(times)) - std = float(np.std(times)) + med = float(np.median(times)) + q25 = float(np.percentile(times, 25)) + q75 = float(np.percentile(times, 75)) nvars = int(m.nvars) ncons = int(m.ncons) print( f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " - f"{avg * 1000:7.1f}ms ± {std * 1000:5.1f}ms" + f"{med * 1000:7.1f}ms (IQR {q25 * 1000:.1f}–{q75 * 1000:.1f}ms)" ) return { "label": label, "nvars": nvars, "ncons": ncons, "mean_s": avg, - "std_s": std, + "median_s": med, + "q25_s": q25, + "q75_s": q75, "times_s": times, } @@ -136,9 +140,9 @@ def run_benchmarks( 1500, 2000, ]: - r = benchmark_model( - f"basic N={n}", basic_model(n), iterations, io_api=io_api - ) + # More iterations for small models to reduce noise + iters = iterations * 5 if n <= 100 else iterations + r = benchmark_model(f"basic N={n}", basic_model(n), iters, io_api=io_api) r["model"] = "basic" r["param"] = n results.append(r) @@ -175,56 +179,85 @@ def plot_comparison(file_old: str, file_new: str) -> None: label_old = data_old.get("label", Path(file_old).stem) label_new = data_new.get("label", Path(file_new).stem) - nv_old = [r["nvars"] for r in data_old["results"]] - ms_old = [r["mean_s"] * 1000 for r in data_old["results"]] - std_old = [r["std_s"] * 1000 for r in data_old["results"]] - nv_new = [r["nvars"] for r in data_new["results"]] - ms_new = [r["mean_s"] * 1000 for r in data_new["results"]] - std_new = [r["std_s"] * 1000 for r in data_new["results"]] + def get_stats(data): + """Extract median and IQR from results, falling back to mean/std.""" + nv = [r["nvars"] for r in data["results"]] + if "median_s" in data["results"][0]: + med = [r["median_s"] * 1000 for r in data["results"]] + lo = [r["q25_s"] * 1000 for r in data["results"]] + hi = [r["q75_s"] * 1000 for r in data["results"]] + else: + med = [r["mean_s"] * 1000 for r in data["results"]] + std = [r["std_s"] * 1000 for r in data["results"]] + lo = [m - s for m, s in zip(med, std)] + hi = [m + s for m, s in zip(med, std)] + return nv, med, lo, hi + + nv_old, med_old, lo_old, hi_old = get_stats(data_old) + nv_new, med_new, lo_new, 
hi_new = get_stats(data_new) color_old, color_new = "#1f77b4", "#ff7f0e" fig, axes = plt.subplots(2, 2, figsize=(14, 10)) fig.suptitle(f"LP Write Performance: {label_old} vs {label_new}", fontsize=14) + def plot_errorbar(ax, nv, med, lo, hi, **kwargs): + yerr_lo = [m - l for m, l in zip(med, lo)] + yerr_hi = [h - m for m, h in zip(med, hi)] + ax.errorbar(nv, med, yerr=[yerr_lo, yerr_hi], capsize=3, **kwargs) + # Panel 1: All data, log-log ax = axes[0, 0] - ax.errorbar( + plot_errorbar( + ax, nv_old, - ms_old, - yerr=std_old, + med_old, + lo_old, + hi_old, marker="o", color=color_old, linestyle="--", label=label_old, alpha=0.8, - capsize=3, ) - ax.errorbar( + plot_errorbar( + ax, nv_new, - ms_new, - yerr=std_new, + med_new, + lo_new, + hi_new, marker="s", color=color_new, linestyle="-", label=label_new, alpha=0.8, - capsize=3, ) ax.set_xscale("log") ax.set_yscale("log") ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms)") + ax.set_ylabel("Write time (ms, median)") ax.set_title("IO time vs problem size (log-log)") ax.legend() ax.grid(True, alpha=0.3) - # Panel 2: Speedup ratio (old/new) + # Panel 2: Speedup ratio (old/new) with IQR-based bounds ax = axes[0, 1] if len(nv_old) == len(nv_new): - speedup = [o / n for o, n in zip(ms_old, ms_new)] - ax.plot(nv_old, speedup, marker="o", color="#2ca02c") - ax.fill_between(nv_old, 1.0, speedup, alpha=0.15, color="#2ca02c") + speedup = [o / n for o, n in zip(med_old, med_new)] + # Conservative bounds: best case = hi_old/lo_new, worst = lo_old/hi_new + speedup_lo = [l / h for l, h in zip(lo_old, hi_new)] + speedup_hi = [h / l for h, l in zip(hi_old, lo_new)] + yerr_lo = [s - sl for s, sl in zip(speedup, speedup_lo)] + yerr_hi = [sh - s for s, sh in zip(speedup, speedup_hi)] + ax.errorbar( + nv_old, + speedup, + yerr=[yerr_lo, yerr_hi], + marker="o", + color="#2ca02c", + capsize=3, + ) + ax.fill_between(nv_old, speedup_lo, speedup_hi, alpha=0.15, color="#2ca02c") ax.axhline(1.0, color="gray", linestyle="--", alpha=0.5) ax.set_xscale("log") ax.set_xlabel("Number of variables") @@ -237,30 +270,32 @@ def plot_comparison(file_old: str, file_new: str) -> None: cutoff = 25000 idx_old = [i for i, n in enumerate(nv_old) if n <= cutoff] idx_new = [i for i, n in enumerate(nv_new) if n <= cutoff] - ax.errorbar( + plot_errorbar( + ax, [nv_old[i] for i in idx_old], - [ms_old[i] for i in idx_old], - yerr=[std_old[i] for i in idx_old], + [med_old[i] for i in idx_old], + [lo_old[i] for i in idx_old], + [hi_old[i] for i in idx_old], marker="o", color=color_old, linestyle="--", label=label_old, alpha=0.8, - capsize=3, ) - ax.errorbar( + plot_errorbar( + ax, [nv_new[i] for i in idx_new], - [ms_new[i] for i in idx_new], - yerr=[std_new[i] for i in idx_new], + [med_new[i] for i in idx_new], + [lo_new[i] for i in idx_new], + [hi_new[i] for i in idx_new], marker="s", color=color_new, linestyle="-", label=label_new, alpha=0.8, - capsize=3, ) ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms)") + ax.set_ylabel("Write time (ms, median)") ax.set_ylim(bottom=0) ax.set_title(f"Small models (≤ {cutoff:,} vars)") ax.legend() @@ -270,31 +305,33 @@ def plot_comparison(file_old: str, file_new: str) -> None: ax = axes[1, 1] idx_old = [i for i, n in enumerate(nv_old) if n > cutoff] idx_new = [i for i, n in enumerate(nv_new) if n > cutoff] - ax.errorbar( + plot_errorbar( + ax, [nv_old[i] for i in idx_old], - [ms_old[i] for i in idx_old], - yerr=[std_old[i] for i in idx_old], + [med_old[i] for i in idx_old], + [lo_old[i] for i in idx_old], + [hi_old[i] 
for i in idx_old], marker="o", color=color_old, linestyle="--", label=label_old, alpha=0.8, - capsize=3, ) - ax.errorbar( + plot_errorbar( + ax, [nv_new[i] for i in idx_new], - [ms_new[i] for i in idx_new], - yerr=[std_new[i] for i in idx_new], + [med_new[i] for i in idx_new], + [lo_new[i] for i in idx_new], + [hi_new[i] for i in idx_new], marker="s", color=color_new, linestyle="-", label=label_new, alpha=0.8, - capsize=3, ) ax.set_xscale("log") ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms)") + ax.set_ylabel("Write time (ms, median)") ax.set_title(f"Large models (> {cutoff:,} vars)") ax.legend() ax.grid(True, alpha=0.3) From 3f52fef973dad6be9016b31b666b88c8f0512a29 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 1 Feb 2026 00:43:49 +0100 Subject: [PATCH 14/19] test: add coverage for streaming fallback and maybe_group_terms_polars --- test/test_common.py | 18 +++++++++++++++++ test/test_io.py | 48 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/test/test_common.py b/test/test_common.py index db218375..19c169a3 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -23,6 +23,7 @@ get_dims_with_index_levels, is_constant, iterate_slices, + maybe_group_terms_polars, ) from linopy.testing import assert_linequal, assert_varequal @@ -737,3 +738,20 @@ def test_is_constant() -> None: ] for cv in constant_values: assert is_constant(cv) + + +def test_maybe_group_terms_polars_no_duplicates(): + """Fast path: distinct (labels, vars) pairs skip group_by.""" + df = pl.DataFrame({"labels": [0, 0], "vars": [1, 2], "coeffs": [3.0, 4.0]}) + result = maybe_group_terms_polars(df) + assert result.shape == (2, 3) + assert result.columns == ["labels", "vars", "coeffs"] + assert result["coeffs"].to_list() == [3.0, 4.0] + + +def test_maybe_group_terms_polars_with_duplicates(): + """Slow path: duplicate (labels, vars) pairs trigger group_by.""" + df = pl.DataFrame({"labels": [0, 0], "vars": [1, 1], "coeffs": [3.0, 4.0]}) + result = maybe_group_terms_polars(df) + assert result.shape == (1, 3) + assert result["coeffs"].to_list() == [7.0] diff --git a/test/test_io.py b/test/test_io.py index 4336f29d..401b8359 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -7,6 +7,7 @@ import pickle from pathlib import Path +from unittest.mock import patch import numpy as np import pandas as pd @@ -15,7 +16,7 @@ import xarray as xr from linopy import LESS_EQUAL, Model, available_solvers, read_netcdf -from linopy.io import signed_number +from linopy.io import _format_and_write, signed_number from linopy.testing import assert_model_equal @@ -336,3 +337,48 @@ def test_to_file_lp_with_negative_zero_coefficients(tmp_path: Path) -> None: # Verify Gurobi can read it without errors gurobipy.read(str(fn)) + + +def test_format_and_write_streaming_fallback(tmp_path): + """Test that _format_and_write falls back to eager when streaming fails.""" + df = pl.DataFrame({"a": ["x", "y"], "b": ["1", "2"]}) + columns = [pl.col("a"), pl.lit(" "), pl.col("b")] + + # Normal path + fn1 = tmp_path / "normal.lp" + with open(fn1, "wb") as f: + _format_and_write(df, columns, f) + content_normal = fn1.read_text() + + # Force streaming to fail + original_collect = pl.LazyFrame.collect + + def failing_collect(self, *args, **kwargs): + if kwargs.get("engine") == "streaming": + raise RuntimeError("simulated streaming failure") + return original_collect(self, *args, **kwargs) + + fn2 = tmp_path / "fallback.lp" + with 
patch.object(pl.LazyFrame, "collect", failing_collect): + with open(fn2, "wb") as f: + _format_and_write(df, columns, f) + content_fallback = fn2.read_text() + + assert content_normal == content_fallback + + +def test_to_file_lp_same_sign_constraints(tmp_path): + """Test LP writing when all constraints have the same sign operator.""" + m = Model() + N = np.arange(5) + x = m.add_variables(coords=[N], name="x") + # All constraints use <= + m.add_constraints(x <= 10, name="upper") + m.add_constraints(x <= 20, name="upper2") + m.add_objective(x.sum()) + + fn = tmp_path / "same_sign.lp" + m.to_file(fn) + content = fn.read_text() + assert "s.t." in content + assert "<=" in content From 3d4a8159c5eb68b6373d1d0811cd21988fecdf14 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 2 Feb 2026 10:47:35 +0100 Subject: [PATCH 15/19] fix: mypy --- test/test_common.py | 4 ++-- test/test_io.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/test/test_common.py b/test/test_common.py index 19c169a3..c3500155 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -740,7 +740,7 @@ def test_is_constant() -> None: assert is_constant(cv) -def test_maybe_group_terms_polars_no_duplicates(): +def test_maybe_group_terms_polars_no_duplicates() -> None: """Fast path: distinct (labels, vars) pairs skip group_by.""" df = pl.DataFrame({"labels": [0, 0], "vars": [1, 2], "coeffs": [3.0, 4.0]}) result = maybe_group_terms_polars(df) @@ -749,7 +749,7 @@ def test_maybe_group_terms_polars_no_duplicates(): assert result["coeffs"].to_list() == [3.0, 4.0] -def test_maybe_group_terms_polars_with_duplicates(): +def test_maybe_group_terms_polars_with_duplicates() -> None: """Slow path: duplicate (labels, vars) pairs trigger group_by.""" df = pl.DataFrame({"labels": [0, 0], "vars": [1, 1], "coeffs": [3.0, 4.0]}) result = maybe_group_terms_polars(df) diff --git a/test/test_io.py b/test/test_io.py index 401b8359..c9ce5956 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -7,6 +7,7 @@ import pickle from pathlib import Path +from typing import Any from unittest.mock import patch import numpy as np @@ -339,7 +340,7 @@ def test_to_file_lp_with_negative_zero_coefficients(tmp_path: Path) -> None: gurobipy.read(str(fn)) -def test_format_and_write_streaming_fallback(tmp_path): +def test_format_and_write_streaming_fallback(tmp_path: Path) -> None: """Test that _format_and_write falls back to eager when streaming fails.""" df = pl.DataFrame({"a": ["x", "y"], "b": ["1", "2"]}) columns = [pl.col("a"), pl.lit(" "), pl.col("b")] @@ -353,7 +354,7 @@ def test_format_and_write_streaming_fallback(tmp_path): # Force streaming to fail original_collect = pl.LazyFrame.collect - def failing_collect(self, *args, **kwargs): + def failing_collect(self: pl.LazyFrame, *args: Any, **kwargs: Any) -> pl.DataFrame: if kwargs.get("engine") == "streaming": raise RuntimeError("simulated streaming failure") return original_collect(self, *args, **kwargs) @@ -367,7 +368,7 @@ def failing_collect(self, *args, **kwargs): assert content_normal == content_fallback -def test_to_file_lp_same_sign_constraints(tmp_path): +def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None: """Test LP writing when all constraints have the same sign operator.""" m = Model() N = np.arange(5) From 0dbe488395e32683b892aecbcdf981be1d815306 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 2 Feb 2026 13:24:16 +0100 Subject: [PATCH 16/19] fix: mypy --- 
test/test_constraint.py | 14 ++++++++++++++ test/test_io.py | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/test/test_constraint.py b/test/test_constraint.py index 35f49ea2..bfd29a6e 100644 --- a/test/test_constraint.py +++ b/test/test_constraint.py @@ -437,6 +437,20 @@ def test_constraint_to_polars(c: linopy.constraints.Constraint) -> None: assert isinstance(c.to_polars(), pl.DataFrame) +def test_constraint_to_polars_mixed_signs(m: Model, x: linopy.Variable) -> None: + """Test to_polars when a constraint has mixed sign values across dims.""" + # Create a constraint, then manually patch the sign to have mixed values + m.add_constraints(x >= 0, name="mixed") + con = m.constraints["mixed"] + # Replace sign data with mixed signs across the first dimension + n = con.data.sizes["first"] + signs = np.array(["<=" if i % 2 == 0 else ">=" for i in range(n)]) + con.data["sign"] = xr.DataArray(signs, dims=con.data["sign"].dims) + df = con.to_polars() + assert isinstance(df, pl.DataFrame) + assert set(df["sign"].to_list()) == {"<=", ">="} + + def test_constraint_assignment_with_anonymous_constraints( m: Model, x: linopy.Variable, y: linopy.Variable ) -> None: diff --git a/test/test_io.py b/test/test_io.py index c9ce5956..bcd70897 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -383,3 +383,23 @@ def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None: content = fn.read_text() assert "s.t." in content assert "<=" in content + + +def test_to_file_lp_mixed_sign_constraints(tmp_path: Path) -> None: + """Test LP writing when constraints have different sign operators.""" + m = Model() + N = np.arange(5) + x = m.add_variables(coords=[N], name="x") + # Mix of <= and >= constraints in the same container + m.add_constraints(x <= 10, name="upper") + m.add_constraints(x >= 1, name="lower") + m.add_constraints(2 * x == 8, name="eq") + m.add_objective(x.sum()) + + fn = tmp_path / "mixed_sign.lp" + m.to_file(fn) + content = fn.read_text() + assert "s.t." in content + assert "<=" in content + assert ">=" in content + assert "=" in content From a12c8241c9f44b01efcbdbc844c20cbf5a70b0cb Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:09:36 +0100 Subject: [PATCH 17/19] Move kwargs into method for readability --- linopy/io.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/linopy/io.py b/linopy/io.py index fa83e022..decb395c 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -63,9 +63,6 @@ def _format_and_write( Uses Polars streaming engine for better performance when available, with automatic fallback to eager evaluation. 
""" - kwargs: Any = dict( - separator=" ", null_value="", quote_style="never", include_header=False - ) try: formatted = ( df.lazy() @@ -79,7 +76,9 @@ def _format_and_write( exc_info=True, ) formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv(f, **kwargs) + formatted.write_csv( + f, separator=" ", null_value="", quote_style="never", include_header=False + ) def signed_number(expr: pl.Expr) -> tuple[pl.Expr, pl.Expr]: From f76d6c7b82c1bf039102325244cc4d03af93cb01 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:34:47 +0100 Subject: [PATCH 18/19] Remove fallback and pin polars >=1.31 --- linopy/io.py | 20 ++++---------------- pyproject.toml | 2 +- test/test_io.py | 32 +------------------------------- 3 files changed, 6 insertions(+), 48 deletions(-) diff --git a/linopy/io.py b/linopy/io.py index decb395c..b23ef10c 100644 --- a/linopy/io.py +++ b/linopy/io.py @@ -60,23 +60,11 @@ def _format_and_write( """ Format columns via concat_str and write to file. - Uses Polars streaming engine for better performance when available, - with automatic fallback to eager evaluation. + Uses Polars streaming engine for better memory efficiency. """ - try: - formatted = ( - df.lazy() - .select(pl.concat_str(columns, ignore_nulls=True)) - .collect(engine="streaming") - ) - except Exception: - logger.warning( - "Polars streaming engine failed, falling back to eager evaluation. " - "Please report this at https://github.com/PyPSA/linopy/issues", - exc_info=True, - ) - formatted = df.select(pl.concat_str(columns, ignore_nulls=True)) - formatted.write_csv( + df.lazy().select(pl.concat_str(columns, ignore_nulls=True)).collect( + engine="streaming" + ).write_csv( f, separator=" ", null_value="", quote_style="never", include_header=False ) diff --git a/pyproject.toml b/pyproject.toml index 52d5e3d5..621a2d6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "numexpr", "xarray>=2024.2.0", "dask>=0.18.0", - "polars", + "polars>=1.31", "tqdm", "deprecation", "packaging", diff --git a/test/test_io.py b/test/test_io.py index bcd70897..e8ded144 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -7,8 +7,6 @@ import pickle from pathlib import Path -from typing import Any -from unittest.mock import patch import numpy as np import pandas as pd @@ -17,7 +15,7 @@ import xarray as xr from linopy import LESS_EQUAL, Model, available_solvers, read_netcdf -from linopy.io import _format_and_write, signed_number +from linopy.io import signed_number from linopy.testing import assert_model_equal @@ -340,34 +338,6 @@ def test_to_file_lp_with_negative_zero_coefficients(tmp_path: Path) -> None: gurobipy.read(str(fn)) -def test_format_and_write_streaming_fallback(tmp_path: Path) -> None: - """Test that _format_and_write falls back to eager when streaming fails.""" - df = pl.DataFrame({"a": ["x", "y"], "b": ["1", "2"]}) - columns = [pl.col("a"), pl.lit(" "), pl.col("b")] - - # Normal path - fn1 = tmp_path / "normal.lp" - with open(fn1, "wb") as f: - _format_and_write(df, columns, f) - content_normal = fn1.read_text() - - # Force streaming to fail - original_collect = pl.LazyFrame.collect - - def failing_collect(self: pl.LazyFrame, *args: Any, **kwargs: Any) -> pl.DataFrame: - if kwargs.get("engine") == "streaming": - raise RuntimeError("simulated streaming failure") - return original_collect(self, *args, **kwargs) - - fn2 = tmp_path / "fallback.lp" - with patch.object(pl.LazyFrame, "collect", failing_collect): - 
with open(fn2, "wb") as f: - _format_and_write(df, columns, f) - content_fallback = fn2.read_text() - - assert content_normal == content_fallback - - def test_to_file_lp_same_sign_constraints(tmp_path: Path) -> None: """Test LP writing when all constraints have the same sign operator.""" m = Model() From ee889a30cd1e7a65b160bbba5917a77b87c24c16 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Fri, 6 Feb 2026 15:50:03 +0100 Subject: [PATCH 19/19] Remove the benchmark_lp_writer.py --- dev-scripts/benchmark_lp_writer.py | 388 ----------------------------- 1 file changed, 388 deletions(-) delete mode 100644 dev-scripts/benchmark_lp_writer.py diff --git a/dev-scripts/benchmark_lp_writer.py b/dev-scripts/benchmark_lp_writer.py deleted file mode 100644 index ec45c904..00000000 --- a/dev-scripts/benchmark_lp_writer.py +++ /dev/null @@ -1,388 +0,0 @@ -#!/usr/bin/env python3 -""" -Benchmark script for LP file writing performance. - -Usage: - # Run benchmark and save results to JSON: - python dev-scripts/benchmark_lp_writer.py --output results.json [--label "my branch"] - - # Plot comparison of two result files: - python dev-scripts/benchmark_lp_writer.py --plot master.json this_pr.json -""" - -from __future__ import annotations - -import argparse -import json -import tempfile -import time -from pathlib import Path - -import numpy as np -from numpy.random import default_rng - -from linopy import Model - -rng = default_rng(125) - - -def basic_model(n: int) -> Model: - """Create a basic model with 2*n^2 variables and 2*n^2 constraints.""" - m = Model() - N = np.arange(n) - x = m.add_variables(coords=[N, N], name="x") - y = m.add_variables(coords=[N, N], name="y") - m.add_constraints(x - y >= N, name="c1") - m.add_constraints(x + y >= 0, name="c2") - m.add_objective((2 * x).sum() + y.sum()) - return m - - -def knapsack_model(n: int) -> Model: - """Create a knapsack model with n binary variables and 1 constraint.""" - m = Model() - packages = m.add_variables(coords=[np.arange(n)], binary=True) - weight = rng.integers(1, 100, size=n) - value = rng.integers(1, 100, size=n) - m.add_constraints((weight * packages).sum() <= 200) - m.add_objective(-(value * packages).sum()) - return m - - -def pypsa_model(snapshots: int | None = None) -> Model | None: - """Create a model from the PyPSA SciGrid-DE example network.""" - try: - import pandas as pd - import pypsa - except ImportError: - return None - n = pypsa.examples.scigrid_de() - if snapshots is not None and snapshots > len(n.snapshots): - orig = n.snapshots - repeats = -(-snapshots // len(orig)) - new_index = pd.date_range(orig[0], periods=len(orig) * repeats, freq=orig.freq) - new_index = new_index[:snapshots] - n.set_snapshots(new_index) - n.optimize.create_model() - return n.model - - -def benchmark_model( - label: str, m: Model, iterations: int = 10, io_api: str | None = None -) -> dict: - """Benchmark LP file writing. 
Returns dict with results.""" - to_file_kwargs: dict = dict(progress=False) - if io_api is not None: - to_file_kwargs["io_api"] = io_api - with tempfile.TemporaryDirectory() as tmpdir: - m.to_file(Path(tmpdir) / "warmup.lp", **to_file_kwargs) - times = [] - for i in range(iterations): - fn = Path(tmpdir) / f"bench_{i}.lp" - start = time.perf_counter() - m.to_file(fn, **to_file_kwargs) - times.append(time.perf_counter() - start) - - avg = float(np.mean(times)) - med = float(np.median(times)) - q25 = float(np.percentile(times, 25)) - q75 = float(np.percentile(times, 75)) - nvars = int(m.nvars) - ncons = int(m.ncons) - print( - f" {label:55s} ({nvars:>9,} vars, {ncons:>9,} cons): " - f"{med * 1000:7.1f}ms (IQR {q25 * 1000:.1f}–{q75 * 1000:.1f}ms)" - ) - return { - "label": label, - "nvars": nvars, - "ncons": ncons, - "mean_s": avg, - "median_s": med, - "q25_s": q25, - "q75_s": q75, - "times_s": times, - } - - -def run_benchmarks( - io_api: str | None = None, - iterations: int = 10, - model_type: str = "basic", -) -> list[dict]: - """ - Run benchmarks for a single model type across sizes. - - Parameters - ---------- - model_type : str - "basic" (default) — N from 5 to 1000, giving 50 to 2M vars. - "pypsa" — PyPSA SciGrid-DE with varying snapshot counts. - """ - results = [] - - if model_type == "basic": - print("\nbasic_model (2 x N^2 vars, 2 x N^2 constraints):") - for n in [ - 5, - 10, - 20, - 30, - 50, - 75, - 100, - 150, - 200, - 300, - 500, - 750, - 1000, - 1500, - 2000, - ]: - # More iterations for small models to reduce noise - iters = iterations * 5 if n <= 100 else iterations - r = benchmark_model(f"basic N={n}", basic_model(n), iters, io_api=io_api) - r["model"] = "basic" - r["param"] = n - results.append(r) - - elif model_type == "pypsa": - print("\nPyPSA SciGrid-DE (realistic power system model):") - for snaps in [24, 50, 100, 200, 500, 1000]: - m = pypsa_model(snapshots=snaps) - if m is not None: - r = benchmark_model( - f"pypsa {snaps} snaps", m, iterations, io_api=io_api - ) - r["model"] = "pypsa" - r["param"] = snaps - results.append(r) - else: - print(" (skipped, pypsa not installed)") - break - else: - raise ValueError(f"Unknown model_type: {model_type!r}") - - return results - - -def plot_comparison(file_old: str, file_new: str) -> None: - """Create 4-panel comparison plot from two JSON result files.""" - import matplotlib.pyplot as plt - - with open(file_old) as f: - data_old = json.load(f) - with open(file_new) as f: - data_new = json.load(f) - - label_old = data_old.get("label", Path(file_old).stem) - label_new = data_new.get("label", Path(file_new).stem) - - def get_stats(data): - """Extract median and IQR from results, falling back to mean/std.""" - nv = [r["nvars"] for r in data["results"]] - if "median_s" in data["results"][0]: - med = [r["median_s"] * 1000 for r in data["results"]] - lo = [r["q25_s"] * 1000 for r in data["results"]] - hi = [r["q75_s"] * 1000 for r in data["results"]] - else: - med = [r["mean_s"] * 1000 for r in data["results"]] - std = [r["std_s"] * 1000 for r in data["results"]] - lo = [m - s for m, s in zip(med, std)] - hi = [m + s for m, s in zip(med, std)] - return nv, med, lo, hi - - nv_old, med_old, lo_old, hi_old = get_stats(data_old) - nv_new, med_new, lo_new, hi_new = get_stats(data_new) - - color_old, color_new = "#1f77b4", "#ff7f0e" - - fig, axes = plt.subplots(2, 2, figsize=(14, 10)) - fig.suptitle(f"LP Write Performance: {label_old} vs {label_new}", fontsize=14) - - def plot_errorbar(ax, nv, med, lo, hi, **kwargs): - yerr_lo = [m - l for 
m, l in zip(med, lo)] - yerr_hi = [h - m for m, h in zip(med, hi)] - ax.errorbar(nv, med, yerr=[yerr_lo, yerr_hi], capsize=3, **kwargs) - - # Panel 1: All data, log-log - ax = axes[0, 0] - plot_errorbar( - ax, - nv_old, - med_old, - lo_old, - hi_old, - marker="o", - color=color_old, - linestyle="--", - label=label_old, - alpha=0.8, - ) - plot_errorbar( - ax, - nv_new, - med_new, - lo_new, - hi_new, - marker="s", - color=color_new, - linestyle="-", - label=label_new, - alpha=0.8, - ) - ax.set_xscale("log") - ax.set_yscale("log") - ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms, median)") - ax.set_title("IO time vs problem size (log-log)") - ax.legend() - ax.grid(True, alpha=0.3) - - # Panel 2: Speedup ratio (old/new) with IQR-based bounds - ax = axes[0, 1] - if len(nv_old) == len(nv_new): - speedup = [o / n for o, n in zip(med_old, med_new)] - # Conservative bounds: best case = hi_old/lo_new, worst = lo_old/hi_new - speedup_lo = [l / h for l, h in zip(lo_old, hi_new)] - speedup_hi = [h / l for h, l in zip(hi_old, lo_new)] - yerr_lo = [s - sl for s, sl in zip(speedup, speedup_lo)] - yerr_hi = [sh - s for s, sh in zip(speedup, speedup_hi)] - ax.errorbar( - nv_old, - speedup, - yerr=[yerr_lo, yerr_hi], - marker="o", - color="#2ca02c", - capsize=3, - ) - ax.fill_between(nv_old, speedup_lo, speedup_hi, alpha=0.15, color="#2ca02c") - ax.axhline(1.0, color="gray", linestyle="--", alpha=0.5) - ax.set_xscale("log") - ax.set_xlabel("Number of variables") - ax.set_ylabel(f"Speedup ({label_old} / {label_new})") - ax.set_title("Speedup vs problem size") - ax.grid(True, alpha=0.3) - - # Panel 3: Small models (nvars <= 25000) - ax = axes[1, 0] - cutoff = 25000 - idx_old = [i for i, n in enumerate(nv_old) if n <= cutoff] - idx_new = [i for i, n in enumerate(nv_new) if n <= cutoff] - plot_errorbar( - ax, - [nv_old[i] for i in idx_old], - [med_old[i] for i in idx_old], - [lo_old[i] for i in idx_old], - [hi_old[i] for i in idx_old], - marker="o", - color=color_old, - linestyle="--", - label=label_old, - alpha=0.8, - ) - plot_errorbar( - ax, - [nv_new[i] for i in idx_new], - [med_new[i] for i in idx_new], - [lo_new[i] for i in idx_new], - [hi_new[i] for i in idx_new], - marker="s", - color=color_new, - linestyle="-", - label=label_new, - alpha=0.8, - ) - ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms, median)") - ax.set_ylim(bottom=0) - ax.set_title(f"Small models (≤ {cutoff:,} vars)") - ax.legend() - ax.grid(True, alpha=0.3) - - # Panel 4: Large models (nvars > 25000) - ax = axes[1, 1] - idx_old = [i for i, n in enumerate(nv_old) if n > cutoff] - idx_new = [i for i, n in enumerate(nv_new) if n > cutoff] - plot_errorbar( - ax, - [nv_old[i] for i in idx_old], - [med_old[i] for i in idx_old], - [lo_old[i] for i in idx_old], - [hi_old[i] for i in idx_old], - marker="o", - color=color_old, - linestyle="--", - label=label_old, - alpha=0.8, - ) - plot_errorbar( - ax, - [nv_new[i] for i in idx_new], - [med_new[i] for i in idx_new], - [lo_new[i] for i in idx_new], - [hi_new[i] for i in idx_new], - marker="s", - color=color_new, - linestyle="-", - label=label_new, - alpha=0.8, - ) - ax.set_xscale("log") - ax.set_xlabel("Number of variables") - ax.set_ylabel("Write time (ms, median)") - ax.set_title(f"Large models (> {cutoff:,} vars)") - ax.legend() - ax.grid(True, alpha=0.3) - - plt.tight_layout() - out_path = "dev-scripts/benchmark_lp_comparison.png" - plt.savefig(out_path, dpi=150, bbox_inches="tight") - print(f"\nPlot saved to {out_path}") - plt.close() - - -def main() -> 
None: - parser = argparse.ArgumentParser(description="LP write benchmark") - parser.add_argument("--output", "-o", help="Save results to JSON file") - parser.add_argument("--label", default=None, help="Label for this run") - parser.add_argument("--io-api", default=None, help="io_api to pass to to_file()") - parser.add_argument( - "--model", - default="basic", - choices=["basic", "pypsa"], - help="Model type to benchmark (default: basic)", - ) - parser.add_argument( - "--plot", - nargs=2, - metavar=("OLD", "NEW"), - help="Plot comparison from two JSON files", - ) - args = parser.parse_args() - - if args.plot: - plot_comparison(args.plot[0], args.plot[1]) - return - - iterations = 10 - label = args.label or "benchmark" - print(f"LP file writing benchmark ({iterations} iterations, label={label!r})") - print("=" * 90) - - results = run_benchmarks( - io_api=args.io_api, iterations=iterations, model_type=args.model - ) - - output = {"label": label, "results": results} - if args.output: - with open(args.output, "w") as f: - json.dump(output, f, indent=2) - print(f"\nResults saved to {args.output}") - else: - print("\n(use --output FILE to save results for later plotting)") - - -if __name__ == "__main__": - main()