From 5a30e03cbf32fffaeb7ab9dcbb757abe8f43bd6d Mon Sep 17 00:00:00 2001
From: dario curreri <dariocurr@gmail.com>
Date: Thu, 19 Feb 2026 11:36:58 +0100
Subject: [PATCH 1/7] ci: update pre-commit hooks and fix linting issues

* Update Ruff version in pre-commit configuration to v0.15.1.
* Add noqa comments to suppress specific linting warnings in various files.
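* Mark the regex patterns passed to `pytest.raises(match=...)` in test cases as raw strings (the patterns themselves are unchanged).
* Replace trivial wrapper lambdas in tests with direct function references, move function-local test imports to module level, and quote the type arguments passed to `cast()`.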
---
 .pre-commit-config.yaml             |  2 +-
 python/datafusion/dataframe.py      |  2 +-
 python/datafusion/expr.py           |  4 +++-
 python/datafusion/input/location.py |  4 ++--
 python/datafusion/plan.py           |  2 +-
 python/datafusion/user_defined.py   |  4 ++--
 python/tests/test_catalog.py        |  2 +-
 python/tests/test_dataframe.py      |  2 +-
 python/tests/test_functions.py      | 24 ++++++++++++------------
 python/tests/test_sql.py            |  4 ++--
 python/tests/test_udf.py            |  7 +++----
 python/tests/test_udwf.py           |  4 ++--
 12 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bcefa405d..8ae6a4e32 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
           - id: actionlint-docker
       - repo: https://github.com/astral-sh/ruff-pre-commit
         # Ruff version.
-        rev: v0.9.10
+        rev: v0.15.1
         hooks:
           # Run the linter.
           - id: ruff
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index d302c12a5..bf4ffcda6 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -330,7 +330,7 @@ def into_view(self, temporary: bool = False) -> Table:
             >>> df.collect()  # The DataFrame is still usable
             >>> ctx.sql("SELECT value FROM values_view").collect()
         """
-        from datafusion.catalog import Table as _Table
+        from datafusion.catalog import Table as _Table  # noqa: PLC0415
 
         return _Table(self.df.into_view(temporary))
 
diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py
index 9df58f52a..10a30ed1c 100644
--- a/python/datafusion/expr.py
+++ b/python/datafusion/expr.py
@@ -20,6 +20,8 @@
 See :ref:`Expressions` in the online documentation for more details.
 """
 
+# ruff: noqa: PLC0415
+
 from __future__ import annotations
 
 from collections.abc import Iterable, Sequence
@@ -340,7 +342,7 @@ def sort_list_to_raw_sort_list(
     return raw_sort_list
 
 
-class Expr:
+class Expr:  # noqa: PLW1641
     """Expression object.
 
     Expressions are one of the core concepts in DataFusion. See
diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py
index b804ac18b..779d94d23 100644
--- a/python/datafusion/input/location.py
+++ b/python/datafusion/input/location.py
@@ -46,7 +46,7 @@ def build_table(
         num_rows = 0  # Total number of rows in the file. Used for statistics
         columns = []
         if file_format == "parquet":
-            import pyarrow.parquet as pq
+            import pyarrow.parquet as pq  # noqa: PLC0415
 
             # Read the Parquet metadata
             metadata = pq.read_metadata(input_item)
@@ -61,7 +61,7 @@ def build_table(
             ]
 
         elif format == "csv":
-            import csv
+            import csv  # noqa: PLC0415
 
             # Consume header row and count number of rows for statistics.
             # TODO: Possibly makes sense to have the eager number of rows
diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py
index fb54fd624..9c96a18fc 100644
--- a/python/datafusion/plan.py
+++ b/python/datafusion/plan.py
@@ -32,7 +32,7 @@
 ]
 
 
-class LogicalPlan:
+class LogicalPlan:  # noqa: PLW1641
     """Logical Plan.
 
     A `LogicalPlan` is a node in a tree of relational operators (such as
diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py
index d4e5302b5..eef23e741 100644
--- a/python/datafusion/user_defined.py
+++ b/python/datafusion/user_defined.py
@@ -583,11 +583,11 @@ def from_pycapsule(func: AggregateUDFExportable | _PyCapsule) -> AggregateUDF:
         AggregateUDF that is exported via the FFI bindings.
         """
         if _is_pycapsule(func):
-            aggregate = cast(AggregateUDF, object.__new__(AggregateUDF))
+            aggregate = cast("AggregateUDF", object.__new__(AggregateUDF))
             aggregate._udaf = df_internal.AggregateUDF.from_pycapsule(func)
             return aggregate
 
-        capsule = cast(AggregateUDFExportable, func)
+        capsule = cast("AggregateUDFExportable", func)
         name = str(capsule.__class__)
         return AggregateUDF(
             name=name,
diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py
index 71c08da26..9310da506 100644
--- a/python/tests/test_catalog.py
+++ b/python/tests/test_catalog.py
@@ -248,7 +248,7 @@ def test_exception_not_mangled(ctx: SessionContext):
 
     schema.register_table("test_table", create_dataset())
 
-    with pytest.raises(ValueError, match="^test_table is not an acceptable name$"):
+    with pytest.raises(ValueError, match=r"^test_table is not an acceptable name$"):
         ctx.sql(f"select * from {catalog_name}.{schema_name}.test_table")
 
 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 71abe2925..de6b00acf 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -2790,7 +2790,7 @@ def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, res
 def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding):
     """Test that unsupported Parquet encodings do not work."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
-    with pytest.raises(BaseException, match="Encoding .*? is not supported"):
+    with pytest.raises(BaseException, match=r"Encoding .*? is not supported"):
         df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index 7b3332ed7..5a61a2dd1 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -303,19 +303,19 @@ def py_flatten(arr):
             lambda data: [np.concatenate([arr, arr]) for arr in data],
         ),
         (
-            lambda col: f.array_dims(col),
+            f.array_dims,
             lambda data: [[len(r)] for r in data],
         ),
         (
-            lambda col: f.array_distinct(col),
+            f.array_distinct,
             lambda data: [list(set(r)) for r in data],
         ),
         (
-            lambda col: f.list_distinct(col),
+            f.list_distinct,
             lambda data: [list(set(r)) for r in data],
         ),
         (
-            lambda col: f.list_dims(col),
+            f.list_dims,
             lambda data: [[len(r)] for r in data],
         ),
         (
@@ -323,11 +323,11 @@ def py_flatten(arr):
             lambda data: [r[0] for r in data],
         ),
         (
-            lambda col: f.array_empty(col),
+            f.array_empty,
             lambda data: [len(r) == 0 for r in data],
         ),
         (
-            lambda col: f.empty(col),
+            f.empty,
             lambda data: [len(r) == 0 for r in data],
         ),
         (
@@ -343,11 +343,11 @@ def py_flatten(arr):
             lambda data: [r[0] for r in data],
         ),
         (
-            lambda col: f.array_length(col),
+            f.array_length,
             lambda data: [len(r) for r in data],
         ),
         (
-            lambda col: f.list_length(col),
+            f.list_length,
             lambda data: [len(r) for r in data],
         ),
         (
@@ -391,11 +391,11 @@ def py_flatten(arr):
             lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data],
         ),
         (
-            lambda col: f.array_ndims(col),
+            f.array_ndims,
             lambda data: [np.array(r).ndim for r in data],
         ),
         (
-            lambda col: f.list_ndims(col),
+            f.list_ndims,
             lambda data: [np.array(r).ndim for r in data],
         ),
         (
@@ -415,11 +415,11 @@ def py_flatten(arr):
             lambda data: [np.insert(arr, 0, 99.0) for arr in data],
         ),
         (
-            lambda col: f.array_pop_back(col),
+            f.array_pop_back,
             lambda data: [arr[:-1] for arr in data],
         ),
         (
-            lambda col: f.array_pop_front(col),
+            f.array_pop_front,
             lambda data: [arr[1:] for arr in data],
         ),
         (
diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py
index 12710cf08..1ed1746e1 100644
--- a/python/tests/test_sql.py
+++ b/python/tests/test_sql.py
@@ -31,7 +31,7 @@
 def test_no_table(ctx):
     with pytest.raises(
         ValueError,
-        match="^Error during planning: table 'datafusion.public.b' not found$",
+        match=r"^Error during planning: table 'datafusion.public.b' not found$",
     ):
         ctx.sql("SELECT a FROM b").collect()
 
@@ -188,7 +188,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty
     partition_data_type = "string" if legacy_data_type else pa.string()
 
     if legacy_data_type:
-        with pytest.warns(DeprecationWarning):
+        with pytest.warns(DeprecationWarning):  # noqa: PT030
             ctx.register_parquet(
                 "datapp",
                 dir_root,
diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py
index c0ba1d831..b2540fb57 100644
--- a/python/tests/test_udf.py
+++ b/python/tests/test_udf.py
@@ -15,7 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from uuid import UUID
+
 import pyarrow as pa
+import pyarrow.compute as pc
 import pytest
 from datafusion import SessionContext, column, udf
 from datafusion import functions as f
@@ -128,8 +131,6 @@ def udf_with_param(values: pa.Array) -> pa.Array:
 
 
 def test_udf_with_metadata(ctx) -> None:
-    from uuid import UUID
-
     @udf([pa.string()], pa.uuid(), "stable")
     def uuid_from_string(uuid_string):
         return pa.array((UUID(s).bytes for s in uuid_string.to_pylist()), pa.uuid())
@@ -151,8 +152,6 @@ def uuid_version(uuid):
 
 
 def test_udf_with_nullability(ctx: SessionContext) -> None:
-    import pyarrow.compute as pc
-
     field_nullable_i64 = pa.field("with_nulls", type=pa.int64(), nullable=True)
     field_non_nullable_i64 = pa.field("no_nulls", type=pa.int64(), nullable=False)
 
diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py
index 5aaf00664..38b935b7e 100644
--- a/python/tests/test_udwf.py
+++ b/python/tests/test_udwf.py
@@ -433,8 +433,8 @@ def test_udwf_functions(complex_window_df, name, expr, expected):
     [
         udwf(SimpleWindowCount, pa.int64(), pa.int64(), "immutable"),
         udwf(SimpleWindowCount, [pa.int64()], pa.int64(), "immutable"),
-        udwf([pa.int64()], pa.int64(), "immutable")(lambda: SimpleWindowCount()),
-        udwf(pa.int64(), pa.int64(), "immutable")(lambda: SimpleWindowCount()),
+        udwf([pa.int64()], pa.int64(), "immutable")(SimpleWindowCount),
+        udwf(pa.int64(), pa.int64(), "immutable")(SimpleWindowCount),
     ],
 )
 def test_udwf_overloads(udwf_func, count_window_df):

From cb90d925f336d083e3401c353b10e6864346db10 Mon Sep 17 00:00:00 2001
From: dario curreri <dariocurr@gmail.com>
Date: Thu, 19 Feb 2026 11:37:51 +0100
Subject: [PATCH 2/7] style: correct indentation in GitHub Actions workflow
 file

* Adjusted indentation for the enable-cache option in the test.yml workflow file to ensure proper YAML formatting.
---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 55248b6bf..4cad8db24 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -67,7 +67,7 @@ jobs:
       - name: Install dependencies
         uses: astral-sh/setup-uv@v7
         with:
-            enable-cache: true
+          enable-cache: true
 
       # Download the Linux wheel built in the build workflow
       - name: Download pre-built Linux wheel

From 3a1eccbc055b157d133b57f02fe2fae633c5366d Mon Sep 17 00:00:00 2001
From: dario curreri <dariocurr@gmail.com>
Date: Thu, 19 Feb 2026 11:38:04 +0100
Subject: [PATCH 3/7] refactor: reorder imports in indexed_field.rs for clarity

* Adjusted the order of imports in indexed_field.rs to improve readability and maintain consistency with project conventions.
---
 src/expr/indexed_field.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/expr/indexed_field.rs b/src/expr/indexed_field.rs
index 1dfa0ed2f..79f528179 100644
--- a/src/expr/indexed_field.rs
+++ b/src/expr/indexed_field.rs
@@ -15,12 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::expr::PyExpr;
+use std::fmt::{Display, Formatter};
+
 use datafusion::logical_expr::expr::{GetFieldAccess, GetIndexedField};
 use pyo3::prelude::*;
-use std::fmt::{Display, Formatter};
 
 use super::literal::PyLiteral;
+use crate::expr::PyExpr;
 
 #[pyclass(frozen, name = "GetIndexedField", module = "datafusion.expr", subclass)]
 #[derive(Clone)]

From 8afeb14eaab479fc893e060dcc9b67c59b3e9a66 Mon Sep 17 00:00:00 2001
From: dario curreri <dariocurr@gmail.com>
Date: Thu, 19 Feb 2026 11:38:37 +0100
Subject: [PATCH 4/7] build: update dependencies in Cargo.toml and Cargo.lock

* Bump the version requirements of tokio, pyo3-log, prost, prost-types, uuid, and log in Cargo.toml.
* Update Cargo.lock (bumpalo 3.20.0 -> 3.20.1).
---
 Cargo.lock |  4 ++--
 Cargo.toml | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index cd853a03f..d6fd05a77 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -593,9 +593,9 @@ dependencies = [
 
 [[package]]
 name = "bumpalo"
-version = "3.20.0"
+version = "3.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c81d250916401487680ed13b8b675660281dcfc3ab0121fe44c94bcab9eae2fb"
+checksum = "5c6f81257d10a0f602a294ae4182251151ff97dbb504ef9afcdda4a64b24d9b4"
 
 [[package]]
 name = "byteorder"
diff --git a/Cargo.toml b/Cargo.toml
index 3e632bafc..371554021 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,7 +42,7 @@ protoc = ["datafusion-substrait/protoc"]
 substrait = ["dep:datafusion-substrait"]
 
 [dependencies]
-tokio = { version = "1.47", features = [
+tokio = { version = "1.49", features = [
   "macros",
   "rt",
   "rt-multi-thread",
@@ -54,16 +54,16 @@ pyo3 = { version = "0.26", features = [
   "abi3-py310",
 ] }
 pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"] }
-pyo3-log = "0.13.2"
+pyo3-log = "0.13.3"
 arrow = { version = "57", features = ["pyarrow"] }
 arrow-select = { version = "57" }
 datafusion = { version = "52", features = ["avro", "unicode_expressions"] }
 datafusion-substrait = { version = "52", optional = true }
 datafusion-proto = { version = "52" }
 datafusion-ffi = { version = "52" }
-prost = "0.14.1" # keep in line with `datafusion-substrait`
+prost = "0.14.3" # keep in line with `datafusion-substrait`
 serde_json = "1"
-uuid = { version = "1.18", features = ["v4"] }
+uuid = { version = "1.21", features = ["v4"] }
 mimalloc = { version = "0.1", optional = true, default-features = false, features = [
   "local_dynamic_tls",
 ] }
@@ -77,11 +77,11 @@ object_store = { version = "0.12.4", features = [
   "http",
 ] }
 url = "2"
-log = "0.4.27"
+log = "0.4.29"
 parking_lot = "0.12"
 
 [build-dependencies]
-prost-types = "0.14.1"     # keep in line with `datafusion-substrait`
+prost-types = "0.14.3"     # keep in line with `datafusion-substrait`
 pyo3-build-config = "0.26"
 
 [lib]

From 2b0c23d2e66bfcfcea16fffae430abf32db36ecd Mon Sep 17 00:00:00 2001
From: dario curreri <dariocurr@gmail.com>
Date: Thu, 19 Feb 2026 11:38:48 +0100
Subject: [PATCH 5/7] style: format pyproject.toml for consistency

* Reformatted pyproject.toml for readability by expanding inline lists to one entry per line and sorting list entries alphabetically.
* Added `PLC0415` to the Ruff per-file-ignores list that precedes `examples/*`; no dependency versions were changed.
---
 pyproject.toml | 124 +++++++++++++++++++++++++++++++------------------
 1 file changed, 80 insertions(+), 44 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5a5128a2f..b994cd034 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,9 @@
 # under the License.
 
 [build-system]
-requires = ["maturin>=1.8.1"]
+requires = [
+  "maturin>=1.8.1",
+]
 build-backend = "maturin"
 
 [project]
@@ -25,7 +27,12 @@ description = "Build and run queries against data"
 readme = "README.md"
 license = { file = "LICENSE.txt" }
 requires-python = ">=3.10"
-keywords = ["datafusion", "dataframe", "rust", "query-engine"]
+keywords = [
+  "dataframe",
+  "datafusion",
+  "query-engine",
+  "rust",
+]
 classifiers = [
   "Development Status :: 2 - Pre-Alpha",
   "Intended Audience :: Developers",
@@ -48,7 +55,9 @@ dependencies = [
   "pyarrow>=22.0.0;python_version>='3.14'",
   "typing-extensions;python_version<'3.13'",
 ]
-dynamic = ["version"]
+dynamic = [
+  "version",
+]
 
 [project.urls]
 homepage = "https://datafusion.apache.org/python"
@@ -61,11 +70,19 @@ profile = "black"
 [tool.maturin]
 python-source = "python"
 module-name = "datafusion._internal"
-include = [{ path = "Cargo.lock", format = "sdist" }]
-exclude = [".github/**", "ci/**", ".asf.yaml"]
+include = [
+  { path = "Cargo.lock", format = "sdist" },
+]
+exclude = [
+  ".asf.yaml",
+  ".github/**",
+  "ci/**",
+]
 # Require Cargo.lock is up to date
 locked = true
-features = ["substrait"]
+features = [
+  "substrait",
+]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
@@ -73,23 +90,25 @@ asyncio_default_fixture_loop_scope = "function"
 
 # Enable docstring linting using the google style guide
 [tool.ruff.lint]
-select = ["ALL"]
+select = [
+  "ALL",
+]
 ignore = [
   "A001",    # Allow using words like min as variable names
   "A002",    # Allow using words like filter as variable names
+  "A005",    # Allow module named io
   "ANN401",  # Allow Any for wrapper classes
   "COM812",  # Recommended to ignore these rules when using with ruff-format
-  "FIX002",  # Allow TODO lines - consider removing at some point
   "FBT001",  # Allow boolean positional args
   "FBT002",  # Allow boolean positional args
+  "FIX002",  # Allow TODO lines - consider removing at some point
   "ISC001",  # Recommended to ignore these rules when using with ruff-format
+  "N812",    # Allow importing functions as `F`
+  "PD901",   # Allow variable name df
+  "PLR0913", # Allow many arguments in function definition
   "SLF001",  # Allow accessing private members
   "TD002",   # Do not require author names in TODO statements
   "TD003",   # Allow TODO lines
-  "PLR0913", # Allow many arguments in function definition
-  "PD901",   # Allow variable name df
-  "N812",    # Allow importing functions as `F`
-  "A005",    # Allow module named io
 ]
 
 [tool.ruff.lint.pydocstyle]
@@ -99,7 +118,10 @@ convention = "google"
 max-doc-length = 88
 
 [tool.ruff.lint.flake8-boolean-trap]
-extend-allowed-calls = ["lit", "datafusion.lit"]
+extend-allowed-calls = [
+  "datafusion.lit",
+  "lit",
+]
 
 # Disable docstring checking for these directories
 [tool.ruff.lint.per-file-ignores]
@@ -108,68 +130,82 @@ extend-allowed-calls = ["lit", "datafusion.lit"]
   "ARG",
   "BLE001",
   "D",
-  "S101",
-  "SLF",
   "PD",
+  "PLC0415",
+  "PLR0913",
   "PLR2004",
+  "PT004",
   "PT011",
   "RUF015",
+  "S101",
   "S608",
-  "PLR0913",
-  "PT004",
+  "SLF",
 ]
 "examples/*" = [
-  "D",
-  "W505",
-  "E501",
-  "T201",
-  "S101",
-  "PLR2004",
   "ANN001",
   "ANN202",
-  "INP001",
+  "D",
   "DTZ007",
+  "E501",
+  "INP001",
+  "PLR2004",
   "RUF015",
+  "S101",
+  "T201",
+  "W505",
 ]
 "dev/*" = [
+  "ANN001",
+  "C",
   "D",
   "E",
-  "T",
-  "S",
+  "ERA001",
+  "EXE",
+  "N817",
   "PLR",
-  "C",
+  "S",
   "SIM",
+  "T",
   "UP",
-  "EXE",
-  "N817",
-  "ERA001",
-  "ANN001",
 ]
 "benchmarks/*" = [
+  "ANN001",
+  "BLE",
   "D",
+  "E",
+  "ERA001",
+  "EXE",
   "F",
-  "T",
-  "BLE",
   "FURB",
+  "INP001",
   "PLR",
-  "E",
-  "TD",
-  "TRY",
   "S",
   "SIM",
-  "EXE",
+  "T",
+  "TD",
+  "TRY",
   "UP",
-  "ERA001",
+]
+"docs/*" = [
+  "D",
+]
+"docs/source/conf.py" = [
   "ANN001",
+  "ERA001",
   "INP001",
 ]
-"docs/*" = ["D"]
-"docs/source/conf.py" = ["ERA001", "ANN001", "INP001"]
 
 [tool.codespell]
-skip = ["./target", "uv.lock", "./python/tests/test_functions.py"]
+skip = [
+  "./python/tests/test_functions.py",
+  "./target",
+  "uv.lock",
+]
 count = true
-ignore-words-list = ["ans", "IST"]
+ignore-words-list = [
+  "IST",
+  "ans",
+]
 
 [dependency-groups]
 dev = [
@@ -182,8 +218,8 @@ dev = [
   "pre-commit>=4.3.0",
   "pyarrow>=19.0.0",
   "pygithub==2.5.0",
-  "pytest>=7.4.4",
   "pytest-asyncio>=0.23.3",
+  "pytest>=7.4.4",
   "pyyaml>=6.0.3",
   "ruff>=0.9.1",
   "toml>=0.10.2",
@@ -196,6 +232,6 @@ docs = [
   "pickleshare>=0.7.5",
   "pydata-sphinx-theme==0.8.0",
   "setuptools>=75.3.0",
-  "sphinx>=7.1.2",
   "sphinx-autoapi>=3.4.0",
+  "sphinx>=7.1.2",
 ]

From 789531ea984454144daa12433acd93d73030a7e1 Mon Sep 17 00:00:00 2001
From: dario curreri <dariocurr@gmail.com>
Date: Thu, 19 Feb 2026 15:17:18 +0100
Subject: [PATCH 6/7] style: remove noqa comments for import statements

* Removed the inline noqa comments added in patch 1/7: `PLC0415` on function-local imports, `PLW1641` on the `Expr` and `LogicalPlan` classes, and `PT030` on a `pytest.warns` call. The file-level `# ruff: noqa: PLC0415` in expr.py is left in place.
---
 python/datafusion/dataframe.py      | 2 +-
 python/datafusion/expr.py           | 2 +-
 python/datafusion/input/location.py | 4 ++--
 python/datafusion/plan.py           | 2 +-
 python/tests/test_sql.py            | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index bf4ffcda6..d302c12a5 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -330,7 +330,7 @@ def into_view(self, temporary: bool = False) -> Table:
             >>> df.collect()  # The DataFrame is still usable
             >>> ctx.sql("SELECT value FROM values_view").collect()
         """
-        from datafusion.catalog import Table as _Table  # noqa: PLC0415
+        from datafusion.catalog import Table as _Table
 
         return _Table(self.df.into_view(temporary))
 
diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py
index 10a30ed1c..5760b8948 100644
--- a/python/datafusion/expr.py
+++ b/python/datafusion/expr.py
@@ -342,7 +342,7 @@ def sort_list_to_raw_sort_list(
     return raw_sort_list
 
 
-class Expr:  # noqa: PLW1641
+class Expr:
     """Expression object.
 
     Expressions are one of the core concepts in DataFusion. See
diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py
index 779d94d23..b804ac18b 100644
--- a/python/datafusion/input/location.py
+++ b/python/datafusion/input/location.py
@@ -46,7 +46,7 @@ def build_table(
         num_rows = 0  # Total number of rows in the file. Used for statistics
         columns = []
         if file_format == "parquet":
-            import pyarrow.parquet as pq  # noqa: PLC0415
+            import pyarrow.parquet as pq
 
             # Read the Parquet metadata
             metadata = pq.read_metadata(input_item)
@@ -61,7 +61,7 @@ def build_table(
             ]
 
         elif format == "csv":
-            import csv  # noqa: PLC0415
+            import csv
 
             # Consume header row and count number of rows for statistics.
             # TODO: Possibly makes sense to have the eager number of rows
diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py
index 9c96a18fc..fb54fd624 100644
--- a/python/datafusion/plan.py
+++ b/python/datafusion/plan.py
@@ -32,7 +32,7 @@
 ]
 
 
-class LogicalPlan:  # noqa: PLW1641
+class LogicalPlan:
     """Logical Plan.
 
     A `LogicalPlan` is a node in a tree of relational operators (such as
diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py
index 1ed1746e1..92c311930 100644
--- a/python/tests/test_sql.py
+++ b/python/tests/test_sql.py
@@ -188,7 +188,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty
     partition_data_type = "string" if legacy_data_type else pa.string()
 
     if legacy_data_type:
-        with pytest.warns(DeprecationWarning):  # noqa: PT030
+        with pytest.warns(DeprecationWarning):
             ctx.register_parquet(
                 "datapp",
                 dir_root,

From 4c1e6e0c79d67afdb4f4d0fd2773b656c186266d Mon Sep 17 00:00:00 2001
From: dario curreri <dariocurr@gmail.com>
Date: Thu, 19 Feb 2026 15:20:24 +0100
Subject: [PATCH 7/7] style: simplify formatting in pyproject.toml

* Streamlined list formatting in pyproject.toml for improved readability by removing unnecessary line breaks and ensuring consistent structure across sections.
* No functional changes were made; the focus was solely on code style and organization.
---
 pyproject.toml | 59 ++++++++++----------------------------------------
 1 file changed, 12 insertions(+), 47 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b994cd034..08d64eca0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,9 +16,7 @@
 # under the License.
 
 [build-system]
-requires = [
-  "maturin>=1.8.1",
-]
+requires = ["maturin>=1.8.1"]
 build-backend = "maturin"
 
 [project]
@@ -27,12 +25,7 @@ description = "Build and run queries against data"
 readme = "README.md"
 license = { file = "LICENSE.txt" }
 requires-python = ">=3.10"
-keywords = [
-  "dataframe",
-  "datafusion",
-  "query-engine",
-  "rust",
-]
+keywords = ["dataframe", "datafusion", "query-engine", "rust"]
 classifiers = [
   "Development Status :: 2 - Pre-Alpha",
   "Intended Audience :: Developers",
@@ -55,9 +48,7 @@ dependencies = [
   "pyarrow>=22.0.0;python_version>='3.14'",
   "typing-extensions;python_version<'3.13'",
 ]
-dynamic = [
-  "version",
-]
+dynamic = ["version"]
 
 [project.urls]
 homepage = "https://datafusion.apache.org/python"
@@ -70,19 +61,11 @@ profile = "black"
 [tool.maturin]
 python-source = "python"
 module-name = "datafusion._internal"
-include = [
-  { path = "Cargo.lock", format = "sdist" },
-]
-exclude = [
-  ".asf.yaml",
-  ".github/**",
-  "ci/**",
-]
+include = [{ path = "Cargo.lock", format = "sdist" }]
+exclude = [".asf.yaml", ".github/**", "ci/**"]
 # Require Cargo.lock is up to date
 locked = true
-features = [
-  "substrait",
-]
+features = ["substrait"]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
@@ -90,9 +73,7 @@ asyncio_default_fixture_loop_scope = "function"
 
 # Enable docstring linting using the google style guide
 [tool.ruff.lint]
-select = [
-  "ALL",
-]
+select = ["ALL"]
 ignore = [
   "A001",    # Allow using words like min as variable names
   "A002",    # Allow using words like filter as variable names
@@ -118,10 +99,7 @@ convention = "google"
 max-doc-length = 88
 
 [tool.ruff.lint.flake8-boolean-trap]
-extend-allowed-calls = [
-  "datafusion.lit",
-  "lit",
-]
+extend-allowed-calls = ["datafusion.lit", "lit"]
 
 # Disable docstring checking for these directories
 [tool.ruff.lint.per-file-ignores]
@@ -186,26 +164,13 @@ extend-allowed-calls = [
   "TRY",
   "UP",
 ]
-"docs/*" = [
-  "D",
-]
-"docs/source/conf.py" = [
-  "ANN001",
-  "ERA001",
-  "INP001",
-]
+"docs/*" = ["D"]
+"docs/source/conf.py" = ["ANN001", "ERA001", "INP001"]
 
 [tool.codespell]
-skip = [
-  "./python/tests/test_functions.py",
-  "./target",
-  "uv.lock",
-]
+skip = ["./python/tests/test_functions.py", "./target", "uv.lock"]
 count = true
-ignore-words-list = [
-  "IST",
-  "ans",
-]
+ignore-words-list = ["IST", "ans"]
 
 [dependency-groups]
 dev = [