From 5a30e03cbf32fffaeb7ab9dcbb757abe8f43bd6d Mon Sep 17 00:00:00 2001 From: dario curreri Date: Thu, 19 Feb 2026 11:36:58 +0100 Subject: [PATCH 1/7] ci: update pre-commit hooks and fix linting issues * Update Ruff version in pre-commit configuration to v0.15.1. * Add noqa comments to suppress specific linting warnings in various files. * Update regex patterns in test cases for better matching. --- .pre-commit-config.yaml | 2 +- python/datafusion/dataframe.py | 2 +- python/datafusion/expr.py | 4 +++- python/datafusion/input/location.py | 4 ++-- python/datafusion/plan.py | 2 +- python/datafusion/user_defined.py | 4 ++-- python/tests/test_catalog.py | 2 +- python/tests/test_dataframe.py | 2 +- python/tests/test_functions.py | 24 ++++++++++++------------ python/tests/test_sql.py | 4 ++-- python/tests/test_udf.py | 7 +++---- python/tests/test_udwf.py | 4 ++-- 12 files changed, 31 insertions(+), 30 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bcefa405d..8ae6a4e32 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: actionlint-docker - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.9.10 + rev: v0.15.1 hooks: # Run the linter. - id: ruff diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index d302c12a5..bf4ffcda6 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -330,7 +330,7 @@ def into_view(self, temporary: bool = False) -> Table: >>> df.collect() # The DataFrame is still usable >>> ctx.sql("SELECT value FROM values_view").collect() """ - from datafusion.catalog import Table as _Table + from datafusion.catalog import Table as _Table # noqa: PLC0415 return _Table(self.df.into_view(temporary)) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 9df58f52a..10a30ed1c 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -20,6 +20,8 @@ See :ref:`Expressions` in the online documentation for more details. """ +# ruff: noqa: PLC0415 + from __future__ import annotations from collections.abc import Iterable, Sequence @@ -340,7 +342,7 @@ def sort_list_to_raw_sort_list( return raw_sort_list -class Expr: +class Expr: # noqa: PLW1641 """Expression object. Expressions are one of the core concepts in DataFusion. See diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index b804ac18b..779d94d23 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -46,7 +46,7 @@ def build_table( num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] if file_format == "parquet": - import pyarrow.parquet as pq + import pyarrow.parquet as pq # noqa: PLC0415 # Read the Parquet metadata metadata = pq.read_metadata(input_item) @@ -61,7 +61,7 @@ def build_table( ] elif format == "csv": - import csv + import csv # noqa: PLC0415 # Consume header row and count number of rows for statistics. # TODO: Possibly makes sense to have the eager number of rows diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index fb54fd624..9c96a18fc 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -32,7 +32,7 @@ ] -class LogicalPlan: +class LogicalPlan: # noqa: PLW1641 """Logical Plan. A `LogicalPlan` is a node in a tree of relational operators (such as diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py index d4e5302b5..eef23e741 100644 --- a/python/datafusion/user_defined.py +++ b/python/datafusion/user_defined.py @@ -583,11 +583,11 @@ def from_pycapsule(func: AggregateUDFExportable | _PyCapsule) -> AggregateUDF: AggregateUDF that is exported via the FFI bindings. """ if _is_pycapsule(func): - aggregate = cast(AggregateUDF, object.__new__(AggregateUDF)) + aggregate = cast("AggregateUDF", object.__new__(AggregateUDF)) aggregate._udaf = df_internal.AggregateUDF.from_pycapsule(func) return aggregate - capsule = cast(AggregateUDFExportable, func) + capsule = cast("AggregateUDFExportable", func) name = str(capsule.__class__) return AggregateUDF( name=name, diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py index 71c08da26..9310da506 100644 --- a/python/tests/test_catalog.py +++ b/python/tests/test_catalog.py @@ -248,7 +248,7 @@ def test_exception_not_mangled(ctx: SessionContext): schema.register_table("test_table", create_dataset()) - with pytest.raises(ValueError, match="^test_table is not an acceptable name$"): + with pytest.raises(ValueError, match=r"^test_table is not an acceptable name$"): ctx.sql(f"select * from {catalog_name}.{schema_name}.test_table") diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 71abe2925..de6b00acf 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -2790,7 +2790,7 @@ def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, res def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding): """Test that unsupported Parquet encodings do not work.""" # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519 - with pytest.raises(BaseException, match="Encoding .*? is not supported"): + with pytest.raises(BaseException, match=r"Encoding .*? is not supported"): df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 7b3332ed7..5a61a2dd1 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -303,19 +303,19 @@ def py_flatten(arr): lambda data: [np.concatenate([arr, arr]) for arr in data], ), ( - lambda col: f.array_dims(col), + f.array_dims, lambda data: [[len(r)] for r in data], ), ( - lambda col: f.array_distinct(col), + f.array_distinct, lambda data: [list(set(r)) for r in data], ), ( - lambda col: f.list_distinct(col), + f.list_distinct, lambda data: [list(set(r)) for r in data], ), ( - lambda col: f.list_dims(col), + f.list_dims, lambda data: [[len(r)] for r in data], ), ( @@ -323,11 +323,11 @@ def py_flatten(arr): lambda data: [r[0] for r in data], ), ( - lambda col: f.array_empty(col), + f.array_empty, lambda data: [len(r) == 0 for r in data], ), ( - lambda col: f.empty(col), + f.empty, lambda data: [len(r) == 0 for r in data], ), ( @@ -343,11 +343,11 @@ def py_flatten(arr): lambda data: [r[0] for r in data], ), ( - lambda col: f.array_length(col), + f.array_length, lambda data: [len(r) for r in data], ), ( - lambda col: f.list_length(col), + f.list_length, lambda data: [len(r) for r in data], ), ( @@ -391,11 +391,11 @@ def py_flatten(arr): lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data], ), ( - lambda col: f.array_ndims(col), + f.array_ndims, lambda data: [np.array(r).ndim for r in data], ), ( - lambda col: f.list_ndims(col), + f.list_ndims, lambda data: [np.array(r).ndim for r in data], ), ( @@ -415,11 +415,11 @@ def py_flatten(arr): lambda data: [np.insert(arr, 0, 99.0) for arr in data], ), ( - lambda col: f.array_pop_back(col), + f.array_pop_back, lambda data: [arr[:-1] for arr in data], ), ( - lambda col: f.array_pop_front(col), + f.array_pop_front, lambda data: [arr[1:] for arr in data], ), ( diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 12710cf08..1ed1746e1 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -31,7 +31,7 @@ def test_no_table(ctx): with pytest.raises( ValueError, - match="^Error during planning: table 'datafusion.public.b' not found$", + match=r"^Error during planning: table 'datafusion.public.b' not found$", ): ctx.sql("SELECT a FROM b").collect() @@ -188,7 +188,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty partition_data_type = "string" if legacy_data_type else pa.string() if legacy_data_type: - with pytest.warns(DeprecationWarning): + with pytest.warns(DeprecationWarning): # noqa: PT030 ctx.register_parquet( "datapp", dir_root, diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index c0ba1d831..b2540fb57 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -15,7 +15,10 @@ # specific language governing permissions and limitations # under the License. +from uuid import UUID + import pyarrow as pa +import pyarrow.compute as pc import pytest from datafusion import SessionContext, column, udf from datafusion import functions as f @@ -128,8 +131,6 @@ def udf_with_param(values: pa.Array) -> pa.Array: def test_udf_with_metadata(ctx) -> None: - from uuid import UUID - @udf([pa.string()], pa.uuid(), "stable") def uuid_from_string(uuid_string): return pa.array((UUID(s).bytes for s in uuid_string.to_pylist()), pa.uuid()) @@ -151,8 +152,6 @@ def uuid_version(uuid): def test_udf_with_nullability(ctx: SessionContext) -> None: - import pyarrow.compute as pc - field_nullable_i64 = pa.field("with_nulls", type=pa.int64(), nullable=True) field_non_nullable_i64 = pa.field("no_nulls", type=pa.int64(), nullable=False) diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 5aaf00664..38b935b7e 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -433,8 +433,8 @@ def test_udwf_functions(complex_window_df, name, expr, expected): [ udwf(SimpleWindowCount, pa.int64(), pa.int64(), "immutable"), udwf(SimpleWindowCount, [pa.int64()], pa.int64(), "immutable"), - udwf([pa.int64()], pa.int64(), "immutable")(lambda: SimpleWindowCount()), - udwf(pa.int64(), pa.int64(), "immutable")(lambda: SimpleWindowCount()), + udwf([pa.int64()], pa.int64(), "immutable")(SimpleWindowCount), + udwf(pa.int64(), pa.int64(), "immutable")(SimpleWindowCount), ], ) def test_udwf_overloads(udwf_func, count_window_df): From cb90d925f336d083e3401c353b10e6864346db10 Mon Sep 17 00:00:00 2001 From: dario curreri Date: Thu, 19 Feb 2026 11:37:51 +0100 Subject: [PATCH 2/7] style: correct indentation in GitHub Actions workflow file * Adjusted indentation for the enable-cache option in the test.yml workflow file to ensure proper YAML formatting. --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 55248b6bf..4cad8db24 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -67,7 +67,7 @@ jobs: - name: Install dependencies uses: astral-sh/setup-uv@v7 with: - enable-cache: true + enable-cache: true # Download the Linux wheel built in the build workflow - name: Download pre-built Linux wheel From 3a1eccbc055b157d133b57f02fe2fae633c5366d Mon Sep 17 00:00:00 2001 From: dario curreri Date: Thu, 19 Feb 2026 11:38:04 +0100 Subject: [PATCH 3/7] refactor: reorder imports in indexed_field.rs for clarity * Adjusted the order of imports in indexed_field.rs to improve readability and maintain consistency with project conventions. --- src/expr/indexed_field.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/expr/indexed_field.rs b/src/expr/indexed_field.rs index 1dfa0ed2f..79f528179 100644 --- a/src/expr/indexed_field.rs +++ b/src/expr/indexed_field.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. -use crate::expr::PyExpr; +use std::fmt::{Display, Formatter}; + use datafusion::logical_expr::expr::{GetFieldAccess, GetIndexedField}; use pyo3::prelude::*; -use std::fmt::{Display, Formatter}; use super::literal::PyLiteral; +use crate::expr::PyExpr; #[pyclass(frozen, name = "GetIndexedField", module = "datafusion.expr", subclass)] #[derive(Clone)] From 8afeb14eaab479fc893e060dcc9b67c59b3e9a66 Mon Sep 17 00:00:00 2001 From: dario curreri Date: Thu, 19 Feb 2026 11:38:37 +0100 Subject: [PATCH 4/7] build: update dependencies in Cargo.toml and Cargo.lock * Bump versions of several dependencies including tokio, pyo3-log, prost, uuid, and log to their latest releases. * Update Cargo.lock to reflect the changes in dependency versions. --- Cargo.lock | 4 ++-- Cargo.toml | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cd853a03f..d6fd05a77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -593,9 +593,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.20.0" +version = "3.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c81d250916401487680ed13b8b675660281dcfc3ab0121fe44c94bcab9eae2fb" +checksum = "5c6f81257d10a0f602a294ae4182251151ff97dbb504ef9afcdda4a64b24d9b4" [[package]] name = "byteorder" diff --git a/Cargo.toml b/Cargo.toml index 3e632bafc..371554021 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,7 @@ protoc = ["datafusion-substrait/protoc"] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.47", features = [ +tokio = { version = "1.49", features = [ "macros", "rt", "rt-multi-thread", @@ -54,16 +54,16 @@ pyo3 = { version = "0.26", features = [ "abi3-py310", ] } pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"] } -pyo3-log = "0.13.2" +pyo3-log = "0.13.3" arrow = { version = "57", features = ["pyarrow"] } arrow-select = { version = "57" } datafusion = { version = "52", features = ["avro", "unicode_expressions"] } datafusion-substrait = { version = "52", optional = true } datafusion-proto = { version = "52" } datafusion-ffi = { version = "52" } -prost = "0.14.1" # keep in line with `datafusion-substrait` +prost = "0.14.3" # keep in line with `datafusion-substrait` serde_json = "1" -uuid = { version = "1.18", features = ["v4"] } +uuid = { version = "1.21", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = [ "local_dynamic_tls", ] } @@ -77,11 +77,11 @@ object_store = { version = "0.12.4", features = [ "http", ] } url = "2" -log = "0.4.27" +log = "0.4.29" parking_lot = "0.12" [build-dependencies] -prost-types = "0.14.1" # keep in line with `datafusion-substrait` +prost-types = "0.14.3" # keep in line with `datafusion-substrait` pyo3-build-config = "0.26" [lib] From 2b0c23d2e66bfcfcea16fffae430abf32db36ecd Mon Sep 17 00:00:00 2001 From: dario curreri Date: Thu, 19 Feb 2026 11:38:48 +0100 Subject: [PATCH 5/7] style: format pyproject.toml for consistency * Adjusted formatting in pyproject.toml for improved readability by aligning lists and ensuring consistent indentation. * Updated dependencies and configuration settings for better organization. --- pyproject.toml | 124 +++++++++++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 44 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5a5128a2f..b994cd034 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,9 @@ # under the License. [build-system] -requires = ["maturin>=1.8.1"] +requires = [ + "maturin>=1.8.1", +] build-backend = "maturin" [project] @@ -25,7 +27,12 @@ description = "Build and run queries against data" readme = "README.md" license = { file = "LICENSE.txt" } requires-python = ">=3.10" -keywords = ["datafusion", "dataframe", "rust", "query-engine"] +keywords = [ + "dataframe", + "datafusion", + "query-engine", + "rust", +] classifiers = [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", @@ -48,7 +55,9 @@ dependencies = [ "pyarrow>=22.0.0;python_version>='3.14'", "typing-extensions;python_version<'3.13'", ] -dynamic = ["version"] +dynamic = [ + "version", +] [project.urls] homepage = "https://datafusion.apache.org/python" @@ -61,11 +70,19 @@ profile = "black" [tool.maturin] python-source = "python" module-name = "datafusion._internal" -include = [{ path = "Cargo.lock", format = "sdist" }] -exclude = [".github/**", "ci/**", ".asf.yaml"] +include = [ + { path = "Cargo.lock", format = "sdist" }, +] +exclude = [ + ".asf.yaml", + ".github/**", + "ci/**", +] # Require Cargo.lock is up to date locked = true -features = ["substrait"] +features = [ + "substrait", +] [tool.pytest.ini_options] asyncio_mode = "auto" @@ -73,23 +90,25 @@ asyncio_default_fixture_loop_scope = "function" # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["ALL"] +select = [ + "ALL", +] ignore = [ "A001", # Allow using words like min as variable names "A002", # Allow using words like filter as variable names + "A005", # Allow module named io "ANN401", # Allow Any for wrapper classes "COM812", # Recommended to ignore these rules when using with ruff-format - "FIX002", # Allow TODO lines - consider removing at some point "FBT001", # Allow boolean positional args "FBT002", # Allow boolean positional args + "FIX002", # Allow TODO lines - consider removing at some point "ISC001", # Recommended to ignore these rules when using with ruff-format + "N812", # Allow importing functions as `F` + "PD901", # Allow variable name df + "PLR0913", # Allow many arguments in function definition "SLF001", # Allow accessing private members "TD002", # Do not require author names in TODO statements "TD003", # Allow TODO lines - "PLR0913", # Allow many arguments in function definition - "PD901", # Allow variable name df - "N812", # Allow importing functions as `F` - "A005", # Allow module named io ] [tool.ruff.lint.pydocstyle] @@ -99,7 +118,10 @@ convention = "google" max-doc-length = 88 [tool.ruff.lint.flake8-boolean-trap] -extend-allowed-calls = ["lit", "datafusion.lit"] +extend-allowed-calls = [ + "datafusion.lit", + "lit", +] # Disable docstring checking for these directories [tool.ruff.lint.per-file-ignores] @@ -108,68 +130,82 @@ extend-allowed-calls = ["lit", "datafusion.lit"] "ARG", "BLE001", "D", - "S101", - "SLF", "PD", + "PLC0415", + "PLR0913", "PLR2004", + "PT004", "PT011", "RUF015", + "S101", "S608", - "PLR0913", - "PT004", + "SLF", ] "examples/*" = [ - "D", - "W505", - "E501", - "T201", - "S101", - "PLR2004", "ANN001", "ANN202", - "INP001", + "D", "DTZ007", + "E501", + "INP001", + "PLR2004", "RUF015", + "S101", + "T201", + "W505", ] "dev/*" = [ + "ANN001", + "C", "D", "E", - "T", - "S", + "ERA001", + "EXE", + "N817", "PLR", - "C", + "S", "SIM", + "T", "UP", - "EXE", - "N817", - "ERA001", - "ANN001", ] "benchmarks/*" = [ + "ANN001", + "BLE", "D", + "E", + "ERA001", + "EXE", "F", - "T", - "BLE", "FURB", + "INP001", "PLR", - "E", - "TD", - "TRY", "S", "SIM", - "EXE", + "T", + "TD", + "TRY", "UP", - "ERA001", +] +"docs/*" = [ + "D", +] +"docs/source/conf.py" = [ "ANN001", + "ERA001", "INP001", ] -"docs/*" = ["D"] -"docs/source/conf.py" = ["ERA001", "ANN001", "INP001"] [tool.codespell] -skip = ["./target", "uv.lock", "./python/tests/test_functions.py"] +skip = [ + "./python/tests/test_functions.py", + "./target", + "uv.lock", +] count = true -ignore-words-list = ["ans", "IST"] +ignore-words-list = [ + "IST", + "ans", +] [dependency-groups] dev = [ @@ -182,8 +218,8 @@ dev = [ "pre-commit>=4.3.0", "pyarrow>=19.0.0", "pygithub==2.5.0", - "pytest>=7.4.4", "pytest-asyncio>=0.23.3", + "pytest>=7.4.4", "pyyaml>=6.0.3", "ruff>=0.9.1", "toml>=0.10.2", @@ -196,6 +232,6 @@ docs = [ "pickleshare>=0.7.5", "pydata-sphinx-theme==0.8.0", "setuptools>=75.3.0", - "sphinx>=7.1.2", "sphinx-autoapi>=3.4.0", + "sphinx>=7.1.2", ] From 789531ea984454144daa12433acd93d73030a7e1 Mon Sep 17 00:00:00 2001 From: dario curreri Date: Thu, 19 Feb 2026 15:17:18 +0100 Subject: [PATCH 6/7] style: remove noqa comments for import statements * Cleaned up import statements in multiple files by removing unnecessary noqa comments, enhancing code readability and maintaining consistency across the codebase. --- python/datafusion/dataframe.py | 2 +- python/datafusion/expr.py | 2 +- python/datafusion/input/location.py | 4 ++-- python/datafusion/plan.py | 2 +- python/tests/test_sql.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index bf4ffcda6..d302c12a5 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -330,7 +330,7 @@ def into_view(self, temporary: bool = False) -> Table: >>> df.collect() # The DataFrame is still usable >>> ctx.sql("SELECT value FROM values_view").collect() """ - from datafusion.catalog import Table as _Table # noqa: PLC0415 + from datafusion.catalog import Table as _Table return _Table(self.df.into_view(temporary)) diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 10a30ed1c..5760b8948 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -342,7 +342,7 @@ def sort_list_to_raw_sort_list( return raw_sort_list -class Expr: # noqa: PLW1641 +class Expr: """Expression object. Expressions are one of the core concepts in DataFusion. See diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index 779d94d23..b804ac18b 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -46,7 +46,7 @@ def build_table( num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] if file_format == "parquet": - import pyarrow.parquet as pq # noqa: PLC0415 + import pyarrow.parquet as pq # Read the Parquet metadata metadata = pq.read_metadata(input_item) @@ -61,7 +61,7 @@ def build_table( ] elif format == "csv": - import csv # noqa: PLC0415 + import csv # Consume header row and count number of rows for statistics. # TODO: Possibly makes sense to have the eager number of rows diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index 9c96a18fc..fb54fd624 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -32,7 +32,7 @@ ] -class LogicalPlan: # noqa: PLW1641 +class LogicalPlan: """Logical Plan. A `LogicalPlan` is a node in a tree of relational operators (such as diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 1ed1746e1..92c311930 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -188,7 +188,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str, legacy_data_ty partition_data_type = "string" if legacy_data_type else pa.string() if legacy_data_type: - with pytest.warns(DeprecationWarning): # noqa: PT030 + with pytest.warns(DeprecationWarning): ctx.register_parquet( "datapp", dir_root, From 4c1e6e0c79d67afdb4f4d0fd2773b656c186266d Mon Sep 17 00:00:00 2001 From: dario curreri Date: Thu, 19 Feb 2026 15:20:24 +0100 Subject: [PATCH 7/7] style: simplify formatting in pyproject.toml * Streamlined list formatting in pyproject.toml for improved readability by removing unnecessary line breaks and ensuring consistent structure across sections. * No functional changes were made; the focus was solely on code style and organization. --- pyproject.toml | 59 ++++++++++---------------------------------------- 1 file changed, 12 insertions(+), 47 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b994cd034..08d64eca0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,9 +16,7 @@ # under the License. [build-system] -requires = [ - "maturin>=1.8.1", -] +requires = ["maturin>=1.8.1"] build-backend = "maturin" [project] @@ -27,12 +25,7 @@ description = "Build and run queries against data" readme = "README.md" license = { file = "LICENSE.txt" } requires-python = ">=3.10" -keywords = [ - "dataframe", - "datafusion", - "query-engine", - "rust", -] +keywords = ["dataframe", "datafusion", "query-engine", "rust"] classifiers = [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", @@ -55,9 +48,7 @@ dependencies = [ "pyarrow>=22.0.0;python_version>='3.14'", "typing-extensions;python_version<'3.13'", ] -dynamic = [ - "version", -] +dynamic = ["version"] [project.urls] homepage = "https://datafusion.apache.org/python" @@ -70,19 +61,11 @@ profile = "black" [tool.maturin] python-source = "python" module-name = "datafusion._internal" -include = [ - { path = "Cargo.lock", format = "sdist" }, -] -exclude = [ - ".asf.yaml", - ".github/**", - "ci/**", -] +include = [{ path = "Cargo.lock", format = "sdist" }] +exclude = [".asf.yaml", ".github/**", "ci/**"] # Require Cargo.lock is up to date locked = true -features = [ - "substrait", -] +features = ["substrait"] [tool.pytest.ini_options] asyncio_mode = "auto" @@ -90,9 +73,7 @@ asyncio_default_fixture_loop_scope = "function" # Enable docstring linting using the google style guide [tool.ruff.lint] -select = [ - "ALL", -] +select = ["ALL"] ignore = [ "A001", # Allow using words like min as variable names "A002", # Allow using words like filter as variable names @@ -118,10 +99,7 @@ convention = "google" max-doc-length = 88 [tool.ruff.lint.flake8-boolean-trap] -extend-allowed-calls = [ - "datafusion.lit", - "lit", -] +extend-allowed-calls = ["datafusion.lit", "lit"] # Disable docstring checking for these directories [tool.ruff.lint.per-file-ignores] @@ -186,26 +164,13 @@ extend-allowed-calls = [ "TRY", "UP", ] -"docs/*" = [ - "D", -] -"docs/source/conf.py" = [ - "ANN001", - "ERA001", - "INP001", -] +"docs/*" = ["D"] +"docs/source/conf.py" = ["ANN001", "ERA001", "INP001"] [tool.codespell] -skip = [ - "./python/tests/test_functions.py", - "./target", - "uv.lock", -] +skip = ["./python/tests/test_functions.py", "./target", "uv.lock"] count = true -ignore-words-list = [ - "IST", - "ans", -] +ignore-words-list = ["IST", "ans"] [dependency-groups] dev = [