From 2cb27d143b967568cf31b074584b322786739481 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 14 Aug 2025 22:48:16 +0000 Subject: [PATCH] feat: Or, And, Xor can execute locally --- bigframes/core/compile/polars/compiler.py | 4 ++ bigframes/session/polars_executor.py | 7 ++- tests/system/small/engines/test_bool_ops.py | 64 +++++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 tests/system/small/engines/test_bool_ops.py diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index a3c5d5a80e..8ae896816f 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -198,6 +198,10 @@ def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr: def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr: return l_input | r_input + @compile_op.register(bool_ops.XorOp) + def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr: + return l_input ^ r_input + @compile_op.register(num_ops.AddOp) def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr: return l_input + r_input diff --git a/bigframes/session/polars_executor.py b/bigframes/session/polars_executor.py index 8aa7fd9002..9c45a884e5 100644 --- a/bigframes/session/polars_executor.py +++ b/bigframes/session/polars_executor.py @@ -21,7 +21,7 @@ from bigframes.core import array_value, bigframe_node, expression, local_data, nodes import bigframes.operations from bigframes.operations import aggregations as agg_ops -from bigframes.operations import comparison_ops, generic_ops, numeric_ops +from bigframes.operations import bool_ops, comparison_ops, generic_ops, numeric_ops from bigframes.session import executor, semi_executor if TYPE_CHECKING: @@ -44,6 +44,9 @@ ) _COMPATIBLE_SCALAR_OPS = ( + bool_ops.AndOp, + bool_ops.OrOp, + bool_ops.XorOp, comparison_ops.EqOp, comparison_ops.EqNullsMatchOp, comparison_ops.NeOp, @@ 
-63,6 +66,8 @@ generic_ops.FillNaOp, generic_ops.CaseWhenOp, generic_ops.InvertOp, + generic_ops.IsNullOp, + generic_ops.NotNullOp, ) _COMPATIBLE_AGG_OPS = ( agg_ops.SizeOp, diff --git a/tests/system/small/engines/test_bool_ops.py b/tests/system/small/engines/test_bool_ops.py new file mode 100644 index 0000000000..065a43c209 --- /dev/null +++ b/tests/system/small/engines/test_bool_ops.py @@ -0,0 +1,64 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import pytest + +from bigframes.core import array_value +import bigframes.operations as ops +from bigframes.session import polars_executor +from bigframes.testing.engine_utils import assert_equivalence_execution + +pytest.importorskip("polars") + +# Polars used as reference as its fast and local. Generally though, prefer gbq engine where they disagree. 
+REFERENCE_ENGINE = polars_executor.PolarsExecutor()
+
+
+def apply_op_pairwise(
+    array: array_value.ArrayValue, op: ops.BinaryOp, excluded_cols=()
+) -> array_value.ArrayValue:
+    """Apply a binary op to every ordered pair of distinct columns.
+
+    Each pair from ``itertools.permutations(array.column_ids, 2)`` is first
+    passed through ``op.output_type``; pairs for which that call raises
+    ``TypeError`` are skipped.
+
+    Args:
+        array: Input whose columns are combined pairwise.
+        op: Binary operation applied to each surviving column pair.
+        excluded_cols: Column ids that must not appear on either side of a
+            pair. Defaults to an empty tuple so no mutable object is shared
+            between calls; any container supporting ``in`` is accepted.
+
+    Returns:
+        The array value returned by ``array.compute_values`` for the
+        collected expressions.
+
+    Raises:
+        AssertionError: If no column pair yields an expression.
+    """
+    exprs = []
+    for l_arg, r_arg in itertools.permutations(array.column_ids, 2):
+        if (l_arg in excluded_cols) or (r_arg in excluded_cols):
+            continue
+        try:
+            # The result is discarded: the call only serves to filter out
+            # pairs for which it raises TypeError.
+            _ = op.output_type(
+                array.get_column_type(l_arg), array.get_column_type(r_arg)
+            )
+            exprs.append(op.as_expr(l_arg, r_arg))
+        except TypeError:
+            continue
+    assert exprs, "no column pair produced an expression for the given op"
+    new_arr, _ = array.compute_values(exprs)
+    return new_arr
+
+
+@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
+@pytest.mark.parametrize(
+    "op",
+    [
+        ops.and_op,
+        ops.or_op,
+        ops.xor_op,
+    ],
+)
+def test_engines_project_boolean_op(
+    scalars_array_value: array_value.ArrayValue, engine, op
+):
+    """Compare each engine against the reference engine for pairwise bool ops."""
+    # string_col is excluded from both operand positions.
+    # NOTE(review): this exclusion was originally justified as "does not
+    # contain dates", which seems unrelated to boolean ops -- confirm the
+    # actual reason.
+    # bool col actually doesn't work properly for bq engine
+    arr = apply_op_pairwise(scalars_array_value, op, excluded_cols=["string_col"])
+    assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)