Commit 2a29309

Merge branch 'main' into allow_large_results

2 parents: 0c8b88c + 8689199

82 files changed: +2743, -342 lines


CHANGELOG.md: 9 additions & 0 deletions

@@ -4,6 +4,15 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [2.17.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.16.0...v2.17.0) (2025-08-22)
+
+
+### Features
+
+* Add isin local execution impl ([#1993](https://github.com/googleapis/python-bigquery-dataframes/issues/1993)) ([26df6e6](https://github.com/googleapis/python-bigquery-dataframes/commit/26df6e691bb27ed09322a81214faedbf3639b32e))
+* Add reset_index names, col_level, col_fill, allow_duplicates args ([#2017](https://github.com/googleapis/python-bigquery-dataframes/issues/2017)) ([c02a1b6](https://github.com/googleapis/python-bigquery-dataframes/commit/c02a1b67d27758815430bb8006ac3a72cea55a89))
+* Support callable for series mask method ([#2014](https://github.com/googleapis/python-bigquery-dataframes/issues/2014)) ([5ac32eb](https://github.com/googleapis/python-bigquery-dataframes/commit/5ac32ebe17cfda447870859f5dd344b082b4d3d0))
+
 ## [2.16.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.15.0...v2.16.0) (2025-08-20)

bigframes/core/bigframe_node.py: 20 additions & 32 deletions

@@ -20,17 +20,7 @@
 import functools
 import itertools
 import typing
-from typing import (
-    Callable,
-    Dict,
-    Generator,
-    Iterable,
-    Mapping,
-    Sequence,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import Callable, Dict, Generator, Iterable, Mapping, Sequence, Tuple, Union
 
 from bigframes.core import expression, field, identifiers
 import bigframes.core.schema as schemata
@@ -309,33 +299,31 @@ def unique_nodes(
             seen.add(item)
             stack.extend(item.child_nodes)
 
-    def edges(
+    def iter_nodes_topo(
         self: BigFrameNode,
-    ) -> Generator[Tuple[BigFrameNode, BigFrameNode], None, None]:
-        for item in self.unique_nodes():
-            for child in item.child_nodes:
-                yield (item, child)
-
-    def iter_nodes_topo(self: BigFrameNode) -> Generator[BigFrameNode, None, None]:
-        """Returns nodes from bottom up."""
-        queue = collections.deque(
-            [node for node in self.unique_nodes() if not node.child_nodes]
-        )
-
+    ) -> Generator[BigFrameNode, None, None]:
+        """Returns nodes in reverse topological order, using Kahn's algorithm."""
         child_to_parents: Dict[
-            BigFrameNode, Set[BigFrameNode]
-        ] = collections.defaultdict(set)
-        for parent, child in self.edges():
-            child_to_parents[child].add(parent)
-
-        yielded = set()
+            BigFrameNode, list[BigFrameNode]
+        ] = collections.defaultdict(list)
+        out_degree: Dict[BigFrameNode, int] = collections.defaultdict(int)
+
+        queue: collections.deque["BigFrameNode"] = collections.deque()
+        for node in list(self.unique_nodes()):
+            num_children = len(node.child_nodes)
+            out_degree[node] = num_children
+            if num_children == 0:
+                queue.append(node)
+            for child in node.child_nodes:
+                child_to_parents[child].append(node)
 
         while queue:
            item = queue.popleft()
            yield item
-           yielded.add(item)
-           for parent in child_to_parents[item]:
-               if set(parent.child_nodes).issubset(yielded):
+           parents = child_to_parents.get(item, [])
+           for parent in parents:
+               out_degree[parent] -= 1
+               if out_degree[parent] == 0:
                    queue.append(parent)
 
     def top_down(
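
The rewrite replaces the old issubset check, which rescanned a parent's children on every visit, with out-degree counting, so each edge is processed once and the traversal is O(V + E). For reference, a self-contained sketch of the same Kahn-style, children-first traversal on a generic DAG; the node names and the children mapping below are illustrative, not bigframes types:

import collections
from typing import Hashable, Iterable, Iterator, List, Mapping


def iter_topo_bottom_up(
    roots: Iterable[Hashable], children: Mapping[Hashable, List[Hashable]]
) -> Iterator[Hashable]:
    """Yield nodes children-first (reverse topological order), Kahn-style."""
    # Discover every unique node reachable from the roots.
    seen: set = set()
    stack = list(roots)
    while stack:
        node = stack.pop()
        if node not in seen:
            seen.add(node)
            stack.extend(children.get(node, []))
    # Count unmet child dependencies and index parents by child.
    child_to_parents = collections.defaultdict(list)
    out_degree = {}
    queue: collections.deque = collections.deque()
    for node in seen:
        kids = children.get(node, [])
        out_degree[node] = len(kids)
        if not kids:
            queue.append(node)  # leaves are ready immediately
        for child in kids:
            child_to_parents[child].append(node)
    # Yield a node only once all of its children have been yielded.
    while queue:
        node = queue.popleft()
        yield node
        for parent in child_to_parents[node]:
            out_degree[parent] -= 1
            if out_degree[parent] == 0:
                queue.append(parent)


# "a" has children "b" and "c"; leaves come out before their parent.
print(list(iter_topo_bottom_up(["a"], {"a": ["b", "c"]})))  # ['b', 'c', 'a'] (leaf order may vary)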

bigframes/core/blocks.py: 36 additions & 46 deletions

@@ -387,12 +387,21 @@ def reversed(self) -> Block:
             index_labels=self.index.names,
         )
 
-    def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
+    def reset_index(
+        self,
+        level: LevelsType = None,
+        drop: bool = True,
+        *,
+        col_level: Union[str, int] = 0,
+        col_fill: typing.Hashable = "",
+        allow_duplicates: bool = False,
+    ) -> Block:
         """Reset the index of the block, promoting the old index to a value column.
 
         Arguments:
             level: the label or index level of the index levels to remove.
             name: this is the column id for the new value id derived from the old index
+            allow_duplicates:
 
         Returns:
             A new Block because dropping index columns can break references
@@ -438,6 +447,11 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
             )
         else:
             # Add index names to column index
+            col_level_n = (
+                col_level
+                if isinstance(col_level, int)
+                else self.column_labels.names.index(col_level)
+            )
             column_labels_modified = self.column_labels
             for position, level_id in enumerate(level_ids):
                 label = self.col_id_to_index_name[level_id]
@@ -447,11 +461,15 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
                 else:
                     label = f"level_{self.index_columns.index(level_id)}"
 
-                if label in self.column_labels:
+                if (not allow_duplicates) and (label in self.column_labels):
                     raise ValueError(f"cannot insert {label}, already exists")
+
                 if isinstance(self.column_labels, pd.MultiIndex):
                     nlevels = self.column_labels.nlevels
-                    label = tuple(label if i == 0 else "" for i in range(nlevels))
+                    label = tuple(
+                        label if i == col_level_n else col_fill for i in range(nlevels)
+                    )
+
                 # Create index copy with label inserted
                 # See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
                 column_labels_modified = column_labels_modified.insert(position, label)
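
The new keyword-only arguments mirror pandas.DataFrame.reset_index, which bigframes aims to match. A short sketch of the intended semantics using plain pandas (the frames and labels below are invented):

import pandas as pd

df = pd.DataFrame(
    {("x", "one"): [1, 2], ("x", "two"): [3, 4]},
    index=pd.Index(["a", "b"], name="row"),
)
# col_level picks which level of the MultiIndex columns receives the
# promoted index name; col_fill pads the remaining levels.
out = df.reset_index(col_level=1, col_fill="idx")
print(out.columns.tolist())  # [('idx', 'row'), ('x', 'one'), ('x', 'two')]

# allow_duplicates=True disables the "cannot insert x, already exists" check.
df2 = pd.DataFrame({"x": [1, 2]}, index=pd.Index(["a", "b"], name="x"))
print(df2.reset_index(allow_duplicates=True).columns.tolist())  # ['x', 'x']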
@@ -1214,46 +1232,10 @@ def aggregate_all_and_stack(
                 index_labels=[None],
             ).transpose(original_row_index=pd.Index([None]), single_row_mode=True)
         else:  # axis_n == 1
-            # using offsets as identity to group on.
-            # TODO: Allow to promote identity/total_order columns instead for better perf
-            expr_with_offsets, offset_col = self.expr.promote_offsets()
-            stacked_expr, (_, value_col_ids, passthrough_cols,) = unpivot(
-                expr_with_offsets,
-                row_labels=self.column_labels,
-                unpivot_columns=[tuple(self.value_columns)],
-                passthrough_columns=[*self.index_columns, offset_col],
-            )
-            # these corresponed to passthrough_columns provided to unpivot
-            index_cols = passthrough_cols[:-1]
-            og_offset_col = passthrough_cols[-1]
-            index_aggregations = [
-                (
-                    ex.UnaryAggregation(agg_ops.AnyValueOp(), ex.deref(col_id)),
-                    col_id,
-                )
-                for col_id in index_cols
-            ]
-            # TODO: may need add NullaryAggregation in main_aggregation
-            # when agg add support for axis=1, needed for agg("size", axis=1)
-            assert isinstance(
-                operation, agg_ops.UnaryAggregateOp
-            ), f"Expected a unary operation, but got {operation}. Please report this error and how you got here to the BigQuery DataFrames team (bit.ly/bigframes-feedback)."
-            main_aggregation = (
-                ex.UnaryAggregation(operation, ex.deref(value_col_ids[0])),
-                value_col_ids[0],
-            )
-            # Drop row identity after aggregating over it
-            result_expr = stacked_expr.aggregate(
-                [*index_aggregations, main_aggregation],
-                by_column_ids=[og_offset_col],
-                dropna=dropna,
-            ).drop_columns([og_offset_col])
-            return Block(
-                result_expr,
-                index_columns=index_cols,
-                column_labels=[None],
-                index_labels=self.index.names,
-            )
+            as_array = ops.ToArrayOp().as_expr(*(col for col in self.value_columns))
+            reduced = ops.ArrayReduceOp(operation).as_expr(as_array)
+            block, id = self.project_expr(reduced, None)
+            return block.select_column(id)
 
     def aggregate_size(
         self,
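
The axis=1 branch now packs each row's value columns into an array (ToArrayOp) and reduces that array with the requested aggregation (ArrayReduceOp), turning the old unpivot-plus-groupby-on-offsets plan into a single per-row scalar projection that needs no synthetic row identity. A pure-Python analogue of the new strategy (the data and the sum aggregation are illustrative):

rows = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]
value_columns = ["a", "b", "c"]


def reduce_row(row, aggregation):
    as_array = [row[col] for col in value_columns]  # ToArrayOp
    return aggregation(as_array)                    # ArrayReduceOp


print([reduce_row(row, sum) for row in rows])  # [6, 15]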
@@ -2147,9 +2129,17 @@ def _get_unique_values(
         import bigframes.core.block_transforms as block_tf
         import bigframes.dataframe as df
 
-        unique_value_block = block_tf.drop_duplicates(
-            self.select_columns(columns), columns
-        )
+        if self.explicitly_ordered:
+            unique_value_block = block_tf.drop_duplicates(
+                self.select_columns(columns), columns
+            )
+        else:
+            unique_value_block, _ = self.aggregate(by_column_ids=columns, dropna=False)
+            col_labels = self._get_labels_for_columns(columns)
+            unique_value_block = unique_value_block.reset_index(
+                drop=False
+            ).with_column_labels(col_labels)
+
         pd_values = (
             df.DataFrame(unique_value_block).head(max_unique_values + 1).to_pandas()
         )
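
For blocks without an explicit ordering, distinct values now come from a group-by aggregation instead of drop_duplicates, which avoids imposing a row order just to deduplicate. A small pandas sketch of the equivalence (data invented):

import pandas as pd

df = pd.DataFrame({"k": ["a", "b", "a"], "v": [1, 2, 1]})
# drop_duplicates relies on row order to pick which duplicate survives ...
print(df.drop_duplicates(["k", "v"]))
# ... while grouping yields the same distinct rows with no ordering at all.
print(df.groupby(["k", "v"], dropna=False).size().reset_index()[["k", "v"]])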

bigframes/core/compile/ibis_compiler/aggregate_compiler.py: 1 addition & 1 deletion

@@ -165,7 +165,7 @@ def _(
 ) -> ibis_types.NumericValue:
     # Will be null if all inputs are null. Pandas defaults to zero sum though.
     bq_sum = _apply_window_if_present(column.sum(), window)
-    return bq_sum.fill_null(ibis_types.literal(0))
+    return bq_sum.coalesce(ibis_types.literal(0))
 
 
 @compile_unary_agg.register
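
Both calls express the same null-to-zero fallback; coalesce is simply the spelling used here. A hedged sketch of the intent, run against ibis' default DuckDB backend (the BigQuery result is assumed to match):

import ibis

t = ibis.memtable({"x": [None, None]}, schema={"x": "int64"})
bq_sum = t.x.sum()  # NULL when every input is NULL
print(bq_sum.coalesce(ibis.literal(0)).execute())  # 0, pandas' zero-sum default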

bigframes/core/compile/ibis_compiler/scalar_op_registry.py: 22 additions & 0 deletions

@@ -1201,6 +1201,28 @@ def array_slice_op_impl(x: ibis_types.Value, op: ops.ArraySliceOp):
     return res
 
 
+@scalar_op_compiler.register_nary_op(ops.ToArrayOp, pass_op=False)
+def to_arry_op_impl(*values: ibis_types.Value):
+    do_upcast_bool = any(t.type().is_numeric() for t in values)
+    if do_upcast_bool:
+        values = tuple(
+            val.cast(ibis_dtypes.int64) if val.type().is_boolean() else val
+            for val in values
+        )
+    return ibis_api.array(values)
+
+
+@scalar_op_compiler.register_unary_op(ops.ArrayReduceOp, pass_op=True)
+def array_reduce_op_impl(x: ibis_types.Value, op: ops.ArrayReduceOp):
+    import bigframes.core.compile.ibis_compiler.aggregate_compiler as agg_compilers
+
+    return typing.cast(ibis_types.ArrayValue, x).reduce(
+        lambda arr_vals: agg_compilers.compile_unary_agg(
+            op.aggregation, typing.cast(ibis_types.Column, arr_vals)
+        )
+    )
+
+
 # JSON Ops
 @scalar_op_compiler.register_binary_op(ops.JSONSet, pass_op=True)
 def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
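
BigQuery arrays must have a single element type, so to_arry_op_impl upcasts booleans to INT64 whenever any input is numeric. A standalone restatement of that rule, with plain Python values standing in for ibis expressions:

values = (True, 2, False)
if any(isinstance(v, int) and not isinstance(v, bool) for v in values):
    values = tuple(int(v) if isinstance(v, bool) else v for v in values)
print(values)  # (1, 2, 0): one uniform numeric element type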

bigframes/core/compile/polars/compiler.py: 112 additions & 0 deletions

@@ -31,9 +31,12 @@
 import bigframes.dtypes
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
+import bigframes.operations.array_ops as arr_ops
 import bigframes.operations.bool_ops as bool_ops
 import bigframes.operations.comparison_ops as comp_ops
+import bigframes.operations.date_ops as date_ops
 import bigframes.operations.datetime_ops as dt_ops
+import bigframes.operations.frequency_ops as freq_ops
 import bigframes.operations.generic_ops as gen_ops
 import bigframes.operations.json_ops as json_ops
 import bigframes.operations.numeric_ops as num_ops
@@ -74,6 +77,20 @@ def decorator(func):
 
 
 if polars_installed:
+    _FREQ_MAPPING = {
+        "Y": "1y",
+        "Q": "1q",
+        "M": "1mo",
+        "W": "1w",
+        "D": "1d",
+        "h": "1h",
+        "min": "1m",
+        "s": "1s",
+        "ms": "1ms",
+        "us": "1us",
+        "ns": "1ns",
+    }
+
     _DTYPE_MAPPING = {
         # Direct mappings
         bigframes.dtypes.INT_DTYPE: pl.Int64(),
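
_FREQ_MAPPING translates pandas-style frequency aliases into the interval strings polars expects, e.g. "M" becomes "1mo" for dt.truncate. A quick check (timestamp invented):

from datetime import datetime

import polars as pl

s = pl.Series("ts", [datetime(2025, 8, 22, 13, 45)])
print(s.dt.truncate(every="1mo"))  # [2025-08-01 00:00:00]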
@@ -301,11 +318,76 @@ def _(self, op: ops.ScalarOp, l_input: pl.Expr, r_input: pl.Expr) -> pl.Expr:
             assert isinstance(op, string_ops.StrConcatOp)
             return pl.concat_str(l_input, r_input)
 
+        @compile_op.register(string_ops.StrContainsOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            assert isinstance(op, string_ops.StrContainsOp)
+            return input.str.contains(pattern=op.pat, literal=True)
+
+        @compile_op.register(string_ops.StrContainsRegexOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            assert isinstance(op, string_ops.StrContainsRegexOp)
+            return input.str.contains(pattern=op.pat, literal=False)
+
+        @compile_op.register(string_ops.StartsWithOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            assert isinstance(op, string_ops.StartsWithOp)
+            if len(op.pat) == 1:
+                return input.str.starts_with(op.pat[0])
+            else:
+                return pl.any_horizontal(
+                    *(input.str.starts_with(pat) for pat in op.pat)
+                )
+
+        @compile_op.register(string_ops.EndsWithOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            assert isinstance(op, string_ops.EndsWithOp)
+            if len(op.pat) == 1:
+                return input.str.ends_with(op.pat[0])
+            else:
+                return pl.any_horizontal(*(input.str.ends_with(pat) for pat in op.pat))
+
+        @compile_op.register(freq_ops.FloorDtOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            assert isinstance(op, freq_ops.FloorDtOp)
+            return input.dt.truncate(every=_FREQ_MAPPING[op.freq])
+
         @compile_op.register(dt_ops.StrftimeOp)
         def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
             assert isinstance(op, dt_ops.StrftimeOp)
             return input.dt.strftime(op.date_format)
 
+        @compile_op.register(date_ops.YearOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.year()
+
+        @compile_op.register(date_ops.QuarterOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.quarter()
+
+        @compile_op.register(date_ops.MonthOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.month()
+
+        @compile_op.register(date_ops.DayOfWeekOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.weekday() - 1
+
+        @compile_op.register(date_ops.DayOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.day()
+
+        @compile_op.register(date_ops.IsoYearOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.iso_year()
+
+        @compile_op.register(date_ops.IsoWeekOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.week()
+
+        @compile_op.register(date_ops.IsoDayOp)
+        def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
+            return input.dt.weekday()
+
         @compile_op.register(dt_ops.ParseDatetimeOp)
         def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
             assert isinstance(op, dt_ops.ParseDatetimeOp)
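
Note the "- 1" in the DayOfWeekOp implementation: polars dt.weekday() is ISO-numbered (Monday=1 through Sunday=7), while pandas dayofweek counts from Monday=0; IsoDayOp can use the polars value unchanged. A quick check (date invented):

from datetime import date

import polars as pl

s = pl.Series([date(2025, 8, 25)])  # a Monday
print(s.dt.weekday().item())        # 1 (ISO day of week)
print((s.dt.weekday() - 1).item())  # 0 (pandas dayofweek convention)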
@@ -325,6 +407,36 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
             assert isinstance(op, json_ops.JSONDecode)
             return input.str.json_decode(_DTYPE_MAPPING[op.to_type])
 
+        @compile_op.register(arr_ops.ToArrayOp)
+        def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:
+            return pl.concat_list(*inputs)
+
+        @compile_op.register(arr_ops.ArrayReduceOp)
+        def _(self, op: ops.ArrayReduceOp, input: pl.Expr) -> pl.Expr:
+            # TODO: Unify this with general aggregation compilation?
+            if isinstance(op.aggregation, agg_ops.MinOp):
+                return input.list.min()
+            if isinstance(op.aggregation, agg_ops.MaxOp):
+                return input.list.max()
+            if isinstance(op.aggregation, agg_ops.SumOp):
+                return input.list.sum()
+            if isinstance(op.aggregation, agg_ops.MeanOp):
+                return input.list.mean()
+            if isinstance(op.aggregation, agg_ops.CountOp):
+                return input.list.len()
+            if isinstance(op.aggregation, agg_ops.StdOp):
+                return input.list.std()
+            if isinstance(op.aggregation, agg_ops.VarOp):
+                return input.list.var()
+            if isinstance(op.aggregation, agg_ops.AnyOp):
+                return input.list.any()
+            if isinstance(op.aggregation, agg_ops.AllOp):
+                return input.list.all()
+            else:
+                raise NotImplementedError(
+                    f"Haven't implemented array aggregation: {op.aggregation}"
+                )
+
     @dataclasses.dataclass(frozen=True)
     class PolarsAggregateCompiler:
         scalar_compiler = PolarsExpressionCompiler()
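
These two registrations give the polars backend the same array-based row reduction used by aggregate_all_and_stack for axis=1 above. A minimal sketch of the compiled shape (column names invented):

import polars as pl

df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
# ToArrayOp compiles to concat_list; ArrayReduceOp(SumOp) to list.sum().
print(df.select(pl.concat_list("a", "b", "c").list.sum().alias("row_sum")))
# row_sum: [9, 12]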

0 commit comments
