Skip to content

Commit 360edb3

Browse files
committed
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
2 parents 65e9fd4 + 24050cb commit 360edb3

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+1865
-481
lines changed

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,8 @@ repos:
4242
additional_dependencies: [types-requests, types-tabulate, types-PyYAML, pandas-stubs<=2.2.3.241126]
4343
exclude: "^third_party"
4444
args: ["--check-untyped-defs", "--explicit-package-bases", "--ignore-missing-imports"]
45+
- repo: https://github.com/biomejs/pre-commit
46+
rev: v2.0.2
47+
hooks:
48+
- id: biome-check
49+
files: '\.js$'

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,25 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.10.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.9.0...v2.10.0) (2025-07-08)
8+
9+
10+
### Features
11+
12+
* `df.to_pandas_batches()` returns one empty DataFrame if `df` is empty ([#1878](https://github.com/googleapis/python-bigquery-dataframes/issues/1878)) ([e43d15d](https://github.com/googleapis/python-bigquery-dataframes/commit/e43d15d535d6d5fd73c33967271f3591c41dffb3))
13+
* Add filter pushdown to hybrid engine ([#1871](https://github.com/googleapis/python-bigquery-dataframes/issues/1871)) ([6454aff](https://github.com/googleapis/python-bigquery-dataframes/commit/6454aff726dee791acbac98f893075ee5ee6d9a1))
14+
* Add simple stats support to hybrid local pushdown ([#1873](https://github.com/googleapis/python-bigquery-dataframes/issues/1873)) ([8715105](https://github.com/googleapis/python-bigquery-dataframes/commit/8715105239216bffe899ddcbb15805f2e3063af4))
15+
16+
17+
### Bug Fixes
18+
19+
* Fix issues where duration type returned as int ([#1875](https://github.com/googleapis/python-bigquery-dataframes/issues/1875)) ([f30f750](https://github.com/googleapis/python-bigquery-dataframes/commit/f30f75053a6966abd1a6a644c23efb86b2ac568d))
20+
21+
22+
### Documentation
23+
24+
* Update gsutil commands to gcloud commands ([#1876](https://github.com/googleapis/python-bigquery-dataframes/issues/1876)) ([c289f70](https://github.com/googleapis/python-bigquery-dataframes/commit/c289f7061320ec6d9de099cab2416cc9f289baac))
25+
726
## [2.9.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.8.0...v2.9.0) (2025-06-30)
827

928

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
# Generated by synthtool. DO NOT EDIT!
1818
include README.rst LICENSE
1919
recursive-include third_party/bigframes_vendored *
20-
recursive-include bigframes *.json *.proto py.typed
20+
recursive-include bigframes *.json *.proto *.js py.typed
2121
recursive-include tests *
2222
global-exclude *.py[co]
2323
global-exclude __pycache__

bigframes/core/block_transforms.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,8 @@ def rank(
522522
def dropna(
523523
block: blocks.Block,
524524
column_ids: typing.Sequence[str],
525-
how: typing.Literal["all", "any"] = "any",
525+
how: str = "any",
526+
thresh: typing.Optional[int] = None,
526527
subset: Optional[typing.Sequence[str]] = None,
527528
):
528529
"""
@@ -531,17 +532,38 @@ def dropna(
531532
if subset is None:
532533
subset = column_ids
533534

535+
# Predicates to check for non-null values in the subset of columns
534536
predicates = [
535537
ops.notnull_op.as_expr(column_id)
536538
for column_id in column_ids
537539
if column_id in subset
538540
]
541+
539542
if len(predicates) == 0:
540543
return block
541-
if how == "any":
542-
predicate = functools.reduce(ops.and_op.as_expr, predicates)
543-
else: # "all"
544-
predicate = functools.reduce(ops.or_op.as_expr, predicates)
544+
545+
if thresh is not None:
546+
# Handle single predicate case
547+
if len(predicates) == 1:
548+
count_expr = ops.AsTypeOp(pd.Int64Dtype()).as_expr(predicates[0])
549+
else:
550+
# Sum the boolean expressions to count non-null values
551+
count_expr = functools.reduce(
552+
lambda a, b: ops.add_op.as_expr(
553+
ops.AsTypeOp(pd.Int64Dtype()).as_expr(a),
554+
ops.AsTypeOp(pd.Int64Dtype()).as_expr(b),
555+
),
556+
predicates,
557+
)
558+
# Filter rows where count >= thresh
559+
predicate = ops.ge_op.as_expr(count_expr, ex.const(thresh))
560+
else:
561+
# Only handle 'how' parameter when thresh is not specified
562+
if how == "any":
563+
predicate = functools.reduce(ops.and_op.as_expr, predicates)
564+
else: # "all"
565+
predicate = functools.reduce(ops.or_op.as_expr, predicates)
566+
545567
return block.filter(predicate)
546568

547569

bigframes/core/blocks.py

Lines changed: 55 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,17 @@
2929
import random
3030
import textwrap
3131
import typing
32-
from typing import Iterable, List, Literal, Mapping, Optional, Sequence, Tuple, Union
32+
from typing import (
33+
Iterable,
34+
Iterator,
35+
List,
36+
Literal,
37+
Mapping,
38+
Optional,
39+
Sequence,
40+
Tuple,
41+
Union,
42+
)
3343
import warnings
3444

3545
import bigframes_vendored.constants as constants
@@ -87,14 +97,22 @@
8797
LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]]
8898

8999

90-
class BlockHolder(typing.Protocol):
100+
@dataclasses.dataclass
101+
class PandasBatches(Iterator[pd.DataFrame]):
91102
"""Interface for mutable objects with state represented by a block value object."""
92103

93-
def _set_block(self, block: Block):
94-
"""Set the underlying block value of the object"""
104+
def __init__(
105+
self, pandas_batches: Iterator[pd.DataFrame], total_rows: Optional[int] = 0
106+
):
107+
self._dataframes: Iterator[pd.DataFrame] = pandas_batches
108+
self._total_rows: Optional[int] = total_rows
109+
110+
@property
111+
def total_rows(self) -> Optional[int]:
112+
return self._total_rows
95113

96-
def _get_block(self) -> Block:
97-
"""Get the underlying block value of the object"""
114+
def __next__(self) -> pd.DataFrame:
115+
return next(self._dataframes)
98116

99117

100118
@dataclasses.dataclass()
@@ -599,8 +617,7 @@ def try_peek(
599617
self.expr, n, use_explicit_destination=allow_large_results
600618
)
601619
df = result.to_pandas()
602-
self._copy_index_to_pandas(df)
603-
return df
620+
return self._copy_index_to_pandas(df)
604621
else:
605622
return None
606623

@@ -609,8 +626,7 @@ def to_pandas_batches(
609626
page_size: Optional[int] = None,
610627
max_results: Optional[int] = None,
611628
allow_large_results: Optional[bool] = None,
612-
squeeze: Optional[bool] = False,
613-
):
629+
) -> Iterator[pd.DataFrame]:
614630
"""Download results one message at a time.
615631
616632
page_size and max_results determine the size and number of batches,
@@ -621,43 +637,43 @@ def to_pandas_batches(
621637
use_explicit_destination=allow_large_results,
622638
)
623639

624-
total_batches = 0
625-
for df in execute_result.to_pandas_batches(
626-
page_size=page_size, max_results=max_results
627-
):
628-
total_batches += 1
629-
self._copy_index_to_pandas(df)
630-
if squeeze:
631-
yield df.squeeze(axis=1)
632-
else:
633-
yield df
634-
635640
# To reduce the number of edge cases to consider when working with the
636641
# results of this, always return at least one DataFrame. See:
637642
# b/428918844.
638-
if total_batches == 0:
639-
df = pd.DataFrame(
640-
{
641-
col: pd.Series([], dtype=self.expr.get_column_type(col))
642-
for col in itertools.chain(self.value_columns, self.index_columns)
643-
}
644-
)
645-
self._copy_index_to_pandas(df)
646-
yield df
643+
empty_val = pd.DataFrame(
644+
{
645+
col: pd.Series([], dtype=self.expr.get_column_type(col))
646+
for col in itertools.chain(self.value_columns, self.index_columns)
647+
}
648+
)
649+
dfs = map(
650+
lambda a: a[0],
651+
itertools.zip_longest(
652+
execute_result.to_pandas_batches(page_size, max_results),
653+
[0],
654+
fillvalue=empty_val,
655+
),
656+
)
657+
dfs = iter(map(self._copy_index_to_pandas, dfs))
647658

648-
def _copy_index_to_pandas(self, df: pd.DataFrame):
649-
"""Set the index on pandas DataFrame to match this block.
659+
total_rows = execute_result.total_rows
660+
if (total_rows is not None) and (max_results is not None):
661+
total_rows = min(total_rows, max_results)
650662

651-
Warning: This method modifies ``df`` inplace.
652-
"""
663+
return PandasBatches(dfs, total_rows)
664+
665+
def _copy_index_to_pandas(self, df: pd.DataFrame) -> pd.DataFrame:
666+
"""Set the index on pandas DataFrame to match this block."""
653667
# Note: If BigQuery DataFrame has null index, a default one will be created for the local materialization.
668+
new_df = df.copy()
654669
if len(self.index_columns) > 0:
655-
df.set_index(list(self.index_columns), inplace=True)
670+
new_df.set_index(list(self.index_columns), inplace=True)
656671
# Pandas names is annotated as list[str] rather than the more
657672
# general Sequence[Label] that BigQuery DataFrames has.
658673
# See: https://github.com/pandas-dev/pandas-stubs/issues/804
659-
df.index.names = self.index.names # type: ignore
660-
df.columns = self.column_labels
674+
new_df.index.names = self.index.names # type: ignore
675+
new_df.columns = self.column_labels
676+
return new_df
661677

662678
def _materialize_local(
663679
self, materialize_options: MaterializationOptions = MaterializationOptions()
@@ -724,9 +740,7 @@ def _materialize_local(
724740
)
725741
else:
726742
df = execute_result.to_pandas()
727-
self._copy_index_to_pandas(df)
728-
729-
return df, execute_result.query_job
743+
return self._copy_index_to_pandas(df), execute_result.query_job
730744

731745
def _downsample(
732746
self, total_rows: int, sampling_method: str, fraction: float, random_state
@@ -1591,8 +1605,7 @@ def retrieve_repr_request_results(
15911605
row_count = self.session._executor.execute(self.expr.row_count()).to_py_scalar()
15921606

15931607
head_df = head_result.to_pandas()
1594-
self._copy_index_to_pandas(head_df)
1595-
return head_df, row_count, head_result.query_job
1608+
return self._copy_index_to_pandas(head_df), row_count, head_result.query_job
15961609

15971610
def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]:
15981611
expr, result_id = self._expr.promote_offsets()

0 commit comments

Comments (0)