From 4a8e0287a630226166f02f21b88e8e07f23bcca4 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Fri, 27 Jun 2025 21:48:44 +0000
Subject: [PATCH 01/13] support thresh in dropna

---
 bigframes/core/block_transforms.py            | 53 ++++++++++++++-----
 bigframes/dataframe.py                        | 49 ++++++++++++-----
 tests/system/small/test_dataframe.py          | 41 +++++++++-----
 .../bigframes_vendored/pandas/core/frame.py   | 24 +++++++++
 4 files changed, 130 insertions(+), 37 deletions(-)

diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index 09ef17dff5..939d41c885 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -523,6 +523,7 @@ def dropna(
     block: blocks.Block,
     column_ids: typing.Sequence[str],
     how: typing.Literal["all", "any"] = "any",
+    thresh: typing.Optional[int] = None,
     subset: Optional[typing.Sequence[str]] = None,
 ):
     """
@@ -531,18 +532,46 @@ def dropna(
     if subset is None:
         subset = column_ids
 
-    predicates = [
-        ops.notnull_op.as_expr(column_id)
-        for column_id in column_ids
-        if column_id in subset
-    ]
-    if len(predicates) == 0:
-        return block
-    if how == "any":
-        predicate = functools.reduce(ops.and_op.as_expr, predicates)
-    else:  # "all"
-        predicate = functools.reduce(ops.or_op.as_expr, predicates)
-    return block.filter(predicate)
+    if thresh is not None:
+        # Count non-null values per row
+        notnull_predicates = [
+            ops.notnull_op.as_expr(column_id)
+            for column_id in column_ids
+            if column_id in subset
+        ]
+
+        if len(notnull_predicates) == 0:
+            return block
+
+        # Handle single predicate case
+        if len(notnull_predicates) == 1:
+            count_expr = ops.AsTypeOp(pd.Int64Dtype()).as_expr(notnull_predicates[0])
+        else:
+            # Sum the boolean expressions to count non-null values
+            count_expr = functools.reduce(
+                lambda a, b: ops.add_op.as_expr(
+                    ops.AsTypeOp(pd.Int64Dtype()).as_expr(a),
+                    ops.AsTypeOp(pd.Int64Dtype()).as_expr(b),
+                ),
+                notnull_predicates,
+            )
+
+        # Filter rows where count >= thresh
+        thresh_predicate = ops.ge_op.as_expr(count_expr, ex.const(thresh))
+        return block.filter(thresh_predicate)
+    else:
+        predicates = [
+            ops.notnull_op.as_expr(column_id)
+            for column_id in column_ids
+            if column_id in subset
+        ]
+        if len(predicates) == 0:
+            return block
+        if how == "any":
+            predicate = functools.reduce(ops.and_op.as_expr, predicates)
+        else:  # "all"
+            predicate = functools.reduce(ops.or_op.as_expr, predicates)
+        return block.filter(predicate)
 
 
 def nsmallest(
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 1884f0beff..5430371433 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2801,7 +2801,8 @@ def dropna(
         self,
         *,
         axis: int | str = 0,
-        how: str = "any",
+        how: typing.Literal["all", "any"] = "any",
+        thresh: typing.Optional[int] = None,
         subset: typing.Union[None, blocks.Label, Sequence[blocks.Label]] = None,
         inplace: bool = False,
         ignore_index=False,
@@ -2810,6 +2811,10 @@ def dropna(
             raise NotImplementedError(
                 f"'inplace'=True not supported. {constants.FEEDBACK_LINK}"
             )
+        if thresh is not None and how != "any":
+            raise TypeError(
+                "You cannot set both the how and thresh arguments at the same time."
+            )
         if how not in ("any", "all"):
             raise ValueError("'how' must be one of 'any', 'all'")
 
@@ -2833,21 +2838,41 @@ def dropna(
                     for id_ in self._block.label_to_col_id[label]
                 ]
 
-            result = block_ops.dropna(self._block, self._block.value_columns, how=how, subset=subset_ids)  # type: ignore
+            result = block_ops.dropna(
+                self._block,
+                self._block.value_columns,
+                how=how,
+                thresh=thresh,
+                subset=subset_ids,
+            )  # type: ignore
             if ignore_index:
                 result = result.reset_index()
             return DataFrame(result)
         else:
-            isnull_block = self._block.multi_apply_unary_op(ops.isnull_op)
-            if how == "any":
-                null_locations = DataFrame(isnull_block).any().to_pandas()
-            else:  # 'all'
-                null_locations = DataFrame(isnull_block).all().to_pandas()
-            keep_columns = [
-                col
-                for col, to_drop in zip(self._block.value_columns, null_locations)
-                if not to_drop
-            ]
+            if thresh is not None:
+                # Count non-null values per column
+                isnull_block = self._block.multi_apply_unary_op(ops.isnull_op)
+                notnull_block = self._block.multi_apply_unary_op(ops.notnull_op)
+
+                # Sum non-null values for each column
+                notnull_counts = DataFrame(notnull_block).sum().to_pandas()
+
+                keep_columns = [
+                    col
+                    for col, count in zip(self._block.value_columns, notnull_counts)
+                    if count >= thresh
+                ]
+            else:
+                isnull_block = self._block.multi_apply_unary_op(ops.isnull_op)
+                if how == "any":
+                    null_locations = DataFrame(isnull_block).any().to_pandas()
+                else:  # 'all'
+                    null_locations = DataFrame(isnull_block).all().to_pandas()
+                keep_columns = [
+                    col
+                    for col, to_drop in zip(self._block.value_columns, null_locations)
+                    if not to_drop
+                ]
             return DataFrame(self._block.select_columns(keep_columns))
 
     def any(
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 8ea1259325..5f0c4f3b2a 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1196,26 +1196,41 @@ def test_assign_callable_lambda(scalars_dfs):
 
 
 @pytest.mark.parametrize(
-    ("axis", "how", "ignore_index", "subset"),
+    ("axis", "how", "ignore_index", "subset", "thresh"),
     [
-        (0, "any", False, None),
-        (0, "any", True, None),
-        (0, "all", False, ["bool_col", "time_col"]),
-        (0, "any", False, ["bool_col", "time_col"]),
-        (0, "all", False, "time_col"),
-        (1, "any", False, None),
-        (1, "all", False, None),
+        (0, "any", False, None, None),
+        (0, "any", True, None, None),
+        (0, "all", False, ["bool_col", "time_col"], None),
+        (0, "any", False, ["bool_col", "time_col"], None),
+        (0, "all", False, "time_col", None),
+        (1, "any", False, None, None),
+        (1, "all", False, None, None),
+        (0, "any", False, None, 2),
+        (0, "any", True, None, 3),
+        (1, "any", False, None, 2),
     ],
 )
-def test_df_dropna(scalars_dfs, axis, how, ignore_index, subset):
+def test_df_dropna(scalars_dfs, axis, how, ignore_index, subset, thresh):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     scalars_df, scalars_pandas_df = scalars_dfs
-    df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index, subset=subset)
+
+    if thresh is not None:
+        df = scalars_df.dropna(
+            axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
+        )
+        pd_result = scalars_pandas_df.dropna(
+            axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
+        )
+    else:
+        df = scalars_df.dropna(
+            axis=axis, how=how, ignore_index=ignore_index, subset=subset
+        )
+        pd_result = scalars_pandas_df.dropna(
+            axis=axis, how=how, ignore_index=ignore_index, subset=subset
+        )
+
     bf_result = df.to_pandas()
-    pd_result = scalars_pandas_df.dropna(
-        axis=axis, how=how, ignore_index=ignore_index, subset=subset
-    )
 
     # Pandas uses int64 instead of Int64 (nullable) dtype.
     pd_result.index = pd_result.index.astype(pd.Int64Dtype())
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 40ab5a7352..3bd10281d9 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -1762,6 +1762,7 @@ def dropna(
         *,
         axis: int | str = 0,
         how: str = "any",
+        thresh: Optional[int] = None,
         subset=None,
         inplace: bool = False,
         ignore_index=False,
@@ -1812,6 +1813,25 @@ def dropna(
             <BLANKLINE>
             [3 rows x 3 columns]
 
+        Keep rows with at least 2 non-null values.
+
+            >>> df.dropna(thresh=2)
+                            name        toy        born
+            1    Batman  Batmobile  1940-04-25
+            2  Catwoman   Bullwhip        <NA>
+            <BLANKLINE>
+            [2 rows x 3 columns]
+
+        Keep columns with at least 2 non-null values:
+
+            >>> df.dropna(axis='columns', thresh=2)
+                name        toy
+            0    Alfred       <NA>
+            1    Batman  Batmobile
+            2  Catwoman   Bullwhip
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Define in which columns to look for missing values.
 
             >>> df.dropna(subset=['name', 'toy'])
@@ -1834,6 +1854,8 @@ def dropna(
 
                 * 'any' : If any NA values are present, drop that row or column.
                 * 'all' : If all values are NA, drop that row or column.
+            typing(int, optional):
+                Require that many non-NA values. Cannot be combined with how.
             subset (column label or sequence of labels, optional):
                 Labels along other axis to consider, e.g. if you are dropping
                 rows these would be a list of columns to include.
@@ -1851,6 +1873,8 @@ def dropna(
         Raises:
             ValueError:
                 If ``how`` is not one of ``any`` or ``all``.
+            TyperError:
+                If both ``how`` and ``thresh`` are specified.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 

From cbe83a96dcbd523f15b80a879be641ae08b0b7b4 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 30 Jun 2025 05:33:02 +0000
Subject: [PATCH 02/13] update docstring, and polish function

---
 bigframes/core/block_transforms.py            | 37 ++++------
 bigframes/dataframe.py                        |  5 +-
 tests/system/small/test_dataframe.py          | 71 +++++++++++--------
 .../bigframes_vendored/pandas/core/frame.py   |  4 +-
 4 files changed, 61 insertions(+), 56 deletions(-)

diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index 939d41c885..cf0d8ec57f 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -532,20 +532,20 @@ def dropna(
     if subset is None:
         subset = column_ids
 
-    if thresh is not None:
-        # Count non-null values per row
-        notnull_predicates = [
-            ops.notnull_op.as_expr(column_id)
-            for column_id in column_ids
-            if column_id in subset
-        ]
+    # Predicates to check for non-null values in the subset of columns
+    predicates = [
+        ops.notnull_op.as_expr(column_id)
+        for column_id in column_ids
+        if column_id in subset
+    ]
 
-        if len(notnull_predicates) == 0:
-            return block
+    if len(predicates) == 0:
+        return block
 
+    if thresh is not None:
         # Handle single predicate case
-        if len(notnull_predicates) == 1:
-            count_expr = ops.AsTypeOp(pd.Int64Dtype()).as_expr(notnull_predicates[0])
+        if len(predicates) == 1:
+            count_expr = ops.AsTypeOp(pd.Int64Dtype()).as_expr(predicates[0])
         else:
             # Sum the boolean expressions to count non-null values
             count_expr = functools.reduce(
@@ -553,25 +553,18 @@ def dropna(
                     ops.AsTypeOp(pd.Int64Dtype()).as_expr(a),
                     ops.AsTypeOp(pd.Int64Dtype()).as_expr(b),
                 ),
-                notnull_predicates,
+                predicates,
             )
 
         # Filter rows where count >= thresh
-        thresh_predicate = ops.ge_op.as_expr(count_expr, ex.const(thresh))
-        return block.filter(thresh_predicate)
+        predicate = ops.ge_op.as_expr(count_expr, ex.const(thresh))
     else:
-        predicates = [
-            ops.notnull_op.as_expr(column_id)
-            for column_id in column_ids
-            if column_id in subset
-        ]
-        if len(predicates) == 0:
-            return block
         if how == "any":
             predicate = functools.reduce(ops.and_op.as_expr, predicates)
         else:  # "all"
             predicate = functools.reduce(ops.or_op.as_expr, predicates)
-        return block.filter(predicate)
+
+    return block.filter(predicate)
 
 
 def nsmallest(
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 5430371433..c3ea03045d 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2850,11 +2850,8 @@ def dropna(
             return DataFrame(result)
         else:
             if thresh is not None:
-                # Count non-null values per column
-                isnull_block = self._block.multi_apply_unary_op(ops.isnull_op)
+                # Keep columns with at least 'thresh' non-null values
                 notnull_block = self._block.multi_apply_unary_op(ops.notnull_op)
-
-                # Sum non-null values for each column
                 notnull_counts = DataFrame(notnull_block).sum().to_pandas()
 
                 keep_columns = [
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 5f0c4f3b2a..70f551ef6f 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -1196,47 +1196,62 @@ def test_assign_callable_lambda(scalars_dfs):
 
 
 @pytest.mark.parametrize(
-    ("axis", "how", "ignore_index", "subset", "thresh"),
+    ("axis", "how", "ignore_index", "subset"),
     [
-        (0, "any", False, None, None),
-        (0, "any", True, None, None),
-        (0, "all", False, ["bool_col", "time_col"], None),
-        (0, "any", False, ["bool_col", "time_col"], None),
-        (0, "all", False, "time_col", None),
-        (1, "any", False, None, None),
-        (1, "all", False, None, None),
-        (0, "any", False, None, 2),
-        (0, "any", True, None, 3),
-        (1, "any", False, None, 2),
+        (0, "any", False, None),
+        (0, "any", True, None),
+        (0, "all", False, ["bool_col", "time_col"]),
+        (0, "any", False, ["bool_col", "time_col"]),
+        (0, "all", False, "time_col"),
+        (1, "any", False, None),
+        (1, "all", False, None),
     ],
 )
-def test_df_dropna(scalars_dfs, axis, how, ignore_index, subset, thresh):
+def test_df_dropna_by_how(scalars_dfs, axis, how, ignore_index, subset):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     scalars_df, scalars_pandas_df = scalars_dfs
-
-    if thresh is not None:
-        df = scalars_df.dropna(
-            axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
-        )
-        pd_result = scalars_pandas_df.dropna(
-            axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
-        )
-    else:
-        df = scalars_df.dropna(
-            axis=axis, how=how, ignore_index=ignore_index, subset=subset
-        )
-        pd_result = scalars_pandas_df.dropna(
-            axis=axis, how=how, ignore_index=ignore_index, subset=subset
-        )
-
+    df = scalars_df.dropna(axis=axis, how=how, ignore_index=ignore_index, subset=subset)
     bf_result = df.to_pandas()
+    pd_result = scalars_pandas_df.dropna(
+        axis=axis, how=how, ignore_index=ignore_index, subset=subset
+    )
 
     # Pandas uses int64 instead of Int64 (nullable) dtype.
     pd_result.index = pd_result.index.astype(pd.Int64Dtype())
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
+@pytest.mark.parametrize(
+    ("axis", "ignore_index", "subset", "thresh"),
+    [
+        (0, False, None, 2),
+        (0, True, None, 3),
+        (1, False, None, 2),
+    ],
+)
+def test_df_dropna_by_thresh(scalars_dfs, axis, ignore_index, subset, thresh):
+    """
+    Tests that dropna correctly keeps rows/columns with a minimum number
+    of non-null values.
+    """
+    # TODO: supply a reason why this isn't compatible with pandas 1.x
+    pytest.importorskip("pandas", minversion="2.0.0")
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    df_result = scalars_df.dropna(
+        axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
+    )
+    pd_result = scalars_pandas_df.dropna(
+        axis=axis, thresh=thresh, ignore_index=ignore_index, subset=subset
+    )
+
+    bf_result = df_result.to_pandas()
+    # Pandas uses int64 instead of Int64 (nullable) dtype.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
 def test_df_dropna_range_columns(scalars_dfs):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 3bd10281d9..61abca74db 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -1842,7 +1842,7 @@ def dropna(
             [2 rows x 3 columns]
 
         Args:
-            axis ({0 or 'index', 1 or 'columns'}, default 'columns'):
+            axis ({0 or 'index', 1 or 'columns'}, default 0):
                 Determine if rows or columns which contain missing values are
                 removed.
 
@@ -1854,7 +1854,7 @@ def dropna(
 
                 * 'any' : If any NA values are present, drop that row or column.
                 * 'all' : If all values are NA, drop that row or column.
-            typing(int, optional):
+            thresh (int, optional):
                 Require that many non-NA values. Cannot be combined with how.
             subset (column label or sequence of labels, optional):
                 Labels along other axis to consider, e.g. if you are dropping

From e1a0fafa784177f86a608022f8ecb52858763dd8 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 7 Jul 2025 16:30:32 +0000
Subject: [PATCH 03/13] fix mypy

---
 bigframes/core/block_transforms.py |  4 ++--
 bigframes/dataframe.py             | 20 +++++++++++++-------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index cf0d8ec57f..cb7c1923cf 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -522,7 +522,7 @@ def rank(
 def dropna(
     block: blocks.Block,
     column_ids: typing.Sequence[str],
-    how: typing.Literal["all", "any"] = "any",
+    how: str = "any",
     thresh: typing.Optional[int] = None,
     subset: Optional[typing.Sequence[str]] = None,
 ):
@@ -555,10 +555,10 @@ def dropna(
                 ),
                 predicates,
             )
-
         # Filter rows where count >= thresh
         predicate = ops.ge_op.as_expr(count_expr, ex.const(thresh))
     else:
+        # Only handle 'how' parameter when thresh is not specified
         if how == "any":
             predicate = functools.reduce(ops.and_op.as_expr, predicates)
         else:  # "all"
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index c3ea03045d..1821c5791b 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2801,7 +2801,7 @@ def dropna(
         self,
         *,
         axis: int | str = 0,
-        how: typing.Literal["all", "any"] = "any",
+        how: str = "any",
         thresh: typing.Optional[int] = None,
         subset: typing.Union[None, blocks.Label, Sequence[blocks.Label]] = None,
         inplace: bool = False,
@@ -2811,12 +2811,18 @@ def dropna(
             raise NotImplementedError(
                 f"'inplace'=True not supported. {constants.FEEDBACK_LINK}"
             )
-        if thresh is not None and how != "any":
-            raise TypeError(
-                "You cannot set both the how and thresh arguments at the same time."
-            )
-        if how not in ("any", "all"):
-            raise ValueError("'how' must be one of 'any', 'all'")
+
+        # Check if both thresh and how are explicitly provided
+        if thresh is not None:
+            # cannot specify both thresh and how parameters
+            if how != "any":
+                raise TypeError(
+                    "You cannot set both the how and thresh arguments at the same time."
+                )
+        else:
+            # Only validate 'how' when thresh is not provided
+            if how not in ("any", "all"):
+                raise ValueError("'how' must be one of 'any', 'all'")
 
         axis_n = utils.get_axis_number(axis)
 

From 03f86ba7b08162e913e03d882e064093e8e7844c Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 7 Jul 2025 20:31:35 +0000
Subject: [PATCH 04/13] add explicit handling for empty slices

---
 bigframes/core/rewrite/slices.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index bed3a8a3f3..c503a6d36d 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,6 +76,17 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
+
+    # Handle empty slice cases exlicitly (e.g. [0:0])
+    if (
+        node.start is not None
+        and node.stop is not None
+        and node.start >= node.stop
+        and (node.step is None or node.step > 0)
+    ):
+        # Return empty result by filtering with impossible condition
+        return slice_as_filter(node.child, node.start, node.start, node.step or 1)
+
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
         return node.child

From 6a188588025da0bd2eddefdf56f3a06cdf0a46d3 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 7 Jul 2025 21:16:03 +0000
Subject: [PATCH 05/13] fix the kokoro issue with unreliable row count

---
 bigframes/core/rewrite/slices.py | 35 ++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index c503a6d36d..6703810404 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -77,15 +77,12 @@ def rewrite_slice(node: nodes.BigFrameNode):
 
     slice_def = (node.start, node.stop, node.step)
 
-    # Handle empty slice cases exlicitly (e.g. [0:0])
-    if (
-        node.start is not None
-        and node.stop is not None
-        and node.start >= node.stop
-        and (node.step is None or node.step > 0)
-    ):
-        # Return empty result by filtering with impossible condition
-        return slice_as_filter(node.child, node.start, node.start, node.step or 1)
+    # Handle empty slice cases explicitly before any row count dependent logic
+    # This ensures empty slices always return empty results regardless of statistics
+    if _is_empty_slice(node.start, node.stop, node.step):
+        # Create a filter that will always return empty results
+        # Use start=0, stop=0, step=1 to ensure empty result
+        return slice_as_filter(node.child, 0, 0, 1)
 
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
@@ -100,6 +97,26 @@ def rewrite_slice(node: nodes.BigFrameNode):
     return slice_as_filter(node.child, *slice_def)
 
 
+def _is_empty_slice(start, stop, step):
+    """Check if a slice will always return empty results."""
+    if start is None or stop is None:
+        return False
+
+    # Normalize step
+    if step is None:
+        step = 1
+
+    # For positive step, empty if start >= stop
+    # For negative step, empty if start <= stop
+    if step > 0:
+        return start >= stop
+    elif step < 0:
+        return start <= stop
+    else:
+        # step == 0 is invalid, but handle gracefully
+        return True
+
+
 def slice_as_filter(
     node: nodes.BigFrameNode, start: Optional[int], stop: Optional[int], step: int
 ) -> nodes.BigFrameNode:

From dbcce52831458dc53b5ee8c22acf655527fafc59 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 7 Jul 2025 22:01:14 +0000
Subject: [PATCH 06/13] debug for row count issue in kokoro

---
 bigframes/core/rewrite/slices.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index 6703810404..b2278b356c 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,12 +76,14 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
+    print(f"DEBUG: Processing slice {slice_def}, row_count={node.child.row_count}")
 
     # Handle empty slice cases explicitly before any row count dependent logic
     # This ensures empty slices always return empty results regardless of statistics
     if _is_empty_slice(node.start, node.stop, node.step):
         # Create a filter that will always return empty results
         # Use start=0, stop=0, step=1 to ensure empty result
+        print("DEBUG: Detected empty slice, returning filtered result")
         return slice_as_filter(node.child, 0, 0, 1)
 
     # no-op (eg. df[::1])

From 236a6e54a84391f0ae8317eb529823cf0d39f024 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 7 Jul 2025 23:55:56 +0000
Subject: [PATCH 07/13] Revert "debug for row count issue in kokoro"

This reverts commit 0141a7cda596b1da928c7c2d770653b9c6be10e7.
---
 bigframes/core/rewrite/slices.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index b2278b356c..6703810404 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,14 +76,12 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
-    print(f"DEBUG: Processing slice {slice_def}, row_count={node.child.row_count}")
 
     # Handle empty slice cases explicitly before any row count dependent logic
     # This ensures empty slices always return empty results regardless of statistics
     if _is_empty_slice(node.start, node.stop, node.step):
         # Create a filter that will always return empty results
         # Use start=0, stop=0, step=1 to ensure empty result
-        print("DEBUG: Detected empty slice, returning filtered result")
         return slice_as_filter(node.child, 0, 0, 1)
 
     # no-op (eg. df[::1])

From 8fd03f8364c5bf980ad82acd881aab7801c70f96 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 7 Jul 2025 23:56:01 +0000
Subject: [PATCH 08/13] Revert "fix the kokoro issue with unreliable row count"

This reverts commit e3180f7f5b2dbcc8bc933545fc6c31c64cb92702.
---
 bigframes/core/rewrite/slices.py | 35 ++++++++------------------------
 1 file changed, 9 insertions(+), 26 deletions(-)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index 6703810404..c503a6d36d 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -77,12 +77,15 @@ def rewrite_slice(node: nodes.BigFrameNode):
 
     slice_def = (node.start, node.stop, node.step)
 
-    # Handle empty slice cases explicitly before any row count dependent logic
-    # This ensures empty slices always return empty results regardless of statistics
-    if _is_empty_slice(node.start, node.stop, node.step):
-        # Create a filter that will always return empty results
-        # Use start=0, stop=0, step=1 to ensure empty result
-        return slice_as_filter(node.child, 0, 0, 1)
+    # Handle empty slice cases exlicitly (e.g. [0:0])
+    if (
+        node.start is not None
+        and node.stop is not None
+        and node.start >= node.stop
+        and (node.step is None or node.step > 0)
+    ):
+        # Return empty result by filtering with impossible condition
+        return slice_as_filter(node.child, node.start, node.start, node.step or 1)
 
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
@@ -97,26 +100,6 @@ def rewrite_slice(node: nodes.BigFrameNode):
     return slice_as_filter(node.child, *slice_def)
 
 
-def _is_empty_slice(start, stop, step):
-    """Check if a slice will always return empty results."""
-    if start is None or stop is None:
-        return False
-
-    # Normalize step
-    if step is None:
-        step = 1
-
-    # For positive step, empty if start >= stop
-    # For negative step, empty if start <= stop
-    if step > 0:
-        return start >= stop
-    elif step < 0:
-        return start <= stop
-    else:
-        # step == 0 is invalid, but handle gracefully
-        return True
-
-
 def slice_as_filter(
     node: nodes.BigFrameNode, start: Optional[int], stop: Optional[int], step: int
 ) -> nodes.BigFrameNode:

From 8c181949dc7b223bdb4fe299f00f653fbf0c2504 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Mon, 7 Jul 2025 23:56:03 +0000
Subject: [PATCH 09/13] Revert "add explicit handling for empty slices"

This reverts commit d0d616b0eca4247dd3d7498b6413f92e72e46e72.
---
 bigframes/core/rewrite/slices.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index c503a6d36d..bed3a8a3f3 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,17 +76,6 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
-
-    # Handle empty slice cases exlicitly (e.g. [0:0])
-    if (
-        node.start is not None
-        and node.stop is not None
-        and node.start >= node.stop
-        and (node.step is None or node.step > 0)
-    ):
-        # Return empty result by filtering with impossible condition
-        return slice_as_filter(node.child, node.start, node.start, node.step or 1)
-
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
         return node.child

From 8af0582987f7f9b8288de97220c6542854be4c0e Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 8 Jul 2025 00:26:17 +0000
Subject: [PATCH 10/13] Reapply "fix the kokoro issue with unreliable row
 count"

This reverts commit b5c8f52c065652dbb8912ec9259fe57b27ae2a94.
---
 bigframes/core/rewrite/slices.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index bed3a8a3f3..6703810404 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,6 +76,14 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
+
+    # Handle empty slice cases explicitly before any row count dependent logic
+    # This ensures empty slices always return empty results regardless of statistics
+    if _is_empty_slice(node.start, node.stop, node.step):
+        # Create a filter that will always return empty results
+        # Use start=0, stop=0, step=1 to ensure empty result
+        return slice_as_filter(node.child, 0, 0, 1)
+
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
         return node.child
@@ -89,6 +97,26 @@ def rewrite_slice(node: nodes.BigFrameNode):
     return slice_as_filter(node.child, *slice_def)
 
 
+def _is_empty_slice(start, stop, step):
+    """Check if a slice will always return empty results."""
+    if start is None or stop is None:
+        return False
+
+    # Normalize step
+    if step is None:
+        step = 1
+
+    # For positive step, empty if start >= stop
+    # For negative step, empty if start <= stop
+    if step > 0:
+        return start >= stop
+    elif step < 0:
+        return start <= stop
+    else:
+        # step == 0 is invalid, but handle gracefully
+        return True
+
+
 def slice_as_filter(
     node: nodes.BigFrameNode, start: Optional[int], stop: Optional[int], step: int
 ) -> nodes.BigFrameNode:

From 78755e20b38d86602fce2ffb57e1257e35d3f654 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 8 Jul 2025 20:43:23 +0000
Subject: [PATCH 11/13] Revert "Reapply "fix the kokoro issue with unreliable
 row count""

This reverts commit a6bc2c5a3b21113bcdfdbc3d81a9cc45ca259a12.
---
 bigframes/core/rewrite/slices.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index 6703810404..bed3a8a3f3 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,14 +76,6 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
-
-    # Handle empty slice cases explicitly before any row count dependent logic
-    # This ensures empty slices always return empty results regardless of statistics
-    if _is_empty_slice(node.start, node.stop, node.step):
-        # Create a filter that will always return empty results
-        # Use start=0, stop=0, step=1 to ensure empty result
-        return slice_as_filter(node.child, 0, 0, 1)
-
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
         return node.child
@@ -97,26 +89,6 @@ def rewrite_slice(node: nodes.BigFrameNode):
     return slice_as_filter(node.child, *slice_def)
 
 
-def _is_empty_slice(start, stop, step):
-    """Check if a slice will always return empty results."""
-    if start is None or stop is None:
-        return False
-
-    # Normalize step
-    if step is None:
-        step = 1
-
-    # For positive step, empty if start >= stop
-    # For negative step, empty if start <= stop
-    if step > 0:
-        return start >= stop
-    elif step < 0:
-        return start <= stop
-    else:
-        # step == 0 is invalid, but handle gracefully
-        return True
-
-
 def slice_as_filter(
     node: nodes.BigFrameNode, start: Optional[int], stop: Optional[int], step: int
 ) -> nodes.BigFrameNode:

From 64985266ec84f9433c5b6d313cc2f85f31a906ff Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 8 Jul 2025 20:43:26 +0000
Subject: [PATCH 12/13] Reapply "add explicit handling for empty slices"

This reverts commit 1a9a5a89fb23a4985a47a264dbb213e65cacbe38.
---
 bigframes/core/rewrite/slices.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index bed3a8a3f3..c503a6d36d 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,6 +76,17 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
+
+    # Handle empty slice cases exlicitly (e.g. [0:0])
+    if (
+        node.start is not None
+        and node.stop is not None
+        and node.start >= node.stop
+        and (node.step is None or node.step > 0)
+    ):
+        # Return empty result by filtering with impossible condition
+        return slice_as_filter(node.child, node.start, node.start, node.step or 1)
+
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
         return node.child

From 0173100475fa9846f2bede71c7b61ef2ed536c72 Mon Sep 17 00:00:00 2001
From: Shuowei Li <shuowei@google.com>
Date: Tue, 8 Jul 2025 20:44:47 +0000
Subject: [PATCH 13/13] Revert "Reapply "add explicit handling for empty
 slices""

This reverts commit 42a08d1a6d5a5bf62f9342aa3ceaac7f62e49f3b.
---
 bigframes/core/rewrite/slices.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/bigframes/core/rewrite/slices.py b/bigframes/core/rewrite/slices.py
index c503a6d36d..bed3a8a3f3 100644
--- a/bigframes/core/rewrite/slices.py
+++ b/bigframes/core/rewrite/slices.py
@@ -76,17 +76,6 @@ def rewrite_slice(node: nodes.BigFrameNode):
         return node
 
     slice_def = (node.start, node.stop, node.step)
-
-    # Handle empty slice cases exlicitly (e.g. [0:0])
-    if (
-        node.start is not None
-        and node.stop is not None
-        and node.start >= node.stop
-        and (node.step is None or node.step > 0)
-    ):
-        # Return empty result by filtering with impossible condition
-        return slice_as_filter(node.child, node.start, node.start, node.step or 1)
-
     # no-op (eg. df[::1])
     if slices.is_noop(slice_def, node.child.row_count):
         return node.child