From eb9426fc89d9b441634801709ff6e763eebd53e8 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Tue, 21 Oct 2025 20:08:52 +0000
Subject: [PATCH 1/2] feat: include local data bytes in the dry run report when
 available

---
 bigframes/core/blocks.py           |  2 +-
 bigframes/session/dry_runs.py      | 35 ++++++++++++++++++++++++++++--
 tests/system/small/test_session.py | 16 ++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 166841dfbd..1900b7208a 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -967,7 +967,7 @@ def _compute_dry_run(
         }
 
         dry_run_stats = dry_runs.get_query_stats_with_dtypes(
-            query_job, column_dtypes, self.index.dtypes
+            query_job, column_dtypes, self.index.dtypes, self.expr.node
         )
         return dry_run_stats, query_job
 
diff --git a/bigframes/session/dry_runs.py b/bigframes/session/dry_runs.py
index 51e8e72c9a..38ba56cd61 100644
--- a/bigframes/session/dry_runs.py
+++ b/bigframes/session/dry_runs.py
@@ -20,6 +20,7 @@
 import pandas
 
 from bigframes import dtypes
+from bigframes.core import bigframe_node, nodes
 
 
 def get_table_stats(table: bigquery.Table) -> pandas.Series:
@@ -86,13 +87,26 @@ def get_query_stats_with_dtypes(
     query_job: bigquery.QueryJob,
     column_dtypes: Dict[str, dtypes.Dtype],
     index_dtypes: Sequence[dtypes.Dtype],
+    expr_root: bigframe_node.BigFrameNode | None = None,
 ) -> pandas.Series:
+    """
+    Returns important stats from the query job as a Pandas Series. The dtypes information is added too.
+
+    Args:
+        expr_root (Optional):
+            The root of the expression tree that may contain local data, whose size is added to the
+            total bytes count if available.
+
+    """
     index = ["columnCount", "columnDtypes", "indexLevel", "indexDtypes"]
     values = [len(column_dtypes), column_dtypes, len(index_dtypes), index_dtypes]
 
     s = pandas.Series(values, index=index)
 
-    return pandas.concat([s, get_query_stats(query_job)])
+    result = pandas.concat([s, get_query_stats(query_job)])
+    if expr_root is not None:
+        result["totalBytesProcessed"] += get_local_bytes(expr_root)
+    return result
 
 
 def get_query_stats(
@@ -145,4 +159,21 @@ def get_query_stats(
         else None
     )
 
-    return pandas.Series(values, index=index)
+    result = pandas.Series(values, index=index)
+    result["totalBytesProcessed"] = int(result["totalBytesProcessed"])
+
+    return result
+
+
+def get_local_bytes(root: bigframe_node.BigFrameNode) -> int:
+    def get_total_bytes(
+        root: bigframe_node.BigFrameNode, child_results: tuple[int, ...]
+    ) -> int:
+        child_bytes = sum(child_results)
+
+        if isinstance(root, nodes.ReadLocalNode):
+            return child_bytes + root.local_data_source.data.get_total_buffer_size()
+
+        return child_bytes
+
+    return root.reduce_up(get_total_bytes)
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 001e02c2fa..d3e646dc92 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -2173,6 +2173,22 @@ def test_read_gbq_query_dry_run(scalars_table_id, session):
     _assert_query_dry_run_stats_are_valid(result)
 
 
+def test_block_dry_run_includes_local_data(session):
+    df1 = bigframes.dataframe.DataFrame({"col_1": [1, 2, 3]}, session=session)
+    df2 = bigframes.dataframe.DataFrame({"col_2": [1, 2, 3]}, session=session)
+
+    result = df1.merge(df2, how="cross").to_pandas(dry_run=True)
+
+    assert isinstance(result, pd.Series)
+    _assert_query_dry_run_stats_are_valid(result)
+    assert result["totalBytesProcessed"] > 0
+    assert (
+        df1.to_pandas(dry_run=True)["totalBytesProcessed"]
+        + df2.to_pandas(dry_run=True)["totalBytesProcessed"]
+        == result["totalBytesProcessed"]
+    )
+
+
 def _assert_query_dry_run_stats_are_valid(result: pd.Series):
     expected_index = pd.Index(
         [

From 1769df0f10c2c267b0d94629ce86fdee630cab3a Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Tue, 21 Oct 2025 20:23:16 +0000
Subject: [PATCH 2/2] fix test

---
 bigframes/session/dry_runs.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/bigframes/session/dry_runs.py b/bigframes/session/dry_runs.py
index 38ba56cd61..bd54bb65d7 100644
--- a/bigframes/session/dry_runs.py
+++ b/bigframes/session/dry_runs.py
@@ -160,7 +160,10 @@ def get_query_stats(
     )
 
     result = pandas.Series(values, index=index)
-    result["totalBytesProcessed"] = int(result["totalBytesProcessed"])
+    if result["totalBytesProcessed"] is None:
+        result["totalBytesProcessed"] = 0
+    else:
+        result["totalBytesProcessed"] = int(result["totalBytesProcessed"])
 
     return result