update benchmarks to use the total_rows parameter

shuoweil · shuoweil · commit 01fc06978799 · 2025-08-06T21:08:14.000Z
diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py
@@ -89,6 +89,7 @@ def collect_benchmark_result(
         bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds"))
         local_seconds_files = sorted(path.rglob("*.local_exec_time_seconds"))
         query_char_count_files = sorted(path.rglob("*.query_char_count"))
+        total_rows_files = sorted(path.rglob("*.totalrows"))
 
         error_files = sorted(path.rglob("*.error"))
 
@@ -98,6 +99,7 @@ def collect_benchmark_result(
             <= len(bytes_files)
             == len(query_char_count_files)
             == len(local_seconds_files)
+            == len(total_rows_files)
         ):
             raise ValueError(
                 "Mismatch in the number of report files for bytes, millis, seconds and query char count: \n"
@@ -106,6 +108,7 @@ def collect_benchmark_result(
                 f"bytes_files: {len(bytes_files)}\n"
                 f"query_char_count_files: {len(query_char_count_files)}\n"
                 f"local_seconds_files: {len(local_seconds_files)}\n"
+                f"total_rows_files: {len(total_rows_files)}\n"
             )
 
         has_full_metrics = len(bq_seconds_files) == len(local_seconds_files)
@@ -138,14 +141,18 @@ def collect_benchmark_result(
             if not has_full_metrics:
                 total_slot_millis = None
                 bq_seconds = None
+                total_rows = None
             else:
                 millis_file = millis_files[idx]
                 bq_seconds_file = bq_seconds_files[idx]
-                if filename != millis_file.relative_to(path).with_suffix(
-                    ""
-                ) or filename != bq_seconds_file.relative_to(path).with_suffix(""):
+                total_rows_file = total_rows_files[idx]
+                if (
+                    filename != millis_file.relative_to(path).with_suffix("")
+                    or filename != bq_seconds_file.relative_to(path).with_suffix("")
+                    or filename != total_rows_file.relative_to(path).with_suffix("")
+                ):
                     raise ValueError(
-                        "File name mismatch among query_char_count, bytes, millis, and seconds reports."
+                        "File name mismatch among query_char_count, bytes, millis, seconds and total_rows reports."
                     )
 
                 with open(millis_file, "r") as file:
@@ -156,13 +163,18 @@ def collect_benchmark_result(
                     lines = file.read().splitlines()
                     bq_seconds = sum(float(line) for line in lines) / iterations
 
+                with open(total_rows_file, "r") as file:
+                    lines = file.read().splitlines()
+                    total_rows = sum(int(line) for line in lines) / iterations
+
             results_dict[str(filename)] = [
                 query_count,
                 total_bytes,
                 total_slot_millis,
                 local_seconds,
                 bq_seconds,
                 query_char_count,
+                total_rows,
             ]
     finally:
         for files_to_remove in (
@@ -171,6 +183,7 @@ def collect_benchmark_result(
             path.rglob("*.local_exec_time_seconds"),
             path.rglob("*.bq_exec_time_seconds"),
             path.rglob("*.query_char_count"),
+            path.rglob("*.totalrows"),
             path.rglob("*.error"),
         ):
             for log_file in files_to_remove:
@@ -183,6 +196,7 @@ def collect_benchmark_result(
         "Local_Execution_Time_Sec",
         "BigQuery_Execution_Time_Sec",
         "Query_Char_Count",
+        "Total_Rows",
     ]
 
     benchmark_metrics = pd.DataFrame.from_dict(
@@ -206,6 +220,7 @@ def collect_benchmark_result(
         print(
             f"{index} - query count: {row['Query_Count']},"
             + f" query char count: {row['Query_Char_Count']},"
+            + f" total rows: {row['Total_Rows']},"
             + f" bytes processed sum: {row['Bytes_Processed']},"
             + (f" slot millis sum: {row['Slot_Millis']}," if has_full_metrics else "")
             + f" local execution time: {formatted_local_exec_time} seconds"
@@ -234,10 +249,14 @@ def collect_benchmark_result(
     geometric_mean_bq_seconds = geometric_mean_excluding_zeros(
         benchmark_metrics["BigQuery_Execution_Time_Sec"]
     )
+    geometric_mean_total_rows = geometric_mean_excluding_zeros(
+        benchmark_metrics["Total_Rows"]
+    )
 
     print(
         f"---Geometric mean of queries: {geometric_mean_queries},"
         + f" Geometric mean of queries char counts: {geometric_mean_query_char_count},"
+        + f" Geometric mean of total rows: {geometric_mean_total_rows},"
         + f" Geometric mean of bytes processed: {geometric_mean_bytes},"
         + (
             f" Geometric mean of slot millis: {geometric_mean_slot_millis},"