Skip to content

Commit 65d71e1

Browse files
committed
update benchmarks to use the total_rows parameter
1 parent 7acc2f1 commit 65d71e1

File tree

1 file changed

+23 additions, -4 deletions

scripts/run_and_publish_benchmark.py

Lines changed: 23 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ def collect_benchmark_result(
8989
bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds"))
9090
local_seconds_files = sorted(path.rglob("*.local_exec_time_seconds"))
9191
query_char_count_files = sorted(path.rglob("*.query_char_count"))
92+
total_rows_files = sorted(path.rglob("*.totalrows"))
9293

9394
error_files = sorted(path.rglob("*.error"))
9495

@@ -98,6 +99,7 @@ def collect_benchmark_result(
9899
<= len(bytes_files)
99100
== len(query_char_count_files)
100101
== len(local_seconds_files)
102+
== len(total_rows_files)
101103
):
102104
raise ValueError(
103105
"Mismatch in the number of report files for bytes, millis, seconds and query char count: \n"
@@ -106,6 +108,7 @@ def collect_benchmark_result(
106108
f"bytes_files: {len(bytes_files)}\n"
107109
f"query_char_count_files: {len(query_char_count_files)}\n"
108110
f"local_seconds_files: {len(local_seconds_files)}\n"
111+
f"total_rows_files: {len(total_rows_files)}\n"
109112
)
110113

111114
has_full_metrics = len(bq_seconds_files) == len(local_seconds_files)
@@ -138,14 +141,18 @@ def collect_benchmark_result(
138141
if not has_full_metrics:
139142
total_slot_millis = None
140143
bq_seconds = None
144+
total_rows = None
141145
else:
142146
millis_file = millis_files[idx]
143147
bq_seconds_file = bq_seconds_files[idx]
144-
if filename != millis_file.relative_to(path).with_suffix(
145-
""
146-
) or filename != bq_seconds_file.relative_to(path).with_suffix(""):
148+
total_rows_file = total_rows_files[idx]
149+
if (
150+
filename != millis_file.relative_to(path).with_suffix("")
151+
or filename != bq_seconds_file.relative_to(path).with_suffix("")
152+
or filename != total_rows_file.relative_to(path).with_suffix("")
153+
):
147154
raise ValueError(
148-
"File name mismatch among query_char_count, bytes, millis, and seconds reports."
155+
"File name mismatch among query_char_count, bytes, millis, seconds and total_rows reports."
149156
)
150157

151158
with open(millis_file, "r") as file:
@@ -156,13 +163,18 @@ def collect_benchmark_result(
156163
lines = file.read().splitlines()
157164
bq_seconds = sum(float(line) for line in lines) / iterations
158165

166+
with open(total_rows_file, "r") as file:
167+
lines = file.read().splitlines()
168+
total_rows = sum(int(line) for line in lines) / iterations
169+
159170
results_dict[str(filename)] = [
160171
query_count,
161172
total_bytes,
162173
total_slot_millis,
163174
local_seconds,
164175
bq_seconds,
165176
query_char_count,
177+
total_rows,
166178
]
167179
finally:
168180
for files_to_remove in (
@@ -171,6 +183,7 @@ def collect_benchmark_result(
171183
path.rglob("*.local_exec_time_seconds"),
172184
path.rglob("*.bq_exec_time_seconds"),
173185
path.rglob("*.query_char_count"),
186+
path.rglob("*.totalrows"),
174187
path.rglob("*.error"),
175188
):
176189
for log_file in files_to_remove:
@@ -183,6 +196,7 @@ def collect_benchmark_result(
183196
"Local_Execution_Time_Sec",
184197
"BigQuery_Execution_Time_Sec",
185198
"Query_Char_Count",
199+
"Total_Rows",
186200
]
187201

188202
benchmark_metrics = pd.DataFrame.from_dict(
@@ -206,6 +220,7 @@ def collect_benchmark_result(
206220
print(
207221
f"{index} - query count: {row['Query_Count']},"
208222
+ f" query char count: {row['Query_Char_Count']},"
223+
+ f" total rows: {row['Total_Rows']},"
209224
+ f" bytes processed sum: {row['Bytes_Processed']},"
210225
+ (f" slot millis sum: {row['Slot_Millis']}," if has_full_metrics else "")
211226
+ f" local execution time: {formatted_local_exec_time} seconds"
@@ -234,10 +249,14 @@ def collect_benchmark_result(
234249
geometric_mean_bq_seconds = geometric_mean_excluding_zeros(
235250
benchmark_metrics["BigQuery_Execution_Time_Sec"]
236251
)
252+
geometric_mean_total_rows = geometric_mean_excluding_zeros(
253+
benchmark_metrics["Total_Rows"]
254+
)
237255

238256
print(
239257
f"---Geometric mean of queries: {geometric_mean_queries},"
240258
+ f" Geometric mean of queries char counts: {geometric_mean_query_char_count},"
259+
+ f" Geometric mean of total rows: {geometric_mean_total_rows},"
241260
+ f" Geometric mean of bytes processed: {geometric_mean_bytes},"
242261
+ (
243262
f" Geometric mean of slot millis: {geometric_mean_slot_millis},"

0 commit comments

Comments
 (0)