@@ -89,6 +89,7 @@ def collect_benchmark_result(
8989 bq_seconds_files = sorted (path .rglob ("*.bq_exec_time_seconds" ))
9090 local_seconds_files = sorted (path .rglob ("*.local_exec_time_seconds" ))
9191 query_char_count_files = sorted (path .rglob ("*.query_char_count" ))
92+ total_rows_files = sorted (path .rglob ("*.totalrows" ))
9293
9394 error_files = sorted (path .rglob ("*.error" ))
9495
@@ -98,6 +99,7 @@ def collect_benchmark_result(
9899 <= len (bytes_files )
99100 == len (query_char_count_files )
100101 == len (local_seconds_files )
102+ == len (total_rows_files )
101103 ):
102104 raise ValueError (
103105 "Mismatch in the number of report files for bytes, millis, seconds and query char count: \n "
@@ -106,6 +108,7 @@ def collect_benchmark_result(
106108 f"bytes_files: { len (bytes_files )} \n "
107109 f"query_char_count_files: { len (query_char_count_files )} \n "
108110 f"local_seconds_files: { len (local_seconds_files )} \n "
111+ f"total_rows_files: { len (total_rows_files )} \n "
109112 )
110113
111114 has_full_metrics = len (bq_seconds_files ) == len (local_seconds_files )
@@ -138,14 +141,18 @@ def collect_benchmark_result(
138141 if not has_full_metrics :
139142 total_slot_millis = None
140143 bq_seconds = None
144+ total_rows = None
141145 else :
142146 millis_file = millis_files [idx ]
143147 bq_seconds_file = bq_seconds_files [idx ]
144- if filename != millis_file .relative_to (path ).with_suffix (
145- ""
146- ) or filename != bq_seconds_file .relative_to (path ).with_suffix ("" ):
148+ total_rows_file = total_rows_files [idx ]
149+ if (
150+ filename != millis_file .relative_to (path ).with_suffix ("" )
151+ or filename != bq_seconds_file .relative_to (path ).with_suffix ("" )
152+ or filename != total_rows_file .relative_to (path ).with_suffix ("" )
153+ ):
147154 raise ValueError (
148- "File name mismatch among query_char_count, bytes, millis, and seconds reports."
155+ "File name mismatch among query_char_count, bytes, millis, seconds and total_rows reports."
149156 )
150157
151158 with open (millis_file , "r" ) as file :
@@ -156,13 +163,18 @@ def collect_benchmark_result(
156163 lines = file .read ().splitlines ()
157164 bq_seconds = sum (float (line ) for line in lines ) / iterations
158165
166+ with open (total_rows_file , "r" ) as file :
167+ lines = file .read ().splitlines ()
168+ total_rows = sum (int (line ) for line in lines ) / iterations
169+
159170 results_dict [str (filename )] = [
160171 query_count ,
161172 total_bytes ,
162173 total_slot_millis ,
163174 local_seconds ,
164175 bq_seconds ,
165176 query_char_count ,
177+ total_rows ,
166178 ]
167179 finally :
168180 for files_to_remove in (
@@ -171,6 +183,7 @@ def collect_benchmark_result(
171183 path .rglob ("*.local_exec_time_seconds" ),
172184 path .rglob ("*.bq_exec_time_seconds" ),
173185 path .rglob ("*.query_char_count" ),
186+ path .rglob ("*.totalrows" ),
174187 path .rglob ("*.error" ),
175188 ):
176189 for log_file in files_to_remove :
@@ -183,6 +196,7 @@ def collect_benchmark_result(
183196 "Local_Execution_Time_Sec" ,
184197 "BigQuery_Execution_Time_Sec" ,
185198 "Query_Char_Count" ,
199+ "Total_Rows" ,
186200 ]
187201
188202 benchmark_metrics = pd .DataFrame .from_dict (
@@ -206,6 +220,7 @@ def collect_benchmark_result(
206220 print (
207221 f"{ index } - query count: { row ['Query_Count' ]} ,"
208222 + f" query char count: { row ['Query_Char_Count' ]} ,"
223+ + f" total rows: { row ['Total_Rows' ]} ,"
209224 + f" bytes processed sum: { row ['Bytes_Processed' ]} ,"
210225 + (f" slot millis sum: { row ['Slot_Millis' ]} ," if has_full_metrics else "" )
211226 + f" local execution time: { formatted_local_exec_time } seconds"
@@ -234,10 +249,14 @@ def collect_benchmark_result(
234249 geometric_mean_bq_seconds = geometric_mean_excluding_zeros (
235250 benchmark_metrics ["BigQuery_Execution_Time_Sec" ]
236251 )
252+ geometric_mean_total_rows = geometric_mean_excluding_zeros (
253+ benchmark_metrics ["Total_Rows" ]
254+ )
237255
238256 print (
239257 f"---Geometric mean of queries: { geometric_mean_queries } ,"
240258 + f" Geometric mean of queries char counts: { geometric_mean_query_char_count } ,"
259+ + f" Geometric mean of total rows: { geometric_mean_total_rows } ,"
241260 + f" Geometric mean of bytes processed: { geometric_mean_bytes } ,"
242261 + (
243262 f" Geometric mean of slot millis: { geometric_mean_slot_millis } ,"
0 commit comments