@@ -84,43 +84,36 @@ def collect_benchmark_result(
8484 path = pathlib .Path (benchmark_path )
8585 try :
8686 results_dict : Dict [str , List [Union [int , float , None ]]] = {}
87- bytes_files = sorted (path .rglob ("*.bytesprocessed" ))
88- millis_files = sorted (path .rglob ("*.slotmillis" ))
89- bq_seconds_files = sorted (path .rglob ("*.bq_exec_time_seconds" ))
87+ # Use local_seconds_files as the baseline
9088 local_seconds_files = sorted (path .rglob ("*.local_exec_time_seconds" ))
91- query_char_count_files = sorted (path .rglob ("*.query_char_count" ))
92-
9389 error_files = sorted (path .rglob ("*.error" ))
94-
95- if not (
96- len (millis_files )
97- == len (bq_seconds_files )
98- <= len (bytes_files )
99- == len (query_char_count_files )
100- == len (local_seconds_files )
101- ):
102- raise ValueError (
103- "Mismatch in the number of report files for bytes, millis, seconds and query char count: \n "
104- f"millis_files: { len (millis_files )} \n "
105- f"bq_seconds_files: { len (bq_seconds_files )} \n "
106- f"bytes_files: { len (bytes_files )} \n "
107- f"query_char_count_files: { len (query_char_count_files )} \n "
108- f"local_seconds_files: { len (local_seconds_files )} \n "
109- )
110-
111- has_full_metrics = len (bq_seconds_files ) == len (local_seconds_files )
112-
113- for idx in range (len (local_seconds_files )):
114- query_char_count_file = query_char_count_files [idx ]
115- local_seconds_file = local_seconds_files [idx ]
116- bytes_file = bytes_files [idx ]
117- filename = query_char_count_file .relative_to (path ).with_suffix ("" )
118- if filename != local_seconds_file .relative_to (path ).with_suffix (
119- ""
120- ) or filename != bytes_file .relative_to (path ).with_suffix ("" ):
121- raise ValueError (
122- "File name mismatch among query_char_count, bytes and seconds reports."
123- )
90+ benchmarks_with_missing_files = []
91+
92+ for local_seconds_file in local_seconds_files :
93+ base_name = local_seconds_file .name .removesuffix (".local_exec_time_seconds" )
94+ base_path = local_seconds_file .parent / base_name
95+ filename = base_path .relative_to (path )
96+
97+ # Construct paths for other metric files
98+ bytes_file = pathlib .Path (f"{ base_path } .bytesprocessed" )
99+ millis_file = pathlib .Path (f"{ base_path } .slotmillis" )
100+ bq_seconds_file = pathlib .Path (f"{ base_path } .bq_exec_time_seconds" )
101+ query_char_count_file = pathlib .Path (f"{ base_path } .query_char_count" )
102+
103+ # Check if all corresponding files exist
104+ missing_files = []
105+ if not bytes_file .exists ():
106+ missing_files .append (bytes_file .name )
107+ if not millis_file .exists ():
108+ missing_files .append (millis_file .name )
109+ if not bq_seconds_file .exists ():
110+ missing_files .append (bq_seconds_file .name )
111+ if not query_char_count_file .exists ():
112+ missing_files .append (query_char_count_file .name )
113+
114+ if missing_files :
115+ benchmarks_with_missing_files .append ((str (filename ), missing_files ))
116+ continue
124117
125118 with open (query_char_count_file , "r" ) as file :
126119 lines = file .read ().splitlines ()
@@ -135,26 +128,13 @@ def collect_benchmark_result(
135128 lines = file .read ().splitlines ()
136129 total_bytes = sum (int (line ) for line in lines ) / iterations
137130
138- if not has_full_metrics :
139- total_slot_millis = None
140- bq_seconds = None
141- else :
142- millis_file = millis_files [idx ]
143- bq_seconds_file = bq_seconds_files [idx ]
144- if filename != millis_file .relative_to (path ).with_suffix (
145- ""
146- ) or filename != bq_seconds_file .relative_to (path ).with_suffix ("" ):
147- raise ValueError (
148- "File name mismatch among query_char_count, bytes, millis, and seconds reports."
149- )
150-
151- with open (millis_file , "r" ) as file :
152- lines = file .read ().splitlines ()
153- total_slot_millis = sum (int (line ) for line in lines ) / iterations
131+ with open (millis_file , "r" ) as file :
132+ lines = file .read ().splitlines ()
133+ total_slot_millis = sum (int (line ) for line in lines ) / iterations
154134
155- with open (bq_seconds_file , "r" ) as file :
156- lines = file .read ().splitlines ()
157- bq_seconds = sum (float (line ) for line in lines ) / iterations
135+ with open (bq_seconds_file , "r" ) as file :
136+ lines = file .read ().splitlines ()
137+ bq_seconds = sum (float (line ) for line in lines ) / iterations
158138
159139 results_dict [str (filename )] = [
160140 query_count ,
@@ -207,13 +187,9 @@ def collect_benchmark_result(
207187 f"{ index } - query count: { row ['Query_Count' ]} ,"
208188 + f" query char count: { row ['Query_Char_Count' ]} ,"
209189 + f" bytes processed sum: { row ['Bytes_Processed' ]} ,"
210- + (f" slot millis sum: { row ['Slot_Millis' ]} ," if has_full_metrics else "" )
211- + f" local execution time: { formatted_local_exec_time } seconds"
212- + (
213- f", bigquery execution time: { round (row ['BigQuery_Execution_Time_Sec' ], 1 )} seconds"
214- if has_full_metrics
215- else ""
216- )
190+ + f" slot millis sum: { row ['Slot_Millis' ]} ,"
191+ + f" local execution time: { formatted_local_exec_time } "
192+ + f", bigquery execution time: { round (row ['BigQuery_Execution_Time_Sec' ], 1 )} seconds"
217193 )
218194
219195 geometric_mean_queries = geometric_mean_excluding_zeros (
@@ -239,30 +215,26 @@ def collect_benchmark_result(
239215 f"---Geometric mean of queries: { geometric_mean_queries } ,"
240216 + f" Geometric mean of queries char counts: { geometric_mean_query_char_count } ,"
241217 + f" Geometric mean of bytes processed: { geometric_mean_bytes } ,"
242- + (
243- f" Geometric mean of slot millis: { geometric_mean_slot_millis } ,"
244- if has_full_metrics
245- else ""
246- )
218+ + f" Geometric mean of slot millis: { geometric_mean_slot_millis } ,"
247219 + f" Geometric mean of local execution time: { geometric_mean_local_seconds } seconds"
248- + (
249- f", Geometric mean of BigQuery execution time: { geometric_mean_bq_seconds } seconds---"
250- if has_full_metrics
251- else ""
252- )
220+ + f", Geometric mean of BigQuery execution time: { geometric_mean_bq_seconds } seconds---"
253221 )
254222
255- error_message = (
256- "\n "
257- + "\n " .join (
258- [
259- f"Failed: { error_file .relative_to (path ).with_suffix ('' )} "
260- for error_file in error_files
261- ]
223+ all_errors : List [str ] = []
224+ if error_files :
225+ all_errors .extend (
226+ f"Failed: { error_file .relative_to (path ).with_suffix ('' )} "
227+ for error_file in error_files
262228 )
263- if error_files
264- else None
265- )
229+ if (
230+ benchmarks_with_missing_files
231+ and os .getenv ("BENCHMARK_AND_PUBLISH" , "false" ) == "true"
232+ ):
233+ all_errors .extend (
234+ f"Missing files for benchmark '{ name } ': { files } "
235+ for name , files in benchmarks_with_missing_files
236+ )
237+ error_message = "\n " + "\n " .join (all_errors ) if all_errors else None
266238 return (
267239 benchmark_metrics .reset_index ().rename (columns = {"index" : "Benchmark_Name" }),
268240 error_message ,
0 commit comments