@@ -162,6 +162,18 @@ async def evaluate(
162162 )
163163
164164 async def run_evaluation (inference_result ):
165+ if (inference_result .status == InferenceStatus .FAILURE or
166+ inference_result .inferences is None ):
167+ logger .warning ('Skipping evaluation for eval case `%s` due to inference failure: %s' ,
168+ inference_result .eval_case_id , inference_result .error_message )
169+ return (
170+ inference_result ,
171+ self ._build_not_evaluated_eval_case_result (
172+ inference_result = inference_result ,
173+ eval_case = eval_case ,
174+ reason = 'Inference failed' ,
175+ )
176+ )
165177 async with semaphore :
166178 return await self ._evaluate_single_inference_result (
167179 inference_result = inference_result ,
@@ -269,10 +281,11 @@ async def _evaluate_single_inference_result(
269281 return (inference_result , eval_case_result )
270282
271283 if inference_result .inferences is None :
272- logger .warning (
273- 'Inference result for eval case `%s` did not include inferences; '
274- ' marking as not evaluated. ' ,
284+ logger .error (
285+ 'Evaluation attempted on failed inference for eval case `%s`. '
286+ ' Error: %s ' ,
275287 inference_result .eval_case_id ,
288+ inference_result .error_message
276289 )
277290 eval_case_result = await self ._build_not_evaluated_eval_case_result (
278291 inference_result = inference_result ,
0 commit comments