[Hot Fix] Ensure proper handling of agent response reformatting in Tool Input Accuracy evaluator (#44066)

salma-elshafey · Salma Elshafey · web-flow · commit cef636869475 · 2025-12-18T12:02:42.000+02:00
* Modify the method used to reformat the agent response in the Tool Input Accuracy evaluator

* To retrigger build pipeline

---------

Co-authored-by: Salma Elshafey <selshafey@microsoft.com>
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py
@@ -14,7 +14,7 @@
     ErrorTarget,
     EvaluationException,
 )
-from ..._common.utils import reformat_conversation_history, _get_agent_response
+from ..._common.utils import reformat_conversation_history, reformat_agent_response
 from azure.ai.evaluation._common._experimental import experimental
 
 logger = logging.getLogger(__name__)
@@ -25,11 +25,11 @@ class _ToolInputAccuracyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
     """The Tool Input Accuracy evaluator performs a strict binary evaluation (PASS/FAIL) of parameters
     passed to tool calls. It ensures that ALL parameters meet ALL criteria:
 
-        - Parameter grounding: All parameters must be derived from conversation history/query
-        - Type compliance: All parameters must match exact types specified in tool definitions
-        - Format compliance: All parameters must follow exact format and structure requirements
-        - Completeness: All required parameters must be provided
-        - No unexpected parameters: Only defined parameters are allowed
+        - Parameter grounding: All parameters must be derived from conversation history/query.
+        - Type compliance: All parameters must match exact types specified in tool definitions.
+        - Format compliance: All parameters must follow exact format and structure requirements.
+        - Completeness: All required parameters must be provided.
+        - No unexpected parameters: Only defined parameters are allowed.
 
     The evaluator uses strict binary evaluation:
         - 1: Only when ALL criteria are satisfied perfectly for ALL parameters
@@ -135,8 +135,8 @@ def _convert_kwargs_to_eval_input(self, **kwargs):
         if len(needed_tool_definitions) == 0:
             return {"error_message": self._NO_TOOL_DEFINITIONS_MESSAGE}
 
-        # Get agent response with tool calls and results using _get_agent_response
-        agent_response_with_tools = _get_agent_response(response, include_tool_messages=True)
+        # Reformat agent response with tool calls and results using reformat_agent_response
+        agent_response_with_tools = reformat_agent_response(response, include_tool_messages=True)
 
         return {
             "query": query,