From c4d83ff5dce06c25eaf1cf7c724b466a31ac4a13 Mon Sep 17 00:00:00 2001 From: Shannon Suhendra Date: Fri, 6 Feb 2026 13:12:35 -0800 Subject: [PATCH 1/9] feat: get conversational output and map to eval output --- .../_cli/_evals/_conversational_mapper.py | 36 +++++++++++++ src/uipath/_cli/_evals/_runtime.py | 54 +++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 src/uipath/_cli/_evals/_conversational_mapper.py diff --git a/src/uipath/_cli/_evals/_conversational_mapper.py b/src/uipath/_cli/_evals/_conversational_mapper.py new file mode 100644 index 000000000..43cac0382 --- /dev/null +++ b/src/uipath/_cli/_evals/_conversational_mapper.py @@ -0,0 +1,36 @@ +from typing import Any, Dict, List +from uipath.core.chat import UiPathConversationMessage + +def to_conversational_eval_output_schema( + messages: List[UiPathConversationMessage], +) -> Dict[str, Any]: + """Convert list of messages to conversational eval output schema. + + Args: + messages: List of message dictionaries with role, content, tool_calls, etc. + + Returns: + Dict with structure: {"agentResponse": [{"text": str, "toolCalls": [...]}]} + """ + agent_messages = [] + + for message in messages: + if message.get("type") == "ai": + tool_calls = [] + if message.get("tool_calls"): + tool_calls = [ + { + "name": tc.get("name") or tc.get("function", {}).get("name"), + "arguments": tc.get("arguments") + or tc.get("function", {}).get("arguments"), + } + for tc in message["tool_calls"] + ] + + agent_message = { + "text": message.get("content") or "", + "toolCalls": tool_calls if tool_calls else None, + } + agent_messages.append(agent_message) + + return {"agentResponse": agent_messages} \ No newline at end of file diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 48c64139b..ea9ee474e 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -47,6 +47,10 @@ from uipath.runtime.logging import UiPathRuntimeExecutionLogHandler from uipath.runtime.schema import UiPathRuntimeSchema +from uipath._cli._evals._conversational_mapper import ( + to_conversational_eval_output_schema +) + from uipath._cli._evals._span_utils import ( configure_eval_set_run_span, configure_evaluation_span, @@ -309,6 +313,7 @@ async def initiate_evaluation( ) async def execute(self) -> UiPathRuntimeResult: + print("EXECUTEE!!!") logger.info("=" * 80) logger.info("EVAL RUNTIME: Starting evaluation execution") logger.info(f"EVAL RUNTIME: Execution ID: {self.execution_id}") @@ -848,6 +853,31 @@ async def execute_runtime( eval_id=eval_item.id, ) + # todo: map eval input type to this type + # inputs_with_overrides = { + # "messages": [ + # { + # "messageId": "E6928DF4-AA36-46BE-B4FC-52ADA2B636D0", + # "role": "user", + # "contentParts": [ + # { + # "contentPartId": "E75CBEA6-7A2C-442B-B0B6-39FFBF17E986", + # "mimeType": "text/plain", + # "data": {"inline": "Hi what can you do"}, + # "citations": [], + # "createdAt": "2026-01-18T05:32:39.620Z", + # "updatedAt": "2026-01-18T05:32:39.620Z", + # } + # ], + # "toolCalls": [], + # "interrupts": [], + # "spanId": "0f32ee22-0def-4906-9cde-dbb9860c050f", + # "createdAt": "2026-01-18T05:32:38.807Z", + # "updatedAt": "2026-01-18T05:32:38.807Z", + # } + # ] + # } + # In resume mode, pass None as input # The UiPathResumableRuntime wrapper will automatically: # 1. Fetch triggers from storage @@ -887,6 +917,30 @@ async def execute_runtime( if result is None: raise ValueError("Execution result cannot be None for eval runs") + + if result is None: + raise ValueError("Execution result cannot be None for eval runs") + + schema = await self.get_schema() + is_conversational = False + + if schema.metadata and isinstance(schema.metadata, dict): + engine = schema.metadata.get("settings").get("engine") + is_conversational = "conversational" in engine + + # print("result.output: " + str(result.output)) + if is_conversational and result.output: + converted_output = to_conversational_eval_output_schema(result.output.get("messages")) + print("converted_output: " + str(converted_output)) + result = UiPathRuntimeResult( + output=converted_output, + status=result.status, + error=result.error, + trigger=result.trigger, + triggers=result.triggers, + ) + + print("result: " + str(result)) return UiPathEvalRunExecutionOutput( execution_time=end_time - start_time, From 24ac54467bf90afde6d0d21f785d250f4de90b02 Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Fri, 13 Feb 2026 22:40:51 -0600 Subject: [PATCH 2/9] feat(temp): preliminary eval mapper changes --- .../_cli/_evals/_conversational_mapper.py | 36 -- .../_cli/_evals/_conversational_utils.py | 316 ++++++++++++++++++ .../_cli/_evals/_models/_evaluation_set.py | 8 + src/uipath/_cli/_evals/_runtime.py | 8 +- src/uipath/_cli/_utils/_eval_set.py | 14 + 5 files changed, 342 insertions(+), 40 deletions(-) delete mode 100644 src/uipath/_cli/_evals/_conversational_mapper.py create mode 100644 src/uipath/_cli/_evals/_conversational_utils.py diff --git a/src/uipath/_cli/_evals/_conversational_mapper.py b/src/uipath/_cli/_evals/_conversational_mapper.py deleted file mode 100644 index 43cac0382..000000000 --- a/src/uipath/_cli/_evals/_conversational_mapper.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Any, Dict, List -from uipath.core.chat import UiPathConversationMessage - -def to_conversational_eval_output_schema( - messages: List[UiPathConversationMessage], -) -> Dict[str, Any]: - """Convert list of messages to conversational eval output schema. - - Args: - messages: List of message dictionaries with role, content, tool_calls, etc. - - Returns: - Dict with structure: {"agentResponse": [{"text": str, "toolCalls": [...]}]} - """ - agent_messages = [] - - for message in messages: - if message.get("type") == "ai": - tool_calls = [] - if message.get("tool_calls"): - tool_calls = [ - { - "name": tc.get("name") or tc.get("function", {}).get("name"), - "arguments": tc.get("arguments") - or tc.get("function", {}).get("arguments"), - } - for tc in message["tool_calls"] - ] - - agent_message = { - "text": message.get("content") or "", - "toolCalls": tool_calls if tool_calls else None, - } - agent_messages.append(agent_message) - - return {"agentResponse": agent_messages} \ No newline at end of file diff --git a/src/uipath/_cli/_evals/_conversational_utils.py b/src/uipath/_cli/_evals/_conversational_utils.py new file mode 100644 index 000000000..1fc4769de --- /dev/null +++ b/src/uipath/_cli/_evals/_conversational_utils.py @@ -0,0 +1,316 @@ +from typing import Any, Dict, Literal, List +from datetime import datetime, timezone +import uuid +from uipath.core.chat import UiPathConversationMessage, UiPathConversationContentPart, UiPathConversationToolCall, UiPathConversationToolCallResult +from uipath.core.chat.content import UiPathInlineValue + +from pydantic import BaseModel, Field + +# Types for legacy conversational-agent evaluation input/outputs. + +class LegacyConversationalEvalJobAttachmentReference(BaseModel): + """File attachment reference in eval messages.""" + + id: str + full_name: str = Field(..., alias="fullName") + mime_type: str = Field(..., alias="mimeType") + + +class LegacyConversationalEvalOutputToolCall(BaseModel): + """Tool call in eval output schema (no result field).""" + + name: str + arguments: dict[str, Any] + +class LegacyConversationalEvalInputToolCallResult(BaseModel): + """Tool call result in eval input schema.""" + + value: Any + is_error: bool | None = Field(default=None, alias="isError") + +class LegacyConversationalEvalInputToolCall(LegacyConversationalEvalOutputToolCall): + """Tool call in eval input schema (extends output tool call with result).""" + + result: LegacyConversationalEvalInputToolCallResult + + +class LegacyConversationalEvalMessage(BaseModel): + """Base eval message type.""" + + role: Literal["agent", "user"] + text: str + + +class LegacyConversationalEvalUserMessage(LegacyConversationalEvalMessage): + """User message in eval schema.""" + + role: Literal["user"] = "user" + attachments: list[LegacyConversationalEvalJobAttachmentReference] | None = Field(default=None) + + +class LegacyConversationalEvalInputAgentMessage(LegacyConversationalEvalMessage): + """Agent message in eval input schema (input tool-calls contain results field).""" + + role: Literal["agent"] = "agent" + tool_calls: list[LegacyConversationalEvalInputToolCall] | None = Field(default=None, alias="toolCalls") + + +class LegacyConversationalEvalOutputAgentMessage(LegacyConversationalEvalMessage): + """Agent message in eval output schema (output tool-calls don't contain result field).""" + + role: Literal["agent"] = "agent" + tool_calls: list[LegacyConversationalEvalOutputToolCall] = Field(default=None, alias="toolCalls") + + +class LegacyConversationalEvalInput(BaseModel): + """Complete conversational eval input schema. + + conversationHistory: Array of exchanges, where each exchange is + [userMessage, ...agentMessages[]] + currentUserPrompt: The current user message to evaluate + """ + + conversation_history: list[ + list[LegacyConversationalEvalUserMessage | LegacyConversationalEvalInputAgentMessage] + ] = Field(alias="conversationHistory") + current_user_prompt: LegacyConversationalEvalUserMessage = Field(alias="currentUserPrompt") + +class LegacyConversationalEvalOutput(BaseModel): + """Complete eval output schema matching TypeScript definition. + + agentResponse: Sequence of agent messages ending with a message without tool calls + """ + + agent_response: list[LegacyConversationalEvalOutputAgentMessage] = Field(alias="agentResponse") + +# Mapper functions to convert between UiPath standard Message format and legacy conversational formats + +class UiPathLegacyEvalChatMessagesMapper: + @staticmethod + def legacy_conversational_eval_input_to_messages( + eval_input: LegacyConversationalEvalInput + ) -> List[UiPathConversationMessage]: + """Convert legacy eval input format to list of UiPathConversationMessage. + + Args: + eval_input: Legacy conversational eval input with conversation_history and current_user_prompt + + Returns: + List of UiPathConversationMessage objects representing the full conversation + """ + messages: List[UiPathConversationMessage] = [] + timestamp = ( + datetime.now(timezone.utc) + .isoformat(timespec="milliseconds") + .replace("+00:00", "Z") + ) + + # Process conversation history (list of exchanges) + for eval_exchange in eval_input.conversation_history: + for eval_message in eval_exchange: + if eval_message.role == "user": + # Convert user message + content_parts = [ + UiPathConversationContentPart( + content_part_id=str(uuid.uuid4()), + mime_type="text/plain", + data=UiPathInlineValue(inline=eval_message.text), + citations=[], + created_at=timestamp, + updated_at=timestamp, + ) + ] + + # TODO: Add attachments if present + # if message.attachments: + # for attachment in message.attachments: + # content_parts.append( + # UiPathConversationContentPart(...) + # ) + + messages.append( + UiPathConversationMessage( + message_id=str(uuid.uuid4()), + role="user", + content_parts=content_parts, + tool_calls=[], + interrupts=[], + created_at=timestamp, + updated_at=timestamp, + ) + ) + elif eval_message.role == "agent": + # Convert agent message + content_parts = [ + UiPathConversationContentPart( + content_part_id=str(uuid.uuid4()), + mime_type="text/markdown", + data=UiPathInlineValue(inline=eval_message.text), + citations=[], + created_at=timestamp, + updated_at=timestamp, + ) + ] + + # Convert tool calls if present + tool_calls: List[UiPathConversationToolCall] = [] + if eval_message.tool_calls: + for tc in eval_message.tool_calls: + tool_call = UiPathConversationToolCall( + tool_call_id=str(uuid.uuid4()), + name=tc.name, + input=tc.arguments, + timestamp=timestamp, + result=UiPathConversationToolCallResult( + timestamp=timestamp, + output=tc.result.value, + is_error=tc.result.is_error, + ), + created_at=timestamp, + updated_at=timestamp, + ) + tool_calls.append(tool_call) + + messages.append( + UiPathConversationMessage( + message_id=str(uuid.uuid4()), + role="assistant", + content_parts=content_parts, + tool_calls=tool_calls, + interrupts=[], + created_at=timestamp, + updated_at=timestamp, + ) + ) + + # Add current user prompt + content_parts = [ + UiPathConversationContentPart( + content_part_id=str(uuid.uuid4()), + mime_type="text/plain", + data=UiPathInlineValue(inline=eval_input.current_user_prompt.text), + citations=[], + created_at=timestamp, + updated_at=timestamp, + ) + ] + + # TODO Add attachments if present + # if eval_input.current_user_prompt.attachments: + # for attachment in eval_input.current_user_prompt.attachments: + # content_parts.append( + # UiPathConversationContentPart(...) + # ) + + messages.append( + UiPathConversationMessage( + message_id=str(uuid.uuid4()), + role="user", + content_parts=content_parts, + tool_calls=[], + interrupts=[], + created_at=timestamp, + updated_at=timestamp, + ) + ) + + return messages + + + # def messages_to_legacy_conversational_eval_output( + # messages: List[UiPathConversationMessage], + # ) -> LegacyConversationalEvalOutput: + # """Convert list of UiPathConversationMessage to LegacyConversationalEvalOutput. + + # Args: + # messages: List of UiPathConversationMessage objects + + # Returns: + # LegacyConversationalEvalOutput containing agent response messages + # """ + # agent_messages = [] + + # for message in messages: + # # Only process assistant/agent messages + # if message.role in ("assistant", "agent", "ai"): + # # Extract text from content parts + # text = "" + # if message.content_parts: + # for content_part in message.content_parts: + # if content_part.mime_type == "text/plain": + # # Extract inline value + # if hasattr(content_part.data, 'inline'): + # text += str(content_part.data.inline) + + # # Convert tool calls if present + # tool_calls = None + # if message.tool_calls: + # tool_calls = [] + # for tc in message.tool_calls: + # # Extract input arguments + # arguments = {} + # if tc.input: + # if hasattr(tc.input, 'inline'): + # arguments = tc.input.inline if isinstance(tc.input.inline, dict) else {} + + # tool_call = LegacyConversationalEvalOutputToolCall( + # name=tc.name, + # arguments=arguments, + # ) + # tool_calls.append(tool_call) + + # agent_message = LegacyConversationalEvalOutputAgentMessage( + # role="agent", + # text=text, + # tool_calls=tool_calls, + # ) + # agent_messages.append(agent_message) + + # return LegacyConversationalEvalOutput(agent_response=agent_messages) + + + # TODO Check on below. I think that messages_to_legacy_conversational_eval_output was converting + # the core langgraph message and we would first need that to be converted into the UiPathConversationMessage. + + # def messages_to_legacy_conversational_eval_output_schema( + # messages: List[UiPathConversationMessage], + # ) -> Dict[str, Any]: + # """Convert list of UiPathConversationMessage to legacy eval output schema dict. + + # Args: + # messages: List of UiPathConversationMessage objects + + # Returns: + # Dictionary matching LegacyConversationalEvalOutput schema (with camelCase keys) + # """ + # output = messages_to_legacy_conversational_eval_output(messages) + # return output.model_dump(by_alias=True, exclude_none=True) + + @staticmethod + def messages_to_legacy_conversational_eval_output( + messages: List[UiPathConversationMessage], + ) -> LegacyConversationalEvalOutput: + """Convert list of messages to conversational eval output schema.""" + + agent_messages = [] + + for message in messages: + if message.get("type") == "ai": + tool_calls = [] + if message.get("tool_calls"): + tool_calls = [ + { + "name": tc.get("name") or tc.get("function", {}).get("name"), + "arguments": tc.get("arguments") + or tc.get("function", {}).get("arguments"), + } + for tc in message["tool_calls"] + ] + + agent_message = { + "text": message.get("content") or "", + "toolCalls": tool_calls if tool_calls else None, + } + agent_messages.append(agent_message) + + return {"agentResponse": agent_messages} \ No newline at end of file diff --git a/src/uipath/_cli/_evals/_models/_evaluation_set.py b/src/uipath/_cli/_evals/_models/_evaluation_set.py index bf1a08d89..1ef1e5eb8 100644 --- a/src/uipath/_cli/_evals/_models/_evaluation_set.py +++ b/src/uipath/_cli/_evals/_models/_evaluation_set.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, ConfigDict, Field from pydantic.alias_generators import to_camel +from uipath._cli._evals._conversational_utils import LegacyConversationalEvalInput, LegacyConversationalEvalOutput + from uipath._cli._evals.mocks.types import ( InputMockingStrategy, MockingStrategy, @@ -115,6 +117,12 @@ class LegacyEvaluationItem(BaseModel): tools_to_simulate: list[ToolSimulation] = Field( default_factory=list, alias="toolsToSimulate" ) + conversational_inputs: LegacyConversationalEvalInput | None = Field( + default=None, alias="conversationalInputs" + ) + conversational_expected_output: LegacyConversationalEvalOutput | None = Field( + default=None, alias="conversationalExpectedOutput" + ) class EvaluationSet(BaseModel): diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 6f3c944ad..fcf5d662f 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -47,9 +47,7 @@ from uipath.runtime.logging import UiPathRuntimeExecutionLogHandler from uipath.runtime.schema import UiPathRuntimeSchema -from uipath._cli._evals._conversational_mapper import ( - to_conversational_eval_output_schema -) +from uipath._cli._evals._conversational_utils import UiPathLegacyEvalChatMessagesMapper from uipath._cli._evals._span_utils import ( configure_eval_set_run_span, @@ -925,12 +923,14 @@ async def execute_runtime( is_conversational = False if schema.metadata and isinstance(schema.metadata, dict): + print("=== Schema metadata: ") + print(schema.metadata) engine = schema.metadata.get("settings").get("engine") is_conversational = "conversational" in engine # print("result.output: " + str(result.output)) if is_conversational and result.output: - converted_output = to_conversational_eval_output_schema(result.output.get("messages")) + converted_output = UiPathLegacyEvalChatMessagesMapper.messages_to_legacy_conversational_eval_output(result.output.get("messages")) print("converted_output: " + str(converted_output)) result = UiPathRuntimeResult( output=converted_output, diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index bbc9d5047..6915c51a2 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -15,6 +15,7 @@ from uipath._cli._evals.mocks.types import InputMockingStrategy, LLMMockingStrategy from uipath._cli._utils._console import ConsoleLogger from uipath.eval.evaluators.base_evaluator import GenericBaseEvaluator +from uipath._cli._evals._conversational_utils import UiPathLegacyEvalChatMessagesMapper console = ConsoleLogger() @@ -141,6 +142,19 @@ def migrate_evaluation_item( prompt=evaluation.simulation_instructions or "", tools_to_simulate=evaluation.tools_to_simulate or [], ) + + print("--- migrate_evaluation_item: conversational_inputs ---") + print(evaluation.conversational_inputs) + print("--- migrate_evaluation_item: conversational_expected_output ---") + print(evaluation.conversational_expected_output) + + if evaluation.conversational_inputs: + conversational_messages_input = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_messages(evaluation.conversational_inputs) + evaluation.inputs["messages"] = [message.model_dump(by_alias=True) for message in conversational_messages_input] + + print("--- migrate_evaluation_item: evaluation.inputs[messages] ---") + print(evaluation.inputs["messages"]) + return EvaluationItem.model_validate( { "id": evaluation.id, From c225c797672a112bfdcdddd0b00dc7a8eb0777dc Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Wed, 18 Feb 2026 14:25:44 -0600 Subject: [PATCH 3/9] feat: map legacy conversation eval inputs and outputs --- .../_cli/_evals/_conversational_utils.py | 189 +++++++----------- .../_cli/_evals/_models/_evaluation_set.py | 6 +- src/uipath/_cli/_evals/_runtime.py | 52 +---- src/uipath/_cli/_utils/_eval_set.py | 30 ++- 4 files changed, 97 insertions(+), 180 deletions(-) diff --git a/src/uipath/_cli/_evals/_conversational_utils.py b/src/uipath/_cli/_evals/_conversational_utils.py index 1fc4769de..cc8c32064 100644 --- a/src/uipath/_cli/_evals/_conversational_utils.py +++ b/src/uipath/_cli/_evals/_conversational_utils.py @@ -1,13 +1,22 @@ -from typing import Any, Dict, Literal, List -from datetime import datetime, timezone import uuid -from uipath.core.chat import UiPathConversationMessage, UiPathConversationContentPart, UiPathConversationToolCall, UiPathConversationToolCallResult -from uipath.core.chat.content import UiPathInlineValue +from datetime import datetime, timezone +from typing import Any, List, Literal from pydantic import BaseModel, Field +from uipath.core.chat import ( + UiPathConversationContentPart, + UiPathConversationContentPartData, + UiPathConversationMessage, + UiPathConversationMessageData, + UiPathConversationToolCall, + UiPathConversationToolCallData, + UiPathConversationToolCallResult, + UiPathInlineValue, +) # Types for legacy conversational-agent evaluation input/outputs. + class LegacyConversationalEvalJobAttachmentReference(BaseModel): """File attachment reference in eval messages.""" @@ -22,12 +31,14 @@ class LegacyConversationalEvalOutputToolCall(BaseModel): name: str arguments: dict[str, Any] + class LegacyConversationalEvalInputToolCallResult(BaseModel): """Tool call result in eval input schema.""" value: Any is_error: bool | None = Field(default=None, alias="isError") + class LegacyConversationalEvalInputToolCall(LegacyConversationalEvalOutputToolCall): """Tool call in eval input schema (extends output tool call with result).""" @@ -45,21 +56,27 @@ class LegacyConversationalEvalUserMessage(LegacyConversationalEvalMessage): """User message in eval schema.""" role: Literal["user"] = "user" - attachments: list[LegacyConversationalEvalJobAttachmentReference] | None = Field(default=None) + attachments: list[LegacyConversationalEvalJobAttachmentReference] | None = Field( + default=None + ) class LegacyConversationalEvalInputAgentMessage(LegacyConversationalEvalMessage): """Agent message in eval input schema (input tool-calls contain results field).""" role: Literal["agent"] = "agent" - tool_calls: list[LegacyConversationalEvalInputToolCall] | None = Field(default=None, alias="toolCalls") + tool_calls: list[LegacyConversationalEvalInputToolCall] | None = Field( + default=None, alias="toolCalls" + ) class LegacyConversationalEvalOutputAgentMessage(LegacyConversationalEvalMessage): """Agent message in eval output schema (output tool-calls don't contain result field).""" role: Literal["agent"] = "agent" - tool_calls: list[LegacyConversationalEvalOutputToolCall] = Field(default=None, alias="toolCalls") + tool_calls: list[LegacyConversationalEvalOutputToolCall] | None = Field( + default=None, alias="toolCalls" + ) class LegacyConversationalEvalInput(BaseModel): @@ -71,9 +88,15 @@ class LegacyConversationalEvalInput(BaseModel): """ conversation_history: list[ - list[LegacyConversationalEvalUserMessage | LegacyConversationalEvalInputAgentMessage] + list[ + LegacyConversationalEvalUserMessage + | LegacyConversationalEvalInputAgentMessage + ] ] = Field(alias="conversationHistory") - current_user_prompt: LegacyConversationalEvalUserMessage = Field(alias="currentUserPrompt") + current_user_prompt: LegacyConversationalEvalUserMessage = Field( + alias="currentUserPrompt" + ) + class LegacyConversationalEvalOutput(BaseModel): """Complete eval output schema matching TypeScript definition. @@ -81,23 +104,20 @@ class LegacyConversationalEvalOutput(BaseModel): agentResponse: Sequence of agent messages ending with a message without tool calls """ - agent_response: list[LegacyConversationalEvalOutputAgentMessage] = Field(alias="agentResponse") + agent_response: list[LegacyConversationalEvalOutputAgentMessage] = Field( + alias="agentResponse" + ) + # Mapper functions to convert between UiPath standard Message format and legacy conversational formats + class UiPathLegacyEvalChatMessagesMapper: @staticmethod - def legacy_conversational_eval_input_to_messages( - eval_input: LegacyConversationalEvalInput + def legacy_conversational_eval_input_to_uipath_message_list( + eval_input: LegacyConversationalEvalInput, ) -> List[UiPathConversationMessage]: - """Convert legacy eval input format to list of UiPathConversationMessage. - - Args: - eval_input: Legacy conversational eval input with conversation_history and current_user_prompt - - Returns: - List of UiPathConversationMessage objects representing the full conversation - """ + """Convert legacy eval input format to list of UiPathConversationMessage.""" messages: List[UiPathConversationMessage] = [] timestamp = ( datetime.now(timezone.utc) @@ -216,101 +236,38 @@ def legacy_conversational_eval_input_to_messages( return messages - - # def messages_to_legacy_conversational_eval_output( - # messages: List[UiPathConversationMessage], - # ) -> LegacyConversationalEvalOutput: - # """Convert list of UiPathConversationMessage to LegacyConversationalEvalOutput. - - # Args: - # messages: List of UiPathConversationMessage objects - - # Returns: - # LegacyConversationalEvalOutput containing agent response messages - # """ - # agent_messages = [] - - # for message in messages: - # # Only process assistant/agent messages - # if message.role in ("assistant", "agent", "ai"): - # # Extract text from content parts - # text = "" - # if message.content_parts: - # for content_part in message.content_parts: - # if content_part.mime_type == "text/plain": - # # Extract inline value - # if hasattr(content_part.data, 'inline'): - # text += str(content_part.data.inline) - - # # Convert tool calls if present - # tool_calls = None - # if message.tool_calls: - # tool_calls = [] - # for tc in message.tool_calls: - # # Extract input arguments - # arguments = {} - # if tc.input: - # if hasattr(tc.input, 'inline'): - # arguments = tc.input.inline if isinstance(tc.input.inline, dict) else {} - - # tool_call = LegacyConversationalEvalOutputToolCall( - # name=tc.name, - # arguments=arguments, - # ) - # tool_calls.append(tool_call) - - # agent_message = LegacyConversationalEvalOutputAgentMessage( - # role="agent", - # text=text, - # tool_calls=tool_calls, - # ) - # agent_messages.append(agent_message) - - # return LegacyConversationalEvalOutput(agent_response=agent_messages) - - - # TODO Check on below. I think that messages_to_legacy_conversational_eval_output was converting - # the core langgraph message and we would first need that to be converted into the UiPathConversationMessage. - - # def messages_to_legacy_conversational_eval_output_schema( - # messages: List[UiPathConversationMessage], - # ) -> Dict[str, Any]: - # """Convert list of UiPathConversationMessage to legacy eval output schema dict. - - # Args: - # messages: List of UiPathConversationMessage objects - - # Returns: - # Dictionary matching LegacyConversationalEvalOutput schema (with camelCase keys) - # """ - # output = messages_to_legacy_conversational_eval_output(messages) - # return output.model_dump(by_alias=True, exclude_none=True) - @staticmethod - def messages_to_legacy_conversational_eval_output( - messages: List[UiPathConversationMessage], - ) -> LegacyConversationalEvalOutput: - """Convert list of messages to conversational eval output schema.""" - - agent_messages = [] - - for message in messages: - if message.get("type") == "ai": - tool_calls = [] - if message.get("tool_calls"): - tool_calls = [ - { - "name": tc.get("name") or tc.get("function", {}).get("name"), - "arguments": tc.get("arguments") - or tc.get("function", {}).get("arguments"), - } - for tc in message["tool_calls"] - ] - - agent_message = { - "text": message.get("content") or "", - "toolCalls": tool_calls if tool_calls else None, - } - agent_messages.append(agent_message) + def legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output: LegacyConversationalEvalOutput, + ) -> List[UiPathConversationMessageData]: + """Convert legacy eval output format to list of UiPathConversationMessageData.""" + messages: List[UiPathConversationMessageData] = [] + + for eval_agent_message in eval_output.agent_response: + content_parts = [ + UiPathConversationContentPartData( + mime_type="text/markdown", + data=UiPathInlineValue(inline=eval_agent_message.text), + citations=[], + ) + ] + + tool_calls: List[UiPathConversationToolCallData] = [] + if eval_agent_message.tool_calls: + for tc in eval_agent_message.tool_calls: + tool_call = UiPathConversationToolCallData( + name=tc.name, + input=tc.arguments, + ) + tool_calls.append(tool_call) + + messages.append( + UiPathConversationMessageData( + role="assistant", + content_parts=content_parts, + tool_calls=tool_calls, + interrupts=[], + ) + ) - return {"agentResponse": agent_messages} \ No newline at end of file + return messages diff --git a/src/uipath/_cli/_evals/_models/_evaluation_set.py b/src/uipath/_cli/_evals/_models/_evaluation_set.py index 1ef1e5eb8..258cfcf79 100644 --- a/src/uipath/_cli/_evals/_models/_evaluation_set.py +++ b/src/uipath/_cli/_evals/_models/_evaluation_set.py @@ -4,8 +4,10 @@ from pydantic import BaseModel, ConfigDict, Field from pydantic.alias_generators import to_camel -from uipath._cli._evals._conversational_utils import LegacyConversationalEvalInput, LegacyConversationalEvalOutput - +from uipath._cli._evals._conversational_utils import ( + LegacyConversationalEvalInput, + LegacyConversationalEvalOutput, +) from uipath._cli._evals.mocks.types import ( InputMockingStrategy, MockingStrategy, diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index fcf5d662f..213149495 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -47,8 +47,6 @@ from uipath.runtime.logging import UiPathRuntimeExecutionLogHandler from uipath.runtime.schema import UiPathRuntimeSchema -from uipath._cli._evals._conversational_utils import UiPathLegacyEvalChatMessagesMapper - from uipath._cli._evals._span_utils import ( configure_eval_set_run_span, configure_evaluation_span, @@ -851,31 +849,6 @@ async def execute_runtime( eval_id=eval_item.id, ) - # todo: map eval input type to this type - # inputs_with_overrides = { - # "messages": [ - # { - # "messageId": "E6928DF4-AA36-46BE-B4FC-52ADA2B636D0", - # "role": "user", - # "contentParts": [ - # { - # "contentPartId": "E75CBEA6-7A2C-442B-B0B6-39FFBF17E986", - # "mimeType": "text/plain", - # "data": {"inline": "Hi what can you do"}, - # "citations": [], - # "createdAt": "2026-01-18T05:32:39.620Z", - # "updatedAt": "2026-01-18T05:32:39.620Z", - # } - # ], - # "toolCalls": [], - # "interrupts": [], - # "spanId": "0f32ee22-0def-4906-9cde-dbb9860c050f", - # "createdAt": "2026-01-18T05:32:38.807Z", - # "updatedAt": "2026-01-18T05:32:38.807Z", - # } - # ] - # } - # In resume mode, pass None as input # The UiPathResumableRuntime wrapper will automatically: # 1. Fetch triggers from storage @@ -915,33 +888,10 @@ async def execute_runtime( if result is None: raise ValueError("Execution result cannot be None for eval runs") - + if result is None: raise ValueError("Execution result cannot be None for eval runs") - schema = await self.get_schema() - is_conversational = False - - if schema.metadata and isinstance(schema.metadata, dict): - print("=== Schema metadata: ") - print(schema.metadata) - engine = schema.metadata.get("settings").get("engine") - is_conversational = "conversational" in engine - - # print("result.output: " + str(result.output)) - if is_conversational and result.output: - converted_output = UiPathLegacyEvalChatMessagesMapper.messages_to_legacy_conversational_eval_output(result.output.get("messages")) - print("converted_output: " + str(converted_output)) - result = UiPathRuntimeResult( - output=converted_output, - status=result.status, - error=result.error, - trigger=result.trigger, - triggers=result.triggers, - ) - - print("result: " + str(result)) - return UiPathEvalRunExecutionOutput( execution_time=end_time - start_time, spans=spans, diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index 6915c51a2..4155b9b01 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -5,6 +5,7 @@ import click from pydantic import ValidationError +from uipath._cli._evals._conversational_utils import UiPathLegacyEvalChatMessagesMapper from uipath._cli._evals._evaluator_factory import EvaluatorFactory from uipath._cli._evals._models._evaluation_set import ( EvaluationItem, @@ -15,7 +16,6 @@ from uipath._cli._evals.mocks.types import InputMockingStrategy, LLMMockingStrategy from uipath._cli._utils._console import ConsoleLogger from uipath.eval.evaluators.base_evaluator import GenericBaseEvaluator -from uipath._cli._evals._conversational_utils import UiPathLegacyEvalChatMessagesMapper console = ConsoleLogger() @@ -143,17 +143,25 @@ def migrate_evaluation_item( tools_to_simulate=evaluation.tools_to_simulate or [], ) - print("--- migrate_evaluation_item: conversational_inputs ---") - print(evaluation.conversational_inputs) - print("--- migrate_evaluation_item: conversational_expected_output ---") - print(evaluation.conversational_expected_output) - if evaluation.conversational_inputs: - conversational_messages_input = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_messages(evaluation.conversational_inputs) - evaluation.inputs["messages"] = [message.model_dump(by_alias=True) for message in conversational_messages_input] - - print("--- migrate_evaluation_item: evaluation.inputs[messages] ---") - print(evaluation.inputs["messages"]) + conversational_messages_input = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + evaluation.conversational_inputs + ) + evaluation.inputs["messages"] = [ + message.model_dump(by_alias=True) + for message in conversational_messages_input + ] + + if evaluation.conversational_expected_output: + conversational_messages_expected_output = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + evaluation.conversational_expected_output + ) + evaluation.expected_output[ + "uipath__agent_response_messages" + ] = [ + message.model_dump(by_alias=True) + for message in conversational_messages_expected_output + ] return EvaluationItem.model_validate( { From 7086fe7998cdd68e909b1d98701a309bb4f216f4 Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Wed, 18 Feb 2026 14:27:05 -0600 Subject: [PATCH 4/9] fix: remove logs --- src/uipath/_cli/_evals/_runtime.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 213149495..a6cab5e51 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -309,7 +309,6 @@ async def initiate_evaluation( ) async def execute(self) -> UiPathRuntimeResult: - print("EXECUTEE!!!") logger.info("=" * 80) logger.info("EVAL RUNTIME: Starting evaluation execution") logger.info(f"EVAL RUNTIME: Execution ID: {self.execution_id}") @@ -886,9 +885,6 @@ async def execute_runtime( end_time = time() spans, logs = self._get_and_clear_execution_data(execution_id) - if result is None: - raise ValueError("Execution result cannot be None for eval runs") - if result is None: raise ValueError("Execution result cannot be None for eval runs") From 10cf34ffe4cb90ba193adbdf8734b11833fffda7 Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Thu, 19 Feb 2026 01:44:42 -0600 Subject: [PATCH 5/9] fix: attachment aliases --- src/uipath/_cli/_evals/_conversational_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uipath/_cli/_evals/_conversational_utils.py b/src/uipath/_cli/_evals/_conversational_utils.py index cc8c32064..844386876 100644 --- a/src/uipath/_cli/_evals/_conversational_utils.py +++ b/src/uipath/_cli/_evals/_conversational_utils.py @@ -20,9 +20,9 @@ class LegacyConversationalEvalJobAttachmentReference(BaseModel): """File attachment reference in eval messages.""" - id: str - full_name: str = Field(..., alias="fullName") - mime_type: str = Field(..., alias="mimeType") + id: str = Field(..., alias="ID") + full_name: str = Field(..., alias="FullName") + mime_type: str = Field(..., alias="MimeType") class LegacyConversationalEvalOutputToolCall(BaseModel): From 9c6cd46a3d6a6fa3ff97ebc5a3acb94836350ca0 Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Thu, 19 Feb 2026 11:09:01 -0600 Subject: [PATCH 6/9] feat: add tests for conversational_utils --- tests/cli/eval/test_conversational_utils.py | 421 ++++++++++++++++++++ 1 file changed, 421 insertions(+) create mode 100644 tests/cli/eval/test_conversational_utils.py diff --git a/tests/cli/eval/test_conversational_utils.py b/tests/cli/eval/test_conversational_utils.py new file mode 100644 index 000000000..ea5f6e389 --- /dev/null +++ b/tests/cli/eval/test_conversational_utils.py @@ -0,0 +1,421 @@ +"""Tests for conversational eval utilities.""" + +from uipath._cli._evals._conversational_utils import ( + LegacyConversationalEvalInput, + LegacyConversationalEvalInputAgentMessage, + LegacyConversationalEvalInputToolCall, + LegacyConversationalEvalInputToolCallResult, + LegacyConversationalEvalOutput, + LegacyConversationalEvalOutputAgentMessage, + LegacyConversationalEvalOutputToolCall, + LegacyConversationalEvalUserMessage, + UiPathLegacyEvalChatMessagesMapper, +) + + +class TestLegacyConversationalEvalInputToUiPathMessages: + """Tests for converting legacy eval input to UiPath messages.""" + + def test_converts_simple_conversation(self): + """Should convert simple user-agent conversation.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Hello"), + LegacyConversationalEvalInputAgentMessage(text="Hi there!"), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="How are you?"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + # Should have 3 messages: user, agent, user + assert len(result) == 3 + assert result[0].role == "user" + assert result[0].content_parts[0].data.inline == "Hello" + assert result[1].role == "assistant" + assert result[1].content_parts[0].data.inline == "Hi there!" + assert result[2].role == "user" + assert result[2].content_parts[0].data.inline == "How are you?" + + def test_converts_user_message_with_text_plain_mime_type(self): + """User messages should have text/plain mime type.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Test"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + assert len(result) == 1 + assert result[0].content_parts[0].mime_type == "text/plain" + + def test_converts_agent_message_with_text_markdown_mime_type(self): + """Agent messages should have text/markdown mime type.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Question"), + LegacyConversationalEvalInputAgentMessage(text="**Answer**"), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Next"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + # Agent message is at index 1 + assert result[1].content_parts[0].mime_type == "text/markdown" + + def test_converts_agent_message_with_tool_calls(self): + """Should convert agent messages with tool calls and results.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Search for data"), + LegacyConversationalEvalInputAgentMessage( + text="Let me search", + toolCalls=[ + LegacyConversationalEvalInputToolCall( + name="search_tool", + arguments={"query": "test"}, + result=LegacyConversationalEvalInputToolCallResult( + value={"results": ["item1", "item2"]}, + isError=False, + ), + ) + ], + ), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Thanks"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + agent_message = result[1] + assert agent_message.role == "assistant" + assert len(agent_message.tool_calls) == 1 + assert agent_message.tool_calls[0].name == "search_tool" + assert agent_message.tool_calls[0].input == {"query": "test"} + assert agent_message.tool_calls[0].result is not None + assert agent_message.tool_calls[0].result.output == { + "results": ["item1", "item2"] + } + assert agent_message.tool_calls[0].result.is_error is False + + def test_converts_tool_call_with_error_result(self): + """Should handle tool calls with error results.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Do something"), + LegacyConversationalEvalInputAgentMessage( + text="Trying", + toolCalls=[ + LegacyConversationalEvalInputToolCall( + name="failing_tool", + arguments={}, + result=LegacyConversationalEvalInputToolCallResult( + value="Error occurred", + isError=True, + ), + ) + ], + ), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Ok"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + tool_call = result[1].tool_calls[0] + assert tool_call.result.is_error is True + assert tool_call.result.output == "Error occurred" + + def test_converts_multiple_exchanges(self): + """Should handle multiple conversation exchanges.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="First question"), + LegacyConversationalEvalInputAgentMessage(text="First answer"), + ], + [ + LegacyConversationalEvalUserMessage(text="Second question"), + LegacyConversationalEvalInputAgentMessage(text="Second answer"), + ], + ], + currentUserPrompt=LegacyConversationalEvalUserMessage( + text="Third question" + ), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + assert len(result) == 5 # 2 exchanges (4 messages) + current prompt + assert result[0].content_parts[0].data.inline == "First question" + assert result[1].content_parts[0].data.inline == "First answer" + assert result[2].content_parts[0].data.inline == "Second question" + assert result[3].content_parts[0].data.inline == "Second answer" + assert result[4].content_parts[0].data.inline == "Third question" + + def test_converts_exchange_with_multiple_agent_messages(self): + """Should handle exchanges with multiple agent responses.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Question"), + LegacyConversationalEvalInputAgentMessage( + text="Using tool", + toolCalls=[ + LegacyConversationalEvalInputToolCall( + name="tool1", + arguments={"x": 1}, + result=LegacyConversationalEvalInputToolCallResult( + value="result1", + isError=False, + ), + ) + ], + ), + LegacyConversationalEvalInputAgentMessage(text="Final answer"), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Next"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + assert len(result) == 4 # user, agent with tool, agent final, current user + assert result[0].role == "user" + assert result[1].role == "assistant" + assert len(result[1].tool_calls) == 1 + assert result[2].role == "assistant" + assert len(result[2].tool_calls) == 0 + assert result[3].role == "user" + + def test_generates_unique_ids_for_messages(self): + """Should generate unique message IDs.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Q1"), + LegacyConversationalEvalInputAgentMessage(text="A1"), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Q2"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + message_ids = [msg.message_id for msg in result] + assert len(message_ids) == len(set(message_ids)) # All unique + + def test_generates_unique_content_part_ids(self): + """Should generate unique content part IDs.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Q"), + LegacyConversationalEvalInputAgentMessage(text="A"), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Q2"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + content_part_ids = [ + part.content_part_id for msg in result for part in msg.content_parts + ] + assert len(content_part_ids) == len(set(content_part_ids)) + + def test_empty_conversation_history(self): + """Should handle empty conversation history.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="First message"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + assert len(result) == 1 + assert result[0].role == "user" + assert result[0].content_parts[0].data.inline == "First message" + + +class TestLegacyConversationalEvalOutputToUiPathMessageData: + """Tests for converting legacy eval output to UiPath message data.""" + + def test_converts_simple_agent_response(self): + """Should convert simple agent response.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[ + LegacyConversationalEvalOutputAgentMessage(text="Here is the answer") + ] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert len(result) == 1 + assert result[0].role == "assistant" + assert len(result[0].content_parts) == 1 + assert result[0].content_parts[0].data.inline == "Here is the answer" + assert result[0].content_parts[0].mime_type == "text/markdown" + + def test_converts_agent_response_with_tool_calls(self): + """Should convert agent responses with tool calls.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[ + LegacyConversationalEvalOutputAgentMessage( + text="Using tool", + toolCalls=[ + LegacyConversationalEvalOutputToolCall( + name="search", + arguments={"query": "test"}, + ) + ], + ) + ] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert len(result) == 1 + assert len(result[0].tool_calls) == 1 + assert result[0].tool_calls[0].name == "search" + assert result[0].tool_calls[0].input == {"query": "test"} + # Output tool calls should not have result field + assert result[0].tool_calls[0].result is None + + def test_converts_multiple_agent_messages(self): + """Should convert multiple agent messages in sequence.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[ + LegacyConversationalEvalOutputAgentMessage( + text="First response", + toolCalls=[ + LegacyConversationalEvalOutputToolCall( + name="tool1", + arguments={}, + ) + ], + ), + LegacyConversationalEvalOutputAgentMessage(text="Final response"), + ] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert len(result) == 2 + assert result[0].content_parts[0].data.inline == "First response" + assert len(result[0].tool_calls) == 1 + assert result[1].content_parts[0].data.inline == "Final response" + assert len(result[1].tool_calls) == 0 + + def test_converts_multiple_tool_calls_in_message(self): + """Should handle multiple tool calls in a single message.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[ + LegacyConversationalEvalOutputAgentMessage( + text="Using multiple tools", + toolCalls=[ + LegacyConversationalEvalOutputToolCall( + name="tool1", + arguments={"a": 1}, + ), + LegacyConversationalEvalOutputToolCall( + name="tool2", + arguments={"b": 2}, + ), + ], + ) + ] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert len(result) == 1 + assert len(result[0].tool_calls) == 2 + assert result[0].tool_calls[0].name == "tool1" + assert result[0].tool_calls[0].input == {"a": 1} + assert result[0].tool_calls[1].name == "tool2" + assert result[0].tool_calls[1].input == {"b": 2} + + def test_agent_message_without_tool_calls(self): + """Should handle agent messages without tool calls.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[ + LegacyConversationalEvalOutputAgentMessage(text="Simple response") + ] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert len(result) == 1 + assert len(result[0].tool_calls) == 0 + + def test_empty_agent_response(self): + """Should handle empty agent response list.""" + eval_output = LegacyConversationalEvalOutput(agentResponse=[]) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert result == [] + + def test_preserves_empty_tool_arguments(self): + """Should preserve empty tool arguments dict.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[ + LegacyConversationalEvalOutputAgentMessage( + text="Using tool", + toolCalls=[ + LegacyConversationalEvalOutputToolCall( + name="no_arg_tool", + arguments={}, + ) + ], + ) + ] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert result[0].tool_calls[0].input == {} From 27df9df02dfa0040ccfd9b3b45c580571e251096 Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Thu, 19 Feb 2026 11:12:54 -0600 Subject: [PATCH 7/9] fix: test mypy issues --- tests/cli/eval/test_conversational_utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/cli/eval/test_conversational_utils.py b/tests/cli/eval/test_conversational_utils.py index ea5f6e389..84e1923b3 100644 --- a/tests/cli/eval/test_conversational_utils.py +++ b/tests/cli/eval/test_conversational_utils.py @@ -1,5 +1,7 @@ """Tests for conversational eval utilities.""" +from uipath.core.chat import UiPathInlineValue + from uipath._cli._evals._conversational_utils import ( LegacyConversationalEvalInput, LegacyConversationalEvalInputAgentMessage, @@ -35,10 +37,13 @@ def test_converts_simple_conversation(self): # Should have 3 messages: user, agent, user assert len(result) == 3 assert result[0].role == "user" + assert isinstance(result[0].content_parts[0].data, UiPathInlineValue) assert result[0].content_parts[0].data.inline == "Hello" assert result[1].role == "assistant" + assert isinstance(result[1].content_parts[0].data, UiPathInlineValue) assert result[1].content_parts[0].data.inline == "Hi there!" assert result[2].role == "user" + assert isinstance(result[2].content_parts[0].data, UiPathInlineValue) assert result[2].content_parts[0].data.inline == "How are you?" def test_converts_user_message_with_text_plain_mime_type(self): @@ -142,6 +147,7 @@ def test_converts_tool_call_with_error_result(self): ) tool_call = result[1].tool_calls[0] + assert tool_call.result is not None assert tool_call.result.is_error is True assert tool_call.result.output == "Error occurred" @@ -168,10 +174,15 @@ def test_converts_multiple_exchanges(self): ) assert len(result) == 5 # 2 exchanges (4 messages) + current prompt + assert isinstance(result[0].content_parts[0].data, UiPathInlineValue) assert result[0].content_parts[0].data.inline == "First question" + assert isinstance(result[1].content_parts[0].data, UiPathInlineValue) assert result[1].content_parts[0].data.inline == "First answer" + assert isinstance(result[2].content_parts[0].data, UiPathInlineValue) assert result[2].content_parts[0].data.inline == "Second question" + assert isinstance(result[3].content_parts[0].data, UiPathInlineValue) assert result[3].content_parts[0].data.inline == "Second answer" + assert isinstance(result[4].content_parts[0].data, UiPathInlineValue) assert result[4].content_parts[0].data.inline == "Third question" def test_converts_exchange_with_multiple_agent_messages(self): @@ -264,6 +275,7 @@ def test_empty_conversation_history(self): assert len(result) == 1 assert result[0].role == "user" + assert isinstance(result[0].content_parts[0].data, UiPathInlineValue) assert result[0].content_parts[0].data.inline == "First message" @@ -285,6 +297,7 @@ def test_converts_simple_agent_response(self): assert len(result) == 1 assert result[0].role == "assistant" assert len(result[0].content_parts) == 1 + assert isinstance(result[0].content_parts[0].data, UiPathInlineValue) assert result[0].content_parts[0].data.inline == "Here is the answer" assert result[0].content_parts[0].mime_type == "text/markdown" @@ -337,8 +350,10 @@ def test_converts_multiple_agent_messages(self): ) assert len(result) == 2 + assert isinstance(result[0].content_parts[0].data, UiPathInlineValue) assert result[0].content_parts[0].data.inline == "First response" assert len(result[0].tool_calls) == 1 + assert isinstance(result[1].content_parts[0].data, UiPathInlineValue) assert result[1].content_parts[0].data.inline == "Final response" assert len(result[1].tool_calls) == 0 From dc9326372342801fabe8a18a459207fb816fa272 Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:19:33 -0600 Subject: [PATCH 8/9] fix: add tests and handle empty content --- .../_cli/_evals/_conversational_utils.py | 90 ++++++++++------- tests/cli/eval/test_conversational_utils.py | 99 +++++++++++++++++++ 2 files changed, 152 insertions(+), 37 deletions(-) diff --git a/src/uipath/_cli/_evals/_conversational_utils.py b/src/uipath/_cli/_evals/_conversational_utils.py index 844386876..70d878b71 100644 --- a/src/uipath/_cli/_evals/_conversational_utils.py +++ b/src/uipath/_cli/_evals/_conversational_utils.py @@ -130,16 +130,20 @@ def legacy_conversational_eval_input_to_uipath_message_list( for eval_message in eval_exchange: if eval_message.role == "user": # Convert user message - content_parts = [ - UiPathConversationContentPart( - content_part_id=str(uuid.uuid4()), - mime_type="text/plain", - data=UiPathInlineValue(inline=eval_message.text), - citations=[], - created_at=timestamp, - updated_at=timestamp, - ) - ] + content_parts = ( + [ + UiPathConversationContentPart( + content_part_id=str(uuid.uuid4()), + mime_type="text/plain", + data=UiPathInlineValue(inline=eval_message.text), + citations=[], + created_at=timestamp, + updated_at=timestamp, + ) + ] + if eval_message.text + else [] + ) # TODO: Add attachments if present # if message.attachments: @@ -161,16 +165,20 @@ def legacy_conversational_eval_input_to_uipath_message_list( ) elif eval_message.role == "agent": # Convert agent message - content_parts = [ - UiPathConversationContentPart( - content_part_id=str(uuid.uuid4()), - mime_type="text/markdown", - data=UiPathInlineValue(inline=eval_message.text), - citations=[], - created_at=timestamp, - updated_at=timestamp, - ) - ] + content_parts = ( + [ + UiPathConversationContentPart( + content_part_id=str(uuid.uuid4()), + mime_type="text/markdown", + data=UiPathInlineValue(inline=eval_message.text), + citations=[], + created_at=timestamp, + updated_at=timestamp, + ) + ] + if eval_message.text + else [] + ) # Convert tool calls if present tool_calls: List[UiPathConversationToolCall] = [] @@ -204,16 +212,20 @@ def legacy_conversational_eval_input_to_uipath_message_list( ) # Add current user prompt - content_parts = [ - UiPathConversationContentPart( - content_part_id=str(uuid.uuid4()), - mime_type="text/plain", - data=UiPathInlineValue(inline=eval_input.current_user_prompt.text), - citations=[], - created_at=timestamp, - updated_at=timestamp, - ) - ] + content_parts = ( + [ + UiPathConversationContentPart( + content_part_id=str(uuid.uuid4()), + mime_type="text/plain", + data=UiPathInlineValue(inline=eval_input.current_user_prompt.text), + citations=[], + created_at=timestamp, + updated_at=timestamp, + ) + ] + if eval_input.current_user_prompt.text + else [] + ) # TODO Add attachments if present # if eval_input.current_user_prompt.attachments: @@ -244,13 +256,17 @@ def legacy_conversational_eval_output_to_uipath_message_data_list( messages: List[UiPathConversationMessageData] = [] for eval_agent_message in eval_output.agent_response: - content_parts = [ - UiPathConversationContentPartData( - mime_type="text/markdown", - data=UiPathInlineValue(inline=eval_agent_message.text), - citations=[], - ) - ] + content_parts = ( + [ + UiPathConversationContentPartData( + mime_type="text/markdown", + data=UiPathInlineValue(inline=eval_agent_message.text), + citations=[], + ) + ] + if eval_agent_message.text + else [] + ) tool_calls: List[UiPathConversationToolCallData] = [] if eval_agent_message.tool_calls: diff --git a/tests/cli/eval/test_conversational_utils.py b/tests/cli/eval/test_conversational_utils.py index 84e1923b3..953fce94f 100644 --- a/tests/cli/eval/test_conversational_utils.py +++ b/tests/cli/eval/test_conversational_utils.py @@ -278,6 +278,62 @@ def test_empty_conversation_history(self): assert isinstance(result[0].content_parts[0].data, UiPathInlineValue) assert result[0].content_parts[0].data.inline == "First message" + def test_blank_text_in_message_creates_empty_content_parts(self): + """Should create empty content_parts when user message has blank text.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text=""), + LegacyConversationalEvalInputAgentMessage(text=""), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text=""), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + # Empty text content should result in no text content-parts + assert len(result[0].content_parts) == 0 + assert len(result[1].content_parts) == 0 + assert len(result[2].content_parts) == 0 + + def test_blank_text_with_tool_calls_creates_empty_content_parts(self): + """Should create empty content_parts when agent message with tool calls has blank text.""" + eval_input = LegacyConversationalEvalInput( + conversationHistory=[ + [ + LegacyConversationalEvalUserMessage(text="Search for data"), + LegacyConversationalEvalInputAgentMessage( + text="", + toolCalls=[ + LegacyConversationalEvalInputToolCall( + name="search_tool", + arguments={"query": "test"}, + result=LegacyConversationalEvalInputToolCallResult( + value={"results": ["item1"]}, + isError=False, + ), + ) + ], + ), + ] + ], + currentUserPrompt=LegacyConversationalEvalUserMessage(text="Thanks"), + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_input_to_uipath_message_list( + eval_input + ) + + agent_message = result[1] + assert agent_message.role == "assistant" + # Empty content should result in no text content-parts + assert len(agent_message.content_parts) == 0 + # Tool calls should still be present + assert len(agent_message.tool_calls) == 1 + class TestLegacyConversationalEvalOutputToUiPathMessageData: """Tests for converting legacy eval output to UiPath message data.""" @@ -434,3 +490,46 @@ def test_preserves_empty_tool_arguments(self): ) assert result[0].tool_calls[0].input == {} + + def test_blank_text_in_agent_response_creates_empty_content_parts(self): + """Should create empty content_parts when agent response has blank text.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[LegacyConversationalEvalOutputAgentMessage(text="")] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert len(result) == 1 + assert result[0].role == "assistant" + # Empty text should result in no text content-parts + assert len(result[0].content_parts) == 0 + + def test_blank_text_with_tool_calls_in_agent_response_creates_empty_content_parts( + self, + ): + """Should create empty content_parts when agent response with tool calls has blank text.""" + eval_output = LegacyConversationalEvalOutput( + agentResponse=[ + LegacyConversationalEvalOutputAgentMessage( + text="", + toolCalls=[ + LegacyConversationalEvalOutputToolCall( + name="search", + arguments={"query": "test"}, + ) + ], + ) + ] + ) + + result = UiPathLegacyEvalChatMessagesMapper.legacy_conversational_eval_output_to_uipath_message_data_list( + eval_output + ) + + assert len(result) == 1 + # Empty text should result in no text content-parts + assert len(result[0].content_parts) == 0 + # Tool calls should still be present + assert len(result[0].tool_calls) == 1 From fe731c0a1b22d067e9c5b460bc338b5e5fdfa6e5 Mon Sep 17 00:00:00 2001 From: Maxwell Du <60411452+maxduu@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:20:03 -0600 Subject: [PATCH 9/9] chore: update version --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3f85d7c7e..dbccd220a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.8.44" +version = "2.8.45" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" diff --git a/uv.lock b/uv.lock index c6fa2bc97..d1920ee55 100644 --- a/uv.lock +++ b/uv.lock @@ -2531,7 +2531,7 @@ wheels = [ [[package]] name = "uipath" -version = "2.8.44" +version = "2.8.45" source = { editable = "." } dependencies = [ { name = "applicationinsights" },