
Commit e450650

cristipufu and claude committed

fix: support structured output extraction for sequential workflows

The runtime and schema extraction only checked output_executors for response_format, which missed sequential workflows where the output executor is _EndWithConversation (not an AgentExecutor). Add a fallback that scans all workflow executors and picks the last agent's response_format. Includes e2e streaming tests and a sequential-structured-output sample.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

1 parent: ad4b73e

File tree: 11 files changed, +495 −22 lines
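The fallback the commit message describes can be sketched on its own: scan every executor and let the last Pydantic `response_format` win. This is a simplified illustration, not the runtime's actual code; the `FakeExecutor` stand-in and its `default_options` shape are assumptions made for the demo.

```python
from pydantic import BaseModel


class CityInfo(BaseModel):
    city: str
    country: str


class FakeExecutor:
    """Stand-in for an agent executor whose agent may declare a response_format."""

    def __init__(self, response_format=None):
        self.default_options = (
            {"response_format": response_format} if response_format else {}
        )


def last_response_format(executors):
    """Scan every executor; the last BaseModel response_format wins.

    Mirrors the commit's fallback: when the output executor is a
    framework-internal adapter (e.g. _EndWithConversation), the schema
    must come from the last agent in the pipeline instead.
    """
    result = None
    for ex in executors:
        rf = getattr(ex, "default_options", {}).get("response_format")
        if isinstance(rf, type) and issubclass(rf, BaseModel):
            result = rf
    return result


# The researcher declares no schema; the editor does, so the editor's wins.
executors = [FakeExecutor(), FakeExecutor(response_format=CityInfo)]
assert last_response_format(executors) is CityInfo
```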

packages/uipath-agent-framework/samples/README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -8,6 +8,7 @@ Sample agents built with [Agent Framework](https://github.com/microsoft/agent-fr
 |--------|-------------|
 | [quickstart-workflow](./quickstart-workflow/) | Single workflow agent with tool calling: fetches live weather data for any location |
 | [structured-output](./structured-output/) | Structured output workflow: extracts city information and returns it as a typed Pydantic model |
+| [sequential-structured-output](./sequential-structured-output/) | Sequential pipeline with structured output: researcher and editor agents produce a typed Pydantic city profile |
 | [hitl-workflow](./hitl-workflow/) | Human-in-the-loop workflow: customer support with approval-gated billing and refund operations |
 | [sequential](./sequential/) | Sequential pipeline: writer, reviewer, and editor agents process a task one after another |
 | [concurrent](./concurrent/) | Concurrent orchestration: sentiment, topic extraction, and summarization agents analyze text in parallel |
```
Lines changed: 46 additions & 0 deletions (new file: README.md for the sequential-structured-output sample)

# Sequential + Structured Output

A sequential pipeline that combines multi-agent processing with structured output. A researcher gathers facts about a city, then an editor organizes them into a well-defined Pydantic model (`CityInfo`). The final output is a typed JSON object — not free-form text.

## Agent Graph

```mermaid
flowchart TB
    __start__(__start__)
    __end__(__end__)
    input-conversation(input-conversation)
    researcher(researcher)
    editor(editor)
    end_(end)
    __start__ --> |input|input-conversation
    input-conversation --> researcher
    researcher --> editor
    editor --> end_
    end_ --> |output|__end__
```

Internally, the sequential orchestration chains:

- **researcher** — gathers key facts about the city (country, population, landmarks, cultural significance)
- **editor** — organizes the research into a structured `CityInfo` schema with `response_format`

Each agent sees the full conversation history from previous agents. The last agent's `response_format` determines the output schema.

## Prerequisites

Authenticate with UiPath to configure your `.env` file:

```bash
uipath auth
```

## Run

```
uipath run agent '{"messages": [{"contentParts": [{"data": {"inline": "Tell me about Tokyo"}}], "role": "user"}]}'
```

## Debug

```
uipath dev web
```
Lines changed: 12 additions & 0 deletions (new file: agent graph definition; `end` is a reserved word in mermaid flowcharts, so the node id is `end_` as in the README's graph)

```mermaid
flowchart TB
    __start__(__start__)
    __end__(__end__)
    input-conversation(input-conversation)
    researcher(researcher)
    editor(editor)
    end_(end)
    __start__ --> |input|input-conversation
    input-conversation --> researcher
    researcher --> editor
    editor --> end_
    end_ --> |output|__end__
```
Lines changed: 5 additions & 0 deletions (new file: agents manifest)

```json
{
  "agents": {
    "agent": "main.py:agent"
  }
}
```
Lines changed: 44 additions & 0 deletions (new file: main.py)

```python
from agent_framework.orchestrations import SequentialBuilder
from pydantic import BaseModel

from uipath_agent_framework.chat import UiPathOpenAIChatClient


class CityInfo(BaseModel):
    """Structured output for city information."""

    city: str
    country: str
    description: str
    population_estimate: str
    famous_for: list[str]


client = UiPathOpenAIChatClient(model="gpt-5-mini-2025-08-07")

researcher = client.as_agent(
    name="researcher",
    description="Researches factual information about a city.",
    instructions=(
        "You are a thorough researcher. Given a city name, gather key facts "
        "including its country, population, notable landmarks, cultural "
        "significance, and what it is famous for. Present your findings clearly."
    ),
)

editor = client.as_agent(
    name="editor",
    description="Edits research into a structured city profile.",
    instructions=(
        "You are a precise editor. Take the researcher's findings and organize "
        "them into a well-structured city profile. Ensure all facts are accurate "
        "and the description is concise and informative."
    ),
    default_options={"response_format": CityInfo},
)

workflow = SequentialBuilder(
    participants=[researcher, editor],
).build()

agent = workflow.as_agent(name="sequential_structured_output_workflow")
```
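Because the editor declares `response_format=CityInfo`, its final reply is JSON text that validates against the schema. A minimal sketch of that validation step, using only the sample's Pydantic model; the `raw` payload below is an illustrative value, not real workflow output:

```python
from pydantic import BaseModel


class CityInfo(BaseModel):
    """Mirror of the sample's structured-output schema."""

    city: str
    country: str
    description: str
    population_estimate: str
    famous_for: list[str]


# Illustrative JSON, standing in for the editor agent's final reply.
raw = (
    '{"city": "Tokyo", "country": "Japan",'
    ' "description": "Capital of Japan and one of the largest metro areas.",'
    ' "population_estimate": "about 14 million (city proper)",'
    ' "famous_for": ["Shibuya Crossing", "sushi", "anime culture"]}'
)

# Pydantic parses and type-checks the payload in one call.
profile = CityInfo.model_validate_json(raw)
```

Any missing or mistyped field raises a `ValidationError` instead of silently producing free-form text.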
Lines changed: 25 additions & 0 deletions (new file: pyproject.toml)

```toml
[project]
name = "sequential-structured-output"
version = "0.0.1"
description = "Sequential + structured output: agents process a task in a pipeline, the last agent returns data in a well-defined Pydantic schema"
authors = [{ name = "John Doe" }]
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "uipath",
    "uipath-agent-framework",
    "agent-framework-core>=1.0.0rc1",
    "agent-framework-orchestrations>=1.0.0b260219",
]

[dependency-groups]
dev = [
    "uipath-dev",
]

[tool.uv]
prerelease = "allow"

[tool.uv.sources]
uipath-dev = { path = "../../../../../uipath-dev-python", editable = true }
uipath-agent-framework = { path = "../../", editable = true }
```
Lines changed: 14 additions & 0 deletions (new file: UiPath runtime configuration)

```json
{
  "$schema": "https://cloud.uipath.com/draft/2024-12/uipath",
  "runtimeOptions": {
    "isConversational": false
  },
  "packOptions": {
    "fileExtensionsIncluded": [],
    "filesIncluded": [],
    "filesExcluded": [],
    "directoriesExcluded": [],
    "includeUvLock": true
  },
  "functions": {}
}
```

packages/uipath-agent-framework/samples/structured-output/pyproject.toml

Lines changed: 0 additions & 1 deletion (removes a trailing blank line)

```diff
@@ -18,4 +18,3 @@ dev = [
 
 [tool.uv]
 prerelease = "allow"
-
```
packages/uipath-agent-framework/src/uipath_agent_framework/runtime/runtime.py

Lines changed: 132 additions & 13 deletions

```diff
@@ -614,6 +614,22 @@ async def _stream_workflow(
         # (COMPLETED) is only in executor_completed.
         executor_tool_phases: dict[str, set[UiPathRuntimeStatePhase]] = {}
 
+        # Determine which executors are output executors so we can emit
+        # intermediate message events from non-output agent executors on
+        # completion. This enables per-agent streaming for orchestrations
+        # like SequentialBuilder where output_executors=[end] and
+        # intermediate agent outputs are filtered from "output" events.
+        output_executor_ids: set[str] = set()
+        try:
+            for ex in workflow.get_output_executors():
+                output_executor_ids.add(ex.id)
+        except Exception:
+            pass
+        # Track executors that already emitted message events so we don't
+        # duplicate when the same data appears in both executor_completed
+        # and "output" events.
+        executors_with_messages: set[str] = set()
+
         # Emit an early STARTED event for the start executor so the graph
         # visualization shows it immediately rather than after it finishes.
         # The framework's _run_workflow_with_tracing awaits the entire start
@@ -679,6 +695,33 @@ async def _stream_workflow(
                             tool_event.node_name
                         )
                     yield tool_event
+
+                    # For non-output AgentExecutor instances, extract
+                    # message events from executor_completed data.
+                    # This provides intermediate streaming for
+                    # orchestrations (e.g. sequential) where agent
+                    # output events are filtered by output_executors.
+                    # Only AgentExecutors produce meaningful chat
+                    # messages; framework-internal executors like
+                    # input-conversation would echo user input.
+                    executor = workflow.executors.get(event.executor_id)
+                    if (
+                        isinstance(executor, AgentExecutor)
+                        and event.executor_id not in output_executor_ids
+                        and event.executor_id not in executors_with_messages
+                    ):
+                        completed_msg_events = self._extract_workflow_messages(
+                            self._filter_completed_data(event.data)
+                        )
+                        if completed_msg_events:
+                            # Close prior message so each agent gets a
+                            # separate bubble in the UI.
+                            for close_evt in self.chat.close_message():
+                                yield UiPathRuntimeMessageEvent(payload=close_evt)
+                            for msg_event in completed_msg_events:
+                                yield UiPathRuntimeMessageEvent(payload=msg_event)
+                            executors_with_messages.add(event.executor_id)
+
                     yield UiPathRuntimeStateEvent(
                         payload=self._serialize_event_data(
                             self._filter_completed_data(event.data)
@@ -702,8 +745,15 @@ async def _stream_workflow(
                 elif tool_event.phase == UiPathRuntimeStatePhase.COMPLETED:
                     self._pending_tool_nodes.discard(tool_event.node_name)
                     yield tool_event
-                for msg_event in self._extract_workflow_messages(event.data):
-                    yield UiPathRuntimeMessageEvent(payload=msg_event)
+
+                # When intermediate agents already emitted message
+                # events via executor_completed, skip the final
+                # orchestration output to avoid duplicating text.
+                if not executors_with_messages:
+                    for msg_event in self._extract_workflow_messages(
+                        event.data, assistant_only=True
+                    ):
+                        yield UiPathRuntimeMessageEvent(payload=msg_event)
 
                 # Detect workflow suspension via state
                 if (
@@ -899,9 +949,31 @@ def _extract_contents(data: Any) -> list[Any]:
             contents.extend(UiPathAgentFrameworkRuntime._extract_contents(item))
         return contents
 
-    def _extract_workflow_messages(self, data: Any) -> list[Any]:
-        """Extract UiPath conversation message events from workflow output data."""
+    def _extract_workflow_messages(
+        self, data: Any, *, assistant_only: bool = False
+    ) -> list[Any]:
+        """Extract UiPath conversation message events from workflow output data.
+
+        Args:
+            data: Workflow output data (AgentResponse, Message, list[Message], etc.)
+            assistant_only: When True, only extract content from assistant-role
+                messages. Used for orchestration outputs (e.g. sequential
+                workflow full-conversation lists) to avoid echoing the user's
+                input back as AI output.
+        """
         events: list[Any] = []
+
+        if assistant_only and isinstance(data, list):
+            for item in data:
+                if isinstance(item, Message) and item.role != "assistant":
+                    continue
+                for content in self._extract_contents(item):
+                    if isinstance(content, Content):
+                        if content.type == "function_approval_request":
+                            continue
+                        events.extend(self.chat.map_streaming_content(content))
+            return events
+
         for content in self._extract_contents(data):
             if isinstance(content, Content):
                 # Skip HITL approval requests — handled by the suspension mechanism
@@ -964,7 +1036,12 @@ def _try_parse_structured_output(self, text: str) -> dict[str, Any] | None:
         return None
 
     def _get_output_response_format(self) -> type[BaseModel] | None:
-        """Get the response_format from the workflow's output executors."""
+        """Get the response_format from the workflow's output executors.
+
+        For orchestrations (e.g. SequentialBuilder) where output executors are
+        framework-internal adapters, falls back to scanning all workflow
+        executors and returns the response_format from the last AgentExecutor.
+        """
         try:
             output_executors = self.agent.workflow.get_output_executors()
         except Exception:
@@ -976,34 +1053,76 @@ def _get_output_response_format(self) -> type[BaseModel] | None:
             if not isinstance(inner_agent, Agent):
                 continue
             response_format = inner_agent.default_options.get("response_format")
-            if response_format is not None and isinstance(response_format, type) and issubclass(response_format, BaseModel):
+            if (
+                response_format is not None
+                and isinstance(response_format, type)
+                and issubclass(response_format, BaseModel)
+            ):
                 return response_format
-        return None
+
+        # Fallback: scan all workflow executors for the last AgentExecutor
+        # with a response_format. Needed for orchestrations like sequential
+        # where the output executor is an internal adapter (e.g. _EndWithConversation).
+        try:
+            all_executors = list(self.agent.workflow.executors.values())
+        except Exception:
+            return None
+        result: type[BaseModel] | None = None
+        for executor in all_executors:
+            if not isinstance(executor, AgentExecutor):
+                continue
+            inner_agent = executor._agent
+            if not isinstance(inner_agent, Agent):
+                continue
+            response_format = inner_agent.default_options.get("response_format")
+            if (
+                response_format is not None
+                and isinstance(response_format, type)
+                and issubclass(response_format, BaseModel)
+            ):
+                result = response_format
+        return result
 
     @staticmethod
     def _extract_text_from_data(data: Any) -> str:
-        """Extract text from any workflow data type."""
+        """Extract text from any workflow data type.
+
+        For list[Message] data (e.g. sequential workflow full-conversation
+        output), only the last assistant message is used. The full
+        conversation includes intermediate agent turns but the workflow
+        result should be the final agent's output, not the concatenation
+        of every participant.
+        """
         if isinstance(data, (AgentResponseUpdate, AgentResponse)):
             return data.text or ""
         if isinstance(data, Message):
+            if data.role != "assistant":
+                return ""
             return "".join(
                 c.text for c in (data.contents or []) if hasattr(c, "text") and c.text
             )
         if isinstance(data, str):
             return data
         if isinstance(data, list):
-            parts: list[str] = []
+            # Collect assistant message texts, then return only the last
+            # one. For single-agent workflows there is typically only one
+            # assistant message so this is equivalent to the old behavior.
+            # For multi-agent conversations (sequential, group-chat) the
+            # last assistant message is the final agent's output.
+            last_text: str = ""
             for item in data:
                 if isinstance(item, Message):
+                    if item.role != "assistant":
+                        continue
                     text = "".join(
                         c.text
                         for c in (item.contents or [])
                         if hasattr(c, "text") and c.text
                     )
                     if text:
-                        parts.append(text)
+                        last_text = text
                 elif isinstance(item, str):
-                    parts.append(item)
+                    last_text = item
                 elif isinstance(item, list):
                     for inner in item:
                         if isinstance(inner, Message) and inner.role == "assistant":
@@ -1013,8 +1132,8 @@ def _extract_text_from_data(data: Any) -> str:
                                 if hasattr(c, "text") and c.text
                             )
                             if text:
-                                parts.append(text)
-        return "".join(parts)
+                                last_text = text
+        return last_text
         return ""
 
     def _prepare_input(self, input: dict[str, Any] | None) -> str:
```
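The behavioral change in `_extract_text_from_data` (last assistant message wins, instead of concatenating every turn) can be shown standalone. The `Message`/`TextContent` classes below are simplified stand-ins for illustration, not the framework's real types:

```python
from dataclasses import dataclass, field


@dataclass
class TextContent:
    text: str


@dataclass
class Message:
    role: str
    contents: list = field(default_factory=list)


def extract_text(data: list) -> str:
    """Simplified sketch of the patched rule for list[Message] data.

    Non-assistant turns are skipped, and each assistant turn overwrites
    last_text, so a sequential conversation yields only the final
    agent's output rather than a concatenation of every participant.
    """
    last_text = ""
    for item in data:
        if isinstance(item, Message):
            if item.role != "assistant":
                continue
            text = "".join(
                c.text for c in (item.contents or []) if getattr(c, "text", "")
            )
            if text:
                last_text = text
        elif isinstance(item, str):
            last_text = item
    return last_text


# A sequential run: user input, researcher turn, then the editor's final JSON.
conversation = [
    Message("user", [TextContent("Tell me about Tokyo")]),
    Message("assistant", [TextContent("Research notes on Tokyo...")]),
    Message("assistant", [TextContent('{"city": "Tokyo", "country": "Japan"}')]),
]
```

With the pre-patch concatenation, the researcher's free-form notes would have been glued onto the editor's JSON, breaking structured-output parsing; here only the editor's reply survives.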
