Skip to content

Commit d9ab0d2

Browse files
committed
add workflow
1 parent 99058e1 commit d9ab0d2

File tree

2 files changed

+119
-9
lines changed

2 files changed

+119
-9
lines changed

src/uipath/_cli/_evals/_models/_output.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class EvaluationRunResult(BaseModel):
9595

9696
evaluation_name: str
9797
evaluation_run_results: List[EvaluationRunResultDto]
98+
workflow: Optional[List[str]] = None
9899
agent_execution_output: Optional[UiPathSerializableEvalRunExecutionOutput] = None
99100

100101
@property

src/uipath/_cli/_evals/_runtime.py

Lines changed: 118 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
from opentelemetry.sdk.trace import ReadableSpan, Span
1313
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
1414

15-
from uipath._cli._evals.mocks.input_mocker import (
16-
generate_llm_input,
17-
)
15+
from uipath._cli._evals.mocks.input_mocker import generate_llm_input
1816

1917
from ..._events._event_bus import EventBus
2018
from ..._events._events import (
@@ -59,15 +57,115 @@
5957
convert_eval_execution_output_to_serializable,
6058
)
6159
from ._span_collection import ExecutionSpanCollector
62-
from .mocks.mocks import (
63-
clear_execution_context,
64-
set_execution_context,
65-
)
60+
from .mocks.mocks import clear_execution_context, set_execution_context
6661

6762
T = TypeVar("T", bound=UiPathBaseRuntime)
6863
C = TypeVar("C", bound=UiPathRuntimeContext)
6964

7065

66+
def extract_workflow_from_spans(spans: list[ReadableSpan]) -> list[str]:
    """Extract the ordered list of main workflow nodes from execution spans.

    Two strategies are used:

    1. If any span name contains "langgraph" (case-insensitive), only the
       direct children of those spans are considered. This filters out
       sub-nodes and internal components. The node name is taken from the
       "langgraph.node" attribute, then "node_name", then the span name.
    2. Otherwise every span carrying a "langgraph.node" attribute is used.

    The system nodes "__start__" and "__end__" are always excluded, and each
    node name is reported once, at the position of its first occurrence.

    Args:
        spans: Spans collected from an agent execution. Each span is read
            through its ``name``, ``attributes`` and ``parent`` attributes
            and its ``get_span_context()`` method.

    Returns:
        Unique main node names in the order they first appear in ``spans``.
    """
    # Local import: the file's import block is not fully visible from here.
    from collections.abc import Mapping

    system_nodes = {"__start__", "__end__"}

    def _attributes_of(span):
        # Span attributes are typed as a Mapping and are not necessarily a
        # plain dict, so accept any mapping; an `isinstance(..., dict)` check
        # would silently ignore such attributes.
        attributes = getattr(span, "attributes", None)
        return attributes if isinstance(attributes, Mapping) else {}

    # First pass: collect the ids of spans that look like a LangGraph graph
    # span (the exact span name may vary, hence the substring match).
    langgraph_span_ids = set()
    for span in spans:
        span_name = getattr(span, "name", "")
        if span_name and "langgraph" in span_name.lower():
            span_context = span.get_span_context()
            if span_context:
                langgraph_span_ids.add(span_context.span_id)

    # Second pass: gather candidate node names in span order.
    candidates = []
    if langgraph_span_ids:
        for span in spans:
            parent_context = getattr(span, "parent", None)
            parent_span_id = (
                getattr(parent_context, "span_id", None)
                if parent_context
                else None
            )
            # Only direct children of a LangGraph span are main nodes.
            if parent_span_id not in langgraph_span_ids:
                continue

            attributes = _attributes_of(span)
            candidates.append(
                attributes.get("langgraph.node")
                or attributes.get("node_name")
                or getattr(span, "name", "")
            )
    else:
        # Fallback: no graph span found, rely on the attribute alone.
        candidates = [
            _attributes_of(span).get("langgraph.node") for span in spans
        ]

    # Drop empty names and system nodes; keep first occurrences only.
    node_order = []
    seen_nodes = set()
    for node_name in candidates:
        if not node_name or node_name in system_nodes:
            continue
        if node_name not in seen_nodes:
            seen_nodes.add(node_name)
            node_order.append(node_name)

    return node_order
167+
168+
71169
class ExecutionSpanExporter(SpanExporter):
72170
"""Custom exporter that stores spans grouped by execution ids."""
73171

@@ -219,7 +317,7 @@ async def execute(self) -> UiPathRuntimeResult:
219317
eval_set_id=evaluation_set.id,
220318
no_of_evals=len(evaluation_set.evaluations),
221319
evaluators=evaluators,
222-
evaluator_weights=getattr(evaluation_set, 'evaluator_weights', None),
320+
evaluator_weights=getattr(evaluation_set, "evaluator_weights", None),
223321
),
224322
)
225323

@@ -253,7 +351,7 @@ async def execute(self) -> UiPathRuntimeResult:
253351
)
254352

255353
# Calculate weighted final score if weights are defined
256-
evaluator_weights = getattr(evaluation_set, 'evaluator_weights', None)
354+
evaluator_weights = getattr(evaluation_set, "evaluator_weights", None)
257355
weighted_final_score = None
258356
if evaluator_weights:
259357
weighted_total = 0.0
@@ -425,6 +523,11 @@ async def _execute_eval(
425523
)
426524
)
427525
)
526+
# Extract workflow nodes from spans even in error case
527+
if spans:
528+
workflow = extract_workflow_from_spans(spans)
529+
if workflow:
530+
evaluation_run_results.workflow = workflow
428531
raise
429532

430533
if self.context.verbose:
@@ -433,6 +536,12 @@ async def _execute_eval(
433536
agent_execution_output
434537
)
435538
)
539+
540+
# Extract workflow nodes from spans
541+
workflow = extract_workflow_from_spans(agent_execution_output.spans)
542+
# Always assign workflow; an empty extraction is stored as None rather than an empty list
543+
evaluation_run_results.workflow = workflow if workflow else None
544+
436545
evaluation_item_results: list[EvalItemResult] = []
437546

438547
for evaluator in evaluators:

0 commit comments

Comments
 (0)