1212from opentelemetry .sdk .trace import ReadableSpan , Span
1313from opentelemetry .sdk .trace .export import SpanExporter , SpanExportResult
1414
15- from uipath ._cli ._evals .mocks .input_mocker import (
16- generate_llm_input ,
17- )
15+ from uipath ._cli ._evals .mocks .input_mocker import generate_llm_input
1816
1917from ..._events ._event_bus import EventBus
2018from ..._events ._events import (
5957 convert_eval_execution_output_to_serializable ,
6058)
6159from ._span_collection import ExecutionSpanCollector
62- from .mocks .mocks import (
63- clear_execution_context ,
64- set_execution_context ,
65- )
60+ from .mocks .mocks import clear_execution_context , set_execution_context
6661
6762T = TypeVar ("T" , bound = UiPathBaseRuntime )
6863C = TypeVar ("C" , bound = UiPathRuntimeContext )
6964
7065
def extract_workflow_from_spans(spans: list[ReadableSpan]) -> list[str]:
    """Extract the ordered list of main workflow nodes from execution spans.

    Two strategies are used:

    * If at least one span has "langgraph" in its name (case-insensitive),
      only spans whose direct parent is one of those spans are considered.
      The node name is read from the ``langgraph.node`` attribute, then the
      ``node_name`` attribute, then falls back to the span's own name.
    * Otherwise, every span carrying a ``langgraph.node`` attribute is
      considered and that attribute is used as the node name.

    In both cases the system nodes ``__start__`` and ``__end__`` are dropped
    and each node name is reported once, at the position of its first
    occurrence in ``spans``.

    Args:
        spans: Spans collected from an agent execution, in the order they
            should be reported.

    Returns:
        Unique main node names in first-seen order; empty if none are found.
    """
    # Local import keeps this block self-contained; attributes on SDK spans
    # are read-only mappings rather than plain dicts, so check the ABC.
    from collections.abc import Mapping

    system_nodes = {"__start__", "__end__"}

    # First pass: collect ids of spans that look like a LangGraph graph span.
    langgraph_span_ids = set()
    for span in spans:
        span_name = getattr(span, "name", "")
        if span_name and "langgraph" in span_name.lower():
            span_context = span.get_span_context()
            if span_context:
                langgraph_span_ids.add(span_context.span_id)

    node_order: list[str] = []
    seen_nodes = set()

    # Second pass: pick the node name for each candidate span.
    for span in spans:
        attributes = getattr(span, "attributes", None)
        if not isinstance(attributes, Mapping):
            # Missing or unexpected attribute container: treat as empty.
            attributes = {}

        if langgraph_span_ids:
            parent_context = getattr(span, "parent", None)
            parent_span_id = (
                getattr(parent_context, "span_id", None) if parent_context else None
            )
            # Only direct children of a LangGraph span are main nodes; this
            # also skips root spans (no parent) and nested sub-node spans.
            if parent_span_id not in langgraph_span_ids:
                continue
            node_name = (
                attributes.get("langgraph.node")
                or attributes.get("node_name")
                or getattr(span, "name", "")
            )
        else:
            # Fallback: no graph span was identified, so rely solely on the
            # explicit langgraph.node attribute.
            node_name = attributes.get("langgraph.node")

        if not node_name or node_name in system_nodes:
            continue

        if node_name not in seen_nodes:
            seen_nodes.add(node_name)
            node_order.append(node_name)

    return node_order
167+
168+
71169class ExecutionSpanExporter (SpanExporter ):
72170 """Custom exporter that stores spans grouped by execution ids."""
73171
@@ -219,7 +317,7 @@ async def execute(self) -> UiPathRuntimeResult:
219317 eval_set_id = evaluation_set .id ,
220318 no_of_evals = len (evaluation_set .evaluations ),
221319 evaluators = evaluators ,
222- evaluator_weights = getattr (evaluation_set , ' evaluator_weights' , None ),
320+ evaluator_weights = getattr (evaluation_set , " evaluator_weights" , None ),
223321 ),
224322 )
225323
@@ -253,7 +351,7 @@ async def execute(self) -> UiPathRuntimeResult:
253351 )
254352
255353 # Calculate weighted final score if weights are defined
256- evaluator_weights = getattr (evaluation_set , ' evaluator_weights' , None )
354+ evaluator_weights = getattr (evaluation_set , " evaluator_weights" , None )
257355 weighted_final_score = None
258356 if evaluator_weights :
259357 weighted_total = 0.0
@@ -425,6 +523,11 @@ async def _execute_eval(
425523 )
426524 )
427525 )
526+ # Extract workflow nodes from spans even in error case
527+ if spans :
528+ workflow = extract_workflow_from_spans (spans )
529+ if workflow :
530+ evaluation_run_results .workflow = workflow
428531 raise
429532
430533 if self .context .verbose :
@@ -433,6 +536,12 @@ async def _execute_eval(
433536 agent_execution_output
434537 )
435538 )
539+
540+ # Extract workflow nodes from spans
541+ workflow = extract_workflow_from_spans (agent_execution_output .spans )
542+ # Store the extracted workflow; an empty extraction is normalized to None
543+ evaluation_run_results .workflow = workflow if workflow else None
544+
436545 evaluation_item_results : list [EvalItemResult ] = []
437546
438547 for evaluator in evaluators :
0 commit comments