Skip to content

Commit b72c9a8

Browse files
committed
fix(agents): fix lastMessage text extraction and tool_use/tool_result ordering
- extractLastMessageText now concatenates the text of all assistant messages instead of returning only the last message's text. Because of streaming, each text chunk becomes a separate assistant message, so they all need to be joined.
- Fixed stream-parser so that text chunks are no longer added to assistantMessages once tool calls have started; doing so separated tool_use blocks from their tool_result blocks (an Anthropic API violation).
- Updated extractSpawnResults and added extractSelectorResult helpers in editor-best-of-n for proper handling of lastMessage and structured outputs.
1 parent 6984a5f commit b72c9a8

File tree

3 files changed

+187
-69
lines changed

3 files changed

+187
-69
lines changed

.agents/editor/best-of-n/editor-best-of-n.ts

Lines changed: 169 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -110,20 +110,19 @@ function* handleStepsDefault({
110110
} satisfies ToolCall<'spawn_agents'>
111111

112112
// Extract spawn results
113-
const spawnedImplementations =
114-
extractSpawnResults<{ text: string }[]>(implementorResults)
113+
const spawnedImplementations = extractSpawnResults(implementorResults)
115114

116115
logger.info({ spawnedImplementations }, 'spawnedImplementations')
117116

118-
// Extract all the plans from the structured outputs
117+
// Extract all the plans from the lastMessage outputs
119118
const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
120119
// Parse implementations from spawn results
121120
const implementations = spawnedImplementations.map((result, index) => ({
122121
id: letters[index],
123122
content:
124123
'errorMessage' in result
125124
? `Error: ${result.errorMessage}`
126-
: result[0].text,
125+
: extractLastMessageText(result) ?? '',
127126
}))
128127

129128
// Spawn selector with implementations as params
@@ -140,10 +139,7 @@ function* handleStepsDefault({
140139
includeToolCall: false,
141140
} satisfies ToolCall<'spawn_agents'>
142141

143-
const selectorOutput = extractSpawnResults<{
144-
implementationId: string
145-
reasoning: string
146-
}>(selectorResult)[0]
142+
const selectorOutput = extractSelectorResult(selectorResult)
147143

148144
if ('errorMessage' in selectorOutput) {
149145
yield {
@@ -195,24 +191,83 @@ function* handleStepsDefault({
195191
includeToolCall: false,
196192
} satisfies ToolCall<'set_output'>
197193

198-
function extractSpawnResults<T>(
194+
/**
 * Extracts the array of subagent results from spawn_agents tool output.
 *
 * The spawn_agents tool result structure is:
 * [{ type: 'json', value: [{ agentName, agentType, value: AgentOutput }] }]
 *
 * Only the first entry whose `type` is 'json' is consulted. If that entry's
 * `value` is not an array it is treated as a single spawned-agent record.
 *
 * @param results - Raw tool result entries, or undefined.
 * @returns The truthy `value` of each spawned-agent record, in order. Records
 *   that are null/undefined or have a falsy `value` are dropped, so the
 *   returned array may be shorter than the number of spawned agents.
 */
function extractSpawnResults(results: any[] | undefined): any[] {
  if (!Array.isArray(results) || results.length === 0) return []

  // Optional chaining: a null/undefined entry is skipped instead of throwing.
  const jsonResult = results.find((entry) => entry?.type === 'json')
  if (!jsonResult?.value) return []

  // Normalize a lone record into a one-element array.
  const spawnedResults: any[] = Array.isArray(jsonResult.value)
    ? jsonResult.value
    : [jsonResult.value]

  // Extract the value (AgentOutput) from each record.
  return spawnedResults.map((record) => record?.value).filter(Boolean)
}
217+
218+
/**
 * Extracts the structured output from a selector agent's spawn result.
 * Selector agents use outputMode: 'structured_output'.
 *
 * Only the first spawned-agent output is inspected.
 *
 * @param results - Raw spawn_agents tool result entries, or undefined.
 * @returns The selector's structured value when the first output has type
 *   'structuredOutput' and an object `value`; otherwise an `{ errorMessage }`
 *   object. The fields of the structured value are not validated here
 *   (NOTE(review): presumably enforced by the selector's output schema — confirm).
 */
function extractSelectorResult(
  results: any[] | undefined,
): { implementationId: string; reasoning: string } | { errorMessage: string } {
  const [firstOutput] = extractSpawnResults(results)
  if (!firstOutput) {
    return { errorMessage: 'No selector output' }
  }

  // Require an object value: callers test the result with the `in` operator,
  // which throws a TypeError when applied to a primitive.
  if (
    firstOutput.type === 'structuredOutput' &&
    firstOutput.value &&
    typeof firstOutput.value === 'object'
  ) {
    return firstOutput.value
  }

  if (firstOutput.type === 'error') {
    return { errorMessage: firstOutput.message ?? 'Selector error' }
  }

  return { errorMessage: 'Invalid selector output format' }
}
238+
239+
/**
 * Extracts all text content from a 'lastMessage' AgentOutput.
 *
 * For agents with outputMode: 'last_message', the output structure is:
 * { type: 'lastMessage', value: [{ role: 'assistant', content: [{ type: 'text', text: '...' }] }] }
 *
 * Due to streaming, each text chunk may be a separate assistant message, so
 * the text parts of every assistant message are concatenated (with no
 * separator) to rebuild the full response.
 *
 * @param agentOutput - A spawned agent's output; any shape is tolerated.
 * @returns The concatenated text, or null when the output is not a
 *   'lastMessage' with an array value or contains no assistant text parts.
 */
function extractLastMessageText(agentOutput: any): string | null {
  // Anything that is not a 'lastMessage' carrying a message array has no text.
  if (
    agentOutput?.type !== 'lastMessage' ||
    !Array.isArray(agentOutput.value)
  ) {
    return null
  }

  const textParts: string[] = []
  for (const message of agentOutput.value) {
    // Null-safe: skip missing entries and non-assistant messages.
    if (message?.role !== 'assistant' || !Array.isArray(message.content)) {
      continue
    }
    for (const part of message.content) {
      if (part?.type === 'text' && typeof part.text === 'string') {
        textParts.push(part.text)
      }
    }
  }

  return textParts.length > 0 ? textParts.join('') : null
}
217272

218273
// Extract only tool calls from text, removing any commentary
@@ -395,12 +450,14 @@ function* handleStepsMax({
395450
}
396451

397452
/**
398-
* Extracts the text content from a 'lastMessage' AgentOutput.
453+
* Extracts all text content from a 'lastMessage' AgentOutput.
399454
*
400455
* For agents with outputMode: 'last_message', the output structure is:
401456
* { type: 'lastMessage', value: [{ role: 'assistant', content: [{ type: 'text', text: '...' }] }] }
402457
*
403-
* Returns the text from the last assistant message, or null if not found.
458+
* Returns concatenated text from all assistant messages, or null if not found.
459+
* Note: Due to streaming, each text chunk may be a separate assistant message,
460+
* so we need to concatenate all of them to get the full response.
404461
*/
405462
function extractLastMessageText(agentOutput: any): string | null {
406463
if (!agentOutput) return null
@@ -410,18 +467,18 @@ function* handleStepsMax({
410467
agentOutput.type === 'lastMessage' &&
411468
Array.isArray(agentOutput.value)
412469
) {
413-
// Find the last assistant message with text content
414-
for (let i = agentOutput.value.length - 1; i >= 0; i--) {
415-
const message = agentOutput.value[i]
470+
// Collect text from all assistant messages (streaming creates multiple messages)
471+
const textParts: string[] = []
472+
for (const message of agentOutput.value) {
416473
if (message.role === 'assistant' && Array.isArray(message.content)) {
417-
// Find text content in the message
418474
for (const part of message.content) {
419475
if (part.type === 'text' && typeof part.text === 'string') {
420-
return part.text
476+
textParts.push(part.text)
421477
}
422478
}
423479
}
424480
}
481+
return textParts.length > 0 ? textParts.join('') : null
425482
}
426483
return null
427484
}
@@ -457,18 +514,17 @@ function* handleStepsOpus({
457514
} satisfies ToolCall<'spawn_agents'>
458515

459516
// Extract spawn results
460-
const spawnedImplementations =
461-
extractSpawnResults<{ text: string }[]>(implementorResults)
517+
const spawnedImplementations = extractSpawnResults(implementorResults)
462518

463-
// Extract all the plans from the structured outputs
519+
// Extract all the plans from the lastMessage outputs
464520
const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
465521
// Parse implementations from spawn results
466522
const implementations = spawnedImplementations.map((result, index) => ({
467523
id: letters[index],
468524
content:
469525
'errorMessage' in result
470526
? `Error: ${result.errorMessage}`
471-
: result[0].text,
527+
: extractLastMessageText(result) ?? '',
472528
}))
473529

474530
// Spawn selector with implementations as params
@@ -485,10 +541,7 @@ function* handleStepsOpus({
485541
includeToolCall: false,
486542
} satisfies ToolCall<'spawn_agents'>
487543

488-
const selectorOutput = extractSpawnResults<{
489-
implementationId: string
490-
reasoning: string
491-
}>(selectorResult)[0]
544+
const selectorOutput = extractSelectorResult(selectorResult)
492545

493546
if ('errorMessage' in selectorOutput) {
494547
yield {
@@ -540,24 +593,83 @@ function* handleStepsOpus({
540593
includeToolCall: false,
541594
} satisfies ToolCall<'set_output'>
542595

543-
function extractSpawnResults<T>(
596+
/**
 * Extracts the array of subagent results from spawn_agents tool output.
 *
 * The spawn_agents tool result structure is:
 * [{ type: 'json', value: [{ agentName, agentType, value: AgentOutput }] }]
 *
 * Only the first entry whose `type` is 'json' is consulted. If that entry's
 * `value` is not an array it is treated as a single spawned-agent record.
 *
 * @param results - Raw tool result entries, or undefined.
 * @returns The truthy `value` of each spawned-agent record, in order. Records
 *   that are null/undefined or have a falsy `value` are dropped, so the
 *   returned array may be shorter than the number of spawned agents.
 */
function extractSpawnResults(results: any[] | undefined): any[] {
  if (!Array.isArray(results) || results.length === 0) return []

  // Optional chaining: a null/undefined entry is skipped instead of throwing.
  const jsonResult = results.find((entry) => entry?.type === 'json')
  if (!jsonResult?.value) return []

  // Normalize a lone record into a one-element array.
  const spawnedResults: any[] = Array.isArray(jsonResult.value)
    ? jsonResult.value
    : [jsonResult.value]

  // Extract the value (AgentOutput) from each record.
  return spawnedResults.map((record) => record?.value).filter(Boolean)
}
619+
620+
/**
 * Extracts the structured output from a selector agent's spawn result.
 * Selector agents use outputMode: 'structured_output'.
 *
 * Only the first spawned-agent output is inspected.
 *
 * @param results - Raw spawn_agents tool result entries, or undefined.
 * @returns The selector's structured value when the first output has type
 *   'structuredOutput' and an object `value`; otherwise an `{ errorMessage }`
 *   object. The fields of the structured value are not validated here
 *   (NOTE(review): presumably enforced by the selector's output schema — confirm).
 */
function extractSelectorResult(
  results: any[] | undefined,
): { implementationId: string; reasoning: string } | { errorMessage: string } {
  const [firstOutput] = extractSpawnResults(results)
  if (!firstOutput) {
    return { errorMessage: 'No selector output' }
  }

  // Require an object value: callers test the result with the `in` operator,
  // which throws a TypeError when applied to a primitive.
  if (
    firstOutput.type === 'structuredOutput' &&
    firstOutput.value &&
    typeof firstOutput.value === 'object'
  ) {
    return firstOutput.value
  }

  if (firstOutput.type === 'error') {
    return { errorMessage: firstOutput.message ?? 'Selector error' }
  }

  return { errorMessage: 'Invalid selector output format' }
}
640+
641+
/**
 * Extracts all text content from a 'lastMessage' AgentOutput.
 *
 * For agents with outputMode: 'last_message', the output structure is:
 * { type: 'lastMessage', value: [{ role: 'assistant', content: [{ type: 'text', text: '...' }] }] }
 *
 * Due to streaming, each text chunk may be a separate assistant message, so
 * the text parts of every assistant message are concatenated (with no
 * separator) to rebuild the full response.
 *
 * @param agentOutput - A spawned agent's output; any shape is tolerated.
 * @returns The concatenated text, or null when the output is not a
 *   'lastMessage' with an array value or contains no assistant text parts.
 */
function extractLastMessageText(agentOutput: any): string | null {
  // Anything that is not a 'lastMessage' carrying a message array has no text.
  if (
    agentOutput?.type !== 'lastMessage' ||
    !Array.isArray(agentOutput.value)
  ) {
    return null
  }

  const textParts: string[] = []
  for (const message of agentOutput.value) {
    // Null-safe: skip missing entries and non-assistant messages.
    if (message?.role !== 'assistant' || !Array.isArray(message.content)) {
      continue
    }
    for (const part of message.content) {
      if (part?.type === 'text' && typeof part.text === 'string') {
        textParts.push(part.text)
      }
    }
  }

  return textParts.length > 0 ? textParts.join('') : null
}
562674

563675
// Extract only tool calls from text, removing any commentary

.agents/file-explorer/file-picker.ts

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -113,32 +113,34 @@ Do not use any further tools or spawn any further agents.
113113
}
114114

115115
/**
 * Extracts all text content from a 'lastMessage' AgentOutput.
 *
 * For agents with outputMode: 'last_message', the output structure is:
 * { type: 'lastMessage', value: [{ role: 'assistant', content: [{ type: 'text', text: '...' }] }] }
 *
 * Due to streaming, each text chunk may be a separate assistant message, so
 * the text parts of every assistant message are concatenated (with no
 * separator) to rebuild the full response.
 *
 * @param agentOutput - A spawned agent's output; any shape is tolerated.
 * @returns The concatenated text, or null when the output is not a
 *   'lastMessage' with an array value or contains no assistant text parts.
 */
function extractLastMessageText(agentOutput: any): string | null {
  // Anything that is not a 'lastMessage' carrying a message array has no text.
  if (agentOutput?.type !== 'lastMessage' || !Array.isArray(agentOutput.value)) {
    return null
  }

  const textParts: string[] = []
  for (const message of agentOutput.value) {
    // Null-safe: skip missing entries and non-assistant messages.
    if (message?.role !== 'assistant' || !Array.isArray(message.content)) {
      continue
    }
    for (const part of message.content) {
      if (part?.type === 'text' && typeof part.text === 'string') {
        textParts.push(part.text)
      }
    }
  }

  return textParts.length > 0 ? textParts.join('') : null
}
144146

packages/agent-runtime/src/tools/stream-parser.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,11 @@ export async function processStream(
258258
} else if (chunk.type === 'text') {
259259
onResponseChunk(chunk.text)
260260
fullResponseChunks.push(chunk.text)
261-
assistantMessages.push(assistantMessage(chunk.text))
261+
// Only add text as assistant message if no tool calls have been made yet
262+
// to avoid separating tool_use from tool_result (Anthropic API requirement)
263+
if (toolCalls.length === 0) {
264+
assistantMessages.push(assistantMessage(chunk.text))
265+
}
262266
} else if (chunk.type === 'error') {
263267
onResponseChunk(chunk)
264268

0 commit comments

Comments
 (0)