CodebuffAI
diff --git a/‎backend/src/__tests__/cost-aggregation.test.ts‎
Lines changed: 6 additions & 0 deletions b/‎backend/src/__tests__/cost-aggregation.test.ts‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎backend/src/__tests__/main-prompt.integration.test.ts‎
Lines changed: 1 addition & 2 deletions b/‎backend/src/__tests__/main-prompt.integration.test.ts‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎backend/src/__tests__/main-prompt.test.ts‎
Lines changed: 8 additions & 5 deletions b/‎backend/src/__tests__/main-prompt.test.ts‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎backend/src/__tests__/spawn-agents-message-history.test.ts‎
Lines changed: 1 addition & 0 deletions b/‎backend/src/__tests__/spawn-agents-message-history.test.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backend/src/__tests__/spawn-agents-permissions.test.ts‎
Lines changed: 2 additions & 0 deletions b/‎backend/src/__tests__/spawn-agents-permissions.test.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎backend/src/__tests__/subagent-streaming.test.ts‎
Lines changed: 1 addition & 0 deletions b/‎backend/src/__tests__/subagent-streaming.test.ts‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backend/src/main-prompt.ts‎
Lines changed: 11 additions & 8 deletions b/‎backend/src/main-prompt.ts‎
Lines changed: 11 additions & 8 deletions
diff --git a/‎backend/src/run-agent-step.ts‎
Lines changed: 67 additions & 19 deletions b/‎backend/src/run-agent-step.ts‎
Lines changed: 67 additions & 19 deletions
@@ -137,6 +137,7 @@ describe('Cost Aggregation System', () => {
             stepsRemaining: 10,
             creditsUsed: 75, // First subagent uses 75 credits
           },
+          output: { type: 'lastMessage', value: 'Sub-agent 1 response' },
         })
         .mockResolvedValueOnce({
           agentState: {
@@ -148,6 +149,7 @@ describe('Cost Aggregation System', () => {
             stepsRemaining: 10,
             creditsUsed: 100, // Second subagent uses 100 credits
           },
+          output: { type: 'lastMessage', value: 'Sub-agent 2 response' },
         })
 
       const mockToolCall = {
@@ -213,6 +215,7 @@ describe('Cost Aggregation System', () => {
             stepsRemaining: 10,
             creditsUsed: 50, // Successful agent
           },
+          output: { type: 'lastMessage', value: 'Successful response' },
         })
         .mockRejectedValueOnce((() => {
           const error = new Error('Agent failed') as any
@@ -225,6 +228,7 @@ describe('Cost Aggregation System', () => {
             stepsRemaining: 10,
             creditsUsed: 25, // Partial cost from failed agent
           }
+          error.output = { type: 'error', message: 'Agent failed' }
           return error
         })())
 
@@ -366,6 +370,7 @@ describe('Cost Aggregation System', () => {
             stepsRemaining: 10,
             creditsUsed: subAgent1Cost,
           } as AgentState,
+          output: { type: 'lastMessage', value: 'Sub-agent 1 response' },
         })
         .mockResolvedValueOnce({
           agentState: {
@@ -377,6 +382,7 @@ describe('Cost Aggregation System', () => {
             stepsRemaining: 10,
             creditsUsed: subAgent2Cost,
           } as AgentState,
+          output: { type: 'lastMessage', value: 'Sub-agent 2 response' },
         })
 
       const mockToolCall = {
 
@@ -384,8 +384,7 @@ export function getMessagesSubset(messages: Message[], otherTokens: number) {
     }
 
     const {
-      toolCalls,
-      toolResults,
+      output,
       sessionState: finalSessionState,
     } = await mainPrompt(new MockWebSocket() as unknown as WebSocket, action, {
       userId: TEST_USER_ID,
 
@@ -335,7 +335,7 @@ describe('mainPrompt', () => {
       toolResults: [],
     }
 
-    const { toolCalls, sessionState: newSessionState } = await mainPrompt(
+    const { sessionState: newSessionState, output } = await mainPrompt(
       new MockWebSocket() as unknown as WebSocket,
       action,
       {
@@ -361,6 +361,9 @@ describe('mainPrompt', () => {
       }),
     )
 
+    // Verify that the output contains the expected structure
+    expect(output.type).toBeDefined()
+
     // Verify that a tool result was added to message history
     const toolResultMessages =
       newSessionState.mainAgentState.messageHistory.filter(
@@ -466,7 +469,7 @@ describe('mainPrompt', () => {
       toolResults: [],
     }
 
-    const { toolCalls } = await mainPrompt(
+    const { output } = await mainPrompt(
       new MockWebSocket() as unknown as WebSocket,
       action,
       {
@@ -477,7 +480,7 @@ describe('mainPrompt', () => {
       },
     )
 
-    expect(toolCalls).toHaveLength(0) // No tool calls expected
+    expect(output.type).toBeDefined() // Output should exist
   })
 
   it('should update consecutiveAssistantMessages when new prompt is received', async () => {
@@ -556,7 +559,7 @@ describe('mainPrompt', () => {
       toolResults: [],
     }
 
-    const { toolCalls } = await mainPrompt(
+    const { output } = await mainPrompt(
       new MockWebSocket() as unknown as WebSocket,
       action,
       {
@@ -567,7 +570,7 @@ describe('mainPrompt', () => {
       },
     )
 
-    expect(toolCalls).toHaveLength(0) // No tool calls expected for empty response
+    expect(output.type).toBeDefined() // Output should exist even for empty response
   })
 
   it('should unescape ampersands in run_terminal_command tool calls', async () => {
 
@@ -52,6 +52,7 @@ describe('Spawn Agents Message History', () => {
             { role: 'assistant', content: 'Mock agent response' },
           ],
         },
+        output: { type: 'lastMessage', value: 'Mock agent response' },
       }
     })
   })
 
@@ -72,6 +72,7 @@ describe('Spawn Agents Permissions', () => {
             { role: 'assistant', content: 'Mock agent response' },
           ],
         },
+        output: { type: 'lastMessage', value: 'Mock agent response' },
       }
     })
   })
@@ -327,6 +328,7 @@ describe('Spawn Agents Permissions', () => {
       })
 
       const output = await result
+      console.log('output', output)
       expect(JSON.stringify(output)).toContain('Error spawning agent')
       expect(JSON.stringify(output)).toContain(
         'Agent type nonexistent not found',
 
@@ -89,6 +89,7 @@ describe('Subagent Streaming', () => {
             { role: 'assistant', content: 'Test response from subagent' },
           ],
         },
+        output: { type: 'lastMessage', value: 'Test response from subagent' },
       }
     })
 
 
@@ -12,11 +12,11 @@ import { requestToolCall } from './websockets/websocket-action'
 import type { AgentTemplate } from './templates/types'
 import type { ClientAction } from '@codebuff/common/actions'
 import type { CostMode } from '@codebuff/common/constants'
-import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
   SessionState,
   AgentTemplateType,
+  AgentOutput,
 } from '@codebuff/common/types/session-state'
 import type { WebSocket } from 'ws'
 
@@ -33,8 +33,7 @@ export const mainPrompt = async (
   options: MainPromptOptions,
 ): Promise<{
   sessionState: SessionState
-  toolCalls: []
-  toolResults: ToolResultPart[]
+  output: AgentOutput
 }> => {
   const { userId, clientSessionId, onResponseChunk, localAgentTemplates } =
     options
@@ -102,8 +101,10 @@ export const mainPrompt = async (
 
       return {
         sessionState: newSessionState,
-        toolCalls: [],
-        toolResults: [],
+        output: {
+          type: 'lastMessage',
+          value: output,
+        },
       }
     }
   }
@@ -178,7 +179,7 @@ export const mainPrompt = async (
   mainAgentTemplate.spawnableAgents = updatedSubagents
   localAgentTemplates[agentType] = mainAgentTemplate
 
-  const { agentState } = await loopAgentSteps(ws, {
+  const { agentState, output } = await loopAgentSteps(ws, {
     userInputId: promptId,
     prompt,
     params: promptParams,
@@ -200,7 +201,9 @@ export const mainPrompt = async (
       fileContext,
       mainAgentState: agentState,
     },
-    toolCalls: [],
-    toolResults: [],
+    output: output ?? {
+      type: 'error' as const,
+      message: 'No output from agent',
+    },
   }
 }
@@ -36,15 +36,20 @@ import { getRequestContext } from './websockets/request-context'
 import type { AgentResponseTrace } from '@codebuff/bigquery'
 import type { CodebuffToolMessage } from '@codebuff/common/tools/list'
 import type { AgentTemplate } from '@codebuff/common/types/agent-template'
-import type { Message } from '@codebuff/common/types/messages/codebuff-message'
+import type {
+  AssistantMessage,
+  Message,
+} from '@codebuff/common/types/messages/codebuff-message'
 import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 import type {
   AgentTemplateType,
   AgentState,
+  AgentOutput,
 } from '@codebuff/common/types/session-state'
 import type { ProjectFileContext } from '@codebuff/common/util/file'
 import type { WebSocket } from 'ws'
+import { getErrorObject } from '@codebuff/common/util/error'
 
 export interface AgentOptions {
   userId: string | undefined
@@ -473,7 +478,10 @@ export const loopAgentSteps = async (
     clientSessionId: string
     onResponseChunk: (chunk: string | PrintModeEvent) => void
   },
-) => {
+): Promise<{
+  agentState: AgentState
+  output: AgentOutput
+}> => {
   const agentTemplate = await getAgentTemplate(agentType, localAgentTemplates)
   if (!agentTemplate) {
     throw new Error(`Agent template not found for type: ${agentType}`)
@@ -539,7 +547,7 @@ export const loopAgentSteps = async (
     },
   )
 
-  let currentAgentState = {
+  let currentAgentState: AgentState = {
     ...agentState,
     messageHistory: initialMessages,
   }
@@ -584,15 +592,7 @@ export const loopAgentSteps = async (
 
       // End turn if programmatic step ended turn, or if the previous runAgentStep ended turn
       if (shouldEndTurn) {
-        if (clearUserPromptMessagesAfterResponse) {
-          currentAgentState.messageHistory = expireMessages(
-            currentAgentState.messageHistory,
-            'userPrompt',
-          )
-        }
-        return {
-          agentState: currentAgentState,
-        }
+        break
       }
 
       const { agentState: newAgentState, shouldEndTurn: llmShouldEndTurn } =
@@ -623,19 +623,67 @@ export const loopAgentSteps = async (
         'userPrompt',
       )
     }
-    return { agentState: currentAgentState }
+
+    return {
+      agentState: currentAgentState,
+      output: getAgentOutput(currentAgentState, agentTemplate),
+    }
   } catch (error) {
-    // Log the error but still return the state with partial costs
     logger.error(
       {
-        error,
+        error: getErrorObject(error),
         agentId: currentAgentState.agentId,
         creditsUsed: currentAgentState.creditsUsed,
       },
-      'Agent execution failed but returning state with partial costs',
+      'Agent execution failed',
     )
-    throw error
-  } finally {
-    // Ensure costs are always captured, even on failure
+    const errorObject = getErrorObject(error)
+    return {
+      agentState: currentAgentState,
+      output: {
+        type: 'error',
+        message: `${errorObject.name}: ${errorObject.message} ${errorObject.stack ? `\n${errorObject.stack}` : ''}`,
+      },
+    }
   }
 }
+
+function getAgentOutput( // Builds the AgentOutput for an agent run; the branch taken depends on agentTemplate.outputMode.
+  agentState: AgentState, // read for `output` and `messageHistory`
+  agentTemplate: AgentTemplate, // read for `outputMode`
+): AgentOutput {
+  if (agentTemplate.outputMode === 'structured_output') {
+    return {
+      type: 'structuredOutput',
+      value: agentState.output ?? null, // null/undefined output is reported as null
+    }
+  }
+  if (agentTemplate.outputMode === 'last_message') {
+    const assistantMessages = agentState.messageHistory.filter(
+      (message): message is AssistantMessage => message.role === 'assistant', // type predicate narrows the filtered array to AssistantMessage[]
+    )
+    const lastAssistantMessage = assistantMessages[assistantMessages.length - 1] // undefined when the history has no assistant messages
+    if (!lastAssistantMessage) {
+      return { // no assistant message: return an error output instead of throwing
+        type: 'error',
+        message: 'No response from agent',
+      }
+    }
+    return {
+      type: 'lastMessage',
+      value: lastAssistantMessage.content, // only the content of the final assistant message is returned
+    }
+  }
+  if (agentTemplate.outputMode === 'all_messages') {
+    // Remove the first message, which includes the previous conversation history.
+    const agentMessages = agentState.messageHistory.slice(1)
+    return {
+      type: 'allMessages',
+      value: agentMessages,
+    }
+  }
+  agentTemplate.outputMode satisfies never // compile-time exhaustiveness check: fails to type-check if an outputMode is not handled above
+  throw new Error( // runtime fallback when outputMode matched none of the branches above
+    `Unknown output mode: ${'outputMode' in agentTemplate ? agentTemplate.outputMode : 'undefined'}`,
+  )
+}
Original file line number	Diff line number	Diff line change
`@@ -384,8 +384,7 @@ export function getMessagesSubset(messages: Message[], otherTokens: number) {`
`384`	`384`	`}`
`385`	`385`
`386`	`386`	`const {`
`387`		`- toolCalls,`
`388`		`- toolResults,`
	`387`	`+ output,`
`389`	`388`	`sessionState: finalSessionState,`
`390`	`389`	`} = await mainPrompt(new MockWebSocket() as unknown as WebSocket, action, {`
`391`	`390`	`userId: TEST_USER_ID,`
Original file line number	Diff line number	Diff line change
`@@ -335,7 +335,7 @@ describe('mainPrompt', () => {`
`335`	`335`	`toolResults: [],`
`336`	`336`	`}`
`337`	`337`
`338`		`- const { toolCalls, sessionState: newSessionState } = await mainPrompt(`
	`338`	`+ const { sessionState: newSessionState, output } = await mainPrompt(`
`339`	`339`	`new MockWebSocket() as unknown as WebSocket,`
`340`	`340`	`action,`
`341`	`341`	`{`
`@@ -361,6 +361,9 @@ describe('mainPrompt', () => {`
`361`	`361`	`}),`
`362`	`362`	`)`
`363`	`363`
	`364`	`+ // Verify that the output contains the expected structure`
	`365`	`+ expect(output.type).toBeDefined()`
	`366`	`+`
`364`	`367`	`// Verify that a tool result was added to message history`
`365`	`368`	`const toolResultMessages =`
`366`	`369`	`newSessionState.mainAgentState.messageHistory.filter(`
`@@ -466,7 +469,7 @@ describe('mainPrompt', () => {`
`466`	`469`	`toolResults: [],`
`467`	`470`	`}`
`468`	`471`
`469`		`- const { toolCalls } = await mainPrompt(`
	`472`	`+ const { output } = await mainPrompt(`
`470`	`473`	`new MockWebSocket() as unknown as WebSocket,`
`471`	`474`	`action,`
`472`	`475`	`{`
`@@ -477,7 +480,7 @@ describe('mainPrompt', () => {`
`477`	`480`	`},`
`478`	`481`	`)`
`479`	`482`
`480`		`- expect(toolCalls).toHaveLength(0) // No tool calls expected`
	`483`	`+ expect(output.type).toBeDefined() // Output should exist`
`481`	`484`	`})`
`482`	`485`
`483`	`486`	`it('should update consecutiveAssistantMessages when new prompt is received', async () => {`
`@@ -556,7 +559,7 @@ describe('mainPrompt', () => {`
`556`	`559`	`toolResults: [],`
`557`	`560`	`}`
`558`	`561`
`559`		`- const { toolCalls } = await mainPrompt(`
	`562`	`+ const { output } = await mainPrompt(`
`560`	`563`	`new MockWebSocket() as unknown as WebSocket,`
`561`	`564`	`action,`
`562`	`565`	`{`
`@@ -567,7 +570,7 @@ describe('mainPrompt', () => {`
`567`	`570`	`},`
`568`	`571`	`)`
`569`	`572`
`570`		`- expect(toolCalls).toHaveLength(0) // No tool calls expected for empty response`
	`573`	`+ expect(output.type).toBeDefined() // Output should exist even for empty response`
`571`	`574`	`})`
`572`	`575`
`573`	`576`	`it('should unescape ampersands in run_terminal_command tool calls', async () => {`
Original file line number	Diff line number	Diff line change
`@@ -52,6 +52,7 @@ describe('Spawn Agents Message History', () => {`
`52`	`52`	`{ role: 'assistant', content: 'Mock agent response' },`
`53`	`53`	`],`
`54`	`54`	`},`
	`55`	`+ output: { type: 'lastMessage', value: 'Mock agent response' },`
`55`	`56`	`}`
`56`	`57`	`})`
`57`	`58`	`})`
Original file line number	Diff line number	Diff line change
`@@ -89,6 +89,7 @@ describe('Subagent Streaming', () => {`
`89`	`89`	`{ role: 'assistant', content: 'Test response from subagent' },`
`90`	`90`	`],`
`91`	`91`	`},`
	`92`	`+ output: { type: 'lastMessage', value: 'Test response from subagent' },`
`92`	`93`	`}`
`93`	`94`	`})`
`94`	`95`