Skip to content

Commit ad79030

Browse files
committed
feat(sdk): propagate NetworkError through agent-runtime for retry support

- Throw NetworkError with proper error codes from promptAiSdkStream on AI SDK errors
- Re-throw NetworkError from runAgentStep instead of converting to error output
- Add reject() to promise in runOnce to properly propagate retryable errors
- Export getRetryableErrorCode for reuse in CLI
- Add comprehensive error code detection for AI_RetryError messages
- Add debug logging in callMainPrompt error handler

This allows the SDK retry wrapper to catch and retry network errors properly.
1 parent a694007 commit ad79030

File tree

4 files changed

+86
-13
lines changed

4 files changed

+86
-13
lines changed

packages/agent-runtime/src/run-agent-step.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,12 @@ export async function loopAgentSteps(
880880
errorMessage,
881881
})
882882

883+
// Re-throw NetworkError so retry logic can handle it
884+
// For other error types, wrap in error output for graceful handling
885+
if (error && typeof error === 'object' && 'code' in error && 'name' in error && error.name === 'NetworkError') {
886+
throw error
887+
}
888+
883889
const errorObject = getErrorObject(error)
884890
return {
885891
agentState: currentAgentState,

sdk/src/impl/llm.ts

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@ import {
2121
import { streamText, APICallError, generateText, generateObject } from 'ai'
2222

2323
import { WEBSITE_URL } from '../constants'
24+
import { NetworkError, ErrorCodes } from '../errors'
2425

26+
import type { ErrorCode } from '../errors'
2527
import type { LanguageModelV2 } from '@ai-sdk/provider'
28+
import type { OpenRouterProviderRoutingOptions } from '@codebuff/common/types/agent-template'
2629
import type {
2730
PromptAiSdkFn,
2831
PromptAiSdkStreamFn,
@@ -31,7 +34,6 @@ import type {
3134
} from '@codebuff/common/types/contracts/llm'
3235
import type { ParamsOf } from '@codebuff/common/types/function-params'
3336
import type { JSONObject } from '@codebuff/common/types/json'
34-
import type { OpenRouterProviderRoutingOptions } from '@codebuff/common/types/agent-template'
3537
import type { OpenRouterProviderOptions } from '@openrouter/ai-sdk-provider'
3638
import type z from 'zod/v4'
3739

@@ -252,12 +254,50 @@ export async function* promptAiSdkStream(
252254
? chunk.error
253255
: JSON.stringify(chunk.error)
254256
const errorMessage = `Error from AI SDK (model ${params.model}): ${buildArray([mainErrorMessage, errorBody]).join('\n')}`
255-
yield {
256-
type: 'error',
257-
message: errorMessage,
257+
258+
// Determine error code from the error
259+
let errorCode: ErrorCode = ErrorCodes.UNKNOWN_ERROR
260+
let statusCode: number | undefined
261+
262+
if (APICallError.isInstance(chunk.error)) {
263+
statusCode = chunk.error.statusCode
264+
if (statusCode) {
265+
if (statusCode === 503) {
266+
errorCode = ErrorCodes.SERVICE_UNAVAILABLE
267+
} else if (statusCode >= 500) {
268+
errorCode = ErrorCodes.SERVER_ERROR
269+
} else if (statusCode === 408 || statusCode === 429) {
270+
errorCode = ErrorCodes.TIMEOUT
271+
}
272+
}
273+
} else if (chunk.error instanceof Error) {
274+
// Check error message for error type indicators (case-insensitive)
275+
const msg = chunk.error.message.toLowerCase()
276+
if (msg.includes('service unavailable') || msg.includes('503')) {
277+
errorCode = ErrorCodes.SERVICE_UNAVAILABLE
278+
} else if (
279+
msg.includes('econnrefused') ||
280+
msg.includes('connection refused')
281+
) {
282+
errorCode = ErrorCodes.CONNECTION_REFUSED
283+
} else if (msg.includes('enotfound') || msg.includes('dns')) {
284+
errorCode = ErrorCodes.DNS_FAILURE
285+
} else if (msg.includes('timeout')) {
286+
errorCode = ErrorCodes.TIMEOUT
287+
} else if (
288+
msg.includes('server error') ||
289+
msg.includes('500') ||
290+
msg.includes('502') ||
291+
msg.includes('504')
292+
) {
293+
errorCode = ErrorCodes.SERVER_ERROR
294+
} else if (msg.includes('network') || msg.includes('fetch failed')) {
295+
errorCode = ErrorCodes.NETWORK_ERROR
296+
}
258297
}
259298

260-
return null
299+
// Throw NetworkError so retry logic can handle it
300+
throw new NetworkError(errorMessage, errorCode, statusCode, chunk.error)
261301
}
262302
if (chunk.type === 'reasoning-delta') {
263303
for (const provider of ['openrouter', 'codebuff'] as const) {

sdk/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ export type * from '../../common/src/types/json'
22
export type * from '../../common/src/types/messages/codebuff-message'
33
export type * from '../../common/src/types/messages/data-content'
44
export type * from '../../common/src/types/print-mode'
5-
export { run } from './run'
5+
export { run, getRetryableErrorCode } from './run'
66
export type { RunOptions, RetryOptions } from './run'
77
// Agent type exports
88
export type { AgentDefinition } from '../../common/src/templates/initial-agents-dir/types/agent-definition'

sdk/src/run.ts

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ import { cloneDeep } from 'lodash'
1212

1313
import { getAgentRuntimeImpl } from './impl/agent-runtime'
1414
import { getUserInfoFromApiKey } from './impl/database'
15-
import { RETRYABLE_ERROR_CODES, isNetworkError } from './errors'
15+
import { RETRYABLE_ERROR_CODES, isNetworkError, ErrorCodes, NetworkError } from './errors'
1616
import type { ErrorCode } from './errors'
17+
import { getErrorObject } from '@codebuff/common/util/error'
1718
import { initialSessionState, applyOverridesToSessionState } from './run-state'
1819
import {
1920
MAX_RETRIES_PER_MESSAGE,
@@ -428,8 +429,10 @@ export async function runOnce({
428429
}
429430

430431
let resolve: (value: RunReturnType) => any = () => {}
431-
const promise = new Promise<RunReturnType>((res) => {
432+
let reject: (error: any) => any = () => {}
433+
const promise = new Promise<RunReturnType>((res, rej) => {
432434
resolve = res
435+
reject = rej
433436
})
434437

435438
async function onError(error: { message: string }) {
@@ -700,8 +703,21 @@ export async function runOnce({
700703
signal: signal ?? new AbortController().signal,
701704
}).catch((error) => {
702705
// Let retryable errors propagate so the retry wrapper can handle them
703-
if (isRetryableError(error)) {
704-
throw error
706+
const isRetryable = isRetryableError(error)
707+
logger?.warn(
708+
{
709+
isNetworkError: isNetworkError(error),
710+
errorCode: isNetworkError(error) ? error.code : undefined,
711+
isRetryable,
712+
error: getErrorObject(error),
713+
},
714+
'callMainPrompt caught error, checking if retryable',
715+
)
716+
717+
if (isRetryable) {
718+
// Reject the promise so the retry wrapper can catch it
719+
reject(error)
720+
return
705721
}
706722

707723
// For non-retryable errors, resolve with cancelled state
@@ -857,13 +873,24 @@ async function handleToolCall({
857873
* Extracts an error code from a prompt error message.
858874
* Returns the appropriate ErrorCode if the error is retryable, null otherwise.
859875
*/
860-
const getRetryableErrorCode = (errorMessage: string): ErrorCode | null => {
876+
export const getRetryableErrorCode = (errorMessage: string): ErrorCode | null => {
861877
const lowerMessage = errorMessage.toLowerCase()
862878

863879
// AI SDK's built-in retry error (e.g., "Failed after 4 attempts. Last error: Service Unavailable")
864-
// Don't retry at SDK level since AI SDK already retried. Just log it for accountability.
880+
// The AI SDK already retried 4 times, but we still want our SDK wrapper to retry 3 more times
865881
if (lowerMessage.includes('failed after') && lowerMessage.includes('attempts')) {
866-
return null
882+
// Extract the underlying error type from the message
883+
if (lowerMessage.includes('service unavailable')) {
884+
return ErrorCodes.SERVICE_UNAVAILABLE
885+
}
886+
if (lowerMessage.includes('timeout')) {
887+
return ErrorCodes.TIMEOUT
888+
}
889+
if (lowerMessage.includes('connection refused')) {
890+
return ErrorCodes.CONNECTION_REFUSED
891+
}
892+
// Default to SERVER_ERROR for other AI SDK retry failures
893+
return ErrorCodes.SERVER_ERROR
867894
}
868895

869896
if (errorMessage.includes('503') || lowerMessage.includes('service unavailable')) {

0 commit comments

Comments
 (0)