Skip to content

Commit 5f7a7e7

Browse files
committed
feat(sdk): use return values for retry instead of exceptions
Previously errors were thrown and caught by retry wrapper. Now: - Errors flow as output: { type: 'error', message: '...' } - Retry wrapper checks result.output.type instead of catching - sessionState optional when error occurs before initialization - Zero throws in retry logic (except top-level abort check) This maintains return-success-failure pattern throughout the stack.
1 parent ad79030 commit 5f7a7e7

File tree

4 files changed

+135
-40
lines changed

4 files changed

+135
-40
lines changed

packages/agent-runtime/src/run-agent-step.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,12 @@ export async function loopAgentSteps(
867867
},
868868
'Agent execution failed',
869869
)
870+
871+
// Re-throw NetworkError to allow SDK retry wrapper to handle it
872+
if (error instanceof Error && error.name === 'NetworkError') {
873+
throw error
874+
}
875+
870876
const errorMessage = typeof error === 'string' ? error : `${error}`
871877

872878
const status = checkLiveUserInput(params) ? 'failed' : 'cancelled'
@@ -880,12 +886,6 @@ export async function loopAgentSteps(
880886
errorMessage,
881887
})
882888

883-
// Re-throw NetworkError so retry logic can handle it
884-
// For other error types, wrap in error output for graceful handling
885-
if (error && typeof error === 'object' && 'code' in error && 'name' in error && error.name === 'NetworkError') {
886-
throw error
887-
}
888-
889889
const errorObject = getErrorObject(error)
890890
return {
891891
agentState: currentAgentState,

sdk/src/__tests__/run-with-retry.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ describe('run retry wrapper', () => {
1919
})
2020

2121
it('returns immediately on success without retrying', async () => {
22-
const expectedState = { output: { type: 'success' } } as RunState
22+
const expectedState = { sessionState: {} as any, output: { type: 'lastMessage', value: 'hi' } } as RunState
2323
const runSpy = spyOn(runModule, 'runOnce').mockResolvedValueOnce(expectedState)
2424

2525
const result = await run(baseOptions)
@@ -29,7 +29,7 @@ describe('run retry wrapper', () => {
2929
})
3030

3131
it('retries once on retryable network error and then succeeds', async () => {
32-
const expectedState = { output: { type: 'success' } } as RunState
32+
const expectedState = { sessionState: {} as any, output: { type: 'lastMessage', value: 'hi' } } as RunState
3333
const runSpy = spyOn(runModule, 'runOnce')
3434
.mockRejectedValueOnce(
3535
new NetworkError('temporary', ErrorCodes.NETWORK_ERROR),
@@ -91,7 +91,7 @@ describe('run retry wrapper', () => {
9191
})
9292

9393
it('retries when provided custom retryableErrorCodes set', async () => {
94-
const expectedState = { output: { type: 'success' } } as RunState
94+
const expectedState = { sessionState: {} as any, output: { type: 'lastMessage', value: 'hi' } } as RunState
9595
const runSpy = spyOn(runModule, 'runOnce')
9696
.mockRejectedValueOnce(
9797
new NetworkError('temporary', ErrorCodes.SERVER_ERROR),

sdk/src/run-state.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import type {
2727
import type * as fsType from 'fs'
2828

2929
export type RunState = {
30-
sessionState: SessionState
30+
sessionState?: SessionState
3131
output: AgentOutput
3232
}
3333

@@ -459,7 +459,9 @@ export function withAdditionalMessage({
459459
}): RunState {
460460
const newRunState = cloneDeep(runState)
461461

462-
newRunState.sessionState.mainAgentState.messageHistory.push(message)
462+
if (newRunState.sessionState) {
463+
newRunState.sessionState.mainAgentState.messageHistory.push(message)
464+
}
463465

464466
return newRunState
465467
}
@@ -474,7 +476,9 @@ export function withMessageHistory({
474476
// Deep copy
475477
const newRunState = JSON.parse(JSON.stringify(runState)) as typeof runState
476478

477-
newRunState.sessionState.mainAgentState.messageHistory = messages
479+
if (newRunState.sessionState) {
480+
newRunState.sessionState.mainAgentState.messageHistory = messages
481+
}
478482

479483
return newRunState
480484
}

sdk/src/run.ts

Lines changed: 119 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,15 @@ export async function run(
274274
let attemptIndex = 0
275275
while (true) {
276276
if (signal?.aborted) {
277-
throw createAbortError(signal)
277+
// Return error output for abort instead of throwing
278+
const abortError = createAbortError(signal)
279+
return {
280+
sessionState: rest.previousRun?.sessionState,
281+
output: {
282+
type: 'error',
283+
message: abortError.message,
284+
},
285+
}
278286
}
279287

280288
try {
@@ -283,6 +291,66 @@ export async function run(
283291
signal,
284292
})
285293

294+
// Check if result contains a retryable error in the output
295+
if (result.output.type === 'error') {
296+
const retryableCode = getRetryableErrorCode(result.output.message)
297+
const canRetry =
298+
retryableCode &&
299+
attemptIndex < retryOptions.maxRetries &&
300+
retryOptions.retryableErrorCodes.has(retryableCode)
301+
302+
if (canRetry) {
303+
// Treat this as a retryable error - continue retry loop
304+
const delayMs = Math.min(
305+
retryOptions.backoffBaseMs * Math.pow(2, attemptIndex),
306+
retryOptions.backoffMaxMs,
307+
)
308+
309+
// Log retry attempt with full context
310+
if (rest.logger) {
311+
rest.logger.warn(
312+
{
313+
attempt: attemptIndex + 1,
314+
maxRetries: retryOptions.maxRetries,
315+
delayMs,
316+
errorCode: retryableCode,
317+
errorMessage: result.output.message,
318+
},
319+
'SDK retrying after error',
320+
)
321+
}
322+
323+
await retryOptions.onRetry?.({
324+
attempt: attemptIndex + 1,
325+
error: new Error(result.output.message),
326+
delayMs,
327+
errorCode: retryableCode,
328+
})
329+
330+
await waitWithAbort(delayMs, signal)
331+
attemptIndex++
332+
continue
333+
} else if (attemptIndex > 0) {
334+
// Non-retryable error or exhausted retries
335+
if (rest.logger) {
336+
rest.logger.warn(
337+
{
338+
attemptIndex,
339+
totalAttempts: attemptIndex + 1,
340+
errorCode: retryableCode,
341+
},
342+
'SDK exhausted all retries',
343+
)
344+
}
345+
346+
await retryOptions.onRetryExhausted?.({
347+
totalAttempts: attemptIndex + 1,
348+
error: new Error(result.output.message),
349+
errorCode: retryableCode ?? undefined,
350+
})
351+
}
352+
}
353+
286354
// Log successful completion after retries
287355
if (attemptIndex > 0 && rest.logger) {
288356
rest.logger.info(
@@ -293,63 +361,86 @@ export async function run(
293361

294362
return result
295363
} catch (error) {
364+
// Handle unexpected exceptions by converting to error output
296365
if (signal?.aborted) {
297-
throw createAbortError(signal)
366+
const abortError = createAbortError(signal)
367+
return {
368+
sessionState: rest.previousRun?.sessionState,
369+
output: {
370+
type: 'error',
371+
message: abortError.message,
372+
},
373+
}
298374
}
299375

376+
// Unexpected exception - convert to error output and check if retryable
377+
const errorMessage = error instanceof Error ? error.message : String(error)
378+
const errorCode = isNetworkError(error) ? error.code : undefined
379+
const retryableCode = errorCode ?? getRetryableErrorCode(errorMessage)
380+
300381
const canRetry =
382+
retryableCode &&
301383
attemptIndex < retryOptions.maxRetries &&
302-
shouldRetry(error, retryOptions.retryableErrorCodes)
384+
retryOptions.retryableErrorCodes.has(retryableCode)
303385

304-
const errorCode = isNetworkError(error) ? error.code : undefined
386+
if (rest.logger) {
387+
rest.logger.error(
388+
{
389+
attemptIndex,
390+
errorCode: retryableCode,
391+
canRetry,
392+
error: errorMessage,
393+
},
394+
'Unexpected exception in SDK run',
395+
)
396+
}
305397

306398
if (!canRetry) {
307-
// Notify caller that SDK exhausted all retries
308-
if (attemptIndex > 0) {
309-
if (rest.logger) {
310-
rest.logger.warn(
311-
{
312-
attemptIndex,
313-
totalAttempts: attemptIndex + 1,
314-
errorCode,
315-
},
316-
'SDK exhausted all retries',
317-
)
318-
}
399+
// Can't retry - convert to error output and return
400+
if (attemptIndex > 0 && rest.logger) {
401+
rest.logger.warn(
402+
{
403+
attemptIndex,
404+
totalAttempts: attemptIndex + 1,
405+
},
406+
'SDK exhausted all retries after unexpected exception',
407+
)
408+
}
319409

320-
await retryOptions.onRetryExhausted?.({
321-
totalAttempts: attemptIndex + 1,
322-
error,
323-
errorCode,
324-
})
410+
// Return error output instead of throwing
411+
return {
412+
sessionState: rest.previousRun?.sessionState,
413+
output: {
414+
type: 'error',
415+
message: errorMessage,
416+
},
325417
}
326-
throw error
327418
}
328419

420+
// Exception is retryable - trigger retry
329421
const delayMs = Math.min(
330422
retryOptions.backoffBaseMs * Math.pow(2, attemptIndex),
331423
retryOptions.backoffMaxMs,
332424
)
333425

334-
// Log retry attempt with full context
335426
if (rest.logger) {
336427
rest.logger.warn(
337428
{
338429
attempt: attemptIndex + 1,
339430
maxRetries: retryOptions.maxRetries,
340431
delayMs,
341-
errorCode,
342-
errorMessage: error instanceof Error ? error.message : String(error),
432+
errorCode: retryableCode,
433+
errorMessage,
343434
},
344-
'SDK retrying after error',
435+
'SDK retrying after unexpected exception',
345436
)
346437
}
347438

348439
await retryOptions.onRetry?.({
349440
attempt: attemptIndex + 1,
350-
error,
441+
error: error instanceof Error ? error : new Error(errorMessage),
351442
delayMs,
352-
errorCode,
443+
errorCode: retryableCode,
353444
})
354445

355446
await waitWithAbort(delayMs, signal)

0 commit comments

Comments
 (0)