From 51e533d09849137f66eb83ff19ea288772f3c497 Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 4 Dec 2025 15:29:10 +1100 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=A4=96=20fix:=20correct=20context=20u?= =?UTF-8?q?sage=20display=20for=20multi-step=20tool=20calls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Context Usage UI was showing inflated cachedInputTokens for plan messages with multi-step tool calls (e.g., ~150k instead of ~50k). Root cause: contextUsage was falling back to cumulative usage (summed across all steps) when contextUsage was undefined. For multi-step requests, cachedInputTokens gets summed because each step reads from cache, but the actual context window only sees one step's worth. Changes: - Backend: Refactor getStreamMetadata() to fetch totalUsage (for costs) and contextUsage (last step, for context window) separately from AI SDK - Backend: Add contextProviderMetadata from streamResult.providerMetadata for accurate cache creation token display - Frontend: Remove fallback from contextUsage to usage - only use contextUsage for context window display The fix ensures context window shows last step's inputTokens (actual context size) while cost calculation still uses cumulative totals. 
--- bun.lock | 1 + src/browser/stores/WorkspaceStore.ts | 2 +- src/browser/stories/mockFactory.ts | 1 + src/common/orpc/schemas/message.ts | 2 + src/node/services/streamManager.ts | 63 +++++++++++++++++++--------- 5 files changed, 48 insertions(+), 21 deletions(-) diff --git a/bun.lock b/bun.lock index 3d38dc3a1b..365d7afd8b 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "mux", diff --git a/src/browser/stores/WorkspaceStore.ts b/src/browser/stores/WorkspaceStore.ts index 17d286fba9..73a6537262 100644 --- a/src/browser/stores/WorkspaceStore.ts +++ b/src/browser/stores/WorkspaceStore.ts @@ -486,7 +486,7 @@ export class WorkspaceStore { if (msg.metadata?.compacted) { continue; } - const rawUsage = msg.metadata?.contextUsage ?? msg.metadata?.usage; + const rawUsage = msg.metadata?.contextUsage; const providerMeta = msg.metadata?.contextProviderMetadata ?? msg.metadata?.providerMetadata; if (rawUsage) { diff --git a/src/browser/stories/mockFactory.ts b/src/browser/stories/mockFactory.ts index 99c101d760..cbe8b370f3 100644 --- a/src/browser/stories/mockFactory.ts +++ b/src/browser/stories/mockFactory.ts @@ -199,6 +199,7 @@ export function createAssistantMessage( timestamp: opts.timestamp ?? STABLE_TIMESTAMP, model: opts.model ?? 
DEFAULT_MODEL, usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 }, + contextUsage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 }, duration: 1000, }, }; diff --git a/src/common/orpc/schemas/message.ts b/src/common/orpc/schemas/message.ts index 5241d0f424..aad4387283 100644 --- a/src/common/orpc/schemas/message.ts +++ b/src/common/orpc/schemas/message.ts @@ -76,7 +76,9 @@ export const MuxMessageSchema = z.object({ timestamp: z.number().optional(), model: z.string().optional(), usage: z.any().optional(), + contextUsage: z.any().optional(), providerMetadata: z.record(z.string(), z.unknown()).optional(), + contextProviderMetadata: z.record(z.string(), z.unknown()).optional(), duration: z.number().optional(), systemMessageTokens: z.number().optional(), muxMetadata: z.any().optional(), diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts index 05eadbac75..bb15e73858 100644 --- a/src/node/services/streamManager.ts +++ b/src/node/services/streamManager.ts @@ -334,22 +334,42 @@ export class StreamManager extends EventEmitter { private async getStreamMetadata( streamInfo: WorkspaceStreamInfo, timeoutMs = 1000 - ): Promise<{ usage?: LanguageModelV2Usage; duration: number }> { - let usage = undefined; + ): Promise<{ + totalUsage?: LanguageModelV2Usage; + contextUsage?: LanguageModelV2Usage; + contextProviderMetadata?: Record<string, unknown>; + duration: number; + }> { + let totalUsage: LanguageModelV2Usage | undefined; + let contextUsage: LanguageModelV2Usage | undefined; + let contextProviderMetadata: Record<string, unknown> | undefined; + try { - // Race usage retrieval against timeout to prevent hanging on abort - // CRITICAL: Use totalUsage (sum of all steps) not usage (last step only) - // For multi-step tool calls, usage would severely undercount actual token consumption - usage = await Promise.race([ - streamInfo.streamResult.totalUsage, - new Promise((resolve) => setTimeout(() => resolve(undefined), timeoutMs)), + // Fetch all metadata in parallel
with timeout + // - totalUsage: sum of all steps (for cost calculation) + // - usage: last step only (for context window display) + // - providerMetadata: last step (for context window cache display) + const [total, context, contextMeta] = await Promise.race([ + Promise.all([ + streamInfo.streamResult.totalUsage, + streamInfo.streamResult.usage, + streamInfo.streamResult.providerMetadata, + ]), + new Promise<[undefined, undefined, undefined]>((resolve) => + setTimeout(() => resolve([undefined, undefined, undefined]), timeoutMs) + ), ]); + totalUsage = total; + contextUsage = context; + contextProviderMetadata = contextMeta; } catch (error) { - log.debug("Could not retrieve usage:", error); + log.debug("Could not retrieve stream metadata:", error); } return { - usage, + totalUsage, + contextUsage, + contextProviderMetadata, duration: Date.now() - streamInfo.startTime, }; } @@ -1071,17 +1091,20 @@ export class StreamManager extends EventEmitter { // Check if stream completed successfully if (!streamInfo.abortController.signal.aborted) { - // Get usage, duration, and provider metadata from stream result - // CRITICAL: Use totalUsage (via getStreamMetadata) and aggregated providerMetadata - // to correctly account for all steps in multi-tool-call conversations - const { usage, duration } = await this.getStreamMetadata(streamInfo); + // Get all metadata from stream result in one call + // - totalUsage: sum of all steps (for cost calculation) + // - contextUsage: last step only (for context window display) + // - contextProviderMetadata: last step (for context window cache tokens) + // Falls back to tracked values from finish-step if streamResult fails/times out + const streamMeta = await this.getStreamMetadata(streamInfo); + const totalUsage = streamMeta.totalUsage; + const contextUsage = streamMeta.contextUsage ?? streamInfo.lastStepUsage; + const contextProviderMetadata = + streamMeta.contextProviderMetadata ?? 
streamInfo.lastStepProviderMetadata; + const duration = streamMeta.duration; + // Aggregated provider metadata across all steps (for cost calculation with cache tokens) const providerMetadata = await this.getAggregatedProviderMetadata(streamInfo); - // For context window display, use last step's usage (inputTokens = current context size) - // This is stored in streamInfo during finish-step handling - const contextUsage = streamInfo.lastStepUsage; - const contextProviderMetadata = streamInfo.lastStepProviderMetadata; - // Emit stream end event with parts preserved in temporal order const streamEndEvent: StreamEndEvent = { type: "stream-end", @@ -1090,7 +1113,7 @@ export class StreamManager extends EventEmitter { metadata: { ...streamInfo.initialMetadata, // AIService-provided metadata (systemMessageTokens, etc) model: streamInfo.model, - usage, // Total across all steps (for cost calculation) + usage: totalUsage, // Total across all steps (for cost calculation) contextUsage, // Last step only (for context window display) providerMetadata, // Aggregated (for cost calculation) contextProviderMetadata, // Last step (for context window display) From d735fe22902cbed8fc6eb0f40a23e94c7532f299 Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 4 Dec 2025 16:20:41 +1100 Subject: [PATCH 2/2] fix: decouple stream metadata fetches to prevent partial failures Each promise (totalUsage, contextUsage, contextProviderMetadata) now has independent timeout + error handling. If providerMetadata rejects or times out, totalUsage is still returned for cost calculation. 
--- Makefile | 2 +- src/node/services/streamManager.ts | 42 ++++++++++++------------------ 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 7fccd4d170..49a6c5d0fa 100644 --- a/Makefile +++ b/Makefile @@ -224,7 +224,7 @@ lint: node_modules/.installed ## Run ESLint (typecheck runs in separate target) lint-fix: node_modules/.installed ## Run linter with --fix @./scripts/lint.sh --fix -ifeq ($(OS),Windows_NT) +ifeq ($(OS),Windows_NT) typecheck: node_modules/.installed src/version.ts ## Run TypeScript type checking (uses tsgo for 10x speedup) @# On Windows, use npm run because bun x doesn't correctly pass arguments @npmx concurrently -g \ diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts index bb15e73858..5a37dfebff 100644 --- a/src/node/services/streamManager.ts +++ b/src/node/services/streamManager.ts @@ -340,31 +340,23 @@ export class StreamManager extends EventEmitter { contextProviderMetadata?: Record<string, unknown>; duration: number; }> { - let totalUsage: LanguageModelV2Usage | undefined; - let contextUsage: LanguageModelV2Usage | undefined; - let contextProviderMetadata: Record<string, unknown> | undefined; - - try { - // Fetch all metadata in parallel with timeout - // - totalUsage: sum of all steps (for cost calculation) - // - usage: last step only (for context window display) - // - providerMetadata: last step (for context window cache display) - const [total, context, contextMeta] = await Promise.race([ - Promise.all([ - streamInfo.streamResult.totalUsage, - streamInfo.streamResult.usage, - streamInfo.streamResult.providerMetadata, - ]), - new Promise<[undefined, undefined, undefined]>((resolve) => - setTimeout(() => resolve([undefined, undefined, undefined]), timeoutMs) - ), - ]); - totalUsage = total; - contextUsage = context; - contextProviderMetadata = contextMeta; - } catch (error) { - log.debug("Could not retrieve stream metadata:", error); - } + // Helper: wrap promise with independent timeout + error
handling + // Each promise resolves independently - one failure doesn't mask others + const withTimeout = <T>(promise: Promise<T>): Promise<T | undefined> => + Promise.race([ + promise, + new Promise<undefined>((resolve) => setTimeout(() => resolve(undefined), timeoutMs)), + ]).catch(() => undefined); + + // Fetch all metadata in parallel with independent timeouts + // - totalUsage: sum of all steps (for cost calculation) + // - contextUsage: last step only (for context window display) + // - contextProviderMetadata: last step (for context window cache display) + const [totalUsage, contextUsage, contextProviderMetadata] = await Promise.all([ + withTimeout(streamInfo.streamResult.totalUsage), + withTimeout(streamInfo.streamResult.usage), + withTimeout(streamInfo.streamResult.providerMetadata), + ]); return { totalUsage,