Skip to content

Commit 1425e24

Browse files
🤖 fix: correct context usage display for multi-step tool calls (#893)
## Problem The Context Usage UI was showing inflated `cachedInputTokens` for plan messages with multi-step tool calls (e.g., ~150k instead of ~50k), causing the context bar to show >200k tokens which is impossible since the user can still send messages. ## Root Cause For multi-step tool calls, `cachedInputTokens` was being summed across all steps: - Step 1: reads 50k cached tokens - Step 2: reads same 50k cached tokens - Step 3: reads same 50k cached tokens - Cumulative: 150k (wrong for context display) The frontend was falling back to cumulative `usage` when `contextUsage` was undefined, which had the incorrectly summed values. ## Changes **Backend (`streamManager.ts`)**: - Refactor `getStreamMetadata()` to fetch both `totalUsage` (sum of all steps, for costs) and `contextUsage` (last step only, for context window) from AI SDK - Add `contextProviderMetadata` from `streamResult.providerMetadata` for accurate cache creation token display - Falls back to tracked `lastStepUsage`/`lastStepProviderMetadata` if streamResult times out **Frontend (`WorkspaceStore.ts`)**: - Remove fallback from `contextUsage` to `usage` - only use `contextUsage` for context window display ## Result Context window now correctly shows last step's `inputTokens` (actual context size) while cost calculation still uses cumulative totals. ## Migration Note ⚠️ **Old workspaces** created before this fix may not have `contextUsage` stored in their message history. These workspaces will show no context usage in the UI until the next message is sent, at which point context usage will display correctly. --- _Generated with `mux`_
1 parent 4e85392 commit 1425e24

File tree

6 files changed

+44
-25
lines changed

6 files changed

+44
-25
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ lint: node_modules/.installed ## Run ESLint (typecheck runs in separate target)
224224
lint-fix: node_modules/.installed ## Run linter with --fix
225225
@./scripts/lint.sh --fix
226226

227-
ifeq ($(OS),Windows_NT)
227+
ifeq ($(OS),Windows_NT)
228228
typecheck: node_modules/.installed src/version.ts ## Run TypeScript type checking (uses tsgo for 10x speedup)
229229
@# On Windows, use npm run because bun x doesn't correctly pass arguments
230230
@npmx concurrently -g \

bun.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"lockfileVersion": 1,
3+
"configVersion": 0,
34
"workspaces": {
45
"": {
56
"name": "mux",

src/browser/stores/WorkspaceStore.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ export class WorkspaceStore {
486486
if (msg.metadata?.compacted) {
487487
continue;
488488
}
489-
const rawUsage = msg.metadata?.contextUsage ?? msg.metadata?.usage;
489+
const rawUsage = msg.metadata?.contextUsage;
490490
const providerMeta =
491491
msg.metadata?.contextProviderMetadata ?? msg.metadata?.providerMetadata;
492492
if (rawUsage) {

src/browser/stories/mockFactory.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ export function createAssistantMessage(
199199
timestamp: opts.timestamp ?? STABLE_TIMESTAMP,
200200
model: opts.model ?? DEFAULT_MODEL,
201201
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
202+
contextUsage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
202203
duration: 1000,
203204
},
204205
};

src/common/orpc/schemas/message.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ export const MuxMessageSchema = z.object({
7676
timestamp: z.number().optional(),
7777
model: z.string().optional(),
7878
usage: z.any().optional(),
79+
contextUsage: z.any().optional(),
7980
providerMetadata: z.record(z.string(), z.unknown()).optional(),
81+
contextProviderMetadata: z.record(z.string(), z.unknown()).optional(),
8082
duration: z.number().optional(),
8183
systemMessageTokens: z.number().optional(),
8284
muxMetadata: z.any().optional(),

src/node/services/streamManager.ts

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -334,22 +334,34 @@ export class StreamManager extends EventEmitter {
334334
private async getStreamMetadata(
335335
streamInfo: WorkspaceStreamInfo,
336336
timeoutMs = 1000
337-
): Promise<{ usage?: LanguageModelV2Usage; duration: number }> {
338-
let usage = undefined;
339-
try {
340-
// Race usage retrieval against timeout to prevent hanging on abort
341-
// CRITICAL: Use totalUsage (sum of all steps) not usage (last step only)
342-
// For multi-step tool calls, usage would severely undercount actual token consumption
343-
usage = await Promise.race([
344-
streamInfo.streamResult.totalUsage,
337+
): Promise<{
338+
totalUsage?: LanguageModelV2Usage;
339+
contextUsage?: LanguageModelV2Usage;
340+
contextProviderMetadata?: Record<string, unknown>;
341+
duration: number;
342+
}> {
343+
// Helper: wrap promise with independent timeout + error handling
344+
// Each promise resolves independently - one failure doesn't mask others
345+
const withTimeout = <T>(promise: Promise<T>): Promise<T | undefined> =>
346+
Promise.race([
347+
promise,
345348
new Promise<undefined>((resolve) => setTimeout(() => resolve(undefined), timeoutMs)),
346-
]);
347-
} catch (error) {
348-
log.debug("Could not retrieve usage:", error);
349-
}
349+
]).catch(() => undefined);
350+
351+
// Fetch all metadata in parallel with independent timeouts
352+
// - totalUsage: sum of all steps (for cost calculation)
353+
// - contextUsage: last step only (for context window display)
354+
// - contextProviderMetadata: last step (for context window cache display)
355+
const [totalUsage, contextUsage, contextProviderMetadata] = await Promise.all([
356+
withTimeout(streamInfo.streamResult.totalUsage),
357+
withTimeout(streamInfo.streamResult.usage),
358+
withTimeout(streamInfo.streamResult.providerMetadata),
359+
]);
350360

351361
return {
352-
usage,
362+
totalUsage,
363+
contextUsage,
364+
contextProviderMetadata,
353365
duration: Date.now() - streamInfo.startTime,
354366
};
355367
}
@@ -1045,17 +1057,20 @@ export class StreamManager extends EventEmitter {
10451057

10461058
// Check if stream completed successfully
10471059
if (!streamInfo.abortController.signal.aborted) {
1048-
// Get usage, duration, and provider metadata from stream result
1049-
// CRITICAL: Use totalUsage (via getStreamMetadata) and aggregated providerMetadata
1050-
// to correctly account for all steps in multi-tool-call conversations
1051-
const { usage, duration } = await this.getStreamMetadata(streamInfo);
1060+
// Get all metadata from stream result in one call
1061+
// - totalUsage: sum of all steps (for cost calculation)
1062+
// - contextUsage: last step only (for context window display)
1063+
// - contextProviderMetadata: last step (for context window cache tokens)
1064+
// Falls back to tracked values from finish-step if streamResult fails/times out
1065+
const streamMeta = await this.getStreamMetadata(streamInfo);
1066+
const totalUsage = streamMeta.totalUsage;
1067+
const contextUsage = streamMeta.contextUsage ?? streamInfo.lastStepUsage;
1068+
const contextProviderMetadata =
1069+
streamMeta.contextProviderMetadata ?? streamInfo.lastStepProviderMetadata;
1070+
const duration = streamMeta.duration;
1071+
// Aggregated provider metadata across all steps (for cost calculation with cache tokens)
10521072
const providerMetadata = await this.getAggregatedProviderMetadata(streamInfo);
10531073

1054-
// For context window display, use last step's usage (inputTokens = current context size)
1055-
// This is stored in streamInfo during finish-step handling
1056-
const contextUsage = streamInfo.lastStepUsage;
1057-
const contextProviderMetadata = streamInfo.lastStepProviderMetadata;
1058-
10591074
// Emit stream end event with parts preserved in temporal order
10601075
const streamEndEvent: StreamEndEvent = {
10611076
type: "stream-end",
@@ -1064,7 +1079,7 @@ export class StreamManager extends EventEmitter {
10641079
metadata: {
10651080
...streamInfo.initialMetadata, // AIService-provided metadata (systemMessageTokens, etc)
10661081
model: streamInfo.model,
1067-
usage, // Total across all steps (for cost calculation)
1082+
usage: totalUsage, // Total across all steps (for cost calculation)
10681083
contextUsage, // Last step only (for context window display)
10691084
providerMetadata, // Aggregated (for cost calculation)
10701085
contextProviderMetadata, // Last step (for context window display)

0 commit comments

Comments (0)