diff --git a/src/node/services/compactionHandler.test.ts b/src/node/services/compactionHandler.test.ts index 175d917007..c5f591de7d 100644 --- a/src/node/services/compactionHandler.test.ts +++ b/src/node/services/compactionHandler.test.ts @@ -255,7 +255,7 @@ describe("CompactionHandler", () => { model: "claude-3-5-sonnet-20241022", usage, duration: 2000, - providerMetadata: { foo: "bar" }, + providerMetadata: { anthropic: { cacheCreationInputTokens: 50000 } }, systemMessageTokens: 100, }); await handler.handleCompletion(event); @@ -266,14 +266,15 @@ describe("CompactionHandler", () => { }); expect(summaryEvent).toBeDefined(); const sevt = summaryEvent?.data.message as MuxMessage; + // providerMetadata is omitted to avoid inflating context with pre-compaction cacheCreationInputTokens expect(sevt.metadata).toMatchObject({ model: "claude-3-5-sonnet-20241022", usage, duration: 2000, - providerMetadata: { foo: "bar" }, systemMessageTokens: 100, compacted: true, }); + expect(sevt.metadata?.providerMetadata).toBeUndefined(); }); it("should emit stream-end event to frontend", async () => { diff --git a/src/node/services/compactionHandler.ts b/src/node/services/compactionHandler.ts index 3cb9846951..351f6ca5c7 100644 --- a/src/node/services/compactionHandler.ts +++ b/src/node/services/compactionHandler.ts @@ -112,7 +112,10 @@ export class CompactionHandler { } const deletedSequences = clearResult.data; - // Create summary message with metadata + // Create summary message with metadata. + // We omit providerMetadata because it contains cacheCreationInputTokens from the + // pre-compaction context, which inflates context usage display. The historicalUsage + // field preserves full cost accounting from pre-compaction messages. const summaryMessage = createMuxMessage( `summary-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`, "assistant", @@ -123,7 +126,6 @@ export class CompactionHandler { model: metadata.model, usage: metadata.usage, historicalUsage, - providerMetadata: metadata.providerMetadata, duration: metadata.duration, systemMessageTokens: metadata.systemMessageTokens, muxMetadata: { type: "normal" },