From 0e3fc8156606a4f4b40c23b64abffb5ba766844b Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 1 Dec 2025 22:49:35 +1100 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20enable=20prompt=20caching?= =?UTF-8?q?=20for=20mux-gateway=20Anthropic=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gateway provider sends requests in SDK internal format (json.prompt) rather than Anthropic API format (json.messages). Additionally, the gateway server only translates providerOptions.anthropic.cacheControl at the message level, not at content part level. This fix: 1. Detects gateway format by checking for json.prompt array 2. Adds providerOptions.anthropic.cacheControl at message level for gateway 3. Keeps cache_control injection at content part level for direct Anthropic Without this fix, only system messages were cached (5.4k) because they already had message-level providerOptions. Conversation history was not cached because applyCacheControl() sets providerOptions at content part level, which the gateway server ignores. --- src/node/services/aiService.ts | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 6fd672f066..5cc98faa78 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -125,16 +125,34 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet // Inject cache_control on last message's last content part // This caches the entire conversation - if (Array.isArray(json.messages) && json.messages.length >= 1) { - const lastMsg = json.messages[json.messages.length - 1] as Record; - const content = lastMsg.content; + // Handle both formats: + // - Direct Anthropic provider: json.messages (Anthropic API format) + // - Gateway provider: json.prompt (AI SDK internal format) + const messages = Array.isArray(json.messages) + ? json.messages + : Array.isArray(json.prompt) + ? json.prompt + : null; + + if (messages && messages.length >= 1) { + const lastMsg = messages[messages.length - 1] as Record; + + // For gateway: add providerOptions.anthropic.cacheControl at message level + // (gateway validates schema strictly, doesn't allow raw cache_control on messages) + if (Array.isArray(json.prompt)) { + const providerOpts = (lastMsg.providerOptions ?? {}) as Record; + const anthropicOpts = (providerOpts.anthropic ?? {}) as Record; + anthropicOpts.cacheControl ??= { type: "ephemeral" }; + providerOpts.anthropic = anthropicOpts; + lastMsg.providerOptions = providerOpts; + } + // For direct Anthropic: add cache_control to last content part + const content = lastMsg.content; if (Array.isArray(content) && content.length > 0) { - // Array content: add cache_control to last part const lastPart = content[content.length - 1] as Record; lastPart.cache_control ??= { type: "ephemeral" }; } - // Note: String content messages are rare after SDK conversion; skip for now } // Update body with modified JSON