Skip to content

Commit 9284cba

Browse files
committed
🤖 fix: handle Google/Gemini cached token subtraction
Google/Gemini, like OpenAI, reports inputTokens INCLUSIVE of cachedInputTokens. Extend the subtraction logic to cover Google models as well, so cached tokens are not double-counted.
1 parent c011420 commit 9284cba

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

src/common/utils/tokens/displayUsage.test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,39 @@ describe("createDisplayUsage", () => {
209209
// Input stays as-is for gateway Anthropic
210210
expect(result!.input.tokens).toBe(36600);
211211
});
212+
213+
test("subtracts cached tokens for direct Google model", () => {
214+
// Google also reports inputTokens INCLUSIVE of cachedInputTokens
215+
const googleUsage: LanguageModelV2Usage = {
216+
inputTokens: 74300, // Includes 42600 cached
217+
outputTokens: 1600,
218+
totalTokens: 75900,
219+
cachedInputTokens: 42600,
220+
};
221+
222+
const result = createDisplayUsage(googleUsage, "google:gemini-3-pro-preview");
223+
224+
expect(result).toBeDefined();
225+
expect(result!.cached.tokens).toBe(42600);
226+
// Input should be raw minus cached: 74300 - 42600 = 31700
227+
expect(result!.input.tokens).toBe(31700);
228+
});
229+
230+
test("subtracts cached tokens for gateway Google model", () => {
231+
const googleUsage: LanguageModelV2Usage = {
232+
inputTokens: 74300,
233+
outputTokens: 1600,
234+
totalTokens: 75900,
235+
cachedInputTokens: 42600,
236+
};
237+
238+
const result = createDisplayUsage(googleUsage, "mux-gateway:google/gemini-3-pro-preview");
239+
240+
expect(result).toBeDefined();
241+
expect(result!.cached.tokens).toBe(42600);
242+
// Should also subtract: 74300 - 42600 = 31700
243+
expect(result!.input.tokens).toBe(31700);
244+
});
212245
});
213246

214247
test("returns undefined for undefined usage", () => {

src/common/utils/tokens/displayUsage.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,13 @@ export function createDisplayUsage(
3636

3737
// Detect provider from normalized model string
3838
const isOpenAI = normalizedModel.startsWith("openai:");
39+
const isGoogle = normalizedModel.startsWith("google:");
3940

40-
// For OpenAI, subtract cached tokens to get uncached input tokens
41-
const inputTokens = isOpenAI ? Math.max(0, rawInputTokens - cachedTokens) : rawInputTokens;
41+
// OpenAI and Google report inputTokens INCLUSIVE of cachedInputTokens
42+
// Anthropic reports them separately (inputTokens EXCLUDES cached)
43+
// Subtract cached tokens for providers that include them to avoid double-counting
44+
const inputTokens =
45+
isOpenAI || isGoogle ? Math.max(0, rawInputTokens - cachedTokens) : rawInputTokens;
4246

4347
// Extract cache creation tokens from provider metadata (Anthropic-specific)
4448
const cacheCreateTokens =

0 commit comments

Comments (0)