Skip to content

Commit 73573c1

Browse files
committed
🤖 fix: exclude output/reasoning tokens from context window percentage
Context window percentage was incorrectly including output and reasoning tokens, which don't count against the model's input context limit. This caused inflated percentages when models returned large outputs or used extended thinking.

For example, with 150k input + 150k reasoning tokens on a 200k model:
- Before: (150k + 150k) / 200k = 150% (incorrect)
- After: 150k / 200k = 75% (correct)

Also fixes the threshold slider blocking the token meter tooltip: the slider now only captures mouse events in a small zone around the indicator, allowing the tooltip to show when hovering elsewhere on the bar.

Changes:
- tokenMeterUtils.ts: Calculate contextUsed separately from totalUsed
- CostsTab.tsx: Use contextUsed for percentage calculation
- autoCompactionCheck.ts: Rename getTotalTokens to getContextTokens, which now sums only input-side tokens (input + cached + cacheCreate)
- ThresholdSlider.tsx: Use pointer-events to not block tooltip
- Updated tests to reflect correct behavior

---
_Generated with `mux`_
1 parent 055145c commit 73573c1

File tree

5 files changed

+80
-45
lines changed

5 files changed

+80
-45
lines changed

src/browser/components/RightSidebar/CostsTab.tsx

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,17 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
126126
const is1MActive = use1M && supports1MContext(model);
127127
const maxTokens = is1MActive ? 1_000_000 : baseMaxTokens;
128128

129-
// Total tokens includes cache creation (they're input tokens sent for caching)
130-
const totalUsed = contextUsage
129+
// Context window only includes input-side tokens (what's sent to the model)
130+
// Output and reasoning tokens don't count against context window limits
131+
const contextUsed = contextUsage
131132
? contextUsage.input.tokens +
132133
contextUsage.cached.tokens +
133-
contextUsage.cacheCreate.tokens +
134-
contextUsage.output.tokens +
135-
contextUsage.reasoning.tokens
134+
contextUsage.cacheCreate.tokens
135+
: 0;
136+
137+
// Total tokens across all categories (for proportional display)
138+
const totalUsed = contextUsage
139+
? contextUsed + contextUsage.output.tokens + contextUsage.reasoning.tokens
136140
: 0;
137141

138142
// Calculate percentages based on max tokens (actual context window usage)
@@ -151,7 +155,8 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
151155
cachedPercentage = (contextUsage.cached.tokens / maxTokens) * 100;
152156
cacheCreatePercentage = (contextUsage.cacheCreate.tokens / maxTokens) * 100;
153157
reasoningPercentage = (contextUsage.reasoning.tokens / maxTokens) * 100;
154-
totalPercentage = (totalUsed / maxTokens) * 100;
158+
// Use contextUsed for percentage (excludes output/reasoning from context limit)
159+
totalPercentage = (contextUsed / maxTokens) * 100;
155160
} else if (contextUsage) {
156161
// Unknown model - scale to total tokens used
157162
inputPercentage = totalUsed > 0 ? (contextUsage.input.tokens / totalUsed) * 100 : 0;

src/browser/components/RightSidebar/ThresholdSlider.tsx

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -174,15 +174,38 @@ export const ThresholdSlider: React.FC<ThresholdSliderProps> = ({ config, orient
174174
const color = isEnabled ? "var(--color-plan-mode)" : "var(--color-muted)";
175175
const tooltipText = getTooltipText(config.threshold, orientation);
176176

177-
// Container styles
177+
// Container styles - covers the full bar area for drag handling
178+
// Uses pointer-events: none by default, only the indicator handle has pointer-events: auto
179+
// This allows the token meter tooltip to work when hovering elsewhere on the bar
178180
const containerStyle: React.CSSProperties = {
179181
position: "absolute",
180-
cursor: isHorizontal ? "ew-resize" : "ns-resize",
181182
top: 0,
182183
bottom: 0,
183184
left: 0,
184185
right: 0,
185186
zIndex: 50,
187+
pointerEvents: "none", // Let events pass through to tooltip beneath
188+
};
189+
190+
// Drag handle around the indicator - this captures mouse events
191+
const DRAG_ZONE_SIZE = 16; // pixels on each side of the indicator
192+
const handleStyle: React.CSSProperties = {
193+
position: "absolute",
194+
cursor: isHorizontal ? "ew-resize" : "ns-resize",
195+
pointerEvents: "auto", // Only this element captures events
196+
...(isHorizontal
197+
? {
198+
left: `calc(${config.threshold}% - ${DRAG_ZONE_SIZE}px)`,
199+
width: DRAG_ZONE_SIZE * 2,
200+
top: 0,
201+
bottom: 0,
202+
}
203+
: {
204+
top: `calc(${config.threshold}% - ${DRAG_ZONE_SIZE}px)`,
205+
height: DRAG_ZONE_SIZE * 2,
206+
left: 0,
207+
right: 0,
208+
}),
186209
};
187210

188211
// Indicator positioning - use transform for centering on both axes
@@ -215,15 +238,17 @@ export const ThresholdSlider: React.FC<ThresholdSliderProps> = ({ config, orient
215238
const containerRect = containerRef.current?.getBoundingClientRect();
216239

217240
return (
218-
<div
219-
ref={containerRef}
220-
style={containerStyle}
221-
onMouseDown={handleMouseDown}
222-
onMouseEnter={() => setIsHovered(true)}
223-
onMouseLeave={() => setIsHovered(false)}
224-
// Horizontal uses native title (simpler, no clipping issues with wide tooltips)
225-
title={isHorizontal ? tooltipText : undefined}
226-
>
241+
<div ref={containerRef} style={containerStyle}>
242+
{/* Drag handle - captures mouse events in a small zone around the indicator */}
243+
<div
244+
style={handleStyle}
245+
onMouseDown={handleMouseDown}
246+
onMouseEnter={() => setIsHovered(true)}
247+
onMouseLeave={() => setIsHovered(false)}
248+
// Horizontal uses native title (simpler, no clipping issues with wide tooltips)
249+
title={isHorizontal ? tooltipText : undefined}
250+
/>
251+
227252
{/* Visual indicator - pointer events disabled */}
228253
<div style={indicatorStyle}>
229254
<Triangle direction={isHorizontal ? "down" : "right"} color={color} />

src/browser/utils/compaction/autoCompactionCheck.test.ts

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,18 @@ import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
55
import { KNOWN_MODELS } from "@/common/constants/knownModels";
66

77
// Helper to create a mock usage entry
8+
// contextTokens is the total INPUT-side tokens (input + cached + cacheCreate)
9+
// This matches what counts against the context window limit
810
const createUsageEntry = (
9-
tokens: number,
11+
contextTokens: number,
1012
model: string = KNOWN_MODELS.SONNET.id
1113
): ChatUsageDisplay => {
12-
// Distribute tokens across different types (realistic pattern)
13-
const inputTokens = Math.floor(tokens * 0.6); // 60% input
14-
const outputTokens = Math.floor(tokens * 0.3); // 30% output
15-
const cachedTokens = Math.floor(tokens * 0.1); // 10% cached
14+
// Distribute context tokens across input types (all count against context window)
15+
const inputTokens = Math.floor(contextTokens * 0.85); // 85% fresh input
16+
const cachedTokens = Math.floor(contextTokens * 0.15); // 15% cached input
17+
18+
// Output and reasoning are separate (don't count against context)
19+
const outputTokens = Math.floor(contextTokens * 0.2); // ~20% of context size
1620

1721
return {
1822
input: { tokens: inputTokens },
@@ -135,14 +139,15 @@ describe("checkAutoCompaction", () => {
135139
expect(result.shouldShowWarning).toBe(false);
136140
});
137141

138-
test("includes all token types in calculation", () => {
142+
test("only counts input-side tokens (input, cached, cacheCreate) for context window", () => {
139143
// Create usage with all token types specified
144+
// Only input-side tokens should count against context window
140145
const usageEntry = {
141146
input: { tokens: 10_000 },
142147
cached: { tokens: 5_000 },
143148
cacheCreate: { tokens: 2_000 },
144-
output: { tokens: 3_000 },
145-
reasoning: { tokens: 1_000 },
149+
output: { tokens: 3_000 }, // Should NOT count
150+
reasoning: { tokens: 1_000 }, // Should NOT count
146151
model: KNOWN_MODELS.SONNET.id,
147152
};
148153
const usage: WorkspaceUsageState = {
@@ -153,8 +158,9 @@ describe("checkAutoCompaction", () => {
153158

154159
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
155160

156-
// Total: 10k + 5k + 2k + 3k + 1k = 21k tokens = 10.5%
157-
expect(result.usagePercentage).toBe(10.5);
161+
// Context tokens: 10k + 5k + 2k = 17k (output/reasoning excluded)
162+
// 17,000 / 200,000 = 8.5%
163+
expect(result.usagePercentage).toBe(8.5);
158164
});
159165
});
160166

src/browser/utils/compaction/autoCompactionCheck.ts

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,13 @@ import {
2424
FORCE_COMPACTION_BUFFER_PERCENT,
2525
} from "@/common/constants/ui";
2626

27-
/** Sum all token components from a ChatUsageDisplay */
28-
function getTotalTokens(usage: ChatUsageDisplay): number {
29-
return (
30-
usage.input.tokens +
31-
usage.cached.tokens +
32-
usage.cacheCreate.tokens +
33-
usage.output.tokens +
34-
usage.reasoning.tokens
35-
);
27+
/**
28+
* Get context window token count from a ChatUsageDisplay.
29+
* Only includes input-side tokens (what's sent to the model).
30+
* Output and reasoning tokens don't count against context window limits.
31+
*/
32+
function getContextTokens(usage: ChatUsageDisplay): number {
33+
return usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens;
3634
}
3735

3836
export interface AutoCompactionCheckResult {
@@ -100,15 +98,15 @@ export function checkAutoCompaction(
10098
const currentUsage = usage.liveUsage ?? lastUsage;
10199

102100
// Usage percentage from current context (live when streaming, otherwise last completed)
103-
const usagePercentage = currentUsage ? (getTotalTokens(currentUsage) / maxTokens) * 100 : 0;
101+
const usagePercentage = currentUsage ? (getContextTokens(currentUsage) / maxTokens) * 100 : 0;
104102

105103
// Force-compact when usage exceeds threshold + buffer
106104
const forceCompactThreshold = thresholdPercentage + FORCE_COMPACTION_BUFFER_PERCENT;
107105
const shouldForceCompact = usagePercentage >= forceCompactThreshold;
108106

109107
// Warning uses max of last completed and current (live when streaming)
110108
// This ensures warning shows when live usage spikes above threshold mid-stream
111-
const lastUsagePercentage = lastUsage ? (getTotalTokens(lastUsage) / maxTokens) * 100 : 0;
109+
const lastUsagePercentage = lastUsage ? (getContextTokens(lastUsage) / maxTokens) * 100 : 0;
112110
const shouldShowWarning =
113111
Math.max(lastUsagePercentage, usagePercentage) >= thresholdPercentage - warningAdvancePercent;
114112

src/common/utils/tokens/tokenMeterUtils.ts

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,12 @@ export function calculateTokenMeterData(
6363
const modelStats = getModelStats(model);
6464
const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
6565

66-
const totalUsed =
67-
usage.input.tokens +
68-
usage.cached.tokens +
69-
usage.cacheCreate.tokens +
70-
usage.output.tokens +
71-
usage.reasoning.tokens;
66+
// Context window only includes input-side tokens (what's sent to the model)
67+
// Output and reasoning tokens don't count against context window limits
68+
const contextUsed = usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens;
69+
70+
// Total tokens across all categories (for proportional segment sizing)
71+
const totalUsed = contextUsed + usage.output.tokens + usage.reasoning.tokens;
7272

7373
const toPercentage = (tokens: number) => {
7474
if (verticalProportions) {
@@ -84,7 +84,8 @@ export function calculateTokenMeterData(
8484
color: def.color,
8585
}));
8686

87-
const contextPercentage = maxTokens ? (totalUsed / maxTokens) * 100 : 100;
87+
// Context percentage based only on input-side tokens
88+
const contextPercentage = maxTokens ? (contextUsed / maxTokens) * 100 : 100;
8889

8990
return {
9091
segments,

0 commit comments

Comments
 (0)