Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 8 additions & 133 deletions bun.lock

Large diffs are not rendered by default.

87 changes: 62 additions & 25 deletions src/browser/components/AIView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import { evictModelFromLRU } from "@/browser/hooks/useModelLRU";
import { QueuedMessage } from "./Messages/QueuedMessage";
import { CompactionWarning } from "./CompactionWarning";
import { checkAutoCompaction } from "@/browser/utils/compaction/autoCompactionCheck";
import { executeCompaction } from "@/browser/utils/chatCommands";
import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
import { useAutoCompactionSettings } from "../hooks/useAutoCompactionSettings";
import { useSendMessageOptions } from "@/browser/hooks/useSendMessageOptions";
Expand Down Expand Up @@ -120,6 +121,67 @@ const AIViewInner: React.FC<AIViewProps> = ({
undefined
);

// Use send options for auto-compaction check
const pendingSendOptions = useSendMessageOptions(workspaceId);

// Track if we've already triggered force compaction for this stream
const forceCompactionTriggeredRef = useRef<string | null>(null);

// Extract state from workspace state
const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;

// Get active stream message ID for token counting
const activeStreamMessageId = aggregator.getActiveStreamMessageId();

// Use pending send model for auto-compaction check, not the last stream's model.
// This ensures the threshold is based on the model the user will actually send with,
// preventing context-length errors when switching from a large-context to smaller model.
const pendingModel = pendingSendOptions.model;

const autoCompactionResult = checkAutoCompaction(
workspaceUsage,
pendingModel,
use1M,
autoCompactionEnabled,
autoCompactionThreshold / 100
);

// Show warning when: shouldShowWarning flag is true AND not currently compacting
const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;

// Force compaction when live usage shows we're about to hit context limit
useEffect(() => {
if (
!autoCompactionResult.shouldForceCompact ||
!canInterrupt ||
isCompacting ||
forceCompactionTriggeredRef.current === activeStreamMessageId
) {
return;
}

forceCompactionTriggeredRef.current = activeStreamMessageId ?? null;
void executeCompaction({
workspaceId,
sendMessageOptions: pendingSendOptions,
continueMessage: { text: "Continue with the current task" },
});
}, [
autoCompactionResult.shouldForceCompact,
canInterrupt,
isCompacting,
activeStreamMessageId,
workspaceId,
pendingSendOptions,
]);

// Reset force compaction trigger when stream ends
useEffect(() => {
if (!canInterrupt) {
forceCompactionTriggeredRef.current = null;
}
}, [canInterrupt]);

// Auto-retry state - minimal setter for keybinds and message sent handler
// RetryBarrier manages its own state, but we need this for interrupt keybind
const [, setAutoRetry] = usePersistedState<boolean>(
Expand All @@ -144,9 +206,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
markUserInteraction,
} = useAutoScroll();

// Use send options for auto-compaction check
const pendingSendOptions = useSendMessageOptions(workspaceId);

// ChatInput API for focus management
const chatInputAPI = useRef<ChatInputAPI | null>(null);
const handleChatInputReady = useCallback((api: ChatInputAPI) => {
Expand Down Expand Up @@ -329,28 +388,6 @@ const AIViewInner: React.FC<AIViewProps> = ({
);
}

// Extract state from workspace state
const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState;

// Get active stream message ID for token counting
const activeStreamMessageId = aggregator.getActiveStreamMessageId();

// Use pending send model for auto-compaction check, not the last stream's model.
// This ensures the threshold is based on the model the user will actually send with,
// preventing context-length errors when switching from a large-context to smaller model.
const pendingModel = pendingSendOptions.model;

const autoCompactionResult = checkAutoCompaction(
workspaceUsage,
pendingModel,
use1M,
autoCompactionEnabled,
autoCompactionThreshold / 100
);

// Show warning when: shouldShowWarning flag is true AND not currently compacting
const shouldShowCompactionWarning = !isCompacting && autoCompactionResult.shouldShowWarning;

// Note: We intentionally do NOT reset autoRetry when streams start.
// If user pressed the interrupt key, autoRetry stays false until they manually retry.
// This makes state transitions explicit and predictable.
Expand Down
5 changes: 4 additions & 1 deletion src/browser/utils/chatCommands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { resolveCompactionModel } from "@/browser/utils/messages/compactionModel
import type { ImageAttachment } from "../components/ImageAttachments";
import { dispatchWorkspaceSwitch } from "./workspaceEvents";
import { getRuntimeKey, copyWorkspaceStorage } from "@/common/constants/storage";
import { DEFAULT_COMPACTION_WORD_TARGET, WORDS_TO_TOKENS_RATIO } from "@/common/constants/ui";

// ============================================================================
// Workspace Creation
Expand Down Expand Up @@ -572,7 +573,9 @@ export function prepareCompactionMessage(options: CompactionOptions): {
metadata: MuxFrontendMetadata;
sendOptions: SendMessageOptions;
} {
const targetWords = options.maxOutputTokens ? Math.round(options.maxOutputTokens / 1.3) : 2000;
const targetWords = options.maxOutputTokens
? Math.round(options.maxOutputTokens / WORDS_TO_TOKENS_RATIO)
: DEFAULT_COMPACTION_WORD_TARGET;

// Build compaction message with optional continue context
let messageText = `Summarize this conversation into a compact form for a new Assistant to continue helping the user. Focus entirely on the summary of what has happened. Do not suggest next steps or future actions. Use approximately ${targetWords} words.`;
Expand Down
79 changes: 77 additions & 2 deletions src/browser/utils/compaction/autoCompactionCheck.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { checkAutoCompaction } from "./autoCompactionCheck";
import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
import { KNOWN_MODELS } from "@/common/constants/knownModels";
import { FORCE_COMPACTION_TOKEN_BUFFER } from "@/common/constants/ui";

// Helper to create a mock usage entry
const createUsageEntry = (
Expand All @@ -28,7 +29,8 @@ const createUsageEntry = (
const createMockUsage = (
lastEntryTokens: number,
historicalTokens?: number,
model: string = KNOWN_MODELS.SONNET.id
model: string = KNOWN_MODELS.SONNET.id,
liveUsage?: ChatUsageDisplay
): WorkspaceUsageState => {
const usageHistory: ChatUsageDisplay[] = [];

Expand All @@ -40,7 +42,7 @@ const createMockUsage = (
// Add recent usage
usageHistory.push(createUsageEntry(lastEntryTokens, model));

return { usageHistory, totalTokens: 0 };
return { usageHistory, totalTokens: 0, liveUsage };
};

describe("checkAutoCompaction", () => {
Expand Down Expand Up @@ -297,4 +299,77 @@ describe("checkAutoCompaction", () => {
expect(result.shouldShowWarning).toBe(true); // Above 60%
});
});

  // Tests for the shouldForceCompact flag, which is computed from liveUsage
  // (falling back to the last historical entry when no liveUsage is present).
  describe("Force Compaction (Live Usage)", () => {
    // Context window size used by these fixtures (Sonnet: 200k tokens)
    const SONNET_MAX_TOKENS = 200_000;
    // Remaining-token buffer at or below which force compaction triggers
    const BUFFER = FORCE_COMPACTION_TOKEN_BUFFER;

    test("shouldForceCompact is false when no liveUsage (falls back to lastUsage with room)", () => {
      const usage = createMockUsage(100_000); // 100k remaining - plenty of room
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);

      expect(result.shouldForceCompact).toBe(false);
    });

    test("shouldForceCompact is false when currentUsage has plenty of room", () => {
      const liveUsage = createUsageEntry(100_000); // 100k remaining
      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);

      expect(result.shouldForceCompact).toBe(false);
    });

    test("shouldForceCompact is true when remaining <= buffer", () => {
      // Exactly at buffer threshold
      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);

      expect(result.shouldForceCompact).toBe(true);
    });

    test("shouldForceCompact is true when over context limit", () => {
      // Live usage already exceeds the model's context window
      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS + 5000);
      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);

      expect(result.shouldForceCompact).toBe(true);
    });

    test("shouldForceCompact is false when just above buffer", () => {
      // 1 token above buffer threshold
      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER - 1);
      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);

      expect(result.shouldForceCompact).toBe(false);
    });

    test("shouldForceCompact respects 1M context mode", () => {
      // With 1M context, exactly at buffer threshold
      const liveUsage = createUsageEntry(1_000_000 - BUFFER);
      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, true, true);

      expect(result.shouldForceCompact).toBe(true);
    });

    test("shouldForceCompact triggers with empty history but liveUsage near limit", () => {
      // Bug fix: empty history but liveUsage should still trigger
      const liveUsage = createUsageEntry(SONNET_MAX_TOKENS - BUFFER);
      const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0, liveUsage };
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, true);

      expect(result.shouldForceCompact).toBe(true);
      expect(result.usagePercentage).toBe(0); // No lastUsage for percentage
    });

    test("shouldForceCompact is false when auto-compaction disabled", () => {
      const liveUsage = createUsageEntry(199_000); // Very close to limit
      const usage = createMockUsage(50_000, undefined, KNOWN_MODELS.SONNET.id, liveUsage);
      const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false, false); // disabled

      expect(result.shouldForceCompact).toBe(false);
    });
  });
});
56 changes: 43 additions & 13 deletions src/browser/utils/compaction/autoCompactionCheck.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,29 @@
*/

import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
import { getModelStats } from "@/common/utils/tokens/modelStats";
import { supports1MContext } from "@/common/utils/ai/models";
import { DEFAULT_AUTO_COMPACTION_THRESHOLD } from "@/common/constants/ui";
import {
DEFAULT_AUTO_COMPACTION_THRESHOLD,
FORCE_COMPACTION_TOKEN_BUFFER,
} from "@/common/constants/ui";

/**
 * Total context tokens represented by one ChatUsageDisplay entry.
 *
 * Adds up every token component tracked for a request: input, cached,
 * cache-creation, output, and reasoning tokens.
 */
function getTotalTokens(usage: ChatUsageDisplay): number {
  const components = [
    usage.input,
    usage.cached,
    usage.cacheCreate,
    usage.output,
    usage.reasoning,
  ];
  return components.reduce((total, component) => total + component.tokens, 0);
}

/** Result of an auto-compaction check for a workspace's current usage. */
export interface AutoCompactionCheckResult {
  /** True when usage is within the advance-warning window below the compaction threshold */
  shouldShowWarning: boolean;
  /** True when live usage shows ≤FORCE_COMPACTION_TOKEN_BUFFER remaining in context */
  shouldForceCompact: boolean;
  /** Context usage as a percentage of the model's max input tokens (0 when unknown) */
  usagePercentage: number;
  /** Configured auto-compaction threshold expressed as a percentage (threshold * 100) */
  thresholdPercentage: number;
}
Expand Down Expand Up @@ -54,11 +71,11 @@ export function checkAutoCompaction(
): AutoCompactionCheckResult {
const thresholdPercentage = threshold * 100;

// Short-circuit if auto-compaction is disabled
// Or if no usage data yet
if (!enabled || !model || !usage || usage.usageHistory.length === 0) {
// Short-circuit if auto-compaction is disabled or missing required data
if (!enabled || !model || !usage) {
return {
shouldShowWarning: false,
shouldForceCompact: false,
usagePercentage: 0,
thresholdPercentage,
};
Expand All @@ -67,31 +84,44 @@ export function checkAutoCompaction(
// Determine max tokens for this model
const modelStats = getModelStats(model);
const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];

// No max tokens known - safe default (can't calculate percentage)
if (!maxTokens) {
return {
shouldShowWarning: false,
shouldForceCompact: false,
usagePercentage: 0,
thresholdPercentage,
};
}

const currentContextTokens =
lastUsage.input.tokens +
lastUsage.cached.tokens +
lastUsage.cacheCreate.tokens +
lastUsage.output.tokens +
lastUsage.reasoning.tokens;
// Current usage: live when streaming, else last historical (pattern from CostsTab)
const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
const currentUsage = usage.liveUsage ?? lastUsage;

// Force-compact when approaching context limit (can trigger even with empty history if streaming)
let shouldForceCompact = false;
if (currentUsage) {
const remainingTokens = maxTokens - getTotalTokens(currentUsage);
shouldForceCompact = remainingTokens <= FORCE_COMPACTION_TOKEN_BUFFER;
}

const usagePercentage = (currentContextTokens / maxTokens) * 100;
// Warning/percentage based on lastUsage (completed requests only)
if (!lastUsage) {
return {
shouldShowWarning: false,
shouldForceCompact,
usagePercentage: 0,
thresholdPercentage,
};
}

// Show warning if within advance window (e.g., 60% for 70% threshold with 10% advance)
const usagePercentage = (getTotalTokens(lastUsage) / maxTokens) * 100;
const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent;

return {
shouldShowWarning,
shouldForceCompact,
usagePercentage,
thresholdPercentage,
};
Expand Down
22 changes: 22 additions & 0 deletions src/common/constants/ui.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@ export const DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT = 70;
* Default threshold as decimal for calculations (0.7 = 70%)
*/
export const DEFAULT_AUTO_COMPACTION_THRESHOLD = DEFAULT_AUTO_COMPACTION_THRESHOLD_PERCENT / 100;

/**
 * Default word target for compaction summaries.
 * Used as the fallback when the compaction model's max output tokens are unknown.
 */
export const DEFAULT_COMPACTION_WORD_TARGET = 2000;

/**
 * Approximate ratio of tokens to words (tokens per word).
 * Used for converting between word counts and token counts
 * (divide a token budget by this ratio to get a word target).
 */
export const WORDS_TO_TOKENS_RATIO = 1.3;

/**
 * Force-compaction token buffer.
 * When auto-compaction is enabled and live usage shows this many tokens or fewer
 * remaining in the context window, force a compaction immediately.
 * Set to 2x the expected compaction output size to ensure room for the summary.
 */
export const FORCE_COMPACTION_TOKEN_BUFFER = Math.round(
  2 * DEFAULT_COMPACTION_WORD_TARGET * WORDS_TO_TOKENS_RATIO
); // = 5200 tokens

/**
* Duration (ms) to show "copied" feedback after copying to clipboard
*/
Expand Down
Loading