🤖 fix: use cumulative token usage for auto-compaction check

ethanndickson · ethanndickson · commit 0ef46ab6033b · 2025-11-20T14:04:47.000+11:00
The previous implementation compared only the last message's tokens against the
context window, effectively disabling auto-compaction unless a single response
exceeded 70% of it.

The check now uses WorkspaceUsageState.totalTokens, the cumulative token total
for the conversation.

- Remove lastUsage extraction and manual token calculation
- Use pre-calculated totalTokens from WorkspaceStore
- Simplifies code by ~20 lines
- Auto-compaction now correctly triggers at 70% cumulative usage
diff --git a/src/browser/utils/chatCommands.ts b/src/browser/utils/chatCommands.ts
@@ -21,7 +21,7 @@ import type { ParsedCommand } from "@/browser/utils/slashCommands/types";
 import { applyCompactionOverrides } from "@/browser/utils/messages/compactionOptions";
 import { resolveCompactionModel } from "@/browser/utils/messages/compactionModelPreference";
 import { getRuntimeKey } from "@/common/constants/storage";
-import { ImageAttachment } from "../components/ImageAttachments";
+import type { ImageAttachment } from "../components/ImageAttachments";
 
 // ============================================================================
 // Workspace Creation
diff --git a/src/browser/utils/compaction/autoCompactionCheck.ts b/src/browser/utils/compaction/autoCompactionCheck.ts
@@ -40,7 +40,7 @@ const WARNING_ADVANCE_PERCENT = 10;
  * @param use1M - Whether 1M context is enabled
  * @param threshold - Usage percentage threshold (0.0-1.0, default 0.7 = 70%)
  * @param warningAdvancePercent - Show warning this many percentage points before threshold (default 10)
- * @returns Check result with shouldAutoCompact flag, warning flag, and usage details
+ * @returns Check result with warning flag, usage percentage, and threshold percentage
  */
 export function shouldAutoCompact(
   usage: WorkspaceUsageState | undefined,
@@ -60,16 +60,6 @@ export function shouldAutoCompact(
     };
   }
 
-  // Get last usage (most recent API response)
-  const lastUsage = usage.usageHistory[usage.usageHistory.length - 1];
-  if (!lastUsage) {
-    return {
-      shouldShowWarning: false,
-      usagePercentage: 0,
-      thresholdPercentage,
-    };
-  }
-
   // Determine max tokens for this model
   const modelStats = getModelStats(model);
   const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
@@ -83,16 +73,8 @@ export function shouldAutoCompact(
     };
   }
 
-  // Calculate total tokens used in last request
-  const totalUsed =
-    lastUsage.input.tokens +
-    lastUsage.cached.tokens +
-    lastUsage.cacheCreate.tokens +
-    lastUsage.output.tokens +
-    lastUsage.reasoning.tokens;
-
-  // Calculate usage percentage
-  const usagePercentage = (totalUsed / maxTokens) * 100;
+  // Calculate usage percentage from cumulative conversation total
+  const usagePercentage = (usage.totalTokens / maxTokens) * 100;
 
   // Show warning if within advance window (e.g., 60% for 70% threshold with 10% advance)
   const shouldShowWarning = usagePercentage >= thresholdPercentage - warningAdvancePercent;