From c8366c6547ed6baaa676225f989893bf757fa2fe Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 1 Dec 2025 12:15:44 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20detect=20and=20abort=20re?= =?UTF-8?q?petitive=20Gemini=20output=20to=20prevent=20token=20exhaustion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This addresses the known Gemini token exhaustion bug where the model gets stuck in a loop emitting variations like 'I am done. I am done. I am done...' until it exhausts all output tokens. The fix: - Adds RepetitionDetector class that monitors streaming text for repetitive patterns using a sliding window approach - Integrates detector into StreamManager for Gemini models only - Automatically aborts the stream when 10+ repetitions of the same phrase (8-50 chars) are detected within a 2000 char window This is the same approach used by gemini-cli for loop detection. See: https://github.com/google-gemini/gemini-cli/issues/13322 _Generated with `mux`_ --- src/node/services/streamManager.ts | 15 +- src/node/utils/ai/repetitionDetector.test.ts | 115 ++++++++++++++ src/node/utils/ai/repetitionDetector.ts | 150 +++++++++++++++++++ src/node/utils/ai/repetitionStreamWrapper.ts | 58 +++++++ 4 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 src/node/utils/ai/repetitionDetector.test.ts create mode 100644 src/node/utils/ai/repetitionDetector.ts create mode 100644 src/node/utils/ai/repetitionStreamWrapper.ts diff --git a/src/node/services/streamManager.ts b/src/node/services/streamManager.ts index c09d2ec716..84a99c5acb 100644 --- a/src/node/services/streamManager.ts +++ b/src/node/services/streamManager.ts @@ -37,6 +37,7 @@ import { createCachedSystemMessage, applyCacheControlToTools, } from "@/common/utils/ai/cacheStrategy"; +import { withRepetitionProtection } from "@/node/utils/ai/repetitionStreamWrapper"; // Type definitions for stream parts with extended properties interface ReasoningDeltaPart { @@ 
-541,6 +542,10 @@ export class StreamManager extends EventEmitter { const messageId = `assistant-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; + // Create repetition detector for Gemini models (known token exhaustion bug) + // @see https://github.com/google-gemini/gemini-cli/issues/13322 + // Removed inline detector in favor of stream wrapper approach + const streamInfo: WorkspaceStreamInfo = { state: StreamState.STARTING, streamResult, @@ -658,7 +663,15 @@ export class StreamManager extends EventEmitter { { toolCallId: string; toolName: string; input: unknown; output?: unknown } >(); - for await (const part of streamInfo.streamResult.fullStream) { + // Wrap stream with repetition protection (handles Gemini infinite loops) + const protectedStream = withRepetitionProtection( + streamInfo.streamResult.fullStream, + streamInfo.model, + streamInfo.abortController, + workspaceId + ); + + for await (const part of protectedStream) { // Check if stream was cancelled BEFORE processing any parts // This improves interruption responsiveness by catching aborts earlier if (streamInfo.abortController.signal.aborted) { diff --git a/src/node/utils/ai/repetitionDetector.test.ts b/src/node/utils/ai/repetitionDetector.test.ts new file mode 100644 index 0000000000..07c99718e6 --- /dev/null +++ b/src/node/utils/ai/repetitionDetector.test.ts @@ -0,0 +1,115 @@ +import { describe, test, expect } from "bun:test"; +import { RepetitionDetector } from "./repetitionDetector"; + +describe("RepetitionDetector", () => { + test("detects period-separated repetition", () => { + const detector = new RepetitionDetector(); + const repeatedPhrase = "I am done. 
".repeat(15); + detector.addText(repeatedPhrase); + expect(detector.isRepetitive()).toBe(true); + expect(detector.getDetectedPhrase()).toBe("I am done"); + }); + + test("detects newline-separated repetition", () => { + const detector = new RepetitionDetector(); + const repeatedPhrase = "I am done\n".repeat(15); + detector.addText(repeatedPhrase); + expect(detector.isRepetitive()).toBe(true); + }); + + test("detects repetition across multiple addText calls", () => { + const detector = new RepetitionDetector(); + for (let i = 0; i < 15; i++) { + detector.addText("I am done. "); + } + expect(detector.isRepetitive()).toBe(true); + }); + + test("does not trigger on normal text", () => { + const detector = new RepetitionDetector(); + detector.addText( + "This is a normal response with varied content. " + + "It talks about different things. " + + "Each sentence is unique. " + + "There is no repetition here. " + + "The model is working correctly. " + ); + expect(detector.isRepetitive()).toBe(false); + }); + + test("does not trigger on short repeated words", () => { + const detector = new RepetitionDetector(); + // Short phrases like "OK. OK. OK." should not trigger (below minPhraseLength) + detector.addText("OK. ".repeat(20)); + expect(detector.isRepetitive()).toBe(false); + }); + + test("handles the exact Gemini bug pattern", () => { + const detector = new RepetitionDetector(); + // This is the actual pattern reported in the bug + const bugPattern = `I am done. + +I will stop. + +I am done. + +I'm done. + +I am done. + +I am done. + +I am done. + +I am done. + +I am done. + +I am done. + +I am done. + +I am done.`; + detector.addText(bugPattern); + expect(detector.isRepetitive()).toBe(true); + }); + + test("handles the Gemini CLI loop pattern", () => { + const detector = new RepetitionDetector(); + // Pattern from https://github.com/google-gemini/gemini-cli/issues/13322 + // Need enough repetitions to trigger the threshold (default 10) + const cliPattern = "I'll do it. 
I'll execute. ".repeat(12); + detector.addText(cliPattern); + expect(detector.isRepetitive()).toBe(true); + }); + + test("reset clears detection state", () => { + const detector = new RepetitionDetector(); + detector.addText("I am done. ".repeat(15)); + expect(detector.isRepetitive()).toBe(true); + + detector.reset(); + + expect(detector.isRepetitive()).toBe(false); + expect(detector.getDetectedPhrase()).toBeNull(); + }); + + test("respects custom configuration", () => { + const detector = new RepetitionDetector({ + repetitionThreshold: 5, // Lower threshold + }); + detector.addText("I am done. ".repeat(6)); + expect(detector.isRepetitive()).toBe(true); + }); + + test("stops processing after detection", () => { + const detector = new RepetitionDetector(); + detector.addText("I am done. ".repeat(15)); + expect(detector.isRepetitive()).toBe(true); + const phrase = detector.getDetectedPhrase(); + + // Adding more text should not change the result + detector.addText("Something completely different. ".repeat(10)); + expect(detector.getDetectedPhrase()).toBe(phrase); + }); +}); diff --git a/src/node/utils/ai/repetitionDetector.ts b/src/node/utils/ai/repetitionDetector.ts new file mode 100644 index 0000000000..b161544e9b --- /dev/null +++ b/src/node/utils/ai/repetitionDetector.ts @@ -0,0 +1,150 @@ +/** + * Detects repetitive text patterns in streaming output. + * + * This is specifically designed to catch the Gemini token exhaustion bug where + * the model gets stuck in a loop emitting variations of "I am done. I am done. I am done..." + * until it exhausts all output tokens. + * + * The detector uses a sliding window approach to identify when the model is repeating + * short phrases, which is a clear signal of the bug. Normal text may occasionally + * repeat phrases, but not the same phrase 10+ times in a short window. 
+ * + * @see https://github.com/google-gemini/gemini-cli/issues/13322 + */ + +/** + * Configuration for repetition detection + */ +export interface RepetitionDetectorConfig { + /** Minimum phrase length to track (shorter phrases are too common) */ + minPhraseLength: number; + /** Maximum phrase length to track (longer phrases are unlikely to repeat exactly) */ + maxPhraseLength: number; + /** Number of repetitions required to trigger detection */ + repetitionThreshold: number; + /** Size of the sliding window in characters */ + windowSize: number; +} + +const DEFAULT_CONFIG: RepetitionDetectorConfig = { + minPhraseLength: 8, // "I am done" is 9 chars + maxPhraseLength: 50, // Long enough to catch varied repetitions + repetitionThreshold: 10, // 10 repetitions is clearly a bug + windowSize: 2000, // ~500 tokens worth of text +}; + +/** + * Stateful repetition detector for streaming text. + * + * Call `addText()` with each text chunk as it streams in. + * Call `isRepetitive()` to check if repetitive patterns have been detected. + */ +export class RepetitionDetector { + private buffer = ""; + private readonly config: RepetitionDetectorConfig; + private detected = false; + private detectedPhrase: string | null = null; + + constructor(config: Partial<RepetitionDetectorConfig> = {}) { + this.config = { ...DEFAULT_CONFIG, ...config }; + } + + /** + * Add a chunk of text to the detector. + * @param text - The text chunk to analyze + */ + addText(text: string): void { + if (this.detected) { + // Already detected, no need to continue analyzing + return; + } + + this.buffer += text; + + // Trim buffer to window size + if (this.buffer.length > this.config.windowSize) { + this.buffer = this.buffer.slice(-this.config.windowSize); + } + + // Check for repetition after accumulating enough text + if (this.buffer.length >= this.config.minPhraseLength * this.config.repetitionThreshold) { + this.checkForRepetition(); + } + } + + /** + * Check if repetitive patterns have been detected.
+ */ + isRepetitive(): boolean { + return this.detected; + } + + /** + * Get the detected repetitive phrase, if any. + */ + getDetectedPhrase(): string | null { + return this.detectedPhrase; + } + + /** + * Reset the detector state. + */ + reset(): void { + this.buffer = ""; + this.detected = false; + this.detectedPhrase = null; + } + + /** + * Analyze the buffer for repetitive patterns. + * + * Strategy: Look for short phrases that appear multiple times. + * Split on common sentence boundaries and count phrase occurrences. + */ + private checkForRepetition(): void { + // First check line-by-line (before normalizing newlines away) + // This handles "I am done\nI am done\nI am done" + const lines = this.buffer.split(/\n+/).map((l) => l.trim()); + const lineCounts = new Map<string, number>(); + for (const line of lines) { + if ( + line.length >= this.config.minPhraseLength && + line.length <= this.config.maxPhraseLength + ) { + const count = (lineCounts.get(line) ?? 0) + 1; + lineCounts.set(line, count); + + if (count >= this.config.repetitionThreshold) { + this.detected = true; + this.detectedPhrase = line; + return; + } + } + } + + // Normalize whitespace to make matching easier + const normalized = this.buffer.replace(/\s+/g, " ").trim(); + + // Split into sentences/phrases on common boundaries + // This handles patterns like "I am done. I am done. I am done." + const phrases = normalized.split(/[.!?\n]+/).map((p) => p.trim()); + + // Count phrase occurrences + const phraseCounts = new Map<string, number>(); + for (const phrase of phrases) { + if ( + phrase.length >= this.config.minPhraseLength && + phrase.length <= this.config.maxPhraseLength + ) { + const count = (phraseCounts.get(phrase) ??
0) + 1; + phraseCounts.set(phrase, count); + + if (count >= this.config.repetitionThreshold) { + this.detected = true; + this.detectedPhrase = phrase; + return; + } + } + } + } +} diff --git a/src/node/utils/ai/repetitionStreamWrapper.ts b/src/node/utils/ai/repetitionStreamWrapper.ts new file mode 100644 index 0000000000..2b9ca6afb9 --- /dev/null +++ b/src/node/utils/ai/repetitionStreamWrapper.ts @@ -0,0 +1,58 @@ +import { RepetitionDetector } from "./repetitionDetector"; +import { log } from "@/node/services/log"; + +// Minimal interface for stream parts to avoid importing internal types +// or depending on specific AI SDK versions that might not export the union type +interface StreamPartWithText { + type: string; + text?: string | unknown; +} + +/** + * Wraps an AI stream with repetition detection protection. + * + * This is a mitigation for known Gemini model bugs where the model enters + * an infinite loop of repeating phrases (e.g., "I am done.", "I'll do it."). + * + * @see https://github.com/google-gemini/gemini-cli/issues/13322 + */ +export async function* withRepetitionProtection<T extends StreamPartWithText>( + stream: AsyncIterable<T>, + modelId: string, + abortController: AbortController, + workspaceId: string +): AsyncIterable<T> { + // Only apply protection to Gemini models + const isGemini = modelId.toLowerCase().includes("gemini") || modelId.toLowerCase().includes("google"); + + if (!isGemini) { + yield* stream; + return; + } + + const detector = new RepetitionDetector(); + + for await (const part of stream) { + // Check text deltas for repetition + if (part.type === "text-delta" && typeof part.text === "string") { + detector.addText(part.text); + + if (detector.isRepetitive()) { + const phrase = detector.getDetectedPhrase(); + log.info("Repetitive output detected for Gemini model, aborting stream", { + workspaceId, + model: modelId, + detectedPhrase: phrase, + }); + + // Abort the stream to stop upstream consumption + abortController.abort(); + + // Stop yielding parts immediately +
return; + } + } + + yield part; + } +}