Skip to content

Commit c8366c6

Browse files
committed
🤖 fix: detect and abort repetitive Gemini output to prevent token exhaustion
This addresses the known Gemini token exhaustion bug where the model gets stuck in a loop emitting variations like 'I am done. I am done. I am done...' until it exhausts all output tokens. The fix: - Adds RepetitionDetector class that monitors streaming text for repetitive patterns using a sliding window approach - Integrates detector into StreamManager for Gemini models only - Automatically aborts the stream when 10+ repetitions of the same phrase (8-50 chars) are detected within a 2000 char window This is the same approach used by gemini-cli for loop detection. See: google-gemini/gemini-cli#13322 _Generated with `mux`_
1 parent 284dbc7 commit c8366c6

File tree

4 files changed

+337
-1
lines changed

4 files changed

+337
-1
lines changed

‎src/node/services/streamManager.ts‎

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import {
3737
createCachedSystemMessage,
3838
applyCacheControlToTools,
3939
} from "@/common/utils/ai/cacheStrategy";
40+
import { withRepetitionProtection } from "@/node/utils/ai/repetitionStreamWrapper";
4041

4142
// Type definitions for stream parts with extended properties
4243
interface ReasoningDeltaPart {
@@ -541,6 +542,10 @@ export class StreamManager extends EventEmitter {
541542

542543
const messageId = `assistant-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
543544

545+
// Gemini models have a known token exhaustion bug (repetitive output loops); protection is applied where the stream is consumed, via withRepetitionProtection
546+
// @see https://github.com/google-gemini/gemini-cli/issues/13322
547+
// Removed inline detector in favor of stream wrapper approach
548+
544549
const streamInfo: WorkspaceStreamInfo = {
545550
state: StreamState.STARTING,
546551
streamResult,
@@ -658,7 +663,15 @@ export class StreamManager extends EventEmitter {
658663
{ toolCallId: string; toolName: string; input: unknown; output?: unknown }
659664
>();
660665

661-
for await (const part of streamInfo.streamResult.fullStream) {
666+
// Wrap stream with repetition protection (handles Gemini infinite loops)
667+
const protectedStream = withRepetitionProtection(
668+
streamInfo.streamResult.fullStream,
669+
streamInfo.model,
670+
streamInfo.abortController,
671+
workspaceId
672+
);
673+
674+
for await (const part of protectedStream) {
662675
// Check if stream was cancelled BEFORE processing any parts
663676
// This improves interruption responsiveness by catching aborts earlier
664677
if (streamInfo.abortController.signal.aborted) {
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import { describe, test, expect } from "bun:test";
2+
import { RepetitionDetector } from "./repetitionDetector";
3+
4+
describe("RepetitionDetector", () => {
  /** Builds a detector (optionally configured) and feeds it `input` in a single call. */
  const detectorFedWith = (
    input: string,
    options?: ConstructorParameters<typeof RepetitionDetector>[0]
  ): RepetitionDetector => {
    const subject = new RepetitionDetector(options);
    subject.addText(input);
    return subject;
  };

  test("detects period-separated repetition", () => {
    const subject = detectorFedWith("I am done. ".repeat(15));

    expect(subject.isRepetitive()).toBe(true);
    expect(subject.getDetectedPhrase()).toBe("I am done");
  });

  test("detects newline-separated repetition", () => {
    const subject = detectorFedWith("I am done\n".repeat(15));

    expect(subject.isRepetitive()).toBe(true);
  });

  test("detects repetition across multiple addText calls", () => {
    const subject = new RepetitionDetector();

    // Simulate streaming: the same phrase arrives as 15 separate chunks.
    Array.from({ length: 15 }, () => "I am done. ").forEach((chunk) => subject.addText(chunk));

    expect(subject.isRepetitive()).toBe(true);
  });

  test("does not trigger on normal text", () => {
    const variedSentences = [
      "This is a normal response with varied content. ",
      "It talks about different things. ",
      "Each sentence is unique. ",
      "There is no repetition here. ",
      "The model is working correctly. ",
    ];

    const subject = detectorFedWith(variedSentences.join(""));

    expect(subject.isRepetitive()).toBe(false);
  });

  test("does not trigger on short repeated words", () => {
    // "OK" is shorter than the default minPhraseLength, so it must be ignored.
    const subject = detectorFedWith("OK. ".repeat(20));

    expect(subject.isRepetitive()).toBe(false);
  });

  test("handles the exact Gemini bug pattern", () => {
    // Verbatim shape of the reported output: short sign-off lines separated by blank lines.
    const reportedOutput = `I am done.

I will stop.

I am done.

I'm done.

I am done.

I am done.

I am done.

I am done.

I am done.

I am done.

I am done.

I am done.`;

    const subject = detectorFedWith(reportedOutput);

    expect(subject.isRepetitive()).toBe(true);
  });

  test("handles the Gemini CLI loop pattern", () => {
    // Pattern from https://github.com/google-gemini/gemini-cli/issues/13322
    // 12 rounds puts each alternating phrase above the default threshold of 10.
    const subject = detectorFedWith("I'll do it. I'll execute. ".repeat(12));

    expect(subject.isRepetitive()).toBe(true);
  });

  test("reset clears detection state", () => {
    const subject = detectorFedWith("I am done. ".repeat(15));
    expect(subject.isRepetitive()).toBe(true);

    subject.reset();

    expect(subject.isRepetitive()).toBe(false);
    expect(subject.getDetectedPhrase()).toBeNull();
  });

  test("respects custom configuration", () => {
    // With the threshold lowered to 5, six repetitions are enough.
    const subject = detectorFedWith("I am done. ".repeat(6), { repetitionThreshold: 5 });

    expect(subject.isRepetitive()).toBe(true);
  });

  test("stops processing after detection", () => {
    const subject = detectorFedWith("I am done. ".repeat(15));
    expect(subject.isRepetitive()).toBe(true);
    const phraseAtDetection = subject.getDetectedPhrase();

    // Text fed after detection must leave the recorded phrase untouched.
    subject.addText("Something completely different. ".repeat(10));

    expect(subject.getDetectedPhrase()).toBe(phraseAtDetection);
  });
});
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
/**
2+
* Detects repetitive text patterns in streaming output.
3+
*
4+
* This is specifically designed to catch the Gemini token exhaustion bug where
5+
* the model gets stuck in a loop emitting variations of "I am done. I am done. I am done..."
6+
* until it exhausts all output tokens.
7+
*
8+
* The detector uses a sliding window approach to identify when the model is repeating
9+
* short phrases, which is a clear signal of the bug. Normal text may occasionally
10+
* repeat phrases, but not the same phrase 10+ times in a short window.
11+
*
12+
* @see https://github.com/google-gemini/gemini-cli/issues/13322
13+
*/
14+
15+
/**
 * Configuration for repetition detection.
 */
export interface RepetitionDetectorConfig {
  /** Minimum phrase length to track (shorter phrases are too common) */
  minPhraseLength: number;
  /** Maximum phrase length to track (longer phrases are unlikely to repeat exactly) */
  maxPhraseLength: number;
  /** Number of repetitions required to trigger detection */
  repetitionThreshold: number;
  /** Size of the sliding window in characters */
  windowSize: number;
}

const DEFAULT_CONFIG: RepetitionDetectorConfig = {
  minPhraseLength: 8, // "I am done" is 9 chars
  maxPhraseLength: 50, // Long enough to catch varied repetitions
  repetitionThreshold: 10, // 10 repetitions is clearly a bug
  windowSize: 2000, // ~500 tokens worth of text
};

/**
 * Stateful repetition detector for streaming text.
 *
 * Call `addText()` with each text chunk as it streams in.
 * Call `isRepetitive()` to check if repetitive patterns have been detected.
 *
 * Only the most recent `windowSize` characters are retained. Once a phrase has
 * been detected the detector latches: further `addText()` calls are ignored
 * until `reset()` is called.
 */
export class RepetitionDetector {
  private buffer = "";
  private readonly config: RepetitionDetectorConfig;
  private detected = false;
  private detectedPhrase: string | null = null;

  /**
   * @param config - Overrides for the default thresholds. Keys that are absent
   *   or explicitly `undefined` keep their default value.
   */
  constructor(config: Partial<RepetitionDetectorConfig> = {}) {
    // A plain spread would let an explicit `undefined` clobber a default, which
    // turns the length gate in addText() into a NaN comparison and silently
    // disables detection. Copy only the overrides that are actually defined.
    const merged: RepetitionDetectorConfig = { ...DEFAULT_CONFIG };
    for (const key of Object.keys(DEFAULT_CONFIG) as Array<keyof RepetitionDetectorConfig>) {
      const override = config[key];
      if (override !== undefined) {
        merged[key] = override;
      }
    }
    this.config = merged;
  }

  /**
   * Add a chunk of text to the detector.
   * @param text - The text chunk to analyze
   */
  addText(text: string): void {
    if (this.detected) {
      // Already detected, no need to continue analyzing
      return;
    }

    this.buffer += text;

    // Keep only the trailing `windowSize` characters. Slicing from an explicit
    // start index (rather than `slice(-windowSize)`) also handles a window of
    // 0, where `-0` would be treated as index 0 and nothing would be trimmed.
    const overflow = this.buffer.length - this.config.windowSize;
    if (overflow > 0) {
      this.buffer = this.buffer.slice(overflow);
    }

    // Fewer characters than threshold * minimum phrase length cannot contain
    // enough repetitions, so skip the scan until then.
    if (this.buffer.length >= this.config.minPhraseLength * this.config.repetitionThreshold) {
      this.checkForRepetition();
    }
  }

  /**
   * Check if repetitive patterns have been detected.
   */
  isRepetitive(): boolean {
    return this.detected;
  }

  /**
   * Get the detected repetitive phrase, if any.
   * @returns The trimmed phrase that reached the threshold, or `null` if none has.
   */
  getDetectedPhrase(): string | null {
    return this.detectedPhrase;
  }

  /**
   * Reset the detector state.
   */
  reset(): void {
    this.buffer = "";
    this.detected = false;
    this.detectedPhrase = null;
  }

  /**
   * Analyze the buffer for repetitive patterns.
   *
   * Two passes, the second only if the first finds nothing:
   *  1. Line by line, which handles "I am done\nI am done\nI am done".
   *  2. Sentence by sentence after collapsing all whitespace, which handles
   *     "I am done. I am done. I am done.".
   */
  private checkForRepetition(): void {
    const repeated =
      this.findRepeatedPhrase(this.buffer.split(/\n+/)) ??
      // Whitespace (including newlines) is collapsed to single spaces first, so
      // only sentence punctuation is needed as a delimiter here.
      this.findRepeatedPhrase(this.buffer.replace(/\s+/g, " ").split(/[.!?]+/));

    if (repeated !== null) {
      this.detected = true;
      this.detectedPhrase = repeated;
    }
  }

  /**
   * Returns the first candidate (after trimming) whose occurrence count reaches
   * `repetitionThreshold`, or `null` if none does. Candidates whose trimmed
   * length is outside [minPhraseLength, maxPhraseLength] are ignored.
   */
  private findRepeatedPhrase(candidates: readonly string[]): string | null {
    const { minPhraseLength, maxPhraseLength, repetitionThreshold } = this.config;
    const occurrences = new Map<string, number>();

    for (const candidate of candidates) {
      const phrase = candidate.trim();
      if (phrase.length < minPhraseLength || phrase.length > maxPhraseLength) {
        continue;
      }

      const count = (occurrences.get(phrase) ?? 0) + 1;
      occurrences.set(phrase, count);

      if (count >= repetitionThreshold) {
        return phrase;
      }
    }

    return null;
  }
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import { RepetitionDetector } from "./repetitionDetector";
2+
import { log } from "@/node/services/log";
3+
4+
// Minimal interface for stream parts to avoid importing internal types
// or depending on specific AI SDK versions that might not export the union type
interface StreamPartWithText {
  /** Discriminator of the stream part (e.g. "text-delta"). */
  type: string;
  /**
   * Payload of the part, if any. Typed as `unknown` because not every part
   * carries a string here; consumers must narrow with `typeof` before use.
   * (`string | unknown` collapses to `unknown`, so the union was redundant.)
   */
  text?: unknown;
}
10+
11+
/**
 * Guards an AI stream against runaway repetitive output.
 *
 * Mitigates a known Gemini failure mode in which the model loops on the same
 * phrase (e.g., "I am done.", "I'll do it.") until its output tokens run out.
 * Streams for other models are passed through untouched.
 *
 * For protected streams, every string-valued "text-delta" part is fed to a
 * RepetitionDetector. When the detector fires, the event is logged,
 * `abortController` is aborted, and the generator ends without yielding the
 * part that triggered detection.
 *
 * @param stream - Source stream of parts to forward
 * @param modelId - Model identifier; protection applies when it contains
 *   "gemini" or "google" (case-insensitive)
 * @param abortController - Controller that is aborted once repetition is detected
 * @param workspaceId - Included in the log entry for context
 * @see https://github.com/google-gemini/gemini-cli/issues/13322
 */
export async function* withRepetitionProtection<T extends StreamPartWithText>(
  stream: AsyncIterable<T>,
  modelId: string,
  abortController: AbortController,
  workspaceId: string
): AsyncIterable<T> {
  const normalizedId = modelId.toLowerCase();
  const needsProtection = ["gemini", "google"].some((marker) => normalizedId.includes(marker));

  if (needsProtection) {
    const detector = new RepetitionDetector();

    for await (const part of stream) {
      // Only text deltas are inspected; every other part is forwarded as-is.
      const chunk = part.type === "text-delta" ? part.text : undefined;

      if (typeof chunk === "string") {
        detector.addText(chunk);

        if (detector.isRepetitive()) {
          log.info("Repetitive output detected for Gemini model, aborting stream", {
            workspaceId,
            model: modelId,
            detectedPhrase: detector.getDetectedPhrase(),
          });

          // Signal upstream to stop, then end without yielding the offending part.
          abortController.abort();
          return;
        }
      }

      yield part;
    }

    return;
  }

  // Other models: plain pass-through.
  yield* stream;
}

0 commit comments

Comments
 (0)