Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/node/services/streamManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import {
createCachedSystemMessage,
applyCacheControlToTools,
} from "@/common/utils/ai/cacheStrategy";
import { withRepetitionProtection } from "@/node/utils/ai/repetitionStreamWrapper";

// Type definitions for stream parts with extended properties
interface ReasoningDeltaPart {
Expand Down Expand Up @@ -541,6 +542,10 @@ export class StreamManager extends EventEmitter {

const messageId = `assistant-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;

// Repetition protection for Gemini models (known token exhaustion bug) is no longer
// created inline here; it is applied by wrapping the stream with withRepetitionProtection
// at the point where the stream is consumed.
// @see https://github.com/google-gemini/gemini-cli/issues/13322

const streamInfo: WorkspaceStreamInfo = {
state: StreamState.STARTING,
streamResult,
Expand Down Expand Up @@ -658,7 +663,15 @@ export class StreamManager extends EventEmitter {
{ toolCallId: string; toolName: string; input: unknown; output?: unknown }
>();

for await (const part of streamInfo.streamResult.fullStream) {
// Wrap stream with repetition protection (handles Gemini infinite loops)
const protectedStream = withRepetitionProtection(
streamInfo.streamResult.fullStream,
streamInfo.model,
streamInfo.abortController,
workspaceId
);

for await (const part of protectedStream) {
// Check if stream was cancelled BEFORE processing any parts
// This improves interruption responsiveness by catching aborts earlier
if (streamInfo.abortController.signal.aborted) {
Expand Down
115 changes: 115 additions & 0 deletions src/node/utils/ai/repetitionDetector.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import { describe, test, expect } from "bun:test";
import { RepetitionDetector } from "./repetitionDetector";

describe("RepetitionDetector", () => {
  /**
   * Builds a detector (optionally with custom settings) and feeds it every
   * chunk in order, one `addText` call per chunk.
   */
  const feed = (
    chunks: string[],
    config?: ConstructorParameters<typeof RepetitionDetector>[0]
  ): RepetitionDetector => {
    const instance = new RepetitionDetector(config);
    for (const chunk of chunks) {
      instance.addText(chunk);
    }
    return instance;
  };

  test("detects period-separated repetition", () => {
    const looped = feed(["I am done. ".repeat(15)]);
    expect(looped.isRepetitive()).toBe(true);
    expect(looped.getDetectedPhrase()).toBe("I am done");
  });

  test("detects newline-separated repetition", () => {
    const looped = feed(["I am done\n".repeat(15)]);
    expect(looped.isRepetitive()).toBe(true);
  });

  test("detects repetition across multiple addText calls", () => {
    // Fifteen separate chunks, each carrying one copy of the phrase
    const pieces = Array.from({ length: 15 }, () => "I am done. ");
    expect(feed(pieces).isRepetitive()).toBe(true);
  });

  test("does not trigger on normal text", () => {
    const prose = [
      "This is a normal response with varied content. ",
      "It talks about different things. ",
      "Each sentence is unique. ",
      "There is no repetition here. ",
      "The model is working correctly. ",
    ].join("");
    expect(feed([prose]).isRepetitive()).toBe(false);
  });

  test("does not trigger on short repeated words", () => {
    // "OK" is shorter than minPhraseLength, so even 20 copies must be ignored
    const shortLoop = "OK. ".repeat(20);
    expect(feed([shortLoop]).isRepetitive()).toBe(false);
  });

  test("handles the exact Gemini bug pattern", () => {
    // Verbatim output from the original bug report
    const reported = `I am done.

I will stop.

I am done.

I'm done.

I am done.

I am done.

I am done.

I am done.

I am done.

I am done.

I am done.

I am done.`;
    expect(feed([reported]).isRepetitive()).toBe(true);
  });

  test("handles the Gemini CLI loop pattern", () => {
    // Pattern from https://github.com/google-gemini/gemini-cli/issues/13322
    // Twelve rounds so each phrase clears the default threshold of 10
    const cliLoop = "I'll do it. I'll execute. ".repeat(12);
    expect(feed([cliLoop]).isRepetitive()).toBe(true);
  });

  test("reset clears detection state", () => {
    const subject = feed(["I am done. ".repeat(15)]);
    expect(subject.isRepetitive()).toBe(true);

    subject.reset();

    expect(subject.isRepetitive()).toBe(false);
    expect(subject.getDetectedPhrase()).toBeNull();
  });

  test("respects custom configuration", () => {
    // A lower threshold means six copies are already enough
    const sensitive = feed(["I am done. ".repeat(6)], { repetitionThreshold: 5 });
    expect(sensitive.isRepetitive()).toBe(true);
  });

  test("stops processing after detection", () => {
    const subject = feed(["I am done. ".repeat(15)]);
    expect(subject.isRepetitive()).toBe(true);
    const firstHit = subject.getDetectedPhrase();

    // Text added after detection must not replace the recorded phrase
    subject.addText("Something completely different. ".repeat(10));
    expect(subject.getDetectedPhrase()).toBe(firstHit);
  });
});
150 changes: 150 additions & 0 deletions src/node/utils/ai/repetitionDetector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/**
* Detects repetitive text patterns in streaming output.
*
* This is specifically designed to catch the Gemini token exhaustion bug where
* the model gets stuck in a loop emitting variations of "I am done. I am done. I am done..."
* until it exhausts all output tokens.
*
* The detector uses a sliding window approach to identify when the model is repeating
* short phrases, which is a clear signal of the bug. Normal text may occasionally
* repeat phrases, but not the same phrase 10+ times in a short window.
*
* @see https://github.com/google-gemini/gemini-cli/issues/13322
*/

/**
 * Configuration for repetition detection.
 */
export interface RepetitionDetectorConfig {
  /** Minimum phrase length in characters to track (shorter phrases are too common) */
  minPhraseLength: number;
  /** Maximum phrase length in characters to track (longer phrases are unlikely to repeat exactly) */
  maxPhraseLength: number;
  /** Number of occurrences of the same phrase within the window required to trigger detection */
  repetitionThreshold: number;
  /** Size of the sliding window in characters */
  windowSize: number;
}

const DEFAULT_CONFIG: RepetitionDetectorConfig = {
  minPhraseLength: 8, // "I am done" is 9 chars
  maxPhraseLength: 50, // Long enough to catch varied repetitions
  repetitionThreshold: 10, // 10 repetitions is clearly a bug
  windowSize: 2000, // ~500 tokens worth of text
};

/**
 * Stateful repetition detector for streaming text.
 *
 * Call `addText()` with each text chunk as it streams in.
 * Call `isRepetitive()` to check if repetitive patterns have been detected.
 * Once a repetition is detected the result is latched until `reset()` is called.
 */
export class RepetitionDetector {
  private buffer = "";
  private readonly config: RepetitionDetectorConfig;
  private detected = false;
  private detectedPhrase: string | null = null;

  /**
   * @param config - Optional overrides; any field that is omitted or explicitly
   *   `undefined` falls back to the default value.
   */
  constructor(config: Partial<RepetitionDetectorConfig> = {}) {
    // Resolve field by field rather than spreading: a spread would let an explicit
    // `undefined` overwrite the default, which turns the length gate into a NaN
    // comparison and silently disables detection.
    this.config = {
      minPhraseLength: config.minPhraseLength ?? DEFAULT_CONFIG.minPhraseLength,
      maxPhraseLength: config.maxPhraseLength ?? DEFAULT_CONFIG.maxPhraseLength,
      repetitionThreshold: config.repetitionThreshold ?? DEFAULT_CONFIG.repetitionThreshold,
      windowSize: config.windowSize ?? DEFAULT_CONFIG.windowSize,
    };
  }

  /**
   * Add a chunk of text to the detector and re-evaluate the window.
   * Does nothing once a repetition has already been detected.
   * @param text - The text chunk to analyze
   */
  addText(text: string): void {
    if (this.detected) {
      // Already detected, no need to continue analyzing
      return;
    }

    this.buffer += text;

    const { windowSize, minPhraseLength, repetitionThreshold } = this.config;

    // Keep only the trailing `windowSize` characters. Slicing from a positive start
    // index (instead of `slice(-windowSize)`) avoids the `slice(-0)` pitfall, where a
    // zero window would keep the whole buffer and let it grow without bound.
    const overflow = this.buffer.length - windowSize;
    if (overflow > 0) {
      this.buffer = this.buffer.slice(overflow);
    }

    // Only scan once the buffer is long enough to hold the minimum repeated pattern
    if (this.buffer.length >= minPhraseLength * repetitionThreshold) {
      this.checkForRepetition();
    }
  }

  /**
   * Check if repetitive patterns have been detected.
   */
  isRepetitive(): boolean {
    return this.detected;
  }

  /**
   * Get the detected repetitive phrase, or `null` if nothing has been detected.
   */
  getDetectedPhrase(): string | null {
    return this.detectedPhrase;
  }

  /**
   * Reset the detector state: clears the buffer and any recorded detection.
   */
  reset(): void {
    this.buffer = "";
    this.detected = false;
    this.detectedPhrase = null;
  }

  /**
   * Analyze the buffer for repetitive patterns.
   *
   * Strategy: count occurrences of short phrases in the current window, first
   * treating each line as a phrase, then splitting on sentence punctuation.
   */
  private checkForRepetition(): void {
    // Line pass runs on the raw buffer (before newlines are normalized away).
    // This handles "I am done\nI am done\nI am done".
    let repeated = this.findRepeated(this.buffer.split(/\n+/));

    if (repeated === null) {
      // Sentence pass: collapse all whitespace, then split on sentence boundaries.
      // This handles "I am done. I am done. I am done."
      // Newlines are already spaces at this point, so only punctuation is split on.
      const normalized = this.buffer.replace(/\s+/g, " ").trim();
      repeated = this.findRepeated(normalized.split(/[.!?]+/));
    }

    if (repeated !== null) {
      this.detected = true;
      this.detectedPhrase = repeated;
    }
  }

  /**
   * Returns the first trimmed candidate whose running count reaches the repetition
   * threshold, considering only candidates within the configured length bounds;
   * `null` if none does.
   */
  private findRepeated(candidates: readonly string[]): string | null {
    const { minPhraseLength, maxPhraseLength, repetitionThreshold } = this.config;
    const counts = new Map<string, number>();

    for (const raw of candidates) {
      const phrase = raw.trim();
      if (phrase.length < minPhraseLength || phrase.length > maxPhraseLength) {
        continue;
      }

      const count = (counts.get(phrase) ?? 0) + 1;
      counts.set(phrase, count);

      if (count >= repetitionThreshold) {
        return phrase;
      }
    }

    return null;
  }
}
58 changes: 58 additions & 0 deletions src/node/utils/ai/repetitionStreamWrapper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { RepetitionDetector } from "./repetitionDetector";
import { log } from "@/node/services/log";

// Minimal structural view of a stream part, declared locally to avoid importing internal
// types or depending on specific AI SDK versions that might not export the union type.
interface StreamPartWithText {
  /** Kind of stream part; text chunks are identified by the value "text-delta". */
  type: string;
  /**
   * Text payload, when the part carries one. Typed as `unknown` (the former
   * `string | unknown` collapses to exactly that), so consumers must narrow with
   * `typeof` before using it as a string.
   */
  text?: unknown;
}

/**
 * Relays an AI stream part by part, cutting it short when a Gemini model
 * starts looping on the same phrase (e.g., "I am done.", "I'll do it.").
 *
 * Streams for models whose id contains neither "gemini" nor "google"
 * (case-insensitive) are forwarded untouched. For matching models, every
 * string-valued "text-delta" part is fed to a RepetitionDetector; as soon as
 * the detector reports repetition, the event is logged, the supplied
 * AbortController is aborted, and the generator finishes without yielding the
 * part that triggered detection.
 *
 * @param stream - Source stream of parts to relay
 * @param modelId - Model identifier used to decide whether protection applies
 * @param abortController - Controller that is aborted when repetition is detected
 * @param workspaceId - Included in the log entry for context
 * @returns An async iterable yielding the same parts as `stream`, possibly truncated
 *
 * @see https://github.com/google-gemini/gemini-cli/issues/13322
 */
export async function* withRepetitionProtection<T extends StreamPartWithText>(
  stream: AsyncIterable<T>,
  modelId: string,
  abortController: AbortController,
  workspaceId: string
): AsyncIterable<T> {
  // Protection is limited to Gemini models, matched by substring on the lowercased id
  const normalizedId = modelId.toLowerCase();
  const needsProtection = ["gemini", "google"].some((marker) => normalizedId.includes(marker));

  if (needsProtection) {
    const detector = new RepetitionDetector();

    for await (const chunk of stream) {
      // Only text deltas carrying a string payload are inspected
      const delta = chunk.type === "text-delta" ? chunk.text : undefined;

      if (typeof delta === "string") {
        detector.addText(delta);

        if (detector.isRepetitive()) {
          const detectedPhrase = detector.getDetectedPhrase();
          log.info("Repetitive output detected for Gemini model, aborting stream", {
            workspaceId,
            model: modelId,
            detectedPhrase,
          });

          // Signal upstream to stop, then end the generator without yielding this part
          abortController.abort();
          return;
        }
      }

      yield chunk;
    }
  } else {
    // Every other model: pure pass-through
    yield* stream;
  }
}
Loading