diff --git a/src/core/mentions/__tests__/index.spec.ts b/src/core/mentions/__tests__/index.spec.ts
index 8f229c28b87..8659d719b03 100644
--- a/src/core/mentions/__tests__/index.spec.ts
+++ b/src/core/mentions/__tests__/index.spec.ts
@@ -1,5 +1,8 @@
 // npx vitest core/mentions/__tests__/index.spec.ts
 
+import * as path from "path"
+import * as fs from "fs/promises"
+import * as os from "os"
 import * as vscode from "vscode"
 
 import { parseMentions } from "../index"
@@ -157,3 +160,108 @@ describe("parseMentions - URL error handling", () => {
 		expect(result.text).toContain("Error fetching content: timeout")
 	})
 })
+
+describe("parseMentions - file token budget", () => {
+	let mockUrlContentFetcher: UrlContentFetcher
+	let tempDir: string
+
+	beforeEach(async () => {
+		vi.clearAllMocks()
+
+		mockUrlContentFetcher = {
+			launchBrowser: vi.fn(),
+			urlToMarkdown: vi.fn(),
+			closeBrowser: vi.fn(),
+		} as any
+
+		// Create a temp directory for test files
+		tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "mentions-test-"))
+	})
+
+	afterEach(async () => {
+		// Clean up temp directory
+		await fs.rm(tempDir, { recursive: true, force: true })
+	})
+
+	it("should truncate large files when maxFileTokenBudget is specified", async () => {
+		// Create a large file with many lines
+		const lines = Array.from(
+			{ length: 1000 },
+			(_, i) => `Line ${i + 1}: This is some content that will be repeated to make the file larger.`,
+		)
+		const largeContent = lines.join("\n")
+		const filePath = path.join(tempDir, "large-file.txt")
+		await fs.writeFile(filePath, largeContent, "utf8")
+
+		// Use a small token budget to force truncation
+		const result = await parseMentions(
+			`Check @/${path.basename(filePath)}`,
+			tempDir,
+			mockUrlContentFetcher,
+			undefined,
+			undefined,
+			false,
+			true,
+			50,
+			undefined,
+			50, // Small token budget
+		)
+
+		// Should contain truncation message
+		expect(result.text).toContain("[File truncated:")
+		expect(result.text).toContain("within token budget of 50")
+		expect(result.text).toContain("Use the read_file tool to examine specific sections")
+	})
+
+	it("should read entire small file when within token budget", async () => {
+		// Create a small file
+		const smallContent = "Line 1: Hello\nLine 2: World"
+		const filePath = path.join(tempDir, "small-file.txt")
+		await fs.writeFile(filePath, smallContent, "utf8")
+
+		// Use a large token budget
+		const result = await parseMentions(
+			`Check @/${path.basename(filePath)}`,
+			tempDir,
+			mockUrlContentFetcher,
+			undefined,
+			undefined,
+			false,
+			true,
+			50,
+			undefined,
+			10000, // Large token budget
+		)
+
+		// Should not contain truncation message
+		expect(result.text).not.toContain("[File truncated:")
+		expect(result.text).toContain("1 | Line 1: Hello")
+		expect(result.text).toContain("2 | Line 2: World")
+	})
+
+	it("should fall back to line-based reading when no token budget specified", async () => {
+		// Create a file
+		const content = "Line 1: Hello\nLine 2: World\nLine 3: Test"
+		const filePath = path.join(tempDir, "test-file.txt")
+		await fs.writeFile(filePath, content, "utf8")
+
+		// Don't specify token budget
+		const result = await parseMentions(
+			`Check @/${path.basename(filePath)}`,
+			tempDir,
+			mockUrlContentFetcher,
+			undefined,
+			undefined,
+			false,
+			true,
+			50,
+			undefined, // No maxReadFileLine
+			undefined, // No maxFileTokenBudget
+		)
+
+		// Should read the full file
+		expect(result.text).toContain("1 | Line 1: Hello")
+		expect(result.text).toContain("2 | Line 2: World")
+		expect(result.text).toContain("3 | Line 3: Test")
+	})
+})
diff --git a/src/core/mentions/__tests__/processUserContentMentions.spec.ts b/src/core/mentions/__tests__/processUserContentMentions.spec.ts
index ec2e08f92ae..a60bf0346d1 100644
--- a/src/core/mentions/__tests__/processUserContentMentions.spec.ts
+++ b/src/core/mentions/__tests__/processUserContentMentions.spec.ts
@@ -57,6 +57,7 @@ describe("processUserContentMentions", () => {
 				true, // includeDiagnosticMessages
 				50, // maxDiagnosticMessages
 				100,
+				undefined, // maxFileTokenBudget
 			)
 		})
 
@@ -86,6 +87,7 @@ describe("processUserContentMentions", () => {
 				true, // includeDiagnosticMessages
 				50, // maxDiagnosticMessages
 				undefined,
+				undefined, // maxFileTokenBudget
 			)
 		})
 
@@ -116,6 +118,7 @@ describe("processUserContentMentions", () => {
 				true, // includeDiagnosticMessages
 				50, // maxDiagnosticMessages
 				-1,
+				undefined, // maxFileTokenBudget
 			)
 		})
 	})
@@ -326,6 +329,7 @@ describe("processUserContentMentions", () => {
 				true, // includeDiagnosticMessages
 				50, // maxDiagnosticMessages
 				undefined,
+				undefined, // maxFileTokenBudget
 			)
 		})
 
@@ -355,6 +359,7 @@ describe("processUserContentMentions", () => {
 				true, // includeDiagnosticMessages
 				50, // maxDiagnosticMessages
 				undefined,
+				undefined, // maxFileTokenBudget
 			)
 		})
 	})
diff --git a/src/core/mentions/index.ts b/src/core/mentions/index.ts
index 2bbbf9ed0dc..82051c68f41 100644
--- a/src/core/mentions/index.ts
+++ b/src/core/mentions/index.ts
@@ -9,7 +9,8 @@ import { mentionRegexGlobal, commandRegexGlobal, unescapeSpaces } from "../../sh
 import { getCommitInfo, getWorkingState } from "../../utils/git"
 
 import { openFile } from "../../integrations/misc/open-file"
-import { extractTextFromFile } from "../../integrations/misc/extract-text"
+import { extractTextFromFile, addLineNumbers } from "../../integrations/misc/extract-text"
+import { readFileWithTokenBudget } from "../../integrations/misc/read-file-with-budget"
 import { diagnosticsToProblemsString } from "../../integrations/diagnostics"
 
 import { UrlContentFetcher } from "../../services/browser/UrlContentFetcher"
@@ -86,6 +87,7 @@ export async function parseMentions(
 	includeDiagnosticMessages: boolean = true,
 	maxDiagnosticMessages: number = 50,
 	maxReadFileLine?: number,
+	maxFileTokenBudget?: number,
 ): Promise {
 	const mentions: Set = new Set()
 	const validCommands: Map = new Map()
@@ -198,6 +200,7 @@ export async function parseMentions(
 					rooIgnoreController,
 					showRooIgnoredFiles,
 					maxReadFileLine,
+					maxFileTokenBudget,
 				)
 				if (mention.endsWith("/")) {
 					parsedText += `\n\n\n${content}\n`
@@ -276,6 +279,7 @@ async function getFileOrFolderContent(
 	rooIgnoreController?: any,
 	showRooIgnoredFiles: boolean = false,
 	maxReadFileLine?: number,
+	maxFileTokenBudget?: number,
 ): Promise {
 	const unescapedPath = unescapeSpaces(mentionPath)
 	const absPath = path.resolve(cwd, unescapedPath)
@@ -294,6 +298,28 @@ async function getFileOrFolderContent(
 				return `(File ${mentionPath} is ignored by .rooignore)`
 			}
 			try {
+				// Use token-budget based reading if a budget is specified. Binary files are
+				// skipped here, mirroring the binary check in the folder branch below, so they
+				// keep going through extractTextFromFile rather than the token-budget reader.
+				if (
+					maxFileTokenBudget &&
+					maxFileTokenBudget > 0 &&
+					!(await isBinaryFile(absPath).catch(() => false))
+				) {
+					const result = await readFileWithTokenBudget(absPath, {
+						budgetTokens: maxFileTokenBudget,
+					})
+					// Add line numbers to the content (similar to extractTextFromFile)
+					const numberedContent = addLineNumbers(result.content)
+					if (!result.complete) {
+						return (
+							numberedContent +
+							`\n\n[File truncated: read ${result.lineCount} lines (${result.tokenCount} tokens) within token budget of ${maxFileTokenBudget}. Use the read_file tool to examine specific sections.]`
+						)
+					}
+					return numberedContent
+				}
+				// Fall back to line-based reading
 				const content = await extractTextFromFile(absPath, maxReadFileLine)
 				return content
 			} catch (error) {
@@ -330,10 +356,22 @@ async function getFileOrFolderContent(
 						fileContentPromises.push(
 							(async () => {
 								try {
 									const isBinary = await isBinaryFile(absoluteFilePath).catch(() => false)
 									if (isBinary) {
 										return undefined
 									}
+									// Use token-budget based reading if budget is specified
+									if (maxFileTokenBudget && maxFileTokenBudget > 0) {
+										const result = await readFileWithTokenBudget(absoluteFilePath, {
+											budgetTokens: maxFileTokenBudget,
+										})
+										const numberedContent = addLineNumbers(result.content)
+										let content = numberedContent
+										if (!result.complete) {
+											content += `\n\n[File truncated: read ${result.lineCount} lines (${result.tokenCount} tokens) within token budget of ${maxFileTokenBudget}. Use the read_file tool to examine specific sections.]`
+										}
+										return `\n${content}\n`
+									}
 									const content = await extractTextFromFile(absoluteFilePath, maxReadFileLine)
 									return `\n${content}\n`
 								} catch (error) {
diff --git a/src/core/mentions/processUserContentMentions.ts b/src/core/mentions/processUserContentMentions.ts
index 5ea78f4dc30..ff71717525a 100644
--- a/src/core/mentions/processUserContentMentions.ts
+++ b/src/core/mentions/processUserContentMentions.ts
@@ -21,6 +21,7 @@ export async function processUserContentMentions({
 	includeDiagnosticMessages = true,
 	maxDiagnosticMessages = 50,
 	maxReadFileLine,
+	maxFileTokenBudget,
 }: {
 	userContent: Anthropic.Messages.ContentBlockParam[]
 	cwd: string
@@ -31,6 +32,7 @@ export async function processUserContentMentions({
 	includeDiagnosticMessages?: boolean
 	maxDiagnosticMessages?: number
 	maxReadFileLine?: number
+	maxFileTokenBudget?: number
 }): Promise {
 	// Track the first mode found from slash commands
 	let commandMode: string | undefined
@@ -65,6 +67,7 @@ export async function processUserContentMentions({
 						includeDiagnosticMessages,
 						maxDiagnosticMessages,
 						maxReadFileLine,
+						maxFileTokenBudget,
 					)
 					// Capture the first mode found
 					if (!commandMode && result.mode) {
@@ -90,6 +93,7 @@ export async function processUserContentMentions({
 							includeDiagnosticMessages,
 							maxDiagnosticMessages,
 							maxReadFileLine,
+							maxFileTokenBudget,
 						)
 						// Capture the first mode found
 						if (!commandMode && result.mode) {
@@ -116,6 +120,7 @@ export async function processUserContentMentions({
 									includeDiagnosticMessages,
 									maxDiagnosticMessages,
 									maxReadFileLine,
+									maxFileTokenBudget,
 								)
 								// Capture the first mode found
 								if (!commandMode && result.mode) {
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index e933fbff2cd..1a969f66d06 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -2607,6 +2607,14 @@ export class Task extends EventEmitter implements TaskLike {
 					maxReadFileLine = -1,
 				} = (await this.providerRef.deref()?.getState()) ?? {}
 
+				// Calculate token budget for file mentions based on context window
+				// Use 10% of context window per file mention to prevent context exhaustion
+				// This is more conservative than the read_file tool (which uses 60% of remaining context)
+				// because mentions are processed before context usage is known
+				const modelInfo = this.api.getModel().info
+				const contextWindow = modelInfo.contextWindow || 200_000
+				const maxFileTokenBudget = Math.floor(contextWindow * 0.1) // 10% of context window per file
+
 				const { content: parsedUserContent, mode: slashCommandMode } = await processUserContentMentions({
 					userContent: currentUserContent,
 					cwd: this.cwd,
@@ -2617,6 +2625,7 @@ export class Task extends EventEmitter implements TaskLike {
 					includeDiagnosticMessages,
 					maxDiagnosticMessages,
 					maxReadFileLine,
+					maxFileTokenBudget,
 				})
 
 				// Switch mode if specified in a slash command's frontmatter