108 changes: 108 additions & 0 deletions src/core/mentions/__tests__/index.spec.ts
@@ -1,5 +1,8 @@
// npx vitest core/mentions/__tests__/index.spec.ts

import * as path from "path"
import * as fs from "fs/promises"
import * as os from "os"
import * as vscode from "vscode"

import { parseMentions } from "../index"
@@ -157,3 +160,108 @@ describe("parseMentions - URL error handling", () => {
expect(result.text).toContain("Error fetching content: timeout")
})
})

describe("parseMentions - file token budget", () => {
let mockUrlContentFetcher: UrlContentFetcher
let tempDir: string

beforeEach(async () => {
vi.clearAllMocks()

mockUrlContentFetcher = {
launchBrowser: vi.fn(),
urlToMarkdown: vi.fn(),
closeBrowser: vi.fn(),
} as any

// Create a temp directory for test files
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "mentions-test-"))
})

afterEach(async () => {
// Clean up temp directory
await fs.rm(tempDir, { recursive: true, force: true })
})

it("should truncate large files when maxFileTokenBudget is specified", async () => {
// Create a large file with many lines
const lines = Array.from(
{ length: 1000 },
(_, i) => `Line ${i + 1}: This is some content that will be repeated to make the file larger.`,
)
const largeContent = lines.join("\n")
const filePath = path.join(tempDir, "large-file.txt")
await fs.writeFile(filePath, largeContent, "utf8")

// Use a small token budget to force truncation
const result = await parseMentions(
`Check @/${path.basename(filePath)}`,
tempDir,
mockUrlContentFetcher,
undefined,
undefined,
false,
true,
50,
undefined,
50, // Small token budget
)

// Should contain truncation message
expect(result.text).toContain("[File truncated:")
expect(result.text).toContain("within token budget of 50")
expect(result.text).toContain("Use the read_file tool to examine specific sections")
})

it("should read entire small file when within token budget", async () => {
// Create a small file
const smallContent = "Line 1: Hello\nLine 2: World"
const filePath = path.join(tempDir, "small-file.txt")
await fs.writeFile(filePath, smallContent, "utf8")

// Use a large token budget
const result = await parseMentions(
`Check @/${path.basename(filePath)}`,
tempDir,
mockUrlContentFetcher,
undefined,
undefined,
false,
true,
50,
undefined,
10000, // Large token budget
)

// Should not contain truncation message
expect(result.text).not.toContain("[File truncated:")
expect(result.text).toContain("1 | Line 1: Hello")
expect(result.text).toContain("2 | Line 2: World")
})

it("should fall back to line-based reading when no token budget specified", async () => {
// Create a file
const content = "Line 1: Hello\nLine 2: World\nLine 3: Test"
const filePath = path.join(tempDir, "test-file.txt")
await fs.writeFile(filePath, content, "utf8")

// Don't specify token budget
const result = await parseMentions(
`Check @/${path.basename(filePath)}`,
tempDir,
mockUrlContentFetcher,
undefined,
undefined,
false,
true,
50,
undefined, // No maxReadFileLine
undefined, // No maxFileTokenBudget
)

// Should read the full file
expect(result.text).toContain("1 | Line 1: Hello")
expect(result.text).toContain("2 | Line 2: World")
expect(result.text).toContain("3 | Line 3: Test")
})
})
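
The "1 | Line 1: Hello" assertions above depend on the line-number prefix produced by addLineNumbers from integrations/misc/extract-text, which is not shown in this diff. A minimal sketch of that formatting, under the assumption of the single-argument form used in the index.ts changes below (the real helper may accept a start-line offset):

// Illustrative sketch only: mirrors the "N | content" numbering the tests assert on.
// The real addLineNumbers lives in src/integrations/misc/extract-text and is not part of this PR's diff.
function addLineNumbersSketch(content: string): string {
	return content
		.split("\n")
		.map((line, index) => `${index + 1} | ${line}`)
		.join("\n")
}

// addLineNumbersSketch("Line 1: Hello\nLine 2: World")
// -> "1 | Line 1: Hello\n2 | Line 2: World"
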
@@ -57,6 +57,7 @@ describe("processUserContentMentions", () => {
true, // includeDiagnosticMessages
50, // maxDiagnosticMessages
100,
undefined, // maxFileTokenBudget
)
})

@@ -86,6 +87,7 @@ describe("processUserContentMentions", () => {
true, // includeDiagnosticMessages
50, // maxDiagnosticMessages
undefined,
undefined, // maxFileTokenBudget
)
})

@@ -116,6 +118,7 @@ describe("processUserContentMentions", () => {
true, // includeDiagnosticMessages
50, // maxDiagnosticMessages
-1,
undefined, // maxFileTokenBudget
)
})
})
@@ -326,6 +329,7 @@ describe("processUserContentMentions", () => {
true, // includeDiagnosticMessages
50, // maxDiagnosticMessages
undefined,
undefined, // maxFileTokenBudget
)
})

@@ -355,6 +359,7 @@ describe("processUserContentMentions", () => {
true, // includeDiagnosticMessages
50, // maxDiagnosticMessages
undefined,
undefined, // maxFileTokenBudget
)
})
})
38 changes: 35 additions & 3 deletions src/core/mentions/index.ts
@@ -9,7 +9,8 @@ import { mentionRegexGlobal, commandRegexGlobal, unescapeSpaces } from "../../sh
import { getCommitInfo, getWorkingState } from "../../utils/git"

import { openFile } from "../../integrations/misc/open-file"
import { extractTextFromFile } from "../../integrations/misc/extract-text"
import { extractTextFromFile, addLineNumbers } from "../../integrations/misc/extract-text"
import { readFileWithTokenBudget } from "../../integrations/misc/read-file-with-budget"
import { diagnosticsToProblemsString } from "../../integrations/diagnostics"

import { UrlContentFetcher } from "../../services/browser/UrlContentFetcher"
@@ -86,6 +87,7 @@ export async function parseMentions(
includeDiagnosticMessages: boolean = true,
maxDiagnosticMessages: number = 50,
maxReadFileLine?: number,
maxFileTokenBudget?: number,
): Promise<ParseMentionsResult> {
const mentions: Set<string> = new Set()
const validCommands: Map<string, Command> = new Map()
@@ -198,6 +200,7 @@
rooIgnoreController,
showRooIgnoredFiles,
maxReadFileLine,
maxFileTokenBudget,
)
if (mention.endsWith("/")) {
parsedText += `\n\n<folder_content path="${mentionPath}">\n${content}\n</folder_content>`
@@ -276,6 +279,7 @@ async function getFileOrFolderContent(
rooIgnoreController?: any,
showRooIgnoredFiles: boolean = false,
maxReadFileLine?: number,
maxFileTokenBudget?: number,
): Promise<string> {
const unescapedPath = unescapeSpaces(mentionPath)
const absPath = path.resolve(cwd, unescapedPath)
@@ -294,6 +298,22 @@
return `(File ${mentionPath} is ignored by .rooignore)`
}
try {
// Use token-budget based reading if budget is specified
if (maxFileTokenBudget && maxFileTokenBudget > 0) {
const result = await readFileWithTokenBudget(absPath, {
budgetTokens: maxFileTokenBudget,
})
// Add line numbers to the content (similar to extractTextFromFile)
const numberedContent = addLineNumbers(result.content)
if (!result.complete) {
return (
numberedContent +
`\n\n[File truncated: read ${result.lineCount} lines (${result.tokenCount} tokens) within token budget of ${maxFileTokenBudget}. Use the read_file tool to examine specific sections.]`
)
}
return numberedContent
}
// Fall back to line-based reading
const content = await extractTextFromFile(absPath, maxReadFileLine)
return content
} catch (error) {
@@ -330,10 +350,22 @@ async function getFileOrFolderContent(
fileContentPromises.push(
(async () => {
try {
const isBinary = await isBinaryFile(absoluteFilePath).catch(() => false)
if (isBinary) {
const isBinaryFile_ = await isBinaryFile(absoluteFilePath).catch(() => false)
if (isBinaryFile_) {
return undefined
}
// Use token-budget based reading if budget is specified
if (maxFileTokenBudget && maxFileTokenBudget > 0) {
const result = await readFileWithTokenBudget(absoluteFilePath, {
budgetTokens: maxFileTokenBudget,
})
const numberedContent = addLineNumbers(result.content)
let content = numberedContent
if (!result.complete) {
content += `\n\n[File truncated: read ${result.lineCount} lines (${result.tokenCount} tokens) within token budget of ${maxFileTokenBudget}. Use the read_file tool to examine specific sections.]`
}
return `<file_content path="${filePath.toPosix()}">\n${content}\n</file_content>`
}
const content = await extractTextFromFile(absoluteFilePath, maxReadFileLine)
return `<file_content path="${filePath.toPosix()}">\n${content}\n</file_content>`
} catch (error) {
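
getFileOrFolderContent now consumes the new readFileWithTokenBudget helper, whose implementation is not included in this section of the diff. A minimal sketch of the contract it appears to satisfy: the result fields (content, complete, lineCount, tokenCount) come from the code above, while the line-by-line loop and the ~4-characters-per-token estimate are assumptions for illustration, not the shipped implementation.

import * as fs from "fs/promises"

interface ReadFileWithTokenBudgetResult {
	content: string
	complete: boolean
	lineCount: number
	tokenCount: number
}

// Sketch: keep reading lines until the estimated token count would exceed the budget.
async function readFileWithTokenBudgetSketch(
	filePath: string,
	options: { budgetTokens: number },
): Promise<ReadFileWithTokenBudgetResult> {
	const raw = await fs.readFile(filePath, "utf8")
	const lines = raw.split("\n")
	const kept: string[] = []
	let tokenCount = 0
	let complete = true
	for (const line of lines) {
		// Rough heuristic; the real helper may use an actual tokenizer.
		const lineTokens = Math.ceil((line.length + 1) / 4)
		if (kept.length > 0 && tokenCount + lineTokens > options.budgetTokens) {
			complete = false
			break
		}
		kept.push(line)
		tokenCount += lineTokens
	}
	return { content: kept.join("\n"), complete, lineCount: kept.length, tokenCount }
}
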
5 changes: 5 additions & 0 deletions src/core/mentions/processUserContentMentions.ts
@@ -21,6 +21,7 @@ export async function processUserContentMentions({
includeDiagnosticMessages = true,
maxDiagnosticMessages = 50,
maxReadFileLine,
maxFileTokenBudget,
}: {
userContent: Anthropic.Messages.ContentBlockParam[]
cwd: string
@@ -31,6 +32,7 @@
includeDiagnosticMessages?: boolean
maxDiagnosticMessages?: number
maxReadFileLine?: number
maxFileTokenBudget?: number
}): Promise<ProcessUserContentMentionsResult> {
// Track the first mode found from slash commands
let commandMode: string | undefined
@@ -65,6 +67,7 @@
includeDiagnosticMessages,
maxDiagnosticMessages,
maxReadFileLine,
maxFileTokenBudget,
)
// Capture the first mode found
if (!commandMode && result.mode) {
@@ -90,6 +93,7 @@
includeDiagnosticMessages,
maxDiagnosticMessages,
maxReadFileLine,
maxFileTokenBudget,
)
// Capture the first mode found
if (!commandMode && result.mode) {
@@ -116,6 +120,7 @@
includeDiagnosticMessages,
maxDiagnosticMessages,
maxReadFileLine,
maxFileTokenBudget,
)
// Capture the first mode found
if (!commandMode && result.mode) {
9 changes: 9 additions & 0 deletions src/core/task/Task.ts
@@ -2607,6 +2607,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
maxReadFileLine = -1,
} = (await this.providerRef.deref()?.getState()) ?? {}

// Calculate token budget for file mentions based on context window
// Use 10% of context window per file mention to prevent context exhaustion
// This is more conservative than the read_file tool (which uses 60% of remaining context)
// because mentions are processed before context usage is known
const modelInfo = this.api.getModel().info
const contextWindow = modelInfo.contextWindow || 200_000
const maxFileTokenBudget = Math.floor(contextWindow * 0.1) // 10% of context window per file

const { content: parsedUserContent, mode: slashCommandMode } = await processUserContentMentions({
userContent: currentUserContent,
cwd: this.cwd,
@@ -2617,6 +2625,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
includeDiagnosticMessages,
maxDiagnosticMessages,
maxReadFileLine,
maxFileTokenBudget,
})

// Switch mode if specified in a slash command's frontmatter
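
A quick worked example of the budget calculation above (the 200,000-token fallback comes from the diff; the other window sizes are illustrative):

// Illustrative only: mirrors the maxFileTokenBudget computation added to Task.ts.
for (const contextWindow of [200_000, 128_000, 1_000_000]) {
	const maxFileTokenBudget = Math.floor(contextWindow * 0.1) // 10% per mentioned file
	console.log(`contextWindow=${contextWindow} -> maxFileTokenBudget=${maxFileTokenBudget}`)
}
// contextWindow=200000 -> maxFileTokenBudget=20000
// contextWindow=128000 -> maxFileTokenBudget=12800
// contextWindow=1000000 -> maxFileTokenBudget=100000
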