diff --git a/Makefile b/Makefile
index 866e23f8b9..3a5b8534f8 100644
--- a/Makefile
+++ b/Makefile
@@ -213,7 +213,10 @@ build/icon.png: docs/img/logo.webp scripts/generate-icons.ts
 	@bun scripts/generate-icons.ts png
 
 ## Quality checks (can run in parallel)
-static-check: lint typecheck fmt-check check-eager-imports ## Run all static checks (includes startup performance checks)
+static-check: lint typecheck fmt-check check-eager-imports check-bench-agent ## Run all static checks (includes startup performance checks)
+
+check-bench-agent: ## Verify terminal-bench agent configuration and imports
+	@./scripts/check-bench-agent.sh
 
 lint: node_modules/.installed ## Run ESLint (typecheck runs in separate target)
 	@./scripts/lint.sh
diff --git a/benchmarks/terminal_bench/mux-run.sh b/benchmarks/terminal_bench/mux-run.sh
index 89e0ba4b06..52cc14ab48 100644
--- a/benchmarks/terminal_bench/mux-run.sh
+++ b/benchmarks/terminal_bench/mux-run.sh
@@ -77,7 +77,7 @@ ensure_git_repo "${project_path}"
 log "starting mux agent session for ${project_path}"
 cd "${MUX_APP_ROOT}"
 
-cmd=(bun src/debug/agentSessionCli.ts
+cmd=(bun src/cli/debug/agentSessionCli.ts
   --config-root "${MUX_CONFIG_ROOT}"
   --project-path "${project_path}"
   --workspace-path "${project_path}"
diff --git a/docs/benchmarking.md b/docs/benchmarking.md
index 4afbf2f064..d35ac0f67e 100644
--- a/docs/benchmarking.md
+++ b/docs/benchmarking.md
@@ -65,7 +65,7 @@ The adapter lives in `benchmarks/terminal_bench/mux_agent.py`. For each task it:
 
 1. Copies the mux repository (package manifests + `src/`) into `/tmp/mux-app` inside the container.
 2. Ensures Bun exists, then runs `bun install --frozen-lockfile`.
-3. Launches `src/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).
+3. Launches `src/cli/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).
 
 `MUX_MODEL` accepts either the mux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide.
 
diff --git a/scripts/check-bench-agent.sh b/scripts/check-bench-agent.sh
new file mode 100755
index 0000000000..09b693ad06
--- /dev/null
+++ b/scripts/check-bench-agent.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# This script verifies that the terminal-bench agent entry point
+# referenced in mux-run.sh is valid and can be executed (imports resolve).
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+MUX_RUN_SH="$REPO_ROOT/benchmarks/terminal_bench/mux-run.sh"
+
+echo "Checking terminal-bench agent configuration..."
+
+if [[ ! -f "$MUX_RUN_SH" ]]; then
+  echo "❌ Error: $MUX_RUN_SH not found"
+  exit 1
+fi
+
+# Extract the agent CLI path from mux-run.sh
+# Looks for line like: cmd=(bun src/cli/debug/agentSessionCli.ts
+# `|| true` keeps a grep miss from aborting the script under `set -e -o pipefail`,
+# so the explicit "Could not find" error below is actually reachable.
+CLI_PATH_MATCH=$(grep -o "bun src/.*\.ts" "$MUX_RUN_SH" | head -1 | cut -d' ' -f2 || true)
+
+if [[ -z "$CLI_PATH_MATCH" ]]; then
+  echo "❌ Error: Could not find agent CLI path in $MUX_RUN_SH"
+  exit 1
+fi
+
+FULL_CLI_PATH="$REPO_ROOT/$CLI_PATH_MATCH"
+
+echo "Found agent CLI path: $CLI_PATH_MATCH"
+
+if [[ ! -f "$FULL_CLI_PATH" ]]; then
+  echo "❌ Error: Referenced file $FULL_CLI_PATH does not exist"
+  exit 1
+fi
+
+echo "Verifying agent CLI startup (checking imports)..."
+
+# Run with --help or no args to check if it boots without crashing on imports
+# We expect it to fail with "Unknown option" or "workspace-path required" but NOT with "Module not found" or "worker error"
+if ! output=$(bun "$FULL_CLI_PATH" --help 2>&1); then
+  # It failed, which is expected (no args/bad args), but we need to check WHY.
+  # (`$?` is not read here: after `if ! cmd` it holds the negated status, always 0.)
+
+  # Check for known import/worker errors.
+  # A here-string avoids `echo | grep -q`, which can fail with SIGPIPE under pipefail.
+  if grep -qE "Module not found|Worker error|Cannot find module" <<<"$output"; then
+    echo "❌ Error: Agent CLI failed to start due to import/worker errors:"
+    echo "$output"
+    exit 1
+  fi
+
+  # If it failed just because of arguments, that's fine - it means the code loaded.
+  echo "✅ Agent CLI loaded successfully (ignoring argument errors)"
+else
+  echo "✅ Agent CLI ran successfully"
+fi
+
+echo "Terminal-bench agent check passed."
diff --git a/src/node/utils/main/workerPool.ts b/src/node/utils/main/workerPool.ts
index df2ee321ce..40968276c6 100644
--- a/src/node/utils/main/workerPool.ts
+++ b/src/node/utils/main/workerPool.ts
@@ -1,5 +1,5 @@
 import { Worker } from "node:worker_threads";
-import { join, dirname, sep } from "node:path";
+import { join, dirname, sep, extname } from "node:path";
 
 interface WorkerRequest {
   messageId: number;
@@ -37,7 +37,17 @@ const hasDist = pathParts.includes("dist");
 const srcIndex = pathParts.lastIndexOf("src");
 
 let workerDir: string;
-if (srcIndex !== -1 && !hasDist) {
+let workerFile = "tokenizer.worker.js";
+
+// Check if we're running under Bun (not Node with ts-jest)
+// ts-jest transpiles .ts files but runs them via Node, which can't load .ts workers
+const isBun = !!(process as unknown as { isBun?: boolean }).isBun;
+
+if (isBun && extname(__filename) === ".ts") {
+  // Running from source via Bun - use .ts worker directly
+  workerDir = currentDir;
+  workerFile = "tokenizer.worker.ts";
+} else if (srcIndex !== -1 && !hasDist) {
   // Replace 'src' with 'dist' in the path (only if not already in dist)
   pathParts[srcIndex] = "dist";
   workerDir = pathParts.join(sep);
@@ -45,7 +55,7 @@ if (srcIndex !== -1 && !hasDist) {
   workerDir = currentDir;
 }
 
-const workerPath = join(workerDir, "tokenizer.worker.js");
+const workerPath = join(workerDir, workerFile);
 const worker = new Worker(workerPath);
 
 // Handle messages from worker