From 6b6be04bee3a66e2a6b90db90d64203f17bc50c7 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Fri, 9 Jan 2026 11:45:41 +1100 Subject: [PATCH] Add workspace sync and workspace-root --- .changeset/workspace-sync.md | 5 + .claude/skills/agentv-eval-builder/SKILL.md | 8 + README.md | 15 ++ apps/cli/README.md | 15 ++ apps/cli/package.json | 5 +- apps/cli/src/commands/eval/index.ts | 7 + apps/cli/src/commands/eval/run-eval.ts | 115 ++++++++- apps/cli/src/commands/workspace/config.ts | 191 +++++++++++++++ apps/cli/src/commands/workspace/create.ts | 51 ++++ apps/cli/src/commands/workspace/index.ts | 63 +++++ apps/cli/src/commands/workspace/sync.ts | 219 ++++++++++++++++++ apps/cli/src/index.ts | 2 + .../test/commands/eval/workspace-root.test.ts | 129 +++++++++++ .../add-agent-workspace-script/design.md | 90 +++++++ .../add-agent-workspace-script/proposal.md | 32 +++ .../specs/eval-cli/spec.md | 16 ++ .../specs/workspace-cli/spec.md | 36 +++ .../add-agent-workspace-script/tasks.md | 27 +++ package.json | 5 +- packages/core/package.json | 5 +- packages/eval/package.json | 5 +- 21 files changed, 1021 insertions(+), 20 deletions(-) create mode 100644 .changeset/workspace-sync.md create mode 100644 apps/cli/src/commands/workspace/config.ts create mode 100644 apps/cli/src/commands/workspace/create.ts create mode 100644 apps/cli/src/commands/workspace/index.ts create mode 100644 apps/cli/src/commands/workspace/sync.ts create mode 100644 apps/cli/test/commands/eval/workspace-root.test.ts create mode 100644 openspec/changes/add-agent-workspace-script/design.md create mode 100644 openspec/changes/add-agent-workspace-script/proposal.md create mode 100644 openspec/changes/add-agent-workspace-script/specs/eval-cli/spec.md create mode 100644 openspec/changes/add-agent-workspace-script/specs/workspace-cli/spec.md create mode 100644 openspec/changes/add-agent-workspace-script/tasks.md diff --git a/.changeset/workspace-sync.md b/.changeset/workspace-sync.md new file mode 100644 index 
00000000..1542e319 --- /dev/null +++ b/.changeset/workspace-sync.md @@ -0,0 +1,5 @@ +--- +"agentv": minor +--- + +Add workspace sync commands and `eval --workspace-root` to default agentic providers' working directory. diff --git a/.claude/skills/agentv-eval-builder/SKILL.md b/.claude/skills/agentv-eval-builder/SKILL.md index 2f06a526..14689626 100644 --- a/.claude/skills/agentv-eval-builder/SKILL.md +++ b/.claude/skills/agentv-eval-builder/SKILL.md @@ -19,6 +19,14 @@ description: Create and maintain AgentV YAML evaluation files for testing AI age - Batch CLI: `references/batch-cli-evaluator.md` - Evaluate batch runner output (JSONL) - Compare: `references/compare-command.md` - Compare evaluation results between runs +## Workspace-Aware Runs + +If your target provider needs a consistent working directory (e.g., prompt files, skills, fixtures), create and sync a workspace folder and then run evals with `--workspace-root`. + +- `agentv workspace create --out <dir>` writes `<dir>/.agentv/workspace.yaml` (`--workspace-root` is an alias) +- `agentv workspace sync --config <dir>/.agentv/workspace.yaml` refreshes all configured sources +- `agentv eval --workspace-root <dir> ...` defaults target `cwd` for `cli`, `codex`, `claude-code`, `pi-coding-agent` and defaults `workspaceTemplate` for `vscode`/`vscode-insiders` when not set in `targets.yaml` + ## Structure Requirements - Root level: `description` (optional), `execution` (with `target`), `evalcases` (required) - Eval case fields: `id` (required), `expected_outcome` (required), `input_messages` (required) diff --git a/README.md b/README.md index 833f87d0..450b92c6 100644 --- a/README.md +++ b/README.md @@ -122,10 +122,25 @@ agentv eval --eval-id case-123 evals/my-eval.yaml # Dry-run with mock provider agentv eval --dry-run evals/my-eval.yaml + +# Default agentic provider workspace (cwd/workspace template) +agentv eval --workspace-root ./agent-workspace evals/my-eval.yaml ``` See `agentv eval --help` for all options: workers, timeouts, 
output formats, trace dumping, and more. +### Workspace Sync + +Use `agentv workspace` to build and refresh a local working folder from multiple sources (local folders or git repos). This is useful for agentic targets that need a consistent working directory. + +```bash +# Create a new workspace config +agentv workspace create --out ./agent-workspace + +# Edit ./agent-workspace/.agentv/workspace.yaml to add sources, then sync +agentv workspace sync --config ./agent-workspace/.agentv/workspace.yaml +``` + ### Create Custom Evaluators Write code judges in Python or TypeScript: diff --git a/apps/cli/README.md b/apps/cli/README.md index 833f87d0..450b92c6 100644 --- a/apps/cli/README.md +++ b/apps/cli/README.md @@ -122,10 +122,25 @@ agentv eval --eval-id case-123 evals/my-eval.yaml # Dry-run with mock provider agentv eval --dry-run evals/my-eval.yaml + +# Default agentic provider workspace (cwd/workspace template) +agentv eval --workspace-root ./agent-workspace evals/my-eval.yaml ``` See `agentv eval --help` for all options: workers, timeouts, output formats, trace dumping, and more. +### Workspace Sync + +Use `agentv workspace` to build and refresh a local working folder from multiple sources (local folders or git repos). This is useful for agentic targets that need a consistent working directory. 
+ +```bash +# Create a new workspace config +agentv workspace create --out ./agent-workspace + +# Edit ./agent-workspace/.agentv/workspace.yaml to add sources, then sync +agentv workspace sync --config ./agent-workspace/.agentv/workspace.yaml +``` + ### Create Custom Evaluators Write code judges in Python or TypeScript: diff --git a/apps/cli/package.json b/apps/cli/package.json index c689f8e4..3da5827e 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -14,10 +14,7 @@ "bin": { "agentv": "./dist/cli.js" }, - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "scripts": { "dev": "bun --watch src/index.ts", "build": "tsup && bun run copy-readme", diff --git a/apps/cli/src/commands/eval/index.ts b/apps/cli/src/commands/eval/index.ts index a88d99e3..ad7e0afe 100644 --- a/apps/cli/src/commands/eval/index.ts +++ b/apps/cli/src/commands/eval/index.ts @@ -93,6 +93,12 @@ export const evalCommand = command({ description: 'Retry count for timeout recoveries (default: 2)', defaultValue: () => 2, }), + workspaceRoot: option({ + type: optional(string), + long: 'workspace-root', + description: + 'Default workspace root for agentic providers (applied as target cwd/workspaceTemplate when not set in targets.yaml)', + }), cache: flag({ long: 'cache', description: 'Enable in-memory provider response cache', @@ -117,6 +123,7 @@ export const evalCommand = command({ dryRunDelayMax: args.dryRunDelayMax, agentTimeout: args.agentTimeout, maxRetries: args.maxRetries, + workspaceRoot: args.workspaceRoot, cache: args.cache, verbose: args.verbose, }; diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts index 50b05de0..3a737f9a 100644 --- a/apps/cli/src/commands/eval/run-eval.ts +++ b/apps/cli/src/commands/eval/run-eval.ts @@ -45,6 +45,7 @@ interface NormalizedOptions { readonly dryRunDelayMax: number; readonly agentTimeoutSeconds: number; readonly maxRetries: number; + readonly workspaceRoot?: string; readonly cache: 
boolean; readonly verbose: boolean; } @@ -93,6 +94,7 @@ function normalizeOptions(rawOptions: Record): NormalizedOption dryRunDelayMax: normalizeNumber(rawOptions.dryRunDelayMax, 0), agentTimeoutSeconds: normalizeNumber(rawOptions.agentTimeout, 120), maxRetries: normalizeNumber(rawOptions.maxRetries, 2), + workspaceRoot: normalizeString(rawOptions.workspaceRoot), cache: normalizeBoolean(rawOptions.cache), verbose: normalizeBoolean(rawOptions.verbose), } satisfies NormalizedOptions; @@ -217,10 +219,111 @@ function applyVerboseOverride(selection: TargetSelection, cliVerbose: boolean): }; } +export function applyWorkspaceRootOverride( + selection: TargetSelection, + workspaceRoot?: string, +): TargetSelection { + const root = workspaceRoot?.trim(); + if (!root) { + return selection; + } + + const { resolvedTarget } = selection; + + if (resolvedTarget.kind === 'vscode' || resolvedTarget.kind === 'vscode-insiders') { + const current = resolvedTarget.config.workspaceTemplate; + if (typeof current === 'string' && current.trim().length > 0) { + return selection; + } + + return { + ...selection, + resolvedTarget: { + ...resolvedTarget, + config: { + ...resolvedTarget.config, + workspaceTemplate: root, + }, + }, + }; + } + + if (resolvedTarget.kind === 'cli') { + const current = resolvedTarget.config.cwd; + if (typeof current === 'string' && current.trim().length > 0) { + return selection; + } + return { + ...selection, + resolvedTarget: { + ...resolvedTarget, + config: { + ...resolvedTarget.config, + cwd: root, + }, + }, + }; + } + + if (resolvedTarget.kind === 'codex') { + const current = resolvedTarget.config.cwd; + if (typeof current === 'string' && current.trim().length > 0) { + return selection; + } + return { + ...selection, + resolvedTarget: { + ...resolvedTarget, + config: { + ...resolvedTarget.config, + cwd: root, + }, + }, + }; + } + + if (resolvedTarget.kind === 'pi-coding-agent') { + const current = resolvedTarget.config.cwd; + if (typeof current === 'string' && 
current.trim().length > 0) { + return selection; + } + return { + ...selection, + resolvedTarget: { + ...resolvedTarget, + config: { + ...resolvedTarget.config, + cwd: root, + }, + }, + }; + } + + if (resolvedTarget.kind === 'claude-code') { + const current = resolvedTarget.config.cwd; + if (typeof current === 'string' && current.trim().length > 0) { + return selection; + } + return { + ...selection, + resolvedTarget: { + ...resolvedTarget, + config: { + ...resolvedTarget.config, + cwd: root, + }, + }, + }; + } + + return selection; +} + async function prepareFileMetadata(params: { readonly testFilePath: string; readonly repoRoot: string; readonly cwd: string; + readonly workspaceRoot?: string; readonly options: NormalizedOptions; }): Promise<{ readonly evalIds: readonly string[]; @@ -228,7 +331,7 @@ async function prepareFileMetadata(params: { readonly selection: TargetSelection; readonly inlineTargetLabel: string; }> { - const { testFilePath, repoRoot, cwd, options } = params; + const { testFilePath, repoRoot, cwd, options, workspaceRoot } = params; await ensureFileExists(testFilePath, 'Test file'); await loadEnvFromHierarchy({ @@ -250,10 +353,12 @@ async function prepareFileMetadata(params: { env: process.env, }); + const selectionWithWorkspaceRoot = applyWorkspaceRootOverride(selection, workspaceRoot); + const providerLabel = options.dryRun - ? `${selection.resolvedTarget.kind} (dry-run)` - : selection.resolvedTarget.kind; - const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`; + ? 
`${selectionWithWorkspaceRoot.resolvedTarget.kind} (dry-run)` + : selectionWithWorkspaceRoot.resolvedTarget.kind; + const inlineTargetLabel = `${selectionWithWorkspaceRoot.targetName} [provider=${providerLabel}]`; const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose, @@ -404,6 +509,7 @@ async function runSingleEvalFile(params: { export async function runEvalCommand(input: RunEvalCommandInput): Promise { const options = normalizeOptions(input.rawOptions); + const workspaceRoot = options.workspaceRoot ? path.resolve(options.workspaceRoot) : undefined; const cwd = process.cwd(); const repoRoot = await findRepoRoot(cwd); @@ -447,6 +553,7 @@ export async function runEvalCommand(input: RunEvalCommandInput): Promise testFilePath, repoRoot, cwd, + workspaceRoot, options, }); fileMetadata.set(testFilePath, meta); diff --git a/apps/cli/src/commands/workspace/config.ts b/apps/cli/src/commands/workspace/config.ts new file mode 100644 index 00000000..d8cac9f6 --- /dev/null +++ b/apps/cli/src/commands/workspace/config.ts @@ -0,0 +1,191 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import YAML from 'yaml'; + +export type WorkspaceMode = 'copy' | 'symlink'; + +export type WorkspaceSource = + | { + readonly id: string; + readonly type: 'local'; + /** Absolute or workspace-relative path to the source repo/folder */ + readonly root: string; + /** Relative folder paths (from root) to sync */ + readonly include: readonly string[]; + /** Relative path (from workspace root) to place the synced content */ + readonly dest?: string; + } + | { + readonly id: string; + readonly type: 'git'; + /** Git repo URL (https://, ssh, file path) */ + readonly repo: string; + /** Branch/tag/commit; defaults to repo default */ + readonly ref?: string; + /** Relative folder paths (from repo root) to sync */ + readonly include: readonly string[]; + /** Relative path (from workspace root) to place the synced content */ + readonly dest?: string; + 
}; + +export interface WorkspaceConfig { + readonly version: 1; + /** Workspace root directory containing this config, unless explicitly set */ + readonly workspace_root?: string; + readonly mode?: WorkspaceMode; + readonly sources: readonly WorkspaceSource[]; +} + +function isNonEmptyString(value: unknown): value is string { + return typeof value === 'string' && value.trim().length > 0; +} + +function normalizeRelPath(value: string): string { + const trimmed = value.trim().replace(/\\/g, '/'); + return trimmed.replace(/^\/+/, '').replace(/\/+$/, ''); +} + +function assertValidSourceId(id: string): void { + if (!/^[a-zA-Z0-9][a-zA-Z0-9._-]*$/.test(id)) { + throw new Error( + `Invalid source id '${id}'. Use letters, numbers, dot, underscore, hyphen (must start with alphanumeric).`, + ); + } +} + +export async function readWorkspaceConfig(configPath: string): Promise<{ + readonly config: WorkspaceConfig; + readonly configDir: string; + readonly workspaceRoot: string; +}> { + const resolvedConfigPath = path.resolve(configPath); + const configDir = path.dirname(resolvedConfigPath); + + const rawText = await fs.readFile(resolvedConfigPath, 'utf8'); + const parsed = YAML.parse(rawText) as unknown; + + if (typeof parsed !== 'object' || parsed === null) { + throw new Error(`Invalid workspace config: expected YAML object at ${resolvedConfigPath}`); + } + + const obj = parsed as Record; + const version = obj.version; + if (version !== 1) { + throw new Error( + `Unsupported workspace config version: ${String(version)}. Expected version: 1`, + ); + } + + const modeRaw = obj.mode; + const mode: WorkspaceMode | undefined = + modeRaw === undefined + ? undefined + : modeRaw === 'copy' || modeRaw === 'symlink' + ? 
(modeRaw as WorkspaceMode) + : undefined; + if (modeRaw !== undefined && mode === undefined) { + throw new Error(`Invalid workspace mode '${String(modeRaw)}' (expected 'copy' or 'symlink')`); + } + + const workspaceRootRaw = obj.workspace_root; + const workspaceRoot = isNonEmptyString(workspaceRootRaw) + ? path.resolve(configDir, workspaceRootRaw) + : configDir; + + const sourcesRaw = obj.sources; + if (!Array.isArray(sourcesRaw)) { + throw new Error('Workspace config must include sources: [...]'); + } + + const seenIds = new Set(); + const sources: WorkspaceSource[] = sourcesRaw.map((item, index) => { + if (typeof item !== 'object' || item === null) { + throw new Error(`sources[${index}] must be an object`); + } + + const rec = item as Record; + const id = rec.id; + if (!isNonEmptyString(id)) { + throw new Error(`sources[${index}].id is required`); + } + assertValidSourceId(id); + if (seenIds.has(id)) { + throw new Error(`Duplicate source id '${id}'`); + } + seenIds.add(id); + + const type = rec.type; + if (type !== 'local' && type !== 'git') { + throw new Error(`sources[${index}].type must be 'local' or 'git'`); + } + + const include = rec.include; + if (!Array.isArray(include) || include.length === 0 || !include.every(isNonEmptyString)) { + throw new Error(`sources[${index}].include must be a non-empty string array`); + } + const normalizedInclude = include.map((p) => normalizeRelPath(p)); + + const destRaw = rec.dest; + const dest = isNonEmptyString(destRaw) ? normalizeRelPath(destRaw) : undefined; + + if (type === 'local') { + const root = rec.root; + if (!isNonEmptyString(root)) { + throw new Error(`sources[${index}].root is required for local sources`); + } + return { + id, + type: 'local', + root, + include: normalizedInclude, + dest, + }; + } + + const repo = rec.repo; + if (!isNonEmptyString(repo)) { + throw new Error(`sources[${index}].repo is required for git sources`); + } + + const refRaw = rec.ref; + const ref = isNonEmptyString(refRaw) ? 
refRaw.trim() : undefined; + + return { + id, + type: 'git', + repo, + ref, + include: normalizedInclude, + dest, + }; + }); + + const config: WorkspaceConfig = { + version: 1, + workspace_root: isNonEmptyString(workspaceRootRaw) ? workspaceRootRaw.trim() : undefined, + mode, + sources, + }; + + return { config, configDir, workspaceRoot }; +} + +export async function writeDefaultWorkspaceConfig(configPath: string, workspaceRoot: string) { + const resolvedConfigPath = path.resolve(configPath); + const configDir = path.dirname(resolvedConfigPath); + + await fs.mkdir(path.join(workspaceRoot, '.agentv'), { recursive: true }); + await fs.mkdir(configDir, { recursive: true }); + + const doc: WorkspaceConfig = { + version: 1, + workspace_root: path.relative(configDir, workspaceRoot).split(path.sep).join('/'), + mode: 'copy', + sources: [], + }; + + const yamlText = YAML.stringify(doc); + await fs.writeFile(resolvedConfigPath, yamlText, 'utf8'); + + return resolvedConfigPath; +} diff --git a/apps/cli/src/commands/workspace/create.ts b/apps/cli/src/commands/workspace/create.ts new file mode 100644 index 00000000..663ae41f --- /dev/null +++ b/apps/cli/src/commands/workspace/create.ts @@ -0,0 +1,51 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; + +import { writeDefaultWorkspaceConfig } from './config.js'; + +function makeTimestampedWorkspaceDir(cwd: string): string { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + return path.join(cwd, '.agentv', 'workspaces', timestamp); +} + +async function isNonEmptyDir(dirPath: string): Promise { + try { + const entries = await fs.readdir(dirPath); + return entries.length > 0; + } catch { + return false; + } +} + +export async function workspaceCreateCommand(args: { + out?: string; + workspaceRoot?: string; + config?: string; + force?: boolean; +}): Promise<{ readonly workspaceRoot: string; readonly configPath: string }> { + const cwd = process.cwd(); + + const out = args.out?.trim(); + const 
workspaceRootArg = args.workspaceRoot?.trim(); + if (out && workspaceRootArg && path.resolve(out) !== path.resolve(workspaceRootArg)) { + throw new Error('Provide only one of --out or --workspace-root (they are aliases).'); + } + + const workspaceRoot = path.resolve(workspaceRootArg ?? out ?? makeTimestampedWorkspaceDir(cwd)); + const configPath = args.config + ? path.resolve(args.config) + : path.join(workspaceRoot, '.agentv', 'workspace.yaml'); + + if (args.force) { + await fs.rm(workspaceRoot, { recursive: true, force: true }); + } else if (await isNonEmptyDir(workspaceRoot)) { + throw new Error( + `Workspace directory already exists and is not empty: ${workspaceRoot} (use --force to overwrite)`, + ); + } + + await fs.mkdir(workspaceRoot, { recursive: true }); + await writeDefaultWorkspaceConfig(configPath, workspaceRoot); + + return { workspaceRoot, configPath }; +} diff --git a/apps/cli/src/commands/workspace/index.ts b/apps/cli/src/commands/workspace/index.ts new file mode 100644 index 00000000..5c5f7ef1 --- /dev/null +++ b/apps/cli/src/commands/workspace/index.ts @@ -0,0 +1,63 @@ +import { command, flag, oneOf, option, optional, string, subcommands } from 'cmd-ts'; + +import { workspaceCreateCommand } from './create.js'; +import { workspaceSyncCommand } from './sync.js'; + +export const workspaceCommand = subcommands({ + name: 'workspace', + description: 'Manage AgentV workspaces', + cmds: { + create: command({ + name: 'create', + description: 'Create a workspace config file', + args: { + out: option({ + long: 'out', + type: optional(string), + description: + 'Workspace output directory (default: .agentv/workspaces/ under the current working directory)', + }), + workspaceRoot: option({ + long: 'workspace-root', + type: optional(string), + description: 'Alias for --out', + }), + config: option({ + long: 'config', + type: optional(string), + description: + 'Path to workspace config file (default: /.agentv/workspace.yaml)', + }), + force: flag({ + long: 
'force', + description: 'Overwrite the destination directory if it exists', + }), + }, + handler: async (args) => { + const result = await workspaceCreateCommand(args); + console.log(`Workspace root: ${result.workspaceRoot}`); + console.log(`Config path: ${result.configPath}`); + }, + }), + + sync: command({ + name: 'sync', + description: 'Sync workspace from configured sources', + args: { + config: option({ + long: 'config', + type: string, + description: 'Path to workspace config file', + }), + mode: option({ + long: 'mode', + type: optional(oneOf(['copy', 'symlink'] as const)), + description: "Override workspace mode (default: config.mode or 'copy')", + }), + }, + handler: async (args) => { + await workspaceSyncCommand(args); + }, + }), + }, +}); diff --git a/apps/cli/src/commands/workspace/sync.ts b/apps/cli/src/commands/workspace/sync.ts new file mode 100644 index 00000000..8ebbc1e2 --- /dev/null +++ b/apps/cli/src/commands/workspace/sync.ts @@ -0,0 +1,219 @@ +import { spawn } from 'node:child_process'; +import fs from 'node:fs/promises'; +import path from 'node:path'; + +import { type WorkspaceMode, type WorkspaceSource, readWorkspaceConfig } from './config.js'; + +async function pathExists(p: string): Promise { + try { + await fs.access(p); + return true; + } catch { + return false; + } +} + +async function safeRm(targetPath: string): Promise { + if (!(await pathExists(targetPath))) return; + await fs.rm(targetPath, { recursive: true, force: true }); +} + +async function ensureParentDir(targetPath: string): Promise { + await fs.mkdir(path.dirname(targetPath), { recursive: true }); +} + +async function copyPath(sourcePath: string, destPath: string): Promise { + await safeRm(destPath); + await ensureParentDir(destPath); + await fs.cp(sourcePath, destPath, { recursive: true }); +} + +async function symlinkPath(sourcePath: string, destPath: string): Promise { + await safeRm(destPath); + await ensureParentDir(destPath); + + const st = await fs.lstat(sourcePath); 
+ const isDir = st.isDirectory(); + + // Windows: prefer junctions for directories. + if (process.platform === 'win32' && isDir) { + await fs.symlink(sourcePath, destPath, 'junction'); + return; + } + + await fs.symlink(sourcePath, destPath, isDir ? 'dir' : 'file'); +} + +function resolveWorkspaceDest( + workspaceRoot: string, + source: WorkspaceSource, + includePath: string, +): string { + const destBase = source.dest ? path.resolve(workspaceRoot, source.dest) : workspaceRoot; + return path.resolve(destBase, includePath); +} + +function resolveLocalSourcePath( + configDir: string, + sourceRoot: string, + includePath: string, +): string { + const resolvedRoot = path.isAbsolute(sourceRoot) + ? sourceRoot + : path.resolve(configDir, sourceRoot); + return path.resolve(resolvedRoot, includePath); +} + +async function runGit(args: readonly string[], cwd: string): Promise { + await new Promise((resolve, reject) => { + const child = spawn('git', args, { + cwd, + stdio: 'inherit', + windowsHide: true, + }); + + child.on('error', reject); + child.on('exit', (code) => { + if (code === 0) return resolve(); + reject(new Error(`git ${args.join(' ')} failed with exit code ${code}`)); + }); + }); +} + +async function ensureGitSparseCheckout(opts: { + repo: string; + ref?: string; + includes: readonly string[]; + checkoutDir: string; +}): Promise { + const { repo, ref, includes, checkoutDir } = opts; + + await fs.mkdir(checkoutDir, { recursive: true }); + + const gitDir = path.join(checkoutDir, '.git'); + const isRepo = await pathExists(gitDir); + + if (!isRepo) { + await runGit(['init'], checkoutDir); + await runGit(['remote', 'add', 'origin', repo], checkoutDir); + } else { + // If origin changed, update it. 
+ await runGit(['remote', 'set-url', 'origin', repo], checkoutDir); + } + + await runGit(['config', 'core.sparseCheckout', 'true'], checkoutDir); + + const infoDir = path.join(gitDir, 'info'); + await fs.mkdir(infoDir, { recursive: true }); + const sparseFile = path.join(infoDir, 'sparse-checkout'); + + // Git sparse patterns use forward slashes. + const patterns = includes + .map((p) => p.replace(/\\/g, '/').replace(/^\/+/, '').replace(/\/+$/, '')) + .filter((p) => p.length > 0) + .map((p) => `${p}/`); + + await fs.writeFile(sparseFile, `${patterns.join('\n')}\n`, 'utf8'); + + // Fetch just the ref we need (or default). + if (ref) { + await runGit(['fetch', '--depth=1', 'origin', ref], checkoutDir); + await runGit(['checkout', '--force', 'FETCH_HEAD'], checkoutDir); + } else { + await runGit(['fetch', '--depth=1', 'origin'], checkoutDir); + // If the repo already has HEAD, checkout it. Otherwise, use origin/HEAD. + await runGit(['checkout', '--force', 'FETCH_HEAD'], checkoutDir); + } + + await runGit(['read-tree', '-mu', 'HEAD'], checkoutDir); +} + +async function syncLocalSource(opts: { + mode: WorkspaceMode; + configDir: string; + workspaceRoot: string; + source: Extract; +}): Promise { + const { mode, configDir, workspaceRoot, source } = opts; + + for (const includePath of source.include) { + const from = resolveLocalSourcePath(configDir, source.root, includePath); + const to = resolveWorkspaceDest(workspaceRoot, source, includePath); + + if (!(await pathExists(from))) { + throw new Error(`Local source path not found: ${from}`); + } + + if (mode === 'symlink') { + await symlinkPath(from, to); + } else { + await copyPath(from, to); + } + } +} + +async function syncGitSource(opts: { + mode: WorkspaceMode; + workspaceRoot: string; + cacheRoot: string; + source: Extract; +}): Promise { + const { mode, workspaceRoot, cacheRoot, source } = opts; + + const checkoutDir = path.join(cacheRoot, 'git', source.id); + + await ensureGitSparseCheckout({ + repo: source.repo, + 
ref: source.ref, + includes: source.include, + checkoutDir, + }); + + for (const includePath of source.include) { + const from = path.resolve(checkoutDir, includePath); + const to = resolveWorkspaceDest(workspaceRoot, source, includePath); + + if (!(await pathExists(from))) { + throw new Error(`Git source path not found after checkout: ${from}`); + } + + if (mode === 'symlink') { + await symlinkPath(from, to); + } else { + await copyPath(from, to); + } + } +} + +export async function workspaceSyncCommand(args: { + config: string; + mode?: WorkspaceMode; +}): Promise { + const { config: configPath, mode: modeOverride } = args; + + const { config, configDir, workspaceRoot } = await readWorkspaceConfig(configPath); + + const mode = modeOverride ?? config.mode ?? 'copy'; + const cacheRoot = path.resolve(workspaceRoot, '.agentv', 'cache'); + + await fs.mkdir(cacheRoot, { recursive: true }); + + for (const source of config.sources) { + if (source.type === 'local') { + await syncLocalSource({ + mode, + configDir, + workspaceRoot, + source, + }); + continue; + } + + await syncGitSource({ + mode, + workspaceRoot, + cacheRoot, + source, + }); + } +} diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index d2bda995..4c0986fc 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -7,6 +7,7 @@ import { evalCommand } from './commands/eval/index.js'; import { generateCommand } from './commands/generate/index.js'; import { initCmdTsCommand } from './commands/init/index.js'; import { validateCommand } from './commands/validate/index.js'; +import { workspaceCommand } from './commands/workspace/index.js'; const packageJson = JSON.parse(readFileSync(new URL('../package.json', import.meta.url), 'utf8')); @@ -21,6 +22,7 @@ export const app = subcommands({ generate: generateCommand, init: initCmdTsCommand, validate: validateCommand, + workspace: workspaceCommand, }, }); diff --git a/apps/cli/test/commands/eval/workspace-root.test.ts 
b/apps/cli/test/commands/eval/workspace-root.test.ts new file mode 100644 index 00000000..1601e90c --- /dev/null +++ b/apps/cli/test/commands/eval/workspace-root.test.ts @@ -0,0 +1,129 @@ +import { describe, expect, it } from 'bun:test'; + +import { applyWorkspaceRootOverride } from '../../../src/commands/eval/run-eval.js'; +import type { TargetSelection } from '../../../src/commands/eval/targets.js'; + +describe('eval --workspace-root target defaults', () => { + it('sets vscode workspaceTemplate when missing', () => { + const selection: TargetSelection = { + definitions: [], + targetName: 'default', + targetSource: 'default', + targetsFilePath: '/tmp/targets.yaml', + resolvedTarget: { + kind: 'vscode' as const, + name: 'vscode', + judgeTarget: undefined, + workers: undefined, + providerBatching: undefined, + config: { + command: 'code', + waitForResponse: true, + dryRun: false, + }, + }, + }; + + const updated = applyWorkspaceRootOverride(selection, '/work'); + + expect(updated.resolvedTarget.config.workspaceTemplate).toBe('/work'); + }); + + it('does not override vscode workspaceTemplate when already set', () => { + const selection: TargetSelection = { + definitions: [], + targetName: 'default', + targetSource: 'default', + targetsFilePath: '/tmp/targets.yaml', + resolvedTarget: { + kind: 'vscode-insiders' as const, + name: 'vscode-insiders', + judgeTarget: undefined, + workers: undefined, + providerBatching: undefined, + config: { + command: 'code-insiders', + waitForResponse: true, + dryRun: false, + workspaceTemplate: '/already', + }, + }, + }; + + const updated = applyWorkspaceRootOverride(selection, '/work'); + + expect(updated.resolvedTarget.config.workspaceTemplate).toBe('/already'); + }); + + it('sets claude-code cwd when missing', () => { + const selection: TargetSelection = { + definitions: [], + targetName: 'default', + targetSource: 'default', + targetsFilePath: '/tmp/targets.yaml', + resolvedTarget: { + kind: 'claude-code' as const, + name: 
'claude-code', + judgeTarget: undefined, + workers: undefined, + providerBatching: undefined, + config: { + executable: 'claude', + }, + }, + }; + + const updated = applyWorkspaceRootOverride(selection, '/work'); + + expect(updated.resolvedTarget.config.cwd).toBe('/work'); + }); + + it('does not override codex cwd when already set', () => { + const selection: TargetSelection = { + definitions: [], + targetName: 'default', + targetSource: 'default', + targetsFilePath: '/tmp/targets.yaml', + resolvedTarget: { + kind: 'codex' as const, + name: 'codex', + judgeTarget: undefined, + workers: undefined, + providerBatching: undefined, + config: { + executable: 'codex', + cwd: '/already', + }, + }, + }; + + const updated = applyWorkspaceRootOverride(selection, '/work'); + + expect(updated.resolvedTarget.config.cwd).toBe('/already'); + }); + + it('does nothing for providers without cwd/workspaceTemplate', () => { + const selection: TargetSelection = { + definitions: [], + targetName: 'default', + targetSource: 'default', + targetsFilePath: '/tmp/targets.yaml', + resolvedTarget: { + kind: 'azure' as const, + name: 'azure', + judgeTarget: undefined, + workers: undefined, + providerBatching: undefined, + config: { + resourceName: 'r', + deploymentName: 'd', + apiKey: 'k', + }, + }, + }; + + const updated = applyWorkspaceRootOverride(selection, '/work'); + + expect(updated).toBe(selection); + }); +}); diff --git a/openspec/changes/add-agent-workspace-script/design.md b/openspec/changes/add-agent-workspace-script/design.md new file mode 100644 index 00000000..40ec8b0f --- /dev/null +++ b/openspec/changes/add-agent-workspace-script/design.md @@ -0,0 +1,90 @@ +# Design: Agent Workspace Scaffold + +## Summary +Provide an AgentV-supported way to create and update a reproducible “agent workspace” directory populated from one or more sources (local folders and/or git repos), and a way to point agentic target providers at that directory as their working directory. 
+ +## Key observations (from repo research) +- Subagent provisioning writes a minimal `.code-workspace` file and `.github/agents/wakeup.md` per subagent directory (see `subagent/src/vscode/provision.ts`). +- AgentV already supports per-target working directories: + - `cli`: `config.cwd` is passed to `exec` (see `packages/core/src/evaluation/providers/cli.ts`). + - `codex`: defaults to a temp workspace unless `config.cwd` is set (see `packages/core/src/evaluation/providers/codex.ts`). + - `claude-code`: defaults to `process.cwd()` unless `config.cwd` is set (see `packages/core/src/evaluation/providers/claude-code.ts`) to preserve Claude Code auth. + - `vscode`: uses `subagent` and can accept a `workspaceTemplate` override (see `packages/core/src/evaluation/providers/vscode.ts`). +- File resolution for guideline/prompt file references includes `process.cwd()` as a search root (see `packages/core/src/evaluation/file-utils.ts`). + +## Relevant pattern to borrow (OpenSkills) +The only OpenSkills concept we borrow is **symlink mode** for local development: instead of copying files into the workspace, create symlinks so edits in the source repo immediately reflect in the workspace. + +## CLI surface +### New commands +`agentv workspace create --out <dir> [--config <path>] [--force]` + +- Creates the workspace root directory. +- Writes a workspace config YAML file (default: `<dir>/.agentv/workspace.yaml`). + +`agentv workspace sync --config <path> [--mode copy|symlink]` + +- Syncs all configured sources into the workspace root. +- `--mode` overrides config for one run; default is `copy`. + +### Workspace config file +The workspace config drives what gets synced into the workspace root. + +Default path: `<workspace-root>/.agentv/workspace.yaml`. + +High-level shape (illustrative): +```yaml +version: 1 +workspace_root: .
+mode: copy # or symlink + +sources: + - id: wtg-prompts + type: local + root: D:/GitHub/WiseTechGlobal/WTG.AI.Prompts + include: + - plugins/base/prompts + - plugins/development/prompts + dest: vendor/wtg-ai-prompts + + - id: upstream-prompts + type: git + repo: https://github.com/WiseTechGlobal/WTG.AI.Prompts.git + ref: main + include: + - plugins/base/prompts + dest: vendor/upstream +``` + +Notes: +- This is intentionally generic: sources can sync any folders (not “skills” specifically). +- For `git` sources, syncing specific folders SHOULD be implemented using `git` + sparse checkout. + +### New eval flag +`agentv eval ... --workspace-root <workspaceRoot>` + +When supplied, AgentV will treat `<workspaceRoot>` as the default execution root for agentic target providers. + +## Provider override rules +- If a target already sets `cwd`, keep it. +- If `--workspace-root` is set and the target does not set `cwd`, then: + - `cli`, `codex`, `claude-code`, `pi-coding-agent`: set `cwd = workspaceRoot`. +- For VS Code targets: + - If `workspaceTemplate` is not set, synthesize a workspace template with `folders: [{ path: workspaceRoot }]`. + - If `workspaceTemplate` is already set, keep it. + +## Implementation placement +- Workspace creation (filesystem copy) belongs in the CLI layer (`apps/cli`) to keep `@agentv/core` minimal. +- Workspace-root injection can be implemented either: + - in CLI (post-parse, before evaluation run), or + - in core target resolution as an optional override parameter. + +Preference: do injection in CLI so the core remains a pure parser/normalizer, and the behavior remains clearly tied to the `agentv eval` command. + +## Windows considerations +- Use Node `fs/promises` + `path` only. +- Treat `workspaceTemplate` as JSON; when embedding absolute paths, use standard `C:\\...` JSON escaping. + +## Open questions +- Should `agentv workspace create` include flags to add initial sources inline (e.g.
`--add-local <path> --include <subpath>`), or keep it as “create empty config, user edits YAML” for v1? +- Should `agentv workspace sync` support per-source filters (e.g. `--source <id>`), or only “sync all” for v1? diff --git a/openspec/changes/add-agent-workspace-script/proposal.md b/openspec/changes/add-agent-workspace-script/proposal.md new file mode 100644 index 00000000..cd875cde --- /dev/null +++ b/openspec/changes/add-agent-workspace-script/proposal.md @@ -0,0 +1,32 @@ +# Change: Add agent workspace sync for agentic targets + +## Why +Agentic target providers (VS Code via subagent, Codex CLI, Claude Code CLI, and generic CLI targets) commonly need a consistent working directory that already contains prompt files and other agent assets (prompts, instructions, templates, etc.). Today users must manually curate a folder and then manually configure `cwd`/workspace settings per target. This is error-prone and makes evals harder to reproduce. + +## What Changes +- Add a workspace sync utility to AgentV that can populate a workspace folder from multiple sources (local paths or git repositories) based on a YAML config file. +- Add CLI commands: + - `agentv workspace create` to create an initial workspace directory and write the config file + - `agentv workspace sync` to update/refresh all configured sources into the workspace directory (copy mode) +- Add an optional `agentv eval --workspace-root <dir>` flag that sets a default working directory for **agentic** targets when the target config does not specify one. + +## Non-goals +- Do not add a “skills loader” / marketplace installer / AGENTS.md rewriting system. +- Do not require any particular folder convention (skills are optional; sources can sync any folders). +- Do not add new provider types. +- Do not change default provider behavior unless `--workspace-root` is explicitly provided. +- Do not auto-modify user `targets.yaml`; instead provide CLI flag and/or sample snippet output.
+ +## Impact +- Affected specs: + - `eval-cli` (new optional flag affecting target resolution) + - new `workspace-cli` capability (new commands + config file) +- Affected code (expected): + - `apps/cli/src/cli.ts` (command registration) + - `apps/cli/src/commands/workspace/*` (new) + - `apps/cli/src/commands/eval/*` (flag plumbing) + - `packages/core/src/evaluation/providers/targets.ts` (optional: only if we choose to inject workspace root at core target resolution) + +## Compatibility +- Backward compatible: existing configs and command invocations continue working unchanged. +- `--workspace-root` is opt-in and only applies when a target does not already define `cwd` (or `workspaceTemplate` for VS Code). diff --git a/openspec/changes/add-agent-workspace-script/specs/eval-cli/spec.md b/openspec/changes/add-agent-workspace-script/specs/eval-cli/spec.md new file mode 100644 index 00000000..5f0b2aaf --- /dev/null +++ b/openspec/changes/add-agent-workspace-script/specs/eval-cli/spec.md @@ -0,0 +1,16 @@ +## MODIFIED Requirements + +### Requirement: Target and Environment Resolution +The CLI SHALL resolve targets and environment variables before running evaluations. 
+ +#### Scenario: Workspace root override for agentic targets +- **WHEN** the user runs `agentv eval` with `--workspace-root <dir>` +- **THEN** `<dir>` is treated as the default working directory for agentic providers when their target configuration does not specify one +- **AND** explicit per-target settings remain highest precedence + +Provider-specific expectations: +- **codex**: if target `cwd` is not set, set `cwd = <dir>` +- **claude-code**: if target `cwd` is not set, set `cwd = <dir>` +- **pi-coding-agent**: if target `cwd` is not set, set `cwd = <dir>` +- **cli**: if target `cwd` is not set, set `cwd = <dir>` +- **vscode / vscode-insiders**: if target `workspaceTemplate` is not set, synthesize a workspace template with root folder `<dir>` diff --git a/openspec/changes/add-agent-workspace-script/specs/workspace-cli/spec.md b/openspec/changes/add-agent-workspace-script/specs/workspace-cli/spec.md new file mode 100644 index 00000000..4088a2cc --- /dev/null +++ b/openspec/changes/add-agent-workspace-script/specs/workspace-cli/spec.md @@ -0,0 +1,36 @@ +## ADDED Requirements + +### Requirement: Workspace create command +The system SHALL provide a CLI command to create an agent workspace directory and a workspace config file used to sync assets into that directory.
+ +#### Scenario: Create workspace with config +- **WHEN** the user runs `agentv workspace create --out <dir>` +- **THEN** the CLI creates `<dir>` (including parent directories) +- **AND** writes a workspace config file at `<dir>/.agentv/workspace.yaml` (unless `--config` is provided) +- **AND** the config file is versioned and supports multiple sources + +#### Scenario: Default output path +- **WHEN** `--out` is omitted +- **THEN** the CLI creates a workspace directory under `.agentv/workspaces/` relative to the current working directory + +#### Scenario: Existing output directory +- **WHEN** the output directory already exists and is non-empty +- **THEN** the CLI fails with a clear error +- **UNLESS** `--force` is provided, in which case the CLI overwrites the destination + +### Requirement: Workspace sync command +The system SHALL provide a CLI command to sync a workspace directory from one or more configured sources. + +#### Scenario: Sync all sources +- **WHEN** the user runs `agentv workspace sync --config <path>` +- **THEN** the CLI reads `<path>` and syncs all configured sources into the workspace root +- **AND** in `copy` mode, updates the workspace by copying files from each source include path into its destination + +#### Scenario: Symlink mode +- **WHEN** the workspace is configured for `symlink` mode (or `--mode symlink` is passed) +- **THEN** the CLI uses symlinks where supported instead of copying +- **AND** failures to create symlinks produce a clear error message + +#### Scenario: Git sources with folder includes +- **WHEN** a source is `type: git` with `include` folders +- **THEN** the CLI syncs only those folders (e.g., via sparse checkout) rather than cloning the entire repository contents diff --git a/openspec/changes/add-agent-workspace-script/tasks.md b/openspec/changes/add-agent-workspace-script/tasks.md new file mode 100644 index 00000000..871ca795 --- /dev/null +++ b/openspec/changes/add-agent-workspace-script/tasks.md @@ -0,0 +1,27 @@ +# Tasks: Add agent workspace
sync for agentic targets + +- [ ] Define `.agentv/workspace.yaml` schema (versioned) supporting: + - [ ] `workspace_root`, `mode: copy|symlink` + - [ ] multiple `sources` with `type: local|git`, `include` paths, and `dest` mapping +- [ ] Implement `agentv workspace create` command + - [ ] Accept `--out <dir>` and optional `--config <path>`, `--force` + - [ ] Create workspace root directory and write default config YAML + - [ ] Print workspace root + config path +- [ ] Implement `agentv workspace sync` command + - [ ] Accept `--config <path>` and optional `--mode copy|symlink` + - [ ] For `local` sources: copy/symlink configured folders into destination + - [ ] For `git` sources: clone/update using `git` and sparse checkout of configured folders +- [ ] Add `agentv eval --workspace-root <dir>` flag + - [ ] When set, inject workspace root as default for agentic targets where not explicitly configured: + - [ ] `codex`: `config.cwd` + - [ ] `claude-code`: `config.cwd` + - [ ] `pi-coding-agent`: `config.cwd` + - [ ] `cli`: `config.cwd` + - [ ] `vscode`/`vscode-insiders`: `config.workspaceTemplate` (set root folder to workspace dir) + - [ ] Preserve explicit per-target settings as highest precedence +- [ ] Add docs + - [ ] Update `README.md` with a short “Workspace sync” section and example commands +- [ ] Add tests + - [ ] Unit test for workspace sync planner (sources → destination mapping) + - [ ] Unit test for `--workspace-root` injection logic +- [ ] Run verification: `bun run build`, `bun run typecheck`, `bun run lint`, `bun test` diff --git a/package.json b/package.json index 8a55e354..7f2d0cb2 100644 --- a/package.json +++ b/package.json @@ -4,10 +4,7 @@ "private": true, "description": "AgentV monorepo workspace", "packageManager": "bun@1.3.3", - "workspaces": [ - "apps/*", - "packages/*" - ], + "workspaces": ["apps/*", "packages/*"], "scripts": { "build": "bun --filter @agentv/core build && bun --filter @agentv/eval build && bun --filter agentv build", "verify": "bun run build && bun
run typecheck && bun run lint && bun run test", diff --git a/packages/core/package.json b/packages/core/package.json index 5c4ff566..5b4c9c76 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -36,10 +36,7 @@ "test:watch": "bun test --watch", "diagnostics:azure": "bun src/diagnostics/azure-deployment-diag.ts" }, - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "dependencies": { "@ai-sdk/anthropic": "^2.0.53", "@ai-sdk/azure": "^2.0.78", diff --git a/packages/eval/package.json b/packages/eval/package.json index 53075bb9..87b3d186 100644 --- a/packages/eval/package.json +++ b/packages/eval/package.json @@ -29,10 +29,7 @@ "fix": "biome check --write .", "test": "bun test" }, - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "dependencies": { "zod": "^3.23.8" }