Skip to content

Commit ac30c74

Browse files
authored
🤖 feat: add first-class mux run CLI command (#881)
## Summary

Replace the internal `agentSessionCli.ts` with a user-facing `mux run` command for running agent sessions from the command line, with unified logging across backend and CLI.

## Changes

### New `mux run` Command

```bash
# Simple usage
mux run "Fix the failing tests"

# With options
mux run --dir /path/to/project --runtime "ssh user@host" "Deploy changes"

# Scripted
echo "Add logging" | mux run --json | jq '.type'
```

**Key improvements over the old CLI:**

| Old (`agentSessionCli.ts`) | New (`mux run`) |
|---------------------------|-----------------|
| `--workspace-path X --workspace-id Y` | Auto-derived from `--dir` |
| `--json-streaming` | `--json` |
| No `--help` | Full `--help` with examples |
| Buried in `debug/` | Top-level subcommand |
| `--thinking-level` | `-t, --thinking` |
| Timeout in ms only | `--timeout 5m` (human-friendly) |
| Very verbose output | Quiet by default (`--verbose` for info) |

### Unified Logging with Log Levels

- **Log levels**: `error`, `warn`, `info`, `debug` (hierarchical)
- **CLI default**: `error` (quiet - only errors shown)
- **Desktop default**: `info` (current behavior preserved)
- **Environment override**: `MUX_LOG_LEVEL=warn` or `MUX_DEBUG=1`

```bash
# Quiet by default
mux run "task"

# Verbose output
mux run --verbose "task"

# Explicit level
mux run --log-level debug "task"
```

Migrated ~30 files from raw `console.error/warn/log` to unified `log.*` calls.

### CLI Routing with Commander.js

Top-level routing uses Commander.js with lazy loading to avoid importing Electron/AI SDK until needed:

- `mux run` - agent sessions
- `mux server` - oRPC server
- `mux api` - API utilities
- `mux desktop` - launch GUI (auto-detected when running under Electron)

### Other Improvements

- **Default model**: Centralized to Opus 4.5 (`DEFAULT_MODEL` constant)
- **Timeout parsing**: Uses `parse-duration` library for robust handling (`1h30m`, `5min`, etc.)
- **Terminal-bench**: Updated to use new CLI entry point

## Testing

- CLI integration tests in `src/cli/run.test.ts`
- All static checks pass
- E2E tests updated for new default model

---

_Generated with `mux`_
1 parent f754732 commit ac30c74

37 files changed

+795
-366
lines changed

benchmarks/terminal_bench/mux-run.sh

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ MUX_TRUNK="${MUX_TRUNK:-main}"
2929
MUX_WORKSPACE_ID="${MUX_WORKSPACE_ID:-mux-bench}"
3030
MUX_THINKING_LEVEL="${MUX_THINKING_LEVEL:-high}"
3131
MUX_MODE="${MUX_MODE:-exec}"
32+
MUX_RUNTIME="${MUX_RUNTIME:-}"
3233

3334
resolve_project_path() {
3435
if [[ -n "${MUX_PROJECT_PATH}" ]]; then
@@ -77,21 +78,21 @@ ensure_git_repo "${project_path}"
7778
log "starting mux agent session for ${project_path}"
7879
cd "${MUX_APP_ROOT}"
7980

80-
cmd=(bun src/cli/debug/agentSessionCli.ts
81-
--config-root "${MUX_CONFIG_ROOT}"
82-
--project-path "${project_path}"
83-
--workspace-path "${project_path}"
84-
--workspace-id "${MUX_WORKSPACE_ID}"
81+
cmd=(bun src/cli/run.ts
82+
--dir "${project_path}"
8583
--model "${MUX_MODEL}"
8684
--mode "${MUX_MODE}"
87-
--json-streaming)
85+
--thinking "${MUX_THINKING_LEVEL}"
86+
--config-root "${MUX_CONFIG_ROOT}"
87+
--workspace-id "${MUX_WORKSPACE_ID}"
88+
--json)
8889

8990
if [[ -n "${MUX_TIMEOUT_MS}" ]]; then
9091
cmd+=(--timeout "${MUX_TIMEOUT_MS}")
9192
fi
9293

93-
if [[ -n "${MUX_THINKING_LEVEL}" ]]; then
94-
cmd+=(--thinking-level "${MUX_THINKING_LEVEL}")
94+
if [[ -n "${MUX_RUNTIME}" ]]; then
95+
cmd+=(--runtime "${MUX_RUNTIME}")
9596
fi
9697

9798
# Terminal-bench enforces timeouts via --global-agent-timeout-sec

benchmarks/terminal_bench/mux_agent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class MuxAgent(AbstractInstalledAgent):
6262
"MUX_APP_ROOT",
6363
"MUX_WORKSPACE_ID",
6464
"MUX_MODE",
65+
"MUX_RUNTIME",
6566
)
6667

6768
def __init__(

bun.lock

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
"motion": "^12.23.24",
4949
"ollama-ai-provider-v2": "^1.5.4",
5050
"openai": "^6.9.1",
51+
"parse-duration": "^2.1.4",
5152
"rehype-harden": "^1.1.5",
5253
"shescape": "^2.1.6",
5354
"source-map-support": "^0.5.21",
@@ -2937,6 +2938,8 @@
29372938

29382939
"parent-module": ["parent-module@1.0.1", "", { "dependencies": { "callsites": "^3.0.0" } }, "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g=="],
29392940

2941+
"parse-duration": ["parse-duration@2.1.4", "", {}, "sha512-b98m6MsCh+akxfyoz9w9dt0AlH2dfYLOBss5SdDsr9pkhKNvkWBXU/r8A4ahmIGByBOLV2+4YwfCuFxbDDaGyg=="],
2942+
29402943
"parse-entities": ["parse-entities@4.0.2", "", { "dependencies": { "@types/unist": "^2.0.0", "character-entities-legacy": "^3.0.0", "character-reference-invalid": "^2.0.0", "decode-named-character-reference": "^1.0.0", "is-alphanumerical": "^2.0.0", "is-decimal": "^2.0.0", "is-hexadecimal": "^2.0.0" } }, "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw=="],
29412944

29422945
"parse-json": ["parse-json@5.2.0", "", { "dependencies": { "@babel/code-frame": "^7.0.0", "error-ex": "^1.3.1", "json-parse-even-better-errors": "^2.3.0", "lines-and-columns": "^1.1.6" } }, "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg=="],

docs/SUMMARY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
- [Introduction](./intro.md)
66
- [Install](./install.md)
7+
- [CLI](./cli.md)
78
- [Why Parallelize?](./why-parallelize.md)
89

910
# Features

docs/benchmarking.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Optional environment overrides:
1818
| `MUX_MODEL` | Preferred model (supports `provider/model` syntax) | `anthropic/claude-sonnet-4-5` |
1919
| `MUX_THINKING_LEVEL` | Optional reasoning level (`off`, `low`, `medium`, `high`) | `high` |
2020
| `MUX_MODE` | Starting mode (`plan` or `exec`) | `exec` |
21+
| `MUX_RUNTIME` | Runtime type (`local`, `worktree`, or `ssh <host>`) | `local` |
2122
| `MUX_TIMEOUT_MS` | Optional stream timeout in milliseconds | no timeout |
2223
| `MUX_CONFIG_ROOT` | Location for mux session data inside the container | `/root/.mux` |
2324
| `MUX_APP_ROOT` | Path where the mux sources are staged | `/opt/mux-app` |
@@ -65,7 +66,7 @@ The adapter lives in `benchmarks/terminal_bench/mux_agent.py`. For each task it:
6566

6667
1. Copies the mux repository (package manifests + `src/`) into `/tmp/mux-app` inside the container.
6768
2. Ensures Bun exists, then runs `bun install --frozen-lockfile`.
68-
3. Launches `src/cli/debug/agentSessionCli.ts` to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).
69+
3. Launches `mux run` (`src/cli/run.ts`) to prepare workspace metadata and stream the instruction, storing state under `MUX_CONFIG_ROOT` (default `/root/.mux`).
6970

7071
`MUX_MODEL` accepts either the mux colon form (`anthropic:claude-sonnet-4-5`) or the Terminal-Bench slash form (`anthropic/claude-sonnet-4-5`); the adapter normalises whichever you provide.
7172

docs/cli.md

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Command Line Interface
2+
3+
Mux provides a CLI for running one-off agent tasks without the desktop app. Unlike the interactive desktop experience, `mux run` executes a single request to completion and exits.
4+
5+
## `mux run`
6+
7+
Execute a one-off agent task:
8+
9+
```bash
10+
# Basic usage - run in current directory
11+
mux run "Fix the failing tests"
12+
13+
# Specify a directory
14+
mux run --dir /path/to/project "Add authentication"
15+
16+
# Use SSH runtime
17+
mux run --runtime "ssh user@myserver" "Deploy changes"
18+
19+
# Pipe instructions via stdin
20+
echo "Add logging to all API endpoints" | mux run
21+
22+
# JSON output for scripts
23+
mux run --json "List all TypeScript files" | jq '.type'
24+
```
25+
26+
### Options
27+
28+
| Option | Short | Description | Default |
29+
| ---------------------- | ----- | -------------------------------------------------- | ----------------- |
30+
| `--dir <path>` | `-d` | Project directory | Current directory |
31+
| `--model <model>` | `-m` | Model to use (e.g., `anthropic:claude-sonnet-4-5`) | Default model |
32+
| `--runtime <runtime>` | `-r` | Runtime: `local`, `worktree`, or `ssh <host>` | `local` |
33+
| `--mode <mode>` | | Agent mode: `plan` or `exec` | `exec` |
34+
| `--thinking <level>` | `-t` | Thinking level: `off`, `low`, `medium`, `high` | `medium` |
35+
| `--timeout <duration>` | | Timeout (e.g., `5m`, `300s`, `300000`) | No timeout |
36+
| `--json` | | Output NDJSON for programmatic use | Off |
37+
| `--quiet` | `-q` | Only output final result | Off |
38+
| `--workspace-id <id>` | | Explicit workspace ID | Auto-generated |
39+
| `--config-root <path>` | | Mux config directory | `~/.mux` |
40+
41+
### Runtimes
42+
43+
- **`local`** (default): Runs directly in the specified directory. Best for one-off tasks.
44+
- **`worktree`**: Creates an isolated git worktree under `~/.mux/src`. Useful for parallel work.
45+
- **`ssh <host>`**: Runs on a remote machine via SSH. Example: `--runtime "ssh user@myserver.com"`
46+
47+
### Output Modes
48+
49+
- **Default (TTY)**: Human-readable streaming with tool call formatting
50+
- **`--json`**: NDJSON streaming - each line is a JSON object with event data
51+
- **`--quiet`**: Suppresses streaming output, only shows final assistant response
52+
53+
### Examples
54+
55+
```bash
56+
# Quick fix in current directory
57+
mux run "Fix the TypeScript errors"
58+
59+
# Use a specific model with extended thinking
60+
mux run -m anthropic:claude-sonnet-4-5 -t high "Optimize database queries"
61+
62+
# Run on remote server
63+
mux run -r "ssh dev@staging.example.com" -d /app "Update dependencies"
64+
65+
# Scripted usage with timeout
66+
mux run --json --timeout 5m "Generate API documentation" > output.jsonl
67+
```
68+
69+
## `mux server`
70+
71+
Start the HTTP/WebSocket server for remote access (e.g., from mobile devices):
72+
73+
```bash
74+
mux server --port 3000 --host 0.0.0.0
75+
```
76+
77+
Options:
78+
79+
- `--host <host>` - Host to bind to (default: `localhost`)
80+
- `--port <port>` - Port to bind to (default: `3000`)
81+
- `--auth-token <token>` - Optional bearer token for authentication
82+
- `--add-project <path>` - Add and open project at the specified path
83+
84+
## `mux desktop`
85+
86+
Launch the desktop app. This is automatically invoked when running the packaged app or via `electron .`:
87+
88+
```bash
89+
mux desktop
90+
```
91+
92+
Note: Requires Electron. When running `mux` with no arguments under Electron, the desktop app launches automatically.
93+
94+
## `mux --version`
95+
96+
Print the version and git commit:
97+
98+
```bash
99+
mux --version
100+
# v0.8.4 (abc123)
101+
```

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
"motion": "^12.23.24",
9090
"ollama-ai-provider-v2": "^1.5.4",
9191
"openai": "^6.9.1",
92+
"parse-duration": "^2.1.4",
9293
"rehype-harden": "^1.1.5",
9394
"shescape": "^2.1.6",
9495
"source-map-support": "^0.5.21",

scripts/check-bench-agent.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ if [[ ! -f "$MUX_RUN_SH" ]]; then
1515
fi
1616

1717
# Extract the agent CLI path from mux-run.sh
18-
# Looks for line like: cmd=(bun src/cli/debug/agentSessionCli.ts
18+
# Looks for line like: cmd=(bun src/cli/run.ts
1919
CLI_PATH_MATCH=$(grep -o "bun src/.*\.ts" "$MUX_RUN_SH" | head -1 | cut -d' ' -f2)
2020

2121
if [[ -z "$CLI_PATH_MATCH" ]]; then

src/browser/stories/mockFactory.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import type {
1616
MuxImagePart,
1717
MuxToolPart,
1818
} from "@/common/types/message";
19+
import { DEFAULT_MODEL } from "@/common/constants/knownModels";
1920

2021
/** Part type for message construction */
2122
type MuxPart = MuxTextPart | MuxReasoningPart | MuxImagePart | MuxToolPart;
@@ -196,7 +197,7 @@ export function createAssistantMessage(
196197
metadata: {
197198
historySequence: opts.historySequence,
198199
timestamp: opts.timestamp ?? STABLE_TIMESTAMP,
199-
model: opts.model ?? "anthropic:claude-sonnet-4-5",
200+
model: opts.model ?? DEFAULT_MODEL,
200201
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
201202
duration: 1000,
202203
},

src/browser/stories/storyHelpers.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
getInputKey,
1515
getModelKey,
1616
} from "@/common/constants/storage";
17+
import { DEFAULT_MODEL } from "@/common/constants/knownModels";
1718
import {
1819
createWorkspace,
1920
groupWorkspacesByProject,
@@ -178,7 +179,7 @@ export function setupStreamingChatStory(opts: StreamingChatSetupOptions): APICli
178179
createStreamingChatHandler({
179180
messages: opts.messages,
180181
streamingMessageId: opts.streamingMessageId,
181-
model: opts.model ?? "anthropic:claude-sonnet-4-5",
182+
model: opts.model ?? DEFAULT_MODEL,
182183
historySequence: opts.historySequence,
183184
streamText: opts.streamText,
184185
pendingTool: opts.pendingTool,

0 commit comments

Comments
 (0)