diff --git a/.cmux/scripts/demo b/.cmux/scripts/demo new file mode 100755 index 0000000000..b2451d75f3 --- /dev/null +++ b/.cmux/scripts/demo @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Description: Demo script to showcase the script execution feature. Accepts no arguments. +set -euo pipefail + +# Progress messages to stderr (shown to user, not sent to agent) +echo "Running demo script..." >&2 +echo "Current workspace: $(pwd)" >&2 +echo "Timestamp: $(date)" >&2 + +# Structured output to stdout (sent to agent) +cat <<'EOF' +## 🎉 Script Execution Demo + +✅ Script executed successfully! + +**Output Semantics:** +- `stdout`: Sent to the agent as tool result +- `stderr`: Shown to user only (progress/debug info) + +The demo script completed. You can create workspace-specific scripts to automate tasks. +EOF diff --git a/.cmux/scripts/echo b/.cmux/scripts/echo new file mode 100755 index 0000000000..0cbb9acaf9 --- /dev/null +++ b/.cmux/scripts/echo @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Description: Echo arguments demo. Accepts any number of arguments (strings) which will be echoed back. 
+set -euo pipefail + +# Check if arguments were provided +if [ $# -eq 0 ]; then + cat <<'EOF' +## ⚠️ No Arguments Provided + +Usage: `/s echo ` + +Example: `/s echo hello world` +EOF + exit 0 +fi + +# Structured output to stdout (sent to agent) +cat < + contains(fromJson('["chatgpt-codex-connector","chatgpt-codex-connector[bot]"]'), github.event.sender.login) + && (github.event_name != 'issue_comment' || github.event.issue.pull_request != null) + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required for git describe to find tags + + - name: Determine PR number + id: determine-pr + run: | + if [[ "${{ github.event_name }}" == "issue_comment" ]]; then + echo "value=${{ github.event.issue.number }}" >> "$GITHUB_OUTPUT" + else + echo "value=${{ github.event.pull_request.number }}" >> "$GITHUB_OUTPUT" + fi + + - name: Check for unresolved Codex comments + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: ./scripts/check_codex_comments.sh ${{ steps.determine-pr.outputs.value }} diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 7a7677d317..d609542ca8 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -13,6 +13,7 @@ - [SSH](./ssh.md) - [Forking](./fork.md) - [Init Hooks](./init-hooks.md) + - [Workspace Scripts](./scripts.md) - [VS Code Extension](./vscode-extension.md) - [Models](./models.md) - [Keyboard Shortcuts](./keybinds.md) diff --git a/docs/scripts.md b/docs/scripts.md new file mode 100644 index 0000000000..60019bf6ba --- /dev/null +++ b/docs/scripts.md @@ -0,0 +1,187 @@ +# Workspace Scripts + +Execute custom scripts from your workspace using slash commands or let the AI Agent run them as tools. + +## Overview + +Scripts are stored in `.mux/scripts/` within each workspace. They serve two purposes: + +1. **Human Use**: Executable via `/script <name>` or `/s <name>` in chat. +2. **Agent Use**: Automatically exposed to the AI as tools (`script_<name>`), allowing the agent to run complex workflows you define. 
+ +Scripts run in the workspace directory with full access to project secrets and environment variables. + +**Key Point**: Scripts are workspace-specific. Each workspace has its own custom toolkit defined in `.mux/scripts/`. + +## Creating Scripts + +1. **Create the scripts directory**: + + ```bash + mkdir -p .mux/scripts + ``` + +2. **Add an executable script** (for example, `.mux/scripts/deploy`): + + ```bash + #!/usr/bin/env bash + # Description: Deploy to staging. Accepts one optional argument: 'dry-run' to simulate. + + if [ "${1:-}" == "dry-run" ]; then + echo "Simulating deployment..." + else + echo "Deploying to staging..." + fi + ``` + + **Crucial**: The `# Description:` line is what the AI reads to understand the tool. Be descriptive about what the script does and what arguments it accepts. + +3. **Make it executable**: + + ```bash + chmod +x .mux/scripts/deploy + ``` + +## Agent Integration (AI Tools) + +Every executable script in `.mux/scripts/` is automatically registered as a tool for the AI Agent. + +- **Tool Name**: `script_<name>` (e.g., `deploy` -> `script_deploy`, `run-tests` -> `script_run_tests`) +- **Tool Description**: Taken from the script's header comment (`# Description: ...`). +- **Arguments**: The AI can pass an array of string arguments to the script. + +### Optimization for AI + +To make your scripts effective AI tools: + +1. **Clear Descriptions**: Explicitly state what the script does and what arguments it expects. + + ```bash + # Description: Fetch logs. Requires one argument: the environment name (dev|prod). + ``` + +2. **Robustness**: Use `set -euo pipefail` to ensure the script fails loudly if something goes wrong, allowing the AI to catch the error. +3. **Clear Output**: Write structured output to stdout so the agent can understand results and take action. 
+ +## Usage + +### Basic Execution + +Type `/s` or `/script` in chat to see available scripts with auto-completion: + +``` +/s deploy +``` + +### With Arguments + +Pass arguments to scripts: + +``` +/s deploy dry-run +/script test --verbose --coverage +``` + +Arguments are passed directly to the script as `$1`, `$2`, etc. + +## Execution Context + +Scripts run with: + +- **Working directory**: The workspace directory. +- **Environment**: Full workspace environment + project secrets + special cmux variables. +- **Timeout**: 5 minutes by default. +- **Streams**: stdout/stderr are captured. + - **Human**: Visible in the chat card. + - **Agent**: stdout is returned as the tool execution result (stderr is not sent to the agent). + +### Standard Streams + +Scripts follow Unix conventions for output: + +- **stdout**: Sent to the agent as the tool result. Use this for structured output the agent should act on. +- **stderr**: Shown to the user in the UI but **not** sent to the agent. Use this for progress messages, logs, or debugging info that doesn't need AI attention. + +This design means scripts work identically whether run inside mux or directly from the command line. + +#### Example: Test Runner + +```bash +#!/usr/bin/env bash +# Description: Run tests and report failures for the agent to fix + +set -euo pipefail + +# Progress to stderr (user sees it, agent doesn't) +echo "Running test suite..." >&2 + +if npm test > test.log 2>&1; then + # Success message to stdout (agent sees it) + echo "✅ All tests passed" +else + # Structured failure info to stdout (agent sees and can act on it) + cat << EOF +❌ Tests failed. Here is the log: + +\`\`\` +$(cat test.log) +\`\`\` + +Please analyze this error and propose a fix. +EOF + exit 1 +fi +``` + +**Result**: + +1. User sees "Running test suite..." progress message. +2. On failure, agent receives the structured error with test log and instructions. +3. Agent can immediately analyze and propose fixes. 
+ +## Example Scripts + +### Deployment Script + +```bash +#!/usr/bin/env bash +# Description: Deploy application. Accepts one arg: environment (default: staging). +set -euo pipefail + +ENV=${1:-staging} +echo "Deploying to $ENV..." +# ... deployment logic ... +echo "Deployment complete!" +``` + +### Web Fetch Utility + +```bash +#!/usr/bin/env bash +# Description: Fetch a URL. Accepts exactly one argument: the URL. +set -euo pipefail + +if [ $# -ne 1 ]; then + echo "Usage: $0 <url>" + exit 1 +fi +curl -sL "$1" +``` + +## Script Discovery + +- Scripts are discovered automatically from `.mux/scripts/` in the current workspace. +- Discovery is cached for performance but refreshes intelligently. +- **Sanitization**: Script names are sanitized for tool use (e.g., `my-script.sh` -> `script_my_script_sh`). + +## Troubleshooting + +**Script not appearing in suggestions or tools?** + +- Ensure file is executable: `chmod +x .mux/scripts/scriptname` +- Verify file is in `.mux/scripts/` directory. +- Check for valid description header. + +**Agent using script incorrectly?** + +- Improve the `# Description:` header. Explicitly tell the agent what arguments to pass. 
diff --git a/src/browser/App.stories.tsx b/src/browser/App.stories.tsx index ff4c30db0e..655fb28b28 100644 --- a/src/browser/App.stories.tsx +++ b/src/browser/App.stories.tsx @@ -85,6 +85,12 @@ function setupMockAPI(options: { success: true, data: { success: true, output: "", exitCode: 0, wall_duration_ms: 0 }, }), + listScripts: () => Promise.resolve({ success: true, data: [] }), + executeScript: () => + Promise.resolve({ + success: true, + data: { success: true, output: "", exitCode: 0, wall_duration_ms: 0 }, + }), }, projects: { list: () => Promise.resolve(Array.from(mockProjects.entries())), @@ -1255,6 +1261,12 @@ main data: { success: true, output: "", exitCode: 0, wall_duration_ms: 0 }, }); }, + listScripts: () => Promise.resolve({ success: true, data: [] }), + executeScript: () => + Promise.resolve({ + success: true, + data: { success: true, output: "", exitCode: 0, wall_duration_ms: 0 }, + }), }, }, }); @@ -1463,6 +1475,12 @@ These tables should render cleanly without any disruptive copy or download actio success: true, data: { success: true, output: "", exitCode: 0, wall_duration_ms: 0 }, }), + listScripts: () => Promise.resolve({ success: true, data: [] }), + executeScript: () => + Promise.resolve({ + success: true, + data: { success: true, output: "", exitCode: 0, wall_duration_ms: 0 }, + }), }, }, }); diff --git a/src/browser/App.tsx b/src/browser/App.tsx index 8c34d359d1..273cd53821 100644 --- a/src/browser/App.tsx +++ b/src/browser/App.tsx @@ -1,4 +1,4 @@ -import { useEffect, useCallback, useRef } from "react"; +import { useEffect, useCallback, useRef, useState } from "react"; import "./styles/globals.css"; import { useWorkspaceContext } from "./contexts/WorkspaceContext"; import { useProjectContext } from "./contexts/ProjectContext"; @@ -99,6 +99,39 @@ function AppInner() { setSidebarCollapsed((prev) => !prev); }, [setSidebarCollapsed]); + // Cache of scripts available in each workspace (lazy-loaded per workspace) + interface ScriptSummary { + name: 
string; + description?: string; + } + const [scriptCache, setScriptCache] = useState>(new Map()); + + // Load scripts for current workspace when workspace is selected + // Reloads every time workspace changes to pick up new scripts + useEffect(() => { + if (!selectedWorkspace) return; + + const workspaceId = selectedWorkspace.workspaceId; + + const loadScriptsForWorkspace = async () => { + try { + const result = await window.api.workspace.listScripts(workspaceId); + if (result.success) { + // Filter to only executable scripts for suggestions + const executableScripts = result.data + .filter((s) => s.isExecutable) + .map((s) => ({ name: s.name, description: s.description })); + + setScriptCache((prev) => new Map(prev).set(workspaceId, executableScripts)); + } + } catch (error) { + console.error(`Failed to load scripts for ${workspaceId}:`, error); + } + }; + + void loadScriptsForWorkspace(); + }, [selectedWorkspace]); + // Telemetry tracking const telemetry = useTelemetry(); @@ -642,10 +675,17 @@ function AppInner() { ({ - providerNames: [], - workspaceId: selectedWorkspace?.workspaceId, - })} + getSlashContext={() => { + const availableScripts = selectedWorkspace + ? (scriptCache.get(selectedWorkspace.workspaceId) ?? 
[]) + : []; + + return { + providerNames: [], + availableScripts, + workspaceId: selectedWorkspace?.workspaceId, + }; + }} /> invokeIPC(IPC_CHANNELS.WORKSPACE_EXECUTE_BASH, workspaceId, script, options), openTerminal: (workspaceId) => invokeIPC(IPC_CHANNELS.WORKSPACE_OPEN_TERMINAL, workspaceId), + listScripts: (workspaceId) => + invokeIPC(IPC_CHANNELS.WORKSPACE_LIST_SCRIPTS, workspaceId), + executeScript: (workspaceId, scriptName, args) => + invokeIPC(IPC_CHANNELS.WORKSPACE_EXECUTE_SCRIPT, workspaceId, scriptName, args), activity: { list: async (): Promise> => { const response = await invokeIPC>( diff --git a/src/browser/components/AIView.tsx b/src/browser/components/AIView.tsx index 9ebff9d198..df91923211 100644 --- a/src/browser/components/AIView.tsx +++ b/src/browser/components/AIView.tsx @@ -128,10 +128,12 @@ const AIViewInner: React.FC = ({ const forceCompactionTriggeredRef = useRef(null); // Extract state from workspace state - const { messages, canInterrupt, isCompacting, loading, currentModel } = workspaceState; + const { messages, canInterrupt, isCompacting, loading, currentModel, pendingScriptExecution } = + workspaceState; // Get active stream message ID for token counting const activeStreamMessageId = aggregator.getActiveStreamMessageId(); + const isScriptExecutionPending = Boolean(pendingScriptExecution); // Use pending send model for auto-compaction check, not the last stream's model. 
// This ensures the threshold is based on the model the user will actually send with, @@ -359,9 +361,15 @@ const AIViewInner: React.FC = ({ const mergedMessages = mergeConsecutiveStreamErrors(workspaceState.messages); const editCutoffHistoryId = mergedMessages.find( - (msg): msg is Exclude => + ( + msg + ): msg is Exclude< + DisplayedMessage, + { type: "history-hidden" | "workspace-init" | "script-execution" } + > => msg.type !== "history-hidden" && msg.type !== "workspace-init" && + msg.type !== "script-execution" && msg.historyId === editingMessage.id )?.historyId; @@ -398,9 +406,15 @@ const AIViewInner: React.FC = ({ // When editing, find the cutoff point const editCutoffHistoryId = editingMessage ? mergedMessages.find( - (msg): msg is Exclude => + ( + msg + ): msg is Exclude< + DisplayedMessage, + { type: "history-hidden" | "workspace-init" | "script-execution" } + > => msg.type !== "history-hidden" && msg.type !== "workspace-init" && + msg.type !== "script-execution" && msg.historyId === editingMessage.id )?.historyId : undefined; @@ -440,6 +454,30 @@ const AIViewInner: React.FC = ({ ); } + const interruptKeybindDisplay = formatKeybind( + vimEnabled ? KEYBINDS.INTERRUPT_STREAM_VIM : KEYBINDS.INTERRUPT_STREAM_NORMAL + ); + const streamingStatusText = pendingScriptExecution + ? `${pendingScriptExecution.command} running...` + : isCompacting + ? currentModel + ? `${getModelName(currentModel)} compacting...` + : "compacting..." + : currentModel + ? `${getModelName(currentModel)} streaming...` + : "streaming..."; + const streamingCancelText = pendingScriptExecution + ? `hit ${interruptKeybindDisplay} to cancel script` + : `hit ${interruptKeybindDisplay} to cancel`; + const streamingTokenCount = + isScriptExecutionPending || !activeStreamMessageId + ? undefined + : aggregator.getStreamingTokenCount(activeStreamMessageId); + const streamingTPS = + isScriptExecutionPending || !activeStreamMessageId + ? 
undefined + : aggregator.getStreamingTPS(activeStreamMessageId); + return (
= ({ editCutoffHistoryId !== undefined && msg.type !== "history-hidden" && msg.type !== "workspace-init" && + msg.type !== "script-execution" && msg.historyId === editCutoffHistoryId; return (
= ({ {canInterrupt && ( )} {workspaceState?.queuedMessage && ( diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx index 3f124cbfc7..1df11a597b 100644 --- a/src/browser/components/ChatInput/index.tsx +++ b/src/browser/components/ChatInput/index.tsx @@ -17,6 +17,7 @@ import { usePersistedState, updatePersistedState } from "@/browser/hooks/usePers import { useMode } from "@/browser/contexts/ModeContext"; import { ThinkingSliderComponent } from "../ThinkingSlider"; import { ModelSettings } from "../ModelSettings"; +import { useAvailableScripts } from "@/browser/hooks/useAvailableScripts"; import { useSendMessageOptions } from "@/browser/hooks/useSendMessageOptions"; import { getModelKey, @@ -112,6 +113,7 @@ export type { ChatInputProps, ChatInputAPI }; export const ChatInput: React.FC = (props) => { const { variant } = props; + const workspaceId = variant === "workspace" ? props.workspaceId : undefined; // Extract workspace-specific props with defaults const disabled = props.disabled ?? false; @@ -138,6 +140,7 @@ export const ChatInput: React.FC = (props) => { const [showCommandSuggestions, setShowCommandSuggestions] = useState(false); const [commandSuggestions, setCommandSuggestions] = useState([]); const [providerNames, setProviderNames] = useState([]); + const availableScripts = useAvailableScripts(workspaceId ?? 
null); const [toast, setToast] = useState(null); const [imageAttachments, setImageAttachments] = useState([]); const handleToastDismiss = useCallback(() => { @@ -325,10 +328,13 @@ export const ChatInput: React.FC = (props) => { // Watch input for slash commands useEffect(() => { const normalizedSlashSource = normalizeSlashCommandInput(input); - const suggestions = getSlashCommandSuggestions(normalizedSlashSource, { providerNames }); + const suggestions = getSlashCommandSuggestions(normalizedSlashSource, { + providerNames, + availableScripts, + }); setCommandSuggestions(suggestions); setShowCommandSuggestions(normalizedSlashSource.startsWith("/") && suggestions.length > 0); - }, [input, providerNames]); + }, [input, providerNames, availableScripts]); // Load provider names for suggestions useEffect(() => { diff --git a/src/browser/components/ChatInputToast.tsx b/src/browser/components/ChatInputToast.tsx index 2a4a40b227..2c4ad72764 100644 --- a/src/browser/components/ChatInputToast.tsx +++ b/src/browser/components/ChatInputToast.tsx @@ -1,15 +1,18 @@ import type { ReactNode } from "react"; import React, { useEffect, useCallback } from "react"; import { cn } from "@/common/lib/utils"; +import ReactMarkdown from "react-markdown"; +import { markdownComponents } from "./Messages/MarkdownComponents"; -const toastTypeStyles: Record<"success" | "error", string> = { +const toastTypeStyles: Record<"success" | "error" | "warning", string> = { success: "bg-toast-success-bg border border-accent-dark text-toast-success-text", error: "bg-toast-error-bg border border-toast-error-border text-toast-error-text", + warning: "bg-amber-900 border border-yellow-600 text-yellow-100", }; export interface Toast { id: string; - type: "success" | "error"; + type: "success" | "error" | "warning"; title?: string; message: string; solution?: ReactNode; @@ -36,7 +39,7 @@ export const ChatInputToast: React.FC = ({ toast, onDismiss useEffect(() => { if (!toast) return; - // Only auto-dismiss 
success toasts + // Only auto-dismiss success toasts (warnings/errors stay until dismissed) if (toast.type === "success") { const duration = toast.duration ?? 3000; const timer = setTimeout(() => { @@ -48,7 +51,6 @@ export const ChatInputToast: React.FC = ({ toast, onDismiss }; } - // Error toasts stay until manually dismissed return () => { setIsLeaving(false); }; @@ -91,7 +93,7 @@ export const ChatInputToast: React.FC = ({ toast, onDismiss ); } - // Regular toast for simple messages and success + // Regular toast for simple messages, warnings, and success return (
= ({ toast, onDismiss {toast.type === "success" ? "✓" : "⚠"}
{toast.title &&
{toast.title}
} -
{toast.message}
+
+ {toast.message} +
- {toast.type === "error" && ( + {(toast.type === "error" || toast.type === "warning") && (