diff --git a/agent_cli/agents/interactive.py b/agent_cli/agents/interactive.py index f6c363e1..eed0e71e 100644 --- a/agent_cli/agents/interactive.py +++ b/agent_cli/agents/interactive.py @@ -16,6 +16,7 @@ import json import logging import os +import signal import time from contextlib import suppress from datetime import UTC, datetime @@ -477,6 +478,7 @@ def interactive( status, toggle, quiet=general_cfg.quiet, + stop_signal=signal.SIGINT, # Use SIGINT for graceful double-stop behavior ): return diff --git a/agent_cli/utils.py b/agent_cli/utils.py index 4adb2180..a0174866 100644 --- a/agent_cli/utils.py +++ b/agent_cli/utils.py @@ -204,12 +204,42 @@ def stop_or_status_or_toggle( toggle: bool, *, quiet: bool = False, + stop_signal: int = signal.SIGTERM, ) -> bool: - """Handle process control for a given process name.""" + """Handle process control for a given process name. + + Args: + process_name: Name of the process to control + which: Human-readable description of the process + stop: Whether to stop the process + status: Whether to check process status + toggle: Whether to toggle the process + quiet: Whether to suppress output + stop_signal: Signal to send when stopping (SIGTERM for kill, SIGINT for graceful) + + Returns: + True if any action was taken, False otherwise + """ if stop: - if process_manager.kill_process(process_name): - if not quiet: - print_with_style(f"✅ {which.capitalize()} stopped.") + pid = process_manager.get_running_pid(process_name) + if pid: + try: + import os + os.kill(pid, stop_signal) + if stop_signal == signal.SIGINT: + # For graceful stops (SIGINT), don't remove PID file - let process handle it + if not quiet: + print_with_style(f"✅ Sent stop signal to {which}.") + else: + # For force kills (SIGTERM), clean up PID file immediately + process_manager.kill_process(process_name) + if not quiet: + print_with_style(f"✅ {which.capitalize()} stopped.") + except (ProcessLookupError, PermissionError): + # Process already dead or no permission, clean up PID file + process_manager.kill_process(process_name) + if not quiet: + print_with_style(f"✅ {which.capitalize()} stopped.") elif not quiet: print_with_style(f"⚠️ No {which} is running.", style="yellow") return True @@ -225,8 +255,22 @@ def stop_or_status_or_toggle( if toggle: if process_manager.is_process_running(process_name): - if process_manager.kill_process(process_name) and not quiet: - print_with_style(f"✅ {which.capitalize()} stopped.") + pid = process_manager.get_running_pid(process_name) + if pid: + try: + import os + os.kill(pid, stop_signal) + if stop_signal == signal.SIGINT: + if not quiet: + print_with_style(f"✅ Sent stop signal to {which}.") + else: + process_manager.kill_process(process_name) + if not quiet: + print_with_style(f"✅ {which.capitalize()} stopped.") + except (ProcessLookupError, PermissionError): + process_manager.kill_process(process_name) + if not quiet: + print_with_style(f"✅ {which.capitalize()} stopped.") return True if not quiet: print_with_style(f"⚠️ {which.capitalize()} is not running.", style="yellow") diff --git a/pid_file_analysis.md b/pid_file_analysis.md new file mode 100644 index 00000000..4b581f3b --- /dev/null +++ b/pid_file_analysis.md @@ -0,0 +1,234 @@ +# PID File Mechanism Analysis + +## Current Implementation Overview + +The agent-cli codebase uses a PID file mechanism for process management with the following components: + +### 1. Process Manager (`agent_cli/process_manager.py`) +- **Purpose**: Manages background process lifecycle using PID files +- **Location**: PID files stored in `~/.cache/agent-cli/{process_name}.pid` +- **Key Functions**: + - `pid_file_context()`: Context manager that creates/cleans up PID files + - `is_process_running()`: Checks if process is active using PID file + `os.kill(pid, 0)` + - `kill_process()`: Terminates process and removes PID file + - `get_running_pid()`: Returns PID if process running, cleans up stale files + +### 2. CLI Process Control +The `--stop`, `--status`, and `--toggle` flags are implemented via `stop_or_status_or_toggle()`: +- **`--stop`**: Kills the background process and removes PID file +- **`--status`**: Checks if process is running and shows PID +- **`--toggle`**: Stops if running, shows warning if not running + +### 3. Current Usage in Agents +All agents (voice-assistant, interactive, transcribe, speak) use the same pattern: +```python +with process_manager.pid_file_context(process_name), suppress(KeyboardInterrupt): + # Main agent logic here +``` + +## The Keyboard Maestro Integration + +The **voice-assistant** has detailed instructions for Keyboard Maestro integration: + +``` +KEYBOARD MAESTRO INTEGRATION: +To create a hotkey toggle for this script, set up a Keyboard Maestro macro with: + +1. Trigger: Hot Key (e.g., Cmd+Shift+A for "Assistant") + +2. If/Then/Else Action: + - Condition: Shell script returns success + - Script: voice-assistant --status >/dev/null 2>&1 + +3. Then Actions (if process is running): + - Display Text Briefly: "🗣️ Processing command..." + - Execute Shell Script: voice-assistant --stop --quiet + - (The script will show its own "Done" notification) + +4. Else Actions (if process is not running): + - Display Text Briefly: "📋 Listening for command..." + - Execute Shell Script: voice-assistant --input-device-index 1 --quiet & + - Select "Display results in a notification" +``` + +**This workflow relies entirely on the PID file mechanism for:** +1. **Status checking** (`--status`) to determine if the process is running +2. **Process termination** (`--stop`) to stop the background listener +3. **Single hotkey toggle behavior** - same key starts/stops the agent + +## Interactive Agent Special Case + +The **interactive** agent has different `--stop` behavior than other agents: + +### Problem +- **Other agents**: `--stop` should kill the entire process (one-shot tasks) +- **Interactive agent**: `--stop` should behave like Ctrl+C with double-stop logic: + - First `--stop`: Stop current recording, continue conversation loop + - Second `--stop`: Exit the entire program + +### Solution +Added a `stop_signal` parameter to `stop_or_status_or_toggle()` for configurable behavior: + +```python +def stop_or_status_or_toggle( + process_name: str, + which: str, + stop: bool, + status: bool, + toggle: bool, + *, + quiet: bool = False, + stop_signal: int = signal.SIGTERM, # Default to force kill +) -> bool: +``` + +Interactive agent now calls it with `stop_signal=signal.SIGINT`: + +```python +stop_or_status_or_toggle( + "interactive", + "interactive agent", + stop, status, toggle, + quiet=quiet, + stop_signal=signal.SIGINT, # Graceful double-stop behavior +) +``` + +This allows the interactive agent to use its existing signal handling logic: +- First SIGINT: Set stop event, process current turn, continue loop +- Second SIGINT: Force exit with code 130 + +## The "Recording" Context + +Based on the code analysis, "recording" refers to: +1. **Audio input capture** via PyAudio stream (`send_audio()` in `asr.py`) +2. **Continuous listening** for voice commands +3. **Background process lifecycle** where agents run until explicitly stopped + +The `--stop` command doesn't just "stop recording and start processing" - it **terminates the entire background process**. But this is **intentional for the Keyboard Maestro workflow**: + +1. Copy text to clipboard +2. Press hotkey → starts `voice-assistant &` (background listening) +3. Speak command +4. Press same hotkey → runs `voice-assistant --stop` (stops listening, processes command, exits) +5. Result is in clipboard + +## Analysis: Is PID File Mechanism Needed? + +### ✅ **Strong Arguments FOR PID Files** + +#### 1. **Essential for Keyboard Maestro Integration** +- **Status checking**: `--status` enables conditional logic in KM macros +- **Toggle behavior**: Single hotkey can start/stop based on current state +- **Background process control**: Enables external automation to manage long-running processes +- **Cross-session persistence**: KM can detect if agent is running even after system restarts + +#### 2. **Prevents Multiple Instance Issues** +- Only one voice-assistant can run at a time (prevents audio conflicts) +- Clean process lifecycle management +- Prevents resource leaks from abandoned processes + +#### 3. **System Integration** +- Standard Unix process management pattern +- Works with any external automation tool (not just KM) +- Enables scripting and monitoring by system administrators + +#### 4. **Clean Architecture** +- Separates process control from application logic +- Enables background operation with foreground control +- Provides reliable cleanup on exit + +#### 5. **Interactive Agent Support** +- Allows external control of long-running conversations +- Double-stop logic matches double Ctrl+C behavior +- Preserves conversation state between stop signals + +### ❌ **Minor Arguments Against PID Files** + +#### 1. **Terminology Confusion** +- "Stop recording" suggests pause/resume, but `--stop` terminates the process +- This is actually **correct behavior** for the KM workflow, just confusing terminology + +#### 2. **File System Dependencies** +- PID files can become stale if processes crash +- But the code handles cleanup well (`get_running_pid()` cleans stale files) + +## Alternative Architectures (NOT Recommended) + +### Option 1: Signal-Based Control Only +```bash +# Instead of PID files, use signals +kill -USR1 $(pgrep voice-assistant) # Stop recording, process +kill -USR2 $(pgrep voice-assistant) # Resume recording +``` + +**Problems**: +- **Breaks KM integration**: No way to check if process is running reliably +- **Multiple instances**: No prevention of duplicate processes +- **Complex state management**: Application needs internal state machine +- **Poor user experience**: No simple status checking + +### Option 2: In-Process Pause/Resume +```python +class VoiceAssistant: + def handle_pause_signal(self): + self.recording = False + # Process and exit +``` + +**Problems**: +- **Wrong mental model**: KM workflow expects process termination, not pause +- **Breaks the clipboard workflow**: Process needs to exit to complete the task +- **Session management**: Would need persistent process for clipboard operations + +## Recommendations + +### ✅ **Keep PID Files** (Strongly Recommended) +The PID file mechanism is **perfectly designed** for your workflow: + +1. **Maintain current architecture** - it's working as intended +2. **Clarify documentation** to explain the KM integration context +3. **Update terminology** - `--stop` means "complete the task and exit" not "pause recording" + +### Implemented Improvements + +1. **Clean parameterized signal handling**: + - No special casing based on process names + - Configurable stop behavior via `stop_signal` parameter + - Maintains single responsibility principle + +### Possible Future Improvements + +1. **Better naming**: + ```bash + --complete # Instead of --stop (complete task and exit) + --terminate # More explicit about process termination + ``` + +2. **Enhanced status messages**: + ```bash + agent-cli voice-assistant --status + # Output: "✅ Voice assistant is listening for commands (PID: 1234)" + ``` + +3. **Add `--toggle` examples to KM docs**: + ```bash + voice-assistant --toggle # Simpler than if/then/else logic + ``` + +## Conclusion + +**The PID file mechanism is ESSENTIAL and well-designed for your use case.** + +Your Keyboard Maestro workflow depends on: +- ✅ **Status checking** to determine current state +- ✅ **Background process management** for the listening phase +- ✅ **Reliable termination** to complete the clipboard workflow +- ✅ **Single hotkey toggle** behavior +- ✅ **Interactive agent double-stop support** (like double Ctrl+C) + +**DO NOT remove PID files** - they are the foundation that makes your automation workflow possible. + +The only "issue" is terminological: `--stop` doesn't mean "pause recording" in your context - it means "complete the voice command task and exit." This is actually the correct behavior for a clipboard-based workflow where each hotkey press represents a complete interaction cycle. + +**Recommendation**: Keep the PID mechanism exactly as it is. It's a well-architected solution for your automation needs. \ No newline at end of file