basnijholt · basnijholt · Jul 3, 2025 · Jul 3, 2025 · Jul 4, 2025 · Jul 4, 2025
diff --git a/agent_cli/agents/interactive.py b/agent_cli/agents/interactive.py
@@ -16,6 +16,7 @@
 import json
 import logging
 import os
+import signal
 import time
 from contextlib import suppress
 from datetime import UTC, datetime
@@ -477,6 +478,7 @@ def interactive(
         status,
         toggle,
         quiet=general_cfg.quiet,
+        stop_signal=signal.SIGINT,  # Use SIGINT for graceful double-stop behavior
     ):
         return
 

diff --git a/agent_cli/utils.py b/agent_cli/utils.py
@@ -204,12 +204,42 @@ def stop_or_status_or_toggle(
     toggle: bool,
     *,
     quiet: bool = False,
+    stop_signal: int = signal.SIGTERM,
 ) -> bool:
-    """Handle process control for a given process name."""
+    """Handle process control for a given process name.
+
+    Args:
+        process_name: Name of the process to control
+        which: Human-readable description of the process
+        stop: Whether to stop the process
+        status: Whether to check process status
+        toggle: Whether to toggle the process
+        quiet: Whether to suppress output
+        stop_signal: Signal to send when stopping (SIGTERM for kill, SIGINT for graceful)
+
+    Returns:
+        True if any action was taken, False otherwise
+    """
     if stop:
-        if process_manager.kill_process(process_name):
-            if not quiet:
-                print_with_style(f"✅ {which.capitalize()} stopped.")
+        pid = process_manager.get_running_pid(process_name)
+        if pid:
+            try:
+                import os
+                os.kill(pid, stop_signal)
+                if stop_signal == signal.SIGINT:
+                    # For graceful stops (SIGINT), don't remove PID file - let process handle it
+                    if not quiet:
+                        print_with_style(f"✅ Sent stop signal to {which}.")
+                else:
+                    # For force kills (SIGTERM), clean up PID file immediately
+                    process_manager.kill_process(process_name)
+                    if not quiet:
+                        print_with_style(f"✅ {which.capitalize()} stopped.")
+            except (ProcessLookupError, PermissionError):
+                # Process already dead or no permission, clean up PID file
+                process_manager.kill_process(process_name)
+                if not quiet:
+                    print_with_style(f"✅ {which.capitalize()} stopped.")
         elif not quiet:
             print_with_style(f"⚠️  No {which} is running.", style="yellow")
         return True
@@ -225,8 +255,22 @@ def stop_or_status_or_toggle(
 
     if toggle:
         if process_manager.is_process_running(process_name):
-            if process_manager.kill_process(process_name) and not quiet:
-                print_with_style(f"✅ {which.capitalize()} stopped.")
+            pid = process_manager.get_running_pid(process_name)
+            if pid:
+                try:
+                    import os
+                    os.kill(pid, stop_signal)
+                    if stop_signal == signal.SIGINT:
+                        if not quiet:
+                            print_with_style(f"✅ Sent stop signal to {which}.")
+                    else:
+                        process_manager.kill_process(process_name)
+                        if not quiet:
+                            print_with_style(f"✅ {which.capitalize()} stopped.")
+                except (ProcessLookupError, PermissionError):
+                    process_manager.kill_process(process_name)
+                    if not quiet:
+                        print_with_style(f"✅ {which.capitalize()} stopped.")
             return True
         if not quiet:
             print_with_style(f"⚠️ {which.capitalize()} is not running.", style="yellow")

diff --git a/pid_file_analysis.md b/pid_file_analysis.md
@@ -0,0 +1,234 @@
+# PID File Mechanism Analysis
+
+## Current Implementation Overview
+
+The agent-cli codebase uses a PID file mechanism for process management with the following components:
+
+### 1. Process Manager (`agent_cli/process_manager.py`)
+- **Purpose**: Manages background process lifecycle using PID files
+- **Location**: PID files stored in `~/.cache/agent-cli/{process_name}.pid`
+- **Key Functions**:
+  - `pid_file_context()`: Context manager that creates/cleans up PID files
+  - `is_process_running()`: Checks if process is active using PID file + `os.kill(pid, 0)`
+  - `kill_process()`: Terminates process and removes PID file
+  - `get_running_pid()`: Returns PID if process running, cleans up stale files
+
+### 2. CLI Process Control
+The `--stop`, `--status`, and `--toggle` flags are implemented via `stop_or_status_or_toggle()`:
+- **`--stop`**: Kills the background process and removes PID file
+- **`--status`**: Checks if process is running and shows PID
+- **`--toggle`**: Stops if running, shows warning if not running
+
+### 3. Current Usage in Agents
+All agents (voice-assistant, interactive, transcribe, speak) use the same pattern:
+```python
+with process_manager.pid_file_context(process_name), suppress(KeyboardInterrupt):
+    # Main agent logic here
+```
+
+## The Keyboard Maestro Integration
+
+The **voice-assistant** has detailed instructions for Keyboard Maestro integration:
+
+```
+KEYBOARD MAESTRO INTEGRATION:
+To create a hotkey toggle for this script, set up a Keyboard Maestro macro with:
+
+1. Trigger: Hot Key (e.g., Cmd+Shift+A for "Assistant")
+
+2. If/Then/Else Action:
+   - Condition: Shell script returns success
+   - Script: voice-assistant --status >/dev/null 2>&1
+
+3. Then Actions (if process is running):
+   - Display Text Briefly: "🗣️ Processing command..."
+   - Execute Shell Script: voice-assistant --stop --quiet
+   - (The script will show its own "Done" notification)
+
+4. Else Actions (if process is not running):
+   - Display Text Briefly: "📋 Listening for command..."
+   - Execute Shell Script: voice-assistant --input-device-index 1 --quiet &
+   - Select "Display results in a notification"
+```
+
+**This workflow relies entirely on the PID file mechanism for:**
+1. **Status checking** (`--status`) to determine if the process is running
+2. **Process termination** (`--stop`) to stop the background listener
+3. **Single hotkey toggle behavior** - same key starts/stops the agent
+
+## Interactive Agent Special Case
+
+The **interactive** agent has different `--stop` behavior than other agents:
+
+### Problem
+- **Other agents**: `--stop` should kill the entire process (one-shot tasks)
+- **Interactive agent**: `--stop` should behave like Ctrl+C with double-stop logic:
+  - First `--stop`: Stop current recording, continue conversation loop
+  - Second `--stop`: Exit the entire program
+
+### Solution
+Added a `stop_signal` parameter to `stop_or_status_or_toggle()` for configurable behavior:
+
+```python
+def stop_or_status_or_toggle(
+    process_name: str,
+    which: str,
+    stop: bool,
+    status: bool,
+    toggle: bool,
+    *,
+    quiet: bool = False,
+    stop_signal: int = signal.SIGTERM,  # Default to force kill
+) -> bool:
+```
+
+Interactive agent now calls it with `stop_signal=signal.SIGINT`:
+
+```python
+stop_or_status_or_toggle(
+    "interactive",
+    "interactive agent", 
+    stop, status, toggle,
+    quiet=quiet,
+    stop_signal=signal.SIGINT,  # Graceful double-stop behavior
+)
+```
+
+This allows the interactive agent to use its existing signal handling logic:
+- First SIGINT: Set stop event, process current turn, continue loop
+- Second SIGINT: Force exit with code 130
+
+## The "Recording" Context
+
+Based on the code analysis, "recording" refers to:
+1. **Audio input capture** via PyAudio stream (`send_audio()` in `asr.py`)
+2. **Continuous listening** for voice commands
+3. **Background process lifecycle** where agents run until explicitly stopped
+
+The `--stop` command doesn't just "stop recording and start processing" - it **terminates the entire background process**. But this is **intentional for the Keyboard Maestro workflow**:
+
+1. Copy text to clipboard
+2. Press hotkey → starts `voice-assistant &` (background listening)
+3. Speak command
+4. Press same hotkey → runs `voice-assistant --stop` (stops listening, processes command, exits)
+5. Result is in clipboard
+
+## Analysis: Is PID File Mechanism Needed?
+
+### ✅ **Strong Arguments FOR PID Files**
+
+#### 1. **Essential for Keyboard Maestro Integration**
+- **Status checking**: `--status` enables conditional logic in KM macros
+- **Toggle behavior**: Single hotkey can start/stop based on current state
+- **Background process control**: Enables external automation to manage long-running processes
+- **Cross-session persistence**: KM can detect if agent is running even after system restarts
+
+#### 2. **Prevents Multiple Instance Issues**
+- Only one voice-assistant can run at a time (prevents audio conflicts)
+- Clean process lifecycle management
+- Prevents resource leaks from abandoned processes
+
+#### 3. **System Integration**
+- Standard Unix process management pattern
+- Works with any external automation tool (not just KM)
+- Enables scripting and monitoring by system administrators
+
+#### 4. **Clean Architecture**
+- Separates process control from application logic
+- Enables background operation with foreground control
+- Provides reliable cleanup on exit
+
+#### 5. **Interactive Agent Support**
+- Allows external control of long-running conversations
+- Double-stop logic matches double Ctrl+C behavior
+- Preserves conversation state between stop signals
+
+### ❌ **Minor Arguments Against PID Files**
+
+#### 1. **Terminology Confusion**
+- "Stop recording" suggests pause/resume, but `--stop` terminates the process
+- This is actually **correct behavior** for the KM workflow, just confusing terminology
+
+#### 2. **File System Dependencies**
+- PID files can become stale if processes crash
+- But the code handles cleanup well (`get_running_pid()` cleans stale files)
+
+## Alternative Architectures (NOT Recommended)
+
+### Option 1: Signal-Based Control Only
+```bash
+# Instead of PID files, use signals
+kill -USR1 $(pgrep voice-assistant)  # Stop recording, process
+kill -USR2 $(pgrep voice-assistant)  # Resume recording
+```
+
+**Problems**:
+- **Breaks KM integration**: No way to check if process is running reliably
+- **Multiple instances**: No prevention of duplicate processes
+- **Complex state management**: Application needs internal state machine
+- **Poor user experience**: No simple status checking
+
+### Option 2: In-Process Pause/Resume
+```python
+class VoiceAssistant:
+    def handle_pause_signal(self):
+        self.recording = False
+        # Process and exit
+```
+
+**Problems**:
+- **Wrong mental model**: KM workflow expects process termination, not pause
+- **Breaks the clipboard workflow**: Process needs to exit to complete the task
+- **Session management**: Would need persistent process for clipboard operations
+
+## Recommendations
+
+### ✅ **Keep PID Files** (Strongly Recommended)
+The PID file mechanism is **perfectly designed** for your workflow:
+
+1. **Maintain current architecture** - it's working as intended
+2. **Clarify documentation** to explain the KM integration context
+3. **Update terminology** - `--stop` means "complete the task and exit" not "pause recording"
+
+### Implemented Improvements
+
+1. **Clean parameterized signal handling**:
+   - No special casing based on process names
+   - Configurable stop behavior via `stop_signal` parameter
+   - Maintains single responsibility principle
+
+### Possible Future Improvements
+
+1. **Better naming**:
+   ```bash
+   --complete    # Instead of --stop (complete task and exit)
+   --terminate   # More explicit about process termination
+   ```
+
+2. **Enhanced status messages**:
+   ```bash
+   agent-cli voice-assistant --status
+   # Output: "✅ Voice assistant is listening for commands (PID: 1234)"
+   ```
+
+3. **Add `--toggle` examples to KM docs**:
+   ```bash
+   voice-assistant --toggle  # Simpler than if/then/else logic
+   ```
+
+## Conclusion
+
+**The PID file mechanism is ESSENTIAL and well-designed for your use case.**
+
+Your Keyboard Maestro workflow depends on:
+- ✅ **Status checking** to determine current state
+- ✅ **Background process management** for the listening phase  
+- ✅ **Reliable termination** to complete the clipboard workflow
+- ✅ **Single hotkey toggle** behavior
+- ✅ **Interactive agent double-stop support** (like double Ctrl+C)
+
+**DO NOT remove PID files** - they are the foundation that makes your automation workflow possible.
+
+The only "issue" is terminological: `--stop` doesn't mean "pause recording" in your context - it means "complete the voice command task and exit." This is actually the correct behavior for a clipboard-based workflow where each hotkey press represents a complete interaction cycle.
+
+**Recommendation**: Keep the PID mechanism exactly as it is. It's a well-architected solution for your automation needs.