basnijholt · sbalk · Jul 28, 2025 · Jul 28, 2025 · Jul 30, 2025
diff --git a/.gitignore b/.gitignore
@@ -57,6 +57,7 @@ coverage.*
 .pytest_cache/
 .vscode/
 .vscode/
+.DS_Store
 
 # Examples and scripts - exclude downloaded models and data
 examples/ollama/models/
@@ -67,3 +68,6 @@ scripts/.runtime/
 *.onnx
 *.onnx.json
 *.bin
+
+# Config file
+agent-cli-config.toml
diff --git a/README.md b/README.md
diff --git a/agent_cli/agents/_voice_agent_common.py b/agent_cli/agents/_voice_agent_common.py
@@ -86,6 +86,7 @@ async def process_instruction_and_respond(
     wyoming_tts_cfg: config.WyomingTTS,
     openai_tts_cfg: config.OpenAITTS,
     kokoro_tts_cfg: config.KokoroTTS,
+    piper_tts_cfg: config.PiperTTS,
     system_prompt: str,
     agent_instructions: str,
     live: Live | None,
@@ -120,6 +121,7 @@ async def process_instruction_and_respond(
                     wyoming_tts_cfg=wyoming_tts_cfg,
                     openai_tts_cfg=openai_tts_cfg,
                     kokoro_tts_cfg=kokoro_tts_cfg,
+                    piper_tts_cfg=piper_tts_cfg,
                     save_file=general_cfg.save_file,
                     quiet=general_cfg.quiet,
                     logger=logger,

diff --git a/agent_cli/agents/assistant.py b/agent_cli/agents/assistant.py
@@ -178,6 +178,7 @@ async def _async_main(
     wyoming_tts_cfg: config.WyomingTTS,
     openai_tts_cfg: config.OpenAITTS,
     kokoro_tts_cfg: config.KokoroTTS,
+    piper_tts_cfg: config.PiperTTS,
     wake_word_cfg: config.WakeWord,
     system_prompt: str,
     agent_instructions: str,
@@ -240,6 +241,7 @@ async def _async_main(
                     wyoming_tts_cfg=wyoming_tts_cfg,
                     openai_tts_cfg=openai_tts_cfg,
                     kokoro_tts_cfg=kokoro_tts_cfg,
+                    piper_tts_cfg=piper_tts_cfg,
                     system_prompt=system_prompt,
                     agent_instructions=agent_instructions,
                     live=live,
@@ -289,6 +291,13 @@ def assistant(
     tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
     tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
     tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
+    tts_piper_host: str = opts.TTS_PIPER_HOST,
+    tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
+    tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
+    tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
+    tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
+    tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
+    tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
     # --- Process Management ---
     stop: bool = opts.STOP,
     status: bool = opts.STATUS,
@@ -383,6 +392,15 @@ def assistant(
             tts_kokoro_voice=tts_kokoro_voice,
             tts_kokoro_host=tts_kokoro_host,
         )
+        piper_tts_cfg = config.PiperTTS(
+            tts_piper_host=tts_piper_host,
+            tts_piper_voice=tts_piper_voice,
+            tts_piper_speaker=tts_piper_speaker,
+            tts_piper_speaker_id=tts_piper_speaker_id,
+            tts_piper_length_scale=tts_piper_length_scale,
+            tts_piper_noise_scale=tts_piper_noise_scale,
+            tts_piper_noise_w_scale=tts_piper_noise_w_scale,
+        )
         wake_word_cfg = config.WakeWord(
             wake_server_ip=wake_server_ip,
             wake_server_port=wake_server_port,
@@ -413,6 +431,7 @@ def assistant(
                 wyoming_tts_cfg=wyoming_tts_cfg,
                 openai_tts_cfg=openai_tts_cfg,
                 kokoro_tts_cfg=kokoro_tts_cfg,
+                piper_tts_cfg=piper_tts_cfg,
                 wake_word_cfg=wake_word_cfg,
                 system_prompt=system_prompt,
                 agent_instructions=agent_instructions,

diff --git a/agent_cli/agents/autocorrect.py b/agent_cli/agents/autocorrect.py
@@ -229,7 +229,7 @@ def autocorrect(
     provider_cfg = config.ProviderSelection(
         llm_provider=llm_provider,
         asr_provider="local",  # Not used, but required by model
-        tts_provider="local",  # Not used, but required by model
+        tts_provider="piper",  # Not used, but required by model
     )
     ollama_cfg = config.Ollama(llm_ollama_model=llm_ollama_model, llm_ollama_host=llm_ollama_host)
     openai_llm_cfg = config.OpenAILLM(

diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py
@@ -163,6 +163,7 @@ async def _handle_conversation_turn(
     wyoming_tts_cfg: config.WyomingTTS,
     openai_tts_cfg: config.OpenAITTS,
     kokoro_tts_cfg: config.KokoroTTS,
+    piper_tts_cfg: config.PiperTTS,
     live: Live,
 ) -> None:
     """Handles a single turn of the conversation."""
@@ -285,6 +286,7 @@ async def _handle_conversation_turn(
             wyoming_tts_cfg=wyoming_tts_cfg,
             openai_tts_cfg=openai_tts_cfg,
             kokoro_tts_cfg=kokoro_tts_cfg,
+            piper_tts_cfg=piper_tts_cfg,
             save_file=general_cfg.save_file,
             quiet=general_cfg.quiet,
             logger=LOGGER,
@@ -315,6 +317,7 @@ async def _async_main(
     wyoming_tts_cfg: config.WyomingTTS,
     openai_tts_cfg: config.OpenAITTS,
     kokoro_tts_cfg: config.KokoroTTS,
+    piper_tts_cfg: config.PiperTTS,
 ) -> None:
     """Main async function, consumes parsed arguments."""
     try:
@@ -362,6 +365,7 @@ async def _async_main(
                         wyoming_tts_cfg=wyoming_tts_cfg,
                         openai_tts_cfg=openai_tts_cfg,
                         kokoro_tts_cfg=kokoro_tts_cfg,
+                        piper_tts_cfg=piper_tts_cfg,
                         live=live,
                     )
     except Exception:
@@ -405,6 +409,13 @@ def chat(
     tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
     tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
     tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
+    tts_piper_host: str = opts.TTS_PIPER_HOST,
+    tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
+    tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
+    tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
+    tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
+    tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
+    tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
     # --- Process Management ---
     stop: bool = opts.STOP,
     status: bool = opts.STATUS,
@@ -508,6 +519,15 @@ def chat(
             tts_kokoro_voice=tts_kokoro_voice,
             tts_kokoro_host=tts_kokoro_host,
         )
+        piper_tts_cfg = config.PiperTTS(
+            tts_piper_host=tts_piper_host,
+            tts_piper_voice=tts_piper_voice,
+            tts_piper_speaker=tts_piper_speaker,
+            tts_piper_speaker_id=tts_piper_speaker_id,
+            tts_piper_length_scale=tts_piper_length_scale,
+            tts_piper_noise_scale=tts_piper_noise_scale,
+            tts_piper_noise_w_scale=tts_piper_noise_w_scale,
+        )
         history_cfg = config.History(
             history_dir=history_dir,
             last_n_messages=last_n_messages,
@@ -528,5 +548,6 @@ def chat(
                 wyoming_tts_cfg=wyoming_tts_cfg,
                 openai_tts_cfg=openai_tts_cfg,
                 kokoro_tts_cfg=kokoro_tts_cfg,
+                piper_tts_cfg=piper_tts_cfg,
             ),
         )
diff --git a/agent_cli/agents/speak.py b/agent_cli/agents/speak.py
@@ -35,6 +35,7 @@ async def _async_main(
     wyoming_tts_cfg: config.WyomingTTS,
     openai_tts_cfg: config.OpenAITTS,
     kokoro_tts_cfg: config.KokoroTTS,
+    piper_tts_cfg: config.PiperTTS,
 ) -> None:
     """Async entry point for the speak command."""
     with pyaudio_context() as p:
@@ -64,6 +65,7 @@ async def _async_main(
                 wyoming_tts_cfg=wyoming_tts_cfg,
                 openai_tts_cfg=openai_tts_cfg,
                 kokoro_tts_cfg=kokoro_tts_cfg,
+                piper_tts_cfg=piper_tts_cfg,
                 save_file=general_cfg.save_file,
                 quiet=general_cfg.quiet,
                 logger=LOGGER,
@@ -102,6 +104,14 @@ def speak(
     tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
     tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
     tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
+    # Piper
+    tts_piper_host: str = opts.TTS_PIPER_HOST,
+    tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
+    tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
+    tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
+    tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
+    tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
+    tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
     # --- General Options ---
     list_devices: bool = opts.LIST_DEVICES,
     save_file: Path | None = opts.SAVE_FILE,
@@ -165,6 +175,15 @@ def speak(
             tts_kokoro_voice=tts_kokoro_voice,
             tts_kokoro_host=tts_kokoro_host,
         )
+        piper_tts_cfg = config.PiperTTS(
+            tts_piper_host=tts_piper_host,
+            tts_piper_voice=tts_piper_voice,
+            tts_piper_speaker=tts_piper_speaker,
+            tts_piper_speaker_id=tts_piper_speaker_id,
+            tts_piper_length_scale=tts_piper_length_scale,
+            tts_piper_noise_scale=tts_piper_noise_scale,
+            tts_piper_noise_w_scale=tts_piper_noise_w_scale,
+        )
 
         asyncio.run(
             _async_main(
@@ -175,5 +194,6 @@ def speak(
                 wyoming_tts_cfg=wyoming_tts_cfg,
                 openai_tts_cfg=openai_tts_cfg,
                 kokoro_tts_cfg=kokoro_tts_cfg,
+                piper_tts_cfg=piper_tts_cfg,
             ),
         )
diff --git a/agent_cli/agents/transcribe.py b/agent_cli/agents/transcribe.py
@@ -293,7 +293,7 @@ def transcribe(
         provider_cfg = config.ProviderSelection(
             asr_provider=asr_provider,
             llm_provider=llm_provider,
-            tts_provider="local",  # Not used
+            tts_provider="piper",  # Not used
         )
         audio_in_cfg = config.AudioInput(
             input_device_index=input_device_index,

diff --git a/agent_cli/agents/voice_edit.py b/agent_cli/agents/voice_edit.py
@@ -101,6 +101,7 @@ async def _async_main(
     wyoming_tts_cfg: config.WyomingTTS,
     openai_tts_cfg: config.OpenAITTS,
     kokoro_tts_cfg: config.KokoroTTS,
+    piper_tts_cfg: config.PiperTTS,
 ) -> None:
     """Core asynchronous logic for the voice assistant."""
     with pyaudio_context() as p:
@@ -161,6 +162,7 @@ async def _async_main(
                 wyoming_tts_cfg=wyoming_tts_cfg,
                 openai_tts_cfg=openai_tts_cfg,
                 kokoro_tts_cfg=kokoro_tts_cfg,
+                piper_tts_cfg=piper_tts_cfg,
                 system_prompt=SYSTEM_PROMPT,
                 agent_instructions=AGENT_INSTRUCTIONS,
                 live=live,
@@ -203,6 +205,13 @@ def voice_edit(
     tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
     tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
     tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
+    tts_piper_host: str = opts.TTS_PIPER_HOST,
+    tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
+    tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
+    tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
+    tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
+    tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
+    tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
     # --- Process Management ---
     stop: bool = opts.STOP,
     status: bool = opts.STATUS,
@@ -302,6 +311,15 @@ def voice_edit(
             tts_kokoro_voice=tts_kokoro_voice,
             tts_kokoro_host=tts_kokoro_host,
         )
+        piper_tts_cfg = config.PiperTTS(
+            tts_piper_host=tts_piper_host,
+            tts_piper_voice=tts_piper_voice,
+            tts_piper_speaker=tts_piper_speaker,
+            tts_piper_speaker_id=tts_piper_speaker_id,
+            tts_piper_length_scale=tts_piper_length_scale,
+            tts_piper_noise_scale=tts_piper_noise_scale,
+            tts_piper_noise_w_scale=tts_piper_noise_w_scale,
+        )
 
         asyncio.run(
             _async_main(
@@ -317,5 +335,6 @@ def voice_edit(
                 wyoming_tts_cfg=wyoming_tts_cfg,
                 openai_tts_cfg=openai_tts_cfg,
                 kokoro_tts_cfg=kokoro_tts_cfg,
+                piper_tts_cfg=piper_tts_cfg,
             ),
         )
diff --git a/agent_cli/config.py b/agent_cli/config.py
@@ -23,7 +23,7 @@ class ProviderSelection(BaseModel):
 
     llm_provider: Literal["local", "openai", "gemini"]
     asr_provider: Literal["local", "openai"]
-    tts_provider: Literal["local", "openai", "kokoro"]
+    tts_provider: Literal["local", "openai", "kokoro", "piper"]
 
 
 # --- Panel: LLM Configuration ---
@@ -112,6 +112,18 @@ class KokoroTTS(BaseModel):
     tts_kokoro_host: str
 
 
+class PiperTTS(BaseModel):
+    """Configuration for the Piper HTTP TTS provider."""
+
+    tts_piper_host: str  # base URL of the Piper HTTP server
+    tts_piper_voice: str | None = None  # voice to use; optional
+    tts_piper_speaker: str | None = None  # speaker for multi-speaker voices; optional
+    tts_piper_speaker_id: int | None = None  # speaker ID for multi-speaker voices; CLI help says it overrides speaker
+    tts_piper_length_scale: float = 1.0  # speaking speed; 1.0 = normal speed
+    tts_piper_noise_scale: float | None = None  # speaking variability; optional
+    tts_piper_noise_w_scale: float | None = None  # phoneme width variability; optional
+
+
 # --- Panel: Wake Word Options ---
 
 

diff --git a/agent_cli/opts.py b/agent_cli/opts.py
@@ -230,6 +230,50 @@
     rich_help_panel="TTS (Text-to-Speech) Configuration: Kokoro",
 )
 
+# --- TTS Configuration: Piper ---
+TTS_PIPER_HOST: str = typer.Option(
+    "http://localhost:10200",  # default: Piper server on localhost, port 10200
+    "--tts-piper-host",
+    help="The base URL for the Piper HTTP server.",
+    rich_help_panel="TTS (Text-to-Speech) Configuration: Piper",
+)
+TTS_PIPER_VOICE: str | None = typer.Option(
+    None,  # default: no voice specified
+    "--tts-piper-voice",
+    help="The voice to use for Piper TTS (optional).",
+    rich_help_panel="TTS (Text-to-Speech) Configuration: Piper",
+)
+TTS_PIPER_SPEAKER: str | None = typer.Option(
+    None,  # default: no speaker specified
+    "--tts-piper-speaker",
+    help="The speaker to use for multi-speaker voices (optional).",
+    rich_help_panel="TTS (Text-to-Speech) Configuration: Piper",
+)
+TTS_PIPER_SPEAKER_ID: int | None = typer.Option(
+    None,  # default: no speaker ID specified
+    "--tts-piper-speaker-id",
+    help="The speaker ID to use for multi-speaker voices (optional, overrides speaker).",  # NOTE(review): the override precedence is not visible here — confirm in the Piper client code
+    rich_help_panel="TTS (Text-to-Speech) Configuration: Piper",
+)
+TTS_PIPER_LENGTH_SCALE: float = typer.Option(
+    1.0,  # default: normal speaking speed
+    "--tts-piper-length-scale",
+    help="Speaking speed (1.0 = normal speed).",
+    rich_help_panel="TTS (Text-to-Speech) Configuration: Piper",
+)
+TTS_PIPER_NOISE_SCALE: float | None = typer.Option(
+    None,  # default: not set
+    "--tts-piper-noise-scale",
+    help="Speaking variability (optional).",
+    rich_help_panel="TTS (Text-to-Speech) Configuration: Piper",
+)
+TTS_PIPER_NOISE_W_SCALE: float | None = typer.Option(
+    None,  # default: not set
+    "--tts-piper-noise-w-scale",
+    help="Phoneme width variability (optional).",
+    rich_help_panel="TTS (Text-to-Speech) Configuration: Piper",
+)
+
 
 # --- Process Management Options ---
 STOP: bool = typer.Option(