Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ coverage.*
.pytest_cache/
.vscode/
.vscode/
.DS_Store

# Examples and scripts - exclude downloaded models and data
examples/ollama/models/
Expand All @@ -67,3 +68,6 @@ scripts/.runtime/
*.onnx
*.onnx.json
*.bin

# Config file
agent-cli-config.toml
1,228 changes: 618 additions & 610 deletions README.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions agent_cli/agents/_voice_agent_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ async def process_instruction_and_respond(
wyoming_tts_cfg: config.WyomingTTS,
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
piper_tts_cfg: config.PiperTTS,
system_prompt: str,
agent_instructions: str,
live: Live | None,
Expand Down Expand Up @@ -120,6 +121,7 @@ async def process_instruction_and_respond(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
save_file=general_cfg.save_file,
quiet=general_cfg.quiet,
logger=logger,
Expand Down
19 changes: 19 additions & 0 deletions agent_cli/agents/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ async def _async_main(
wyoming_tts_cfg: config.WyomingTTS,
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
piper_tts_cfg: config.PiperTTS,
wake_word_cfg: config.WakeWord,
system_prompt: str,
agent_instructions: str,
Expand Down Expand Up @@ -240,6 +241,7 @@ async def _async_main(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
system_prompt=system_prompt,
agent_instructions=agent_instructions,
live=live,
Expand Down Expand Up @@ -289,6 +291,13 @@ def assistant(
tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
tts_piper_host: str = opts.TTS_PIPER_HOST,
tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
# --- Process Management ---
stop: bool = opts.STOP,
status: bool = opts.STATUS,
Expand Down Expand Up @@ -383,6 +392,15 @@ def assistant(
tts_kokoro_voice=tts_kokoro_voice,
tts_kokoro_host=tts_kokoro_host,
)
piper_tts_cfg = config.PiperTTS(
tts_piper_host=tts_piper_host,
tts_piper_voice=tts_piper_voice,
tts_piper_speaker=tts_piper_speaker,
tts_piper_speaker_id=tts_piper_speaker_id,
tts_piper_length_scale=tts_piper_length_scale,
tts_piper_noise_scale=tts_piper_noise_scale,
tts_piper_noise_w_scale=tts_piper_noise_w_scale,
)
wake_word_cfg = config.WakeWord(
wake_server_ip=wake_server_ip,
wake_server_port=wake_server_port,
Expand Down Expand Up @@ -413,6 +431,7 @@ def assistant(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
wake_word_cfg=wake_word_cfg,
system_prompt=system_prompt,
agent_instructions=agent_instructions,
Expand Down
2 changes: 1 addition & 1 deletion agent_cli/agents/autocorrect.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def autocorrect(
provider_cfg = config.ProviderSelection(
llm_provider=llm_provider,
asr_provider="local", # Not used, but required by model
tts_provider="local", # Not used, but required by model
tts_provider="piper", # Not used, but required by model
)
ollama_cfg = config.Ollama(llm_ollama_model=llm_ollama_model, llm_ollama_host=llm_ollama_host)
openai_llm_cfg = config.OpenAILLM(
Expand Down
21 changes: 21 additions & 0 deletions agent_cli/agents/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ async def _handle_conversation_turn(
wyoming_tts_cfg: config.WyomingTTS,
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
piper_tts_cfg: config.PiperTTS,
live: Live,
) -> None:
"""Handles a single turn of the conversation."""
Expand Down Expand Up @@ -285,6 +286,7 @@ async def _handle_conversation_turn(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
save_file=general_cfg.save_file,
quiet=general_cfg.quiet,
logger=LOGGER,
Expand Down Expand Up @@ -315,6 +317,7 @@ async def _async_main(
wyoming_tts_cfg: config.WyomingTTS,
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
piper_tts_cfg: config.PiperTTS,
) -> None:
"""Main async function, consumes parsed arguments."""
try:
Expand Down Expand Up @@ -362,6 +365,7 @@ async def _async_main(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
live=live,
)
except Exception:
Expand Down Expand Up @@ -405,6 +409,13 @@ def chat(
tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
tts_piper_host: str = opts.TTS_PIPER_HOST,
tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
# --- Process Management ---
stop: bool = opts.STOP,
status: bool = opts.STATUS,
Expand Down Expand Up @@ -508,6 +519,15 @@ def chat(
tts_kokoro_voice=tts_kokoro_voice,
tts_kokoro_host=tts_kokoro_host,
)
piper_tts_cfg = config.PiperTTS(
tts_piper_host=tts_piper_host,
tts_piper_voice=tts_piper_voice,
tts_piper_speaker=tts_piper_speaker,
tts_piper_speaker_id=tts_piper_speaker_id,
tts_piper_length_scale=tts_piper_length_scale,
tts_piper_noise_scale=tts_piper_noise_scale,
tts_piper_noise_w_scale=tts_piper_noise_w_scale,
)
history_cfg = config.History(
history_dir=history_dir,
last_n_messages=last_n_messages,
Expand All @@ -528,5 +548,6 @@ def chat(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
),
)
20 changes: 20 additions & 0 deletions agent_cli/agents/speak.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ async def _async_main(
wyoming_tts_cfg: config.WyomingTTS,
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
piper_tts_cfg: config.PiperTTS,
) -> None:
"""Async entry point for the speak command."""
with pyaudio_context() as p:
Expand Down Expand Up @@ -64,6 +65,7 @@ async def _async_main(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
save_file=general_cfg.save_file,
quiet=general_cfg.quiet,
logger=LOGGER,
Expand Down Expand Up @@ -102,6 +104,14 @@ def speak(
tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
# Piper
tts_piper_host: str = opts.TTS_PIPER_HOST,
tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
# --- General Options ---
list_devices: bool = opts.LIST_DEVICES,
save_file: Path | None = opts.SAVE_FILE,
Expand Down Expand Up @@ -165,6 +175,15 @@ def speak(
tts_kokoro_voice=tts_kokoro_voice,
tts_kokoro_host=tts_kokoro_host,
)
piper_tts_cfg = config.PiperTTS(
tts_piper_host=tts_piper_host,
tts_piper_voice=tts_piper_voice,
tts_piper_speaker=tts_piper_speaker,
tts_piper_speaker_id=tts_piper_speaker_id,
tts_piper_length_scale=tts_piper_length_scale,
tts_piper_noise_scale=tts_piper_noise_scale,
tts_piper_noise_w_scale=tts_piper_noise_w_scale,
)

asyncio.run(
_async_main(
Expand All @@ -175,5 +194,6 @@ def speak(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
),
)
2 changes: 1 addition & 1 deletion agent_cli/agents/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def transcribe(
provider_cfg = config.ProviderSelection(
asr_provider=asr_provider,
llm_provider=llm_provider,
tts_provider="local", # Not used
tts_provider="piper", # Not used
)
audio_in_cfg = config.AudioInput(
input_device_index=input_device_index,
Expand Down
19 changes: 19 additions & 0 deletions agent_cli/agents/voice_edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ async def _async_main(
wyoming_tts_cfg: config.WyomingTTS,
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
piper_tts_cfg: config.PiperTTS,
) -> None:
"""Core asynchronous logic for the voice assistant."""
with pyaudio_context() as p:
Expand Down Expand Up @@ -161,6 +162,7 @@ async def _async_main(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
system_prompt=SYSTEM_PROMPT,
agent_instructions=AGENT_INSTRUCTIONS,
live=live,
Expand Down Expand Up @@ -203,6 +205,13 @@ def voice_edit(
tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
tts_piper_host: str = opts.TTS_PIPER_HOST,
tts_piper_voice: str | None = opts.TTS_PIPER_VOICE,
tts_piper_speaker: str | None = opts.TTS_PIPER_SPEAKER,
tts_piper_speaker_id: int | None = opts.TTS_PIPER_SPEAKER_ID,
tts_piper_length_scale: float = opts.TTS_PIPER_LENGTH_SCALE,
tts_piper_noise_scale: float | None = opts.TTS_PIPER_NOISE_SCALE,
tts_piper_noise_w_scale: float | None = opts.TTS_PIPER_NOISE_W_SCALE,
# --- Process Management ---
stop: bool = opts.STOP,
status: bool = opts.STATUS,
Expand Down Expand Up @@ -302,6 +311,15 @@ def voice_edit(
tts_kokoro_voice=tts_kokoro_voice,
tts_kokoro_host=tts_kokoro_host,
)
piper_tts_cfg = config.PiperTTS(
tts_piper_host=tts_piper_host,
tts_piper_voice=tts_piper_voice,
tts_piper_speaker=tts_piper_speaker,
tts_piper_speaker_id=tts_piper_speaker_id,
tts_piper_length_scale=tts_piper_length_scale,
tts_piper_noise_scale=tts_piper_noise_scale,
tts_piper_noise_w_scale=tts_piper_noise_w_scale,
)

asyncio.run(
_async_main(
Expand All @@ -317,5 +335,6 @@ def voice_edit(
wyoming_tts_cfg=wyoming_tts_cfg,
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
piper_tts_cfg=piper_tts_cfg,
),
)
14 changes: 13 additions & 1 deletion agent_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ProviderSelection(BaseModel):

llm_provider: Literal["local", "openai", "gemini"]
asr_provider: Literal["local", "openai"]
tts_provider: Literal["local", "openai", "kokoro"]
tts_provider: Literal["local", "openai", "kokoro", "piper"]


# --- Panel: LLM Configuration ---
Expand Down Expand Up @@ -112,6 +112,18 @@ class KokoroTTS(BaseModel):
tts_kokoro_host: str


class PiperTTS(BaseModel):
    """Configuration for the Piper HTTP TTS provider."""

    # Base URL of the Piper HTTP server (required; no default at model level).
    tts_piper_host: str
    # Voice to use for Piper TTS; optional.
    tts_piper_voice: str | None = None
    # Speaker to use for multi-speaker voices; optional.
    tts_piper_speaker: str | None = None
    # Speaker ID for multi-speaker voices; optional. The CLI help describes it
    # as overriding ``tts_piper_speaker`` -- that precedence is not enforced
    # by this model.
    tts_piper_speaker_id: int | None = None
    # Speaking speed; 1.0 is normal speed.
    tts_piper_length_scale: float = 1.0
    # Speaking variability; optional.
    tts_piper_noise_scale: float | None = None
    # Phoneme width variability; optional.
    tts_piper_noise_w_scale: float | None = None


# --- Panel: Wake Word Options ---


Expand Down
44 changes: 44 additions & 0 deletions agent_cli/opts.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,50 @@
rich_help_panel="TTS (Text-to-Speech) Configuration: Kokoro",
)

# --- TTS Configuration: Piper ---
# Every Piper flag is listed under the same help panel, so the panel title is
# spelled once here and reused below.
_PIPER_HELP_PANEL = "TTS (Text-to-Speech) Configuration: Piper"

# Server location.
TTS_PIPER_HOST: str = typer.Option(
    "http://localhost:10200",
    "--tts-piper-host",
    help="The base URL for the Piper HTTP server.",
    rich_help_panel=_PIPER_HELP_PANEL,
)

# Voice / speaker selection (all optional, default to None).
TTS_PIPER_VOICE: str | None = typer.Option(
    None,
    "--tts-piper-voice",
    help="The voice to use for Piper TTS (optional).",
    rich_help_panel=_PIPER_HELP_PANEL,
)
TTS_PIPER_SPEAKER: str | None = typer.Option(
    None,
    "--tts-piper-speaker",
    help="The speaker to use for multi-speaker voices (optional).",
    rich_help_panel=_PIPER_HELP_PANEL,
)
TTS_PIPER_SPEAKER_ID: int | None = typer.Option(
    None,
    "--tts-piper-speaker-id",
    help="The speaker ID to use for multi-speaker voices (optional, overrides speaker).",
    rich_help_panel=_PIPER_HELP_PANEL,
)

# Synthesis tuning: speed defaults to 1.0, the variability knobs to None.
TTS_PIPER_LENGTH_SCALE: float = typer.Option(
    1.0,
    "--tts-piper-length-scale",
    help="Speaking speed (1.0 = normal speed).",
    rich_help_panel=_PIPER_HELP_PANEL,
)
TTS_PIPER_NOISE_SCALE: float | None = typer.Option(
    None,
    "--tts-piper-noise-scale",
    help="Speaking variability (optional).",
    rich_help_panel=_PIPER_HELP_PANEL,
)
TTS_PIPER_NOISE_W_SCALE: float | None = typer.Option(
    None,
    "--tts-piper-noise-w-scale",
    help="Phoneme width variability (optional).",
    rich_help_panel=_PIPER_HELP_PANEL,
)


# --- Process Management Options ---
STOP: bool = typer.Option(
Expand Down
Loading
Loading