Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions agent_cli/_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, TypeVar

from pydantic_ai.common_tools.duckduckgo import duckduckgo_search_tool
from pydantic_ai.tools import Tool

if TYPE_CHECKING:
from collections.abc import Callable


# Memory system helpers


Expand Down Expand Up @@ -352,10 +354,15 @@ def _list_categories_operation() -> str:
return _memory_operation("listing categories", _list_categories_operation)


# Module-level Tool wrappers: each wraps one of the plain functions above in a
# pydantic-ai `Tool` so agents can register them directly by name.
ReadFileTool = Tool(read_file)
ExecuteCodeTool = Tool(execute_code)
AddMemoryTool = Tool(add_memory)
SearchMemoryTool = Tool(search_memory)
UpdateMemoryTool = Tool(update_memory)
ListAllMemoriesTool = Tool(list_all_memories)
ListMemoryCategoriesTool = Tool(list_memory_categories)
def tools() -> list[Tool]:
    """Return the list of tools available to the agent.

    Builds a fresh list on each call so callers can safely mutate the
    result. Includes file reading, sandboxed code execution, the memory
    helpers defined in this module, and DuckDuckGo web search.

    Returns:
        A list of pydantic-ai ``Tool`` instances.
    """
    return [
        Tool(read_file),
        Tool(execute_code),
        Tool(add_memory),
        Tool(search_memory),
        Tool(update_memory),
        Tool(list_all_memories),
        Tool(list_memory_categories),
        # DuckDuckGo search ships as a prebuilt tool factory, not a plain
        # function, so it is called rather than wrapped in Tool().
        duckduckgo_search_tool(),
    ]
78 changes: 35 additions & 43 deletions agent_cli/agents/_voice_agent_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
import pyperclip

from agent_cli.core.utils import print_input_panel, print_with_style
from agent_cli.services import asr
from agent_cli.services.llm import process_and_update_clipboard
from agent_cli.services.factory import get_asr_service, get_llm_service
from agent_cli.services.tts import handle_tts_playback

if TYPE_CHECKING:
Expand All @@ -25,28 +24,22 @@ async def get_instruction_from_audio(
*,
audio_data: bytes,
provider_config: config.ProviderSelection,
audio_input_config: config.AudioInput,
wyoming_asr_config: config.WyomingASR,
openai_asr_config: config.OpenAIASR,
ollama_config: config.Ollama,
openai_llm_config: config.OpenAILLM,
logger: logging.Logger,
quiet: bool,
logger: logging.Logger,
) -> str | None:
"""Transcribe audio data and return the instruction."""
try:
start_time = time.monotonic()
transcriber = asr.get_recorded_audio_transcriber(provider_config)
instruction = await transcriber(
transcriber = get_asr_service(
provider_config,
wyoming_asr_config,
openai_asr_config,
is_interactive=not quiet,
)
instruction = await transcriber.transcribe(
audio_data=audio_data,
provider_config=provider_config,
audio_input_config=audio_input_config,
wyoming_asr_config=wyoming_asr_config,
openai_asr_config=openai_asr_config,
ollama_config=ollama_config,
openai_llm_config=openai_llm_config,
logger=logger,
quiet=quiet,
)
elapsed = time.monotonic() - start_time

Expand Down Expand Up @@ -94,36 +87,35 @@ async def process_instruction_and_respond(
"""Process instruction with LLM and handle TTS response."""
# Process with LLM if clipboard mode is enabled
if general_config.clipboard:
await process_and_update_clipboard(
system_prompt=system_prompt,
agent_instructions=agent_instructions,
llm_service = get_llm_service(
provider_config=provider_config,
ollama_config=ollama_config,
openai_config=openai_llm_config,
logger=logger,
original_text=original_text,
instruction=instruction,
clipboard=general_config.clipboard,
quiet=general_config.quiet,
live=live,
is_interactive=not general_config.quiet,
)
message = f"<original-text>{original_text}</original-text><instruction>{instruction}</instruction>"
response_generator = llm_service.chat(
message=message,
system_prompt=system_prompt,
instructions=agent_instructions,
)
response_text = "".join([chunk async for chunk in response_generator])
pyperclip.copy(response_text)

# Handle TTS response if enabled
if audio_output_config.enable_tts:
response_text = pyperclip.paste()
if response_text and response_text.strip():
await handle_tts_playback(
text=response_text,
provider_config=provider_config,
audio_output_config=audio_output_config,
wyoming_tts_config=wyoming_tts_config,
openai_tts_config=openai_tts_config,
openai_llm_config=openai_llm_config,
save_file=general_config.save_file,
quiet=general_config.quiet,
logger=logger,
play_audio=not general_config.save_file,
status_message="🔊 Speaking response...",
description="TTS audio",
live=live,
)
if audio_output_config.enable_tts and response_text and response_text.strip():
await handle_tts_playback(
text=response_text,
provider_config=provider_config,
audio_output_config=audio_output_config,
wyoming_tts_config=wyoming_tts_config,
openai_tts_config=openai_tts_config,
openai_llm_config=openai_llm_config,
save_file=general_config.save_file,
quiet=general_config.quiet,
logger=logger,
play_audio=not general_config.save_file,
status_message="🔊 Speaking response...",
description="TTS audio",
live=live,
)
9 changes: 3 additions & 6 deletions agent_cli/agents/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
signal_handling_context,
stop_or_status_or_toggle,
)
from agent_cli.services import asr, wake_word
from agent_cli.services import wake_word

if TYPE_CHECKING:
import pyaudio
Expand Down Expand Up @@ -134,7 +134,7 @@ async def _record_audio_with_wake_word(

# Add a new queue for recording
record_queue = await tee.add_queue()
record_task = asyncio.create_task(asr.record_audio_to_buffer(record_queue, logger))
record_task = asyncio.create_task(audio.record_audio_to_buffer(record_queue, logger))

# Use the same wake_queue for stop-word detection
stop_detected_word = await wake_word.detect_wake_word_from_queue(
Expand Down Expand Up @@ -219,13 +219,10 @@ async def _async_main(
instruction = await get_instruction_from_audio(
audio_data=audio_data,
provider_config=provider_cfg,
audio_input_config=audio_in_cfg,
wyoming_asr_config=wyoming_asr_cfg,
openai_asr_config=openai_asr_cfg,
ollama_config=ollama_cfg,
openai_llm_config=openai_llm_cfg,
logger=LOGGER,
quiet=general_cfg.quiet,
logger=LOGGER,
)
if not instruction:
continue
Expand Down
17 changes: 11 additions & 6 deletions agent_cli/agents/autocorrect.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@
print_with_style,
setup_logging,
)
from agent_cli.services.llm import build_agent
from agent_cli.services.factory import get_llm_service

if TYPE_CHECKING:
from rich.status import Status


# --- Configuration ---

# Template to clearly separate the text to be corrected from instructions
Expand Down Expand Up @@ -78,21 +79,25 @@ async def _process_text(
openai_llm_cfg: config.OpenAILLM,
) -> tuple[str, float]:
"""Process text with the LLM and return the corrected text and elapsed time."""
agent = build_agent(
llm_service = get_llm_service(
provider_config=provider_cfg,
ollama_config=ollama_cfg,
openai_config=openai_llm_cfg,
system_prompt=SYSTEM_PROMPT,
instructions=AGENT_INSTRUCTIONS,
is_interactive=False,
)

# Format the input using the template to clearly separate text from instructions
formatted_input = INPUT_TEMPLATE.format(text=text)

start_time = time.monotonic()
result = await agent.run(formatted_input)
response_generator = llm_service.chat(
message=formatted_input,
system_prompt=SYSTEM_PROMPT,
instructions=AGENT_INSTRUCTIONS,
)
corrected_text = "".join([chunk async for chunk in response_generator])
elapsed = time.monotonic() - start_time
return result.output, elapsed
return corrected_text, elapsed


def _display_original_text(original_text: str, quiet: bool) -> None:
Expand Down
Loading
Loading