diff --git a/agent_cli/agents/__init__.py b/agent_cli/agents/__init__.py index 26fba061..1cb20a8b 100644 --- a/agent_cli/agents/__init__.py +++ b/agent_cli/agents/__init__.py @@ -8,6 +8,7 @@ rag_proxy, server, speak, + summarize, transcribe, voice_edit, ) @@ -20,6 +21,7 @@ "rag_proxy", "server", "speak", + "summarize", "transcribe", "voice_edit", ] diff --git a/agent_cli/agents/summarize.py b/agent_cli/agents/summarize.py new file mode 100644 index 00000000..ecfd1e05 --- /dev/null +++ b/agent_cli/agents/summarize.py @@ -0,0 +1,410 @@ +"""Summarize text files or stdin using adaptive map-reduce summarization.""" + +from __future__ import annotations + +import asyncio +import contextlib +import json +import sys +import time +from enum import Enum +from pathlib import Path # noqa: TC003 +from typing import TYPE_CHECKING + +import typer + +from agent_cli import config, opts +from agent_cli.cli import app +from agent_cli.core.utils import ( + console, + create_status, + print_command_line_args, + print_error_message, + print_input_panel, + print_output_panel, + print_with_style, + setup_logging, +) +from agent_cli.summarizer import SummarizationError, SummarizerConfig, summarize +from agent_cli.summarizer._utils import count_tokens + +if TYPE_CHECKING: + from agent_cli.summarizer import SummaryResult + + +class ContentType(str, Enum): + """Content type for specialized summarization prompts.""" + + general = "general" + conversation = "conversation" + journal = "journal" + document = "document" + + +class OutputFormat(str, Enum): + """Output format for the summarization result.""" + + text = "text" + json = "json" + full = "full" + + +def _read_input(file_path: Path | None) -> str | None: + """Read input from file or stdin.""" + if file_path: + if not file_path.exists(): + print_error_message( + f"File not found: {file_path}", + "Please check the file path and try again.", + ) + return None + return file_path.read_text(encoding="utf-8") + + # Read from stdin + if sys.stdin.isatty(): + print_error_message( + "No input provided", + "Provide a file path or pipe content via stdin.", + ) + return None + + return sys.stdin.read() + + +def _display_input_preview( + content: str, + token_count: int, + *, + quiet: bool, + max_preview_chars: int = 500, +) -> None: + """Display a preview of the input content.""" + if quiet: + return + + preview = content[:max_preview_chars] + if len(content) > max_preview_chars: + preview += f"\n... 
[{len(content) - max_preview_chars} more characters]" + + print_input_panel( + preview, + title=f"Input ({token_count:,} tokens)", + ) + + +def _display_result( + result: SummaryResult, + elapsed: float, + output_format: OutputFormat, + *, + quiet: bool, +) -> None: + """Display the summarization result.""" + if output_format == OutputFormat.json: + print(json.dumps(result.model_dump(mode="json"), indent=2)) + return + + if output_format == OutputFormat.full: + _display_full_result(result, elapsed, quiet=quiet) + return + + # Text output - just the summary + if quiet: + if result.summary: + print(result.summary) + elif result.summary: + print_output_panel( + result.summary, + title="Summary", + subtitle=f"[dim]{result.output_tokens:,} tokens | {result.compression_ratio:.1%} of original | {elapsed:.2f}s[/dim]", + ) + else: + print_with_style( + f"No summary generated (input too short: {result.input_tokens} tokens)", + style="yellow", + ) + + +def _display_full_result( + result: SummaryResult, + elapsed: float, + *, + quiet: bool, +) -> None: + """Display full result with all metadata.""" + if quiet: + if result.summary: + print(result.summary) + return + + console.print() + console.print("[bold cyan]Summarization Result[/bold cyan]") + console.print(f" Input tokens: [bold]{result.input_tokens:,}[/bold]") + console.print(f" Output tokens: [bold]{result.output_tokens:,}[/bold]") + console.print(f" Compression: [bold]{result.compression_ratio:.1%}[/bold]") + if result.collapse_depth > 0: + console.print(f" Collapse depth: [bold]{result.collapse_depth}[/bold]") + console.print(f" Time: [bold]{elapsed:.2f}s[/bold]") + console.print() + + if result.summary: + print_output_panel( + result.summary, + title="Summary", + ) + + +def _get_llm_config( + provider_cfg: config.ProviderSelection, + ollama_cfg: config.Ollama, + openai_llm_cfg: config.OpenAILLM, + gemini_llm_cfg: config.GeminiLLM, +) -> tuple[str, str, str | None]: + """Get openai_base_url, model, and api_key from provider config.""" + if provider_cfg.llm_provider == "ollama": + # Ollama uses OpenAI-compatible API at /v1 + base_url = ollama_cfg.llm_ollama_host.rstrip("/") + if not base_url.endswith("/v1"): + base_url = f"{base_url}/v1" + return base_url, ollama_cfg.llm_ollama_model, None + if provider_cfg.llm_provider == "openai": + base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1" + return base_url, openai_llm_cfg.llm_openai_model, openai_llm_cfg.openai_api_key + # gemini + return ( + "https://generativelanguage.googleapis.com/v1beta/openai", + gemini_llm_cfg.llm_gemini_model, + gemini_llm_cfg.gemini_api_key, + ) + + +async def _async_summarize( + content: str, + *, + content_type: ContentType, + prior_summary: str | None, + provider_cfg: config.ProviderSelection, + ollama_cfg: config.Ollama, + openai_llm_cfg: config.OpenAILLM, + gemini_llm_cfg: config.GeminiLLM, + general_cfg: config.General, + chunk_size: int, + chunk_overlap: int, + max_concurrent_chunks: int, + output_format: OutputFormat, +) -> None: + """Asynchronous summarization entry point.""" + setup_logging(general_cfg.log_level, general_cfg.log_file, quiet=general_cfg.quiet) + + openai_base_url, model, api_key = _get_llm_config( + provider_cfg, + ollama_cfg, + openai_llm_cfg, + gemini_llm_cfg, + ) + + token_count = count_tokens(content, model) + _display_input_preview(content, token_count, quiet=general_cfg.quiet) + + summarizer_config = SummarizerConfig( + openai_base_url=openai_base_url, + model=model, + api_key=api_key, + chunk_size=chunk_size, + 
chunk_overlap=chunk_overlap, + max_concurrent_chunks=max_concurrent_chunks, + ) + + try: + if not general_cfg.quiet: + status = create_status(f"Summarizing with {model}...", "bold yellow") + else: + status = contextlib.nullcontext() + + with status: + start_time = time.monotonic() + result = await summarize( + content, + summarizer_config, + prior_summary=prior_summary, + content_type=content_type.value, + ) + elapsed = time.monotonic() - start_time + + _display_result(result, elapsed, output_format, quiet=general_cfg.quiet) + + except SummarizationError as e: + print_error_message( + str(e), + f"Check that your LLM server is running at {openai_base_url}", + ) + sys.exit(1) + except Exception as e: + print_error_message(str(e), "An unexpected error occurred during summarization.") + sys.exit(1) + + +@app.command("summarize") +def summarize_command( + *, + file_path: Path | None = typer.Argument( # noqa: B008 + None, + help="Path to file to summarize. If not provided, reads from stdin.", + ), + # --- Content Options --- + content_type: ContentType = typer.Option( # noqa: B008 + ContentType.general, + "--type", + "-t", + help="Content type for specialized summarization prompts.", + rich_help_panel="Content Options", + ), + prior_summary: str | None = typer.Option( + None, + "--prior-summary", + help="Prior summary to integrate with (for rolling summaries).", + rich_help_panel="Content Options", + ), + prior_summary_file: Path | None = typer.Option( # noqa: B008 + None, + "--prior-summary-file", + help="File containing prior summary to integrate with.", + rich_help_panel="Content Options", + ), + # --- Chunking Options --- + chunk_size: int = typer.Option( + 2048, + "--chunk-size", + help="Target token count per chunk for map-reduce summarization.", + rich_help_panel="Chunking Options", + ), + chunk_overlap: int = typer.Option( + 200, + "--chunk-overlap", + help="Token overlap between chunks for context continuity.", + rich_help_panel="Chunking Options", + ), + max_concurrent_chunks: int = typer.Option( + 5, + "--max-concurrent", + help="Maximum number of chunks to process in parallel.", + rich_help_panel="Chunking Options", + ), + # --- Output Options --- + output_format: OutputFormat = typer.Option( # noqa: B008 + OutputFormat.text, + "--output", + "-o", + help="Output format: 'text' (summary only), 'json' (full result), 'full' (all levels).", + rich_help_panel="Output Options", + ), + # --- Provider Selection --- + llm_provider: str = opts.LLM_PROVIDER, + # --- LLM Configuration --- + # Ollama (local service) + llm_ollama_model: str = opts.LLM_OLLAMA_MODEL, + llm_ollama_host: str = opts.LLM_OLLAMA_HOST, + # OpenAI + llm_openai_model: str = opts.LLM_OPENAI_MODEL, + openai_api_key: str | None = opts.OPENAI_API_KEY, + openai_base_url: str | None = opts.OPENAI_BASE_URL, + # Gemini + llm_gemini_model: str = opts.LLM_GEMINI_MODEL, + gemini_api_key: str | None = opts.GEMINI_API_KEY, + # --- General Options --- + log_level: str = opts.LOG_LEVEL, + log_file: str | None = opts.LOG_FILE, + quiet: bool = opts.QUIET, + config_file: str | None = opts.CONFIG_FILE, + print_args: bool = opts.PRINT_ARGS, +) -> None: + """Summarize text using adaptive map-reduce summarization. 
+ + Reads from a file or stdin and produces a summary scaled to the input complexity: + + - NONE (<100 tokens): No summary needed + - BRIEF (100-500): Single sentence + - MAP_REDUCE (>500): Dynamic collapse until fits token budget + + Examples: + # Summarize a file + agent-cli summarize document.txt + + # Summarize with conversation-specific prompts + agent-cli summarize chat.txt --type conversation + + # Pipe content from stdin + cat book.txt | agent-cli summarize + + # Get full output with all metadata + agent-cli summarize large_document.txt --output full + + # Use OpenAI instead of Ollama + agent-cli summarize notes.md --llm-provider openai + + """ + if print_args: + print_command_line_args(locals()) + + # Create config objects following the standard pattern + provider_cfg = config.ProviderSelection( + llm_provider=llm_provider, + asr_provider="wyoming", # Not used, but required by model + tts_provider="wyoming", # Not used, but required by model + ) + ollama_cfg = config.Ollama( + llm_ollama_model=llm_ollama_model, + llm_ollama_host=llm_ollama_host, + ) + openai_llm_cfg = config.OpenAILLM( + llm_openai_model=llm_openai_model, + openai_api_key=openai_api_key, + openai_base_url=openai_base_url, + ) + gemini_llm_cfg = config.GeminiLLM( + llm_gemini_model=llm_gemini_model, + gemini_api_key=gemini_api_key, + ) + general_cfg = config.General( + log_level=log_level, + log_file=log_file, + quiet=quiet, + clipboard=False, # summarize doesn't use clipboard + ) + + # Read content + content = _read_input(file_path) + if content is None: + raise typer.Exit(1) + + if not content.strip(): + print_error_message("Empty input", "The input file or stdin is empty.") + raise typer.Exit(1) + + # Handle prior summary from file + actual_prior_summary = prior_summary + if prior_summary_file: + if not prior_summary_file.exists(): + print_error_message( + f"Prior summary file not found: {prior_summary_file}", + "Please check the file path.", + ) + raise typer.Exit(1) + actual_prior_summary = prior_summary_file.read_text(encoding="utf-8") + + asyncio.run( + _async_summarize( + content, + content_type=content_type, + prior_summary=actual_prior_summary, + provider_cfg=provider_cfg, + ollama_cfg=ollama_cfg, + openai_llm_cfg=openai_llm_cfg, + gemini_llm_cfg=gemini_llm_cfg, + general_cfg=general_cfg, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + max_concurrent_chunks=max_concurrent_chunks, + output_format=output_format, + ), + ) diff --git a/agent_cli/cli.py b/agent_cli/cli.py index d8e74c79..f36c5c61 100644 --- a/agent_cli/cli.py +++ b/agent_cli/cli.py @@ -60,6 +60,7 @@ def set_config_defaults(ctx: typer.Context, config_file: str | None) -> None: rag_proxy, server, speak, + summarize, transcribe, voice_edit, ) diff --git a/agent_cli/core/chroma.py b/agent_cli/core/chroma.py index 56d54ede..89e289b7 100644 --- a/agent_cli/core/chroma.py +++ b/agent_cli/core/chroma.py @@ -53,12 +53,28 @@ def upsert( ids: list[str], documents: list[str], metadatas: Sequence[BaseModel], + batch_size: int = 10, ) -> None: - """Upsert documents with JSON-serialized metadata.""" + """Upsert documents with JSON-serialized metadata. + + Args: + collection: ChromaDB collection. + ids: Document IDs. + documents: Document contents. + metadatas: Pydantic metadata models. + batch_size: Max documents per embedding API call (default: 10). 
+ + """ if not ids: return serialized = flatten_metadatas(metadatas) - collection.upsert(ids=ids, documents=documents, metadatas=serialized) + + # Process in batches to avoid overwhelming the embedding service + for i in range(0, len(ids), batch_size): + batch_ids = ids[i : i + batch_size] + batch_docs = documents[i : i + batch_size] + batch_metas = serialized[i : i + batch_size] + collection.upsert(ids=batch_ids, documents=batch_docs, metadatas=batch_metas) def delete(collection: Collection, ids: list[str]) -> None: diff --git a/agent_cli/memory/_files.py b/agent_cli/memory/_files.py index d55133d9..a51c7ad1 100644 --- a/agent_cli/memory/_files.py +++ b/agent_cli/memory/_files.py @@ -24,6 +24,11 @@ _SNAPSHOT_FILENAME = "memory_index.json" _DELETED_DIRNAME = "deleted" +# Summary level constants for file structure (kept for backward compatibility) +_SUMMARY_LEVEL_L1 = 1 +_SUMMARY_LEVEL_L2 = 2 +_SUMMARY_LEVEL_L3 = 3 + @dataclass class MemoryFileRecord: @@ -83,46 +88,74 @@ def soft_delete_memory_file( def write_memory_file( root: Path, *, - conversation_id: str, - role: str, - created_at: str, content: str, - summary_kind: str | None = None, doc_id: str | None = None, + # Either pass pre-built metadata OR individual fields + metadata: MemoryMetadata | None = None, + # Individual fields (used when metadata is None) + conversation_id: str | None = None, + role: str | None = None, + created_at: str | None = None, + summary_kind: str | None = None, source_id: str | None = None, ) -> MemoryFileRecord: - """Render and persist a memory document to disk.""" + """Render and persist a memory document to disk. + + Can be called in two ways: + 1. With pre-built metadata: write_memory_file(root, content=..., metadata=..., doc_id=...) + 2. With individual fields: write_memory_file(root, content=..., conversation_id=..., role=..., ...) 
+ + """ entries_dir, _ = ensure_store_dirs(root) - safe_conversation = _slugify(conversation_id) doc_id = doc_id or str(uuid4()) - safe_ts = _safe_timestamp(created_at) + + # Build or use provided metadata + if metadata is not None: + meta = metadata + else: + if conversation_id is None or role is None or created_at is None: + msg = "Must provide metadata or (conversation_id, role, created_at)" + raise ValueError(msg) + meta = MemoryMetadata( + conversation_id=conversation_id, + role=role, + created_at=created_at, + summary_kind=summary_kind, + source_id=source_id, + ) + + safe_conversation = _slugify(meta.conversation_id) + safe_ts = _safe_timestamp(meta.created_at) # Route by role/category for readability - if summary_kind: + if meta.summary_kind and meta.level is not None: + # Hierarchical summary file structure + if meta.level == _SUMMARY_LEVEL_L1: + subdir = Path("summaries") / "L1" + filename = f"chunk_{meta.chunk_index or 0}.md" + elif meta.level == _SUMMARY_LEVEL_L2: + subdir = Path("summaries") / "L2" + filename = f"group_{meta.group_index or 0}.md" + else: # level == _SUMMARY_LEVEL_L3 + subdir = Path("summaries") / "L3" + filename = "final.md" + elif meta.summary_kind: subdir = Path("summaries") filename = "summary.md" - elif role == "user": + elif meta.role == "user": subdir = Path("turns") / "user" filename = f"{safe_ts}__{doc_id}.md" - elif role == "assistant": + elif meta.role == "assistant": subdir = Path("turns") / "assistant" filename = f"{safe_ts}__{doc_id}.md" - elif role == "memory": + elif meta.role == "memory": subdir = Path("facts") filename = f"{safe_ts}__{doc_id}.md" else: subdir = Path() filename = f"{doc_id}.md" - metadata = MemoryMetadata( - conversation_id=conversation_id, - role=role, - created_at=created_at, - summary_kind=summary_kind, - source_id=source_id, - ) - - front_matter = _render_front_matter(doc_id, metadata) + front_matter = _render_front_matter(doc_id, meta) body = front_matter + "\n" + content.strip() + "\n" file_path = entries_dir / safe_conversation / subdir / filename @@ -130,7 +163,7 @@ def write_memory_file( atomic_write_text(file_path, body) - return MemoryFileRecord(id=doc_id, path=file_path, metadata=metadata, content=content) + return MemoryFileRecord(id=doc_id, path=file_path, metadata=meta, content=content) def load_memory_files(root: Path) -> list[MemoryFileRecord]: diff --git a/agent_cli/memory/_ingest.py b/agent_cli/memory/_ingest.py index 6211c029..b98bee13 100644 --- a/agent_cli/memory/_ingest.py +++ b/agent_cli/memory/_ingest.py @@ -12,23 +12,25 @@ import httpx from agent_cli.memory._git import commit_changes -from agent_cli.memory._persistence import delete_memory_files, persist_entries, persist_summary +from agent_cli.memory._persistence import ( + delete_memory_files, + persist_entries, + persist_summary, +) from agent_cli.memory._prompt import ( FACT_INSTRUCTIONS, FACT_SYSTEM_PROMPT, - SUMMARY_PROMPT, UPDATE_MEMORY_PROMPT, ) from agent_cli.memory._retrieval import gather_relevant_existing_memories -from agent_cli.memory._store import delete_entries, get_summary_entry -from agent_cli.memory.entities import Fact, Summary +from agent_cli.memory._store import delete_entries, get_final_summary +from agent_cli.memory.entities import Fact from agent_cli.memory.models import ( MemoryAdd, MemoryDecision, MemoryDelete, MemoryIgnore, MemoryUpdate, - SummaryOutput, ) if TYPE_CHECKING: @@ -36,9 +38,9 @@ from chromadb import Collection -LOGGER = logging.getLogger(__name__) + from agent_cli.summarizer import SummaryResult -_SUMMARY_ROLE = 
"summary" +LOGGER = logging.getLogger(__name__) def _elapsed_ms(start: float) -> float: @@ -178,7 +180,7 @@ async def reconcile_facts( existing_json = [{"id": idx, "text": mem.content} for idx, mem in enumerate(existing)] existing_ids = set(id_map.keys()) - from pydantic_ai import Agent, ModelRetry # noqa: PLC0415 + from pydantic_ai import Agent, ModelRetry, PromptedOutput # noqa: PLC0415 from pydantic_ai.exceptions import AgentRunError, UnexpectedModelBehavior # noqa: PLC0415 from pydantic_ai.models.openai import OpenAIChatModel # noqa: PLC0415 from pydantic_ai.providers.openai import OpenAIProvider # noqa: PLC0415 @@ -193,7 +195,7 @@ async def reconcile_facts( agent = Agent( model=model_cfg, system_prompt=UPDATE_MEMORY_PROMPT, - output_type=list[MemoryDecision], + output_type=PromptedOutput(list[MemoryDecision]), # JSON mode instead of tool calls retries=3, ) @@ -275,39 +277,74 @@ def validate_decisions(decisions: list[MemoryDecision]) -> list[MemoryDecision]: return to_add, to_delete, replacement_map -async def update_summary( +async def summarize_content( *, - prior_summary: str | None, - new_facts: list[str], + content: str, + prior_summary: str | None = None, + content_type: str = "general", openai_base_url: str, api_key: str | None, model: str, - max_tokens: int = 256, -) -> str | None: - """Update the conversation summary based on new facts.""" - if not new_facts: - return prior_summary +) -> SummaryResult: + """Adaptively summarize content based on its length. - from pydantic_ai import Agent # noqa: PLC0415 - from pydantic_ai.models.openai import OpenAIChatModel # noqa: PLC0415 - from pydantic_ai.providers.openai import OpenAIProvider # noqa: PLC0415 - from pydantic_ai.settings import ModelSettings # noqa: PLC0415 + Automatically selects the appropriate summarization strategy + (NONE, BRIEF, MAP_REDUCE) based on input token count. - system_prompt = SUMMARY_PROMPT - user_parts: list[str] = [] - if prior_summary: - user_parts.append(f"Previous summary:\n{prior_summary}") - user_parts.append("New facts:\n" + "\n".join(f"- {fact}" for fact in new_facts)) - prompt_text = "\n\n".join(user_parts) - provider = OpenAIProvider(api_key=api_key or "dummy", base_url=openai_base_url) - model_cfg = OpenAIChatModel( - model_name=model, - provider=provider, - settings=ModelSettings(temperature=0.2, max_tokens=max_tokens), + Args: + content: The content to summarize. + prior_summary: Optional prior summary for context continuity. + content_type: Type of content ("general", "conversation", "journal", "document"). + openai_base_url: Base URL for OpenAI-compatible API. + api_key: API key for the LLM. + model: Model name to use for summarization. + + Returns: + SummaryResult with the summary and metadata. + + """ + # Import here to avoid circular imports and allow optional dependency + from agent_cli.summarizer import SummarizerConfig, summarize # noqa: PLC0415 + + config = SummarizerConfig( + openai_base_url=openai_base_url, + model=model, + api_key=api_key, + ) + return await summarize( + content=content, + config=config, + prior_summary=prior_summary, + content_type=content_type, + ) + + +async def store_adaptive_summary( + collection: Collection, + memory_root: Path, + conversation_id: str, + summary_result: SummaryResult, +) -> list[str]: + """Store a summary result to files and ChromaDB. + + Old summaries are deleted first, then the new summary is stored. + + Args: + collection: ChromaDB collection. + memory_root: Root path for memory files. 
+ conversation_id: The conversation this summary belongs to. + summary_result: The result from summarize(). + + Returns: + List of IDs that were stored. + + """ + return persist_summary( + collection, + memory_root=memory_root, + conversation_id=conversation_id, + summary_result=summary_result, ) - agent = Agent(model=model_cfg, system_prompt=system_prompt, output_type=SummaryOutput) - result = await agent.run(prompt_text) - return result.output.summary or prior_summary async def extract_and_store_facts_and_summaries( @@ -370,37 +407,41 @@ async def extract_and_store_facts_and_summaries( entries=list(to_add), ) - if enable_summarization: - prior_summary_entry = get_summary_entry( - collection, - conversation_id, - role=_SUMMARY_ROLE, - ) + # Summarize raw conversation turns (not extracted facts) + has_content = user_message or assistant_message + if enable_summarization and has_content: + prior_summary_entry = get_final_summary(collection, conversation_id) prior_summary = prior_summary_entry.content if prior_summary_entry else None + # Build conversation transcript + parts = [] + if user_message: + parts.append(f"User: {user_message}") + if assistant_message: + parts.append(f"Assistant: {assistant_message}") + content_to_summarize = "\n".join(parts) + summary_start = perf_counter() - new_summary = await update_summary( + summary_result = await summarize_content( + content=content_to_summarize, prior_summary=prior_summary, - new_facts=facts, + content_type="conversation", openai_base_url=openai_base_url, api_key=api_key, model=model, ) LOGGER.info( - "Summary update completed in %.1f ms (conversation=%s)", + "Summary update completed in %.1f ms (conversation=%s, compression=%.1f%%)", _elapsed_ms(summary_start), conversation_id, + summary_result.compression_ratio * 100, ) - if new_summary: - summary_obj = Summary( - conversation_id=conversation_id, - content=new_summary, - created_at=datetime.now(UTC), - ) - persist_summary( + if summary_result.summary: + await store_adaptive_summary( collection, memory_root=memory_root, - summary=summary_obj, + conversation_id=conversation_id, + summary_result=summary_result, ) if enable_git_versioning: diff --git a/agent_cli/memory/_persistence.py b/agent_cli/memory/_persistence.py index bd8f4dfd..46ac0363 100644 --- a/agent_cli/memory/_persistence.py +++ b/agent_cli/memory/_persistence.py @@ -3,10 +3,13 @@ from __future__ import annotations import logging +import shutil +from datetime import UTC, datetime from typing import TYPE_CHECKING from agent_cli.memory._files import ( _DELETED_DIRNAME, + _slugify, ensure_store_dirs, load_snapshot, read_memory_file, @@ -14,15 +17,22 @@ write_memory_file, write_snapshot, ) -from agent_cli.memory._store import delete_entries, list_conversation_entries, upsert_memories -from agent_cli.memory.entities import Fact, Summary, Turn +from agent_cli.memory._store import ( + delete_entries, + delete_summaries, + list_conversation_entries, + upsert_memories, + upsert_summary_entries, +) +from agent_cli.memory.entities import Fact, Turn +from agent_cli.memory.models import MemoryMetadata if TYPE_CHECKING: from pathlib import Path from chromadb import Collection - from agent_cli.memory.models import MemoryMetadata + from agent_cli.summarizer import SummaryResult LOGGER = logging.getLogger(__name__) @@ -79,31 +89,6 @@ def persist_entries( upsert_memories(collection, ids=ids, contents=contents, metadatas=metadatas) -def persist_summary( - collection: Collection, - *, - memory_root: Path, - summary: Summary, -) -> None: - """Persist 
a summary to disk and Chroma.""" - doc_id = _safe_identifier(f"{summary.conversation_id}{_SUMMARY_DOC_ID_SUFFIX}-summary") - record = write_memory_file( - memory_root, - conversation_id=summary.conversation_id, - role="summary", - created_at=summary.created_at.isoformat(), - content=summary.content, - summary_kind="summary", - doc_id=doc_id, - ) - upsert_memories( - collection, - ids=[record.id], - contents=[record.content], - metadatas=[record.metadata], - ) - - def delete_memory_files( memory_root: Path, conversation_id: str, @@ -180,3 +165,99 @@ def evict_if_needed( ids_to_remove = [e.id for e in overflow] delete_entries(collection, ids_to_remove) delete_memory_files(memory_root, conversation_id, ids_to_remove) + + +def persist_summary( + collection: Collection, + *, + memory_root: Path, + conversation_id: str, + summary_result: SummaryResult, +) -> list[str]: + """Persist a summary to disk and ChromaDB. + + This function: + 1. Deletes existing summaries (files and ChromaDB entries) + 2. Writes new summary file to disk + 3. Stores entry in ChromaDB + + Args: + collection: ChromaDB collection. + memory_root: Root path for memory files. + conversation_id: The conversation this summary belongs to. + summary_result: The result from summarize(). + + Returns: + List of IDs that were stored. + + """ + # Skip if no summary was generated + if not summary_result.summary: + return [] + + # Delete existing summary files + _delete_summary_files(memory_root, conversation_id) + + # Delete existing ChromaDB entries + delete_summaries(collection, conversation_id) + + # Get storage metadata from SummaryResult + entries = summary_result.to_storage_metadata(conversation_id) + if not entries: + return [] + + stored_ids: list[str] = [] + created_at = datetime.now(UTC).isoformat() + + for entry in entries: + meta_dict = entry["metadata"] + # Build MemoryMetadata from the summary result's metadata dict + metadata = MemoryMetadata( + conversation_id=meta_dict["conversation_id"], + role=meta_dict["role"], + created_at=meta_dict.get("created_at", created_at), + summary_kind="summary", + is_final=meta_dict.get("is_final"), + input_tokens=meta_dict.get("input_tokens"), + output_tokens=meta_dict.get("output_tokens"), + compression_ratio=meta_dict.get("compression_ratio"), + summary_level=meta_dict.get("summary_level"), + collapse_depth=meta_dict.get("collapse_depth"), + ) + record = write_memory_file( + memory_root, + content=entry["content"], + doc_id=entry["id"], + metadata=metadata, + ) + LOGGER.info( + "Persisted summary file: %s (level=%s)", + record.path, + meta_dict.get("summary_level"), + ) + stored_ids.append(record.id) + + # Store in ChromaDB (reuse the entries we already built) + upsert_summary_entries(collection, entries) + + return stored_ids + + +def _delete_summary_files(memory_root: Path, conversation_id: str) -> None: + """Delete all summary files for a conversation.""" + entries_dir, _ = ensure_store_dirs(memory_root) + safe_conversation = _slugify(conversation_id) + summaries_dir = entries_dir / safe_conversation / "summaries" + + if summaries_dir.exists(): + # Move to deleted folder instead of hard delete + deleted_dir = entries_dir / _DELETED_DIRNAME / safe_conversation / "summaries" + deleted_dir.parent.mkdir(parents=True, exist_ok=True) + + # If deleted summaries already exist, remove them first + if deleted_dir.exists(): + shutil.rmtree(deleted_dir) + + # Move current summaries to deleted + shutil.move(str(summaries_dir), str(deleted_dir)) + LOGGER.info("Moved old summaries to deleted: %s", 
deleted_dir) diff --git a/agent_cli/memory/_retrieval.py b/agent_cli/memory/_retrieval.py index 6091f109..82c7296f 100644 --- a/agent_cli/memory/_retrieval.py +++ b/agent_cli/memory/_retrieval.py @@ -7,7 +7,7 @@ from datetime import UTC, datetime from typing import TYPE_CHECKING, Any -from agent_cli.memory._store import get_summary_entry, query_memories +from agent_cli.memory._store import get_final_summary, query_memories from agent_cli.memory.models import ( ChatRequest, MemoryEntry, @@ -24,7 +24,6 @@ LOGGER = logging.getLogger(__name__) _DEFAULT_MMR_LAMBDA = 0.7 -_SUMMARY_ROLE = "summary" def gather_relevant_existing_memories( @@ -202,7 +201,7 @@ def recency_score(meta: MemoryMetadata) -> float: summaries: list[str] = [] if include_summary: - summary_entry = get_summary_entry(collection, conversation_id, role=_SUMMARY_ROLE) + summary_entry = get_final_summary(collection, conversation_id) if summary_entry: summaries.append(f"Conversation summary:\n{summary_entry.content}") diff --git a/agent_cli/memory/_store.py b/agent_cli/memory/_store.py index 96e7c66a..36ace588 100644 --- a/agent_cli/memory/_store.py +++ b/agent_cli/memory/_store.py @@ -111,31 +111,6 @@ def query_memories( return records -def get_summary_entry( - collection: Collection, - conversation_id: str, - *, - role: str = "summary", -) -> StoredMemory | None: - """Return the latest summary entry for a conversation, if present.""" - result = collection.get( - where={"$and": [{"conversation_id": conversation_id}, {"role": role}]}, - ) - docs = result.get("documents") or [] - metas = result.get("metadatas") or [] - ids = result.get("ids") or [] - - if not docs or not metas or not ids: - return None - - return StoredMemory( - id=ids[0], - content=docs[0], - metadata=MemoryMetadata(**dict(metas[0])), - distance=None, - ) - - def list_conversation_entries( collection: Collection, conversation_id: str, @@ -167,3 +142,124 @@ def list_conversation_entries( def delete_entries(collection: Collection, ids: list[str]) -> None: """Delete entries by ID.""" delete_docs(collection, ids) + + +def upsert_summary_entries( + collection: Collection, + entries: list[dict[str, Any]], +) -> list[str]: + """Store pre-built summary entries (from to_storage_metadata) to ChromaDB.""" + if not entries: + return [] + + ids: list[str] = [] + contents: list[str] = [] + metadatas: list[MemoryMetadata] = [] + + for entry in entries: + ids.append(entry["id"]) + contents.append(entry["content"]) + # Convert the raw metadata dict to MemoryMetadata + meta_dict = entry["metadata"] + metadatas.append(MemoryMetadata(**meta_dict)) + + upsert_memories(collection, ids=ids, contents=contents, metadatas=metadatas) + return ids + + +def get_summary_at_level( + collection: Collection, + conversation_id: str, + level: int, +) -> list[StoredMemory]: + """Retrieve summaries at a specific level for a conversation. + + Args: + collection: ChromaDB collection. + conversation_id: The conversation to retrieve summaries for. + level: Summary level (1=chunk, 2=group, 3=final). + + Returns: + List of StoredMemory entries at the requested level. 
+ + """ + filters: list[dict[str, Any]] = [ + {"conversation_id": conversation_id}, + {"role": "summary"}, + {"level": level}, + ] + result = collection.get(where={"$and": filters}) + docs = result.get("documents") or [] + metas = result.get("metadatas") or [] + ids = result.get("ids") or [] + + records: list[StoredMemory] = [] + for doc, meta, entry_id in zip(docs, metas, ids, strict=False): + records.append( + StoredMemory( + id=entry_id, + content=doc, + metadata=MemoryMetadata(**dict(meta)), + distance=None, + ), + ) + return records + + +def get_final_summary( + collection: Collection, + conversation_id: str, +) -> StoredMemory | None: + """Get the L3 (final) summary for a conversation. + + This is a convenience wrapper around get_summary_at_level for the + most common use case of retrieving the top-level summary. + + Args: + collection: ChromaDB collection. + conversation_id: The conversation to retrieve the summary for. + + Returns: + The final summary entry, or None if not found. + + """ + summaries = get_summary_at_level(collection, conversation_id, level=3) + # Return the one marked as final, or the first if none marked + for summary in summaries: + if summary.metadata.is_final: + return summary + return summaries[0] if summaries else None + + +def delete_summaries( + collection: Collection, + conversation_id: str, + *, + levels: list[int] | None = None, +) -> int: + """Delete summary entries for a conversation. + + Args: + collection: ChromaDB collection. + conversation_id: The conversation to delete summaries from. + levels: Optional list of levels to delete. If None, deletes all levels. + + Returns: + Number of entries deleted. + + """ + filters: list[dict[str, Any]] = [ + {"conversation_id": conversation_id}, + {"role": "summary"}, + ] + if levels: + filters.append({"level": {"$in": levels}}) + + # First get the IDs to count them + result = collection.get(where={"$and": filters}) + ids = result.get("ids") or [] + + if ids: + delete_docs(collection, list(ids)) + + return len(ids) diff --git a/agent_cli/memory/client.py b/agent_cli/memory/client.py index b5ea3a7f..a3cc970d 100644 --- a/agent_cli/memory/client.py +++ b/agent_cli/memory/client.py @@ -14,7 +14,7 @@ from agent_cli.memory._ingest import extract_and_store_facts_and_summaries from agent_cli.memory._persistence import evict_if_needed from agent_cli.memory._retrieval import augment_chat_request -from agent_cli.memory._store import init_memory_collection +from agent_cli.memory._store import init_memory_collection, list_conversation_entries from agent_cli.memory.engine import process_chat_request from agent_cli.memory.models import ChatRequest, MemoryRetrieval, Message from agent_cli.rag._retriever import get_reranker_model @@ -185,6 +185,36 @@ async def search( ) return retrieval or MemoryRetrieval(entries=[]) + def list_all( + self, + conversation_id: str = "default", + include_summary: bool = False, + ) -> list[dict[str, Any]]: + """List all stored memories for a conversation. + + Args: + conversation_id: Conversation scope. + include_summary: Whether to include summary entries. + + Returns: + List of memory entries with id, content, and metadata. 
+ + """ + entries = list_conversation_entries( + self.collection, + conversation_id, + include_summary=include_summary, + ) + return [ + { + "id": e.id, + "content": e.content, + "role": e.metadata.role, + "created_at": e.metadata.created_at, + } + for e in entries + ] + async def chat( self, messages: list[dict[str, str]] | list[Any], diff --git a/agent_cli/memory/entities.py b/agent_cli/memory/entities.py index 70b16a78..a352b0bb 100644 --- a/agent_cli/memory/entities.py +++ b/agent_cli/memory/entities.py @@ -32,12 +32,3 @@ class Fact(BaseModel): source_id: str = Field(..., description="UUID of the Turn this fact was extracted from") created_at: datetime # Facts are always role="memory" implicitly in the storage layer - - -class Summary(BaseModel): - """The rolling summary of a conversation.""" - - conversation_id: str - content: str - created_at: datetime - # Summaries are role="summary" implicitly diff --git a/agent_cli/memory/models.py b/agent_cli/memory/models.py index 9ef076d5..d52d952c 100644 --- a/agent_cli/memory/models.py +++ b/agent_cli/memory/models.py @@ -4,7 +4,7 @@ from typing import Literal -from pydantic import BaseModel, ConfigDict, field_validator +from pydantic import BaseModel, ConfigDict class Message(BaseModel): @@ -49,19 +49,25 @@ class MemoryMetadata(BaseModel): replaced_by: str | None = None source_id: str | None = None - -class SummaryOutput(BaseModel): - """Structured summary returned by the LLM.""" - - summary: str - - @field_validator("summary") - @classmethod - def _not_empty(cls, v: str) -> str: - if not v or not str(v).strip(): - msg = "field must be non-empty" - raise ValueError(msg) - return str(v).strip() + # Summary fields (only used when role="summary") + level: int | None = None + """Summary level (deprecated, kept for file structure compatibility).""" + is_final: bool | None = None + """Whether this is the final summary.""" + chunk_index: int | None = None + """Deprecated: index of the source chunk.""" + group_index: int | None = None + """Deprecated: index of this group.""" + input_tokens: int | None = None + """Number of tokens in the original input.""" + output_tokens: int | None = None + """Number of tokens in the summary output.""" + compression_ratio: float | None = None + """Ratio of output to input tokens.""" + summary_level: str | None = None + """Deprecated: previously stored SummaryLevel enum name.""" + collapse_depth: int | None = None + """Number of collapse iterations in map-reduce (0 = no collapse needed).""" class StoredMemory(BaseModel): diff --git a/agent_cli/rag/client.py b/agent_cli/rag/client.py index 3e43939a..940985de 100644 --- a/agent_cli/rag/client.py +++ b/agent_cli/rag/client.py @@ -124,8 +124,17 @@ def add( for i in range(len(chunks)) ] - # Upsert to collection - self.collection.upsert(ids=ids, documents=chunks, metadatas=metadatas) + # Upsert to collection in batches to avoid overwhelming the embedding service + batch_size = 10 + for i in range(0, len(ids), batch_size): + batch_ids = ids[i : i + batch_size] + batch_docs = chunks[i : i + batch_size] + batch_metas = metadatas[i : i + batch_size] + self.collection.upsert( + ids=batch_ids, + documents=batch_docs, + metadatas=batch_metas, + ) logger.info("Added doc_id=%s with %d chunks", doc_id, len(chunks)) return doc_id diff --git a/agent_cli/summarizer/__init__.py b/agent_cli/summarizer/__init__.py new file mode 100644 index 00000000..7c7603b9 --- /dev/null +++ b/agent_cli/summarizer/__init__.py @@ -0,0 +1,42 @@ +"""Adaptive summarization module for variable-length 
content. + +This module provides map-reduce summarization inspired by LangChain's approach: +1. If content fits target, return as-is (no LLM call) +2. Otherwise, split into chunks and summarize each in parallel (map phase) +3. Recursively collapse summaries until they fit target (reduce phase) + +Research foundations: +- LangChain ReduceDocumentsChain: token_max=3000, recursive collapse +- BOOOOKSCORE (arXiv:2310.00785): chunk_size=2048 optimal + +Example: + from agent_cli.summarizer import summarize, SummarizerConfig + + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + + # Compress to fit 4000 tokens + result = await summarize(long_document, config, target_tokens=4000) + + # Compress to 20% of original size + result = await summarize(long_document, config, target_ratio=0.2) + + print(f"Compression: {result.compression_ratio:.1%}") + +""" + +from agent_cli.summarizer.adaptive import summarize +from agent_cli.summarizer.models import ( + SummarizationError, + SummarizerConfig, + SummaryResult, +) + +__all__ = [ + "SummarizationError", + "SummarizerConfig", + "SummaryResult", + "summarize", +] diff --git a/agent_cli/summarizer/_prompts.py b/agent_cli/summarizer/_prompts.py new file mode 100644 index 00000000..e49fd417 --- /dev/null +++ b/agent_cli/summarizer/_prompts.py @@ -0,0 +1,118 @@ +"""Prompt templates for adaptive summarization. + +These prompts are designed to work with various LLM sizes (8B-20B parameters) +and are optimized for structured, factual output. +""" + +# Paragraph summary for "general" content type (default when no specific type provided) +GENERAL_SUMMARY_PROMPT = """Summarize the following content concisely in a short paragraph. + +Focus on: +- Key facts, decisions, and outcomes +- Important context that should be remembered +- Skip transient details, greetings, and chitchat + +{prior_context} + +Content to summarize: +{content} + +Summary (maximum {max_words} words):""".strip() + +# CHUNK - Used in map phase of map-reduce summarization +CHUNK_SUMMARY_PROMPT = """Summarize this section of a longer document. +Capture the main points while preserving important details. + +Section {chunk_index} of {total_chunks}: +{content} + +Summary of this section (maximum {max_words} words):""".strip() + +# META - Combine multiple summaries in reduce phase +META_SUMMARY_PROMPT = """Synthesize these summaries into a single coherent overview. +Identify common themes and key points across all sections. +Eliminate redundancy while preserving unique insights. + +Summaries to combine: +{summaries} + +Combined summary (maximum {max_words} words):""".strip() + +# For conversation-specific summarization +CONVERSATION_SUMMARY_PROMPT = """Summarize this conversation from the AI assistant's perspective. +Focus on: +- What the user wanted or asked about +- Key information the user shared about themselves +- Decisions made or conclusions reached +- Any commitments or follow-ups mentioned + +{prior_context} + +Conversation: +{content} + +Summary (maximum {max_words} words):""".strip() + +# For journal/personal content +JOURNAL_SUMMARY_PROMPT = """Summarize this personal entry or reflection. 
+Preserve: +- Key events and experiences mentioned +- Emotions and insights expressed +- Goals, plans, or intentions stated +- People, places, or things that are important + +{prior_context} + +Entry: +{content} + +Summary (maximum {max_words} words):""".strip() + +# For technical/document content +DOCUMENT_SUMMARY_PROMPT = """Summarize this technical content or documentation. +Focus on: +- Main concepts and their relationships +- Key procedures or processes described +- Important specifications or requirements +- Conclusions or recommendations + +{prior_context} + +Document: +{content} + +Summary (maximum {max_words} words):""".strip() + + +def get_prompt_for_content_type(content_type: str) -> str: + """Get the appropriate prompt template for a content type. + + Args: + content_type: One of "general", "conversation", "journal", "document". + + Returns: + The prompt template string. + + """ + prompts = { + "general": GENERAL_SUMMARY_PROMPT, + "conversation": CONVERSATION_SUMMARY_PROMPT, + "journal": JOURNAL_SUMMARY_PROMPT, + "document": DOCUMENT_SUMMARY_PROMPT, + } + return prompts.get(content_type, GENERAL_SUMMARY_PROMPT) + + +def format_prior_context(prior_summary: str | None) -> str: + """Format prior summary context for inclusion in prompts.""" + if prior_summary: + return f"Prior context (for continuity):\n{prior_summary}\n" + return "" + + +def format_summaries_for_meta(summaries: list[str]) -> str: + """Format a list of summaries for the meta-summary prompt.""" + formatted = [] + for i, summary in enumerate(summaries, 1): + formatted.append(f"[Section {i}]\n{summary}") + return "\n\n".join(formatted) diff --git a/agent_cli/summarizer/_utils.py b/agent_cli/summarizer/_utils.py new file mode 100644 index 00000000..64c72b8f --- /dev/null +++ b/agent_cli/summarizer/_utils.py @@ -0,0 +1,246 @@ +"""Utility functions for adaptive summarization.""" + +from __future__ import annotations + +import re +from functools import lru_cache +from typing import TYPE_CHECKING + +from pydantic import BaseModel + +from agent_cli.summarizer.models import SummarizationError, SummarizerConfig + +if TYPE_CHECKING: + import tiktoken + + +class SummaryOutput(BaseModel): + """Structured output for summary generation.""" + + summary: str + + +async def generate_summary( + prompt: str, + config: SummarizerConfig, + max_tokens: int = 256, +) -> str: + """Call the LLM to generate a summary. + + Args: + prompt: The prompt to send to the LLM. + config: Summarizer configuration. + max_tokens: Maximum tokens for the response. + + Returns: + The generated summary text. + + Raises: + SummarizationError: If the LLM call fails. + + """ + from pydantic_ai import Agent # noqa: PLC0415 + from pydantic_ai.models.openai import OpenAIChatModel # noqa: PLC0415 + from pydantic_ai.providers.openai import OpenAIProvider # noqa: PLC0415 + from pydantic_ai.settings import ModelSettings # noqa: PLC0415 + + provider = OpenAIProvider(api_key=config.api_key, base_url=config.openai_base_url) + model = OpenAIChatModel( + model_name=config.model, + provider=provider, + settings=ModelSettings( + temperature=0.3, + max_tokens=max_tokens, + ), + ) + + agent = Agent( + model=model, + system_prompt="You are a concise summarizer. 
Output only the summary, no preamble.", + output_type=SummaryOutput, + retries=2, + ) + + try: + result = await agent.run(prompt) + return result.output.summary.strip() + except Exception as e: + msg = f"Summarization failed: {e}" + raise SummarizationError(msg) from e + + +@lru_cache(maxsize=4) +def _get_encoding(model: str = "gpt-4") -> tiktoken.Encoding | None: + """Get tiktoken encoding for a model, with caching. + + Falls back to cl100k_base for unknown models (covers most modern LLMs). + Returns None when tiktoken is not installed so callers can use a heuristic. + """ + try: + import tiktoken # noqa: PLC0415 + except ModuleNotFoundError: + return None + + try: + return tiktoken.encoding_for_model(model) + except KeyError: + return tiktoken.get_encoding("cl100k_base") + + +def count_tokens(text: str, model: str = "gpt-4") -> int: + """Count tokens using tiktoken, falling back to char-based estimate.""" + if not text: + return 0 + enc = _get_encoding(model) + if enc is None: + return _estimate_token_count(text) + # Disable special token checking - LLM outputs may contain special tokens + # like <|constrain|>, <|endoftext|>, etc. that we want to count normally + return len(enc.encode(text, disallowed_special=())) + + +def _estimate_token_count(text: str) -> int: + """Very rough token estimate based on character length (~4 chars/token).""" + return max(1, (len(text) + 3) // 4) + + +def chunk_text( + text: str, + chunk_size: int = 3000, + overlap: int = 200, + model: str = "gpt-4", +) -> list[str]: + """Split text into overlapping chunks by token count. + + Uses semantic boundaries (paragraphs, sentences) when possible to avoid + splitting mid-thought. Falls back to token-based splitting if no good + boundaries are found. + + Args: + text: The text to chunk. + chunk_size: Target token count per chunk. + overlap: Token overlap between chunks for context continuity. + model: Model name for tokenizer. + + Returns: + List of text chunks. 
+ + """ + if not text: + return [] + + total_tokens = count_tokens(text, model) + if total_tokens <= chunk_size: + return [text] + + # Split into paragraphs first + paragraphs = re.split(r"\n\s*\n", text) + paragraphs = [p.strip() for p in paragraphs if p.strip()] + + if not paragraphs: + return [text] + + chunks: list[str] = [] + current_chunk: list[str] = [] + current_tokens = 0 + + for para in paragraphs: + para_tokens = count_tokens(para, model) + + # If single paragraph exceeds chunk size, split it further + if para_tokens > chunk_size: + # Flush current chunk if any + if current_chunk: + chunks.append("\n\n".join(current_chunk)) + current_chunk = [] + current_tokens = 0 + + # Split large paragraph by sentences + sentences = _split_sentences(para) + for sentence in sentences: + sent_tokens = count_tokens(sentence, model) + if current_tokens + sent_tokens > chunk_size and current_chunk: + chunks.append(" ".join(current_chunk)) + # Keep overlap from end of previous chunk + overlap_text = _get_overlap_text(current_chunk, overlap, model) + current_chunk = [overlap_text] if overlap_text else [] + current_tokens = count_tokens(overlap_text, model) if overlap_text else 0 + current_chunk.append(sentence) + current_tokens += sent_tokens + elif current_tokens + para_tokens > chunk_size: + # Flush current chunk and start new one + chunks.append("\n\n".join(current_chunk)) + # Keep overlap from end of previous chunk + overlap_text = _get_overlap_text(current_chunk, overlap, model) + current_chunk = [overlap_text, para] if overlap_text else [para] + current_tokens = ( + count_tokens(overlap_text, model) + para_tokens if overlap_text else para_tokens + ) + else: + current_chunk.append(para) + current_tokens += para_tokens + + # Don't forget the last chunk + if current_chunk: + chunks.append("\n\n".join(current_chunk)) + + return chunks + + +def _split_sentences(text: str) -> list[str]: + """Split text into sentences, preserving common abbreviations.""" + # Simple sentence splitting that handles common cases + # Matches period/question/exclamation followed by space and capital letter + sentences = re.split(r"(?<=[.!?])\s+(?=[A-Z])", text) + return [s.strip() for s in sentences if s.strip()] + + +def _get_overlap_text(chunks: list[str], target_tokens: int, model: str) -> str: + """Extract overlap text from end of chunk list. + + Takes text from the end of the chunk list until reaching target_tokens. + """ + if not chunks or target_tokens <= 0: + return "" + + # Work backwards through chunks + overlap_parts: list[str] = [] + tokens_collected = 0 + + for chunk in reversed(chunks): + chunk_tokens = count_tokens(chunk, model) + if tokens_collected + chunk_tokens <= target_tokens: + overlap_parts.insert(0, chunk) + tokens_collected += chunk_tokens + else: + # Take partial chunk if needed + words = chunk.split() + partial: list[str] = [] + for word in reversed(words): + word_tokens = count_tokens(word, model) + if tokens_collected + word_tokens <= target_tokens: + partial.insert(0, word) + tokens_collected += word_tokens + else: + break + if partial: + overlap_parts.insert(0, " ".join(partial)) + break + + return " ".join(overlap_parts) + + +def estimate_summary_tokens(input_tokens: int) -> int: + """Estimate target summary tokens based on input size. + + Uses ~10% compression ratio with floor/ceiling bounds. + """ + return min(500, max(50, input_tokens // 10)) + + +def tokens_to_words(tokens: int) -> int: + """Convert token count to approximate word count. 
+ + Rough approximation: 1 token ≈ 0.75 words for English text. + """ + return int(tokens * 0.75) diff --git a/agent_cli/summarizer/adaptive.py b/agent_cli/summarizer/adaptive.py new file mode 100644 index 00000000..2a772062 --- /dev/null +++ b/agent_cli/summarizer/adaptive.py @@ -0,0 +1,164 @@ +"""Adaptive summarization using map-reduce with dynamic collapse. + +Implements a simple algorithm inspired by LangChain's map-reduce chains: +1. If content fits target, return as-is (no LLM call) +2. Otherwise, split into chunks and summarize each (map phase) +3. Recursively collapse summaries until they fit target (reduce phase) + +Research foundations: +- LangChain ReduceDocumentsChain: token_max=3000, recursive collapse +- BOOOOKSCORE (arXiv:2310.00785): chunk_size=2048 optimal + +See docs/architecture/summarizer.md for detailed design rationale. +""" + +from __future__ import annotations + +import logging + +from agent_cli.summarizer._prompts import ( + format_prior_context, + get_prompt_for_content_type, +) +from agent_cli.summarizer._utils import ( + count_tokens, + generate_summary, + tokens_to_words, +) +from agent_cli.summarizer.map_reduce import map_reduce_summarize +from agent_cli.summarizer.models import ( + SummarizerConfig, + SummaryResult, +) + +logger = logging.getLogger(__name__) + +__all__ = [ + "SummarizerConfig", + "summarize", +] + + +async def summarize( + content: str, + config: SummarizerConfig, + *, + target_tokens: int | None = None, + target_ratio: float | None = None, + prior_summary: str | None = None, + content_type: str = "general", +) -> SummaryResult: + """Summarize content to fit within a target token limit. + + Simple algorithm: + - If content already fits target, return as-is (no LLM call) + - Otherwise, use map-reduce to compress until it fits + + Args: + content: The content to summarize. + config: Summarizer configuration. + target_tokens: Absolute token limit (e.g., 4000). Defaults to config.token_max. + target_ratio: Relative compression ratio (e.g., 0.2 = compress to 20% of input). + Takes precedence over target_tokens if both provided. + prior_summary: Optional prior summary for context continuity. + content_type: Type of content ("general", "conversation", "journal", "document"). + + Returns: + SummaryResult with summary and compression metrics. + + Examples: + # Compress to fit 4000 tokens + result = await summarize(huge_doc, config, target_tokens=4000) + + # Compress to 20% of original size + result = await summarize(huge_doc, config, target_ratio=0.2) + + # Use default (config.token_max = 3000) + result = await summarize(huge_doc, config) + + """ + if not content or not content.strip(): + return SummaryResult( + summary=None, + input_tokens=0, + output_tokens=0, + compression_ratio=0.0, + ) + + input_tokens = count_tokens(content, config.model) + + # Determine target + if target_ratio is not None: + target = max(1, int(input_tokens * target_ratio)) + elif target_tokens is not None: + target = target_tokens + else: + target = config.token_max + + logger.info( + "Summarizing %d tokens to target %d (type=%s)", + input_tokens, + target, + content_type, + ) + + # Already fits? 
Return content as-is (no LLM call) + if input_tokens <= target: + return SummaryResult( + summary=content, + input_tokens=input_tokens, + output_tokens=input_tokens, + compression_ratio=1.0, + collapse_depth=0, + ) + + # Content fits in single chunk but exceeds target - use content-aware summary + if input_tokens <= config.chunk_size: + summary = await _content_aware_summary( + content, + config, + target, + prior_summary, + content_type, + ) + output_tokens = count_tokens(summary, config.model) + return SummaryResult( + summary=summary, + input_tokens=input_tokens, + output_tokens=output_tokens, + compression_ratio=output_tokens / input_tokens, + collapse_depth=0, + ) + + # Large content - use map-reduce with dynamic collapse + result = await map_reduce_summarize(content, config, target) + + return SummaryResult( + summary=result.summary, + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + compression_ratio=result.compression_ratio, + collapse_depth=result.collapse_depth, + ) + + +async def _content_aware_summary( + content: str, + config: SummarizerConfig, + target_tokens: int, + prior_summary: str | None, + content_type: str, +) -> str: + """Generate a content-type aware summary for single-chunk content.""" + max_words = tokens_to_words(target_tokens) + + prompt_template = get_prompt_for_content_type(content_type) + prior_context = format_prior_context(prior_summary) + + prompt = prompt_template.format( + content=content, + prior_context=prior_context, + max_words=max_words, + ) + + return await generate_summary(prompt, config, max_tokens=target_tokens + 50) diff --git a/agent_cli/summarizer/map_reduce.py b/agent_cli/summarizer/map_reduce.py new file mode 100644 index 00000000..86e8b796 --- /dev/null +++ b/agent_cli/summarizer/map_reduce.py @@ -0,0 +1,242 @@ +"""Map-reduce summarization inspired by LangChain's approach. + +Simple algorithm: +1. Map: Split content into chunks, summarize each in parallel +2. Reduce: If combined summaries exceed target, recursively collapse + +Key insight from LangChain: No need for predetermined levels (L1/L2/L3). +Just keep collapsing until content fits. Dynamic depth based on actual content. + +References: +- LangChain ReduceDocumentsChain: token_max=3000, recursive collapse +- BOOOOKSCORE: chunk_size=2048 optimal for summarization + +""" + +from __future__ import annotations + +import asyncio +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from agent_cli.summarizer._prompts import ( + CHUNK_SUMMARY_PROMPT, + META_SUMMARY_PROMPT, + format_summaries_for_meta, +) +from agent_cli.summarizer._utils import ( + chunk_text, + count_tokens, + estimate_summary_tokens, + generate_summary, + tokens_to_words, +) + +if TYPE_CHECKING: + from agent_cli.summarizer.models import SummarizerConfig + +logger = logging.getLogger(__name__) + + +@dataclass +class MapReduceResult: + """Result of map-reduce summarization. + + Attributes: + summary: The final collapsed summary. + input_tokens: Token count of original content. + output_tokens: Token count of final summary. + compression_ratio: output_tokens / input_tokens. + collapse_depth: How many reduce iterations were needed. + intermediate_summaries: All intermediate summaries (for debugging/storage). 
+ + """ + + summary: str + input_tokens: int + output_tokens: int + compression_ratio: float + collapse_depth: int + intermediate_summaries: list[list[str]] # Each level of collapse + + +async def map_reduce_summarize( + content: str, + config: SummarizerConfig, + target: int | None = None, + max_collapse_depth: int = 10, +) -> MapReduceResult: + """Summarize content using map-reduce with dynamic collapse. + + Algorithm: + 1. Split into chunks and summarize each (map phase) + 2. If combined summaries exceed target, recursively collapse (reduce phase) + 3. Continue until everything fits in target + + Args: + content: The content to summarize. + config: Summarizer configuration. + target: Target token count. Defaults to config.token_max. + max_collapse_depth: Safety limit on recursive collapse depth. + + Returns: + MapReduceResult with summary and metadata. + + """ + if target is None: + target = config.token_max + + input_tokens = count_tokens(content, config.model) + + # Map phase: Split and summarize chunks in parallel + chunks = chunk_text( + content, + chunk_size=config.chunk_size, + overlap=config.chunk_overlap, + model=config.model, + ) + + logger.info("Map phase: processing %d chunks", len(chunks)) + summaries = await _map_summarize(chunks, config) + intermediate_summaries = [summaries.copy()] + + # Reduce phase: Recursively collapse until fits target + depth = 0 + while _total_tokens(summaries, config.model) > target: + depth += 1 + if depth > max_collapse_depth: + logger.warning( + "Hit max collapse depth %d, forcing final summary", + max_collapse_depth, + ) + break + + logger.info( + "Reduce phase (depth %d): collapsing %d summaries (%d tokens) to target %d", + depth, + len(summaries), + _total_tokens(summaries, config.model), + target, + ) + summaries = await _collapse_summaries(summaries, config, target) + intermediate_summaries.append(summaries.copy()) + + # Final synthesis if we have multiple summaries left + if len(summaries) > 1: + final_summary = await _synthesize(summaries, config, target) + else: + final_summary = summaries[0] + + output_tokens = count_tokens(final_summary, config.model) + + return MapReduceResult( + summary=final_summary, + input_tokens=input_tokens, + output_tokens=output_tokens, + compression_ratio=output_tokens / input_tokens, + collapse_depth=depth, + intermediate_summaries=intermediate_summaries, + ) + + +def _total_tokens(texts: list[str], model: str) -> int: + """Count total tokens across all texts.""" + return sum(count_tokens(t, model) for t in texts) + + +async def _map_summarize(chunks: list[str], config: SummarizerConfig) -> list[str]: + """Summarize each chunk in parallel (map phase).""" + semaphore = asyncio.Semaphore(config.max_concurrent_chunks) + total = len(chunks) + + async def summarize_chunk(idx: int, chunk: str) -> str: + async with semaphore: + return await _summarize_chunk(chunk, idx, total, config) + + tasks = [summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)] + return list(await asyncio.gather(*tasks)) + + +async def _summarize_chunk( + chunk: str, + chunk_index: int, + total_chunks: int, + config: SummarizerConfig, +) -> str: + """Summarize a single chunk.""" + source_tokens = count_tokens(chunk, config.model) + target_tokens = estimate_summary_tokens(source_tokens) + max_words = tokens_to_words(target_tokens) + + prompt = CHUNK_SUMMARY_PROMPT.format( + chunk_index=chunk_index + 1, + total_chunks=total_chunks, + content=chunk, + max_words=max_words, + ) + + return await generate_summary(prompt, config, 
max_tokens=target_tokens + 50) + + +async def _collapse_summaries( + summaries: list[str], + config: SummarizerConfig, + target: int, +) -> list[str]: + """Collapse summaries by grouping and re-summarizing (reduce phase). + + Groups summaries that together fit within target, then summarizes each group. + This is similar to LangChain's split_list_of_docs approach. + """ + if len(summaries) <= 1: + return summaries + + # Group summaries that together fit within target + groups: list[list[str]] = [] + current_group: list[str] = [] + current_tokens = 0 + + for summary in summaries: + summary_tokens = count_tokens(summary, config.model) + + # If adding this summary would exceed target, start new group + if current_tokens + summary_tokens > target and current_group: + groups.append(current_group) + current_group = [summary] + current_tokens = summary_tokens + else: + current_group.append(summary) + current_tokens += summary_tokens + + if current_group: + groups.append(current_group) + + # Summarize each group in parallel + semaphore = asyncio.Semaphore(config.max_concurrent_chunks) + + async def summarize_group(group: list[str]) -> str: + async with semaphore: + return await _synthesize(group, config, target) + + tasks = [summarize_group(g) for g in groups] + return list(await asyncio.gather(*tasks)) + + +async def _synthesize( + summaries: list[str], + config: SummarizerConfig, + target: int, +) -> str: + """Synthesize multiple summaries into one.""" + combined_tokens = sum(count_tokens(s, config.model) for s in summaries) + # Aim for target tokens but use estimate if combined is smaller + target_tokens = min(target, estimate_summary_tokens(combined_tokens)) + max_words = tokens_to_words(target_tokens) + + prompt = META_SUMMARY_PROMPT.format( + summaries=format_summaries_for_meta(summaries), + max_words=max_words, + ) + + return await generate_summary(prompt, config, max_tokens=target_tokens + 100) diff --git a/agent_cli/summarizer/models.py b/agent_cli/summarizer/models.py new file mode 100644 index 00000000..721201da --- /dev/null +++ b/agent_cli/summarizer/models.py @@ -0,0 +1,99 @@ +"""Data models for map-reduce summarization.""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import Any + +from pydantic import BaseModel, Field + + +class SummarizationError(Exception): + """Raised when summarization fails after all retries.""" + + +@dataclass +class SummarizerConfig: + """Configuration for summarization operations. + + Example: + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="llama3.1:8b", + ) + result = await summarize(long_document, config) + print(f"Compression: {result.compression_ratio:.1%}") + + """ + + openai_base_url: str + model: str + api_key: str | None = None + chunk_size: int = 2048 # BOOOOKSCORE's tested default + token_max: int = 3000 # LangChain's default - target size after compression + chunk_overlap: int = 200 + max_concurrent_chunks: int = 5 + + def __post_init__(self) -> None: + """Normalize the base URL.""" + self.openai_base_url = self.openai_base_url.rstrip("/") + if self.api_key is None: + self.api_key = "not-needed" + + +class SummaryResult(BaseModel): + """Result of summarization. + + Contains the summary and metadata about the compression achieved. 
+ """ + + summary: str | None = Field( + default=None, + description="The summary text (None if content already fit target)", + ) + input_tokens: int = Field(..., ge=0, description="Token count of the input content") + output_tokens: int = Field(..., ge=0, description="Token count of the output") + compression_ratio: float = Field( + ..., + ge=0.0, + le=1.0, + description="Ratio of output to input tokens (lower = more compression)", + ) + collapse_depth: int = Field( + default=0, + ge=0, + description="Number of collapse iterations in map-reduce (0 = no collapse needed)", + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC), + description="Timestamp when summary was created", + ) + + def to_storage_metadata(self, conversation_id: str) -> list[dict[str, Any]]: + """Convert to metadata entry for ChromaDB storage. + + Returns a list with a single metadata dict for the summary. + Returns empty list if no summary was generated. + """ + if not self.summary: + return [] + + timestamp = self.created_at.isoformat() + + return [ + { + "id": f"{conversation_id}:summary", + "content": self.summary, + "metadata": { + "conversation_id": conversation_id, + "role": "summary", + "is_final": True, + "input_tokens": self.input_tokens, + "output_tokens": self.output_tokens, + "compression_ratio": self.compression_ratio, + "collapse_depth": self.collapse_depth, + "created_at": timestamp, + }, + }, + ] diff --git a/docs/aijournal-poc-comparison.md b/docs/aijournal-poc-comparison.md new file mode 100644 index 00000000..a6f928f0 --- /dev/null +++ b/docs/aijournal-poc-comparison.md @@ -0,0 +1,245 @@ +# AI Journal POC vs aijournal: Detailed Comparison + +This document analyzes the differences between our MemoryClient-based AI Journal POC and the full-featured aijournal project, identifying strengths, gaps, and potential paths forward. + +## Executive Summary + +| Aspect | Our POC | aijournal | +|--------|---------|-----------| +| **Complexity** | ~200 LOC | ~15,000+ LOC | +| **Setup Time** | Instant | `aijournal init` + config | +| **Profile Storage** | Generated on-demand | Persisted YAML with versioning | +| **Claim System** | Raw fact strings | Typed atoms with strength/decay | +| **Context Layers** | Single flat layer | 4 hierarchical layers (L1-L4) | +| **Learning** | Static extraction | Feedback loops + interview probing | + +## 1. Architecture Comparison + +### 1.1 Data Model + +**Our POC:** +``` +~/.aijournal/ + entries/ + journal/ + facts/ # Extracted facts as markdown + turns/ # Chat turns + chroma/ # Vector embeddings +``` + +**aijournal:** +``` +workspace/ + data/ + journal/YYYY/MM/DD/*.md # Raw entries + normalized/YYYY-MM-DD/ # Structured YAML + profile/ + self_profile.yaml # Facets (values, goals, traits) + claims.yaml # Typed claim atoms + derived/ + summaries/ # Daily summaries + microfacts/ # Extracted facts + persona/persona_core.yaml # L1 context (~1200 tokens) + index/ # Vector store + metadata + chat_sessions/ # Conversation history + pending/profile_updates/ # Queued changes +``` + +**Analysis:** aijournal separates authoritative data (human-editable) from derived data (reproducible). Our POC conflates these, making it harder to inspect or manually correct the knowledge base. 
+ +### 1.2 Claim Representation + +**Our POC - Raw facts:** +``` +"Bas is a software engineer" +"The user loves hiking" +"The user's wife is named Anne" +``` + +**aijournal - Typed claim atoms:** +```yaml +- type: trait + subject: self + predicate: occupation + statement: "Works as a software engineer focused on AI systems" + scope: {domain: work, context: [professional]} + strength: 0.85 + status: accepted + provenance: + sources: [entry:2025-01-15-morning] + first_seen: 2025-01-15 + last_updated: 2025-01-20 +``` + +**Analysis:** aijournal's typed claims enable: +- Filtering by type (traits vs preferences vs goals) +- Confidence tracking via `strength` +- Time-decay for relevance +- Conflict detection between claims +- Source attribution for verification + +### 1.3 Context Layers + +**Our POC:** Single layer - all facts dumped into system prompt + +**aijournal - Hierarchical layers:** + +| Layer | Content | Tokens | Use Case | +|-------|---------|--------|----------| +| L1 | Persona core + top claims | ~1,200 | Quick chat, advice | +| L2 | L1 + recent summaries/facts | ~2,000 | Daily check-ins | +| L3 | L2 + full claims + facets | ~2,600 | Deep conversations | +| L4 | L3 + prompts + config + history | ~3,200 | External AI export | + +**Analysis:** Layered context prevents token overflow and allows appropriate depth for different interactions. + +## 2. Feature Comparison + +### 2.1 Fact Extraction + +| Feature | Our POC | aijournal | +|---------|---------|-----------| +| Extraction method | PydanticAI agent | Ollama + custom prompts | +| Output format | Raw strings | Typed MicroFact objects | +| Reconciliation | ADD/UPDATE/DELETE/NONE | Consolidation with strength weighting | +| Deduplication | Semantic similarity | Hash + semantic + scope matching | + +**Our POC advantage:** The reconciliation logic (PromptedOutput with JSON mode) prevents duplicate facts effectively. + +**aijournal advantage:** Consolidation weights existing evidence: `strength_new = clamp01((w_prev * strength_prev + w_obs * signal) / (w_prev + w_obs))` + +### 2.2 Profile Generation + +| Feature | Our POC | aijournal | +|---------|---------|-----------| +| Generation | On-demand via LLM | Pre-built `persona_core.yaml` | +| Caching | None | Persisted with staleness tracking | +| Categories | LLM-determined | Defined schema (values, goals, traits, etc.) | +| Token budget | Unlimited (risk of overflow) | Configurable (~1,200 default) | + +**Our POC advantage:** Flexible - LLM determines categories dynamically based on content. + +**aijournal advantage:** Deterministic, auditable, and respects token limits. + +### 2.3 Chat Integration + +| Feature | Our POC | aijournal | +|---------|---------|-----------| +| Context injection | All facts in system prompt | Layer-appropriate context | +| Citations | None | `[entry:id#p]` markers | +| Feedback | None | Up/down adjustments to claim strength | +| Memory storage | Bypassed (direct LLM call) | Persisted with telemetry | + +**Our POC advantage:** Simple, no side effects. + +**aijournal advantage:** Learning loop - feedback strengthens/weakens claims over time. + +### 2.4 Missing in Our POC + +1. **Interview/Probing Mode** + - aijournal generates questions to fill knowledge gaps + - Ranks facets by `staleness × impact_weight` to prioritize probing + +2. **Time Decay** + - aijournal: `effective_strength = strength × exp(-λ × staleness)` + - Our POC: All facts treated equally regardless of age + +3. 
**Conflict Resolution** + - aijournal: Detects contradictions, downgrades to `tentative`, queues questions + - Our POC: UPDATE replaces old fact entirely + +4. **Advisor Mode** + - aijournal: Separate `advise` command with coaching preferences + - Our POC: Generic chat only + +5. **Export/Packs** + - aijournal: Generate context bundles for external AIs + - Our POC: No export capability + +## 3. Test Results Analysis + +### 3.1 Blog Post Ingestion + +We fed 12+ blog posts into our POC: + +| Metric | Result | +|--------|--------| +| Posts processed | ~12 | +| Facts extracted | 52 | +| Extraction accuracy | High - captured key themes | +| Profile quality | Excellent - identified all major interests | + +**Sample extracted facts:** +- "Bas is a software engineer" +- "Bas works on AI systems" +- "The user loves hiking" +- "You went for a 5km run this morning" +- "You discovered that local vision models like Qwen3-VL-32B can identify niche books" + +### 3.2 Profile Generation Quality + +The generated profile correctly identified: +- ✅ Professional identity (software engineer, AI focus) +- ✅ Personal relationships (wife Anne) +- ✅ Hobbies (hiking, running, learning Dutch) +- ✅ Technical interests (local AI, terminal productivity, homelab) +- ✅ Values (minimalism, security, reproducibility) + +### 3.3 Chat Intelligence + +The chat demonstrated: +- **Specific recall:** "You use the Glove80 keyboard with programmable layers" +- **Temporal understanding:** Tracked evolution of views on AI coding +- **Theme synthesis:** Connected local AI + security + productivity interests +- **Nuanced responses:** Acknowledged both benefits and limitations + +## 4. Recommendations + +### 4.1 Quick Wins (Keep POC Simple) + +1. **Persist profile summary** - Cache the LLM-generated profile to avoid regeneration +2. **Add timestamps to facts** - Already have `created_at`, use it for recency weighting +3. **Token budgeting** - Limit facts sent to chat based on relevance + recency + +### 4.2 Medium-Term Enhancements + +1. **Claim typing** - Categorize facts into types (trait, preference, goal, relationship) +2. **Strength tracking** - Increment when same fact extracted multiple times +3. **Simple decay** - Weight recent facts higher in context + +### 4.3 aijournal Features Worth Adopting + +1. **Interview mode** - Generate questions to learn more +2. **Feedback loop** - Up/down on responses affects claim strength +3. **Layered context** - L1 for quick chats, L4 for deep dives +4. **Citations** - Link responses to source facts + +### 4.4 What NOT to Adopt + +1. **7-stage pipeline** - Overkill for our use case +2. **Strict schema governance** - Adds friction without clear benefit for POC +3. **Markdown file storage** - ChromaDB is sufficient for our needs + +## 5. Conclusion + +Our POC validates the core hypothesis: **MemoryClient can serve as the foundation for a personal knowledge system**. With ~200 lines of code, we achieved: + +- Accurate fact extraction from unstructured text +- Coherent profile generation from diverse content +- Personalized conversations using stored knowledge + +The main gap is **learning over time** - our system doesn't strengthen beliefs based on repetition or feedback. Adding simple strength tracking and decay would close 80% of the functionality gap with 20% of aijournal's complexity. 
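+
+For reference, the evidence-weighted version of this idea (the consolidation formula quoted in Section 2.1 plus the time decay from Section 2.4) is only slightly more code than the fixed-increment recommendation below. A minimal sketch, with a hypothetical `StoredFact` record and illustrative weights:
+
+```python
+import math
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+
+
+@dataclass
+class StoredFact:
+    """Hypothetical fact record with evidence tracking."""
+
+    content: str
+    strength: float = 0.5  # current belief strength in [0, 1]
+    weight: float = 1.0    # accumulated evidence weight
+    last_seen: datetime = field(default_factory=lambda: datetime.now(UTC))
+
+
+def consolidate(fact: StoredFact, signal: float, obs_weight: float = 1.0) -> None:
+    """Blend a new observation into the stored strength (clamped weighted average)."""
+    blended = (fact.weight * fact.strength + obs_weight * signal) / (fact.weight + obs_weight)
+    fact.strength = min(1.0, max(0.0, blended))
+    fact.weight += obs_weight
+    fact.last_seen = datetime.now(UTC)
+
+
+def effective_strength(fact: StoredFact, decay_rate: float = 0.1) -> float:
+    """Discount strength by staleness: strength * exp(-lambda * days_since_last_seen)."""
+    days = (datetime.now(UTC) - fact.last_seen).total_seconds() / 86400
+    return fact.strength * math.exp(-decay_rate * days)
+```
+
+Re-extracting the same fact would call `consolidate(fact, 1.0)`, nudging its strength toward certainty, while retrieval ranks by `effective_strength()` so stale claims fade over time.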
+ +### Recommended Next Step + +Add a `strength` field to stored facts and implement: +```python +# On duplicate fact detection +existing.strength = min(1.0, existing.strength + 0.1) +existing.last_seen = now() + +# On retrieval +effective_strength = fact.strength * exp(-0.1 * days_since_last_seen) +``` + +This single change would transform our static knowledge base into a learning system. diff --git a/docs/architecture/memory.md b/docs/architecture/memory.md index 83ae9720..2b2ab4a2 100644 --- a/docs/architecture/memory.md +++ b/docs/architecture/memory.md @@ -59,7 +59,7 @@ entries/ assistant/ __.md # Raw assistant responses summaries/ - summary.md # The single rolling summary of the conversation + __summary.md # Single final summary (map-reduce collapses to one) ``` **Deleted Directory Structure (Soft Deletes):** @@ -71,7 +71,7 @@ entries/ facts/ __.md summaries/ - summary.md # Tombstoned summary + __summary.md # Tombstoned summary ``` ### 2.2 File Format @@ -154,22 +154,28 @@ Executed via `_postprocess_after_turn` (background task). * **Output:** JSON list of strings. Failures fall back to `[]`. ### 4.3 Reconciliation (Memory Management) -Resolves contradictions using a "Search-Decide-Update" loop. +Resolves contradictions using a "Search-Decide-Update" loop with complete enumeration. 1. **Local Search:** For each new fact, retrieve a small neighborhood of existing `role="memory"` entries for the conversation. -2. **LLM Decision:** Uses `UPDATE_MEMORY_PROMPT` (examples + strict JSON schema) to compare `new_facts` vs `existing_memories`. +2. **LLM Decision:** Uses `UPDATE_MEMORY_PROMPT` to compare `new_facts` vs `existing_memories`. The model must return **all memories** (existing + new) with explicit events for each. * **Decisions:** `ADD`, `UPDATE`, `DELETE`, `NONE`. * If no existing memories are found, all new facts are added directly. * On LLM/network failure, defaults to adding all new facts. - * Safeguard: if the model returns only deletes/empties, the new facts are still added to avoid data loss. 3. **Execution:** * **Adds:** Creates new fact files and upserts to Chroma. * **Updates:** Implemented as delete + add with a fresh ID; tombstones record `replaced_by`. * **Deletes:** Soft-deletes files (moved under `deleted/`) and removes from Chroma. -### 4.4 Summarization +### 4.4 Summarization (Adaptive Map-Reduce) +Uses the `agent_cli.summarizer` module for research-backed adaptive summarization. + +* **Level Selection:** Automatically determines summarization strategy based on token count: + * `NONE` (< 100 tokens): No summary needed, facts only. + * `BRIEF` (100-500 tokens): Single-sentence summary. + * `MAP_REDUCE` (>= 500 tokens): Dynamic collapse using map-reduce with content-type aware prompts. +* **Algorithm:** LangChain-inspired map-reduce that recursively collapses until content fits token_max (3000). * **Input:** Previous summary (if any) + newly extracted facts. -* **Prompt:** `SUMMARY_PROMPT` (updates the running summary). -* **Persistence:** Writes a single `summaries/summary.md` per conversation (deterministic doc ID). +* **Persistence:** Stores single final summary in `summaries/` directory with YAML front matter containing compression metrics. +* **See:** `docs/architecture/summarizer.md` for detailed algorithm specification. ### 4.5 Eviction * **Trigger:** If total entries in conversation > `max_entries` (default 500). @@ -190,17 +196,22 @@ To replicate the system behavior, the following prompt strategies are required. 
* **Example:** "My wife is Anne" -> `["The user's wife is named Anne"]`. ### 5.2 Reconciliation (`UPDATE_MEMORY_PROMPT`) -* **Goal:** Compare `new_facts` against `existing_memories` (id + text) and output structured decisions. +* **Goal:** Compare `new_facts` against `existing_memories` and return **all memories** (existing + new) with explicit events. +* **Approach:** The model must enumerate every memory in its response, forcing deliberate decisions rather than implicit omissions. * **Operations:** - * **ADD:** New information (generates a new ID). - * **UPDATE:** Refines existing information (uses the provided short ID). - * **DELETE:** Contradicts existing information (e.g., "I hate pizza" vs "I love pizza"). **If deleting because of a replacement, the new fact must also be returned (ADD or UPDATE).** - * **NONE:** Fact already exists or is irrelevant. -* **Output constraints:** JSON list only; no prose/code fences; IDs for UPDATE/DELETE/NONE must come from the provided list. - -### 5.3 Summarization (`SUMMARY_PROMPT`) -* **Goal:** Maintain a concise running summary. -* **Constraints:** Aggregate related facts. Drop transient chit-chat. Focus on durable info. + * **ADD:** New information not present in existing memories (generates a new sequential ID). + * **UPDATE:** Refines existing information about the **same topic** (keeps the existing ID). + * **DELETE:** Explicitly contradicts existing information (e.g., "I hate pizza" vs "I love pizza"). + * **NONE:** Existing memory is unrelated to new facts, or new fact is an exact duplicate. +* **Output constraints:** JSON list containing all memories; each existing memory must have an event; new unrelated facts must be ADDed; no prose or code fences. + +### 5.3 Summarization (Adaptive Prompts) +The summarizer uses prompts from `agent_cli.summarizer._prompts`: +* **`BRIEF_SUMMARY_PROMPT`:** Single-sentence distillation for short content (100-500 tokens). +* **`GENERAL_SUMMARY_PROMPT`:** Paragraph summary with prior context integration (general content). +* **`CHUNK_SUMMARY_PROMPT`:** Individual chunk summarization for map phase. +* **`META_SUMMARY_PROMPT`:** Synthesizes multiple chunk summaries in reduce phase. +* **Content-type variants:** `CONVERSATION_SUMMARY_PROMPT`, `JOURNAL_SUMMARY_PROMPT`, `DOCUMENT_SUMMARY_PROMPT` for domain-specific summarization. --- diff --git a/docs/architecture/summarizer.md b/docs/architecture/summarizer.md new file mode 100644 index 00000000..c7476142 --- /dev/null +++ b/docs/architecture/summarizer.md @@ -0,0 +1,362 @@ +# Agent CLI: Adaptive Summarizer Technical Specification + +This document describes the architectural decisions, design rationale, and technical approach for the `agent-cli` adaptive summarization subsystem. + +## 1. System Overview + +The adaptive summarizer provides **content-aware compression** using a map-reduce approach inspired by LangChain's chains. It compresses content to fit within a specified token budget using a simple algorithm: + +``` +Input Content ──▶ Token Count ──▶ Compare to Target + │ + ┌───────────────────────┴───────────────────────┐ + │ │ + Fits target Exceeds target + │ │ + Return as-is Map-Reduce + (no LLM call) (dynamic collapse) +``` + +**Design Goals:** + +- **Maximum simplicity:** Single entry point with straightforward logic. +- **Flexible targeting:** Specify absolute token count or relative compression ratio. +- **Research-grounded defaults:** chunk_size=2048 (BOOOOKSCORE), token_max=3000 (LangChain). 
+- **Content-type awareness:** Domain-specific prompts for conversations, journals, documents. + +--- + +## 2. Research Foundations + +This section documents what techniques are borrowed from research vs. what is original design. + +### 2.1 Borrowed: LangChain Map-Reduce Pattern + +**Reference:** LangChain `ReduceDocumentsChain` + +LangChain's approach to document summarization uses a simple algorithm: +1. **Map phase:** Split content into chunks, summarize each in parallel +2. **Reduce phase:** If combined summaries exceed `token_max`, recursively collapse until they fit + +Key insight: No need for predetermined levels. Dynamic depth based on actual content length. LangChain's default `token_max=3000`. + +### 2.2 Borrowed: Chunk Size (BOOOOKSCORE) + +**Reference:** arXiv:2310.00785 (ICLR 2024) + +BOOOOKSCORE's research on book-length summarization found optimal chunk sizes. Their defaults: +- Chunk size: **2048 tokens** (we use this) +- Max summary length: **900 tokens** + +### 2.3 Original Design (Not Research-Backed) + +The following aspects are **original design choices without direct research justification**: + +- **Content-type prompts:** Domain-specific prompts are original design. +- **Target ratio parameter:** The option to specify compression as a percentage is a convenience feature. + +--- + +## 3. Architectural Decisions + +### 3.1 Simple Target-Based Logic + +**Decision:** Use a simple "fits? return : compress" algorithm. + +**Rationale:** + +- **Minimal complexity:** No level selection logic, threshold management, or multiple code paths. +- **Clear semantics:** If content fits the target, return it unchanged. Otherwise, compress. +- **Flexible targeting:** Users can specify exact token counts or relative ratios. + +**Algorithm:** + +```python +async def summarize( + content: str, + config: SummarizerConfig, + *, + target_tokens: int | None = None, # Absolute limit + target_ratio: float | None = None, # e.g., 0.2 = compress to 20% +) -> SummaryResult: + input_tokens = count_tokens(content) + + # Determine target + if target_ratio is not None: + target = max(1, int(input_tokens * target_ratio)) + elif target_tokens is not None: + target = target_tokens + else: + target = config.token_max # Default: 3000 + + # Already fits? Return as-is (no LLM call) + if input_tokens <= target: + return SummaryResult(summary=content, ...) + + # Compress using map-reduce + return await map_reduce_summarize(content, config, target) +``` + +### 3.2 Map-Reduce with Dynamic Collapse + +**Decision:** Use LangChain-style map-reduce for all compression. + +**Rationale:** + +- **Single algorithm:** One code path handles all content sizes. +- **Dynamic depth:** Collapse depth adapts to actual content length. +- **Research-backed:** LangChain's approach is battle-tested. + +**Algorithm:** + +```python +async def map_reduce_summarize(content, config, target): + # Map: Split and summarize chunks in parallel + chunks = chunk_text(content, chunk_size=2048) + summaries = await parallel_summarize(chunks) + + # Reduce: Recursively collapse until fits target + while total_tokens(summaries) > target: + groups = group_by_token_limit(summaries, target) + summaries = await parallel_synthesize(groups) + + return final_synthesis(summaries) +``` + +### 3.3 Research-Backed Defaults + +**Decision:** Use values from published research. 
+ +| Parameter | Value | Source | +| :--- | :--- | :--- | +| `chunk_size` | 2048 | BOOOOKSCORE | +| `token_max` | 3000 | LangChain | +| `chunk_overlap` | 200 | Original | + +### 3.4 Semantic Boundary Chunking + +**Decision:** Split content on semantic boundaries (paragraphs, then sentences) rather than fixed character counts. + +**Rationale:** + +- **Coherence preservation:** Splitting mid-sentence loses context. +- **Natural units:** Paragraphs and sentences are natural semantic units. +- **Overlap for continuity:** The 200-token overlap ensures concepts spanning chunk boundaries aren't lost. + +**Fallback chain:** + +1. Prefer paragraph boundaries (double newlines) +2. Fall back to sentence boundaries (`.!?` followed by space + capital) +3. Final fallback to word-based splitting + +### 3.5 Content-Type Aware Prompts + +**Decision:** Use different prompt templates for different content domains. + +**Rationale:** + +- **Conversations:** Focus on user preferences, decisions, action items. +- **Journals:** Emphasize personal insights, emotional context, growth patterns. +- **Documents:** Prioritize key findings, methodology, conclusions. + +A generic summarization prompt loses domain-specific signal. + +### 3.6 Prior Summary Integration + +**Decision:** Provide the previous summary as context when generating updates. + +**Rationale:** + +- **Continuity:** New summaries build on existing context. +- **Incremental updates:** Avoid re-summarizing all historical content. +- **Information preservation:** Important information persists through the chain. + +### 3.7 Compression Ratio Tracking + +**Decision:** Track and report compression metrics for every summary. + +Every `SummaryResult` includes `input_tokens`, `output_tokens`, `compression_ratio`, and `collapse_depth` for observability. + +--- + +## 4. Processing Pipeline + +### 4.1 Entry Point + +The entry point (`summarize()`) implements simple logic: + +1. **Token counting:** Uses tiktoken with model-appropriate encoding. Falls back to character-based estimation (~4 chars/token) if tiktoken unavailable. +2. **Target calculation:** Determines target from `target_tokens`, `target_ratio`, or default `token_max`. +3. **Fit check:** If content fits target, return as-is. +4. **Compression:** Call map-reduce if content exceeds target. + +### 4.2 Single-Chunk Content + +For content that fits within `chunk_size` but exceeds target: + +- Single LLM call with content-type aware prompt +- Returns `SummaryResult` with compressed summary + +### 4.3 Multi-Chunk Content + +For larger content (> chunk_size tokens): + +1. **Map phase:** Split content into overlapping chunks, summarize each in parallel. +2. **Reduce phase:** If combined summaries exceed target, group and re-summarize recursively. +3. **Final synthesis:** Combine remaining summaries into final output. + +The `collapse_depth` field in the result indicates how many reduce iterations were needed. + +--- + +## 5. 
Data Models + +### 5.1 SummaryResult + +```python +class SummaryResult(BaseModel): + summary: str | None # None if content was empty + input_tokens: int + output_tokens: int + compression_ratio: float # 0.0-1.0 + collapse_depth: int # 0 = no collapse needed + created_at: datetime +``` + +### 5.2 SummarizerConfig + +```python +@dataclass +class SummarizerConfig: + openai_base_url: str + model: str + api_key: str | None = None + chunk_size: int = 2048 # BOOOOKSCORE + token_max: int = 3000 # LangChain (default target) + chunk_overlap: int = 200 + max_concurrent_chunks: int = 5 + timeout: float = 60.0 +``` + +--- + +## 6. Integration with Memory System + +### 6.1 Write Path + +The memory system triggers summarization during post-processing: + +1. Collect raw conversation turns +2. Retrieve existing summary as prior context +3. Call summarizer with content + prior summary + content type +4. Persist result to storage + +### 6.2 Read Path + +The memory retrieval system uses summaries for context injection: + +- Fetches summary for the conversation +- Injects as prefix to retrieved memories +- Provides high-level context that individual snippets lack + +### 6.3 Storage + +Summaries are stored with metadata: + +```python +{ + "id": "{conversation_id}:summary", + "content": summary_text, + "metadata": { + "conversation_id": conversation_id, + "role": "summary", + "input_tokens": 1500, + "output_tokens": 150, + "compression_ratio": 0.1, + "collapse_depth": 1, + "created_at": "2024-01-15T10:30:00Z", + "is_final": True, + }, +} +``` + +--- + +## 7. Error Handling + +Summarization follows a fail-fast philosophy: + +- **LLM errors:** Propagated as `SummarizationError` (base class for all summarization errors). +- **Empty input:** Returns result with `summary=None` immediately (not an error). +- **Encoding errors:** Falls back to character-based token estimation. +- **Max depth exceeded:** Warning logged, forces final synthesis even if over target. + +The caller decides how to handle failures—typically by proceeding without a summary rather than blocking the entire operation. + +--- + +## 8. Configuration + +| Parameter | Default | Source | +| :--- | :--- | :--- | +| `chunk_size` | 2048 | BOOOOKSCORE | +| `token_max` | 3000 | LangChain | +| `chunk_overlap` | 200 | Original | +| `max_concurrent` | 5 | Implementation choice | +| `max_collapse_depth` | 10 | Safety limit | + +--- + +## 9. Usage Examples + +### Basic Usage + +```python +from agent_cli.summarizer import SummarizerConfig, summarize + +config = SummarizerConfig( + openai_base_url="http://localhost:11434/v1", + model="llama3.1:8b", +) + +# Default: compress to fit 3000 tokens +result = await summarize(content, config) + +# Compress to specific token count +result = await summarize(content, config, target_tokens=500) + +# Compress to 20% of original size +result = await summarize(content, config, target_ratio=0.2) + +# With content type for better prompts +result = await summarize( + content, + config, + target_tokens=500, + content_type="conversation", +) +``` + +--- + +## 10. Limitations and Trade-offs + +### 10.1 Fact Preservation + +Summarization is inherently lossy. Specific facts (dates, numbers, names) are often dropped in favor of thematic content. If your use case requires fact retrieval: + +- Store original content alongside summaries +- Use fact extraction instead of summarization +- Use RAG to retrieve original chunks + +### 10.2 No Intermediate Summaries + +Unlike hierarchical approaches, map-reduce only stores the final summary. 
Intermediate chunk summaries are discarded after synthesis. This simplifies storage but removes granular access. + +--- + +## 11. Future Improvements + +1. **Benchmark against BOOOOKSCORE metrics** for coherence evaluation +2. **Add fact extraction mode** for use cases requiring specific detail preservation +3. **Streaming support** for real-time summarization feedback diff --git a/examples/aijournal_poc.py b/examples/aijournal_poc.py new file mode 100755 index 00000000..156c0b97 --- /dev/null +++ b/examples/aijournal_poc.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +"""Minimal AI Journal proof-of-concept using MemoryClient. + +This validates the core hypothesis: MemoryClient can serve as the +foundation for a personal knowledge system (AI journal). + +Usage: + # Add a journal entry + python examples/aijournal_poc.py add "Today I learned about quantum computing at work" + + # Search memories + python examples/aijournal_poc.py search "what did I learn?" + + # Interactive chat with memory + python examples/aijournal_poc.py chat "What have I been working on lately?" +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import os +from pathlib import Path + +import httpx + +from agent_cli.memory.client import MemoryClient + +# Enable debug logging for memory module +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + datefmt="%H:%M:%S", +) +# Enable DEBUG for memory ingest to see full prompts +logging.getLogger("agent_cli.memory._ingest").setLevel(logging.DEBUG) + + +# Defaults for local AI setup +DEFAULT_BASE_URL = "http://192.168.1.143:9292/v1" +DEFAULT_MODEL = "gpt-oss-high:20b" +DEFAULT_EMBEDDING_MODEL = "embeddinggemma:300m" + + +def get_client(model: str | None = None) -> tuple[MemoryClient, str]: + """Initialize the memory client with sensible defaults. + + Returns: + Tuple of (client, model_name) + + """ + base_url = os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + model_name = model or os.environ.get("OPENAI_MODEL", DEFAULT_MODEL) + embedding_model = os.environ.get("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + print(f"Using: {base_url}") + print(f" Chat model: {model_name}") + print(f" Embedding model: {embedding_model}") + + return MemoryClient( + memory_path=Path("~/.aijournal").expanduser(), + openai_base_url=base_url, + chat_api_key=api_key, + embedding_api_key=api_key, + embedding_model=embedding_model, + enable_summarization=True, + enable_git_versioning=False, # Keep it simple for POC + score_threshold=0.1, # Lower threshold for local models + ), model_name + + +async def cmd_add(text: str) -> None: + """Add a journal entry.""" + client, model = get_client() + print(f"Adding entry: {text[:50]}...") + await client.add(text, conversation_id="journal", model=model) + print("✓ Entry processed and facts extracted") + + +async def cmd_search(query: str, top_k: int = 5) -> None: + """Search memories.""" + client, model = get_client() + print(f"Searching for: {query}\n") + + result = await client.search(query, conversation_id="journal", top_k=top_k, model=model) + + if not result.entries: + print("No relevant memories found.") + return + + for i, entry in enumerate(result.entries, 1): + print(f"{i}. 
[{entry.role}] {entry.content}") + print(f" Score: {entry.score:.3f} | Created: {entry.created_at[:10]}") + print() + + +def cmd_show() -> None: + """Show all stored memories (what the system knows about you).""" + client, _ = get_client() + print("=== What I know about you ===\n") + + entries = client.list_all(conversation_id="journal") + + if not entries: + print("No memories stored yet. Add some journal entries first!") + return + + # Sort by created_at + entries.sort(key=lambda x: x["created_at"], reverse=True) + + for i, entry in enumerate(entries, 1): + date = entry["created_at"][:10] if entry["created_at"] else "unknown" + print(f"{i}. [{date}] {entry['content']}") + + print(f"\n--- Total: {len(entries)} memories ---") + + +PROFILE_PROMPT = """Based on the following facts about a person, create a brief profile summary. +Organize the information into categories like: +- **Identity**: Name, relationships, occupation +- **Interests & Activities**: Hobbies, regular activities +- **Goals & Values**: What they care about, what they're working towards +- **Recent Events**: Notable recent happenings + +Only include categories that have relevant information. Be concise. + +Facts: +{facts} + +Profile Summary:""" + + +async def cmd_profile() -> None: + """Generate a profile summary from stored memories.""" + client, model = get_client() + + entries = client.list_all(conversation_id="journal") + + if not entries: + print("No memories stored yet. Add some journal entries first!") + return + + # Format facts for the prompt + facts = "\n".join(f"- {e['content']}" for e in entries) + prompt = PROFILE_PROMPT.format(facts=facts) + + print("=== Your Profile ===\n") + print("(Generating profile from stored memories...)\n") + + # Direct LLM call (bypasses memory storage) + base_url = os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + async with httpx.AsyncClient(timeout=120.0) as http: + response = await http.post( + f"{base_url}/chat/completions", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "model": model, + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.7, + }, + ) + data = response.json() + + choices = data.get("choices", []) + if choices: + profile = choices[0].get("message", {}).get("content", "") + print(profile) + + print(f"\n--- Based on {len(entries)} memories ---") + + +CHAT_SYSTEM_PROMPT = """You are a helpful AI assistant with memory of the user. + +Here's what you know about the user: +{profile} + +Use this knowledge naturally in your responses. 
Be helpful and personable.""" + + +async def cmd_chat(question: str, with_profile: bool = True) -> None: + """Chat with memory-augmented LLM.""" + client, model = get_client() + + # Build profile context + profile_text = "" + if with_profile: + entries = client.list_all(conversation_id="journal") + if entries: + profile_text = "\n".join(f"- {e['content']}" for e in entries) + + print(f"Question: {question}\n") + + # Build messages with profile context + messages: list[dict[str, str]] = [] + if profile_text: + system_prompt = CHAT_SYSTEM_PROMPT.format(profile=profile_text) + messages.append({"role": "system", "content": system_prompt}) + messages.append({"role": "user", "content": question}) + + # Direct LLM call with profile context + base_url = os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + async with httpx.AsyncClient(timeout=120.0) as http: + response = await http.post( + f"{base_url}/chat/completions", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "model": model, + "messages": messages, + "temperature": 0.7, + }, + ) + data = response.json() + + choices = data.get("choices", []) + if choices: + reply = choices[0].get("message", {}).get("content", "") + print(f"Answer: {reply}") + + if profile_text: + entry_count = len(client.list_all(conversation_id="journal")) + print(f"\n--- Using profile with {entry_count} memories ---") + + +def main() -> None: + """CLI entry point.""" + parser = argparse.ArgumentParser(description="AI Journal POC") + subparsers = parser.add_subparsers(dest="command", required=True) + + # Add command + add_parser = subparsers.add_parser("add", help="Add a journal entry") + add_parser.add_argument("text", help="The journal entry text") + + # Search command + search_parser = subparsers.add_parser("search", help="Search memories") + search_parser.add_argument("query", help="Search query") + search_parser.add_argument("-k", "--top-k", type=int, default=5, help="Number of results") + + # Chat command + chat_parser = subparsers.add_parser("chat", help="Chat with memory") + chat_parser.add_argument("question", help="Question to ask") + + # Show command - display what the system knows about you + subparsers.add_parser("show", help="Show all stored memories") + + # Profile command - generate a profile summary + subparsers.add_parser("profile", help="Generate profile from memories") + + args = parser.parse_args() + + if args.command == "add": + asyncio.run(cmd_add(args.text)) + elif args.command == "search": + asyncio.run(cmd_search(args.query, args.top_k)) + elif args.command == "chat": + asyncio.run(cmd_chat(args.question)) + elif args.command == "show": + cmd_show() + elif args.command == "profile": + asyncio.run(cmd_profile()) + + +if __name__ == "__main__": + main() diff --git a/examples/summarizer_demo.py b/examples/summarizer_demo.py new file mode 100644 index 00000000..f5d593a1 --- /dev/null +++ b/examples/summarizer_demo.py @@ -0,0 +1,431 @@ +"""Demonstrate the simplified summarizer on texts of varying lengths. + +This script fetches content of different sizes and shows how the adaptive +summarizer compresses content to fit different target token counts or ratios. 
+ +Usage: + python examples/summarizer_demo.py + + # Test with specific target ratio + python examples/summarizer_demo.py --target-ratio 0.2 + + # Test with specific target token count + python examples/summarizer_demo.py --target-tokens 500 + + # Use a different model + python examples/summarizer_demo.py --model "gpt-4o-mini" +""" # noqa: INP001 + +from __future__ import annotations + +import argparse +import asyncio +import json +import os +import re +import textwrap +import traceback +from dataclasses import dataclass + +import httpx + +from agent_cli.summarizer import ( + SummarizerConfig, + SummaryResult, + summarize, +) + +# Defaults for local AI setup +DEFAULT_BASE_URL = "http://192.168.1.143:9292/v1" +DEFAULT_MODEL = "gpt-oss-high:20b" + + +@dataclass +class TextSample: + """A sample text for testing the summarizer.""" + + name: str + description: str + url: str + content_type: str = "general" + # If URL fetch fails, use this fallback + fallback_content: str | None = None + + +# Sample texts of varying lengths to demonstrate summarization +SAMPLES: list[TextSample] = [ + TextSample( + name="Short News Article", + description="~150-400 tokens - demonstrates small content handling", + url="https://httpbin.org/json", # Returns small JSON we'll convert to text + fallback_content=""" + Breaking News: Scientists at the Marine Biology Institute have made a + groundbreaking discovery in the Mariana Trench. A new species of deep-sea + fish, dubbed "Pseudoliparis swirei," has been found surviving at depths + exceeding 8,000 meters, making it one of the deepest-living fish ever + documented. + + The research team, led by Dr. Sarah Chen from the University of Washington, + used advanced unmanned submersibles equipped with high-resolution cameras + and collection apparatus. The expedition lasted three months and covered + multiple dive sites across the western Pacific. + + "This discovery fundamentally changes our understanding of life in extreme + environments," Dr. Chen stated in a press conference. "The adaptations + these fish have developed to survive crushing pressures and near-freezing + temperatures are remarkable." + + The fish displays several unique characteristics including translucent skin, + specialized proteins that prevent cellular damage under pressure, and an + unusual metabolism that allows survival with minimal oxygen. Scientists + believe studying these adaptations could lead to breakthroughs in medicine + and materials science. + + The finding has been published in the journal Nature and has already + generated significant interest from the scientific community worldwide. + Further expeditions are planned to study the species in its natural habitat. + """, + ), + TextSample( + name="Technology Article", + description="~800-2000 tokens - demonstrates medium content", + url="https://en.wikipedia.org/api/rest_v1/page/summary/Artificial_intelligence", + content_type="document", + fallback_content=""" + Artificial intelligence (AI) is the intelligence of machines or software, + as opposed to the intelligence of humans or other animals. It is a field + of computer science that develops and studies intelligent machines. The + field encompasses a wide range of approaches and technologies. + + AI research has been defined as the field of study of intelligent agents, + which refers to any system that perceives its environment and takes actions + that maximize its chances of achieving its goals. 
This definition emphasizes + the practical aspects of building systems that can operate effectively. + + The term "artificial intelligence" has been used to describe machines that + mimic cognitive functions that humans associate with the human mind, such + as learning and problem solving. As machines become increasingly capable, + tasks considered to require "intelligence" are often removed from the + definition of AI, a phenomenon known as the AI effect. + + History of Artificial Intelligence + + The field of AI research was founded at a workshop held on the campus of + Dartmouth College during the summer of 1956. The attendees became the + founders and leaders of AI research. They and their students produced + programs that the press described as astonishing. + + Early AI research in the 1950s explored topics like problem solving and + symbolic methods. In the 1960s, the US Department of Defense took interest + and began training computers to mimic basic human reasoning. DARPA completed + street mapping projects in the 1970s and produced intelligent personal + assistants in 2003, long before Siri, Alexa or Cortana. + + Modern AI Approaches + + Modern AI techniques have become pervasive and include machine learning, + deep learning, natural language processing, computer vision, robotics, + and autonomous systems. These technologies power everything from search + engines to self-driving cars. + + Machine learning is a subset of AI that enables systems to learn and improve + from experience without being explicitly programmed. Deep learning uses + neural networks with many layers to analyze various factors of data. + + Neural networks are computing systems inspired by biological neural networks. + They consist of interconnected nodes that process information using + connectionist approaches to computation. Modern neural networks can have + millions or billions of parameters. + + Applications of AI + + AI applications are transforming industries including healthcare, finance, + transportation, and entertainment. In healthcare, AI helps diagnose diseases + and develop new treatments. In finance, AI powers fraud detection and + algorithmic trading. + + Autonomous vehicles use AI to perceive their environment and make driving + decisions. Virtual assistants use natural language processing to understand + and respond to user queries. Recommendation systems use AI to suggest + content based on user preferences. + + Ethical Considerations + + The field was founded on the assumption that human intelligence can be + so precisely described that a machine can be made to simulate it. This + raised philosophical arguments about the mind and the ethical consequences + of creating artificial beings endowed with human-like intelligence. + + Major concerns include job displacement, algorithmic bias, privacy violations, + and the potential for misuse. Researchers and policymakers are working to + develop frameworks for responsible AI development and deployment. + + The future of AI holds both tremendous promise and significant challenges. + As these systems become more capable, society must grapple with questions + about control, accountability, and the nature of intelligence itself. 
+ """, + ), + TextSample( + name="Full Article", + description="~4000-10000 tokens - demonstrates large content with chunking", + url="https://en.wikipedia.org/api/rest_v1/page/mobile-html/Machine_learning", + content_type="document", + fallback_content=None, # We'll generate synthetic content + ), +] + + +def generate_synthetic_content(target_tokens: int, topic: str = "technology") -> str: + """Generate synthetic content for testing when URLs fail.""" + # Each paragraph is roughly 50-100 tokens + paragraphs = [ + f"Section on {topic} - Part {{i}}: This section explores various aspects " + f"of {topic} and its implications for modern society. The development of " + f"new technologies continues to reshape how we live and work. Researchers " + f"have made significant progress in understanding the fundamentals.", + f"The history of {topic} spans many decades of innovation. Early pioneers " + f"laid the groundwork for current advancements. Their contributions remain " + f"relevant today as we build upon established foundations.", + f"Current applications of {topic} include healthcare, transportation, and " + f"communication. These sectors have seen dramatic improvements in efficiency " + f"and capability. Future developments promise even greater transformations.", + f"Challenges in {topic} include ethical considerations, resource constraints, " + f"and technical limitations. Addressing these requires collaboration across " + f"disciplines. Solutions often emerge from unexpected directions.", + f"The future of {topic} looks promising with continued investment and research. " + f"Emerging trends suggest new possibilities. Stakeholders must prepare for " + f"rapid change while maintaining focus on beneficial outcomes.", + ] + + result = [] + tokens_per_para = 75 # approximate + needed_paragraphs = target_tokens // tokens_per_para + 1 + + for i in range(needed_paragraphs): + para = paragraphs[i % len(paragraphs)].format(i=i + 1) + result.append(para) + + return "\n\n".join(result) + + +async def fetch_content(sample: TextSample, client: httpx.AsyncClient) -> str: + """Fetch content from URL or use fallback.""" + try: + # Add User-Agent header to avoid 403 errors from some sites + headers = { + "User-Agent": "Mozilla/5.0 (compatible; SummarizerDemo/1.0)", + } + response = await client.get( + sample.url, + timeout=30.0, + follow_redirects=True, + headers=headers, + ) + response.raise_for_status() + + content = response.text + + # Handle Wikipedia API JSON responses + if "wikipedia.org/api" in sample.url: + try: + data = json.loads(content) + if "extract" in data: + content = data["extract"] + elif "text" in data: + content = data["text"] + except json.JSONDecodeError: + pass + + # For httpbin JSON, create a readable summary + if "httpbin.org/json" in sample.url: + content = sample.fallback_content or "" + + # Strip HTML tags if present + if "<" in content and ">" in content: + content = re.sub(r"<[^>]+>", " ", content) + content = re.sub(r"\s+", " ", content).strip() + + # Check if content is too short + min_words = 80 + if len(content.split()) < min_words: + print(f" 📎 Fetched content too short ({len(content.split())} words), using fallback") + content = sample.fallback_content or generate_synthetic_content(1500) + + # For very long content, truncate to keep demo fast + words = content.split() + if len(words) > 13500: # noqa: PLR2004 + content = " ".join(words[:13500]) + print(" 📎 Truncated to ~13500 words for faster demo") + + return content.strip() + + except Exception as e: + print(f" ⚠️ Failed 
to fetch URL: {e}") + + if sample.fallback_content: + return sample.fallback_content.strip() + + # Generate synthetic content + return generate_synthetic_content(1500) + + +def print_result( + sample: TextSample, + result: SummaryResult, + content: str, + target_tokens: int | None, + target_ratio: float | None, +) -> None: + """Print a formatted summary result.""" + print("\n" + "=" * 70) + print(f"📄 {sample.name}") + print(f" {sample.description}") + print("=" * 70) + + # Input stats + word_count = len(content.split()) + print("\n📊 Input Statistics:") + print(f" Words: {word_count:,}") + print(f" Tokens: {result.input_tokens:,}") + print(f" Content type: {sample.content_type}") + + # Target info + print("\n🎯 Target:") + if target_ratio is not None: + print(f" Ratio: {target_ratio:.0%} of input") + print(f" Calculated target: ~{int(result.input_tokens * target_ratio):,} tokens") + elif target_tokens is not None: + print(f" Tokens: {target_tokens:,}") + else: + print(" Default: 3000 tokens (LangChain default)") + + # Result info + print("\n📝 Result:") + if result.summary == content: + print(" Status: ⏭️ Content already fits target (returned as-is)") + elif result.collapse_depth > 0: + print(f" Status: 🔄 Map-reduce summarization (collapse depth: {result.collapse_depth})") + else: + print(" Status: 📝 Single-pass summarization") + + print(f" Output tokens: {result.output_tokens:,}") + print(f" Compression: {result.compression_ratio:.1%}") + + # Summary content + if result.summary and result.summary != content: + print("\n📝 Summary:") + wrapped = textwrap.fill( + result.summary, + width=68, + initial_indent=" ", + subsequent_indent=" ", + ) + # Only show first ~500 chars of summary + if len(wrapped) > 600: # noqa: PLR2004 + wrapped = wrapped[:600] + "..." 
+ print(wrapped) + + +async def run_demo( + target_tokens: int | None = None, + target_ratio: float | None = None, + model: str | None = None, + base_url: str | None = None, +) -> None: + """Run the summarizer demo.""" + # Configuration + actual_base_url = base_url or os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + actual_model = model or os.environ.get("OPENAI_MODEL", DEFAULT_MODEL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + print("🔧 Configuration:") + print(f" Base URL: {actual_base_url}") + print(f" Model: {actual_model}") + + config = SummarizerConfig( + openai_base_url=actual_base_url, + model=actual_model, + api_key=api_key, + chunk_size=2048, # BOOOOKSCORE default + max_concurrent_chunks=3, + ) + + async with httpx.AsyncClient() as client: + for sample in SAMPLES: + print(f"\n⏳ Processing: {sample.name}...") + + # Fetch content + content = await fetch_content(sample, client) + + try: + # Summarize with specified target + result = await summarize( + content=content, + config=config, + target_tokens=target_tokens, + target_ratio=target_ratio, + content_type=sample.content_type, + ) + + # Display results + print_result(sample, result, content, target_tokens, target_ratio) + + except Exception as e: + print(f"\n❌ Error summarizing {sample.name}: {e}") + traceback.print_exc() + + print("\n" + "=" * 70) + print("✅ Demo complete!") + print("=" * 70) + + +def main() -> None: + """CLI entry point.""" + parser = argparse.ArgumentParser( + description="Demonstrate adaptive summarization on texts of varying lengths", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(""" + Examples: + python examples/summarizer_demo.py + python examples/summarizer_demo.py --target-ratio 0.2 + python examples/summarizer_demo.py --target-tokens 500 + python examples/summarizer_demo.py --model "llama3.1:8b" --base-url "http://localhost:11434/v1" + """), + ) + + parser.add_argument( + "--target-ratio", + type=float, + help="Target ratio for compression (e.g., 0.2 = compress to 20%%)", + ) + parser.add_argument( + "--target-tokens", + type=int, + help="Target token count for summary", + ) + parser.add_argument( + "--model", + help=f"Model to use (default: {DEFAULT_MODEL})", + ) + parser.add_argument( + "--base-url", + help=f"OpenAI-compatible API base URL (default: {DEFAULT_BASE_URL})", + ) + + args = parser.parse_args() + + if args.target_ratio is not None and args.target_tokens is not None: + parser.error("Cannot specify both --target-ratio and --target-tokens") + + asyncio.run( + run_demo( + target_tokens=args.target_tokens, + target_ratio=args.target_ratio, + model=args.model, + base_url=args.base_url, + ), + ) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 38c3df61..3f31e6b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ memory = [ "watchfiles>=0.21.0", # Until here same as 'rag' extras "pyyaml>=6.0.0", + "tiktoken>=0.5.0", # For token counting in adaptive summarization ] test = [ "pytest>=7.0.0", @@ -60,6 +61,7 @@ test = [ "pydantic-ai-slim[openai]", "pytest-timeout", "pytest-mock", + "tiktoken>=0.5.0", # For summarizer tests ] dev = [ "agent-cli[test]", @@ -87,6 +89,7 @@ dev = [ "notebook", "audiostretchy>=1.3.0", "pre-commit-uv>=4.1.4", + "tiktoken>=0.5.0", # For summarizer tests ] [project.scripts] diff --git a/scripts/compare_summarizers.py b/scripts/compare_summarizers.py new file mode 100644 index 00000000..15265cb0 --- /dev/null +++ b/scripts/compare_summarizers.py @@ -0,0 
+1,402 @@ +"""Compare old (L1-L4 hierarchical) vs new (adaptive map-reduce) summarizer. + +This script: +1. Shows what level each system would use for test content +2. Runs the NEW summarizer to produce actual summaries +3. Evaluates summary quality using needle-in-haystack questions +4. Uses LLM-as-judge for quality assessment + +Usage: + python scripts/compare_summarizers.py + python scripts/compare_summarizers.py --model "gpt-4o-mini" --base-url "https://api.openai.com/v1" +""" + +from __future__ import annotations + +import argparse +import asyncio +import os +import textwrap +from dataclasses import dataclass, field + +from agent_cli.summarizer import SummarizerConfig, summarize +from agent_cli.summarizer._utils import count_tokens + +# Old system thresholds +OLD_THRESHOLD_NONE = 100 +OLD_THRESHOLD_BRIEF = 500 +OLD_THRESHOLD_STANDARD = 3000 +OLD_THRESHOLD_DETAILED = 15000 + +# New system thresholds +NEW_THRESHOLD_NONE = 100 +NEW_THRESHOLD_BRIEF = 500 + +# Evaluation threshold +FACT_PRESERVATION_THRESHOLD = 0.5 + +# Test content at different sizes with embedded "needles" (specific facts) +TEST_CASES = [ + { + "name": "Brief Range (~300 tokens)", + "description": "Tests the 100-500 token range where OLD=BRIEF, NEW=BRIEF", + "content": """ + The artificial intelligence revolution is transforming every industry. + Machine learning algorithms now power recommendation systems, fraud detection, + and autonomous vehicles. Deep learning, a subset of machine learning, uses + neural networks with multiple layers to analyze complex patterns in data. + + Major tech companies are investing billions in AI research. Google's DeepMind + created AlphaGo, which defeated world champion Lee Sedol in March 2016 in + the ancient game of Go. OpenAI developed GPT models that can generate + human-like text. These advances raise both excitement and concerns about + the future of work and society. + + Researchers are working on making AI systems more transparent and aligned with + human values. The field of AI safety, pioneered by researchers like Stuart + Russell at UC Berkeley, aims to ensure that advanced AI systems remain + beneficial and under human control. + """, + "needles": [ + ("Who did AlphaGo defeat?", "Lee Sedol"), + ("When did AlphaGo win?", "March 2016"), + ("Who pioneered AI safety?", "Stuart Russell"), + ("Where does Stuart Russell work?", "UC Berkeley"), + ], + }, + { + "name": "Standard/MapReduce Range (~900 tokens)", + "description": "Tests 500-3000 range where OLD=STANDARD, NEW=MAP_REDUCE", + "content": """ + Climate change represents one of the most pressing challenges facing humanity. + The Earth's average temperature has risen approximately 1.1 degrees Celsius since + the pre-industrial era, primarily due to human activities that release greenhouse + gases. Carbon dioxide from burning fossil fuels accounts for 76% of emissions. + + The Intergovernmental Panel on Climate Change (IPCC), led by chair Hoesung Lee, + has warned that limiting warming to 1.5 degrees Celsius is crucial. The 2021 + report involved 234 authors from 66 countries analyzing over 14,000 scientific + papers. Their conclusion: human influence has warmed the climate at a rate + unprecedented in at least the last 2,000 years. + + Renewable energy offers hope. Solar panel costs dropped 89% between 2010 and 2020, + making solar competitive with fossil fuels. China leads with 306 gigawatts of + installed solar capacity. Wind energy has grown exponentially, with Denmark + generating 47% of its electricity from wind in 2019. 
+ + Electric vehicles are gaining ground. Tesla delivered 936,172 vehicles in 2021, + while traditional automakers race to electrify. Norway leads adoption, with + electric vehicles representing 65% of new car sales in 2021. Battery costs + have fallen 89% since 2010, from $1,100 to $132 per kilowatt-hour. + + Carbon capture remains expensive at $250-$600 per ton of CO2. The Orca plant + in Iceland, opened in September 2021, captures just 4,000 tons annually. + Critics note this equals emissions from about 870 cars. More radical approaches + like solar radiation management could cool the planet but carry unknown risks. + + The Paris Agreement, signed by 196 parties in December 2015, aims to limit + warming to well below 2 degrees. Countries submit Nationally Determined + Contributions (NDCs) outlining their emission reduction plans. However, + current pledges put the world on track for 2.7 degrees of warming by 2100. + + Individual actions matter but systemic change is essential. Agriculture accounts + for 10-12% of global emissions. Beef production generates 60 kg of CO2 equivalent + per kilogram of meat. A plant-based diet could reduce food emissions by up to 73%. + """, + "needles": [ + ("Who chairs the IPCC?", "Hoesung Lee"), + ("How many authors contributed to the 2021 IPCC report?", "234"), + ("What percent of Denmark's electricity comes from wind?", "47%"), + ("When did the Orca plant open?", "September 2021"), + ("How many vehicles did Tesla deliver in 2021?", "936,172"), + ("What percent of Norway's new cars are electric?", "65%"), + ("When was the Paris Agreement signed?", "December 2015"), + ("How much CO2 does beef production generate per kg?", "60 kg"), + ], + }, + { + "name": "Detailed/MapReduce Range (~1800 tokens)", + "description": "Tests larger content where OLD=DETAILED (chunks+meta), NEW=MAP_REDUCE", + "content": """ + The history of computing spans centuries of human innovation, from ancient + calculating devices to quantum computers. Understanding this evolution reveals + how incremental advances compound into revolutionary change. + + Ancient Foundations (2400 BCE - 1600 CE) + + The abacus emerged independently in multiple civilizations. Chinese merchants + used the suanpan as early as 2400 BCE for arithmetic. The Roman abacus used + grooved beads, while the Japanese soroban featured a distinctive 1:4 bead + arrangement still used today. + + Mechanical Calculation (1600-1900) + + In 1642, nineteen-year-old Blaise Pascal invented the Pascaline to help his + tax-collector father. This brass rectangular box could add and subtract using + interlocking gears. Only 50 were built, and 9 survive in museums today. + + Gottfried Wilhelm Leibniz improved Pascal's design in 1694, creating the + Stepped Reckoner capable of multiplication and division. He also invented + binary arithmetic, writing "Explication de l'Arithmétique Binaire" in 1703, + laying groundwork for digital computing. + + Charles Babbage designed the Analytical Engine from 1833-1871, incorporating + a mill (processor), store (memory), and punch card input. Ada Lovelace wrote + detailed notes including what's considered the first algorithm - for computing + Bernoulli numbers. The engine was never completed; Babbage died in 1871. + + Electronic Era (1900-1970) + + Alan Turing published "On Computable Numbers" in 1936, defining the theoretical + Turing machine. During WWII, he led the team at Bletchley Park that cracked + the Enigma code, shortening the war by an estimated two years. 
+ + ENIAC, completed February 14, 1946, at the University of Pennsylvania, was + the first general-purpose electronic computer. It weighed 30 tons, consumed + 150 kilowatts, and contained 17,468 vacuum tubes. Programming required + physically rewiring the machine, taking days for each new problem. + + The transistor, invented December 23, 1947, at Bell Labs by John Bardeen, + Walter Brattain, and William Shockley, revolutionized electronics. They + shared the 1956 Nobel Prize in Physics. By 1954, the TRADIC computer used + 800 transistors instead of vacuum tubes. + + Jack Kilby demonstrated the first integrated circuit on September 12, 1958, + at Texas Instruments. Robert Noyce independently developed a superior silicon + version at Fairchild. Kilby won the 2000 Nobel Prize; Noyce had died in 1990. + + Personal Computing (1970-2000) + + Intel's 4004, released November 15, 1971, was the first commercial microprocessor. + Designed by Federico Faggin, it contained 2,300 transistors running at 740 kHz. + The 8080 (1974) powered the Altair 8800, sparking the PC revolution. + + Steve Wozniak built the Apple I in 1976 in his garage. The Apple II (1977) + featured color graphics and cost $1,298. IBM entered with the PC on August 12, + 1981, using Microsoft's MS-DOS. By 1984, Apple's Macintosh introduced the GUI + to mainstream users at $2,495. + + Tim Berners-Lee invented the World Wide Web at CERN in 1989, proposing it + on March 12. The first website went live December 20, 1990. By 1995, the + internet had 16 million users; by 2000, 361 million. + + Modern Era (2000-Present) + + Moore's Law, predicting transistor doubling every two years, has held since + Gordon Moore's 1965 observation. Intel's 2021 Alder Lake processors contain + 10+ billion transistors on chips measuring 215 mm². + + Steve Jobs unveiled the iPhone on January 9, 2007. It sold 1.4 million units + in its first year. Smartphones now exceed 6.6 billion globally, containing + more power than 1990s supercomputers. + + Google claimed quantum supremacy October 23, 2019, with Sycamore completing + a calculation in 200 seconds that would take 10,000 years classically. + IBM disputed this, but the quantum era has clearly begun. 
+ """, + "needles": [ + ("How old was Pascal when he invented the Pascaline?", "19"), + ("When did Leibniz write about binary arithmetic?", "1703"), + ("How many vacuum tubes did ENIAC contain?", "17,468"), + ("When was the transistor invented?", "December 23, 1947"), + ("When did Jack Kilby demonstrate the integrated circuit?", "September 12, 1958"), + ("How many transistors did the Intel 4004 have?", "2,300"), + ("When did the first website go live?", "December 20, 1990"), + ("When did Jobs unveil the iPhone?", "January 9, 2007"), + ("When did Google claim quantum supremacy?", "October 23, 2019"), + ], + }, +] + + +def get_old_level(tokens: int) -> tuple[str, str]: + """Determine what level the OLD (L1-L4) summarizer would use.""" + if tokens < OLD_THRESHOLD_NONE: + return "NONE", "No summary needed" + if tokens < OLD_THRESHOLD_BRIEF: + return "BRIEF", "Single sentence (~20% compression)" + if tokens < OLD_THRESHOLD_STANDARD: + return "STANDARD", "Paragraph with content-aware prompts (~12%)" + if tokens < OLD_THRESHOLD_DETAILED: + return "DETAILED", "Chunked L1 summaries + meta L3 (~7%)" + return "HIERARCHICAL", "Full L1/L2/L3 tree structure" + + +def get_new_level(tokens: int) -> tuple[str, str]: + """Determine what level the NEW (adaptive) summarizer would use.""" + if tokens < NEW_THRESHOLD_NONE: + return "NONE", "No summary needed" + if tokens < NEW_THRESHOLD_BRIEF: + return "BRIEF", "Single sentence" + return "MAP_REDUCE", "Dynamic collapse based on content" + + +@dataclass +class TestResult: + """Result of testing one content sample.""" + + name: str + tokens: int + old_level: str + old_description: str + new_level: str + new_description: str + new_summary: str | None = None + needles_found: int = 0 + total_needles: int = 0 + needle_details: list[tuple[str, str, bool]] = field(default_factory=list) + + +async def run_test(test_case: dict, config: dict) -> TestResult: + """Run a single test case.""" + content = test_case["content"].strip() + tokens = count_tokens(content, config["model"]) + + old_level, old_desc = get_old_level(tokens) + new_level, new_desc = get_new_level(tokens) + + # Run new summarizer + cfg = SummarizerConfig( + openai_base_url=config["base_url"], + model=config["model"], + api_key=config.get("api_key", "not-needed"), + ) + + result = await summarize(content, cfg, content_type="document") + + # Check needles in summary + needle_details = [] + needles_found = 0 + + if result.summary: + summary_lower = result.summary.lower() + for question, answer in test_case["needles"]: + # Check if the key fact is preserved + found = answer.lower() in summary_lower + needle_details.append((question, answer, found)) + if found: + needles_found += 1 + + return TestResult( + name=test_case["name"], + tokens=tokens, + old_level=old_level, + old_description=old_desc, + new_level=new_level, + new_description=new_desc, + new_summary=result.summary, + needles_found=needles_found, + total_needles=len(test_case["needles"]), + needle_details=needle_details, + ) + + +def print_result(result: TestResult) -> None: + """Print a test result.""" + print(f"\n{'=' * 70}") + print(f"{result.name}") + print(f"{'=' * 70}") + print(f"Input tokens: {result.tokens}") + print() + print("Level comparison:") + print(f" OLD: {result.old_level:12} - {result.old_description}") + print(f" NEW: {result.new_level:12} - {result.new_description}") + print() + + if result.new_summary: + print("New summary:") + wrapped = textwrap.fill( + result.new_summary, + width=68, + initial_indent=" ", + subsequent_indent=" ", 
+ ) + print(wrapped) + print() + + print( + f"Needle-in-haystack test: {result.needles_found}/{result.total_needles} facts preserved", + ) + for question, answer, found in result.needle_details: + status = "[OK]" if found else "[MISSING]" + print(f" {status} {question} -> {answer}") + else: + print("No summary produced (NONE level)") + + +async def main() -> None: + """Run all tests.""" + parser = argparse.ArgumentParser(description="Compare summarizer versions") + parser.add_argument("--model", default=os.environ.get("OPENAI_MODEL", "gpt-oss-high:20b")) + parser.add_argument( + "--base-url", + default=os.environ.get("OPENAI_BASE_URL", "http://192.168.1.143:9292/v1"), + ) + parser.add_argument("--api-key", default=os.environ.get("OPENAI_API_KEY", "not-needed")) + args = parser.parse_args() + + config = { + "model": args.model, + "base_url": args.base_url, + "api_key": args.api_key, + } + + print("=" * 70) + print("SUMMARIZER COMPARISON: OLD (L1-L4) vs NEW (Adaptive Map-Reduce)") + print("=" * 70) + print(f"Model: {config['model']}") + print(f"Base URL: {config['base_url']}") + + results = [] + for test in TEST_CASES: + print(f"\nRunning: {test['name']}...") + result = await run_test(test, config) + results.append(result) + print_result(result) + + # Summary + print("\n" + "=" * 70) + print("SUMMARY") + print("=" * 70) + + total_needles = sum(r.total_needles for r in results) + found_needles = sum(r.needles_found for r in results) + + print( + f"\nOverall fact preservation: {found_needles}/{total_needles} ({100 * found_needles / total_needles:.1f}%)", + ) + print() + + print("Key differences:") + print(""" +OLD System (5 levels): + - NONE (<100), BRIEF (100-500), STANDARD (500-3000), + DETAILED (3000-15000), HIERARCHICAL (>15000) + - Fixed boundaries, L1/L2/L3 tree for large content + - Stored intermediate summaries at each level + - Chunk size: 3000 tokens + +NEW System (3 levels): + - NONE (<100), BRIEF (100-500), MAP_REDUCE (>=500) + - Dynamic collapse depth based on content + - Content-type aware prompts + - Chunk size: 2048 tokens (BOOOOKSCORE research) + - Only stores final summary + +Trade-offs: + + Simpler (3 levels vs 5) + + Research-backed parameters + + Content-aware prompts + - No intermediate level access + - All >=500 token content treated the same +""") + + print("Verdict: ", end="") + if found_needles / total_needles >= FACT_PRESERVATION_THRESHOLD: + print("NEW system preserves facts adequately") + else: + print("NEW system may lose important details - further tuning needed") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/memory/test_engine.py b/tests/memory/test_engine.py index 0f032adf..d8cd3526 100644 --- a/tests/memory/test_engine.py +++ b/tests/memory/test_engine.py @@ -21,8 +21,8 @@ MemoryMetadata, Message, StoredMemory, - SummaryOutput, ) +from agent_cli.summarizer import SummaryResult class _DummyReranker: @@ -247,13 +247,13 @@ def fake_query_memories( ) monkeypatch.setattr( _retrieval, - "get_summary_entry", - lambda _collection, _cid, role: StoredMemory( # type: ignore[return-value] - id=f"{role}-id", - content=f"{role} content", + "get_final_summary", + lambda _collection, _cid: StoredMemory( + id="summary-id", + content="summary content", metadata=MemoryMetadata( conversation_id="conv1", - role=role, + role="summary", created_at=now.isoformat(), ), ), @@ -342,11 +342,17 @@ def __init__(self, output: Any) -> None: self.output = output prompt_str = str(prompt_text) - if "New facts:" in prompt_str: - return 
_Result(SummaryOutput(summary="summary up to 256")) if "Hello, I enjoy biking" in prompt_str: return _Result(["User likes cats.", "User loves biking."]) - return _Result(SummaryOutput(summary="noop")) + return _Result([]) + + async def fake_summarize_content(**_kwargs: Any) -> SummaryResult: + return SummaryResult( + summary="summary up to 256", + input_tokens=100, + output_tokens=20, + compression_ratio=0.2, + ) async def fake_reconcile( _collection: Any, @@ -370,6 +376,7 @@ async def fake_reconcile( import pydantic_ai # noqa: PLC0415 monkeypatch.setattr(pydantic_ai.Agent, "run", fake_agent_run) + monkeypatch.setattr(_ingest, "summarize_content", fake_summarize_content) # High relevance so they aren't filtered monkeypatch.setattr(_retrieval, "predict_relevance", lambda _model, pairs: [5.0 for _ in pairs]) @@ -562,11 +569,17 @@ def __init__(self, output: Any) -> None: self.output = output prompt_str = str(prompt_text) - if "New facts:" in prompt_str: - return _Result(SummaryOutput(summary="summary text")) if "My cat is Luna" in prompt_str: return _Result(["User has a cat named Luna."]) - return _Result(SummaryOutput(summary="noop")) + return _Result([]) + + async def fake_summarize_content(**_kwargs: Any) -> SummaryResult: + return SummaryResult( + summary="summary text", + input_tokens=100, + output_tokens=20, + compression_ratio=0.2, + ) monkeypatch.setattr(engine._streaming, "stream_chat_sse", fake_stream_chat_sse) @@ -592,6 +605,7 @@ async def fake_reconcile( import pydantic_ai # noqa: PLC0415 monkeypatch.setattr(pydantic_ai.Agent, "run", fake_agent_run) + monkeypatch.setattr(_ingest, "summarize_content", fake_summarize_content) response = await engine.process_chat_request( request, @@ -608,4 +622,4 @@ async def fake_reconcile( files = list(tmp_path.glob("entries/**/*.md")) assert len(files) == 4 # user + assistant + fact + 1 summary assert any("facts" in str(f) for f in files) - assert any("summaries/summary.md" in str(f) for f in files) + assert any("summaries" in str(f) for f in files) diff --git a/tests/memory/test_git_integration.py b/tests/memory/test_git_integration.py index 7d59f7c0..64130990 100644 --- a/tests/memory/test_git_integration.py +++ b/tests/memory/test_git_integration.py @@ -14,6 +14,7 @@ from agent_cli.memory import _ingest from agent_cli.memory.client import MemoryClient from agent_cli.memory.entities import Fact +from agent_cli.summarizer import SummaryResult if TYPE_CHECKING: from pathlib import Path @@ -63,12 +64,17 @@ async def fake_reconcile( ] return entries, [], {} - async def fake_update_summary(*_args: Any, **_kwargs: Any) -> str: - return "User likes testing." 
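
These tests replace the old string-returning fake_update_summary with a fake that returns a SummaryResult, since ingest now persists structured summary metadata rather than a bare string. A minimal sketch, assuming the ingest layer wires the pieces together roughly like this (the agent_cli.memory._store import path mirrors tests/memory/test_store.py, and the persist_summary helper itself is illustrative, not code from the patch):

# Illustrative only: persist a SummaryResult the way the storage tests exercise it.
from agent_cli.memory import _store  # assumed import path, mirroring tests/memory/test_store.py
from agent_cli.summarizer import SummaryResult


def persist_summary(collection, conversation_id: str, result: SummaryResult) -> list[str]:
    # to_storage_metadata() returns [] when no summary was generated (NONE level),
    # so the upsert below is simply a no-op in that case.
    entries = result.to_storage_metadata(conversation_id)
    return _store.upsert_summary_entries(collection, entries)
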
+ async def fake_summarize_content(**_kwargs: Any) -> SummaryResult: + return SummaryResult( + summary="User likes testing.", + input_tokens=100, + output_tokens=20, + compression_ratio=0.2, + ) monkeypatch.setattr(_ingest, "extract_salient_facts", fake_extract) monkeypatch.setattr(_ingest, "reconcile_facts", fake_reconcile) - monkeypatch.setattr(_ingest, "update_summary", fake_update_summary) + monkeypatch.setattr(_ingest, "summarize_content", fake_summarize_content) # Patch Reranker to avoid loading ONNX model monkeypatch.setattr("agent_cli.memory.client.get_reranker_model", MagicMock()) diff --git a/tests/memory/test_store.py b/tests/memory/test_store.py index 98334e45..29dbe2e5 100644 --- a/tests/memory/test_store.py +++ b/tests/memory/test_store.py @@ -101,23 +101,6 @@ def query(self, **kwargs: Any) -> dict[str, Any]: assert {"role": {"$ne": "summary"}} in clauses -def test_get_summary_entry_returns_entry() -> None: - # ChromaDB's .get() returns flat lists (not nested like .query()) - fake = _FakeCollection( - get_result={ - "documents": ["summary text"], - "metadatas": [ - {"conversation_id": "c1", "role": "summary", "created_at": "now"}, - ], - "ids": ["sum1"], - }, - ) - entry = _store.get_summary_entry(fake, "c1", role="summary") - assert entry is not None - assert entry.id == "sum1" - assert entry.metadata.role == "summary" - - def test_list_conversation_entries_filters_summaries() -> None: # ChromaDB's .get() returns flat lists (not nested like .query()) fake = _FakeCollection( @@ -148,3 +131,144 @@ def test_upsert_and_delete_entries_delegate() -> None: _store.delete_entries(fake, ["x"]) assert fake.deleted == [["x"]] + + +# --- Summary Entry Tests --- + + +def test_upsert_summary_entries_simple() -> None: + """Test upserting a summary.""" + fake = _FakeCollection() + entries = [ + { + "id": "conv-123:summary", + "content": "A paragraph summary.", + "metadata": { + "conversation_id": "conv-123", + "role": "summary", + "is_final": True, + "summary_level": "MAP_REDUCE", + "input_tokens": 1000, + "output_tokens": 50, + "compression_ratio": 0.05, + "collapse_depth": 0, + "created_at": "2024-01-01T00:00:00", + }, + }, + ] + + ids = _store.upsert_summary_entries(fake, entries) + + assert ids == ["conv-123:summary"] + assert len(fake.upserts) == 1 + upserted_ids, upserted_docs, upserted_metas = fake.upserts[0] + assert upserted_ids == ["conv-123:summary"] + assert upserted_docs == ["A paragraph summary."] + assert upserted_metas[0]["is_final"] is True + + +def test_upsert_summary_entries_with_collapse_depth() -> None: + """Test upserting a summary with collapse depth metadata.""" + fake = _FakeCollection() + entries = [ + { + "id": "conv-456:summary", + "content": "Final synthesis", + "metadata": { + "conversation_id": "conv-456", + "role": "summary", + "is_final": True, + "summary_level": "MAP_REDUCE", + "input_tokens": 5000, + "output_tokens": 100, + "compression_ratio": 0.02, + "collapse_depth": 2, + "created_at": "2024-01-01T00:00:00", + }, + }, + ] + + ids = _store.upsert_summary_entries(fake, entries) + + assert len(ids) == 1 + assert ids[0] == "conv-456:summary" + assert fake.upserts[0][2][0]["collapse_depth"] == 2 + + +def test_upsert_summary_entries_empty() -> None: + """Test upserting when there are no entries (e.g., NONE level).""" + fake = _FakeCollection() + + ids = _store.upsert_summary_entries(fake, []) + + assert ids == [] + assert len(fake.upserts) == 0 + + +def test_get_final_summary_returns_summary() -> None: + """Test getting the final summary for a conversation.""" 
+ fake = _FakeCollection( + get_result={ + "documents": ["The final summary"], + "metadatas": [ + { + "conversation_id": "c1", + "role": "summary", + "is_final": True, + "summary_level": "MAP_REDUCE", + "collapse_depth": 1, + "created_at": "now", + }, + ], + "ids": ["c1:summary"], + }, + ) + + result = _store.get_final_summary(fake, "c1") + + assert result is not None + assert result.content == "The final summary" + assert result.metadata.is_final is True + + +def test_get_final_summary_returns_none_when_missing() -> None: + """Test that get_final_summary returns None when no summary exists.""" + fake = _FakeCollection(get_result={"documents": [], "metadatas": [], "ids": []}) + + result = _store.get_final_summary(fake, "c1") + + assert result is None + + +def test_delete_summaries() -> None: + """Test deleting summaries for a conversation.""" + fake = _FakeCollection( + get_result={ + "documents": ["The summary"], + "metadatas": [ + { + "conversation_id": "c1", + "role": "summary", + "summary_level": "MAP_REDUCE", + "created_at": "now", + }, + ], + "ids": ["c1:summary"], + }, + ) + + deleted_count = _store.delete_summaries(fake, "c1") + + assert deleted_count == 1 + assert len(fake.deleted) == 1 + assert fake.deleted[0] == ["c1:summary"] + + +def test_delete_summaries_no_entries() -> None: + """Test deleting when no summaries exist.""" + fake = _FakeCollection(get_result={"documents": [], "metadatas": [], "ids": []}) + + deleted_count = _store.delete_summaries(fake, "c1") + + assert deleted_count == 0 + assert len(fake.deleted) == 0 diff --git a/tests/summarizer/__init__.py b/tests/summarizer/__init__.py new file mode 100644 index 00000000..d6801b31 --- /dev/null +++ b/tests/summarizer/__init__.py @@ -0,0 +1 @@ +"""Tests for the adaptive summarizer module.""" diff --git a/tests/summarizer/test_adaptive.py b/tests/summarizer/test_adaptive.py new file mode 100644 index 00000000..1fbf3d7b --- /dev/null +++ b/tests/summarizer/test_adaptive.py @@ -0,0 +1,245 @@ +"""Unit tests for adaptive summarization functions.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from agent_cli.summarizer._utils import ( + SummarizationError, + SummarizerConfig, + SummaryOutput, + generate_summary, +) +from agent_cli.summarizer.adaptive import summarize +from agent_cli.summarizer.map_reduce import MapReduceResult + + +class TestSummarizerConfig: + """Tests for SummarizerConfig initialization.""" + + def test_basic_init(self) -> None: + """Test basic initialization with required parameters.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="llama3.1:8b", + ) + assert config.openai_base_url == "http://localhost:8000/v1" + assert config.model == "llama3.1:8b" + assert config.api_key == "not-needed" + + def test_init_with_api_key(self) -> None: + """Test initialization with custom API key.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + api_key="sk-test-key", + ) + assert config.api_key == "sk-test-key" + + def test_init_with_custom_settings(self) -> None: + """Test initialization with custom chunk settings.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + chunk_size=5000, + chunk_overlap=300, + max_concurrent_chunks=10, + ) + assert config.chunk_size == 5000 + assert config.chunk_overlap == 300 + assert config.max_concurrent_chunks == 10 + + def test_trailing_slash_stripped(self) -> None: + """Test that trailing slash 
is stripped from base URL.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1/", + model="gpt-4", + ) + assert config.openai_base_url == "http://localhost:8000/v1" + + def test_default_chunk_size_is_booookscore(self) -> None: + """Test that default chunk_size follows BOOOOKSCORE recommendation.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + assert config.chunk_size == 2048 # BOOOOKSCORE's tested default + + def test_default_token_max_is_langchain(self) -> None: + """Test that default token_max follows LangChain's default.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + assert config.token_max == 3000 # LangChain's default + + +class TestSummarize: + """Tests for main summarize function.""" + + @pytest.fixture + def config(self) -> SummarizerConfig: + """Create a config instance.""" + return SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + + @pytest.mark.asyncio + async def test_empty_content_returns_no_summary( + self, + config: SummarizerConfig, + ) -> None: + """Test that empty content returns result with no summary.""" + result = await summarize("", config) + assert result.summary is None + assert result.input_tokens == 0 + assert result.output_tokens == 0 + + @pytest.mark.asyncio + async def test_whitespace_only_returns_no_summary( + self, + config: SummarizerConfig, + ) -> None: + """Test that whitespace-only content returns result with no summary.""" + result = await summarize(" \n\n ", config) + assert result.summary is None + + @pytest.mark.asyncio + async def test_short_content_returns_as_is( + self, + config: SummarizerConfig, + ) -> None: + """Test that short content is returned as-is (no LLM call).""" + # Less than default token_max (3000) + result = await summarize("Hello world", config) + assert result.summary == "Hello world" + assert result.compression_ratio == 1.0 # No compression + + @pytest.mark.asyncio + async def test_target_tokens_respected( + self, + config: SummarizerConfig, + ) -> None: + """Test that content fitting target_tokens is returned as-is.""" + content = "Short content" + result = await summarize(content, config, target_tokens=1000) + assert result.summary == content + assert result.compression_ratio == 1.0 + + @pytest.mark.asyncio + async def test_target_ratio_calculates_target( + self, + config: SummarizerConfig, + ) -> None: + """Test that target_ratio calculates correct target.""" + # Short content that fits even with 10% target + content = "Hello" + result = await summarize(content, config, target_ratio=0.1) + # Content is so short it fits in 10% target + assert result.summary == content + + @pytest.mark.asyncio + @patch("agent_cli.summarizer.adaptive._content_aware_summary") + async def test_content_exceeding_target_gets_summarized( + self, + mock_summary: AsyncMock, + config: SummarizerConfig, + ) -> None: + """Test that content exceeding target gets summarized.""" + mock_summary.return_value = "Summarized content." + + # Create content that's ~500 tokens (exceeds target of 100) + content = "This is a test sentence. " * 100 + + result = await summarize(content, config, target_tokens=100) + + mock_summary.assert_called_once() + assert result.summary == "Summarized content." 
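
Taken together, the tests in this class pin down how summarize() routes input: empty or whitespace-only text produces no summary, text that already fits the target is returned unchanged, text above the target but within a single chunk takes the content-aware single pass, and anything larger is handed to map-reduce. A rough sketch of that routing, with the helper names taken from the @patch targets but their parameters assumed, not taken from the patch:

# Rough routing sketch only; the helper parameters below are assumptions, not the
# actual signatures in agent_cli.summarizer.adaptive.
from agent_cli.summarizer._utils import SummarizerConfig, count_tokens
from agent_cli.summarizer.adaptive import _content_aware_summary, map_reduce_summarize


async def route_summarize(content: str, config: SummarizerConfig, target_tokens: int) -> str | None:
    if not content.strip():
        return None  # empty input -> SummaryResult(summary=None)
    tokens = count_tokens(content, config.model)
    if tokens <= target_tokens:
        return content  # already fits -> returned as-is, compression_ratio == 1.0
    if tokens <= config.chunk_size:
        # Single content-aware pass (assumed call shape).
        return await _content_aware_summary(content, config, target_tokens)
    # Chunk, summarize chunks concurrently, collapse until the result fits (assumed call shape).
    result = await map_reduce_summarize(content, config, target_tokens)
    return result.summary
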
+ + @pytest.mark.asyncio + @patch("agent_cli.summarizer.adaptive.map_reduce_summarize") + async def test_large_content_uses_map_reduce( + self, + mock_map_reduce: AsyncMock, + config: SummarizerConfig, + ) -> None: + """Test that content exceeding chunk_size uses map-reduce.""" + mock_map_reduce.return_value = MapReduceResult( + summary="Map-reduce summary.", + input_tokens=5000, + output_tokens=100, + compression_ratio=0.02, + collapse_depth=1, + intermediate_summaries=[["chunk1", "chunk2"]], + ) + + # Create content larger than chunk_size (2048) + content = "Word " * 3000 # ~3000 tokens + + result = await summarize(content, config, target_tokens=500) + + mock_map_reduce.assert_called_once() + assert result.summary == "Map-reduce summary." + + +class TestGenerateSummary: + """Tests for generate_summary function.""" + + @pytest.fixture + def config(self) -> SummarizerConfig: + """Create a config instance.""" + return SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + + @pytest.mark.asyncio + async def test_generate_summary_with_pydantic_ai( + self, + config: SummarizerConfig, + ) -> None: + """Test summary generation using PydanticAI agent.""" + # Mock the entire agent creation and run + mock_result = MagicMock() + mock_result.output = SummaryOutput(summary="Generated summary.") + + with patch("pydantic_ai.Agent") as mock_agent_class: + mock_agent = MagicMock() + mock_agent.run = AsyncMock(return_value=mock_result) + mock_agent_class.return_value = mock_agent + + result = await generate_summary("Test prompt", config, max_tokens=100) + + assert result == "Generated summary." + mock_agent.run.assert_called_once_with("Test prompt") + + @pytest.mark.asyncio + async def test_raises_summarization_error_on_failure( + self, + config: SummarizerConfig, + ) -> None: + """Test that SummarizationError is raised on failure.""" + with patch("pydantic_ai.Agent") as mock_agent_class: + mock_agent = MagicMock() + mock_agent.run = AsyncMock(side_effect=Exception("API error")) + mock_agent_class.return_value = mock_agent + + with pytest.raises(SummarizationError, match="Summarization failed"): + await generate_summary("Test prompt", config, max_tokens=100) + + +class TestSummaryOutput: + """Tests for SummaryOutput pydantic model.""" + + def test_basic_creation(self) -> None: + """Test creating a SummaryOutput.""" + output = SummaryOutput(summary="Test summary text") + assert output.summary == "Test summary text" + + def test_whitespace_preserved(self) -> None: + """Test that whitespace in summary is preserved.""" + output = SummaryOutput(summary=" Summary with spaces ") + assert output.summary == " Summary with spaces " diff --git a/tests/summarizer/test_integration.py b/tests/summarizer/test_integration.py new file mode 100644 index 00000000..867815ce --- /dev/null +++ b/tests/summarizer/test_integration.py @@ -0,0 +1,40 @@ +"""Integration tests for summarizer with storage layer.""" + +from __future__ import annotations + +from agent_cli.summarizer.models import SummaryResult + + +class TestSummaryResultStorage: + """Tests for SummaryResult storage metadata generation.""" + + def test_to_storage_metadata_creates_entry(self) -> None: + """Test that to_storage_metadata creates a valid entry.""" + result = SummaryResult( + summary="A comprehensive summary.", + input_tokens=5000, + output_tokens=100, + compression_ratio=0.02, + collapse_depth=1, + ) + entries = result.to_storage_metadata("test-conversation") + + assert len(entries) == 1 + entry = entries[0] + assert 
entry["id"] == "test-conversation:summary" + assert entry["content"] == "A comprehensive summary." + assert entry["metadata"]["conversation_id"] == "test-conversation" + assert entry["metadata"]["role"] == "summary" + assert entry["metadata"]["is_final"] is True + assert entry["metadata"]["collapse_depth"] == 1 + + def test_no_summary_returns_empty(self) -> None: + """Test that no summary produces no storage entries.""" + result = SummaryResult( + summary=None, + input_tokens=50, + output_tokens=0, + compression_ratio=0.0, + ) + entries = result.to_storage_metadata("test-conversation") + assert entries == [] diff --git a/tests/summarizer/test_models.py b/tests/summarizer/test_models.py new file mode 100644 index 00000000..05d5625f --- /dev/null +++ b/tests/summarizer/test_models.py @@ -0,0 +1,117 @@ +"""Unit tests for summarizer models.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest + +from agent_cli.summarizer.models import ( + SummaryResult, +) + + +class TestSummaryResult: + """Tests for SummaryResult model.""" + + def test_result_with_no_summary(self) -> None: + """Test result when content already fits target.""" + result = SummaryResult( + summary=None, + input_tokens=50, + output_tokens=0, + compression_ratio=0.0, + ) + assert result.summary is None + assert result.collapse_depth == 0 + + def test_result_with_summary(self) -> None: + """Test result with a generated summary.""" + result = SummaryResult( + summary="A comprehensive summary.", + input_tokens=5000, + output_tokens=100, + compression_ratio=0.02, + collapse_depth=2, + ) + assert result.summary == "A comprehensive summary." + assert result.collapse_depth == 2 + + def test_to_storage_metadata_no_summary(self) -> None: + """Test that no summary produces no storage entries.""" + result = SummaryResult( + summary=None, + input_tokens=50, + output_tokens=0, + compression_ratio=0.0, + ) + entries = result.to_storage_metadata("conv-123") + assert entries == [] + + def test_to_storage_metadata_with_summary(self) -> None: + """Test storage metadata for a summary.""" + result = SummaryResult( + summary="A brief summary.", + input_tokens=200, + output_tokens=10, + compression_ratio=0.05, + ) + entries = result.to_storage_metadata("conv-456") + assert len(entries) == 1 + entry = entries[0] + assert entry["id"] == "conv-456:summary" + assert entry["content"] == "A brief summary." + assert entry["metadata"]["conversation_id"] == "conv-456" + assert entry["metadata"]["role"] == "summary" + assert entry["metadata"]["is_final"] is True + + def test_to_storage_metadata_with_collapse_depth(self) -> None: + """Test storage metadata includes collapse depth.""" + result = SummaryResult( + summary="Final synthesis of content.", + input_tokens=20000, + output_tokens=200, + compression_ratio=0.01, + collapse_depth=3, + ) + entries = result.to_storage_metadata("conv-789") + + assert len(entries) == 1 + entry = entries[0] + assert entry["id"] == "conv-789:summary" + assert entry["content"] == "Final synthesis of content." 
+ assert entry["metadata"]["collapse_depth"] == 3 + assert entry["metadata"]["is_final"] is True + + def test_compression_ratio_bounds(self) -> None: + """Test compression ratio validation.""" + # Valid ratio + result = SummaryResult( + summary="Test", + input_tokens=100, + output_tokens=10, + compression_ratio=0.1, + ) + assert result.compression_ratio == 0.1 + + # Ratio must be between 0 and 1 + with pytest.raises(ValueError, match="less than or equal to 1"): + SummaryResult( + summary="Test", + input_tokens=100, + output_tokens=10, + compression_ratio=1.5, + ) + + def test_created_at_default(self) -> None: + """Test that created_at is automatically set.""" + before = datetime.now(UTC) + result = SummaryResult( + summary="Test", + input_tokens=100, + output_tokens=10, + compression_ratio=0.1, + ) + after = datetime.now(UTC) + # All datetimes should be UTC-aware + assert before <= result.created_at <= after diff --git a/tests/summarizer/test_prompts.py b/tests/summarizer/test_prompts.py new file mode 100644 index 00000000..825fe077 --- /dev/null +++ b/tests/summarizer/test_prompts.py @@ -0,0 +1,168 @@ +"""Unit tests for summarizer prompt templates.""" + +from __future__ import annotations + +from agent_cli.summarizer._prompts import ( + CHUNK_SUMMARY_PROMPT, + CONVERSATION_SUMMARY_PROMPT, + DOCUMENT_SUMMARY_PROMPT, + GENERAL_SUMMARY_PROMPT, + JOURNAL_SUMMARY_PROMPT, + META_SUMMARY_PROMPT, + format_prior_context, + format_summaries_for_meta, + get_prompt_for_content_type, +) + + +class TestPromptTemplates: + """Tests for prompt template structure.""" + + def test_general_prompt_has_placeholders(self) -> None: + """Test GENERAL prompt contains required placeholders.""" + assert "{content}" in GENERAL_SUMMARY_PROMPT + assert "{prior_context}" in GENERAL_SUMMARY_PROMPT + assert "{max_words}" in GENERAL_SUMMARY_PROMPT + + result = GENERAL_SUMMARY_PROMPT.format( + content="Main content", + prior_context="Previous context", + max_words=100, + ) + assert "Main content" in result + assert "Previous context" in result + assert "100" in result + + def test_chunk_prompt_has_placeholders(self) -> None: + """Test CHUNK prompt contains required placeholders.""" + assert "{content}" in CHUNK_SUMMARY_PROMPT + assert "{chunk_index}" in CHUNK_SUMMARY_PROMPT + assert "{total_chunks}" in CHUNK_SUMMARY_PROMPT + assert "{max_words}" in CHUNK_SUMMARY_PROMPT + + result = CHUNK_SUMMARY_PROMPT.format( + content="Chunk content", + chunk_index=1, + total_chunks=5, + max_words=50, + ) + assert "Chunk content" in result + assert "1" in result + assert "5" in result + + def test_meta_prompt_has_placeholders(self) -> None: + """Test META prompt contains required placeholders.""" + assert "{summaries}" in META_SUMMARY_PROMPT + assert "{max_words}" in META_SUMMARY_PROMPT + + result = META_SUMMARY_PROMPT.format( + summaries="Summary 1\n\nSummary 2", + max_words=200, + ) + assert "Summary 1" in result + assert "200" in result + + def test_conversation_prompt_has_placeholders(self) -> None: + """Test CONVERSATION prompt contains required placeholders.""" + assert "{content}" in CONVERSATION_SUMMARY_PROMPT + assert "{max_words}" in CONVERSATION_SUMMARY_PROMPT + assert "{prior_context}" in CONVERSATION_SUMMARY_PROMPT + + def test_journal_prompt_has_placeholders(self) -> None: + """Test JOURNAL prompt contains required placeholders.""" + assert "{content}" in JOURNAL_SUMMARY_PROMPT + assert "{max_words}" in JOURNAL_SUMMARY_PROMPT + assert "{prior_context}" in JOURNAL_SUMMARY_PROMPT + + def 
test_document_prompt_has_placeholders(self) -> None: + """Test DOCUMENT prompt contains required placeholders.""" + assert "{content}" in DOCUMENT_SUMMARY_PROMPT + assert "{max_words}" in DOCUMENT_SUMMARY_PROMPT + assert "{prior_context}" in DOCUMENT_SUMMARY_PROMPT + + +class TestGetPromptForContentType: + """Tests for get_prompt_for_content_type function.""" + + def test_general_returns_general(self) -> None: + """Test general content type returns general prompt.""" + prompt = get_prompt_for_content_type("general") + assert prompt == GENERAL_SUMMARY_PROMPT + + def test_conversation_returns_conversation(self) -> None: + """Test conversation content type returns conversation prompt.""" + prompt = get_prompt_for_content_type("conversation") + assert prompt == CONVERSATION_SUMMARY_PROMPT + + def test_journal_returns_journal(self) -> None: + """Test journal content type returns journal prompt.""" + prompt = get_prompt_for_content_type("journal") + assert prompt == JOURNAL_SUMMARY_PROMPT + + def test_document_returns_document(self) -> None: + """Test document content type returns document prompt.""" + prompt = get_prompt_for_content_type("document") + assert prompt == DOCUMENT_SUMMARY_PROMPT + + def test_unknown_returns_general(self) -> None: + """Test unknown content type falls back to general.""" + prompt = get_prompt_for_content_type("unknown_type") + assert prompt == GENERAL_SUMMARY_PROMPT + + def test_empty_returns_general(self) -> None: + """Test empty string falls back to general.""" + prompt = get_prompt_for_content_type("") + assert prompt == GENERAL_SUMMARY_PROMPT + + +class TestFormatPriorContext: + """Tests for format_prior_context function.""" + + def test_with_prior_summary(self) -> None: + """Test formatting with a prior summary.""" + result = format_prior_context("Previous summary text") + assert "Prior context" in result + assert "Previous summary text" in result + + def test_without_prior_summary(self) -> None: + """Test formatting without prior summary returns empty string.""" + result = format_prior_context(None) + assert result == "" + + def test_empty_string_prior_summary(self) -> None: + """Test formatting with empty string prior summary.""" + result = format_prior_context("") + assert result == "" + + +class TestFormatSummariesForMeta: + """Tests for format_summaries_for_meta function.""" + + def test_single_summary(self) -> None: + """Test formatting a single summary.""" + result = format_summaries_for_meta(["Summary one"]) + assert "[Section 1]" in result + assert "Summary one" in result + + def test_multiple_summaries(self) -> None: + """Test formatting multiple summaries.""" + summaries = ["First summary", "Second summary", "Third summary"] + result = format_summaries_for_meta(summaries) + + assert "[Section 1]" in result + assert "[Section 2]" in result + assert "[Section 3]" in result + assert "First summary" in result + assert "Second summary" in result + assert "Third summary" in result + + def test_empty_list(self) -> None: + """Test formatting empty list.""" + result = format_summaries_for_meta([]) + assert result == "" + + def test_summaries_separated(self) -> None: + """Test summaries are separated by double newlines.""" + summaries = ["Sum 1", "Sum 2"] + result = format_summaries_for_meta(summaries) + assert "\n\n" in result diff --git a/tests/summarizer/test_utils.py b/tests/summarizer/test_utils.py new file mode 100644 index 00000000..89a44171 --- /dev/null +++ b/tests/summarizer/test_utils.py @@ -0,0 +1,137 @@ +"""Unit tests for summarizer utility 
functions.""" + +from __future__ import annotations + +from agent_cli.summarizer._utils import ( + chunk_text, + count_tokens, + estimate_summary_tokens, + tokens_to_words, +) + + +class TestCountTokens: + """Tests for count_tokens function.""" + + def test_empty_string(self) -> None: + """Test counting tokens in empty string.""" + assert count_tokens("") == 0 + + def test_simple_sentence(self) -> None: + """Test counting tokens in a simple sentence.""" + # "Hello world" is typically 2 tokens + count = count_tokens("Hello world") + assert count > 0 + assert count < 10 + + def test_longer_text(self) -> None: + """Test that longer text has more tokens.""" + short = count_tokens("Hello") + long = count_tokens("Hello world, this is a longer sentence with more words.") + assert long > short + + def test_different_model_fallback(self) -> None: + """Test that unknown models fall back to cl100k_base.""" + # Should not raise, should fall back gracefully + count = count_tokens("Hello world", model="unknown-model-xyz") + assert count > 0 + + +class TestChunkText: + """Tests for chunk_text function.""" + + def test_empty_text(self) -> None: + """Test chunking empty text returns empty list.""" + assert chunk_text("") == [] + + def test_short_text_single_chunk(self) -> None: + """Test that short text stays as single chunk.""" + text = "This is a short paragraph." + chunks = chunk_text(text, chunk_size=1000) + assert len(chunks) == 1 + assert chunks[0] == text + + def test_multiple_paragraphs_chunking(self) -> None: + """Test chunking multiple paragraphs.""" + paragraphs = ["Paragraph one. " * 50, "Paragraph two. " * 50, "Paragraph three. " * 50] + text = "\n\n".join(paragraphs) + + # Use small chunk size to force splitting + chunks = chunk_text(text, chunk_size=200, overlap=20) + assert len(chunks) > 1 + + def test_overlap_preserved(self) -> None: + """Test that chunks have overlap for context continuity.""" + # Create text that will definitely need chunking + text = "Sentence one about topic A. " * 20 + "\n\n" + "Sentence two about topic B. " * 20 + + chunks = chunk_text(text, chunk_size=100, overlap=30) + + # With overlap, later chunks should contain some content from earlier + if len(chunks) > 1: + # Overlap means adjacent chunks share some content + # This is a rough check - exact overlap depends on tokenization + assert len(chunks) >= 2 + + def test_large_paragraph_sentence_split(self) -> None: + """Test that large paragraphs are split by sentences.""" + # One giant paragraph with multiple sentences + sentences = [ + f"This is sentence number {i}. It contains important information." 
for i in range(50) + ] + text = " ".join(sentences) + + chunks = chunk_text(text, chunk_size=100, overlap=20) + assert len(chunks) > 1 + + +class TestEstimateSummaryTokens: + """Tests for estimate_summary_tokens function.""" + + def test_typical_input(self) -> None: + """Test typical input uses ~10% compression.""" + # ~10% compression, capped at 500, minimum 50 + result = estimate_summary_tokens(1000) + assert result == 100 # 1000 // 10 = 100 + + def test_medium_input(self) -> None: + """Test medium input stays within bounds.""" + result = estimate_summary_tokens(2000) + assert result == 200 # 2000 // 10 = 200 + assert result >= 50 # above floor + assert result <= 500 # below ceiling + + def test_large_input_hits_cap(self) -> None: + """Test large input hits 500 token cap.""" + result = estimate_summary_tokens(50000) + assert result == 500 # capped at 500 + + def test_small_input_uses_floor(self) -> None: + """Test small input uses 50 token floor.""" + result = estimate_summary_tokens(100) + assert result == 50 # floor of 50 (100 // 10 = 10, but min is 50) + + def test_very_small_input(self) -> None: + """Test very small input still uses floor.""" + result = estimate_summary_tokens(10) + assert result == 50 # floor of 50 + + +class TestTokensToWords: + """Tests for tokens_to_words function.""" + + def test_basic_conversion(self) -> None: + """Test basic token to word conversion.""" + # 1 token ≈ 0.75 words + assert tokens_to_words(100) == 75 + assert tokens_to_words(1000) == 750 + + def test_zero_tokens(self) -> None: + """Test zero tokens returns zero words.""" + assert tokens_to_words(0) == 0 + + def test_small_values(self) -> None: + """Test small token values.""" + assert tokens_to_words(1) == 0 # int(0.75) = 0 + assert tokens_to_words(2) == 1 # int(1.5) = 1 + assert tokens_to_words(4) == 3 # int(3.0) = 3 diff --git a/uv.lock b/uv.lock index f69fd193..9bf4c468 100644 --- a/uv.lock +++ b/uv.lock @@ -41,6 +41,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-timeout" }, { name = "ruff" }, + { name = "tiktoken" }, { name = "versioningit" }, ] memory = [ @@ -49,6 +50,7 @@ memory = [ { name = "huggingface-hub" }, { name = "onnxruntime" }, { name = "pyyaml" }, + { name = "tiktoken" }, { name = "transformers" }, { name = "watchfiles" }, ] @@ -74,6 +76,7 @@ test = [ { name = "pytest-cov" }, { name = "pytest-mock" }, { name = "pytest-timeout" }, + { name = "tiktoken" }, ] [package.dev-dependencies] @@ -90,6 +93,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-timeout" }, { name = "ruff" }, + { name = "tiktoken" }, { name = "versioningit" }, ] @@ -127,6 +131,8 @@ requires-dist = [ { name = "rich" }, { name = "ruff", marker = "extra == 'dev'" }, { name = "sounddevice" }, + { name = "tiktoken", marker = "extra == 'memory'", specifier = ">=0.5.0" }, + { name = "tiktoken", marker = "extra == 'test'", specifier = ">=0.5.0" }, { name = "transformers", marker = "extra == 'memory'", specifier = ">=4.30.0" }, { name = "transformers", marker = "extra == 'rag'", specifier = ">=4.30.0" }, { name = "typer" }, @@ -151,6 +157,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-timeout" }, { name = "ruff" }, + { name = "tiktoken", specifier = ">=0.5.0" }, { name = "versioningit" }, ] @@ -4378,6 +4385,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154, upload-time = 
"2024-03-12T14:34:36.569Z" }, ] +[[package]] +name = "tiktoken" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565, upload-time = "2025-10-06T20:21:44.566Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284, upload-time = "2025-10-06T20:21:45.622Z" }, + { url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201, upload-time = "2025-10-06T20:21:47.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444, upload-time = "2025-10-06T20:21:48.139Z" }, + { url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080, upload-time = "2025-10-06T20:21:49.246Z" }, + { url = "https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240, upload-time = "2025-10-06T20:21:50.274Z" }, + { url = "https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422, upload-time = "2025-10-06T20:21:51.734Z" }, + { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, + { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, + { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = 
"sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, + { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, + { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, + { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, + { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, + { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = 
"2025-10-06T20:22:08.418Z" }, + { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, + { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, + { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, + { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, + { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, + { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, + { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, + { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, + { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, + { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, +] + [[package]] name = "tinycss2" version = "1.4.0"