diff --git a/agent_cli/agents/__init__.py b/agent_cli/agents/__init__.py index 26fba061..1cb20a8b 100644 --- a/agent_cli/agents/__init__.py +++ b/agent_cli/agents/__init__.py @@ -8,6 +8,7 @@ rag_proxy, server, speak, + summarize, transcribe, voice_edit, ) @@ -20,6 +21,7 @@ "rag_proxy", "server", "speak", + "summarize", "transcribe", "voice_edit", ] diff --git a/agent_cli/agents/summarize.py b/agent_cli/agents/summarize.py new file mode 100644 index 00000000..ecfd1e05 --- /dev/null +++ b/agent_cli/agents/summarize.py @@ -0,0 +1,410 @@ +"""Summarize text files or stdin using adaptive map-reduce summarization.""" + +from __future__ import annotations + +import asyncio +import contextlib +import json +import sys +import time +from enum import Enum +from pathlib import Path # noqa: TC003 +from typing import TYPE_CHECKING + +import typer + +from agent_cli import config, opts +from agent_cli.cli import app +from agent_cli.core.utils import ( + console, + create_status, + print_command_line_args, + print_error_message, + print_input_panel, + print_output_panel, + print_with_style, + setup_logging, +) +from agent_cli.summarizer import SummarizationError, SummarizerConfig, summarize +from agent_cli.summarizer._utils import count_tokens + +if TYPE_CHECKING: + from agent_cli.summarizer import SummaryResult + + +class ContentType(str, Enum): + """Content type for specialized summarization prompts.""" + + general = "general" + conversation = "conversation" + journal = "journal" + document = "document" + + +class OutputFormat(str, Enum): + """Output format for the summarization result.""" + + text = "text" + json = "json" + full = "full" + + +def _read_input(file_path: Path | None) -> str | None: + """Read input from file or stdin.""" + if file_path: + if not file_path.exists(): + print_error_message( + f"File not found: {file_path}", + "Please check the file path and try again.", + ) + return None + return file_path.read_text(encoding="utf-8") + + # Read from stdin + if sys.stdin.isatty(): + print_error_message( + "No input provided", + "Provide a file path or pipe content via stdin.", + ) + return None + + return sys.stdin.read() + + +def _display_input_preview( + content: str, + token_count: int, + *, + quiet: bool, + max_preview_chars: int = 500, +) -> None: + """Display a preview of the input content.""" + if quiet: + return + + preview = content[:max_preview_chars] + if len(content) > max_preview_chars: + preview += f"\n... 
[{len(content) - max_preview_chars} more characters]" + + print_input_panel( + preview, + title=f"Input ({token_count:,} tokens)", + ) + + +def _display_result( + result: SummaryResult, + elapsed: float, + output_format: OutputFormat, + *, + quiet: bool, +) -> None: + """Display the summarization result.""" + if output_format == OutputFormat.json: + print(json.dumps(result.model_dump(mode="json"), indent=2)) + return + + if output_format == OutputFormat.full: + _display_full_result(result, elapsed, quiet=quiet) + return + + # Text output - just the summary + if quiet: + if result.summary: + print(result.summary) + elif result.summary: + print_output_panel( + result.summary, + title="Summary", + subtitle=f"[dim]{result.output_tokens:,} tokens | {result.compression_ratio:.1%} of original | {elapsed:.2f}s[/dim]", + ) + else: + print_with_style( + f"No summary generated (input too short: {result.input_tokens} tokens)", + style="yellow", + ) + + +def _display_full_result( + result: SummaryResult, + elapsed: float, + *, + quiet: bool, +) -> None: + """Display full result with all metadata.""" + if quiet: + if result.summary: + print(result.summary) + return + + console.print() + console.print("[bold cyan]Summarization Result[/bold cyan]") + console.print(f" Input tokens: [bold]{result.input_tokens:,}[/bold]") + console.print(f" Output tokens: [bold]{result.output_tokens:,}[/bold]") + console.print(f" Compression: [bold]{result.compression_ratio:.1%}[/bold]") + if result.collapse_depth > 0: + console.print(f" Collapse depth: [bold]{result.collapse_depth}[/bold]") + console.print(f" Time: [bold]{elapsed:.2f}s[/bold]") + console.print() + + if result.summary: + print_output_panel( + result.summary, + title="Summary", + ) + + +def _get_llm_config( + provider_cfg: config.ProviderSelection, + ollama_cfg: config.Ollama, + openai_llm_cfg: config.OpenAILLM, + gemini_llm_cfg: config.GeminiLLM, +) -> tuple[str, str, str | None]: + """Get openai_base_url, model, and api_key from provider config.""" + if provider_cfg.llm_provider == "ollama": + # Ollama uses OpenAI-compatible API at /v1 + base_url = ollama_cfg.llm_ollama_host.rstrip("/") + if not base_url.endswith("/v1"): + base_url = f"{base_url}/v1" + return base_url, ollama_cfg.llm_ollama_model, None + if provider_cfg.llm_provider == "openai": + base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1" + return base_url, openai_llm_cfg.llm_openai_model, openai_llm_cfg.openai_api_key + # gemini + return ( + "https://generativelanguage.googleapis.com/v1beta/openai", + gemini_llm_cfg.llm_gemini_model, + gemini_llm_cfg.gemini_api_key, + ) + + +async def _async_summarize( + content: str, + *, + content_type: ContentType, + prior_summary: str | None, + provider_cfg: config.ProviderSelection, + ollama_cfg: config.Ollama, + openai_llm_cfg: config.OpenAILLM, + gemini_llm_cfg: config.GeminiLLM, + general_cfg: config.General, + chunk_size: int, + chunk_overlap: int, + max_concurrent_chunks: int, + output_format: OutputFormat, +) -> None: + """Asynchronous summarization entry point.""" + setup_logging(general_cfg.log_level, general_cfg.log_file, quiet=general_cfg.quiet) + + openai_base_url, model, api_key = _get_llm_config( + provider_cfg, + ollama_cfg, + openai_llm_cfg, + gemini_llm_cfg, + ) + + token_count = count_tokens(content, model) + _display_input_preview(content, token_count, quiet=general_cfg.quiet) + + summarizer_config = SummarizerConfig( + openai_base_url=openai_base_url, + model=model, + api_key=api_key, + chunk_size=chunk_size, + 
chunk_overlap=chunk_overlap, + max_concurrent_chunks=max_concurrent_chunks, + ) + + try: + if not general_cfg.quiet: + status = create_status(f"Summarizing with {model}...", "bold yellow") + else: + status = contextlib.nullcontext() + + with status: + start_time = time.monotonic() + result = await summarize( + content, + summarizer_config, + prior_summary=prior_summary, + content_type=content_type.value, + ) + elapsed = time.monotonic() - start_time + + _display_result(result, elapsed, output_format, quiet=general_cfg.quiet) + + except SummarizationError as e: + print_error_message( + str(e), + f"Check that your LLM server is running at {openai_base_url}", + ) + sys.exit(1) + except Exception as e: + print_error_message(str(e), "An unexpected error occurred during summarization.") + sys.exit(1) + + +@app.command("summarize") +def summarize_command( + *, + file_path: Path | None = typer.Argument( # noqa: B008 + None, + help="Path to file to summarize. If not provided, reads from stdin.", + ), + # --- Content Options --- + content_type: ContentType = typer.Option( # noqa: B008 + ContentType.general, + "--type", + "-t", + help="Content type for specialized summarization prompts.", + rich_help_panel="Content Options", + ), + prior_summary: str | None = typer.Option( + None, + "--prior-summary", + help="Prior summary to integrate with (for rolling summaries).", + rich_help_panel="Content Options", + ), + prior_summary_file: Path | None = typer.Option( # noqa: B008 + None, + "--prior-summary-file", + help="File containing prior summary to integrate with.", + rich_help_panel="Content Options", + ), + # --- Chunking Options --- + chunk_size: int = typer.Option( + 2048, + "--chunk-size", + help="Target token count per chunk for map-reduce summarization.", + rich_help_panel="Chunking Options", + ), + chunk_overlap: int = typer.Option( + 200, + "--chunk-overlap", + help="Token overlap between chunks for context continuity.", + rich_help_panel="Chunking Options", + ), + max_concurrent_chunks: int = typer.Option( + 5, + "--max-concurrent", + help="Maximum number of chunks to process in parallel.", + rich_help_panel="Chunking Options", + ), + # --- Output Options --- + output_format: OutputFormat = typer.Option( # noqa: B008 + OutputFormat.text, + "--output", + "-o", + help="Output format: 'text' (summary only), 'json' (full result), 'full' (all levels).", + rich_help_panel="Output Options", + ), + # --- Provider Selection --- + llm_provider: str = opts.LLM_PROVIDER, + # --- LLM Configuration --- + # Ollama (local service) + llm_ollama_model: str = opts.LLM_OLLAMA_MODEL, + llm_ollama_host: str = opts.LLM_OLLAMA_HOST, + # OpenAI + llm_openai_model: str = opts.LLM_OPENAI_MODEL, + openai_api_key: str | None = opts.OPENAI_API_KEY, + openai_base_url: str | None = opts.OPENAI_BASE_URL, + # Gemini + llm_gemini_model: str = opts.LLM_GEMINI_MODEL, + gemini_api_key: str | None = opts.GEMINI_API_KEY, + # --- General Options --- + log_level: str = opts.LOG_LEVEL, + log_file: str | None = opts.LOG_FILE, + quiet: bool = opts.QUIET, + config_file: str | None = opts.CONFIG_FILE, + print_args: bool = opts.PRINT_ARGS, +) -> None: + """Summarize text using adaptive map-reduce summarization. 
+ + Reads from a file or stdin and produces a summary scaled to the input complexity: + + - NONE (<100 tokens): No summary needed + - BRIEF (100-500): Single sentence + - MAP_REDUCE (>500): Dynamic collapse until fits token budget + + Examples: + # Summarize a file + agent-cli summarize document.txt + + # Summarize with conversation-specific prompts + agent-cli summarize chat.txt --type conversation + + # Pipe content from stdin + cat book.txt | agent-cli summarize + + # Get full output with all metadata + agent-cli summarize large_document.txt --output full + + # Use OpenAI instead of Ollama + agent-cli summarize notes.md --llm-provider openai + + """ + if print_args: + print_command_line_args(locals()) + + # Create config objects following the standard pattern + provider_cfg = config.ProviderSelection( + llm_provider=llm_provider, + asr_provider="wyoming", # Not used, but required by model + tts_provider="wyoming", # Not used, but required by model + ) + ollama_cfg = config.Ollama( + llm_ollama_model=llm_ollama_model, + llm_ollama_host=llm_ollama_host, + ) + openai_llm_cfg = config.OpenAILLM( + llm_openai_model=llm_openai_model, + openai_api_key=openai_api_key, + openai_base_url=openai_base_url, + ) + gemini_llm_cfg = config.GeminiLLM( + llm_gemini_model=llm_gemini_model, + gemini_api_key=gemini_api_key, + ) + general_cfg = config.General( + log_level=log_level, + log_file=log_file, + quiet=quiet, + clipboard=False, # summarize doesn't use clipboard + ) + + # Read content + content = _read_input(file_path) + if content is None: + raise typer.Exit(1) + + if not content.strip(): + print_error_message("Empty input", "The input file or stdin is empty.") + raise typer.Exit(1) + + # Handle prior summary from file + actual_prior_summary = prior_summary + if prior_summary_file: + if not prior_summary_file.exists(): + print_error_message( + f"Prior summary file not found: {prior_summary_file}", + "Please check the file path.", + ) + raise typer.Exit(1) + actual_prior_summary = prior_summary_file.read_text(encoding="utf-8") + + asyncio.run( + _async_summarize( + content, + content_type=content_type, + prior_summary=actual_prior_summary, + provider_cfg=provider_cfg, + ollama_cfg=ollama_cfg, + openai_llm_cfg=openai_llm_cfg, + gemini_llm_cfg=gemini_llm_cfg, + general_cfg=general_cfg, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + max_concurrent_chunks=max_concurrent_chunks, + output_format=output_format, + ), + ) diff --git a/agent_cli/cli.py b/agent_cli/cli.py index d8e74c79..f36c5c61 100644 --- a/agent_cli/cli.py +++ b/agent_cli/cli.py @@ -60,6 +60,7 @@ def set_config_defaults(ctx: typer.Context, config_file: str | None) -> None: rag_proxy, server, speak, + summarize, transcribe, voice_edit, ) diff --git a/agent_cli/core/chroma.py b/agent_cli/core/chroma.py index 56d54ede..89e289b7 100644 --- a/agent_cli/core/chroma.py +++ b/agent_cli/core/chroma.py @@ -53,12 +53,28 @@ def upsert( ids: list[str], documents: list[str], metadatas: Sequence[BaseModel], + batch_size: int = 10, ) -> None: - """Upsert documents with JSON-serialized metadata.""" + """Upsert documents with JSON-serialized metadata. + + Args: + collection: ChromaDB collection. + ids: Document IDs. + documents: Document contents. + metadatas: Pydantic metadata models. + batch_size: Max documents per embedding API call (default: 10). 
+ + """ if not ids: return serialized = flatten_metadatas(metadatas) - collection.upsert(ids=ids, documents=documents, metadatas=serialized) + + # Process in batches to avoid overwhelming the embedding service + for i in range(0, len(ids), batch_size): + batch_ids = ids[i : i + batch_size] + batch_docs = documents[i : i + batch_size] + batch_metas = serialized[i : i + batch_size] + collection.upsert(ids=batch_ids, documents=batch_docs, metadatas=batch_metas) def delete(collection: Collection, ids: list[str]) -> None: diff --git a/agent_cli/memory/_files.py b/agent_cli/memory/_files.py index d55133d9..a51c7ad1 100644 --- a/agent_cli/memory/_files.py +++ b/agent_cli/memory/_files.py @@ -24,6 +24,11 @@ _SNAPSHOT_FILENAME = "memory_index.json" _DELETED_DIRNAME = "deleted" +# Summary level constants for file structure (kept for backward compatibility) +_SUMMARY_LEVEL_L1 = 1 +_SUMMARY_LEVEL_L2 = 2 +_SUMMARY_LEVEL_L3 = 3 + @dataclass class MemoryFileRecord: @@ -83,46 +88,74 @@ def soft_delete_memory_file( def write_memory_file( root: Path, *, - conversation_id: str, - role: str, - created_at: str, content: str, - summary_kind: str | None = None, doc_id: str | None = None, + # Either pass pre-built metadata OR individual fields + metadata: MemoryMetadata | None = None, + # Individual fields (used when metadata is None) + conversation_id: str | None = None, + role: str | None = None, + created_at: str | None = None, + summary_kind: str | None = None, source_id: str | None = None, ) -> MemoryFileRecord: - """Render and persist a memory document to disk.""" + """Render and persist a memory document to disk. + + Can be called in two ways: + 1. With pre-built metadata: write_memory_file(root, content=..., metadata=..., doc_id=...) + 2. With individual fields: write_memory_file(root, content=..., conversation_id=..., role=..., ...) 
+ + """ entries_dir, _ = ensure_store_dirs(root) - safe_conversation = _slugify(conversation_id) doc_id = doc_id or str(uuid4()) - safe_ts = _safe_timestamp(created_at) + + # Build or use provided metadata + if metadata is not None: + meta = metadata + else: + if conversation_id is None or role is None or created_at is None: + msg = "Must provide metadata or (conversation_id, role, created_at)" + raise ValueError(msg) + meta = MemoryMetadata( + conversation_id=conversation_id, + role=role, + created_at=created_at, + summary_kind=summary_kind, + source_id=source_id, + ) + + safe_conversation = _slugify(meta.conversation_id) + safe_ts = _safe_timestamp(meta.created_at) # Route by role/category for readability - if summary_kind: + if meta.summary_kind and meta.level is not None: + # Hierarchical summary file structure + if meta.level == _SUMMARY_LEVEL_L1: + subdir = Path("summaries") / "L1" + filename = f"chunk_{meta.chunk_index or 0}.md" + elif meta.level == _SUMMARY_LEVEL_L2: + subdir = Path("summaries") / "L2" + filename = f"group_{meta.group_index or 0}.md" + else: # level == _SUMMARY_LEVEL_L3 + subdir = Path("summaries") / "L3" + filename = "final.md" + elif meta.summary_kind: subdir = Path("summaries") filename = "summary.md" - elif role == "user": + elif meta.role == "user": subdir = Path("turns") / "user" filename = f"{safe_ts}__{doc_id}.md" - elif role == "assistant": + elif meta.role == "assistant": subdir = Path("turns") / "assistant" filename = f"{safe_ts}__{doc_id}.md" - elif role == "memory": + elif meta.role == "memory": subdir = Path("facts") filename = f"{safe_ts}__{doc_id}.md" else: subdir = Path() filename = f"{doc_id}.md" - metadata = MemoryMetadata( - conversation_id=conversation_id, - role=role, - created_at=created_at, - summary_kind=summary_kind, - source_id=source_id, - ) - - front_matter = _render_front_matter(doc_id, metadata) + front_matter = _render_front_matter(doc_id, meta) body = front_matter + "\n" + content.strip() + "\n" file_path = entries_dir / safe_conversation / subdir / filename @@ -130,7 +163,7 @@ def write_memory_file( atomic_write_text(file_path, body) - return MemoryFileRecord(id=doc_id, path=file_path, metadata=metadata, content=content) + return MemoryFileRecord(id=doc_id, path=file_path, metadata=meta, content=content) def load_memory_files(root: Path) -> list[MemoryFileRecord]: diff --git a/agent_cli/memory/_ingest.py b/agent_cli/memory/_ingest.py index 6211c029..b98bee13 100644 --- a/agent_cli/memory/_ingest.py +++ b/agent_cli/memory/_ingest.py @@ -12,23 +12,25 @@ import httpx from agent_cli.memory._git import commit_changes -from agent_cli.memory._persistence import delete_memory_files, persist_entries, persist_summary +from agent_cli.memory._persistence import ( + delete_memory_files, + persist_entries, + persist_summary, +) from agent_cli.memory._prompt import ( FACT_INSTRUCTIONS, FACT_SYSTEM_PROMPT, - SUMMARY_PROMPT, UPDATE_MEMORY_PROMPT, ) from agent_cli.memory._retrieval import gather_relevant_existing_memories -from agent_cli.memory._store import delete_entries, get_summary_entry -from agent_cli.memory.entities import Fact, Summary +from agent_cli.memory._store import delete_entries, get_final_summary +from agent_cli.memory.entities import Fact from agent_cli.memory.models import ( MemoryAdd, MemoryDecision, MemoryDelete, MemoryIgnore, MemoryUpdate, - SummaryOutput, ) if TYPE_CHECKING: @@ -36,9 +38,9 @@ from chromadb import Collection -LOGGER = logging.getLogger(__name__) + from agent_cli.summarizer import SummaryResult -_SUMMARY_ROLE = 
"summary" +LOGGER = logging.getLogger(__name__) def _elapsed_ms(start: float) -> float: @@ -178,7 +180,7 @@ async def reconcile_facts( existing_json = [{"id": idx, "text": mem.content} for idx, mem in enumerate(existing)] existing_ids = set(id_map.keys()) - from pydantic_ai import Agent, ModelRetry # noqa: PLC0415 + from pydantic_ai import Agent, ModelRetry, PromptedOutput # noqa: PLC0415 from pydantic_ai.exceptions import AgentRunError, UnexpectedModelBehavior # noqa: PLC0415 from pydantic_ai.models.openai import OpenAIChatModel # noqa: PLC0415 from pydantic_ai.providers.openai import OpenAIProvider # noqa: PLC0415 @@ -193,7 +195,7 @@ async def reconcile_facts( agent = Agent( model=model_cfg, system_prompt=UPDATE_MEMORY_PROMPT, - output_type=list[MemoryDecision], + output_type=PromptedOutput(list[MemoryDecision]), # JSON mode instead of tool calls retries=3, ) @@ -275,39 +277,74 @@ def validate_decisions(decisions: list[MemoryDecision]) -> list[MemoryDecision]: return to_add, to_delete, replacement_map -async def update_summary( +async def summarize_content( *, - prior_summary: str | None, - new_facts: list[str], + content: str, + prior_summary: str | None = None, + content_type: str = "general", openai_base_url: str, api_key: str | None, model: str, - max_tokens: int = 256, -) -> str | None: - """Update the conversation summary based on new facts.""" - if not new_facts: - return prior_summary +) -> SummaryResult: + """Adaptively summarize content based on its length. - from pydantic_ai import Agent # noqa: PLC0415 - from pydantic_ai.models.openai import OpenAIChatModel # noqa: PLC0415 - from pydantic_ai.providers.openai import OpenAIProvider # noqa: PLC0415 - from pydantic_ai.settings import ModelSettings # noqa: PLC0415 + Automatically selects the appropriate summarization strategy + (NONE, BRIEF, MAP_REDUCE) based on input token count. - system_prompt = SUMMARY_PROMPT - user_parts: list[str] = [] - if prior_summary: - user_parts.append(f"Previous summary:\n{prior_summary}") - user_parts.append("New facts:\n" + "\n".join(f"- {fact}" for fact in new_facts)) - prompt_text = "\n\n".join(user_parts) - provider = OpenAIProvider(api_key=api_key or "dummy", base_url=openai_base_url) - model_cfg = OpenAIChatModel( - model_name=model, - provider=provider, - settings=ModelSettings(temperature=0.2, max_tokens=max_tokens), + Args: + content: The content to summarize. + prior_summary: Optional prior summary for context continuity. + content_type: Type of content ("general", "conversation", "journal", "document"). + openai_base_url: Base URL for OpenAI-compatible API. + api_key: API key for the LLM. + model: Model name to use for summarization. + + Returns: + SummaryResult with the summary and metadata. + + """ + # Import here to avoid circular imports and allow optional dependency + from agent_cli.summarizer import SummarizerConfig, summarize # noqa: PLC0415 + + config = SummarizerConfig( + openai_base_url=openai_base_url, + model=model, + api_key=api_key, + ) + return await summarize( + content=content, + config=config, + prior_summary=prior_summary, + content_type=content_type, + ) + + +async def store_adaptive_summary( + collection: Collection, + memory_root: Path, + conversation_id: str, + summary_result: SummaryResult, +) -> list[str]: + """Store a summary result to files and ChromaDB. + + Old summaries are deleted first, then the new summary is stored. + + Args: + collection: ChromaDB collection. + memory_root: Root path for memory files. 
+ conversation_id: The conversation this summary belongs to. + summary_result: The result from summarize(). + + Returns: + List of IDs that were stored. + + """ + return persist_summary( + collection, + memory_root=memory_root, + conversation_id=conversation_id, + summary_result=summary_result, ) - agent = Agent(model=model_cfg, system_prompt=system_prompt, output_type=SummaryOutput) - result = await agent.run(prompt_text) - return result.output.summary or prior_summary async def extract_and_store_facts_and_summaries( @@ -370,37 +407,41 @@ async def extract_and_store_facts_and_summaries( entries=list(to_add), ) - if enable_summarization: - prior_summary_entry = get_summary_entry( - collection, - conversation_id, - role=_SUMMARY_ROLE, - ) + # Summarize raw conversation turns (not extracted facts) + has_content = user_message or assistant_message + if enable_summarization and has_content: + prior_summary_entry = get_final_summary(collection, conversation_id) prior_summary = prior_summary_entry.content if prior_summary_entry else None + # Build conversation transcript + parts = [] + if user_message: + parts.append(f"User: {user_message}") + if assistant_message: + parts.append(f"Assistant: {assistant_message}") + content_to_summarize = "\n".join(parts) + summary_start = perf_counter() - new_summary = await update_summary( + summary_result = await summarize_content( + content=content_to_summarize, prior_summary=prior_summary, - new_facts=facts, + content_type="conversation", openai_base_url=openai_base_url, api_key=api_key, model=model, ) LOGGER.info( - "Summary update completed in %.1f ms (conversation=%s)", + "Summary update completed in %.1f ms (conversation=%s, compression=%.1f%%)", _elapsed_ms(summary_start), conversation_id, + summary_result.compression_ratio * 100, ) - if new_summary: - summary_obj = Summary( - conversation_id=conversation_id, - content=new_summary, - created_at=datetime.now(UTC), - ) - persist_summary( + if summary_result.summary: + await store_adaptive_summary( collection, memory_root=memory_root, - summary=summary_obj, + conversation_id=conversation_id, + summary_result=summary_result, ) if enable_git_versioning: diff --git a/agent_cli/memory/_persistence.py b/agent_cli/memory/_persistence.py index bd8f4dfd..46ac0363 100644 --- a/agent_cli/memory/_persistence.py +++ b/agent_cli/memory/_persistence.py @@ -3,10 +3,13 @@ from __future__ import annotations import logging +import shutil +from datetime import UTC, datetime from typing import TYPE_CHECKING from agent_cli.memory._files import ( _DELETED_DIRNAME, + _slugify, ensure_store_dirs, load_snapshot, read_memory_file, @@ -14,15 +17,22 @@ write_memory_file, write_snapshot, ) -from agent_cli.memory._store import delete_entries, list_conversation_entries, upsert_memories -from agent_cli.memory.entities import Fact, Summary, Turn +from agent_cli.memory._store import ( + delete_entries, + delete_summaries, + list_conversation_entries, + upsert_memories, + upsert_summary_entries, +) +from agent_cli.memory.entities import Fact, Turn +from agent_cli.memory.models import MemoryMetadata if TYPE_CHECKING: from pathlib import Path from chromadb import Collection - from agent_cli.memory.models import MemoryMetadata + from agent_cli.summarizer import SummaryResult LOGGER = logging.getLogger(__name__) @@ -79,31 +89,6 @@ def persist_entries( upsert_memories(collection, ids=ids, contents=contents, metadatas=metadatas) -def persist_summary( - collection: Collection, - *, - memory_root: Path, - summary: Summary, -) -> None: - """Persist 
a summary to disk and Chroma.""" - doc_id = _safe_identifier(f"{summary.conversation_id}{_SUMMARY_DOC_ID_SUFFIX}-summary") - record = write_memory_file( - memory_root, - conversation_id=summary.conversation_id, - role="summary", - created_at=summary.created_at.isoformat(), - content=summary.content, - summary_kind="summary", - doc_id=doc_id, - ) - upsert_memories( - collection, - ids=[record.id], - contents=[record.content], - metadatas=[record.metadata], - ) - - def delete_memory_files( memory_root: Path, conversation_id: str, @@ -180,3 +165,99 @@ def evict_if_needed( ids_to_remove = [e.id for e in overflow] delete_entries(collection, ids_to_remove) delete_memory_files(memory_root, conversation_id, ids_to_remove) + + +def persist_summary( + collection: Collection, + *, + memory_root: Path, + conversation_id: str, + summary_result: SummaryResult, +) -> list[str]: + """Persist a summary to disk and ChromaDB. + + This function: + 1. Deletes existing summaries (files and ChromaDB entries) + 2. Writes new summary file to disk + 3. Stores entry in ChromaDB + + Args: + collection: ChromaDB collection. + memory_root: Root path for memory files. + conversation_id: The conversation this summary belongs to. + summary_result: The result from summarize(). + + Returns: + List of IDs that were stored. + + """ + # Skip if no summary was generated + if not summary_result.summary: + return [] + + # Delete existing summary files + _delete_summary_files(memory_root, conversation_id) + + # Delete existing ChromaDB entries + delete_summaries(collection, conversation_id) + + # Get storage metadata from SummaryResult + entries = summary_result.to_storage_metadata(conversation_id) + if not entries: + return [] + + stored_ids: list[str] = [] + created_at = datetime.now(UTC).isoformat() + + for entry in entries: + meta_dict = entry["metadata"] + # Build MemoryMetadata from the summary result's metadata dict + metadata = MemoryMetadata( + conversation_id=meta_dict["conversation_id"], + role=meta_dict["role"], + created_at=meta_dict.get("created_at", created_at), + summary_kind="summary", + is_final=meta_dict.get("is_final"), + input_tokens=meta_dict.get("input_tokens"), + output_tokens=meta_dict.get("output_tokens"), + compression_ratio=meta_dict.get("compression_ratio"), + summary_level=meta_dict.get("summary_level"), + collapse_depth=meta_dict.get("collapse_depth"), + ) + record = write_memory_file( + memory_root, + content=entry["content"], + doc_id=entry["id"], + metadata=metadata, + ) + LOGGER.info( + "Persisted summary file: %s (level=%s)", + record.path, + meta_dict.get("summary_level"), + ) + stored_ids.append(record.id) + + # Store in ChromaDB (reuse the entries we already built) + upsert_summary_entries(collection, entries) + + return stored_ids + + +def _delete_summary_files(memory_root: Path, conversation_id: str) -> None: + """Delete all summary files for a conversation.""" + entries_dir, _ = ensure_store_dirs(memory_root) + safe_conversation = _slugify(conversation_id) + summaries_dir = entries_dir / safe_conversation / "summaries" + + if summaries_dir.exists(): + # Move to deleted folder instead of hard delete + deleted_dir = entries_dir / _DELETED_DIRNAME / safe_conversation / "summaries" + deleted_dir.parent.mkdir(parents=True, exist_ok=True) + + # If deleted summaries already exist, remove them first + if deleted_dir.exists(): + shutil.rmtree(deleted_dir) + + # Move current summaries to deleted + shutil.move(str(summaries_dir), str(deleted_dir)) + LOGGER.info("Moved old summaries to deleted: %s", 
deleted_dir) diff --git a/agent_cli/memory/_retrieval.py b/agent_cli/memory/_retrieval.py index 6091f109..82c7296f 100644 --- a/agent_cli/memory/_retrieval.py +++ b/agent_cli/memory/_retrieval.py @@ -7,7 +7,7 @@ from datetime import UTC, datetime from typing import TYPE_CHECKING, Any -from agent_cli.memory._store import get_summary_entry, query_memories +from agent_cli.memory._store import get_final_summary, query_memories from agent_cli.memory.models import ( ChatRequest, MemoryEntry, @@ -24,7 +24,6 @@ LOGGER = logging.getLogger(__name__) _DEFAULT_MMR_LAMBDA = 0.7 -_SUMMARY_ROLE = "summary" def gather_relevant_existing_memories( @@ -202,7 +201,7 @@ def recency_score(meta: MemoryMetadata) -> float: summaries: list[str] = [] if include_summary: - summary_entry = get_summary_entry(collection, conversation_id, role=_SUMMARY_ROLE) + summary_entry = get_final_summary(collection, conversation_id) if summary_entry: summaries.append(f"Conversation summary:\n{summary_entry.content}") diff --git a/agent_cli/memory/_store.py b/agent_cli/memory/_store.py index 96e7c66a..36ace588 100644 --- a/agent_cli/memory/_store.py +++ b/agent_cli/memory/_store.py @@ -111,31 +111,6 @@ def query_memories( return records -def get_summary_entry( - collection: Collection, - conversation_id: str, - *, - role: str = "summary", -) -> StoredMemory | None: - """Return the latest summary entry for a conversation, if present.""" - result = collection.get( - where={"$and": [{"conversation_id": conversation_id}, {"role": role}]}, - ) - docs = result.get("documents") or [] - metas = result.get("metadatas") or [] - ids = result.get("ids") or [] - - if not docs or not metas or not ids: - return None - - return StoredMemory( - id=ids[0], - content=docs[0], - metadata=MemoryMetadata(**dict(metas[0])), - distance=None, - ) - - def list_conversation_entries( collection: Collection, conversation_id: str, @@ -167,3 +142,124 @@ def list_conversation_entries( def delete_entries(collection: Collection, ids: list[str]) -> None: """Delete entries by ID.""" delete_docs(collection, ids) + + +def upsert_summary_entries( + collection: Collection, + entries: list[dict[str, Any]], +) -> list[str]: + """Store pre-built summary entries (from to_storage_metadata) to ChromaDB.""" + if not entries: + return [] + + ids: list[str] = [] + contents: list[str] = [] + metadatas: list[MemoryMetadata] = [] + + for entry in entries: + ids.append(entry["id"]) + contents.append(entry["content"]) + # Convert the raw metadata dict to MemoryMetadata + meta_dict = entry["metadata"] + metadatas.append(MemoryMetadata(**meta_dict)) + + upsert_memories(collection, ids=ids, contents=contents, metadatas=metadatas) + return ids + + +def get_summary_at_level( + collection: Collection, + conversation_id: str, + level: int, +) -> list[StoredMemory]: + """Retrieve summaries at a specific level for a conversation. + + Args: + collection: ChromaDB collection. + conversation_id: The conversation to retrieve summaries for. + level: Summary level (1=chunk, 2=group, 3=final). + + Returns: + List of StoredMemory entries at the requested level. 
+ + """ + filters: list[dict[str, Any]] = [ + {"conversation_id": conversation_id}, + {"role": "summary"}, + {"level": level}, + ] + result = collection.get(where={"$and": filters}) + docs = result.get("documents") or [] + metas = result.get("metadatas") or [] + ids = result.get("ids") or [] + + records: list[StoredMemory] = [] + for doc, meta, entry_id in zip(docs, metas, ids, strict=False): + records.append( + StoredMemory( + id=entry_id, + content=doc, + metadata=MemoryMetadata(**dict(meta)), + distance=None, + ), + ) + return records + + +def get_final_summary( + collection: Collection, + conversation_id: str, +) -> StoredMemory | None: + """Get the L3 (final) summary for a conversation. + + This is a convenience wrapper around get_summary_at_level for the + most common use case of retrieving the top-level summary. + + Args: + collection: ChromaDB collection. + conversation_id: The conversation to retrieve the summary for. + + Returns: + The final summary entry, or None if not found. + + """ + summaries = get_summary_at_level(collection, conversation_id, level=3) + # Return the one marked as final, or the first if none marked + for summary in summaries: + if summary.metadata.is_final: + return summary + return summaries[0] if summaries else None + + +def delete_summaries( + collection: Collection, + conversation_id: str, + *, + levels: list[int] | None = None, +) -> int: + """Delete summary entries for a conversation. + + Args: + collection: ChromaDB collection. + conversation_id: The conversation to delete summaries from. + levels: Optional list of levels to delete. If None, deletes all levels. + + Returns: + Number of entries deleted. + + """ + filters: list[dict[str, Any]] = [ + {"conversation_id": conversation_id}, + {"role": "summary"}, + ] + if levels: + filters.append({"level": {"$in": levels}}) + + # First get the IDs to count them + result = collection.get(where={"$and": filters}) + ids = result.get("ids") or [] + + if ids: + delete_docs(collection, list(ids)) + + return len(ids) diff --git a/agent_cli/memory/client.py b/agent_cli/memory/client.py index b5ea3a7f..a3cc970d 100644 --- a/agent_cli/memory/client.py +++ b/agent_cli/memory/client.py @@ -14,7 +14,7 @@ from agent_cli.memory._ingest import extract_and_store_facts_and_summaries from agent_cli.memory._persistence import evict_if_needed from agent_cli.memory._retrieval import augment_chat_request -from agent_cli.memory._store import init_memory_collection +from agent_cli.memory._store import init_memory_collection, list_conversation_entries from agent_cli.memory.engine import process_chat_request from agent_cli.memory.models import ChatRequest, MemoryRetrieval, Message from agent_cli.rag._retriever import get_reranker_model @@ -185,6 +185,36 @@ async def search( ) return retrieval or MemoryRetrieval(entries=[]) + def list_all( + self, + conversation_id: str = "default", + include_summary: bool = False, + ) -> list[dict[str, Any]]: + """List all stored memories for a conversation. + + Args: + conversation_id: Conversation scope. + include_summary: Whether to include summary entries. + + Returns: + List of memory entries with id, content, and metadata. 
+ + """ + entries = list_conversation_entries( + self.collection, + conversation_id, + include_summary=include_summary, + ) + return [ + { + "id": e.id, + "content": e.content, + "role": e.metadata.role, + "created_at": e.metadata.created_at, + } + for e in entries + ] + async def chat( self, messages: list[dict[str, str]] | list[Any], diff --git a/agent_cli/memory/entities.py b/agent_cli/memory/entities.py index 70b16a78..a352b0bb 100644 --- a/agent_cli/memory/entities.py +++ b/agent_cli/memory/entities.py @@ -32,12 +32,3 @@ class Fact(BaseModel): source_id: str = Field(..., description="UUID of the Turn this fact was extracted from") created_at: datetime # Facts are always role="memory" implicitly in the storage layer - - -class Summary(BaseModel): - """The rolling summary of a conversation.""" - - conversation_id: str - content: str - created_at: datetime - # Summaries are role="summary" implicitly diff --git a/agent_cli/memory/models.py b/agent_cli/memory/models.py index 9ef076d5..d52d952c 100644 --- a/agent_cli/memory/models.py +++ b/agent_cli/memory/models.py @@ -4,7 +4,7 @@ from typing import Literal -from pydantic import BaseModel, ConfigDict, field_validator +from pydantic import BaseModel, ConfigDict class Message(BaseModel): @@ -49,19 +49,25 @@ class MemoryMetadata(BaseModel): replaced_by: str | None = None source_id: str | None = None - -class SummaryOutput(BaseModel): - """Structured summary returned by the LLM.""" - - summary: str - - @field_validator("summary") - @classmethod - def _not_empty(cls, v: str) -> str: - if not v or not str(v).strip(): - msg = "field must be non-empty" - raise ValueError(msg) - return str(v).strip() + # Summary fields (only used when role="summary") + level: int | None = None + """Summary level (deprecated, kept for file structure compatibility).""" + is_final: bool | None = None + """Whether this is the final summary.""" + chunk_index: int | None = None + """Deprecated: index of the source chunk.""" + group_index: int | None = None + """Deprecated: index of this group.""" + input_tokens: int | None = None + """Number of tokens in the original input.""" + output_tokens: int | None = None + """Number of tokens in the summary output.""" + compression_ratio: float | None = None + """Ratio of output to input tokens.""" + summary_level: str | None = None + """Deprecated: previously stored SummaryLevel enum name.""" + collapse_depth: int | None = None + """Number of collapse iterations in map-reduce (0 = no collapse needed).""" class StoredMemory(BaseModel): diff --git a/agent_cli/rag/client.py b/agent_cli/rag/client.py index 3e43939a..940985de 100644 --- a/agent_cli/rag/client.py +++ b/agent_cli/rag/client.py @@ -124,8 +124,17 @@ def add( for i in range(len(chunks)) ] - # Upsert to collection - self.collection.upsert(ids=ids, documents=chunks, metadatas=metadatas) + # Upsert to collection in batches to avoid overwhelming the embedding service + batch_size = 10 + for i in range(0, len(ids), batch_size): + batch_ids = ids[i : i + batch_size] + batch_docs = chunks[i : i + batch_size] + batch_metas = metadatas[i : i + batch_size] + self.collection.upsert( + ids=batch_ids, + documents=batch_docs, + metadatas=batch_metas, + ) logger.info("Added doc_id=%s with %d chunks", doc_id, len(chunks)) return doc_id diff --git a/agent_cli/summarizer/__init__.py b/agent_cli/summarizer/__init__.py new file mode 100644 index 00000000..7c7603b9 --- /dev/null +++ b/agent_cli/summarizer/__init__.py @@ -0,0 +1,42 @@ +"""Adaptive summarization module for variable-length 
content. + +This module provides map-reduce summarization inspired by LangChain's approach: +1. If content fits target, return as-is (no LLM call) +2. Otherwise, split into chunks and summarize each in parallel (map phase) +3. Recursively collapse summaries until they fit target (reduce phase) + +Research foundations: +- LangChain ReduceDocumentsChain: token_max=3000, recursive collapse +- BOOOOKSCORE (arXiv:2310.00785): chunk_size=2048 optimal + +Example: + from agent_cli.summarizer import summarize, SummarizerConfig + + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + + # Compress to fit 4000 tokens + result = await summarize(long_document, config, target_tokens=4000) + + # Compress to 20% of original size + result = await summarize(long_document, config, target_ratio=0.2) + + print(f"Compression: {result.compression_ratio:.1%}") + +""" + +from agent_cli.summarizer.adaptive import summarize +from agent_cli.summarizer.models import ( + SummarizationError, + SummarizerConfig, + SummaryResult, +) + +__all__ = [ + "SummarizationError", + "SummarizerConfig", + "SummaryResult", + "summarize", +] diff --git a/agent_cli/summarizer/_prompts.py b/agent_cli/summarizer/_prompts.py new file mode 100644 index 00000000..e49fd417 --- /dev/null +++ b/agent_cli/summarizer/_prompts.py @@ -0,0 +1,118 @@ +"""Prompt templates for adaptive summarization. + +These prompts are designed to work with various LLM sizes (8B-20B parameters) +and are optimized for structured, factual output. +""" + +# Paragraph summary for "general" content type (default when no specific type provided) +GENERAL_SUMMARY_PROMPT = """Summarize the following content concisely in a short paragraph. + +Focus on: +- Key facts, decisions, and outcomes +- Important context that should be remembered +- Skip transient details, greetings, and chitchat + +{prior_context} + +Content to summarize: +{content} + +Summary (maximum {max_words} words):""".strip() + +# CHUNK - Used in map phase of map-reduce summarization +CHUNK_SUMMARY_PROMPT = """Summarize this section of a longer document. +Capture the main points while preserving important details. + +Section {chunk_index} of {total_chunks}: +{content} + +Summary of this section (maximum {max_words} words):""".strip() + +# META - Combine multiple summaries in reduce phase +META_SUMMARY_PROMPT = """Synthesize these summaries into a single coherent overview. +Identify common themes and key points across all sections. +Eliminate redundancy while preserving unique insights. + +Summaries to combine: +{summaries} + +Combined summary (maximum {max_words} words):""".strip() + +# For conversation-specific summarization +CONVERSATION_SUMMARY_PROMPT = """Summarize this conversation from the AI assistant's perspective. +Focus on: +- What the user wanted or asked about +- Key information the user shared about themselves +- Decisions made or conclusions reached +- Any commitments or follow-ups mentioned + +{prior_context} + +Conversation: +{content} + +Summary (maximum {max_words} words):""".strip() + +# For journal/personal content +JOURNAL_SUMMARY_PROMPT = """Summarize this personal entry or reflection. 
+Preserve: +- Key events and experiences mentioned +- Emotions and insights expressed +- Goals, plans, or intentions stated +- People, places, or things that are important + +{prior_context} + +Entry: +{content} + +Summary (maximum {max_words} words):""".strip() + +# For technical/document content +DOCUMENT_SUMMARY_PROMPT = """Summarize this technical content or documentation. +Focus on: +- Main concepts and their relationships +- Key procedures or processes described +- Important specifications or requirements +- Conclusions or recommendations + +{prior_context} + +Document: +{content} + +Summary (maximum {max_words} words):""".strip() + + +def get_prompt_for_content_type(content_type: str) -> str: + """Get the appropriate prompt template for a content type. + + Args: + content_type: One of "general", "conversation", "journal", "document". + + Returns: + The prompt template string. + + """ + prompts = { + "general": GENERAL_SUMMARY_PROMPT, + "conversation": CONVERSATION_SUMMARY_PROMPT, + "journal": JOURNAL_SUMMARY_PROMPT, + "document": DOCUMENT_SUMMARY_PROMPT, + } + return prompts.get(content_type, GENERAL_SUMMARY_PROMPT) + + +def format_prior_context(prior_summary: str | None) -> str: + """Format prior summary context for inclusion in prompts.""" + if prior_summary: + return f"Prior context (for continuity):\n{prior_summary}\n" + return "" + + +def format_summaries_for_meta(summaries: list[str]) -> str: + """Format a list of summaries for the meta-summary prompt.""" + formatted = [] + for i, summary in enumerate(summaries, 1): + formatted.append(f"[Section {i}]\n{summary}") + return "\n\n".join(formatted) diff --git a/agent_cli/summarizer/_utils.py b/agent_cli/summarizer/_utils.py new file mode 100644 index 00000000..64c72b8f --- /dev/null +++ b/agent_cli/summarizer/_utils.py @@ -0,0 +1,246 @@ +"""Utility functions for adaptive summarization.""" + +from __future__ import annotations + +import re +from functools import lru_cache +from typing import TYPE_CHECKING + +from pydantic import BaseModel + +from agent_cli.summarizer.models import SummarizationError, SummarizerConfig + +if TYPE_CHECKING: + import tiktoken + + +class SummaryOutput(BaseModel): + """Structured output for summary generation.""" + + summary: str + + +async def generate_summary( + prompt: str, + config: SummarizerConfig, + max_tokens: int = 256, +) -> str: + """Call the LLM to generate a summary. + + Args: + prompt: The prompt to send to the LLM. + config: Summarizer configuration. + max_tokens: Maximum tokens for the response. + + Returns: + The generated summary text. + + Raises: + SummarizationError: If the LLM call fails. + + """ + from pydantic_ai import Agent # noqa: PLC0415 + from pydantic_ai.models.openai import OpenAIChatModel # noqa: PLC0415 + from pydantic_ai.providers.openai import OpenAIProvider # noqa: PLC0415 + from pydantic_ai.settings import ModelSettings # noqa: PLC0415 + + provider = OpenAIProvider(api_key=config.api_key, base_url=config.openai_base_url) + model = OpenAIChatModel( + model_name=config.model, + provider=provider, + settings=ModelSettings( + temperature=0.3, + max_tokens=max_tokens, + ), + ) + + agent = Agent( + model=model, + system_prompt="You are a concise summarizer. 
Output only the summary, no preamble.", + output_type=SummaryOutput, + retries=2, + ) + + try: + result = await agent.run(prompt) + return result.output.summary.strip() + except Exception as e: + msg = f"Summarization failed: {e}" + raise SummarizationError(msg) from e + + +@lru_cache(maxsize=4) +def _get_encoding(model: str = "gpt-4") -> tiktoken.Encoding | None: + """Get tiktoken encoding for a model, with caching. + + Falls back to cl100k_base for unknown models (covers most modern LLMs). + Returns None when tiktoken is not installed so callers can use a heuristic. + """ + try: + import tiktoken # noqa: PLC0415 + except ModuleNotFoundError: + return None + + try: + return tiktoken.encoding_for_model(model) + except KeyError: + return tiktoken.get_encoding("cl100k_base") + + +def count_tokens(text: str, model: str = "gpt-4") -> int: + """Count tokens using tiktoken, falling back to char-based estimate.""" + if not text: + return 0 + enc = _get_encoding(model) + if enc is None: + return _estimate_token_count(text) + # Disable special token checking - LLM outputs may contain special tokens + # like <|constrain|>, <|endoftext|>, etc. that we want to count normally + return len(enc.encode(text, disallowed_special=())) + + +def _estimate_token_count(text: str) -> int: + """Very rough token estimate based on character length (~4 chars/token).""" + return max(1, (len(text) + 3) // 4) + + +def chunk_text( + text: str, + chunk_size: int = 3000, + overlap: int = 200, + model: str = "gpt-4", +) -> list[str]: + """Split text into overlapping chunks by token count. + + Uses semantic boundaries (paragraphs, sentences) when possible to avoid + splitting mid-thought. Falls back to token-based splitting if no good + boundaries are found. + + Args: + text: The text to chunk. + chunk_size: Target token count per chunk. + overlap: Token overlap between chunks for context continuity. + model: Model name for tokenizer. + + Returns: + List of text chunks. 
+ + """ + if not text: + return [] + + total_tokens = count_tokens(text, model) + if total_tokens <= chunk_size: + return [text] + + # Split into paragraphs first + paragraphs = re.split(r"\n\s*\n", text) + paragraphs = [p.strip() for p in paragraphs if p.strip()] + + if not paragraphs: + return [text] + + chunks: list[str] = [] + current_chunk: list[str] = [] + current_tokens = 0 + + for para in paragraphs: + para_tokens = count_tokens(para, model) + + # If single paragraph exceeds chunk size, split it further + if para_tokens > chunk_size: + # Flush current chunk if any + if current_chunk: + chunks.append("\n\n".join(current_chunk)) + current_chunk = [] + current_tokens = 0 + + # Split large paragraph by sentences + sentences = _split_sentences(para) + for sentence in sentences: + sent_tokens = count_tokens(sentence, model) + if current_tokens + sent_tokens > chunk_size and current_chunk: + chunks.append(" ".join(current_chunk)) + # Keep overlap from end of previous chunk + overlap_text = _get_overlap_text(current_chunk, overlap, model) + current_chunk = [overlap_text] if overlap_text else [] + current_tokens = count_tokens(overlap_text, model) if overlap_text else 0 + current_chunk.append(sentence) + current_tokens += sent_tokens + elif current_tokens + para_tokens > chunk_size: + # Flush current chunk and start new one + chunks.append("\n\n".join(current_chunk)) + # Keep overlap from end of previous chunk + overlap_text = _get_overlap_text(current_chunk, overlap, model) + current_chunk = [overlap_text, para] if overlap_text else [para] + current_tokens = ( + count_tokens(overlap_text, model) + para_tokens if overlap_text else para_tokens + ) + else: + current_chunk.append(para) + current_tokens += para_tokens + + # Don't forget the last chunk + if current_chunk: + chunks.append("\n\n".join(current_chunk)) + + return chunks + + +def _split_sentences(text: str) -> list[str]: + """Split text into sentences, preserving common abbreviations.""" + # Simple sentence splitting that handles common cases + # Matches period/question/exclamation followed by space and capital letter + sentences = re.split(r"(?<=[.!?])\s+(?=[A-Z])", text) + return [s.strip() for s in sentences if s.strip()] + + +def _get_overlap_text(chunks: list[str], target_tokens: int, model: str) -> str: + """Extract overlap text from end of chunk list. + + Takes text from the end of the chunk list until reaching target_tokens. + """ + if not chunks or target_tokens <= 0: + return "" + + # Work backwards through chunks + overlap_parts: list[str] = [] + tokens_collected = 0 + + for chunk in reversed(chunks): + chunk_tokens = count_tokens(chunk, model) + if tokens_collected + chunk_tokens <= target_tokens: + overlap_parts.insert(0, chunk) + tokens_collected += chunk_tokens + else: + # Take partial chunk if needed + words = chunk.split() + partial: list[str] = [] + for word in reversed(words): + word_tokens = count_tokens(word, model) + if tokens_collected + word_tokens <= target_tokens: + partial.insert(0, word) + tokens_collected += word_tokens + else: + break + if partial: + overlap_parts.insert(0, " ".join(partial)) + break + + return " ".join(overlap_parts) + + +def estimate_summary_tokens(input_tokens: int) -> int: + """Estimate target summary tokens based on input size. + + Uses ~10% compression ratio with floor/ceiling bounds. + """ + return min(500, max(50, input_tokens // 10)) + + +def tokens_to_words(tokens: int) -> int: + """Convert token count to approximate word count. 
+ + Rough approximation: 1 token ≈ 0.75 words for English text. + """ + return int(tokens * 0.75) diff --git a/agent_cli/summarizer/adaptive.py b/agent_cli/summarizer/adaptive.py new file mode 100644 index 00000000..2a772062 --- /dev/null +++ b/agent_cli/summarizer/adaptive.py @@ -0,0 +1,164 @@ +"""Adaptive summarization using map-reduce with dynamic collapse. + +Implements a simple algorithm inspired by LangChain's map-reduce chains: +1. If content fits target, return as-is (no LLM call) +2. Otherwise, split into chunks and summarize each (map phase) +3. Recursively collapse summaries until they fit target (reduce phase) + +Research foundations: +- LangChain ReduceDocumentsChain: token_max=3000, recursive collapse +- BOOOOKSCORE (arXiv:2310.00785): chunk_size=2048 optimal + +See docs/architecture/summarizer.md for detailed design rationale. +""" + +from __future__ import annotations + +import logging + +from agent_cli.summarizer._prompts import ( + format_prior_context, + get_prompt_for_content_type, +) +from agent_cli.summarizer._utils import ( + count_tokens, + generate_summary, + tokens_to_words, +) +from agent_cli.summarizer.map_reduce import map_reduce_summarize +from agent_cli.summarizer.models import ( + SummarizerConfig, + SummaryResult, +) + +logger = logging.getLogger(__name__) + +__all__ = [ + "SummarizerConfig", + "summarize", +] + + +async def summarize( + content: str, + config: SummarizerConfig, + *, + target_tokens: int | None = None, + target_ratio: float | None = None, + prior_summary: str | None = None, + content_type: str = "general", +) -> SummaryResult: + """Summarize content to fit within a target token limit. + + Simple algorithm: + - If content already fits target, return as-is (no LLM call) + - Otherwise, use map-reduce to compress until it fits + + Args: + content: The content to summarize. + config: Summarizer configuration. + target_tokens: Absolute token limit (e.g., 4000). Defaults to config.token_max. + target_ratio: Relative compression ratio (e.g., 0.2 = compress to 20% of input). + Takes precedence over target_tokens if both provided. + prior_summary: Optional prior summary for context continuity. + content_type: Type of content ("general", "conversation", "journal", "document"). + + Returns: + SummaryResult with summary and compression metrics. + + Examples: + # Compress to fit 4000 tokens + result = await summarize(huge_doc, config, target_tokens=4000) + + # Compress to 20% of original size + result = await summarize(huge_doc, config, target_ratio=0.2) + + # Use default (config.token_max = 3000) + result = await summarize(huge_doc, config) + + """ + if not content or not content.strip(): + return SummaryResult( + summary=None, + input_tokens=0, + output_tokens=0, + compression_ratio=0.0, + ) + + input_tokens = count_tokens(content, config.model) + + # Determine target + if target_ratio is not None: + target = max(1, int(input_tokens * target_ratio)) + elif target_tokens is not None: + target = target_tokens + else: + target = config.token_max + + logger.info( + "Summarizing %d tokens to target %d (type=%s)", + input_tokens, + target, + content_type, + ) + + # Already fits? 
Return content as-is (no LLM call) + if input_tokens <= target: + return SummaryResult( + summary=content, + input_tokens=input_tokens, + output_tokens=input_tokens, + compression_ratio=1.0, + collapse_depth=0, + ) + + # Content fits in single chunk but exceeds target - use content-aware summary + if input_tokens <= config.chunk_size: + summary = await _content_aware_summary( + content, + config, + target, + prior_summary, + content_type, + ) + output_tokens = count_tokens(summary, config.model) + return SummaryResult( + summary=summary, + input_tokens=input_tokens, + output_tokens=output_tokens, + compression_ratio=output_tokens / input_tokens, + collapse_depth=0, + ) + + # Large content - use map-reduce with dynamic collapse + result = await map_reduce_summarize(content, config, target) + + return SummaryResult( + summary=result.summary, + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + compression_ratio=result.compression_ratio, + collapse_depth=result.collapse_depth, + ) + + +async def _content_aware_summary( + content: str, + config: SummarizerConfig, + target_tokens: int, + prior_summary: str | None, + content_type: str, +) -> str: + """Generate a content-type aware summary for single-chunk content.""" + max_words = tokens_to_words(target_tokens) + + prompt_template = get_prompt_for_content_type(content_type) + prior_context = format_prior_context(prior_summary) + + prompt = prompt_template.format( + content=content, + prior_context=prior_context, + max_words=max_words, + ) + + return await generate_summary(prompt, config, max_tokens=target_tokens + 50) diff --git a/agent_cli/summarizer/map_reduce.py b/agent_cli/summarizer/map_reduce.py new file mode 100644 index 00000000..86e8b796 --- /dev/null +++ b/agent_cli/summarizer/map_reduce.py @@ -0,0 +1,242 @@ +"""Map-reduce summarization inspired by LangChain's approach. + +Simple algorithm: +1. Map: Split content into chunks, summarize each in parallel +2. Reduce: If combined summaries exceed target, recursively collapse + +Key insight from LangChain: No need for predetermined levels (L1/L2/L3). +Just keep collapsing until content fits. Dynamic depth based on actual content. + +References: +- LangChain ReduceDocumentsChain: token_max=3000, recursive collapse +- BOOOOKSCORE: chunk_size=2048 optimal for summarization + +""" + +from __future__ import annotations + +import asyncio +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from agent_cli.summarizer._prompts import ( + CHUNK_SUMMARY_PROMPT, + META_SUMMARY_PROMPT, + format_summaries_for_meta, +) +from agent_cli.summarizer._utils import ( + chunk_text, + count_tokens, + estimate_summary_tokens, + generate_summary, + tokens_to_words, +) + +if TYPE_CHECKING: + from agent_cli.summarizer.models import SummarizerConfig + +logger = logging.getLogger(__name__) + + +@dataclass +class MapReduceResult: + """Result of map-reduce summarization. + + Attributes: + summary: The final collapsed summary. + input_tokens: Token count of original content. + output_tokens: Token count of final summary. + compression_ratio: output_tokens / input_tokens. + collapse_depth: How many reduce iterations were needed. + intermediate_summaries: All intermediate summaries (for debugging/storage). 
+ + """ + + summary: str + input_tokens: int + output_tokens: int + compression_ratio: float + collapse_depth: int + intermediate_summaries: list[list[str]] # Each level of collapse + + +async def map_reduce_summarize( + content: str, + config: SummarizerConfig, + target: int | None = None, + max_collapse_depth: int = 10, +) -> MapReduceResult: + """Summarize content using map-reduce with dynamic collapse. + + Algorithm: + 1. Split into chunks and summarize each (map phase) + 2. If combined summaries exceed target, recursively collapse (reduce phase) + 3. Continue until everything fits in target + + Args: + content: The content to summarize. + config: Summarizer configuration. + target: Target token count. Defaults to config.token_max. + max_collapse_depth: Safety limit on recursive collapse depth. + + Returns: + MapReduceResult with summary and metadata. + + """ + if target is None: + target = config.token_max + + input_tokens = count_tokens(content, config.model) + + # Map phase: Split and summarize chunks in parallel + chunks = chunk_text( + content, + chunk_size=config.chunk_size, + overlap=config.chunk_overlap, + model=config.model, + ) + + logger.info("Map phase: processing %d chunks", len(chunks)) + summaries = await _map_summarize(chunks, config) + intermediate_summaries = [summaries.copy()] + + # Reduce phase: Recursively collapse until fits target + depth = 0 + while _total_tokens(summaries, config.model) > target: + depth += 1 + if depth > max_collapse_depth: + logger.warning( + "Hit max collapse depth %d, forcing final summary", + max_collapse_depth, + ) + break + + logger.info( + "Reduce phase (depth %d): collapsing %d summaries (%d tokens) to target %d", + depth, + len(summaries), + _total_tokens(summaries, config.model), + target, + ) + summaries = await _collapse_summaries(summaries, config, target) + intermediate_summaries.append(summaries.copy()) + + # Final synthesis if we have multiple summaries left + if len(summaries) > 1: + final_summary = await _synthesize(summaries, config, target) + else: + final_summary = summaries[0] + + output_tokens = count_tokens(final_summary, config.model) + + return MapReduceResult( + summary=final_summary, + input_tokens=input_tokens, + output_tokens=output_tokens, + compression_ratio=output_tokens / input_tokens, + collapse_depth=depth, + intermediate_summaries=intermediate_summaries, + ) + + +def _total_tokens(texts: list[str], model: str) -> int: + """Count total tokens across all texts.""" + return sum(count_tokens(t, model) for t in texts) + + +async def _map_summarize(chunks: list[str], config: SummarizerConfig) -> list[str]: + """Summarize each chunk in parallel (map phase).""" + semaphore = asyncio.Semaphore(config.max_concurrent_chunks) + total = len(chunks) + + async def summarize_chunk(idx: int, chunk: str) -> str: + async with semaphore: + return await _summarize_chunk(chunk, idx, total, config) + + tasks = [summarize_chunk(i, chunk) for i, chunk in enumerate(chunks)] + return list(await asyncio.gather(*tasks)) + + +async def _summarize_chunk( + chunk: str, + chunk_index: int, + total_chunks: int, + config: SummarizerConfig, +) -> str: + """Summarize a single chunk.""" + source_tokens = count_tokens(chunk, config.model) + target_tokens = estimate_summary_tokens(source_tokens) + max_words = tokens_to_words(target_tokens) + + prompt = CHUNK_SUMMARY_PROMPT.format( + chunk_index=chunk_index + 1, + total_chunks=total_chunks, + content=chunk, + max_words=max_words, + ) + + return await generate_summary(prompt, config, 
max_tokens=target_tokens + 50) + + +async def _collapse_summaries( + summaries: list[str], + config: SummarizerConfig, + target: int, +) -> list[str]: + """Collapse summaries by grouping and re-summarizing (reduce phase). + + Groups summaries that together fit within target, then summarizes each group. + This is similar to LangChain's split_list_of_docs approach. + """ + if len(summaries) <= 1: + return summaries + + # Group summaries that together fit within target + groups: list[list[str]] = [] + current_group: list[str] = [] + current_tokens = 0 + + for summary in summaries: + summary_tokens = count_tokens(summary, config.model) + + # If adding this summary would exceed target, start new group + if current_tokens + summary_tokens > target and current_group: + groups.append(current_group) + current_group = [summary] + current_tokens = summary_tokens + else: + current_group.append(summary) + current_tokens += summary_tokens + + if current_group: + groups.append(current_group) + + # Summarize each group in parallel + semaphore = asyncio.Semaphore(config.max_concurrent_chunks) + + async def summarize_group(group: list[str]) -> str: + async with semaphore: + return await _synthesize(group, config, target) + + tasks = [summarize_group(g) for g in groups] + return list(await asyncio.gather(*tasks)) + + +async def _synthesize( + summaries: list[str], + config: SummarizerConfig, + target: int, +) -> str: + """Synthesize multiple summaries into one.""" + combined_tokens = sum(count_tokens(s, config.model) for s in summaries) + # Aim for target tokens but use estimate if combined is smaller + target_tokens = min(target, estimate_summary_tokens(combined_tokens)) + max_words = tokens_to_words(target_tokens) + + prompt = META_SUMMARY_PROMPT.format( + summaries=format_summaries_for_meta(summaries), + max_words=max_words, + ) + + return await generate_summary(prompt, config, max_tokens=target_tokens + 100) diff --git a/agent_cli/summarizer/models.py b/agent_cli/summarizer/models.py new file mode 100644 index 00000000..721201da --- /dev/null +++ b/agent_cli/summarizer/models.py @@ -0,0 +1,99 @@ +"""Data models for map-reduce summarization.""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import Any + +from pydantic import BaseModel, Field + + +class SummarizationError(Exception): + """Raised when summarization fails after all retries.""" + + +@dataclass +class SummarizerConfig: + """Configuration for summarization operations. + + Example: + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="llama3.1:8b", + ) + result = await summarize(long_document, config) + print(f"Compression: {result.compression_ratio:.1%}") + + """ + + openai_base_url: str + model: str + api_key: str | None = None + chunk_size: int = 2048 # BOOOOKSCORE's tested default + token_max: int = 3000 # LangChain's default - target size after compression + chunk_overlap: int = 200 + max_concurrent_chunks: int = 5 + + def __post_init__(self) -> None: + """Normalize the base URL.""" + self.openai_base_url = self.openai_base_url.rstrip("/") + if self.api_key is None: + self.api_key = "not-needed" + + +class SummaryResult(BaseModel): + """Result of summarization. + + Contains the summary and metadata about the compression achieved. 
+ """ + + summary: str | None = Field( + default=None, + description="The summary text (None if content already fit target)", + ) + input_tokens: int = Field(..., ge=0, description="Token count of the input content") + output_tokens: int = Field(..., ge=0, description="Token count of the output") + compression_ratio: float = Field( + ..., + ge=0.0, + le=1.0, + description="Ratio of output to input tokens (lower = more compression)", + ) + collapse_depth: int = Field( + default=0, + ge=0, + description="Number of collapse iterations in map-reduce (0 = no collapse needed)", + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC), + description="Timestamp when summary was created", + ) + + def to_storage_metadata(self, conversation_id: str) -> list[dict[str, Any]]: + """Convert to metadata entry for ChromaDB storage. + + Returns a list with a single metadata dict for the summary. + Returns empty list if no summary was generated. + """ + if not self.summary: + return [] + + timestamp = self.created_at.isoformat() + + return [ + { + "id": f"{conversation_id}:summary", + "content": self.summary, + "metadata": { + "conversation_id": conversation_id, + "role": "summary", + "is_final": True, + "input_tokens": self.input_tokens, + "output_tokens": self.output_tokens, + "compression_ratio": self.compression_ratio, + "collapse_depth": self.collapse_depth, + "created_at": timestamp, + }, + }, + ] diff --git a/docs/aijournal-poc-comparison.md b/docs/aijournal-poc-comparison.md new file mode 100644 index 00000000..a6f928f0 --- /dev/null +++ b/docs/aijournal-poc-comparison.md @@ -0,0 +1,245 @@ +# AI Journal POC vs aijournal: Detailed Comparison + +This document analyzes the differences between our MemoryClient-based AI Journal POC and the full-featured aijournal project, identifying strengths, gaps, and potential paths forward. + +## Executive Summary + +| Aspect | Our POC | aijournal | +|--------|---------|-----------| +| **Complexity** | ~200 LOC | ~15,000+ LOC | +| **Setup Time** | Instant | `aijournal init` + config | +| **Profile Storage** | Generated on-demand | Persisted YAML with versioning | +| **Claim System** | Raw fact strings | Typed atoms with strength/decay | +| **Context Layers** | Single flat layer | 4 hierarchical layers (L1-L4) | +| **Learning** | Static extraction | Feedback loops + interview probing | + +## 1. Architecture Comparison + +### 1.1 Data Model + +**Our POC:** +``` +~/.aijournal/ + entries/ + journal/ + facts/ # Extracted facts as markdown + turns/ # Chat turns + chroma/ # Vector embeddings +``` + +**aijournal:** +``` +workspace/ + data/ + journal/YYYY/MM/DD/*.md # Raw entries + normalized/YYYY-MM-DD/ # Structured YAML + profile/ + self_profile.yaml # Facets (values, goals, traits) + claims.yaml # Typed claim atoms + derived/ + summaries/ # Daily summaries + microfacts/ # Extracted facts + persona/persona_core.yaml # L1 context (~1200 tokens) + index/ # Vector store + metadata + chat_sessions/ # Conversation history + pending/profile_updates/ # Queued changes +``` + +**Analysis:** aijournal separates authoritative data (human-editable) from derived data (reproducible). Our POC conflates these, making it harder to inspect or manually correct the knowledge base. 
+ +### 1.2 Claim Representation + +**Our POC - Raw facts:** +``` +"Bas is a software engineer" +"The user loves hiking" +"The user's wife is named Anne" +``` + +**aijournal - Typed claim atoms:** +```yaml +- type: trait + subject: self + predicate: occupation + statement: "Works as a software engineer focused on AI systems" + scope: {domain: work, context: [professional]} + strength: 0.85 + status: accepted + provenance: + sources: [entry:2025-01-15-morning] + first_seen: 2025-01-15 + last_updated: 2025-01-20 +``` + +**Analysis:** aijournal's typed claims enable: +- Filtering by type (traits vs preferences vs goals) +- Confidence tracking via `strength` +- Time-decay for relevance +- Conflict detection between claims +- Source attribution for verification + +### 1.3 Context Layers + +**Our POC:** Single layer - all facts dumped into system prompt + +**aijournal - Hierarchical layers:** + +| Layer | Content | Tokens | Use Case | +|-------|---------|--------|----------| +| L1 | Persona core + top claims | ~1,200 | Quick chat, advice | +| L2 | L1 + recent summaries/facts | ~2,000 | Daily check-ins | +| L3 | L2 + full claims + facets | ~2,600 | Deep conversations | +| L4 | L3 + prompts + config + history | ~3,200 | External AI export | + +**Analysis:** Layered context prevents token overflow and allows appropriate depth for different interactions. + +## 2. Feature Comparison + +### 2.1 Fact Extraction + +| Feature | Our POC | aijournal | +|---------|---------|-----------| +| Extraction method | PydanticAI agent | Ollama + custom prompts | +| Output format | Raw strings | Typed MicroFact objects | +| Reconciliation | ADD/UPDATE/DELETE/NONE | Consolidation with strength weighting | +| Deduplication | Semantic similarity | Hash + semantic + scope matching | + +**Our POC advantage:** The reconciliation logic (PromptedOutput with JSON mode) prevents duplicate facts effectively. + +**aijournal advantage:** Consolidation weights existing evidence: `strength_new = clamp01((w_prev * strength_prev + w_obs * signal) / (w_prev + w_obs))` + +### 2.2 Profile Generation + +| Feature | Our POC | aijournal | +|---------|---------|-----------| +| Generation | On-demand via LLM | Pre-built `persona_core.yaml` | +| Caching | None | Persisted with staleness tracking | +| Categories | LLM-determined | Defined schema (values, goals, traits, etc.) | +| Token budget | Unlimited (risk of overflow) | Configurable (~1,200 default) | + +**Our POC advantage:** Flexible - LLM determines categories dynamically based on content. + +**aijournal advantage:** Deterministic, auditable, and respects token limits. + +### 2.3 Chat Integration + +| Feature | Our POC | aijournal | +|---------|---------|-----------| +| Context injection | All facts in system prompt | Layer-appropriate context | +| Citations | None | `[entry:id#p]` markers | +| Feedback | None | Up/down adjustments to claim strength | +| Memory storage | Bypassed (direct LLM call) | Persisted with telemetry | + +**Our POC advantage:** Simple, no side effects. + +**aijournal advantage:** Learning loop - feedback strengthens/weakens claims over time. + +### 2.4 Missing in Our POC + +1. **Interview/Probing Mode** + - aijournal generates questions to fill knowledge gaps + - Ranks facets by `staleness × impact_weight` to prioritize probing + +2. **Time Decay** + - aijournal: `effective_strength = strength × exp(-λ × staleness)` + - Our POC: All facts treated equally regardless of age + +3. 
**Conflict Resolution** + - aijournal: Detects contradictions, downgrades to `tentative`, queues questions + - Our POC: UPDATE replaces old fact entirely + +4. **Advisor Mode** + - aijournal: Separate `advise` command with coaching preferences + - Our POC: Generic chat only + +5. **Export/Packs** + - aijournal: Generate context bundles for external AIs + - Our POC: No export capability + +## 3. Test Results Analysis + +### 3.1 Blog Post Ingestion + +We fed 12+ blog posts into our POC: + +| Metric | Result | +|--------|--------| +| Posts processed | ~12 | +| Facts extracted | 52 | +| Extraction accuracy | High - captured key themes | +| Profile quality | Excellent - identified all major interests | + +**Sample extracted facts:** +- "Bas is a software engineer" +- "Bas works on AI systems" +- "The user loves hiking" +- "You went for a 5km run this morning" +- "You discovered that local vision models like Qwen3-VL-32B can identify niche books" + +### 3.2 Profile Generation Quality + +The generated profile correctly identified: +- ✅ Professional identity (software engineer, AI focus) +- ✅ Personal relationships (wife Anne) +- ✅ Hobbies (hiking, running, learning Dutch) +- ✅ Technical interests (local AI, terminal productivity, homelab) +- ✅ Values (minimalism, security, reproducibility) + +### 3.3 Chat Intelligence + +The chat demonstrated: +- **Specific recall:** "You use the Glove80 keyboard with programmable layers" +- **Temporal understanding:** Tracked evolution of views on AI coding +- **Theme synthesis:** Connected local AI + security + productivity interests +- **Nuanced responses:** Acknowledged both benefits and limitations + +## 4. Recommendations + +### 4.1 Quick Wins (Keep POC Simple) + +1. **Persist profile summary** - Cache the LLM-generated profile to avoid regeneration +2. **Add timestamps to facts** - Already have `created_at`, use it for recency weighting +3. **Token budgeting** - Limit facts sent to chat based on relevance + recency + +### 4.2 Medium-Term Enhancements + +1. **Claim typing** - Categorize facts into types (trait, preference, goal, relationship) +2. **Strength tracking** - Increment when same fact extracted multiple times +3. **Simple decay** - Weight recent facts higher in context + +### 4.3 aijournal Features Worth Adopting + +1. **Interview mode** - Generate questions to learn more +2. **Feedback loop** - Up/down on responses affects claim strength +3. **Layered context** - L1 for quick chats, L4 for deep dives +4. **Citations** - Link responses to source facts + +### 4.4 What NOT to Adopt + +1. **7-stage pipeline** - Overkill for our use case +2. **Strict schema governance** - Adds friction without clear benefit for POC +3. **Markdown file storage** - ChromaDB is sufficient for our needs + +## 5. Conclusion + +Our POC validates the core hypothesis: **MemoryClient can serve as the foundation for a personal knowledge system**. With ~200 lines of code, we achieved: + +- Accurate fact extraction from unstructured text +- Coherent profile generation from diverse content +- Personalized conversations using stored knowledge + +The main gap is **learning over time** - our system doesn't strengthen beliefs based on repetition or feedback. Adding simple strength tracking and decay would close 80% of the functionality gap with 20% of aijournal's complexity. 
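+
+For reference, the evidence-weighted version of this idea (the consolidation formula quoted in Section 2.1 plus the time decay from Section 2.4) is only slightly more code than the fixed-increment recommendation below. A minimal sketch, with a hypothetical `StoredFact` record and illustrative weights:
+
+```python
+import math
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+
+
+@dataclass
+class StoredFact:
+    """Hypothetical fact record with evidence tracking."""
+
+    content: str
+    strength: float = 0.5  # current belief strength in [0, 1]
+    weight: float = 1.0    # accumulated evidence weight
+    last_seen: datetime = field(default_factory=lambda: datetime.now(UTC))
+
+
+def consolidate(fact: StoredFact, signal: float, obs_weight: float = 1.0) -> None:
+    """Blend a new observation into the stored strength (clamped weighted average)."""
+    blended = (fact.weight * fact.strength + obs_weight * signal) / (fact.weight + obs_weight)
+    fact.strength = min(1.0, max(0.0, blended))
+    fact.weight += obs_weight
+    fact.last_seen = datetime.now(UTC)
+
+
+def effective_strength(fact: StoredFact, decay_rate: float = 0.1) -> float:
+    """Discount strength by staleness: strength * exp(-lambda * days_since_last_seen)."""
+    days = (datetime.now(UTC) - fact.last_seen).total_seconds() / 86400
+    return fact.strength * math.exp(-decay_rate * days)
+```
+
+Re-extracting the same fact would call `consolidate(fact, 1.0)`, nudging its strength toward certainty, while retrieval ranks by `effective_strength()` so stale claims fade over time.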
+ +### Recommended Next Step + +Add a `strength` field to stored facts and implement: +```python +# On duplicate fact detection +existing.strength = min(1.0, existing.strength + 0.1) +existing.last_seen = now() + +# On retrieval +effective_strength = fact.strength * exp(-0.1 * days_since_last_seen) +``` + +This single change would transform our static knowledge base into a learning system. diff --git a/docs/architecture/memory.md b/docs/architecture/memory.md index 83ae9720..2b2ab4a2 100644 --- a/docs/architecture/memory.md +++ b/docs/architecture/memory.md @@ -59,7 +59,7 @@ entries/ assistant/ __.md # Raw assistant responses summaries/ - summary.md # The single rolling summary of the conversation + __summary.md # Single final summary (map-reduce collapses to one) ``` **Deleted Directory Structure (Soft Deletes):** @@ -71,7 +71,7 @@ entries/ facts/ __.md summaries/ - summary.md # Tombstoned summary + __summary.md # Tombstoned summary ``` ### 2.2 File Format @@ -154,22 +154,28 @@ Executed via `_postprocess_after_turn` (background task). * **Output:** JSON list of strings. Failures fall back to `[]`. ### 4.3 Reconciliation (Memory Management) -Resolves contradictions using a "Search-Decide-Update" loop. +Resolves contradictions using a "Search-Decide-Update" loop with complete enumeration. 1. **Local Search:** For each new fact, retrieve a small neighborhood of existing `role="memory"` entries for the conversation. -2. **LLM Decision:** Uses `UPDATE_MEMORY_PROMPT` (examples + strict JSON schema) to compare `new_facts` vs `existing_memories`. +2. **LLM Decision:** Uses `UPDATE_MEMORY_PROMPT` to compare `new_facts` vs `existing_memories`. The model must return **all memories** (existing + new) with explicit events for each. * **Decisions:** `ADD`, `UPDATE`, `DELETE`, `NONE`. * If no existing memories are found, all new facts are added directly. * On LLM/network failure, defaults to adding all new facts. - * Safeguard: if the model returns only deletes/empties, the new facts are still added to avoid data loss. 3. **Execution:** * **Adds:** Creates new fact files and upserts to Chroma. * **Updates:** Implemented as delete + add with a fresh ID; tombstones record `replaced_by`. * **Deletes:** Soft-deletes files (moved under `deleted/`) and removes from Chroma. -### 4.4 Summarization +### 4.4 Summarization (Adaptive Map-Reduce) +Uses the `agent_cli.summarizer` module for research-backed adaptive summarization. + +* **Level Selection:** Automatically determines summarization strategy based on token count: + * `NONE` (< 100 tokens): No summary needed, facts only. + * `BRIEF` (100-500 tokens): Single-sentence summary. + * `MAP_REDUCE` (>= 500 tokens): Dynamic collapse using map-reduce with content-type aware prompts. +* **Algorithm:** LangChain-inspired map-reduce that recursively collapses until content fits token_max (3000). * **Input:** Previous summary (if any) + newly extracted facts. -* **Prompt:** `SUMMARY_PROMPT` (updates the running summary). -* **Persistence:** Writes a single `summaries/summary.md` per conversation (deterministic doc ID). +* **Persistence:** Stores single final summary in `summaries/` directory with YAML front matter containing compression metrics. +* **See:** `docs/architecture/summarizer.md` for detailed algorithm specification. ### 4.5 Eviction * **Trigger:** If total entries in conversation > `max_entries` (default 500). @@ -190,17 +196,22 @@ To replicate the system behavior, the following prompt strategies are required. 
* **Example:** "My wife is Anne" -> `["The user's wife is named Anne"]`. ### 5.2 Reconciliation (`UPDATE_MEMORY_PROMPT`) -* **Goal:** Compare `new_facts` against `existing_memories` (id + text) and output structured decisions. +* **Goal:** Compare `new_facts` against `existing_memories` and return **all memories** (existing + new) with explicit events. +* **Approach:** The model must enumerate every memory in its response, forcing deliberate decisions rather than implicit omissions. * **Operations:** - * **ADD:** New information (generates a new ID). - * **UPDATE:** Refines existing information (uses the provided short ID). - * **DELETE:** Contradicts existing information (e.g., "I hate pizza" vs "I love pizza"). **If deleting because of a replacement, the new fact must also be returned (ADD or UPDATE).** - * **NONE:** Fact already exists or is irrelevant. -* **Output constraints:** JSON list only; no prose/code fences; IDs for UPDATE/DELETE/NONE must come from the provided list. - -### 5.3 Summarization (`SUMMARY_PROMPT`) -* **Goal:** Maintain a concise running summary. -* **Constraints:** Aggregate related facts. Drop transient chit-chat. Focus on durable info. + * **ADD:** New information not present in existing memories (generates a new sequential ID). + * **UPDATE:** Refines existing information about the **same topic** (keeps the existing ID). + * **DELETE:** Explicitly contradicts existing information (e.g., "I hate pizza" vs "I love pizza"). + * **NONE:** Existing memory is unrelated to new facts, or new fact is an exact duplicate. +* **Output constraints:** JSON list containing all memories; each existing memory must have an event; new unrelated facts must be ADDed; no prose or code fences. + +### 5.3 Summarization (Adaptive Prompts) +The summarizer uses prompts from `agent_cli.summarizer._prompts`: +* **`BRIEF_SUMMARY_PROMPT`:** Single-sentence distillation for short content (100-500 tokens). +* **`GENERAL_SUMMARY_PROMPT`:** Paragraph summary with prior context integration (general content). +* **`CHUNK_SUMMARY_PROMPT`:** Individual chunk summarization for map phase. +* **`META_SUMMARY_PROMPT`:** Synthesizes multiple chunk summaries in reduce phase. +* **Content-type variants:** `CONVERSATION_SUMMARY_PROMPT`, `JOURNAL_SUMMARY_PROMPT`, `DOCUMENT_SUMMARY_PROMPT` for domain-specific summarization. --- diff --git a/docs/architecture/summarizer.md b/docs/architecture/summarizer.md new file mode 100644 index 00000000..c7476142 --- /dev/null +++ b/docs/architecture/summarizer.md @@ -0,0 +1,362 @@ +# Agent CLI: Adaptive Summarizer Technical Specification + +This document describes the architectural decisions, design rationale, and technical approach for the `agent-cli` adaptive summarization subsystem. + +## 1. System Overview + +The adaptive summarizer provides **content-aware compression** using a map-reduce approach inspired by LangChain's chains. It compresses content to fit within a specified token budget using a simple algorithm: + +``` +Input Content ──▶ Token Count ──▶ Compare to Target + │ + ┌───────────────────────┴───────────────────────┐ + │ │ + Fits target Exceeds target + │ │ + Return as-is Map-Reduce + (no LLM call) (dynamic collapse) +``` + +**Design Goals:** + +- **Maximum simplicity:** Single entry point with straightforward logic. +- **Flexible targeting:** Specify absolute token count or relative compression ratio. +- **Research-grounded defaults:** chunk_size=2048 (BOOOOKSCORE), token_max=3000 (LangChain). 
+- **Content-type awareness:** Domain-specific prompts for conversations, journals, documents. + +--- + +## 2. Research Foundations + +This section documents what techniques are borrowed from research vs. what is original design. + +### 2.1 Borrowed: LangChain Map-Reduce Pattern + +**Reference:** LangChain `ReduceDocumentsChain` + +LangChain's approach to document summarization uses a simple algorithm: +1. **Map phase:** Split content into chunks, summarize each in parallel +2. **Reduce phase:** If combined summaries exceed `token_max`, recursively collapse until they fit + +Key insight: No need for predetermined levels. Dynamic depth based on actual content length. LangChain's default `token_max=3000`. + +### 2.2 Borrowed: Chunk Size (BOOOOKSCORE) + +**Reference:** arXiv:2310.00785 (ICLR 2024) + +BOOOOKSCORE's research on book-length summarization found optimal chunk sizes. Their defaults: +- Chunk size: **2048 tokens** (we use this) +- Max summary length: **900 tokens** + +### 2.3 Original Design (Not Research-Backed) + +The following aspects are **original design choices without direct research justification**: + +- **Content-type prompts:** Domain-specific prompts are original design. +- **Target ratio parameter:** The option to specify compression as a percentage is a convenience feature. + +--- + +## 3. Architectural Decisions + +### 3.1 Simple Target-Based Logic + +**Decision:** Use a simple "fits? return : compress" algorithm. + +**Rationale:** + +- **Minimal complexity:** No level selection logic, threshold management, or multiple code paths. +- **Clear semantics:** If content fits the target, return it unchanged. Otherwise, compress. +- **Flexible targeting:** Users can specify exact token counts or relative ratios. + +**Algorithm:** + +```python +async def summarize( + content: str, + config: SummarizerConfig, + *, + target_tokens: int | None = None, # Absolute limit + target_ratio: float | None = None, # e.g., 0.2 = compress to 20% +) -> SummaryResult: + input_tokens = count_tokens(content) + + # Determine target + if target_ratio is not None: + target = max(1, int(input_tokens * target_ratio)) + elif target_tokens is not None: + target = target_tokens + else: + target = config.token_max # Default: 3000 + + # Already fits? Return as-is (no LLM call) + if input_tokens <= target: + return SummaryResult(summary=content, ...) + + # Compress using map-reduce + return await map_reduce_summarize(content, config, target) +``` + +### 3.2 Map-Reduce with Dynamic Collapse + +**Decision:** Use LangChain-style map-reduce for all compression. + +**Rationale:** + +- **Single algorithm:** One code path handles all content sizes. +- **Dynamic depth:** Collapse depth adapts to actual content length. +- **Research-backed:** LangChain's approach is battle-tested. + +**Algorithm:** + +```python +async def map_reduce_summarize(content, config, target): + # Map: Split and summarize chunks in parallel + chunks = chunk_text(content, chunk_size=2048) + summaries = await parallel_summarize(chunks) + + # Reduce: Recursively collapse until fits target + while total_tokens(summaries) > target: + groups = group_by_token_limit(summaries, target) + summaries = await parallel_synthesize(groups) + + return final_synthesis(summaries) +``` + +### 3.3 Research-Backed Defaults + +**Decision:** Use values from published research. 
+ +| Parameter | Value | Source | +| :--- | :--- | :--- | +| `chunk_size` | 2048 | BOOOOKSCORE | +| `token_max` | 3000 | LangChain | +| `chunk_overlap` | 200 | Original | + +### 3.4 Semantic Boundary Chunking + +**Decision:** Split content on semantic boundaries (paragraphs, then sentences) rather than fixed character counts. + +**Rationale:** + +- **Coherence preservation:** Splitting mid-sentence loses context. +- **Natural units:** Paragraphs and sentences are natural semantic units. +- **Overlap for continuity:** The 200-token overlap ensures concepts spanning chunk boundaries aren't lost. + +**Fallback chain:** + +1. Prefer paragraph boundaries (double newlines) +2. Fall back to sentence boundaries (`.!?` followed by space + capital) +3. Final fallback to word-based splitting + +### 3.5 Content-Type Aware Prompts + +**Decision:** Use different prompt templates for different content domains. + +**Rationale:** + +- **Conversations:** Focus on user preferences, decisions, action items. +- **Journals:** Emphasize personal insights, emotional context, growth patterns. +- **Documents:** Prioritize key findings, methodology, conclusions. + +A generic summarization prompt loses domain-specific signal. + +### 3.6 Prior Summary Integration + +**Decision:** Provide the previous summary as context when generating updates. + +**Rationale:** + +- **Continuity:** New summaries build on existing context. +- **Incremental updates:** Avoid re-summarizing all historical content. +- **Information preservation:** Important information persists through the chain. + +### 3.7 Compression Ratio Tracking + +**Decision:** Track and report compression metrics for every summary. + +Every `SummaryResult` includes `input_tokens`, `output_tokens`, `compression_ratio`, and `collapse_depth` for observability. + +--- + +## 4. Processing Pipeline + +### 4.1 Entry Point + +The entry point (`summarize()`) implements simple logic: + +1. **Token counting:** Uses tiktoken with model-appropriate encoding. Falls back to character-based estimation (~4 chars/token) if tiktoken unavailable. +2. **Target calculation:** Determines target from `target_tokens`, `target_ratio`, or default `token_max`. +3. **Fit check:** If content fits target, return as-is. +4. **Compression:** Call map-reduce if content exceeds target. + +### 4.2 Single-Chunk Content + +For content that fits within `chunk_size` but exceeds target: + +- Single LLM call with content-type aware prompt +- Returns `SummaryResult` with compressed summary + +### 4.3 Multi-Chunk Content + +For larger content (> chunk_size tokens): + +1. **Map phase:** Split content into overlapping chunks, summarize each in parallel. +2. **Reduce phase:** If combined summaries exceed target, group and re-summarize recursively. +3. **Final synthesis:** Combine remaining summaries into final output. + +The `collapse_depth` field in the result indicates how many reduce iterations were needed. + +--- + +## 5. 
Data Models + +### 5.1 SummaryResult + +```python +class SummaryResult(BaseModel): + summary: str | None # None if content was empty + input_tokens: int + output_tokens: int + compression_ratio: float # 0.0-1.0 + collapse_depth: int # 0 = no collapse needed + created_at: datetime +``` + +### 5.2 SummarizerConfig + +```python +@dataclass +class SummarizerConfig: + openai_base_url: str + model: str + api_key: str | None = None + chunk_size: int = 2048 # BOOOOKSCORE + token_max: int = 3000 # LangChain (default target) + chunk_overlap: int = 200 + max_concurrent_chunks: int = 5 + timeout: float = 60.0 +``` + +--- + +## 6. Integration with Memory System + +### 6.1 Write Path + +The memory system triggers summarization during post-processing: + +1. Collect raw conversation turns +2. Retrieve existing summary as prior context +3. Call summarizer with content + prior summary + content type +4. Persist result to storage + +### 6.2 Read Path + +The memory retrieval system uses summaries for context injection: + +- Fetches summary for the conversation +- Injects as prefix to retrieved memories +- Provides high-level context that individual snippets lack + +### 6.3 Storage + +Summaries are stored with metadata: + +```python +{ + "id": "{conversation_id}:summary", + "content": summary_text, + "metadata": { + "conversation_id": conversation_id, + "role": "summary", + "input_tokens": 1500, + "output_tokens": 150, + "compression_ratio": 0.1, + "collapse_depth": 1, + "created_at": "2024-01-15T10:30:00Z", + "is_final": True, + }, +} +``` + +--- + +## 7. Error Handling + +Summarization follows a fail-fast philosophy: + +- **LLM errors:** Propagated as `SummarizationError` (base class for all summarization errors). +- **Empty input:** Returns result with `summary=None` immediately (not an error). +- **Encoding errors:** Falls back to character-based token estimation. +- **Max depth exceeded:** Warning logged, forces final synthesis even if over target. + +The caller decides how to handle failures—typically by proceeding without a summary rather than blocking the entire operation. + +--- + +## 8. Configuration + +| Parameter | Default | Source | +| :--- | :--- | :--- | +| `chunk_size` | 2048 | BOOOOKSCORE | +| `token_max` | 3000 | LangChain | +| `chunk_overlap` | 200 | Original | +| `max_concurrent` | 5 | Implementation choice | +| `max_collapse_depth` | 10 | Safety limit | + +--- + +## 9. Usage Examples + +### Basic Usage + +```python +from agent_cli.summarizer import SummarizerConfig, summarize + +config = SummarizerConfig( + openai_base_url="http://localhost:11434/v1", + model="llama3.1:8b", +) + +# Default: compress to fit 3000 tokens +result = await summarize(content, config) + +# Compress to specific token count +result = await summarize(content, config, target_tokens=500) + +# Compress to 20% of original size +result = await summarize(content, config, target_ratio=0.2) + +# With content type for better prompts +result = await summarize( + content, + config, + target_tokens=500, + content_type="conversation", +) +``` + +--- + +## 10. Limitations and Trade-offs + +### 10.1 Fact Preservation + +Summarization is inherently lossy. Specific facts (dates, numbers, names) are often dropped in favor of thematic content. If your use case requires fact retrieval: + +- Store original content alongside summaries +- Use fact extraction instead of summarization +- Use RAG to retrieve original chunks + +### 10.2 No Intermediate Summaries + +Unlike hierarchical approaches, map-reduce only stores the final summary. 
Intermediate chunk summaries are discarded after synthesis. This simplifies storage but removes granular access. + +--- + +## 11. Future Improvements + +1. **Benchmark against BOOOOKSCORE metrics** for coherence evaluation +2. **Add fact extraction mode** for use cases requiring specific detail preservation +3. **Streaming support** for real-time summarization feedback diff --git a/examples/aijournal_poc.py b/examples/aijournal_poc.py new file mode 100755 index 00000000..156c0b97 --- /dev/null +++ b/examples/aijournal_poc.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +"""Minimal AI Journal proof-of-concept using MemoryClient. + +This validates the core hypothesis: MemoryClient can serve as the +foundation for a personal knowledge system (AI journal). + +Usage: + # Add a journal entry + python examples/aijournal_poc.py add "Today I learned about quantum computing at work" + + # Search memories + python examples/aijournal_poc.py search "what did I learn?" + + # Interactive chat with memory + python examples/aijournal_poc.py chat "What have I been working on lately?" +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import os +from pathlib import Path + +import httpx + +from agent_cli.memory.client import MemoryClient + +# Enable debug logging for memory module +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + datefmt="%H:%M:%S", +) +# Enable DEBUG for memory ingest to see full prompts +logging.getLogger("agent_cli.memory._ingest").setLevel(logging.DEBUG) + + +# Defaults for local AI setup +DEFAULT_BASE_URL = "http://192.168.1.143:9292/v1" +DEFAULT_MODEL = "gpt-oss-high:20b" +DEFAULT_EMBEDDING_MODEL = "embeddinggemma:300m" + + +def get_client(model: str | None = None) -> tuple[MemoryClient, str]: + """Initialize the memory client with sensible defaults. + + Returns: + Tuple of (client, model_name) + + """ + base_url = os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + model_name = model or os.environ.get("OPENAI_MODEL", DEFAULT_MODEL) + embedding_model = os.environ.get("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + print(f"Using: {base_url}") + print(f" Chat model: {model_name}") + print(f" Embedding model: {embedding_model}") + + return MemoryClient( + memory_path=Path("~/.aijournal").expanduser(), + openai_base_url=base_url, + chat_api_key=api_key, + embedding_api_key=api_key, + embedding_model=embedding_model, + enable_summarization=True, + enable_git_versioning=False, # Keep it simple for POC + score_threshold=0.1, # Lower threshold for local models + ), model_name + + +async def cmd_add(text: str) -> None: + """Add a journal entry.""" + client, model = get_client() + print(f"Adding entry: {text[:50]}...") + await client.add(text, conversation_id="journal", model=model) + print("✓ Entry processed and facts extracted") + + +async def cmd_search(query: str, top_k: int = 5) -> None: + """Search memories.""" + client, model = get_client() + print(f"Searching for: {query}\n") + + result = await client.search(query, conversation_id="journal", top_k=top_k, model=model) + + if not result.entries: + print("No relevant memories found.") + return + + for i, entry in enumerate(result.entries, 1): + print(f"{i}. 
[{entry.role}] {entry.content}") + print(f" Score: {entry.score:.3f} | Created: {entry.created_at[:10]}") + print() + + +def cmd_show() -> None: + """Show all stored memories (what the system knows about you).""" + client, _ = get_client() + print("=== What I know about you ===\n") + + entries = client.list_all(conversation_id="journal") + + if not entries: + print("No memories stored yet. Add some journal entries first!") + return + + # Sort by created_at + entries.sort(key=lambda x: x["created_at"], reverse=True) + + for i, entry in enumerate(entries, 1): + date = entry["created_at"][:10] if entry["created_at"] else "unknown" + print(f"{i}. [{date}] {entry['content']}") + + print(f"\n--- Total: {len(entries)} memories ---") + + +PROFILE_PROMPT = """Based on the following facts about a person, create a brief profile summary. +Organize the information into categories like: +- **Identity**: Name, relationships, occupation +- **Interests & Activities**: Hobbies, regular activities +- **Goals & Values**: What they care about, what they're working towards +- **Recent Events**: Notable recent happenings + +Only include categories that have relevant information. Be concise. + +Facts: +{facts} + +Profile Summary:""" + + +async def cmd_profile() -> None: + """Generate a profile summary from stored memories.""" + client, model = get_client() + + entries = client.list_all(conversation_id="journal") + + if not entries: + print("No memories stored yet. Add some journal entries first!") + return + + # Format facts for the prompt + facts = "\n".join(f"- {e['content']}" for e in entries) + prompt = PROFILE_PROMPT.format(facts=facts) + + print("=== Your Profile ===\n") + print("(Generating profile from stored memories...)\n") + + # Direct LLM call (bypasses memory storage) + base_url = os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + async with httpx.AsyncClient(timeout=120.0) as http: + response = await http.post( + f"{base_url}/chat/completions", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "model": model, + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.7, + }, + ) + data = response.json() + + choices = data.get("choices", []) + if choices: + profile = choices[0].get("message", {}).get("content", "") + print(profile) + + print(f"\n--- Based on {len(entries)} memories ---") + + +CHAT_SYSTEM_PROMPT = """You are a helpful AI assistant with memory of the user. + +Here's what you know about the user: +{profile} + +Use this knowledge naturally in your responses. 
Be helpful and personable.""" + + +async def cmd_chat(question: str, with_profile: bool = True) -> None: + """Chat with memory-augmented LLM.""" + client, model = get_client() + + # Build profile context + profile_text = "" + if with_profile: + entries = client.list_all(conversation_id="journal") + if entries: + profile_text = "\n".join(f"- {e['content']}" for e in entries) + + print(f"Question: {question}\n") + + # Build messages with profile context + messages: list[dict[str, str]] = [] + if profile_text: + system_prompt = CHAT_SYSTEM_PROMPT.format(profile=profile_text) + messages.append({"role": "system", "content": system_prompt}) + messages.append({"role": "user", "content": question}) + + # Direct LLM call with profile context + base_url = os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + async with httpx.AsyncClient(timeout=120.0) as http: + response = await http.post( + f"{base_url}/chat/completions", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "model": model, + "messages": messages, + "temperature": 0.7, + }, + ) + data = response.json() + + choices = data.get("choices", []) + if choices: + reply = choices[0].get("message", {}).get("content", "") + print(f"Answer: {reply}") + + if profile_text: + entry_count = len(client.list_all(conversation_id="journal")) + print(f"\n--- Using profile with {entry_count} memories ---") + + +def main() -> None: + """CLI entry point.""" + parser = argparse.ArgumentParser(description="AI Journal POC") + subparsers = parser.add_subparsers(dest="command", required=True) + + # Add command + add_parser = subparsers.add_parser("add", help="Add a journal entry") + add_parser.add_argument("text", help="The journal entry text") + + # Search command + search_parser = subparsers.add_parser("search", help="Search memories") + search_parser.add_argument("query", help="Search query") + search_parser.add_argument("-k", "--top-k", type=int, default=5, help="Number of results") + + # Chat command + chat_parser = subparsers.add_parser("chat", help="Chat with memory") + chat_parser.add_argument("question", help="Question to ask") + + # Show command - display what the system knows about you + subparsers.add_parser("show", help="Show all stored memories") + + # Profile command - generate a profile summary + subparsers.add_parser("profile", help="Generate profile from memories") + + args = parser.parse_args() + + if args.command == "add": + asyncio.run(cmd_add(args.text)) + elif args.command == "search": + asyncio.run(cmd_search(args.query, args.top_k)) + elif args.command == "chat": + asyncio.run(cmd_chat(args.question)) + elif args.command == "show": + cmd_show() + elif args.command == "profile": + asyncio.run(cmd_profile()) + + +if __name__ == "__main__": + main() diff --git a/examples/summarizer_demo.py b/examples/summarizer_demo.py new file mode 100644 index 00000000..f5d593a1 --- /dev/null +++ b/examples/summarizer_demo.py @@ -0,0 +1,431 @@ +"""Demonstrate the simplified summarizer on texts of varying lengths. + +This script fetches content of different sizes and shows how the adaptive +summarizer compresses content to fit different target token counts or ratios. 
+ +Usage: + python examples/summarizer_demo.py + + # Test with specific target ratio + python examples/summarizer_demo.py --target-ratio 0.2 + + # Test with specific target token count + python examples/summarizer_demo.py --target-tokens 500 + + # Use a different model + python examples/summarizer_demo.py --model "gpt-4o-mini" +""" # noqa: INP001 + +from __future__ import annotations + +import argparse +import asyncio +import json +import os +import re +import textwrap +import traceback +from dataclasses import dataclass + +import httpx + +from agent_cli.summarizer import ( + SummarizerConfig, + SummaryResult, + summarize, +) + +# Defaults for local AI setup +DEFAULT_BASE_URL = "http://192.168.1.143:9292/v1" +DEFAULT_MODEL = "gpt-oss-high:20b" + + +@dataclass +class TextSample: + """A sample text for testing the summarizer.""" + + name: str + description: str + url: str + content_type: str = "general" + # If URL fetch fails, use this fallback + fallback_content: str | None = None + + +# Sample texts of varying lengths to demonstrate summarization +SAMPLES: list[TextSample] = [ + TextSample( + name="Short News Article", + description="~150-400 tokens - demonstrates small content handling", + url="https://httpbin.org/json", # Returns small JSON we'll convert to text + fallback_content=""" + Breaking News: Scientists at the Marine Biology Institute have made a + groundbreaking discovery in the Mariana Trench. A new species of deep-sea + fish, dubbed "Pseudoliparis swirei," has been found surviving at depths + exceeding 8,000 meters, making it one of the deepest-living fish ever + documented. + + The research team, led by Dr. Sarah Chen from the University of Washington, + used advanced unmanned submersibles equipped with high-resolution cameras + and collection apparatus. The expedition lasted three months and covered + multiple dive sites across the western Pacific. + + "This discovery fundamentally changes our understanding of life in extreme + environments," Dr. Chen stated in a press conference. "The adaptations + these fish have developed to survive crushing pressures and near-freezing + temperatures are remarkable." + + The fish displays several unique characteristics including translucent skin, + specialized proteins that prevent cellular damage under pressure, and an + unusual metabolism that allows survival with minimal oxygen. Scientists + believe studying these adaptations could lead to breakthroughs in medicine + and materials science. + + The finding has been published in the journal Nature and has already + generated significant interest from the scientific community worldwide. + Further expeditions are planned to study the species in its natural habitat. + """, + ), + TextSample( + name="Technology Article", + description="~800-2000 tokens - demonstrates medium content", + url="https://en.wikipedia.org/api/rest_v1/page/summary/Artificial_intelligence", + content_type="document", + fallback_content=""" + Artificial intelligence (AI) is the intelligence of machines or software, + as opposed to the intelligence of humans or other animals. It is a field + of computer science that develops and studies intelligent machines. The + field encompasses a wide range of approaches and technologies. + + AI research has been defined as the field of study of intelligent agents, + which refers to any system that perceives its environment and takes actions + that maximize its chances of achieving its goals. 
This definition emphasizes + the practical aspects of building systems that can operate effectively. + + The term "artificial intelligence" has been used to describe machines that + mimic cognitive functions that humans associate with the human mind, such + as learning and problem solving. As machines become increasingly capable, + tasks considered to require "intelligence" are often removed from the + definition of AI, a phenomenon known as the AI effect. + + History of Artificial Intelligence + + The field of AI research was founded at a workshop held on the campus of + Dartmouth College during the summer of 1956. The attendees became the + founders and leaders of AI research. They and their students produced + programs that the press described as astonishing. + + Early AI research in the 1950s explored topics like problem solving and + symbolic methods. In the 1960s, the US Department of Defense took interest + and began training computers to mimic basic human reasoning. DARPA completed + street mapping projects in the 1970s and produced intelligent personal + assistants in 2003, long before Siri, Alexa or Cortana. + + Modern AI Approaches + + Modern AI techniques have become pervasive and include machine learning, + deep learning, natural language processing, computer vision, robotics, + and autonomous systems. These technologies power everything from search + engines to self-driving cars. + + Machine learning is a subset of AI that enables systems to learn and improve + from experience without being explicitly programmed. Deep learning uses + neural networks with many layers to analyze various factors of data. + + Neural networks are computing systems inspired by biological neural networks. + They consist of interconnected nodes that process information using + connectionist approaches to computation. Modern neural networks can have + millions or billions of parameters. + + Applications of AI + + AI applications are transforming industries including healthcare, finance, + transportation, and entertainment. In healthcare, AI helps diagnose diseases + and develop new treatments. In finance, AI powers fraud detection and + algorithmic trading. + + Autonomous vehicles use AI to perceive their environment and make driving + decisions. Virtual assistants use natural language processing to understand + and respond to user queries. Recommendation systems use AI to suggest + content based on user preferences. + + Ethical Considerations + + The field was founded on the assumption that human intelligence can be + so precisely described that a machine can be made to simulate it. This + raised philosophical arguments about the mind and the ethical consequences + of creating artificial beings endowed with human-like intelligence. + + Major concerns include job displacement, algorithmic bias, privacy violations, + and the potential for misuse. Researchers and policymakers are working to + develop frameworks for responsible AI development and deployment. + + The future of AI holds both tremendous promise and significant challenges. + As these systems become more capable, society must grapple with questions + about control, accountability, and the nature of intelligence itself. 
+ """, + ), + TextSample( + name="Full Article", + description="~4000-10000 tokens - demonstrates large content with chunking", + url="https://en.wikipedia.org/api/rest_v1/page/mobile-html/Machine_learning", + content_type="document", + fallback_content=None, # We'll generate synthetic content + ), +] + + +def generate_synthetic_content(target_tokens: int, topic: str = "technology") -> str: + """Generate synthetic content for testing when URLs fail.""" + # Each paragraph is roughly 50-100 tokens + paragraphs = [ + f"Section on {topic} - Part {{i}}: This section explores various aspects " + f"of {topic} and its implications for modern society. The development of " + f"new technologies continues to reshape how we live and work. Researchers " + f"have made significant progress in understanding the fundamentals.", + f"The history of {topic} spans many decades of innovation. Early pioneers " + f"laid the groundwork for current advancements. Their contributions remain " + f"relevant today as we build upon established foundations.", + f"Current applications of {topic} include healthcare, transportation, and " + f"communication. These sectors have seen dramatic improvements in efficiency " + f"and capability. Future developments promise even greater transformations.", + f"Challenges in {topic} include ethical considerations, resource constraints, " + f"and technical limitations. Addressing these requires collaboration across " + f"disciplines. Solutions often emerge from unexpected directions.", + f"The future of {topic} looks promising with continued investment and research. " + f"Emerging trends suggest new possibilities. Stakeholders must prepare for " + f"rapid change while maintaining focus on beneficial outcomes.", + ] + + result = [] + tokens_per_para = 75 # approximate + needed_paragraphs = target_tokens // tokens_per_para + 1 + + for i in range(needed_paragraphs): + para = paragraphs[i % len(paragraphs)].format(i=i + 1) + result.append(para) + + return "\n\n".join(result) + + +async def fetch_content(sample: TextSample, client: httpx.AsyncClient) -> str: + """Fetch content from URL or use fallback.""" + try: + # Add User-Agent header to avoid 403 errors from some sites + headers = { + "User-Agent": "Mozilla/5.0 (compatible; SummarizerDemo/1.0)", + } + response = await client.get( + sample.url, + timeout=30.0, + follow_redirects=True, + headers=headers, + ) + response.raise_for_status() + + content = response.text + + # Handle Wikipedia API JSON responses + if "wikipedia.org/api" in sample.url: + try: + data = json.loads(content) + if "extract" in data: + content = data["extract"] + elif "text" in data: + content = data["text"] + except json.JSONDecodeError: + pass + + # For httpbin JSON, create a readable summary + if "httpbin.org/json" in sample.url: + content = sample.fallback_content or "" + + # Strip HTML tags if present + if "<" in content and ">" in content: + content = re.sub(r"<[^>]+>", " ", content) + content = re.sub(r"\s+", " ", content).strip() + + # Check if content is too short + min_words = 80 + if len(content.split()) < min_words: + print(f" 📎 Fetched content too short ({len(content.split())} words), using fallback") + content = sample.fallback_content or generate_synthetic_content(1500) + + # For very long content, truncate to keep demo fast + words = content.split() + if len(words) > 13500: # noqa: PLR2004 + content = " ".join(words[:13500]) + print(" 📎 Truncated to ~13500 words for faster demo") + + return content.strip() + + except Exception as e: + print(f" ⚠️ Failed 
to fetch URL: {e}") + + if sample.fallback_content: + return sample.fallback_content.strip() + + # Generate synthetic content + return generate_synthetic_content(1500) + + +def print_result( + sample: TextSample, + result: SummaryResult, + content: str, + target_tokens: int | None, + target_ratio: float | None, +) -> None: + """Print a formatted summary result.""" + print("\n" + "=" * 70) + print(f"📄 {sample.name}") + print(f" {sample.description}") + print("=" * 70) + + # Input stats + word_count = len(content.split()) + print("\n📊 Input Statistics:") + print(f" Words: {word_count:,}") + print(f" Tokens: {result.input_tokens:,}") + print(f" Content type: {sample.content_type}") + + # Target info + print("\n🎯 Target:") + if target_ratio is not None: + print(f" Ratio: {target_ratio:.0%} of input") + print(f" Calculated target: ~{int(result.input_tokens * target_ratio):,} tokens") + elif target_tokens is not None: + print(f" Tokens: {target_tokens:,}") + else: + print(" Default: 3000 tokens (LangChain default)") + + # Result info + print("\n📝 Result:") + if result.summary == content: + print(" Status: ⏭️ Content already fits target (returned as-is)") + elif result.collapse_depth > 0: + print(f" Status: 🔄 Map-reduce summarization (collapse depth: {result.collapse_depth})") + else: + print(" Status: 📝 Single-pass summarization") + + print(f" Output tokens: {result.output_tokens:,}") + print(f" Compression: {result.compression_ratio:.1%}") + + # Summary content + if result.summary and result.summary != content: + print("\n📝 Summary:") + wrapped = textwrap.fill( + result.summary, + width=68, + initial_indent=" ", + subsequent_indent=" ", + ) + # Only show first ~500 chars of summary + if len(wrapped) > 600: # noqa: PLR2004 + wrapped = wrapped[:600] + "..." 
+ print(wrapped) + + +async def run_demo( + target_tokens: int | None = None, + target_ratio: float | None = None, + model: str | None = None, + base_url: str | None = None, +) -> None: + """Run the summarizer demo.""" + # Configuration + actual_base_url = base_url or os.environ.get("OPENAI_BASE_URL", DEFAULT_BASE_URL) + actual_model = model or os.environ.get("OPENAI_MODEL", DEFAULT_MODEL) + api_key = os.environ.get("OPENAI_API_KEY", "not-needed-for-local") + + print("🔧 Configuration:") + print(f" Base URL: {actual_base_url}") + print(f" Model: {actual_model}") + + config = SummarizerConfig( + openai_base_url=actual_base_url, + model=actual_model, + api_key=api_key, + chunk_size=2048, # BOOOOKSCORE default + max_concurrent_chunks=3, + ) + + async with httpx.AsyncClient() as client: + for sample in SAMPLES: + print(f"\n⏳ Processing: {sample.name}...") + + # Fetch content + content = await fetch_content(sample, client) + + try: + # Summarize with specified target + result = await summarize( + content=content, + config=config, + target_tokens=target_tokens, + target_ratio=target_ratio, + content_type=sample.content_type, + ) + + # Display results + print_result(sample, result, content, target_tokens, target_ratio) + + except Exception as e: + print(f"\n❌ Error summarizing {sample.name}: {e}") + traceback.print_exc() + + print("\n" + "=" * 70) + print("✅ Demo complete!") + print("=" * 70) + + +def main() -> None: + """CLI entry point.""" + parser = argparse.ArgumentParser( + description="Demonstrate adaptive summarization on texts of varying lengths", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=textwrap.dedent(""" + Examples: + python examples/summarizer_demo.py + python examples/summarizer_demo.py --target-ratio 0.2 + python examples/summarizer_demo.py --target-tokens 500 + python examples/summarizer_demo.py --model "llama3.1:8b" --base-url "http://localhost:11434/v1" + """), + ) + + parser.add_argument( + "--target-ratio", + type=float, + help="Target ratio for compression (e.g., 0.2 = compress to 20%%)", + ) + parser.add_argument( + "--target-tokens", + type=int, + help="Target token count for summary", + ) + parser.add_argument( + "--model", + help=f"Model to use (default: {DEFAULT_MODEL})", + ) + parser.add_argument( + "--base-url", + help=f"OpenAI-compatible API base URL (default: {DEFAULT_BASE_URL})", + ) + + args = parser.parse_args() + + if args.target_ratio is not None and args.target_tokens is not None: + parser.error("Cannot specify both --target-ratio and --target-tokens") + + asyncio.run( + run_demo( + target_tokens=args.target_tokens, + target_ratio=args.target_ratio, + model=args.model, + base_url=args.base_url, + ), + ) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 38c3df61..3f31e6b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ memory = [ "watchfiles>=0.21.0", # Until here same as 'rag' extras "pyyaml>=6.0.0", + "tiktoken>=0.5.0", # For token counting in adaptive summarization ] test = [ "pytest>=7.0.0", @@ -60,6 +61,7 @@ test = [ "pydantic-ai-slim[openai]", "pytest-timeout", "pytest-mock", + "tiktoken>=0.5.0", # For summarizer tests ] dev = [ "agent-cli[test]", @@ -87,6 +89,7 @@ dev = [ "notebook", "audiostretchy>=1.3.0", "pre-commit-uv>=4.1.4", + "tiktoken>=0.5.0", # For summarizer tests ] [project.scripts] diff --git a/scripts/compare_summarizers.py b/scripts/compare_summarizers.py new file mode 100644 index 00000000..15265cb0 --- /dev/null +++ b/scripts/compare_summarizers.py @@ -0,0 
+1,402 @@ +"""Compare old (L1-L4 hierarchical) vs new (adaptive map-reduce) summarizer. + +This script: +1. Shows what level each system would use for test content +2. Runs the NEW summarizer to produce actual summaries +3. Evaluates summary quality using needle-in-haystack questions +4. Uses LLM-as-judge for quality assessment + +Usage: + python scripts/compare_summarizers.py + python scripts/compare_summarizers.py --model "gpt-4o-mini" --base-url "https://api.openai.com/v1" +""" + +from __future__ import annotations + +import argparse +import asyncio +import os +import textwrap +from dataclasses import dataclass, field + +from agent_cli.summarizer import SummarizerConfig, summarize +from agent_cli.summarizer._utils import count_tokens + +# Old system thresholds +OLD_THRESHOLD_NONE = 100 +OLD_THRESHOLD_BRIEF = 500 +OLD_THRESHOLD_STANDARD = 3000 +OLD_THRESHOLD_DETAILED = 15000 + +# New system thresholds +NEW_THRESHOLD_NONE = 100 +NEW_THRESHOLD_BRIEF = 500 + +# Evaluation threshold +FACT_PRESERVATION_THRESHOLD = 0.5 + +# Test content at different sizes with embedded "needles" (specific facts) +TEST_CASES = [ + { + "name": "Brief Range (~300 tokens)", + "description": "Tests the 100-500 token range where OLD=BRIEF, NEW=BRIEF", + "content": """ + The artificial intelligence revolution is transforming every industry. + Machine learning algorithms now power recommendation systems, fraud detection, + and autonomous vehicles. Deep learning, a subset of machine learning, uses + neural networks with multiple layers to analyze complex patterns in data. + + Major tech companies are investing billions in AI research. Google's DeepMind + created AlphaGo, which defeated world champion Lee Sedol in March 2016 in + the ancient game of Go. OpenAI developed GPT models that can generate + human-like text. These advances raise both excitement and concerns about + the future of work and society. + + Researchers are working on making AI systems more transparent and aligned with + human values. The field of AI safety, pioneered by researchers like Stuart + Russell at UC Berkeley, aims to ensure that advanced AI systems remain + beneficial and under human control. + """, + "needles": [ + ("Who did AlphaGo defeat?", "Lee Sedol"), + ("When did AlphaGo win?", "March 2016"), + ("Who pioneered AI safety?", "Stuart Russell"), + ("Where does Stuart Russell work?", "UC Berkeley"), + ], + }, + { + "name": "Standard/MapReduce Range (~900 tokens)", + "description": "Tests 500-3000 range where OLD=STANDARD, NEW=MAP_REDUCE", + "content": """ + Climate change represents one of the most pressing challenges facing humanity. + The Earth's average temperature has risen approximately 1.1 degrees Celsius since + the pre-industrial era, primarily due to human activities that release greenhouse + gases. Carbon dioxide from burning fossil fuels accounts for 76% of emissions. + + The Intergovernmental Panel on Climate Change (IPCC), led by chair Hoesung Lee, + has warned that limiting warming to 1.5 degrees Celsius is crucial. The 2021 + report involved 234 authors from 66 countries analyzing over 14,000 scientific + papers. Their conclusion: human influence has warmed the climate at a rate + unprecedented in at least the last 2,000 years. + + Renewable energy offers hope. Solar panel costs dropped 89% between 2010 and 2020, + making solar competitive with fossil fuels. China leads with 306 gigawatts of + installed solar capacity. Wind energy has grown exponentially, with Denmark + generating 47% of its electricity from wind in 2019. 
+ + Electric vehicles are gaining ground. Tesla delivered 936,172 vehicles in 2021, + while traditional automakers race to electrify. Norway leads adoption, with + electric vehicles representing 65% of new car sales in 2021. Battery costs + have fallen 89% since 2010, from $1,100 to $132 per kilowatt-hour. + + Carbon capture remains expensive at $250-$600 per ton of CO2. The Orca plant + in Iceland, opened in September 2021, captures just 4,000 tons annually. + Critics note this equals emissions from about 870 cars. More radical approaches + like solar radiation management could cool the planet but carry unknown risks. + + The Paris Agreement, signed by 196 parties in December 2015, aims to limit + warming to well below 2 degrees. Countries submit Nationally Determined + Contributions (NDCs) outlining their emission reduction plans. However, + current pledges put the world on track for 2.7 degrees of warming by 2100. + + Individual actions matter but systemic change is essential. Agriculture accounts + for 10-12% of global emissions. Beef production generates 60 kg of CO2 equivalent + per kilogram of meat. A plant-based diet could reduce food emissions by up to 73%. + """, + "needles": [ + ("Who chairs the IPCC?", "Hoesung Lee"), + ("How many authors contributed to the 2021 IPCC report?", "234"), + ("What percent of Denmark's electricity comes from wind?", "47%"), + ("When did the Orca plant open?", "September 2021"), + ("How many vehicles did Tesla deliver in 2021?", "936,172"), + ("What percent of Norway's new cars are electric?", "65%"), + ("When was the Paris Agreement signed?", "December 2015"), + ("How much CO2 does beef production generate per kg?", "60 kg"), + ], + }, + { + "name": "Detailed/MapReduce Range (~1800 tokens)", + "description": "Tests larger content where OLD=DETAILED (chunks+meta), NEW=MAP_REDUCE", + "content": """ + The history of computing spans centuries of human innovation, from ancient + calculating devices to quantum computers. Understanding this evolution reveals + how incremental advances compound into revolutionary change. + + Ancient Foundations (2400 BCE - 1600 CE) + + The abacus emerged independently in multiple civilizations. Chinese merchants + used the suanpan as early as 2400 BCE for arithmetic. The Roman abacus used + grooved beads, while the Japanese soroban featured a distinctive 1:4 bead + arrangement still used today. + + Mechanical Calculation (1600-1900) + + In 1642, nineteen-year-old Blaise Pascal invented the Pascaline to help his + tax-collector father. This brass rectangular box could add and subtract using + interlocking gears. Only 50 were built, and 9 survive in museums today. + + Gottfried Wilhelm Leibniz improved Pascal's design in 1694, creating the + Stepped Reckoner capable of multiplication and division. He also invented + binary arithmetic, writing "Explication de l'Arithmétique Binaire" in 1703, + laying groundwork for digital computing. + + Charles Babbage designed the Analytical Engine from 1833-1871, incorporating + a mill (processor), store (memory), and punch card input. Ada Lovelace wrote + detailed notes including what's considered the first algorithm - for computing + Bernoulli numbers. The engine was never completed; Babbage died in 1871. + + Electronic Era (1900-1970) + + Alan Turing published "On Computable Numbers" in 1936, defining the theoretical + Turing machine. During WWII, he led the team at Bletchley Park that cracked + the Enigma code, shortening the war by an estimated two years. 
+ + ENIAC, completed February 14, 1946, at the University of Pennsylvania, was + the first general-purpose electronic computer. It weighed 30 tons, consumed + 150 kilowatts, and contained 17,468 vacuum tubes. Programming required + physically rewiring the machine, taking days for each new problem. + + The transistor, invented December 23, 1947, at Bell Labs by John Bardeen, + Walter Brattain, and William Shockley, revolutionized electronics. They + shared the 1956 Nobel Prize in Physics. By 1954, the TRADIC computer used + 800 transistors instead of vacuum tubes. + + Jack Kilby demonstrated the first integrated circuit on September 12, 1958, + at Texas Instruments. Robert Noyce independently developed a superior silicon + version at Fairchild. Kilby won the 2000 Nobel Prize; Noyce had died in 1990. + + Personal Computing (1970-2000) + + Intel's 4004, released November 15, 1971, was the first commercial microprocessor. + Designed by Federico Faggin, it contained 2,300 transistors running at 740 kHz. + The 8080 (1974) powered the Altair 8800, sparking the PC revolution. + + Steve Wozniak built the Apple I in 1976 in his garage. The Apple II (1977) + featured color graphics and cost $1,298. IBM entered with the PC on August 12, + 1981, using Microsoft's MS-DOS. By 1984, Apple's Macintosh introduced the GUI + to mainstream users at $2,495. + + Tim Berners-Lee invented the World Wide Web at CERN in 1989, proposing it + on March 12. The first website went live December 20, 1990. By 1995, the + internet had 16 million users; by 2000, 361 million. + + Modern Era (2000-Present) + + Moore's Law, predicting transistor doubling every two years, has held since + Gordon Moore's 1965 observation. Intel's 2021 Alder Lake processors contain + 10+ billion transistors on chips measuring 215 mm². + + Steve Jobs unveiled the iPhone on January 9, 2007. It sold 1.4 million units + in its first year. Smartphones now exceed 6.6 billion globally, containing + more power than 1990s supercomputers. + + Google claimed quantum supremacy October 23, 2019, with Sycamore completing + a calculation in 200 seconds that would take 10,000 years classically. + IBM disputed this, but the quantum era has clearly begun. 
+ """, + "needles": [ + ("How old was Pascal when he invented the Pascaline?", "19"), + ("When did Leibniz write about binary arithmetic?", "1703"), + ("How many vacuum tubes did ENIAC contain?", "17,468"), + ("When was the transistor invented?", "December 23, 1947"), + ("When did Jack Kilby demonstrate the integrated circuit?", "September 12, 1958"), + ("How many transistors did the Intel 4004 have?", "2,300"), + ("When did the first website go live?", "December 20, 1990"), + ("When did Jobs unveil the iPhone?", "January 9, 2007"), + ("When did Google claim quantum supremacy?", "October 23, 2019"), + ], + }, +] + + +def get_old_level(tokens: int) -> tuple[str, str]: + """Determine what level the OLD (L1-L4) summarizer would use.""" + if tokens < OLD_THRESHOLD_NONE: + return "NONE", "No summary needed" + if tokens < OLD_THRESHOLD_BRIEF: + return "BRIEF", "Single sentence (~20% compression)" + if tokens < OLD_THRESHOLD_STANDARD: + return "STANDARD", "Paragraph with content-aware prompts (~12%)" + if tokens < OLD_THRESHOLD_DETAILED: + return "DETAILED", "Chunked L1 summaries + meta L3 (~7%)" + return "HIERARCHICAL", "Full L1/L2/L3 tree structure" + + +def get_new_level(tokens: int) -> tuple[str, str]: + """Determine what level the NEW (adaptive) summarizer would use.""" + if tokens < NEW_THRESHOLD_NONE: + return "NONE", "No summary needed" + if tokens < NEW_THRESHOLD_BRIEF: + return "BRIEF", "Single sentence" + return "MAP_REDUCE", "Dynamic collapse based on content" + + +@dataclass +class TestResult: + """Result of testing one content sample.""" + + name: str + tokens: int + old_level: str + old_description: str + new_level: str + new_description: str + new_summary: str | None = None + needles_found: int = 0 + total_needles: int = 0 + needle_details: list[tuple[str, str, bool]] = field(default_factory=list) + + +async def run_test(test_case: dict, config: dict) -> TestResult: + """Run a single test case.""" + content = test_case["content"].strip() + tokens = count_tokens(content, config["model"]) + + old_level, old_desc = get_old_level(tokens) + new_level, new_desc = get_new_level(tokens) + + # Run new summarizer + cfg = SummarizerConfig( + openai_base_url=config["base_url"], + model=config["model"], + api_key=config.get("api_key", "not-needed"), + ) + + result = await summarize(content, cfg, content_type="document") + + # Check needles in summary + needle_details = [] + needles_found = 0 + + if result.summary: + summary_lower = result.summary.lower() + for question, answer in test_case["needles"]: + # Check if the key fact is preserved + found = answer.lower() in summary_lower + needle_details.append((question, answer, found)) + if found: + needles_found += 1 + + return TestResult( + name=test_case["name"], + tokens=tokens, + old_level=old_level, + old_description=old_desc, + new_level=new_level, + new_description=new_desc, + new_summary=result.summary, + needles_found=needles_found, + total_needles=len(test_case["needles"]), + needle_details=needle_details, + ) + + +def print_result(result: TestResult) -> None: + """Print a test result.""" + print(f"\n{'=' * 70}") + print(f"{result.name}") + print(f"{'=' * 70}") + print(f"Input tokens: {result.tokens}") + print() + print("Level comparison:") + print(f" OLD: {result.old_level:12} - {result.old_description}") + print(f" NEW: {result.new_level:12} - {result.new_description}") + print() + + if result.new_summary: + print("New summary:") + wrapped = textwrap.fill( + result.new_summary, + width=68, + initial_indent=" ", + subsequent_indent=" ", 
+ ) + print(wrapped) + print() + + print( + f"Needle-in-haystack test: {result.needles_found}/{result.total_needles} facts preserved", + ) + for question, answer, found in result.needle_details: + status = "[OK]" if found else "[MISSING]" + print(f" {status} {question} -> {answer}") + else: + print("No summary produced (NONE level)") + + +async def main() -> None: + """Run all tests.""" + parser = argparse.ArgumentParser(description="Compare summarizer versions") + parser.add_argument("--model", default=os.environ.get("OPENAI_MODEL", "gpt-oss-high:20b")) + parser.add_argument( + "--base-url", + default=os.environ.get("OPENAI_BASE_URL", "http://192.168.1.143:9292/v1"), + ) + parser.add_argument("--api-key", default=os.environ.get("OPENAI_API_KEY", "not-needed")) + args = parser.parse_args() + + config = { + "model": args.model, + "base_url": args.base_url, + "api_key": args.api_key, + } + + print("=" * 70) + print("SUMMARIZER COMPARISON: OLD (L1-L4) vs NEW (Adaptive Map-Reduce)") + print("=" * 70) + print(f"Model: {config['model']}") + print(f"Base URL: {config['base_url']}") + + results = [] + for test in TEST_CASES: + print(f"\nRunning: {test['name']}...") + result = await run_test(test, config) + results.append(result) + print_result(result) + + # Summary + print("\n" + "=" * 70) + print("SUMMARY") + print("=" * 70) + + total_needles = sum(r.total_needles for r in results) + found_needles = sum(r.needles_found for r in results) + + print( + f"\nOverall fact preservation: {found_needles}/{total_needles} ({100 * found_needles / total_needles:.1f}%)", + ) + print() + + print("Key differences:") + print(""" +OLD System (5 levels): + - NONE (<100), BRIEF (100-500), STANDARD (500-3000), + DETAILED (3000-15000), HIERARCHICAL (>15000) + - Fixed boundaries, L1/L2/L3 tree for large content + - Stored intermediate summaries at each level + - Chunk size: 3000 tokens + +NEW System (3 levels): + - NONE (<100), BRIEF (100-500), MAP_REDUCE (>=500) + - Dynamic collapse depth based on content + - Content-type aware prompts + - Chunk size: 2048 tokens (BOOOOKSCORE research) + - Only stores final summary + +Trade-offs: + + Simpler (3 levels vs 5) + + Research-backed parameters + + Content-aware prompts + - No intermediate level access + - All >=500 token content treated the same +""") + + print("Verdict: ", end="") + if found_needles / total_needles >= FACT_PRESERVATION_THRESHOLD: + print("NEW system preserves facts adequately") + else: + print("NEW system may lose important details - further tuning needed") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/tests/memory/test_engine.py b/tests/memory/test_engine.py index 0f032adf..d8cd3526 100644 --- a/tests/memory/test_engine.py +++ b/tests/memory/test_engine.py @@ -21,8 +21,8 @@ MemoryMetadata, Message, StoredMemory, - SummaryOutput, ) +from agent_cli.summarizer import SummaryResult class _DummyReranker: @@ -247,13 +247,13 @@ def fake_query_memories( ) monkeypatch.setattr( _retrieval, - "get_summary_entry", - lambda _collection, _cid, role: StoredMemory( # type: ignore[return-value] - id=f"{role}-id", - content=f"{role} content", + "get_final_summary", + lambda _collection, _cid: StoredMemory( + id="summary-id", + content="summary content", metadata=MemoryMetadata( conversation_id="conv1", - role=role, + role="summary", created_at=now.isoformat(), ), ), @@ -342,11 +342,17 @@ def __init__(self, output: Any) -> None: self.output = output prompt_str = str(prompt_text) - if "New facts:" in prompt_str: - return 
_Result(SummaryOutput(summary="summary up to 256")) if "Hello, I enjoy biking" in prompt_str: return _Result(["User likes cats.", "User loves biking."]) - return _Result(SummaryOutput(summary="noop")) + return _Result([]) + + async def fake_summarize_content(**_kwargs: Any) -> SummaryResult: + return SummaryResult( + summary="summary up to 256", + input_tokens=100, + output_tokens=20, + compression_ratio=0.2, + ) async def fake_reconcile( _collection: Any, @@ -370,6 +376,7 @@ async def fake_reconcile( import pydantic_ai # noqa: PLC0415 monkeypatch.setattr(pydantic_ai.Agent, "run", fake_agent_run) + monkeypatch.setattr(_ingest, "summarize_content", fake_summarize_content) # High relevance so they aren't filtered monkeypatch.setattr(_retrieval, "predict_relevance", lambda _model, pairs: [5.0 for _ in pairs]) @@ -562,11 +569,17 @@ def __init__(self, output: Any) -> None: self.output = output prompt_str = str(prompt_text) - if "New facts:" in prompt_str: - return _Result(SummaryOutput(summary="summary text")) if "My cat is Luna" in prompt_str: return _Result(["User has a cat named Luna."]) - return _Result(SummaryOutput(summary="noop")) + return _Result([]) + + async def fake_summarize_content(**_kwargs: Any) -> SummaryResult: + return SummaryResult( + summary="summary text", + input_tokens=100, + output_tokens=20, + compression_ratio=0.2, + ) monkeypatch.setattr(engine._streaming, "stream_chat_sse", fake_stream_chat_sse) @@ -592,6 +605,7 @@ async def fake_reconcile( import pydantic_ai # noqa: PLC0415 monkeypatch.setattr(pydantic_ai.Agent, "run", fake_agent_run) + monkeypatch.setattr(_ingest, "summarize_content", fake_summarize_content) response = await engine.process_chat_request( request, @@ -608,4 +622,4 @@ async def fake_reconcile( files = list(tmp_path.glob("entries/**/*.md")) assert len(files) == 4 # user + assistant + fact + 1 summary assert any("facts" in str(f) for f in files) - assert any("summaries/summary.md" in str(f) for f in files) + assert any("summaries" in str(f) for f in files) diff --git a/tests/memory/test_git_integration.py b/tests/memory/test_git_integration.py index 7d59f7c0..64130990 100644 --- a/tests/memory/test_git_integration.py +++ b/tests/memory/test_git_integration.py @@ -14,6 +14,7 @@ from agent_cli.memory import _ingest from agent_cli.memory.client import MemoryClient from agent_cli.memory.entities import Fact +from agent_cli.summarizer import SummaryResult if TYPE_CHECKING: from pathlib import Path @@ -63,12 +64,17 @@ async def fake_reconcile( ] return entries, [], {} - async def fake_update_summary(*_args: Any, **_kwargs: Any) -> str: - return "User likes testing." 
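
These tests replace the old string-returning fake_update_summary with a fake that returns a SummaryResult, since ingest now persists structured summary metadata rather than a bare string. A minimal sketch, assuming the ingest layer wires the pieces together roughly like this (the agent_cli.memory._store import path mirrors tests/memory/test_store.py, and the persist_summary helper itself is illustrative, not code from the patch):

# Illustrative only: persist a SummaryResult the way the storage tests exercise it.
from agent_cli.memory import _store  # assumed import path, mirroring tests/memory/test_store.py
from agent_cli.summarizer import SummaryResult


def persist_summary(collection, conversation_id: str, result: SummaryResult) -> list[str]:
    # to_storage_metadata() returns [] when no summary was generated (NONE level),
    # so the upsert below is simply a no-op in that case.
    entries = result.to_storage_metadata(conversation_id)
    return _store.upsert_summary_entries(collection, entries)
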
+ async def fake_summarize_content(**_kwargs: Any) -> SummaryResult: + return SummaryResult( + summary="User likes testing.", + input_tokens=100, + output_tokens=20, + compression_ratio=0.2, + ) monkeypatch.setattr(_ingest, "extract_salient_facts", fake_extract) monkeypatch.setattr(_ingest, "reconcile_facts", fake_reconcile) - monkeypatch.setattr(_ingest, "update_summary", fake_update_summary) + monkeypatch.setattr(_ingest, "summarize_content", fake_summarize_content) # Patch Reranker to avoid loading ONNX model monkeypatch.setattr("agent_cli.memory.client.get_reranker_model", MagicMock()) diff --git a/tests/memory/test_store.py b/tests/memory/test_store.py index 98334e45..29dbe2e5 100644 --- a/tests/memory/test_store.py +++ b/tests/memory/test_store.py @@ -101,23 +101,6 @@ def query(self, **kwargs: Any) -> dict[str, Any]: assert {"role": {"$ne": "summary"}} in clauses -def test_get_summary_entry_returns_entry() -> None: - # ChromaDB's .get() returns flat lists (not nested like .query()) - fake = _FakeCollection( - get_result={ - "documents": ["summary text"], - "metadatas": [ - {"conversation_id": "c1", "role": "summary", "created_at": "now"}, - ], - "ids": ["sum1"], - }, - ) - entry = _store.get_summary_entry(fake, "c1", role="summary") - assert entry is not None - assert entry.id == "sum1" - assert entry.metadata.role == "summary" - - def test_list_conversation_entries_filters_summaries() -> None: # ChromaDB's .get() returns flat lists (not nested like .query()) fake = _FakeCollection( @@ -148,3 +131,144 @@ def test_upsert_and_delete_entries_delegate() -> None: _store.delete_entries(fake, ["x"]) assert fake.deleted == [["x"]] + + +# --- Summary Entry Tests --- + + +def test_upsert_summary_entries_simple() -> None: + """Test upserting a summary.""" + fake = _FakeCollection() + entries = [ + { + "id": "conv-123:summary", + "content": "A paragraph summary.", + "metadata": { + "conversation_id": "conv-123", + "role": "summary", + "is_final": True, + "summary_level": "MAP_REDUCE", + "input_tokens": 1000, + "output_tokens": 50, + "compression_ratio": 0.05, + "collapse_depth": 0, + "created_at": "2024-01-01T00:00:00", + }, + }, + ] + + ids = _store.upsert_summary_entries(fake, entries) + + assert ids == ["conv-123:summary"] + assert len(fake.upserts) == 1 + upserted_ids, upserted_docs, upserted_metas = fake.upserts[0] + assert upserted_ids == ["conv-123:summary"] + assert upserted_docs == ["A paragraph summary."] + assert upserted_metas[0]["is_final"] is True + + +def test_upsert_summary_entries_with_collapse_depth() -> None: + """Test upserting a summary with collapse depth metadata.""" + fake = _FakeCollection() + entries = [ + { + "id": "conv-456:summary", + "content": "Final synthesis", + "metadata": { + "conversation_id": "conv-456", + "role": "summary", + "is_final": True, + "summary_level": "MAP_REDUCE", + "input_tokens": 5000, + "output_tokens": 100, + "compression_ratio": 0.02, + "collapse_depth": 2, + "created_at": "2024-01-01T00:00:00", + }, + }, + ] + + ids = _store.upsert_summary_entries(fake, entries) + + assert len(ids) == 1 + assert ids[0] == "conv-456:summary" + assert fake.upserts[0][2][0]["collapse_depth"] == 2 + + +def test_upsert_summary_entries_empty() -> None: + """Test upserting when there are no entries (e.g., NONE level).""" + fake = _FakeCollection() + + ids = _store.upsert_summary_entries(fake, []) + + assert ids == [] + assert len(fake.upserts) == 0 + + +def test_get_final_summary_returns_summary() -> None: + """Test getting the final summary for a conversation.""" 
+ fake = _FakeCollection( + get_result={ + "documents": ["The final summary"], + "metadatas": [ + { + "conversation_id": "c1", + "role": "summary", + "is_final": True, + "summary_level": "MAP_REDUCE", + "collapse_depth": 1, + "created_at": "now", + }, + ], + "ids": ["c1:summary"], + }, + ) + + result = _store.get_final_summary(fake, "c1") + + assert result is not None + assert result.content == "The final summary" + assert result.metadata.is_final is True + + +def test_get_final_summary_returns_none_when_missing() -> None: + """Test that get_final_summary returns None when no summary exists.""" + fake = _FakeCollection(get_result={"documents": [], "metadatas": [], "ids": []}) + + result = _store.get_final_summary(fake, "c1") + + assert result is None + + +def test_delete_summaries() -> None: + """Test deleting summaries for a conversation.""" + fake = _FakeCollection( + get_result={ + "documents": ["The summary"], + "metadatas": [ + { + "conversation_id": "c1", + "role": "summary", + "summary_level": "MAP_REDUCE", + "created_at": "now", + }, + ], + "ids": ["c1:summary"], + }, + ) + + deleted_count = _store.delete_summaries(fake, "c1") + + assert deleted_count == 1 + assert len(fake.deleted) == 1 + assert fake.deleted[0] == ["c1:summary"] + + +def test_delete_summaries_no_entries() -> None: + """Test deleting when no summaries exist.""" + fake = _FakeCollection(get_result={"documents": [], "metadatas": [], "ids": []}) + + deleted_count = _store.delete_summaries(fake, "c1") + + assert deleted_count == 0 + assert len(fake.deleted) == 0 diff --git a/tests/summarizer/__init__.py b/tests/summarizer/__init__.py new file mode 100644 index 00000000..d6801b31 --- /dev/null +++ b/tests/summarizer/__init__.py @@ -0,0 +1 @@ +"""Tests for the adaptive summarizer module.""" diff --git a/tests/summarizer/test_adaptive.py b/tests/summarizer/test_adaptive.py new file mode 100644 index 00000000..1fbf3d7b --- /dev/null +++ b/tests/summarizer/test_adaptive.py @@ -0,0 +1,245 @@ +"""Unit tests for adaptive summarization functions.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from agent_cli.summarizer._utils import ( + SummarizationError, + SummarizerConfig, + SummaryOutput, + generate_summary, +) +from agent_cli.summarizer.adaptive import summarize +from agent_cli.summarizer.map_reduce import MapReduceResult + + +class TestSummarizerConfig: + """Tests for SummarizerConfig initialization.""" + + def test_basic_init(self) -> None: + """Test basic initialization with required parameters.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="llama3.1:8b", + ) + assert config.openai_base_url == "http://localhost:8000/v1" + assert config.model == "llama3.1:8b" + assert config.api_key == "not-needed" + + def test_init_with_api_key(self) -> None: + """Test initialization with custom API key.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + api_key="sk-test-key", + ) + assert config.api_key == "sk-test-key" + + def test_init_with_custom_settings(self) -> None: + """Test initialization with custom chunk settings.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + chunk_size=5000, + chunk_overlap=300, + max_concurrent_chunks=10, + ) + assert config.chunk_size == 5000 + assert config.chunk_overlap == 300 + assert config.max_concurrent_chunks == 10 + + def test_trailing_slash_stripped(self) -> None: + """Test that trailing slash 
is stripped from base URL.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1/", + model="gpt-4", + ) + assert config.openai_base_url == "http://localhost:8000/v1" + + def test_default_chunk_size_is_booookscore(self) -> None: + """Test that default chunk_size follows BOOOOKSCORE recommendation.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + assert config.chunk_size == 2048 # BOOOOKSCORE's tested default + + def test_default_token_max_is_langchain(self) -> None: + """Test that default token_max follows LangChain's default.""" + config = SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + assert config.token_max == 3000 # LangChain's default + + +class TestSummarize: + """Tests for main summarize function.""" + + @pytest.fixture + def config(self) -> SummarizerConfig: + """Create a config instance.""" + return SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + + @pytest.mark.asyncio + async def test_empty_content_returns_no_summary( + self, + config: SummarizerConfig, + ) -> None: + """Test that empty content returns result with no summary.""" + result = await summarize("", config) + assert result.summary is None + assert result.input_tokens == 0 + assert result.output_tokens == 0 + + @pytest.mark.asyncio + async def test_whitespace_only_returns_no_summary( + self, + config: SummarizerConfig, + ) -> None: + """Test that whitespace-only content returns result with no summary.""" + result = await summarize(" \n\n ", config) + assert result.summary is None + + @pytest.mark.asyncio + async def test_short_content_returns_as_is( + self, + config: SummarizerConfig, + ) -> None: + """Test that short content is returned as-is (no LLM call).""" + # Less than default token_max (3000) + result = await summarize("Hello world", config) + assert result.summary == "Hello world" + assert result.compression_ratio == 1.0 # No compression + + @pytest.mark.asyncio + async def test_target_tokens_respected( + self, + config: SummarizerConfig, + ) -> None: + """Test that content fitting target_tokens is returned as-is.""" + content = "Short content" + result = await summarize(content, config, target_tokens=1000) + assert result.summary == content + assert result.compression_ratio == 1.0 + + @pytest.mark.asyncio + async def test_target_ratio_calculates_target( + self, + config: SummarizerConfig, + ) -> None: + """Test that target_ratio calculates correct target.""" + # Short content that fits even with 10% target + content = "Hello" + result = await summarize(content, config, target_ratio=0.1) + # Content is so short it fits in 10% target + assert result.summary == content + + @pytest.mark.asyncio + @patch("agent_cli.summarizer.adaptive._content_aware_summary") + async def test_content_exceeding_target_gets_summarized( + self, + mock_summary: AsyncMock, + config: SummarizerConfig, + ) -> None: + """Test that content exceeding target gets summarized.""" + mock_summary.return_value = "Summarized content." + + # Create content that's ~500 tokens (exceeds target of 100) + content = "This is a test sentence. " * 100 + + result = await summarize(content, config, target_tokens=100) + + mock_summary.assert_called_once() + assert result.summary == "Summarized content." 
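
Taken together, the tests in this class pin down how summarize() routes input: empty or whitespace-only text produces no summary, text that already fits the target is returned unchanged, text above the target but within a single chunk takes the content-aware single pass, and anything larger is handed to map-reduce. A rough sketch of that routing, with the helper names taken from the @patch targets but their parameters assumed, not taken from the patch:

# Rough routing sketch only; the helper parameters below are assumptions, not the
# actual signatures in agent_cli.summarizer.adaptive.
from agent_cli.summarizer._utils import SummarizerConfig, count_tokens
from agent_cli.summarizer.adaptive import _content_aware_summary, map_reduce_summarize


async def route_summarize(content: str, config: SummarizerConfig, target_tokens: int) -> str | None:
    if not content.strip():
        return None  # empty input -> SummaryResult(summary=None)
    tokens = count_tokens(content, config.model)
    if tokens <= target_tokens:
        return content  # already fits -> returned as-is, compression_ratio == 1.0
    if tokens <= config.chunk_size:
        # Single content-aware pass (assumed call shape).
        return await _content_aware_summary(content, config, target_tokens)
    # Chunk, summarize chunks concurrently, collapse until the result fits (assumed call shape).
    result = await map_reduce_summarize(content, config, target_tokens)
    return result.summary
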
+ + @pytest.mark.asyncio + @patch("agent_cli.summarizer.adaptive.map_reduce_summarize") + async def test_large_content_uses_map_reduce( + self, + mock_map_reduce: AsyncMock, + config: SummarizerConfig, + ) -> None: + """Test that content exceeding chunk_size uses map-reduce.""" + mock_map_reduce.return_value = MapReduceResult( + summary="Map-reduce summary.", + input_tokens=5000, + output_tokens=100, + compression_ratio=0.02, + collapse_depth=1, + intermediate_summaries=[["chunk1", "chunk2"]], + ) + + # Create content larger than chunk_size (2048) + content = "Word " * 3000 # ~3000 tokens + + result = await summarize(content, config, target_tokens=500) + + mock_map_reduce.assert_called_once() + assert result.summary == "Map-reduce summary." + + +class TestGenerateSummary: + """Tests for generate_summary function.""" + + @pytest.fixture + def config(self) -> SummarizerConfig: + """Create a config instance.""" + return SummarizerConfig( + openai_base_url="http://localhost:8000/v1", + model="gpt-4", + ) + + @pytest.mark.asyncio + async def test_generate_summary_with_pydantic_ai( + self, + config: SummarizerConfig, + ) -> None: + """Test summary generation using PydanticAI agent.""" + # Mock the entire agent creation and run + mock_result = MagicMock() + mock_result.output = SummaryOutput(summary="Generated summary.") + + with patch("pydantic_ai.Agent") as mock_agent_class: + mock_agent = MagicMock() + mock_agent.run = AsyncMock(return_value=mock_result) + mock_agent_class.return_value = mock_agent + + result = await generate_summary("Test prompt", config, max_tokens=100) + + assert result == "Generated summary." + mock_agent.run.assert_called_once_with("Test prompt") + + @pytest.mark.asyncio + async def test_raises_summarization_error_on_failure( + self, + config: SummarizerConfig, + ) -> None: + """Test that SummarizationError is raised on failure.""" + with patch("pydantic_ai.Agent") as mock_agent_class: + mock_agent = MagicMock() + mock_agent.run = AsyncMock(side_effect=Exception("API error")) + mock_agent_class.return_value = mock_agent + + with pytest.raises(SummarizationError, match="Summarization failed"): + await generate_summary("Test prompt", config, max_tokens=100) + + +class TestSummaryOutput: + """Tests for SummaryOutput pydantic model.""" + + def test_basic_creation(self) -> None: + """Test creating a SummaryOutput.""" + output = SummaryOutput(summary="Test summary text") + assert output.summary == "Test summary text" + + def test_whitespace_preserved(self) -> None: + """Test that whitespace in summary is preserved.""" + output = SummaryOutput(summary=" Summary with spaces ") + assert output.summary == " Summary with spaces " diff --git a/tests/summarizer/test_integration.py b/tests/summarizer/test_integration.py new file mode 100644 index 00000000..867815ce --- /dev/null +++ b/tests/summarizer/test_integration.py @@ -0,0 +1,40 @@ +"""Integration tests for summarizer with storage layer.""" + +from __future__ import annotations + +from agent_cli.summarizer.models import SummaryResult + + +class TestSummaryResultStorage: + """Tests for SummaryResult storage metadata generation.""" + + def test_to_storage_metadata_creates_entry(self) -> None: + """Test that to_storage_metadata creates a valid entry.""" + result = SummaryResult( + summary="A comprehensive summary.", + input_tokens=5000, + output_tokens=100, + compression_ratio=0.02, + collapse_depth=1, + ) + entries = result.to_storage_metadata("test-conversation") + + assert len(entries) == 1 + entry = entries[0] + assert 
entry["id"] == "test-conversation:summary" + assert entry["content"] == "A comprehensive summary." + assert entry["metadata"]["conversation_id"] == "test-conversation" + assert entry["metadata"]["role"] == "summary" + assert entry["metadata"]["is_final"] is True + assert entry["metadata"]["collapse_depth"] == 1 + + def test_no_summary_returns_empty(self) -> None: + """Test that no summary produces no storage entries.""" + result = SummaryResult( + summary=None, + input_tokens=50, + output_tokens=0, + compression_ratio=0.0, + ) + entries = result.to_storage_metadata("test-conversation") + assert entries == [] diff --git a/tests/summarizer/test_models.py b/tests/summarizer/test_models.py new file mode 100644 index 00000000..05d5625f --- /dev/null +++ b/tests/summarizer/test_models.py @@ -0,0 +1,117 @@ +"""Unit tests for summarizer models.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest + +from agent_cli.summarizer.models import ( + SummaryResult, +) + + +class TestSummaryResult: + """Tests for SummaryResult model.""" + + def test_result_with_no_summary(self) -> None: + """Test result when content already fits target.""" + result = SummaryResult( + summary=None, + input_tokens=50, + output_tokens=0, + compression_ratio=0.0, + ) + assert result.summary is None + assert result.collapse_depth == 0 + + def test_result_with_summary(self) -> None: + """Test result with a generated summary.""" + result = SummaryResult( + summary="A comprehensive summary.", + input_tokens=5000, + output_tokens=100, + compression_ratio=0.02, + collapse_depth=2, + ) + assert result.summary == "A comprehensive summary." + assert result.collapse_depth == 2 + + def test_to_storage_metadata_no_summary(self) -> None: + """Test that no summary produces no storage entries.""" + result = SummaryResult( + summary=None, + input_tokens=50, + output_tokens=0, + compression_ratio=0.0, + ) + entries = result.to_storage_metadata("conv-123") + assert entries == [] + + def test_to_storage_metadata_with_summary(self) -> None: + """Test storage metadata for a summary.""" + result = SummaryResult( + summary="A brief summary.", + input_tokens=200, + output_tokens=10, + compression_ratio=0.05, + ) + entries = result.to_storage_metadata("conv-456") + assert len(entries) == 1 + entry = entries[0] + assert entry["id"] == "conv-456:summary" + assert entry["content"] == "A brief summary." + assert entry["metadata"]["conversation_id"] == "conv-456" + assert entry["metadata"]["role"] == "summary" + assert entry["metadata"]["is_final"] is True + + def test_to_storage_metadata_with_collapse_depth(self) -> None: + """Test storage metadata includes collapse depth.""" + result = SummaryResult( + summary="Final synthesis of content.", + input_tokens=20000, + output_tokens=200, + compression_ratio=0.01, + collapse_depth=3, + ) + entries = result.to_storage_metadata("conv-789") + + assert len(entries) == 1 + entry = entries[0] + assert entry["id"] == "conv-789:summary" + assert entry["content"] == "Final synthesis of content." 
+ assert entry["metadata"]["collapse_depth"] == 3 + assert entry["metadata"]["is_final"] is True + + def test_compression_ratio_bounds(self) -> None: + """Test compression ratio validation.""" + # Valid ratio + result = SummaryResult( + summary="Test", + input_tokens=100, + output_tokens=10, + compression_ratio=0.1, + ) + assert result.compression_ratio == 0.1 + + # Ratio must be between 0 and 1 + with pytest.raises(ValueError, match="less than or equal to 1"): + SummaryResult( + summary="Test", + input_tokens=100, + output_tokens=10, + compression_ratio=1.5, + ) + + def test_created_at_default(self) -> None: + """Test that created_at is automatically set.""" + before = datetime.now(UTC) + result = SummaryResult( + summary="Test", + input_tokens=100, + output_tokens=10, + compression_ratio=0.1, + ) + after = datetime.now(UTC) + # All datetimes should be UTC-aware + assert before <= result.created_at <= after diff --git a/tests/summarizer/test_prompts.py b/tests/summarizer/test_prompts.py new file mode 100644 index 00000000..825fe077 --- /dev/null +++ b/tests/summarizer/test_prompts.py @@ -0,0 +1,168 @@ +"""Unit tests for summarizer prompt templates.""" + +from __future__ import annotations + +from agent_cli.summarizer._prompts import ( + CHUNK_SUMMARY_PROMPT, + CONVERSATION_SUMMARY_PROMPT, + DOCUMENT_SUMMARY_PROMPT, + GENERAL_SUMMARY_PROMPT, + JOURNAL_SUMMARY_PROMPT, + META_SUMMARY_PROMPT, + format_prior_context, + format_summaries_for_meta, + get_prompt_for_content_type, +) + + +class TestPromptTemplates: + """Tests for prompt template structure.""" + + def test_general_prompt_has_placeholders(self) -> None: + """Test GENERAL prompt contains required placeholders.""" + assert "{content}" in GENERAL_SUMMARY_PROMPT + assert "{prior_context}" in GENERAL_SUMMARY_PROMPT + assert "{max_words}" in GENERAL_SUMMARY_PROMPT + + result = GENERAL_SUMMARY_PROMPT.format( + content="Main content", + prior_context="Previous context", + max_words=100, + ) + assert "Main content" in result + assert "Previous context" in result + assert "100" in result + + def test_chunk_prompt_has_placeholders(self) -> None: + """Test CHUNK prompt contains required placeholders.""" + assert "{content}" in CHUNK_SUMMARY_PROMPT + assert "{chunk_index}" in CHUNK_SUMMARY_PROMPT + assert "{total_chunks}" in CHUNK_SUMMARY_PROMPT + assert "{max_words}" in CHUNK_SUMMARY_PROMPT + + result = CHUNK_SUMMARY_PROMPT.format( + content="Chunk content", + chunk_index=1, + total_chunks=5, + max_words=50, + ) + assert "Chunk content" in result + assert "1" in result + assert "5" in result + + def test_meta_prompt_has_placeholders(self) -> None: + """Test META prompt contains required placeholders.""" + assert "{summaries}" in META_SUMMARY_PROMPT + assert "{max_words}" in META_SUMMARY_PROMPT + + result = META_SUMMARY_PROMPT.format( + summaries="Summary 1\n\nSummary 2", + max_words=200, + ) + assert "Summary 1" in result + assert "200" in result + + def test_conversation_prompt_has_placeholders(self) -> None: + """Test CONVERSATION prompt contains required placeholders.""" + assert "{content}" in CONVERSATION_SUMMARY_PROMPT + assert "{max_words}" in CONVERSATION_SUMMARY_PROMPT + assert "{prior_context}" in CONVERSATION_SUMMARY_PROMPT + + def test_journal_prompt_has_placeholders(self) -> None: + """Test JOURNAL prompt contains required placeholders.""" + assert "{content}" in JOURNAL_SUMMARY_PROMPT + assert "{max_words}" in JOURNAL_SUMMARY_PROMPT + assert "{prior_context}" in JOURNAL_SUMMARY_PROMPT + + def 
test_document_prompt_has_placeholders(self) -> None: + """Test DOCUMENT prompt contains required placeholders.""" + assert "{content}" in DOCUMENT_SUMMARY_PROMPT + assert "{max_words}" in DOCUMENT_SUMMARY_PROMPT + assert "{prior_context}" in DOCUMENT_SUMMARY_PROMPT + + +class TestGetPromptForContentType: + """Tests for get_prompt_for_content_type function.""" + + def test_general_returns_general(self) -> None: + """Test general content type returns general prompt.""" + prompt = get_prompt_for_content_type("general") + assert prompt == GENERAL_SUMMARY_PROMPT + + def test_conversation_returns_conversation(self) -> None: + """Test conversation content type returns conversation prompt.""" + prompt = get_prompt_for_content_type("conversation") + assert prompt == CONVERSATION_SUMMARY_PROMPT + + def test_journal_returns_journal(self) -> None: + """Test journal content type returns journal prompt.""" + prompt = get_prompt_for_content_type("journal") + assert prompt == JOURNAL_SUMMARY_PROMPT + + def test_document_returns_document(self) -> None: + """Test document content type returns document prompt.""" + prompt = get_prompt_for_content_type("document") + assert prompt == DOCUMENT_SUMMARY_PROMPT + + def test_unknown_returns_general(self) -> None: + """Test unknown content type falls back to general.""" + prompt = get_prompt_for_content_type("unknown_type") + assert prompt == GENERAL_SUMMARY_PROMPT + + def test_empty_returns_general(self) -> None: + """Test empty string falls back to general.""" + prompt = get_prompt_for_content_type("") + assert prompt == GENERAL_SUMMARY_PROMPT + + +class TestFormatPriorContext: + """Tests for format_prior_context function.""" + + def test_with_prior_summary(self) -> None: + """Test formatting with a prior summary.""" + result = format_prior_context("Previous summary text") + assert "Prior context" in result + assert "Previous summary text" in result + + def test_without_prior_summary(self) -> None: + """Test formatting without prior summary returns empty string.""" + result = format_prior_context(None) + assert result == "" + + def test_empty_string_prior_summary(self) -> None: + """Test formatting with empty string prior summary.""" + result = format_prior_context("") + assert result == "" + + +class TestFormatSummariesForMeta: + """Tests for format_summaries_for_meta function.""" + + def test_single_summary(self) -> None: + """Test formatting a single summary.""" + result = format_summaries_for_meta(["Summary one"]) + assert "[Section 1]" in result + assert "Summary one" in result + + def test_multiple_summaries(self) -> None: + """Test formatting multiple summaries.""" + summaries = ["First summary", "Second summary", "Third summary"] + result = format_summaries_for_meta(summaries) + + assert "[Section 1]" in result + assert "[Section 2]" in result + assert "[Section 3]" in result + assert "First summary" in result + assert "Second summary" in result + assert "Third summary" in result + + def test_empty_list(self) -> None: + """Test formatting empty list.""" + result = format_summaries_for_meta([]) + assert result == "" + + def test_summaries_separated(self) -> None: + """Test summaries are separated by double newlines.""" + summaries = ["Sum 1", "Sum 2"] + result = format_summaries_for_meta(summaries) + assert "\n\n" in result diff --git a/tests/summarizer/test_utils.py b/tests/summarizer/test_utils.py new file mode 100644 index 00000000..89a44171 --- /dev/null +++ b/tests/summarizer/test_utils.py @@ -0,0 +1,137 @@ +"""Unit tests for summarizer utility 
functions.""" + +from __future__ import annotations + +from agent_cli.summarizer._utils import ( + chunk_text, + count_tokens, + estimate_summary_tokens, + tokens_to_words, +) + + +class TestCountTokens: + """Tests for count_tokens function.""" + + def test_empty_string(self) -> None: + """Test counting tokens in empty string.""" + assert count_tokens("") == 0 + + def test_simple_sentence(self) -> None: + """Test counting tokens in a simple sentence.""" + # "Hello world" is typically 2 tokens + count = count_tokens("Hello world") + assert count > 0 + assert count < 10 + + def test_longer_text(self) -> None: + """Test that longer text has more tokens.""" + short = count_tokens("Hello") + long = count_tokens("Hello world, this is a longer sentence with more words.") + assert long > short + + def test_different_model_fallback(self) -> None: + """Test that unknown models fall back to cl100k_base.""" + # Should not raise, should fall back gracefully + count = count_tokens("Hello world", model="unknown-model-xyz") + assert count > 0 + + +class TestChunkText: + """Tests for chunk_text function.""" + + def test_empty_text(self) -> None: + """Test chunking empty text returns empty list.""" + assert chunk_text("") == [] + + def test_short_text_single_chunk(self) -> None: + """Test that short text stays as single chunk.""" + text = "This is a short paragraph." + chunks = chunk_text(text, chunk_size=1000) + assert len(chunks) == 1 + assert chunks[0] == text + + def test_multiple_paragraphs_chunking(self) -> None: + """Test chunking multiple paragraphs.""" + paragraphs = ["Paragraph one. " * 50, "Paragraph two. " * 50, "Paragraph three. " * 50] + text = "\n\n".join(paragraphs) + + # Use small chunk size to force splitting + chunks = chunk_text(text, chunk_size=200, overlap=20) + assert len(chunks) > 1 + + def test_overlap_preserved(self) -> None: + """Test that chunks have overlap for context continuity.""" + # Create text that will definitely need chunking + text = "Sentence one about topic A. " * 20 + "\n\n" + "Sentence two about topic B. " * 20 + + chunks = chunk_text(text, chunk_size=100, overlap=30) + + # With overlap, later chunks should contain some content from earlier + if len(chunks) > 1: + # Overlap means adjacent chunks share some content + # This is a rough check - exact overlap depends on tokenization + assert len(chunks) >= 2 + + def test_large_paragraph_sentence_split(self) -> None: + """Test that large paragraphs are split by sentences.""" + # One giant paragraph with multiple sentences + sentences = [ + f"This is sentence number {i}. It contains important information." 
for i in range(50) + ] + text = " ".join(sentences) + + chunks = chunk_text(text, chunk_size=100, overlap=20) + assert len(chunks) > 1 + + +class TestEstimateSummaryTokens: + """Tests for estimate_summary_tokens function.""" + + def test_typical_input(self) -> None: + """Test typical input uses ~10% compression.""" + # ~10% compression, capped at 500, minimum 50 + result = estimate_summary_tokens(1000) + assert result == 100 # 1000 // 10 = 100 + + def test_medium_input(self) -> None: + """Test medium input stays within bounds.""" + result = estimate_summary_tokens(2000) + assert result == 200 # 2000 // 10 = 200 + assert result >= 50 # above floor + assert result <= 500 # below ceiling + + def test_large_input_hits_cap(self) -> None: + """Test large input hits 500 token cap.""" + result = estimate_summary_tokens(50000) + assert result == 500 # capped at 500 + + def test_small_input_uses_floor(self) -> None: + """Test small input uses 50 token floor.""" + result = estimate_summary_tokens(100) + assert result == 50 # floor of 50 (100 // 10 = 10, but min is 50) + + def test_very_small_input(self) -> None: + """Test very small input still uses floor.""" + result = estimate_summary_tokens(10) + assert result == 50 # floor of 50 + + +class TestTokensToWords: + """Tests for tokens_to_words function.""" + + def test_basic_conversion(self) -> None: + """Test basic token to word conversion.""" + # 1 token ≈ 0.75 words + assert tokens_to_words(100) == 75 + assert tokens_to_words(1000) == 750 + + def test_zero_tokens(self) -> None: + """Test zero tokens returns zero words.""" + assert tokens_to_words(0) == 0 + + def test_small_values(self) -> None: + """Test small token values.""" + assert tokens_to_words(1) == 0 # int(0.75) = 0 + assert tokens_to_words(2) == 1 # int(1.5) = 1 + assert tokens_to_words(4) == 3 # int(3.0) = 3 diff --git a/uv.lock b/uv.lock index f69fd193..9bf4c468 100644 --- a/uv.lock +++ b/uv.lock @@ -41,6 +41,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-timeout" }, { name = "ruff" }, + { name = "tiktoken" }, { name = "versioningit" }, ] memory = [ @@ -49,6 +50,7 @@ memory = [ { name = "huggingface-hub" }, { name = "onnxruntime" }, { name = "pyyaml" }, + { name = "tiktoken" }, { name = "transformers" }, { name = "watchfiles" }, ] @@ -74,6 +76,7 @@ test = [ { name = "pytest-cov" }, { name = "pytest-mock" }, { name = "pytest-timeout" }, + { name = "tiktoken" }, ] [package.dev-dependencies] @@ -90,6 +93,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-timeout" }, { name = "ruff" }, + { name = "tiktoken" }, { name = "versioningit" }, ] @@ -127,6 +131,8 @@ requires-dist = [ { name = "rich" }, { name = "ruff", marker = "extra == 'dev'" }, { name = "sounddevice" }, + { name = "tiktoken", marker = "extra == 'memory'", specifier = ">=0.5.0" }, + { name = "tiktoken", marker = "extra == 'test'", specifier = ">=0.5.0" }, { name = "transformers", marker = "extra == 'memory'", specifier = ">=4.30.0" }, { name = "transformers", marker = "extra == 'rag'", specifier = ">=4.30.0" }, { name = "typer" }, @@ -151,6 +157,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-timeout" }, { name = "ruff" }, + { name = "tiktoken", specifier = ">=0.5.0" }, { name = "versioningit" }, ] @@ -4378,6 +4385,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154, upload-time = 
"2024-03-12T14:34:36.569Z" }, ] +[[package]] +name = "tiktoken" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565, upload-time = "2025-10-06T20:21:44.566Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284, upload-time = "2025-10-06T20:21:45.622Z" }, + { url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201, upload-time = "2025-10-06T20:21:47.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444, upload-time = "2025-10-06T20:21:48.139Z" }, + { url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080, upload-time = "2025-10-06T20:21:49.246Z" }, + { url = "https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240, upload-time = "2025-10-06T20:21:50.274Z" }, + { url = "https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422, upload-time = "2025-10-06T20:21:51.734Z" }, + { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, + { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, + { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = 
"sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, + { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, + { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, + { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, + { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, + { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = 
"2025-10-06T20:22:08.418Z" }, + { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, + { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, + { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, + { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, + { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, + { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, + { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, + { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, + { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, + { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, +] + [[package]] name = "tinycss2" version = "1.4.0"