From e7b82f886b232e9d52c46dcd40e84204724c8160 Mon Sep 17 00:00:00 2001
From: Daniele Briggi <=>
Date: Fri, 17 Oct 2025 13:38:24 +0000
Subject: [PATCH 1/7] feat(sentences): introduce sentences to improve results
 preview

refact(settings): extension options are generated by a settings method
chore(settings):
- default chunk_size equals the model context window
- increase FTS weight
---
 src/sqlite_rag/cli.py                    |   7 +-
 src/sqlite_rag/database.py               |  29 +++-
 src/sqlite_rag/engine.py                 | 114 ++++++++++++--
 src/sqlite_rag/formatters.py             | 188 ++++++++++++++++++++++-
 src/sqlite_rag/models/chunk.py           |   9 +-
 src/sqlite_rag/models/document.py        |   2 +-
 src/sqlite_rag/models/document_result.py |   7 +-
 src/sqlite_rag/models/sentence.py        |  11 ++
 src/sqlite_rag/models/sentence_result.py |  16 ++
 src/sqlite_rag/repository.py             |  21 ++-
 src/sqlite_rag/sentence_splitter.py      |  38 +++++
 src/sqlite_rag/settings.py               |  46 +++++-
 src/sqlite_rag/sqliterag.py              |  28 +++-
 tests/test_engine.py                     |   4 +
 tests/test_sentence_splitter.py          |  71 +++++++++
 tests/test_settings.py                   |  12 +-
 16 files changed, 560 insertions(+), 43 deletions(-)
 create mode 100644 src/sqlite_rag/models/sentence.py
 create mode 100644 src/sqlite_rag/models/sentence_result.py
 create mode 100644 src/sqlite_rag/sentence_splitter.py
 create mode 100644 tests/test_sentence_splitter.py

diff --git a/src/sqlite_rag/cli.py b/src/sqlite_rag/cli.py
index 6574889..13278cc 100644
--- a/src/sqlite_rag/cli.py
+++ b/src/sqlite_rag/cli.py
@@ -446,6 +446,11 @@ def search(
         "--debug",
         help="Print extra debug information with modern formatting",
     ),
+    debug2: bool = typer.Option(
+        False,
+        "--debug2",
+        help="Print debug format with sentence-level details and snippet context",
+    ),
     peek: bool = typer.Option(
         False, "--peek", help="Print debug information using compact table format"
     ),
@@ -462,7 +467,7 @@ def search(
     results = results[:limit]
 
     # Get the appropriate formatter and display results
-    formatter = get_formatter(debug=debug, table_view=peek)
+    formatter = get_formatter(debug=debug, debug2=debug2, table_view=peek)
     formatter.format_results(results, query)
 
     typer.echo(f"{search_time:.3f} seconds")
diff --git a/src/sqlite_rag/database.py b/src/sqlite_rag/database.py
index 8ad1791..7bdd357 100644
--- a/src/sqlite_rag/database.py
+++ b/src/sqlite_rag/database.py
@@ -88,6 +88,21 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
         """
     )
 
+    # TODO: remove sequence
+    cursor.execute(
+        """
+        CREATE TABLE IF NOT EXISTS sentences (
+            id TEXT PRIMARY KEY,
+            chunk_id INTEGER,
+            content TEXT,
+            embedding BLOB,
+            sequence INTEGER,
+            start_offset INTEGER,
+            end_offset INTEGER
+        )
+        """
+    )
+
     cursor.execute(
         """
         CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(content, content='chunks', content_rowid='id');
@@ -95,9 +110,17 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
     )
 
     cursor.execute(
-        f"""
-        SELECT vector_init('chunks', 'embedding', 'type={settings.vector_type},dimension={settings.embedding_dim},{settings.other_vector_options}');
-        """
+        """
+        SELECT vector_init('chunks', 'embedding', ?);
+        """,
+        (settings.get_vector_init_options(),),
+    )
+    # TODO: same configuration as chunks (or different options?)
+    cursor.execute(
+        """
+        SELECT vector_init('sentences', 'embedding', ?);
+        """,
+        (settings.get_vector_init_options(),),
    )
 
    conn.commit()
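Editor's note: vector_init is now driven by a composed options string instead of an inline f-string. A minimal sketch of what Settings.get_vector_init_options() (added in settings.py later in this patch) produces with this patch's defaults; the printed value is illustrative:

    # Sketch: how the options string bound to vector_init() above is composed.
    # Mirrors Settings.get_vector_init_options() from this patch; values are
    # the patch defaults.
    vector_type = "INT8"
    embedding_dim = 768
    other_vector_options = "distance=cosine"

    options = {"type": vector_type, "dimension": embedding_dim}
    options_str = ",".join(f"{k}={v}" for k, v in options.items())
    if other_vector_options:
        options_str += f",{other_vector_options}"

    print(options_str)  # type=INT8,dimension=768,distance=cosine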
diff --git a/src/sqlite_rag/engine.py b/src/sqlite_rag/engine.py
index c2372d2..840992c 100644
--- a/src/sqlite_rag/engine.py
+++ b/src/sqlite_rag/engine.py
@@ -1,10 +1,12 @@
 import json
-import re
 import sqlite3
 from pathlib import Path
+from typing import List
 
 from sqlite_rag.logger import Logger
 from sqlite_rag.models.document_result import DocumentResult
+from sqlite_rag.models.sentence_result import SentenceResult
+from sqlite_rag.sentence_splitter import SentenceSplitter
 
 from .chunker import Chunker
 from .models.document import Document
@@ -15,10 +17,17 @@ class Engine:
     # Considered a good default to normalize the score for RRF
     DEFAULT_RRF_K = 60
 
-    def __init__(self, conn: sqlite3.Connection, settings: Settings, chunker: Chunker):
+    def __init__(
+        self,
+        conn: sqlite3.Connection,
+        settings: Settings,
+        chunker: Chunker,
+        sentence_chunker: SentenceSplitter,
+    ):
         self._conn = conn
         self._settings = settings
         self._chunker = chunker
+        self._sentence_chunker = sentence_chunker
         self._logger = Logger()
 
     def load_model(self):
@@ -30,7 +39,7 @@ def load_model(self):
 
         self._conn.execute(
             "SELECT llm_model_load(?, ?);",
-            (self._settings.model_path, self._settings.model_options),
+            (self._settings.model_path, self._settings.other_model_options),
         )
 
     def process(self, document: Document) -> Document:
@@ -46,6 +55,11 @@ def process(self, document: Document) -> Document:
             chunk.title = document.get_title()
             chunk.embedding = self.generate_embedding(chunk.get_embedding_text())
 
+            sentences = self._sentence_chunker.split(chunk)
+            for sentence in sentences:
+                sentence.embedding = self.generate_embedding(sentence.content)
+            chunk.sentences = sentences
+
         document.chunks = chunks
 
         return document
@@ -72,6 +86,7 @@ def quantize(self) -> None:
         cursor = self._conn.cursor()
 
         cursor.execute("SELECT vector_quantize('chunks', 'embedding');")
+        cursor.execute("SELECT vector_quantize('sentences', 'embedding');")
         self._conn.commit()
 
         self._logger.debug("Quantization completed.")
@@ -81,21 +96,25 @@ def quantize_preload(self) -> None:
 
         cursor = self._conn.cursor()
         cursor.execute("SELECT vector_quantize_preload('chunks', 'embedding');")
+        cursor.execute("SELECT vector_quantize_preload('sentences', 'embedding');")
 
     def quantize_cleanup(self) -> None:
         """Clean up internal structures related to a previously quantized table/column."""
         cursor = self._conn.cursor()
 
         cursor.execute("SELECT vector_quantize_cleanup('chunks', 'embedding');")
+        cursor.execute("SELECT vector_quantize_cleanup('sentences', 'embedding');")
         self._conn.commit()
 
     def create_new_context(self) -> None:
-        """"""
+        """Create a new LLM context with optional runtime overrides."""
         cursor = self._conn.cursor()
 
+        context_options = self._settings.get_embeddings_context_options()
         cursor.execute(
-            "SELECT llm_context_create(?);", (self._settings.model_context_options,)
+            "SELECT llm_context_create(?);",
+            (context_options,),
         )
 
     def free_context(self) -> None:
@@ -104,13 +123,11 @@ def free_context(self) -> None:
 
         cursor.execute("SELECT llm_context_free();")
 
-    def search(self, query: str, top_k: int = 10) -> list[DocumentResult]:
+    def search(
+        self, semantic_query: str, fts_query, top_k: int = 10
+    ) -> list[DocumentResult]:
         """Semantic search and full-text search sorted with Reciprocal Rank Fusion."""
-        query_embedding = self.generate_embedding(query)
-
-        # Clean up and split into words
-        # '*' is used to match while typing
-        query = " ".join(re.findall(r"\b\w+\b", query.lower())) + "*"
+        query_embedding = self.generate_embedding(semantic_query)
 
         vector_scan_type = (
             "vector_quantize_scan"
@@ -119,8 +136,7 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]:
         )
 
         cursor = self._conn.cursor()
-        # TODO: understand how to sort results depending on the distance metric
-        # Eg, for cosine distance, higher is better (closer to 1)
+
         cursor.execute(
             f"""
            -- sqlite-vector KNN vector search results
@@ -163,6 +179,7 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]:
             documents.uri,
             documents.content as document_content,
             documents.metadata,
+            chunks.id AS chunk_id,
             chunks.content AS snippet,
             vec_rank,
             fts_rank,
@@ -176,7 +193,7 @@
         ;
         """,  # nosec B608
             {
-                "query": query,
+                "query": fts_query,
                 "query_embedding": query_embedding,
                 "k": top_k,
                 "rrf_k": Engine.DEFAULT_RRF_K,
@@ -186,7 +203,7 @@
         )
 
         rows = cursor.fetchall()
-        return [
+        results = [
             DocumentResult(
                 document=Document(
                     id=row["id"],
@@ -194,6 +211,7 @@
                     content=row["document_content"],
                     metadata=json.loads(row["metadata"]) if row["metadata"] else {},
                 ),
+                chunk_id=row["chunk_id"],
                 snippet=row["snippet"],
                 vec_rank=row["vec_rank"],
                 fts_rank=row["fts_rank"],
@@ -204,6 +222,72 @@
             for row in rows
         ]
 
+        return results
+
+    def search_sentences(
+        self, query: str, chunk_id: int, k: int
+    ) -> List[SentenceResult]:
+        query_embedding = self.generate_embedding(query)
+
+        vector_scan_type = (
+            "vector_quantize_scan_stream"
+            if self._settings.quantize_scan
+            else "vector_full_scan_stream"
+        )
+
+        cursor = self._conn.cursor()
+
+        cursor.execute(
+            f"""
+            WITH vec_matches AS (
+                SELECT
+                    v.rowid AS sentence_id,
+                    row_number() OVER (ORDER BY v.distance) AS rank_number,
+                    v.distance,
+                    sentences.content as sentence_content,
+                    sentences.sequence as sentence_sequence,
+                    sentences.start_offset as sentence_start_offset,
+                    sentences.end_offset as sentence_end_offset
+                FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v
+                JOIN sentences ON sentences.rowid = v.rowid
+                WHERE sentences.chunk_id = :chunk_id
+                LIMIT :k
+            )
+            SELECT
+                sentence_id,
+                sentence_content,
+                sentence_sequence,
+                sentence_start_offset,
+                sentence_end_offset,
+                rank_number,
+                distance
+            FROM vec_matches
+            ORDER BY rank_number ASC
+            """,  # nosec B608
+            {
+                "query_embedding": query_embedding,
+                "k": k,
+                "chunk_id": chunk_id,
+            },
+        )
+
+        rows = cursor.fetchall()
+        sentences = []
+        for row in rows:
+            sentences.append(
+                SentenceResult(
+                    id=row["sentence_id"],
+                    chunk_id=chunk_id,
+                    sequence=row["sentence_sequence"],
+                    rank=row["rank_number"],
+                    distance=row["distance"],
+                    start_offset=row["sentence_start_offset"],
+                    end_offset=row["sentence_end_offset"],
+                )
+            )
+
+        return sentences[:k]
+
     def versions(self) -> dict:
         """Get versions of the loaded extensions."""
         cursor = self._conn.cursor()
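Editor's note: the combined ordering above is Reciprocal Rank Fusion. The full SQL is abbreviated in this hunk, so the following is a sketch under the assumption that the query applies the standard RRF formula with this patch's defaults (DEFAULT_RRF_K = 60, weight_vec = 1.0, weight_fts = 1.5):

    # Sketch of Reciprocal Rank Fusion: each result list contributes
    # weight / (rrf_k + rank); a missing rank contributes nothing.
    def rrf_score(vec_rank, fts_rank, rrf_k=60, weight_vec=1.0, weight_fts=1.5):
        score = 0.0
        if vec_rank is not None:
            score += weight_vec / (rrf_k + vec_rank)
        if fts_rank is not None:
            score += weight_fts / (rrf_k + fts_rank)
        return score

    print(rrf_score(None, 1))  # FTS-only hit at #1: 1.5/61 ~= 0.0246
    print(rrf_score(2, 2))     # hit in both lists at #2: 2.5/62 ~= 0.0403

A result ranked moderately by both scans can therefore outrank a result that tops only one of them.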
" ".join(re.findall(r"\b\w+\b", query.lower())) + "*" + query_embedding = self.generate_embedding(semantic_query) vector_scan_type = ( "vector_quantize_scan" @@ -119,8 +136,7 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]: ) cursor = self._conn.cursor() - # TODO: understand how to sort results depending on the distance metric - # Eg, for cosine distance, higher is better (closer to 1) + cursor.execute( f""" -- sqlite-vector KNN vector search results @@ -163,6 +179,7 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]: documents.uri, documents.content as document_content, documents.metadata, + chunks.id AS chunk_id, chunks.content AS snippet, vec_rank, fts_rank, @@ -176,7 +193,7 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]: ; """, # nosec B608 { - "query": query, + "query": fts_query, "query_embedding": query_embedding, "k": top_k, "rrf_k": Engine.DEFAULT_RRF_K, @@ -186,7 +203,7 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]: ) rows = cursor.fetchall() - return [ + results = [ DocumentResult( document=Document( id=row["id"], @@ -194,6 +211,7 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]: content=row["document_content"], metadata=json.loads(row["metadata"]) if row["metadata"] else {}, ), + chunk_id=row["chunk_id"], snippet=row["snippet"], vec_rank=row["vec_rank"], fts_rank=row["fts_rank"], @@ -204,6 +222,72 @@ def search(self, query: str, top_k: int = 10) -> list[DocumentResult]: for row in rows ] + return results + + def search_sentences( + self, query: str, chunk_id: int, k: int + ) -> List[SentenceResult]: + query_embedding = self.generate_embedding(query) + + vector_scan_type = ( + "vector_quantize_scan_stream" + if self._settings.quantize_scan + else "vector_full_scan_stream" + ) + + cursor = self._conn.cursor() + + cursor.execute( + f""" + WITH vec_matches AS ( + SELECT + v.rowid AS sentence_id, + row_number() OVER (ORDER BY v.distance) AS rank_number, + v.distance, + sentences.content as sentence_content, + sentences.sequence as sentence_sequence, + sentences.start_offset as sentence_start_offset, + sentences.end_offset as sentence_end_offset + FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v + JOIN sentences ON sentences.rowid = v.rowid + WHERE sentences.chunk_id = :chunk_id + LIMIT :k + ) + SELECT + sentence_id, + sentence_content, + sentence_sequence, + sentence_start_offset, + sentence_end_offset, + rank_number, + distance + FROM vec_matches + ORDER BY rank_number ASC + """, # nosec B608 + { + "query_embedding": query_embedding, + "k": k, + "chunk_id": chunk_id, + }, + ) + + rows = cursor.fetchall() + sentences = [] + for row in rows: + sentences.append( + SentenceResult( + id=row["sentence_id"], + chunk_id=chunk_id, + sequence=row["sentence_sequence"], + rank=row["rank_number"], + distance=row["distance"], + start_offset=row["sentence_start_offset"], + end_offset=row["sentence_end_offset"], + ) + ) + + return sentences[:k] + def versions(self) -> dict: """Get versions of the loaded extensions.""" cursor = self._conn.cursor() diff --git a/src/sqlite_rag/formatters.py b/src/sqlite_rag/formatters.py index 255f3f2..27bf026 100644 --- a/src/sqlite_rag/formatters.py +++ b/src/sqlite_rag/formatters.py @@ -6,6 +6,8 @@ import typer +from sqlite_rag.models.sentence_result import SentenceResult + from .models.document_result import DocumentResult @@ -163,6 +165,188 @@ def _should_show_debug(self) -> bool: return True +class 
+
+    def _get_debug_line(self, doc: DocumentResult) -> str:
+        """Format debug metrics line."""
+        combined = (
+            f"{doc.combined_rank:.5f}" if doc.combined_rank is not None else "N/A"
+        )
+        vec_info = (
+            f"#{doc.vec_rank} ({doc.vec_distance:.6f})"
+            if doc.vec_rank is not None
+            else "N/A"
+        )
+        fts_info = (
+            f"#{doc.fts_rank} ({doc.fts_score:.6f})"
+            if doc.fts_rank is not None
+            else "N/A"
+        )
+        return f"│ Combined: {combined} │ Vector: {vec_info} │ FTS: {fts_info}"
+
+    def _should_show_debug(self) -> bool:
+        return True
+
+    def _format_single_result(self, doc: DocumentResult, idx: int) -> None:
+        """Format a single result with box layout including sentence details."""
+        icon = self._get_file_icon(doc.document.uri or "")
+
+        # Draw the result box header
+        header = f"┌─ Result #{idx} " + "─" * (67 - len(str(idx)))
+        typer.echo(header)
+
+        # Display URI if available
+        if doc.document.uri:
+            uri_display = self._format_uri_display(doc.document.uri, icon, 75)
+            typer.echo(f"│ {uri_display:<75}│")
+
+            # Add debug info
+            debug_line = self._get_debug_line(doc)
+            if debug_line:
+                typer.echo(debug_line)
+
+            typer.echo("├" + "─" * 77 + "┤")
+        elif self._should_show_debug():
+            debug_line = self._get_debug_line(doc)
+            if debug_line:
+                typer.echo(debug_line)
+                typer.echo("├" + "─" * 77 + "┤")
+
+        # Display snippet preview from top sentences
+        if doc.sentences:
+            snippet_preview = self._build_sentence_preview(doc.snippet, doc.sentences)
+            preview_lines = self._clean_and_wrap_snippet(
+                snippet_preview, width=75, max_length=400
+            )
+
+            typer.echo("│ Preview (top 3 sentences):                                                  │")
+            for line in preview_lines:
+                typer.echo(f"│ {line:<75} │")
+
+            typer.echo("├" + "─" * 77 + "┤")
+            typer.echo("│ Sentences:                                                                  │")
+
+            # Display sentences with their distances
+            for i, sentence in enumerate(doc.sentences, 1):
+                distance_str = (
+                    f"{sentence.distance:.6f}"
+                    if sentence.distance is not None
+                    else "N/A"
+                )
+                rank_str = f"#{sentence.rank}" if sentence.rank is not None else "N/A"
+
+                # Format sentence header
+                sentence_header = (
+                    f"│ {i}. [Rank: {rank_str}, Distance: {distance_str}]"
+                )
+                typer.echo(sentence_header.ljust(78) + " │")
+
+                # Extract sentence text using offsets from the chunk snippet
+                if (
+                    sentence.start_offset is not None
+                    and sentence.end_offset is not None
+                ):
+                    sentence_text = doc.snippet[
+                        sentence.start_offset : sentence.end_offset
+                    ]
+                else:
+                    sentence_text = "[No offset information available]"
+
+                # Wrap and display sentence content
+                sentence_lines = self._clean_and_wrap_snippet(
+                    sentence_text, width=72, max_length=400
+                )
+                for line in sentence_lines:
+                    typer.echo(f"│    {line:<72} │")
+        else:
+            # Fallback to regular snippet display if no sentences
+            snippet_lines = self._clean_and_wrap_snippet(
+                doc.snippet, width=75, max_length=400
+            )
+            for line in snippet_lines:
+                typer.echo(f"│ {line:<75} │")
+
+        typer.echo("└" + "─" * 77 + "┘")
+        typer.echo()
+
+    def _build_sentence_preview(
+        self,
+        chunk_content: str,
+        sentences: List[SentenceResult],
+        max_chars: int = 400,
+    ) -> str:
+        """Build preview from top 3 ranked sentences with [...] for gaps.
+
+        Args:
+            chunk_content: The full chunk text
+            sentences: List of SentenceResult objects (should already be sorted by rank)
+            max_chars: Maximum total characters for preview
+
+        Returns:
+            Preview string with top sentences and [...] separators
+        """
+
+        # Take top 3 sentences (they should already be sorted by rank/distance)
+        top_sentences = sentences[:3]
+
+        if not top_sentences:
+            return chunk_content[:max_chars]
+
+        # Sort sentences by their position in the chunk (using start_offset)
+        # so we can build a preview in the order they appear
+        sentences_with_offsets = [
+            s
+            for s in top_sentences
+            if s.start_offset is not None and s.end_offset is not None
+        ]
+
+        if not sentences_with_offsets:
+            # Fallback: no offset information, return truncated chunk content
+            return chunk_content[:max_chars]
+
+        # Sort by start_offset to maintain document order
+        sentences_with_offsets.sort(key=lambda s: s.start_offset)
+
+        preview_parts = []
+        total_chars = 0
+        prev_end_offset = None
+
+        for sentence in sentences_with_offsets:
+            # Extract sentence text using offsets
+            sentence_text = chunk_content[
+                sentence.start_offset : sentence.end_offset
+            ].strip()
+
+            # Calculate remaining budget including potential separator
+            separator_len = len(" [...] ") if preview_parts else 0
+            remaining = max_chars - total_chars - separator_len
+
+            if remaining <= 0:
+                break
+
+            # Truncate sentence if needed
+            if len(sentence_text) > remaining:
+                sentence_text = sentence_text[: remaining - 3] + "..."
+
+            # Check if there's a gap > 10 chars from previous sentence
+            if prev_end_offset is not None:
+                gap_size = sentence.start_offset - prev_end_offset
+                if gap_size > 10:
+                    preview_parts.append("[...]")
+                    total_chars += len(" [...] ")
+
+            preview_parts.append(sentence_text)
+            total_chars += len(sentence_text)
+            prev_end_offset = sentence.end_offset
+
+        return " ".join(preview_parts)
+
+
 class TableDebugFormatter(SearchResultFormatter):
     """Table view debug formatter."""
 
@@ -225,11 +409,13 @@ def _print_table_row(self, idx: int, doc: DocumentResult) -> None:
 
 
 def get_formatter(
-    debug: bool = False, table_view: bool = False
+    debug: bool = False, debug2: bool = False, table_view: bool = False
 ) -> SearchResultFormatter:
     """Factory function to get the appropriate formatter."""
     if table_view:
         return TableDebugFormatter()
+    elif debug2:
+        return BoxedDebug2Formatter()
     elif debug:
         return BoxedDebugFormatter()
     else:
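Editor's note: the gap handling in _build_sentence_preview is easiest to see in isolation. A self-contained sketch of the same ordering-and-gap logic (SentencePos and the sample offsets are hypothetical stand-ins; the max_chars truncation is omitted):

    from dataclasses import dataclass

    @dataclass
    class SentencePos:  # stand-in for SentenceResult's offset fields
        start_offset: int
        end_offset: int

    def preview(chunk: str, sentences: list[SentencePos], gap: int = 10) -> str:
        parts, prev_end = [], None
        # Re-order the top-ranked sentences by position, marking large gaps.
        for s in sorted(sentences, key=lambda s: s.start_offset):
            if prev_end is not None and s.start_offset - prev_end > gap:
                parts.append("[...]")
            parts.append(chunk[s.start_offset : s.end_offset].strip())
            prev_end = s.end_offset
        return " ".join(parts)

    text = "First point. Filler filler filler filler. Last point."
    print(preview(text, [SentencePos(0, 12), SentencePos(42, 53)]))
    # First point. [...] Last point.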
diff --git a/src/sqlite_rag/models/chunk.py b/src/sqlite_rag/models/chunk.py
index 15bb26b..89b987e 100644
--- a/src/sqlite_rag/models/chunk.py
+++ b/src/sqlite_rag/models/chunk.py
@@ -1,4 +1,6 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from sqlite_rag.models.sentence import Sentence
 
 
 @dataclass
@@ -6,7 +8,8 @@ class Chunk:
     id: int | None = None
     document_id: int | None = None
     # The human readable content of the chunk
-    # (not the representation of the embedding vector)
+    # (it does not represent the embedding vector, which
+    # may be altered with prompt or overlap text)
     content: str = ""
 
     embedding: str | bytes = b""
@@ -14,6 +17,8 @@
     head_overlap_text: str = ""
     title: str | None = None
 
+    sentences: list[Sentence] = field(default_factory=list)
+
     def get_embedding_text(self) -> str:
         """Get the content used to generate the embedding from.
 
         It can be enriched with overlap text and prompt instructions,
diff --git a/src/sqlite_rag/models/document.py b/src/sqlite_rag/models/document.py
index e8e4685..535b08b 100644
--- a/src/sqlite_rag/models/document.py
+++ b/src/sqlite_rag/models/document.py
@@ -18,7 +18,7 @@ class Document:
     created_at: datetime | None = None
     updated_at: datetime | None = None
 
-    chunks: list["Chunk"] = field(default_factory=list)
+    chunks: list[Chunk] = field(default_factory=list)
 
     def hash(self) -> str:
         """Generate a hash for the document content using SHA-3 for maximum collision resistance"""
diff --git a/src/sqlite_rag/models/document_result.py b/src/sqlite_rag/models/document_result.py
index 2a89298..86a6f9a 100644
--- a/src/sqlite_rag/models/document_result.py
+++ b/src/sqlite_rag/models/document_result.py
@@ -1,12 +1,14 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
 from .document import Document
+from .sentence_result import SentenceResult
 
 
 @dataclass
 class DocumentResult:
     document: Document
+    chunk_id: int
     snippet: str
 
     combined_rank: float
@@ -15,3 +17,6 @@ class DocumentResult:
 
     vec_distance: float | None = None
     fts_score: float | None = None
+
+    # highlight sentences
+    sentences: list[SentenceResult] = field(default_factory=list)
diff --git a/src/sqlite_rag/models/sentence.py b/src/sqlite_rag/models/sentence.py
new file mode 100644
index 0000000..a7f3d1e
--- /dev/null
+++ b/src/sqlite_rag/models/sentence.py
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Sentence:
+    id: int | None = None
+    content: str = ""
+    embedding: str | bytes = b""
+    sequence: int | None = None
+    start_offset: int | None = None
+    end_offset: int | None = None
diff --git a/src/sqlite_rag/models/sentence_result.py b/src/sqlite_rag/models/sentence_result.py
new file mode 100644
index 0000000..2718400
--- /dev/null
+++ b/src/sqlite_rag/models/sentence_result.py
@@ -0,0 +1,16 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class SentenceResult:
+    id: int | None = None
+    # content: str = ""
+
+    chunk_id: int | None = None
+    sequence: int | None = None
+
+    rank: float | None = None
+    distance: float | None = None
+
+    start_offset: int | None = None
+    end_offset: int | None = None
diff --git a/src/sqlite_rag/repository.py b/src/sqlite_rag/repository.py
index be1bc50..4f3e08a 100644
--- a/src/sqlite_rag/repository.py
+++ b/src/sqlite_rag/repository.py
@@ -32,11 +32,28 @@ def add_document(self, document: Document) -> str:
                 "INSERT INTO chunks (document_id, content, embedding) VALUES (?, ?, ?)",
                 (document_id, chunk.content, chunk.embedding),
             )
+
+            chunk_id = cursor.lastrowid
+
             cursor.execute(
-                "INSERT INTO chunks_fts (rowid, content) VALUES (last_insert_rowid(), ?)",
-                (chunk.content,),
+                "INSERT INTO chunks_fts (rowid, content) VALUES (?, ?)",
+                (chunk_id, chunk.content),
             )
 
+            for sentence in chunk.sentences:
+                cursor.execute(
+                    "INSERT INTO sentences (id, chunk_id, content, sequence, embedding, start_offset, end_offset) VALUES (?, ?, ?, ?, ?, ?, ?)",
+                    (
+                        str(uuid4()),
+                        chunk_id,
+                        sentence.content,
+                        sentence.sequence,
+                        sentence.embedding,
+                        sentence.start_offset,
+                        sentence.end_offset,
+                    ),
+                )
+
         self._conn.commit()
 
         return document_id
diff --git a/src/sqlite_rag/sentence_splitter.py b/src/sqlite_rag/sentence_splitter.py
new file mode 100644
index 0000000..75642eb
--- /dev/null
+++ b/src/sqlite_rag/sentence_splitter.py
@@ -0,0 +1,38 @@
+import re
+from typing import List
+
+from sqlite_rag.models.chunk import Chunk
+from sqlite_rag.models.sentence import Sentence
+
+
+class SentenceSplitter:
+
+    def split(self, chunk: Chunk) -> List[Sentence]:
+        """Split chunk into sentences."""
+        sentence_chunks = []
+
+        sentences = self._split_into_sentences(chunk.content)
+        start_offset = 0
+        end_offset = 0
+        for i, sentence in enumerate(sentences):
+            start_offset = chunk.content.index(sentence, end_offset)
+            end_offset = start_offset + len(sentence)
+
+            sentence_chunk = Sentence(
+                content=sentence,
+                sequence=i,
+                start_offset=start_offset,
+                end_offset=end_offset,
+            )
+            sentence_chunks.append(sentence_chunk)
+
+        return sentence_chunks
+
+    def _split_into_sentences(self, text: str) -> List[str]:
+        """Split into focused segments for semantic matching."""
+        # Split on: sentence endings, semicolons, or paragraph breaks
+        sentence_endings = re.compile(r'(?<=[.!?;])(?:"|\')?\s+(?=[A-Z])|[\n]{2,}')
+        sentences = sentence_endings.split(text)
+
+        # Keep segments that are substantial enough (20+ chars for meaningful matching)
+        return [s.strip() for s in sentences if len(s.strip()) > 20]
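Editor's note: a quick usage sketch for the splitter above. The offsets follow from the regex (a single newline matches the \s+ between sentences) and agree with tests/test_sentence_splitter.py later in this patch:

    from sqlite_rag.models.chunk import Chunk
    from sqlite_rag.sentence_splitter import SentenceSplitter

    chunk = Chunk(content="This is the first sentence.\nHere is the second sentence!")
    for s in SentenceSplitter().split(chunk):
        print(s.sequence, s.start_offset, s.end_offset, repr(s.content))
    # 0 0 27 'This is the first sentence.'
    # 1 28 56 'Here is the second sentence!'

Segments of 20 characters or fewer are dropped, so very short sentences never reach the sentences table.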
diff --git a/src/sqlite_rag/settings.py b/src/sqlite_rag/settings.py
index ef41fb2..42b39fc 100644
--- a/src/sqlite_rag/settings.py
+++ b/src/sqlite_rag/settings.py
@@ -15,11 +15,14 @@ class Settings:
         "./models/unsloth/embeddinggemma-300m-GGUF/embeddinggemma-300M-Q8_0.gguf"
     )
     # See: https://github.com/sqliteai/sqlite-ai/blob/main/API.md#llm_model_loadpath-text-options-text
-    model_options: str = ""
+    other_model_options: str = ""
+
     # See: https://github.com/sqliteai/sqlite-ai/blob/main/API.md#llm_context_createoptions-text
-    model_context_options: str = (
-        "generate_embedding=1,normalize_embedding=1,pooling_type=mean,embedding_type=INT8"
-    )
+    other_model_context_options: str = ""
+
+    # How the model pools token embeddings into a single embedding
+    # Options: "mean", "max", "min", "last", "first"
+    pooling_type: str = "mean"
 
     # Allow the sqlite-ai extension to use the GPU
     # See: https://github.com/sqliteai/sqlite-ai
@@ -27,14 +30,15 @@ class Settings:
 
     vector_type: str = "INT8"
     embedding_dim: int = 768
+
     other_vector_options: str = (
         "distance=cosine"  # e.g. distance=metric,other=value,...
     )
 
     # It includes the overlap size and the prompt template length
-    chunk_size: int = 512
+    chunk_size: int = 2048
     # Tokens overlap between chunks
-    chunk_overlap: int = 61
+    chunk_overlap: int = 256
 
     #
     # Search settings
@@ -46,7 +50,7 @@
     quantize_preload: bool = False
 
     # Weights for combining FTS and vector search results
-    weight_fts: float = 1.0
+    weight_fts: float = 1.5
     weight_vec: float = 1.0
 
     #
@@ -61,7 +65,7 @@
     # Template to index documents for retrieval, use `{title}` with the title or the string `"none"`
     prompt_template_retrieval_document: str = "title: {title} | text: {content}"
 
-    prompt_template_retrieval_query: str = "task: search result | query: {content}"
+    prompt_template_retrieval_query: str = 'title: "none" | text: {content}'
 
     #
     # Index settings
@@ -71,6 +75,31 @@
     max_document_size_bytes: int = 5 * 1024 * 1024  # 5 MB
     # Zero means no limit
     max_chunks_per_document: int = 1000
+    # Number of top sentences to return per document
+    top_k_sentences: int = 3
+
+    def get_embeddings_context_options(self) -> str:
+        """Get the context options for embeddings generation."""
+        options = {
+            "n_ctx": self.chunk_size,
+            "embedding_type": self.vector_type,
+            "pooling_type": self.pooling_type,
+            "generate_embedding": 1,
+            "normalize_embedding": 1,
+        }
+
+        return ",".join(f"{k}={v}" for k, v in options.items()) + (
+            f",{self.other_model_context_options}"
+            if self.other_model_context_options
+            else ""
+        )
+
+    def get_vector_init_options(self) -> str:
+        """Get the vector init options for the vector store."""
+        options = {"type": self.vector_type, "dimension": self.embedding_dim}
+        return ",".join(f"{k}={v}" for k, v in options.items()) + (
+            f",{self.other_vector_options}" if self.other_vector_options else ""
+        )
 
 
 class SettingsManager:
@@ -177,4 +206,5 @@ def has_critical_changes(
         new_settings.model_path != current_settings.model_path
         or new_settings.embedding_dim != current_settings.embedding_dim
         or new_settings.vector_type != current_settings.vector_type
+        or new_settings.pooling_type != current_settings.pooling_type
     )
diff --git a/src/sqlite_rag/sqliterag.py b/src/sqlite_rag/sqliterag.py
index 8be35b6..4b14e89 100644
--- a/src/sqlite_rag/sqliterag.py
+++ b/src/sqlite_rag/sqliterag.py
@@ -1,3 +1,4 @@
+import re
 import sqlite3
 from dataclasses import asdict
 from pathlib import Path
@@ -6,6 +7,7 @@
 from sqlite_rag.extractor import Extractor
 from sqlite_rag.logger import Logger
 from sqlite_rag.models.document_result import DocumentResult
+from sqlite_rag.sentence_splitter import SentenceSplitter
 
 from .chunker import Chunker
 from .database import Database
@@ -25,7 +27,12 @@ def __init__(self, connection: sqlite3.Connection, settings: Settings):
 
         self._repository = Repository(self._conn, settings)
         self._chunker = Chunker(self._conn, settings)
-        self._engine = Engine(self._conn, settings, chunker=self._chunker)
+        self._engine = Engine(
+            self._conn,
+            settings,
+            chunker=self._chunker,
+            sentence_chunker=SentenceSplitter(),
+        )
         self._extractor = Extractor()
 
         self.ready = False
@@ -310,10 +317,25 @@ def search(
         if new_context:
             self._engine.create_new_context()
 
+        semantic_query = query
         if self._settings.use_prompt_templates:
-            query = self._settings.prompt_template_retrieval_query.format(content=query)
+            semantic_query = self._settings.prompt_template_retrieval_query.format(
+                content=query
+            )
+
+        # Clean up and split into words
+        # '*' is used to match while typing
+        fts_query = " ".join(re.findall(r"\b\w+\b", query.lower())) + "*"
+
+        results = 
self._engine.search(semantic_query, fts_query, top_k=top_k) + + # Refine chunks with top sentences + for result in results: + result.sentences = self._engine.search_sentences( + semantic_query, result.chunk_id, k=self._settings.top_k_sentences + ) - return self._engine.search(query, top_k=top_k) + return results def get_settings(self) -> dict: """Get settings and more useful information""" diff --git a/tests/test_engine.py b/tests/test_engine.py index 2f38f35..0de2517 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -204,6 +204,10 @@ def test_search_fts_results(self, db_conn): assert len(results) > 0 assert doc1_id == results[0].document.id + assert results[0].fts_rank + assert results[0].fts_rank == 1 + assert results[0].fts_score + assert results[0].fts_score > 0 def test_search_without_quantization(self, db_conn): # Arrange diff --git a/tests/test_sentence_splitter.py b/tests/test_sentence_splitter.py new file mode 100644 index 0000000..09bb151 --- /dev/null +++ b/tests/test_sentence_splitter.py @@ -0,0 +1,71 @@ +from sqlite_rag.models.chunk import Chunk +from sqlite_rag.sentence_splitter import SentenceSplitter + + +class TestSentenceSplitter: + def test_split(self): + + splitter = SentenceSplitter() + + chunk = Chunk( + id=1, + document_id=1, + title="Test Chunk", + content="This is the first sentence.\nHere is the second sentence! And what about the third?", + embedding=b"", + sentences=[], + ) + + sentences = splitter.split(chunk) + + assert len(sentences) == 3 + assert sentences[0].content == "This is the first sentence." + assert sentences[0].sequence == 0 + assert sentences[0].start_offset == 0 + assert sentences[0].end_offset == 27 + + assert sentences[1].content == "Here is the second sentence!" + assert sentences[1].sequence == 1 + assert sentences[1].start_offset == 28 + assert sentences[1].end_offset == 28 + 28 + + assert sentences[2].content == "And what about the third?" 
+ assert sentences[2].sequence == 2 + assert sentences[2].start_offset == 57 + assert sentences[2].end_offset == 57 + 25 + + def test_split_empty(self): + splitter = SentenceSplitter() + + chunk = Chunk( + id=1, + document_id=1, + title="Empty Chunk", + content="", + embedding=b"", + sentences=[], + ) + + sentences = splitter.split(chunk) + + assert len(sentences) == 0 + + def test_split_no_punctuation(self): + splitter = SentenceSplitter() + + chunk = Chunk( + id=1, + document_id=1, + title="No Punctuation Chunk", + content="This is a sentence without punctuation and another one follows it", + embedding=b"", + sentences=[], + ) + + sentences = splitter.split(chunk) + + assert len(sentences) == 1 + assert sentences[0].content == chunk.content + assert sentences[0].sequence == 0 + assert sentences[0].start_offset == 0 + assert sentences[0].end_offset == len(chunk.content) diff --git a/tests/test_settings.py b/tests/test_settings.py index c8b6e3e..a26f0eb 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -9,7 +9,7 @@ def test_store_settings(self, db_conn): settings_manager = SettingsManager(db_conn[0]) settings = Settings( model_path="test_model", - model_options="test_config", + other_model_options="test_config", embedding_dim=768, vector_type="test_store", chunk_overlap=100, @@ -23,7 +23,7 @@ def test_store_settings(self, db_conn): assert stored_settings is not None assert stored_settings.model_path == "test_model" - assert stored_settings.model_options == "test_config" + assert stored_settings.other_model_options == "test_config" assert stored_settings.embedding_dim == 768 assert stored_settings.vector_type == "test_store" assert stored_settings.chunk_overlap == 100 @@ -34,7 +34,7 @@ def test_store_settings_when_exist(self, db_conn): settings_manager = SettingsManager(db_conn[0]) settings = Settings( model_path="test_model", - model_options="test_config", + other_model_options="test_config", embedding_dim=768, vector_type="test_store", chunk_overlap=100, @@ -47,7 +47,7 @@ def test_store_settings_when_exist(self, db_conn): # Store again with different values new_settings = Settings( model_path="new_model", - model_options="new_config", + other_model_options="new_config", embedding_dim=512, vector_type="new_store", chunk_overlap=50, @@ -60,7 +60,7 @@ def test_store_settings_when_exist(self, db_conn): assert stored_settings is not None assert stored_settings.model_path == "new_model" - assert stored_settings.model_options == "new_config" + assert stored_settings.other_model_options == "new_config" assert stored_settings.embedding_dim == 512 assert stored_settings.vector_type == "new_store" assert stored_settings.chunk_overlap == 50 @@ -82,7 +82,7 @@ def test_load_settings_with_defaults(self, db_conn): assert loaded_settings is not None assert loaded_settings.model_path == settings.model_path - assert loaded_settings.model_options == settings.model_options + assert loaded_settings.other_model_options == settings.other_model_options assert loaded_settings.embedding_dim == settings.embedding_dim assert loaded_settings.vector_type == settings.vector_type assert loaded_settings.chunk_overlap == settings.chunk_overlap From 20731f3ee3ee61d3a9c30593a80a2da7ff920d4f Mon Sep 17 00:00:00 2001 From: Daniele Briggi <=> Date: Fri, 17 Oct 2025 14:22:58 +0000 Subject: [PATCH 2/7] refact(formatters): use sentences --- src/sqlite_rag/cli.py | 11 +- src/sqlite_rag/database.py | 1 - src/sqlite_rag/engine.py | 3 - src/sqlite_rag/formatters.py | 259 +++++++++++------------ 
src/sqlite_rag/models/sentence.py | 1 - src/sqlite_rag/models/sentence_result.py | 3 - src/sqlite_rag/repository.py | 3 +- src/sqlite_rag/sentence_splitter.py | 8 +- 8 files changed, 132 insertions(+), 157 deletions(-) diff --git a/src/sqlite_rag/cli.py b/src/sqlite_rag/cli.py index 13278cc..7035ff5 100644 --- a/src/sqlite_rag/cli.py +++ b/src/sqlite_rag/cli.py @@ -439,17 +439,12 @@ def reset( def search( ctx: typer.Context, query: str, - limit: int = typer.Option(10, help="Number of results to return"), + limit: int = typer.Option(5, help="Number of results to return"), debug: bool = typer.Option( False, "-d", "--debug", - help="Print extra debug information with modern formatting", - ), - debug2: bool = typer.Option( - False, - "--debug2", - help="Print debug format with sentence-level details and snippet context", + help="Print extra debug information with sentence-level details", ), peek: bool = typer.Option( False, "--peek", help="Print debug information using compact table format" @@ -467,7 +462,7 @@ def search( results = results[:limit] # Get the appropriate formatter and display results - formatter = get_formatter(debug=debug, debug2=debug2, table_view=peek) + formatter = get_formatter(debug=debug, table_view=peek) formatter.format_results(results, query) typer.echo(f"{search_time:.3f} seconds") diff --git a/src/sqlite_rag/database.py b/src/sqlite_rag/database.py index 7bdd357..b5ed743 100644 --- a/src/sqlite_rag/database.py +++ b/src/sqlite_rag/database.py @@ -96,7 +96,6 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings): chunk_id INTEGER, content TEXT, embedding BLOB, - sequence INTEGER, start_offset INTEGER, end_offset INTEGER ) diff --git a/src/sqlite_rag/engine.py b/src/sqlite_rag/engine.py index 840992c..2da3169 100644 --- a/src/sqlite_rag/engine.py +++ b/src/sqlite_rag/engine.py @@ -245,7 +245,6 @@ def search_sentences( row_number() OVER (ORDER BY v.distance) AS rank_number, v.distance, sentences.content as sentence_content, - sentences.sequence as sentence_sequence, sentences.start_offset as sentence_start_offset, sentences.end_offset as sentence_end_offset FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v @@ -256,7 +255,6 @@ def search_sentences( SELECT sentence_id, sentence_content, - sentence_sequence, sentence_start_offset, sentence_end_offset, rank_number, @@ -278,7 +276,6 @@ def search_sentences( SentenceResult( id=row["sentence_id"], chunk_id=chunk_id, - sequence=row["sentence_sequence"], rank=row["rank_number"], distance=row["distance"], start_offset=row["sentence_start_offset"], diff --git a/src/sqlite_rag/formatters.py b/src/sqlite_rag/formatters.py index 27bf026..ca3b0df 100644 --- a/src/sqlite_rag/formatters.py +++ b/src/sqlite_rag/formatters.py @@ -82,6 +82,81 @@ def _format_uri_display(self, uri: str, icon: str, max_width: int = 75) -> str: uri_display = f"{icon} ...{uri[-available_width:]}" return uri_display + def _build_sentence_preview( + self, + chunk_content: str, + sentences: List[SentenceResult], + max_chars: int = 400, + ) -> str: + """Build preview from top 3 ranked sentences with [...] for gaps. + + Args: + chunk_content: The full chunk text + sentences: List of SentenceResult objects (should already be sorted by rank) + max_chars: Maximum total characters for preview + + Returns: + Preview string with top sentences and [...] separators. + Falls back to truncated chunk_content if sentences have no offsets. 
+ """ + + # Take top 3 sentences (they should already be sorted by rank/distance) + top_sentences = sentences[:3] if sentences else [] + + if not top_sentences: + # Fallback: no sentences, return truncated chunk content + return chunk_content[:max_chars] + + # Filter sentences that have offset information + sentences_with_offsets = [ + s + for s in top_sentences + if s.start_offset is not None and s.end_offset is not None + ] + + if not sentences_with_offsets: + # Fallback: sentences exist but no offset information, return truncated chunk content + return chunk_content[:max_chars] + + # Sort by start_offset to maintain document order + sentences_with_offsets.sort( + key=lambda s: s.start_offset if s.start_offset is not None else -1 + ) + + preview_parts = [] + total_chars = 0 + prev_end_offset = None + + for sentence in sentences_with_offsets: + # Extract sentence text using offsets + sentence_text = chunk_content[ + sentence.start_offset : sentence.end_offset + ].strip() + + # Calculate remaining budget including potential separator + separator_len = len(" [...] ") if preview_parts else 0 + remaining = max_chars - total_chars - separator_len + + if remaining <= 0: + break + + # Truncate sentence if needed + if len(sentence_text) > remaining: + sentence_text = sentence_text[: remaining - 3] + "..." + + # Check if there's a gap > 10 chars from previous sentence + if prev_end_offset is not None and sentence.start_offset is not None: + gap_size = sentence.start_offset - prev_end_offset + if gap_size > 10: + preview_parts.append("[...]") + total_chars += len(" [...] ") + + preview_parts.append(sentence_text) + total_chars += len(sentence_text) + prev_end_offset = sentence.end_offset + + return " ".join(preview_parts) + class BoxedFormatter(SearchResultFormatter): """Base class for boxed result formatters.""" @@ -100,8 +175,15 @@ def format_results(self, results: List[DocumentResult], query: str) -> None: def _format_single_result(self, doc: DocumentResult, idx: int) -> None: """Format a single result with box layout.""" icon = self._get_file_icon(doc.document.uri or "") + + # Use sentence-based preview if sentences are available + if doc.sentences: + snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences) + else: + snippet_text = doc.snippet + snippet_lines = self._clean_and_wrap_snippet( - doc.snippet, width=75, max_length=400 + snippet_text, width=75, max_length=400 ) # Draw the result box header @@ -164,33 +246,19 @@ def _get_debug_line(self, doc: DocumentResult) -> str: def _should_show_debug(self) -> bool: return True + def _format_single_result(self, doc: DocumentResult, idx: int) -> None: + """Format a single result with box layout including sentence summary.""" + icon = self._get_file_icon(doc.document.uri or "") -class BoxedDebug2Formatter(BoxedFormatter): - """Debug formatter showing sentence-level details with snippet preview from top sentences.""" + # Use sentence-based preview if sentences are available + if doc.sentences: + snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences) + else: + snippet_text = doc.snippet - def _get_debug_line(self, doc: DocumentResult) -> str: - """Format debug metrics line.""" - combined = ( - f"{doc.combined_rank:.5f}" if doc.combined_rank is not None else "N/A" - ) - vec_info = ( - f"#{doc.vec_rank} ({doc.vec_distance:.6f})" - if doc.vec_rank is not None - else "N/A" - ) - fts_info = ( - f"#{doc.fts_rank} ({doc.fts_score:.6f})" - if doc.fts_rank is not None - else "N/A" + snippet_lines = self._clean_and_wrap_snippet( 
+ snippet_text, width=75, max_length=400 ) - return f"│ Combined: {combined} │ Vector: {vec_info} │ FTS: {fts_info}" - - def _should_show_debug(self) -> bool: - return True - - def _format_single_result(self, doc: DocumentResult, idx: int) -> None: - """Format a single result with box layout including sentence details.""" - icon = self._get_file_icon(doc.document.uri or "") # Draw the result box header header = f"┌─ Result #{idx} " + "─" * (67 - len(str(idx))) @@ -213,26 +281,18 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None: typer.echo(debug_line) typer.echo("├" + "─" * 77 + "┤") - # Display snippet preview from top sentences - if doc.sentences: - snippet_preview = self._build_sentence_preview(doc.snippet, doc.sentences) - preview_lines = self._clean_and_wrap_snippet( - snippet_preview, width=75, max_length=400 - ) - - typer.echo( - "│ Preview (top 3 sentences): │" - ) - for line in preview_lines: - typer.echo(f"│ {line:<75} │") + # Display snippet preview + for line in snippet_lines: + typer.echo(f"│ {line:<75} │") + # Display sentence details if available + if doc.sentences: typer.echo("├" + "─" * 77 + "┤") typer.echo( "│ Sentences: │" ) - # Display sentences with their distances - for i, sentence in enumerate(doc.sentences, 1): + for sentence in doc.sentences[:5]: # Show max 5 sentences distance_str = ( f"{sentence.distance:.6f}" if sentence.distance is not None @@ -240,112 +300,33 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None: ) rank_str = f"#{sentence.rank}" if sentence.rank is not None else "N/A" - # Format sentence header - sentence_header = ( - f"│ {i}. [Rank: {rank_str}, Distance: {distance_str}]" - ) - typer.echo(sentence_header.ljust(78) + " │") - - # Extract sentence text using offsets from the chunk snippet + # Extract sentence preview (first 50 chars) if ( sentence.start_offset is not None and sentence.end_offset is not None ): sentence_text = doc.snippet[ sentence.start_offset : sentence.end_offset - ] + ].strip() + # Truncate and clean for display + sentence_preview = sentence_text.replace("\n", " ").replace( + "\r", "" + ) + if len(sentence_preview) > 50: + sentence_preview = sentence_preview[:47] + "..." else: - sentence_text = "[No offset information available]" + sentence_preview = "[No offset info]" - # Wrap and display sentence content - sentence_lines = self._clean_and_wrap_snippet( - sentence_text, width=72, max_length=400 + # Format sentence line + sentence_line = ( + f"│ {rank_str:>3} ({distance_str}) | {sentence_preview}" ) - for line in sentence_lines: - typer.echo(f"│ {line:<72} │") - else: - # Fallback to regular snippet display if no sentences - snippet_lines = self._clean_and_wrap_snippet( - doc.snippet, width=75, max_length=400 - ) - for line in snippet_lines: - typer.echo(f"│ {line:<75} │") + # Pad to 78 chars and add closing border + typer.echo(sentence_line.ljust(78) + " │") typer.echo("└" + "─" * 77 + "┘") typer.echo() - def _build_sentence_preview( - self, - chunk_content: str, - sentences: List[SentenceResult], - max_chars: int = 400, - ) -> str: - """Build preview from top 3 ranked sentences with [...] for gaps. - - Args: - chunk_content: The full chunk text - sentences: List of SentenceResult objects (should already be sorted by rank) - max_chars: Maximum total characters for preview - - Returns: - Preview string with top sentences and [...] 
separators - """ - - # Take top 3 sentences (they should already be sorted by rank/distance) - top_sentences = sentences[:3] - - if not top_sentences: - return chunk_content[:max_chars] - - # Sort sentences by their position in the chunk (using start_offset) - # so we can build a preview in the order they appear - sentences_with_offsets = [ - s - for s in top_sentences - if s.start_offset is not None and s.end_offset is not None - ] - - if not sentences_with_offsets: - # Fallback: no offset information, return truncated chunk content - return chunk_content[:max_chars] - - # Sort by start_offset to maintain document order - sentences_with_offsets.sort(key=lambda s: s.start_offset) - - preview_parts = [] - total_chars = 0 - prev_end_offset = None - - for sentence in sentences_with_offsets: - # Extract sentence text using offsets - sentence_text = chunk_content[ - sentence.start_offset : sentence.end_offset - ].strip() - - # Calculate remaining budget including potential separator - separator_len = len(" [...] ") if preview_parts else 0 - remaining = max_chars - total_chars - separator_len - - if remaining <= 0: - break - - # Truncate sentence if needed - if len(sentence_text) > remaining: - sentence_text = sentence_text[: remaining - 3] + "..." - - # Check if there's a gap > 10 chars from previous sentence - if prev_end_offset is not None: - gap_size = sentence.start_offset - prev_end_offset - if gap_size > 10: - preview_parts.append("[...]") - total_chars += len(" [...] ") - - preview_parts.append(sentence_text) - total_chars += len(sentence_text) - prev_end_offset = sentence.end_offset - - return " ".join(preview_parts) - class TableDebugFormatter(SearchResultFormatter): """Table view debug formatter.""" @@ -383,8 +364,16 @@ def _print_table_header(self) -> None: def _print_table_row(self, idx: int, doc: DocumentResult) -> None: """Print a single table row.""" + # Use sentence-based preview if sentences are available + if doc.sentences: + snippet = self._build_sentence_preview( + doc.snippet, doc.sentences, max_chars=52 + ) + else: + snippet = doc.snippet + # Clean snippet display - snippet = doc.snippet.replace("\n", " ").replace("\r", "") + snippet = snippet.replace("\n", " ").replace("\r", "") snippet = snippet[:49] + "..." 
if len(snippet) > 52 else snippet # Clean URI display @@ -409,13 +398,11 @@ def _print_table_row(self, idx: int, doc: DocumentResult) -> None: def get_formatter( - debug: bool = False, debug2: bool = False, table_view: bool = False + debug: bool = False, table_view: bool = False ) -> SearchResultFormatter: """Factory function to get the appropriate formatter.""" if table_view: return TableDebugFormatter() - elif debug2: - return BoxedDebug2Formatter() elif debug: return BoxedDebugFormatter() else: diff --git a/src/sqlite_rag/models/sentence.py b/src/sqlite_rag/models/sentence.py index a7f3d1e..064b233 100644 --- a/src/sqlite_rag/models/sentence.py +++ b/src/sqlite_rag/models/sentence.py @@ -6,6 +6,5 @@ class Sentence: id: int | None = None content: str = "" embedding: str | bytes = b"" - sequence: int | None = None start_offset: int | None = None end_offset: int | None = None diff --git a/src/sqlite_rag/models/sentence_result.py b/src/sqlite_rag/models/sentence_result.py index 2718400..7094efd 100644 --- a/src/sqlite_rag/models/sentence_result.py +++ b/src/sqlite_rag/models/sentence_result.py @@ -4,10 +4,7 @@ @dataclass class SentenceResult: id: int | None = None - # content: str = "" - chunk_id: int | None = None - sequence: int | None = None rank: float | None = None distance: float | None = None diff --git a/src/sqlite_rag/repository.py b/src/sqlite_rag/repository.py index 4f3e08a..005bf80 100644 --- a/src/sqlite_rag/repository.py +++ b/src/sqlite_rag/repository.py @@ -42,12 +42,11 @@ def add_document(self, document: Document) -> str: for sentence in chunk.sentences: cursor.execute( - "INSERT INTO sentences (id, chunk_id, content, sequence, embedding, start_offset, end_offset) VALUES (?, ?, ?, ?, ?, ?, ?)", + "INSERT INTO sentences (id, chunk_id, content, embedding, start_offset, end_offset) VALUES (?, ?, ?, ?, ?, ?)", ( str(uuid4()), chunk_id, sentence.content, - sentence.sequence, sentence.embedding, sentence.start_offset, sentence.end_offset, diff --git a/src/sqlite_rag/sentence_splitter.py b/src/sqlite_rag/sentence_splitter.py index 75642eb..e22b74f 100644 --- a/src/sqlite_rag/sentence_splitter.py +++ b/src/sqlite_rag/sentence_splitter.py @@ -6,6 +6,7 @@ class SentenceSplitter: + MIN_CHARS_PER_SENTENCE = 20 def split(self, chunk: Chunk) -> List[Sentence]: """Split chunk into sentences.""" @@ -14,13 +15,12 @@ def split(self, chunk: Chunk) -> List[Sentence]: sentences = self._split_into_sentences(chunk.content) start_offset = 0 end_offset = 0 - for i, sentence in enumerate(sentences): + for sentence in sentences: start_offset = chunk.content.index(sentence, end_offset) end_offset = start_offset + len(sentence) sentence_chunk = Sentence( content=sentence, - sequence=i, start_offset=start_offset, end_offset=end_offset, ) @@ -35,4 +35,6 @@ def _split_into_sentences(self, text: str) -> List[str]: sentences = sentence_endings.split(text) # Keep segments that are substantial enough (20+ chars for meaningful matching) - return [s.strip() for s in sentences if len(s.strip()) > 20] + return [ + s.strip() for s in sentences if len(s.strip()) > self.MIN_CHARS_PER_SENTENCE + ] From fa06d146cd222d427fdc5462a5af5a296656f07a Mon Sep 17 00:00:00 2001 From: Daniele Briggi <=> Date: Fri, 17 Oct 2025 15:30:56 +0000 Subject: [PATCH 3/7] fix(tests): engine tests --- src/sqlite_rag/engine.py | 16 +- src/sqlite_rag/sentence_splitter.py | 18 +- src/sqlite_rag/sqliterag.py | 4 +- tests/conftest.py | 8 +- tests/integration/test_engine.py | 252 ++++++++++++++++++++++++++++ tests/test_chunker.py | 4 +- 
tests/test_engine.py | 217 ++++++------------------ 7 files changed, 328 insertions(+), 191 deletions(-) diff --git a/src/sqlite_rag/engine.py b/src/sqlite_rag/engine.py index 2da3169..f8ce76e 100644 --- a/src/sqlite_rag/engine.py +++ b/src/sqlite_rag/engine.py @@ -22,12 +22,12 @@ def __init__( conn: sqlite3.Connection, settings: Settings, chunker: Chunker, - sentence_chunker: SentenceSplitter, + sentence_splitter: SentenceSplitter, ): self._conn = conn self._settings = settings self._chunker = chunker - self._sentence_chunker = sentence_chunker + self._sentence_splitter = sentence_splitter self._logger = Logger() def load_model(self): @@ -55,7 +55,7 @@ def process(self, document: Document) -> Document: chunk.title = document.get_title() chunk.embedding = self.generate_embedding(chunk.get_embedding_text()) - sentences = self._sentence_chunker.split(chunk) + sentences = self._sentence_splitter.split(chunk) for sentence in sentences: sentence.embedding = self.generate_embedding(sentence.content) chunk.sentences = sentences @@ -225,7 +225,7 @@ def search( return results def search_sentences( - self, query: str, chunk_id: int, k: int + self, query: str, chunk_id: int, top_k: int ) -> List[SentenceResult]: query_embedding = self.generate_embedding(query) @@ -250,7 +250,8 @@ def search_sentences( FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v JOIN sentences ON sentences.rowid = v.rowid WHERE sentences.chunk_id = :chunk_id - LIMIT :k + ORDER BY rank_number ASC + LIMIT :top_k ) SELECT sentence_id, @@ -260,11 +261,10 @@ def search_sentences( rank_number, distance FROM vec_matches - ORDER BY rank_number ASC """, # nosec B608 { "query_embedding": query_embedding, - "k": k, + "top_k": top_k, "chunk_id": chunk_id, }, ) @@ -283,7 +283,7 @@ def search_sentences( ) ) - return sentences[:k] + return sentences[:top_k] def versions(self) -> dict: """Get versions of the loaded extensions.""" diff --git a/src/sqlite_rag/sentence_splitter.py b/src/sqlite_rag/sentence_splitter.py index e22b74f..dc5e82a 100644 --- a/src/sqlite_rag/sentence_splitter.py +++ b/src/sqlite_rag/sentence_splitter.py @@ -10,23 +10,23 @@ class SentenceSplitter: def split(self, chunk: Chunk) -> List[Sentence]: """Split chunk into sentences.""" - sentence_chunks = [] + sentences = [] - sentences = self._split_into_sentences(chunk.content) + sentences_text = self._split_into_sentences(chunk.content) start_offset = 0 end_offset = 0 - for sentence in sentences: - start_offset = chunk.content.index(sentence, end_offset) - end_offset = start_offset + len(sentence) + for sentence_text in sentences_text: + start_offset = chunk.content.index(sentence_text, end_offset) + end_offset = start_offset + len(sentence_text) - sentence_chunk = Sentence( - content=sentence, + sentence = Sentence( + content=sentence_text, start_offset=start_offset, end_offset=end_offset, ) - sentence_chunks.append(sentence_chunk) + sentences.append(sentence) - return sentence_chunks + return sentences def _split_into_sentences(self, text: str) -> List[str]: """Split into focused segments for semantic matching.""" diff --git a/src/sqlite_rag/sqliterag.py b/src/sqlite_rag/sqliterag.py index 4b14e89..da2078b 100644 --- a/src/sqlite_rag/sqliterag.py +++ b/src/sqlite_rag/sqliterag.py @@ -31,7 +31,7 @@ def __init__(self, connection: sqlite3.Connection, settings: Settings): self._conn, settings, chunker=self._chunker, - sentence_chunker=SentenceSplitter(), + sentence_splitter=SentenceSplitter(), ) self._extractor = Extractor() @@ -332,7 +332,7 @@ def 
search( # Refine chunks with top sentences for result in results: result.sentences = self._engine.search_sentences( - semantic_query, result.chunk_id, k=self._settings.top_k_sentences + semantic_query, result.chunk_id, top_k=self._settings.top_k_sentences ) return results diff --git a/tests/conftest.py b/tests/conftest.py index 477832d..04a3596 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,7 @@ from sqlite_rag.chunker import Chunker from sqlite_rag.database import Database from sqlite_rag.engine import Engine +from sqlite_rag.sentence_splitter import SentenceSplitter from sqlite_rag.settings import Settings @@ -28,7 +29,12 @@ def db_conn(): def engine(db_conn) -> Engine: conn, settings = db_conn - engine = Engine(conn, settings, chunker=Chunker(conn, settings)) + engine = Engine( + conn, + settings, + chunker=Chunker(conn, settings), + sentence_splitter=SentenceSplitter(), + ) engine.load_model() engine.quantize() engine.create_new_context() diff --git a/tests/integration/test_engine.py b/tests/integration/test_engine.py index 9b99ff6..d9ab3ea 100644 --- a/tests/integration/test_engine.py +++ b/tests/integration/test_engine.py @@ -1,8 +1,15 @@ import random import string +from sqlite3 import OperationalError import pytest +from sqlite_rag.chunker import Chunker +from sqlite_rag.engine import Engine +from sqlite_rag.models.document import Document +from sqlite_rag.repository import Repository +from sqlite_rag.sentence_splitter import SentenceSplitter + class TestEngine: @pytest.mark.slow @@ -26,3 +33,248 @@ def random_string(length=30): # Assert assert len(result_chunks) == 1000 + + +class TestEngineQuantization: + def test_quantize_embedding(self, engine): + """Test quantize called for chunks and sentences embeddings.""" + engine.quantize() + + # If no exception is raised, the test passes + engine.search("test query", "test query") + + def test_quantize_cleanup(self, engine): + """Test quantize cleanup works without errors.""" + engine.quantize() + engine.quantize_cleanup() + + with pytest.raises(OperationalError) as exc_info: + engine.search("test query", "test query") + assert "Ensure that vector_quantize() has been called" in str(exc_info.value) + + +class TestEngineSearch: + def test_search_with_empty_database(self, engine): + results = engine.search("nonexistent query", top_k=5) + + assert len(results) == 0 + + def test_search_with_semantic_and_fts(self, db_conn): + # Arrange + conn, settings = db_conn + + engine = Engine(conn, settings, Chunker(conn, settings), SentenceSplitter()) + engine.load_model() + engine.create_new_context() + + doc1 = Document( + content="The quick brown fox jumps over the lazy dog.", + uri="document1.txt", + ) + doc2 = Document( + content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?", + uri="document2.txt", + ) + doc3 = Document( + content="This document discusses about woodcutters and wood.", + uri="document3.txt", + ) + + engine.process(doc1) + engine.process(doc2) + engine.process(doc3) + + repository = Repository(conn, settings) + repository.add_document(doc1) + repository.add_document(doc2) + doc3_id = repository.add_document(doc3) + + engine.quantize() + + # Act + results = engine.search("wood lumberjack", "wood lumberjack", top_k=5) + + assert len(results) > 0 + assert doc3_id == results[0].document.id + + def test_search_semantic_result(self, db_conn): + # Arrange + conn, settings = db_conn + + engine = Engine(conn, settings, Chunker(conn, settings), SentenceSplitter()) + engine.load_model() + 
engine.create_new_context()
+
+        doc1 = Document(
+            content="The quick brown fox jumps over the lazy dog.",
+            uri="document1.txt",
+        )
+        doc2 = Document(
+            content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
+            uri="document2.txt",
+        )
+        doc3 = Document(
+            content="This document discusses about woodcutters and wood.",
+            uri="document3.txt",
+        )
+
+        engine.process(doc1)
+        engine.process(doc2)
+        engine.process(doc3)
+
+        repository = Repository(conn, settings)
+        repository.add_document(doc1)
+        repository.add_document(doc2)
+        doc3_id = repository.add_document(doc3)
+
+        engine.quantize()
+
+        # Act
+        results = engine.search("about lumberjack", "about lumberjack", top_k=5)
+
+        assert len(results) > 0
+        assert doc3_id == results[0].document.id
+
+    def test_search_fts_results(self, db_conn):
+        # Arrange
+        conn, settings = db_conn
+
+        engine = Engine(conn, settings, Chunker(conn, settings), SentenceSplitter())
+        engine.load_model()
+        engine.create_new_context()
+
+        doc1 = Document(
+            content="The quick brown fox jumps over the lazy dog.",
+            uri="document1.txt",
+        )
+        doc2 = Document(
+            content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
+            uri="document2.txt",
+        )
+        doc3 = Document(
+            content="This document discusses about woodcutters and wood.",
+            uri="document3.txt",
+        )
+
+        engine.process(doc1)
+        engine.process(doc2)
+        engine.process(doc3)
+
+        repository = Repository(conn, settings)
+        doc1_id = repository.add_document(doc1)
+        repository.add_document(doc2)
+        repository.add_document(doc3)
+
+        engine.quantize()
+
+        # Act
+        results = engine.search("quick brown fox", "quick brown fox", top_k=5)
+
+        assert len(results) > 0
+        assert doc1_id == results[0].document.id
+        assert results[0].fts_rank
+        assert results[0].fts_rank == 1
+        assert results[0].fts_score
+
+    def test_search_without_quantization(self, db_conn):
+        # Arrange
+        conn, settings = db_conn
+        settings.quantize_scan = False
+
+        engine = Engine(conn, settings, Chunker(conn, settings), SentenceSplitter())
+        engine.load_model()
+
+        doc = Document(
+            content="The quick brown fox jumps over the lazy dog.",
+            uri="document1.txt",
+        )
+
+        engine.create_new_context()
+        engine.process(doc)
+
+        repository = Repository(conn, settings)
+        doc_id = repository.add_document(doc)
+
+        # Act
+        results = engine.search("wood lumberjack", "wood lumberjack")
+
+        assert len(results) > 0
+        assert doc_id == results[0].document.id
+
+    def test_search_exact_match(self, db_conn):
+        conn, settings = db_conn
+        # cosine distance for the search embedding is exactly 0.0 when strings match
+        settings.other_vector_options = "distance=cosine"
+        settings.use_prompt_templates = False
+
+        engine = Engine(conn, settings, Chunker(conn, settings), SentenceSplitter())
+        engine.load_model()
+        engine.create_new_context()
+
+        doc1 = Document(
+            content="The quick brown fox jumps over the lazy dog",
+            uri="document1.txt",
+        )
+        doc2 = Document(
+            content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
+            uri="document2.txt",
+        )
+
+        engine.process(doc1)
+        engine.process(doc2)
+
+        repository = Repository(conn, settings)
+        doc1_id = repository.add_document(doc1)
+        repository.add_document(doc2)
+
+        engine.quantize()
+
+        # Act
+        results = engine.search(
+            "The quick brown fox jumps over the lazy dog",
+            "The quick brown fox jumps over the lazy dog",
+        )
+
+        assert len(results) > 0
+        assert doc1_id == results[0].document.id
+        assert 0.0 == results[0].vec_distance
+
+
+class TestEngineSearchSentences:
+    def 
test_search_sentences(self, db_conn): + conn, settings = db_conn + settings.use_prompt_templates = False + settings.quantize_scan = False + + engine = Engine(conn, settings, Chunker(conn, settings), SentenceSplitter()) + engine.load_model() + engine.create_new_context() + + doc = Document( + content=( + """The quick brown fox jumps over the lazy dog. + A stitch in time saves nine. + An apple a day keeps the doctor away. + """ + ), + uri="document1.txt", + ) + + engine.process(doc) + + repository = Repository(conn, settings) + doc_id = repository.add_document(doc) + + cursor = conn.execute("SELECT id FROM chunks WHERE document_id = ?", (doc_id,)) + chunk_id = cursor.fetchone()[0] + + # Act + results = engine.search_sentences( + "stitch time", + chunk_id, + top_k=1, + ) + + assert len(results) > 0 + assert results[0].start_offset == 61 # it's the second sentence + assert results[0].end_offset == 89 diff --git a/tests/test_chunker.py b/tests/test_chunker.py index 8c54949..792e21c 100644 --- a/tests/test_chunker.py +++ b/tests/test_chunker.py @@ -322,9 +322,9 @@ def test_chunk_size_equals_overlap(self, mock_conn): chunker = Chunker(mock_conn, settings) text = "This is a test sentence that should be handled gracefully." - with pytest.raises(ValueError) as excinfo: + with pytest.raises(ValueError) as exc_info: chunker.chunk(Document(content=text)) - assert "Chunk size must be greater than chunk overlap." in str(excinfo.value) + assert "Chunk size must be greater than chunk overlap." in str(exc_info.value) def test_very_small_chunk_size(self, mock_conn): """Test with chunk_size = 1.""" diff --git a/tests/test_engine.py b/tests/test_engine.py index 0de2517..7f7ead6 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -1,10 +1,9 @@ import pytest -from sqlite_rag.chunker import Chunker from sqlite_rag.engine import Engine from sqlite_rag.models.chunk import Chunk from sqlite_rag.models.document import Document -from sqlite_rag.repository import Repository +from sqlite_rag.models.sentence import Sentence from sqlite_rag.settings import Settings @@ -32,8 +31,10 @@ def test_process_uses_get_embedding_text(self, mocker): mock_conn = mocker.Mock() mock_chunker = mocker.Mock() mock_chunker.chunk.return_value = [mock_chunk] + mock_sentence_splitter = mocker.Mock() + mock_sentence_splitter.split.return_value = [] - engine = Engine(mock_conn, settings, mock_chunker) + engine = Engine(mock_conn, settings, mock_chunker, mock_sentence_splitter) # Mock generate_embedding completely mock_generate = mocker.patch.object( @@ -65,8 +66,10 @@ def test_process_with_max_chunks_per_document( settings = Settings(max_chunks_per_document=max_chunks_per_document) mock_chunker = mocker.Mock() mock_chunker.chunk.return_value = chunks + mock_sentence_splitter = mocker.Mock() + mock_sentence_splitter.split.return_value = [] - engine = Engine(mock_conn, settings, mock_chunker) + engine = Engine(mock_conn, settings, mock_chunker, mock_sentence_splitter) mock_generate_embedding = mocker.patch.object(engine, "generate_embedding") mock_generate_embedding = mocker.spy( @@ -84,187 +87,63 @@ def test_process_with_max_chunks_per_document( chunks = call_args[0][0] # First argument assert len(chunks) == expected_chunk_count - -class TestEngineSearch: - def test_search_with_empty_database(self, engine): - results = engine.search("nonexistent query", top_k=5) - - assert len(results) == 0 - - def test_search_with_semantic_and_fts(self, db_conn): - # Arrange - conn, settings = db_conn - - engine = Engine(conn, settings, 
Chunker(conn, settings)) - engine.load_model() - engine.create_new_context() - - doc1 = Document( - content="The quick brown fox jumps over the lazy dog.", - uri="document1.txt", - ) - doc2 = Document( - content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?", - uri="document2.txt", - ) - doc3 = Document( - content="This document discusses about woodcutters and wood.", - uri="document3.txt", - ) - - engine.process(doc1) - engine.process(doc2) - engine.process(doc3) - - repository = Repository(conn, settings) - repository.add_document(doc1) - repository.add_document(doc2) - doc3_id = repository.add_document(doc3) - - engine.quantize() - - # Act - results = engine.search("wood lumberjack", top_k=5) - - assert len(results) > 0 - assert doc3_id == results[0].document.id - - def test_search_semantic_result(self, db_conn): + def test_process_with_sentences(self, mocker): # Arrange - conn, settings = db_conn + chunks = [Chunk(content="Chunk 1"), Chunk(content="Chunk 2")] - engine = Engine(conn, settings, Chunker(conn, settings)) - engine.load_model() - engine.create_new_context() - - doc1 = Document( - content="The quick brown fox jumps over the lazy dog.", - uri="document1.txt", - ) - doc2 = Document( - content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?", - uri="document2.txt", - ) - doc3 = Document( - content="This document discusses about woodcutters and wood.", - uri="document3.txt", - ) - - engine.process(doc1) - engine.process(doc2) - engine.process(doc3) - - repository = Repository(conn, settings) - repository.add_document(doc1) - repository.add_document(doc2) - doc3_id = repository.add_document(doc3) - - engine.quantize() - - # Act - results = engine.search("about lumberjack", top_k=5) - - assert len(results) > 0 - assert doc3_id == results[0].document.id - - def test_search_fts_results(self, db_conn): - # Arrange - conn, settings = db_conn + mock_conn = mocker.Mock() + settings = Settings() + mock_chunker = mocker.Mock() + mock_chunker.chunk.return_value = chunks + mock_sentence_splitter = mocker.Mock() + # return different number of sentences per chunk + mock_sentence_splitter.split.side_effect = [ + [Sentence(content="Sentence 1.1")], + [Sentence(content="Sentence 2.1"), Sentence(content="Sentence 2.2")], + ] - engine = Engine(conn, settings, Chunker(conn, settings)) - engine.load_model() - engine.create_new_context() + engine = Engine(mock_conn, settings, mock_chunker, mock_sentence_splitter) - doc1 = Document( - content="The quick brown fox jumps over the lazy dog.", - uri="document1.txt", - ) - doc2 = Document( - content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?", - uri="document2.txt", - ) - doc3 = Document( - content="This document discusses about woodcutters and wood.", - uri="document3.txt", + mock_generate_embedding = mocker.patch.object(engine, "generate_embedding") + mock_generate_embedding = mocker.spy( + mock_generate_embedding, "generate_embedding" ) + mock_generate_embedding.return_value = chunks - engine.process(doc1) - engine.process(doc2) - engine.process(doc3) - - repository = Repository(conn, settings) - doc1_id = repository.add_document(doc1) - repository.add_document(doc2) - repository.add_document(doc3) - - engine.quantize() + document = Document(content="Test document content") # Act - results = engine.search("quick brown fox", top_k=5) + engine.process(document) - assert len(results) > 0 - assert doc1_id == results[0].document.id - assert results[0].fts_rank - assert 
results[0].fts_rank == 1 - assert results[0].fts_score - assert results[0].fts_score > 0 + # Assert + assert len(document.chunks) == 2 + assert len(document.chunks[0].sentences) == 1 + assert len(document.chunks[1].sentences) == 2 - def test_search_without_quantization(self, db_conn): + def test_process_without_sentences(self, mocker): # Arrange - conn, settings = db_conn - settings.quantize_scan = False - - engine = Engine(conn, settings, Chunker(conn, settings)) - engine.load_model() - - doc = Document( - content="The quick brown fox jumps over the lazy dog.", - uri="document1.txt", - ) - - engine.create_new_context() - engine.process(doc) - - repository = Repository(conn, settings) - doc_id = repository.add_document(doc) + chunks = [Chunk(content="Chunk 1")] - # Act - results = engine.search("wood lumberjack") - - assert len(results) > 0 - assert doc_id == results[0].document.id - - def test_search_exact_match(self, db_conn): - conn, settings = db_conn - # cosin distance for searching embedding is exact 0.0 when strings match - settings.other_vector_options = "distance=cosine" - settings.use_prompt_templates = False + mock_conn = mocker.Mock() + settings = Settings() + mock_chunker = mocker.Mock() + mock_chunker.chunk.return_value = chunks + mock_sentence_splitter = mocker.Mock() + mock_sentence_splitter.split.return_value = [] - engine = Engine(conn, settings, Chunker(conn, settings)) - engine.load_model() - engine.create_new_context() + engine = Engine(mock_conn, settings, mock_chunker, mock_sentence_splitter) - doc1 = Document( - content="The quick brown fox jumps over the lazy dog", - uri="document1.txt", - ) - doc2 = Document( - content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?", - uri="document2.txt", + mock_generate_embedding = mocker.patch.object(engine, "generate_embedding") + mock_generate_embedding = mocker.spy( + mock_generate_embedding, "generate_embedding" ) + mock_generate_embedding.return_value = chunks - engine.process(doc1) - engine.process(doc2) - - repository = Repository(conn, settings) - doc1_id = repository.add_document(doc1) - repository.add_document(doc2) - - engine.quantize() + document = Document(content="Test document content") # Act - results = engine.search("The quick brown fox jumps over the lazy dog") + engine.process(document) - assert len(results) > 0 - assert doc1_id == results[0].document.id - assert 0.0 == results[0].vec_distance + # Assert + assert len(document.chunks) == 1 + assert len(document.chunks[0].sentences) == 0 From 012b3e7695845b64d375abd6300c5ea283855f5e Mon Sep 17 00:00:00 2001 From: Daniele Briggi <=> Date: Mon, 20 Oct 2025 08:35:15 +0000 Subject: [PATCH 4/7] refact(tests): simplified sentences splitter --- src/sqlite_rag/sentence_splitter.py | 51 ++++++++++++++++++++--------- tests/integration/test_engine.py | 8 ++--- tests/test_sentence_splitter.py | 4 --- tests/test_sqlite_rag.py | 16 ++++++--- 4 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/sqlite_rag/sentence_splitter.py b/src/sqlite_rag/sentence_splitter.py index dc5e82a..a1cee83 100644 --- a/src/sqlite_rag/sentence_splitter.py +++ b/src/sqlite_rag/sentence_splitter.py @@ -10,27 +10,48 @@ class SentenceSplitter: def split(self, chunk: Chunk) -> List[Sentence]: """Split chunk into sentences.""" - sentences = [] - - sentences_text = self._split_into_sentences(chunk.content) - start_offset = 0 - end_offset = 0 - for sentence_text in sentences_text: - start_offset = chunk.content.index(sentence_text, end_offset) - end_offset = 
start_offset + len(sentence_text) + # Split on: sentence endings, semicolons, or paragraph breaks + sentence_regex = re.compile(r'(?<=[.!?;])(?:"|\')?\s+(?=[A-Z])|[\n]{2,}') - sentence = Sentence( - content=sentence_text, - start_offset=start_offset, - end_offset=end_offset, - ) - sentences.append(sentence) + sentences = [] + last_end = 0 + text = chunk.content + + for match in sentence_regex.finditer(text): + segment = text[last_end : match.end()] + + segment = segment.strip() + if len(segment) > self.MIN_CHARS_PER_SENTENCE: + sentences.append( + Sentence( + content=segment, + start_offset=last_end, + end_offset=last_end + len(segment), + ) + ) + + # Position after the current match + last_end = match.end() + + # Last segment + if last_end < len(text): + segment = text[last_end:] + + segment = segment.strip() + if len(segment) > self.MIN_CHARS_PER_SENTENCE: + sentences.append( + Sentence( + content=segment, + start_offset=last_end, + end_offset=last_end + len(segment), + ) + ) return sentences def _split_into_sentences(self, text: str) -> List[str]: """Split into focused segments for semantic matching.""" - # Split on: sentence endings, semicolons, or paragraph breaks + sentence_endings = re.compile(r'(?<=[.!?;])(?:"|\')?\s+(?=[A-Z])|[\n]{2,}') sentences = sentence_endings.split(text) diff --git a/tests/integration/test_engine.py b/tests/integration/test_engine.py index d9ab3ea..a25a9a1 100644 --- a/tests/integration/test_engine.py +++ b/tests/integration/test_engine.py @@ -13,7 +13,7 @@ class TestEngine: @pytest.mark.slow - def test_stress_embedding_generation(self, engine): + def test_stress_embedding_generation(self, engine: Engine): """Test embedding generation with a large number of chunks to not fail and to never generate duplicated embeddings.""" @@ -36,7 +36,7 @@ def random_string(length=30): class TestEngineQuantization: - def test_quantize_embedding(self, engine): + def test_quantize_embedding(self, engine: Engine): """Test quantize called for chunks and sentences embeddings.""" engine.quantize() @@ -54,8 +54,8 @@ def test_quantize_cleanup(self, engine): class TestEngineSearch: - def test_search_with_empty_database(self, engine): - results = engine.search("nonexistent query", top_k=5) + def test_search_with_empty_database(self, engine: Engine): + results = engine.search("nonexistent query", "nonexistent query", top_k=5) assert len(results) == 0 diff --git a/tests/test_sentence_splitter.py b/tests/test_sentence_splitter.py index 09bb151..7030b68 100644 --- a/tests/test_sentence_splitter.py +++ b/tests/test_sentence_splitter.py @@ -20,17 +20,14 @@ def test_split(self): assert len(sentences) == 3 assert sentences[0].content == "This is the first sentence." - assert sentences[0].sequence == 0 assert sentences[0].start_offset == 0 assert sentences[0].end_offset == 27 assert sentences[1].content == "Here is the second sentence!" - assert sentences[1].sequence == 1 assert sentences[1].start_offset == 28 assert sentences[1].end_offset == 28 + 28 assert sentences[2].content == "And what about the third?" 
-        assert sentences[2].sequence == 2
         assert sentences[2].start_offset == 57
         assert sentences[2].end_offset == 57 + 25
 
@@ -66,6 +63,5 @@ def test_split_no_punctuation(self):
 
         assert len(sentences) == 1
         assert sentences[0].content == chunk.content
-        assert sentences[0].sequence == 0
         assert sentences[0].start_offset == 0
         assert sentences[0].end_offset == len(chunk.content)
diff --git a/tests/test_sqlite_rag.py b/tests/test_sqlite_rag.py
index 19fedb7..a04fc71 100644
--- a/tests/test_sqlite_rag.py
+++ b/tests/test_sqlite_rag.py
@@ -838,10 +838,14 @@ def test_search_uses_retrieval_query_template(self, mocker):
         rag.search(query)
 
         # Assert that engine.search was called with the formatted template
-        expected_query = rag._settings.prompt_template_retrieval_query.format(
+        expected_semantic_query = rag._settings.prompt_template_retrieval_query.format(
             content=query
         )
-        mock_engine.search.assert_called_once_with(expected_query, top_k=10)
+        expected_fts_query = query + "*"
+
+        mock_engine.search.assert_called_once_with(
+            expected_semantic_query, expected_fts_query, top_k=10
+        )
 
     @pytest.mark.parametrize("use_prompt_templates", [True, False])
     def test_search_with_prompt_template(self, mocker, use_prompt_templates):
@@ -865,9 +869,13 @@ def test_search_with_prompt_template(self, mocker, use_prompt_templates):
         rag.search("test query", new_context=False)
 
         # Assert - verify engine.search was called with correct formatted query
-        expected_query = (
+        expected_semantic_query = (
             "task: search result | query: test query"
             if use_prompt_templates
             else "test query"
         )
-        mock_engine.search.assert_called_once_with(expected_query, top_k=10)
+        expected_fts_query = "test query*"
+
+        mock_engine.search.assert_called_once_with(
+            expected_semantic_query, expected_fts_query, top_k=10
+        )

From db93d7c77d3dc430320c075dcb22a321a58a4033 Mon Sep 17 00:00:00 2001
From: Daniele Briggi <=>
Date: Mon, 20 Oct 2025 14:03:37 +0000
Subject: [PATCH 5/7] refact(search): embed query once per search

---
 src/sqlite_rag/database.py               |   1 -
 src/sqlite_rag/engine.py                 |  45 +++-
 src/sqlite_rag/formatters.py             | 102 +--------
 src/sqlite_rag/models/document_result.py |  66 +++++-
 src/sqlite_rag/sentence_splitter.py      |  11 -
 src/sqlite_rag/sqliterag.py              |  21 +-
 tests/integration/test_engine.py         |  66 ++++--
 tests/models/test_document_result.py     | 258 +++++++++++++++++++++++
 tests/test_engine.py                     | 112 ++++++++++
 tests/test_sqlite_rag.py                 |  59 ------
 10 files changed, 532 insertions(+), 209 deletions(-)
 create mode 100644 tests/models/test_document_result.py

diff --git a/src/sqlite_rag/database.py b/src/sqlite_rag/database.py
index b5ed743..c0a52d7 100644
--- a/src/sqlite_rag/database.py
+++ b/src/sqlite_rag/database.py
@@ -88,7 +88,6 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
         """
     )
 
-    # TODO: remove sequence
     cursor.execute(
         """
         CREATE TABLE IF NOT EXISTS sentences (
diff --git a/src/sqlite_rag/engine.py b/src/sqlite_rag/engine.py
index f8ce76e..1651de2 100644
--- a/src/sqlite_rag/engine.py
+++ b/src/sqlite_rag/engine.py
@@ -1,4 +1,5 @@
 import json
+import re
 import sqlite3
 from pathlib import Path
 from typing import List
@@ -123,11 +124,38 @@ def free_context(self) -> None:
 
         cursor.execute("SELECT llm_context_free();")
 
-    def search(
-        self, semantic_query: str, fts_query, top_k: int = 10
+    def search(self, query: str, top_k: int = 10) -> list[DocumentResult]:
+        """Semantic search and full-text search sorted with Reciprocal Rank Fusion,
+        with the top matching sentences attached for highlighting."""
+        semantic_query = query
+        if
self._settings.use_prompt_templates: + semantic_query = self._settings.prompt_template_retrieval_query.format( + content=query + ) + + # Clean up and split into words + # '*' is used to match while typing + fts_query = " ".join(re.findall(r"\b\w+\b", query.lower())) + "*" + + query_embedding = self.generate_embedding(semantic_query) + + results = self.search_documents(query_embedding, fts_query, top_k=top_k) + + # Refine chunks with top sentences + for result in results: + result.sentences = self.search_sentences( + query_embedding, result.chunk_id, top_k=self._settings.top_k_sentences + ) + + return results + + def search_documents( + self, query_embedding: bytes, fts_query: str, top_k: int ) -> list[DocumentResult]: """Semantic search and full-text search sorted with Reciprocal Rank Fusion.""" - query_embedding = self.generate_embedding(semantic_query) + # invalid query + if query_embedding == b"" or fts_query.strip() == "": + return [] vector_scan_type = ( "vector_quantize_scan" @@ -180,7 +208,7 @@ def search( documents.content as document_content, documents.metadata, chunks.id AS chunk_id, - chunks.content AS snippet, + chunks.content AS chunk_content, vec_rank, fts_rank, combined_rank, @@ -212,7 +240,7 @@ def search( metadata=json.loads(row["metadata"]) if row["metadata"] else {}, ), chunk_id=row["chunk_id"], - snippet=row["snippet"], + chunk_content=row["chunk_content"], vec_rank=row["vec_rank"], fts_rank=row["fts_rank"], combined_rank=row["combined_rank"], @@ -225,10 +253,9 @@ def search( return results def search_sentences( - self, query: str, chunk_id: int, top_k: int + self, query_embedding: bytes, chunk_id: int, top_k: int ) -> List[SentenceResult]: - query_embedding = self.generate_embedding(query) - + """Semantic search for sentences within a chunk.""" vector_scan_type = ( "vector_quantize_scan_stream" if self._settings.quantize_scan @@ -244,7 +271,6 @@ def search_sentences( v.rowid AS sentence_id, row_number() OVER (ORDER BY v.distance) AS rank_number, v.distance, - sentences.content as sentence_content, sentences.start_offset as sentence_start_offset, sentences.end_offset as sentence_end_offset FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v @@ -255,7 +281,6 @@ def search_sentences( ) SELECT sentence_id, - sentence_content, sentence_start_offset, sentence_end_offset, rank_number, diff --git a/src/sqlite_rag/formatters.py b/src/sqlite_rag/formatters.py index ca3b0df..ff3f0d8 100644 --- a/src/sqlite_rag/formatters.py +++ b/src/sqlite_rag/formatters.py @@ -6,8 +6,6 @@ import typer -from sqlite_rag.models.sentence_result import SentenceResult - from .models.document_result import DocumentResult @@ -82,81 +80,6 @@ def _format_uri_display(self, uri: str, icon: str, max_width: int = 75) -> str: uri_display = f"{icon} ...{uri[-available_width:]}" return uri_display - def _build_sentence_preview( - self, - chunk_content: str, - sentences: List[SentenceResult], - max_chars: int = 400, - ) -> str: - """Build preview from top 3 ranked sentences with [...] for gaps. - - Args: - chunk_content: The full chunk text - sentences: List of SentenceResult objects (should already be sorted by rank) - max_chars: Maximum total characters for preview - - Returns: - Preview string with top sentences and [...] separators. - Falls back to truncated chunk_content if sentences have no offsets. 
- """ - - # Take top 3 sentences (they should already be sorted by rank/distance) - top_sentences = sentences[:3] if sentences else [] - - if not top_sentences: - # Fallback: no sentences, return truncated chunk content - return chunk_content[:max_chars] - - # Filter sentences that have offset information - sentences_with_offsets = [ - s - for s in top_sentences - if s.start_offset is not None and s.end_offset is not None - ] - - if not sentences_with_offsets: - # Fallback: sentences exist but no offset information, return truncated chunk content - return chunk_content[:max_chars] - - # Sort by start_offset to maintain document order - sentences_with_offsets.sort( - key=lambda s: s.start_offset if s.start_offset is not None else -1 - ) - - preview_parts = [] - total_chars = 0 - prev_end_offset = None - - for sentence in sentences_with_offsets: - # Extract sentence text using offsets - sentence_text = chunk_content[ - sentence.start_offset : sentence.end_offset - ].strip() - - # Calculate remaining budget including potential separator - separator_len = len(" [...] ") if preview_parts else 0 - remaining = max_chars - total_chars - separator_len - - if remaining <= 0: - break - - # Truncate sentence if needed - if len(sentence_text) > remaining: - sentence_text = sentence_text[: remaining - 3] + "..." - - # Check if there's a gap > 10 chars from previous sentence - if prev_end_offset is not None and sentence.start_offset is not None: - gap_size = sentence.start_offset - prev_end_offset - if gap_size > 10: - preview_parts.append("[...]") - total_chars += len(" [...] ") - - preview_parts.append(sentence_text) - total_chars += len(sentence_text) - prev_end_offset = sentence.end_offset - - return " ".join(preview_parts) - class BoxedFormatter(SearchResultFormatter): """Base class for boxed result formatters.""" @@ -176,11 +99,8 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None: """Format a single result with box layout.""" icon = self._get_file_icon(doc.document.uri or "") - # Use sentence-based preview if sentences are available - if doc.sentences: - snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences) - else: - snippet_text = doc.snippet + # Get snippet from DocumentResult (handles sentence-based preview automatically) + snippet_text = doc.get_preview(max_chars=400) snippet_lines = self._clean_and_wrap_snippet( snippet_text, width=75, max_length=400 @@ -250,11 +170,8 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None: """Format a single result with box layout including sentence summary.""" icon = self._get_file_icon(doc.document.uri or "") - # Use sentence-based preview if sentences are available - if doc.sentences: - snippet_text = self._build_sentence_preview(doc.snippet, doc.sentences) - else: - snippet_text = doc.snippet + # Get snippet from DocumentResult (handles sentence-based preview automatically) + snippet_text = doc.get_preview(max_chars=400) snippet_lines = self._clean_and_wrap_snippet( snippet_text, width=75, max_length=400 @@ -305,7 +222,7 @@ def _format_single_result(self, doc: DocumentResult, idx: int) -> None: sentence.start_offset is not None and sentence.end_offset is not None ): - sentence_text = doc.snippet[ + sentence_text = doc.chunk_content[ sentence.start_offset : sentence.end_offset ].strip() # Truncate and clean for display @@ -364,13 +281,8 @@ def _print_table_header(self) -> None: def _print_table_row(self, idx: int, doc: DocumentResult) -> None: """Print a single table row.""" - # Use sentence-based 
preview if sentences are available
-        if doc.sentences:
-            snippet = self._build_sentence_preview(
-                doc.snippet, doc.sentences, max_chars=52
-            )
-        else:
-            snippet = doc.snippet
+        # Get snippet from DocumentResult (handles sentence-based preview automatically)
+        snippet = doc.get_preview(max_chars=52)
 
         # Clean snippet display
         snippet = snippet.replace("\n", " ").replace("\r", "")
diff --git a/src/sqlite_rag/models/document_result.py b/src/sqlite_rag/models/document_result.py
index 86a6f9a..0776592 100644
--- a/src/sqlite_rag/models/document_result.py
+++ b/src/sqlite_rag/models/document_result.py
@@ -9,7 +9,7 @@ class DocumentResult:
 
     document: Document
     chunk_id: int
-    snippet: str
+    chunk_content: str
 
     combined_rank: float
     vec_rank: float | None = None
@@ -20,3 +20,68 @@
 
     # highlight sentences
     sentences: list[SentenceResult] = field(default_factory=list)
+
+    def get_preview(
+        self, top_k_sentences: int = 3, max_chars: int = 400, gap: str = "[...]"
+    ) -> str:
+        """Build preview from top ranked sentences with [...] for gaps.
+
+        Args:
+            top_k_sentences: Number of top sentences to include in preview
+            max_chars: Maximum total characters for preview
+            gap: Separator inserted where skipped content lies between sentences
+
+        Returns:
+            Preview string with top sentences and [...] separators.
+            Falls back to truncated chunk_content if sentences have no offsets.
+        """
+        top_sentences = self.sentences[:top_k_sentences] if self.sentences else []
+
+        if not top_sentences:
+            # Fallback: no sentences, return truncated chunk content
+            return self.chunk_content[:max_chars]
+
+        # Filter sentences that have offset information
+        sentences_with_offsets = [
+            s
+            for s in top_sentences
+            if s.start_offset is not None and s.end_offset is not None
+        ]
+
+        if not sentences_with_offsets:
+            return self.chunk_content[:max_chars]
+
+        # Sort by start_offset to maintain document order
+        sentences_with_offsets.sort(
+            key=lambda s: s.start_offset if s.start_offset is not None else -1
+        )
+
+        preview_parts = []
+        total_chars = 0
+        prev_end_offset = None
+
+        for sentence in sentences_with_offsets:
+            sentence_text = self.chunk_content[
+                sentence.start_offset : sentence.end_offset
+            ].strip()
+
+            # Calculate remaining budget including potential separator
+            separator_len = len("[...] ") if preview_parts else 0
+            remaining = max_chars - total_chars - separator_len
+
+            if remaining <= 0:
+                break
+
+            if prev_end_offset is not None and sentence.start_offset is not None:
+                gap_size = sentence.start_offset - prev_end_offset
+                if gap_size > 10:
+                    preview_parts.append(gap)
+                    total_chars += len(gap)
+
+            preview_parts.append(sentence_text)
+            total_chars += len(sentence_text)
+            prev_end_offset = sentence.end_offset
+
+        preview = " ".join(preview_parts)
+
+        return preview[: max_chars - 3] + "..."
if len(preview) > max_chars else preview diff --git a/src/sqlite_rag/sentence_splitter.py b/src/sqlite_rag/sentence_splitter.py index a1cee83..c177504 100644 --- a/src/sqlite_rag/sentence_splitter.py +++ b/src/sqlite_rag/sentence_splitter.py @@ -48,14 +48,3 @@ def split(self, chunk: Chunk) -> List[Sentence]: ) return sentences - - def _split_into_sentences(self, text: str) -> List[str]: - """Split into focused segments for semantic matching.""" - - sentence_endings = re.compile(r'(?<=[.!?;])(?:"|\')?\s+(?=[A-Z])|[\n]{2,}') - sentences = sentence_endings.split(text) - - # Keep segments that are substantial enough (20+ chars for meaningful matching) - return [ - s.strip() for s in sentences if len(s.strip()) > self.MIN_CHARS_PER_SENTENCE - ] diff --git a/src/sqlite_rag/sqliterag.py b/src/sqlite_rag/sqliterag.py index da2078b..6aa1037 100644 --- a/src/sqlite_rag/sqliterag.py +++ b/src/sqlite_rag/sqliterag.py @@ -1,4 +1,3 @@ -import re import sqlite3 from dataclasses import asdict from pathlib import Path @@ -317,25 +316,7 @@ def search( if new_context: self._engine.create_new_context() - semantic_query = query - if self._settings.use_prompt_templates: - semantic_query = self._settings.prompt_template_retrieval_query.format( - content=query - ) - - # Clean up and split into words - # '*' is used to match while typing - fts_query = " ".join(re.findall(r"\b\w+\b", query.lower())) + "*" - - results = self._engine.search(semantic_query, fts_query, top_k=top_k) - - # Refine chunks with top sentences - for result in results: - result.sentences = self._engine.search_sentences( - semantic_query, result.chunk_id, top_k=self._settings.top_k_sentences - ) - - return results + return self._engine.search(query, top_k=top_k) def get_settings(self) -> dict: """Get settings and more useful information""" diff --git a/tests/integration/test_engine.py b/tests/integration/test_engine.py index a25a9a1..d289198 100644 --- a/tests/integration/test_engine.py +++ b/tests/integration/test_engine.py @@ -41,21 +41,54 @@ def test_quantize_embedding(self, engine: Engine): engine.quantize() # If no exception is raised, the test passes - engine.search("test query", "test query") + engine.search("test query") - def test_quantize_cleanup(self, engine): + def test_quantize_cleanup(self, engine: Engine): """Test quantize cleanup works without errors.""" engine.quantize() engine.quantize_cleanup() with pytest.raises(OperationalError) as exc_info: - engine.search("test query", "test query") + engine.search("test query") assert "Ensure that vector_quantize() has been called" in str(exc_info.value) class TestEngineSearch: + def test_search(self, engine: Engine): + # Arrange + doc1 = Document( + content="The quick brown fox jumps over the lazy dog.", + uri="document1.txt", + ) + doc2 = Document( + content="How much wood would a woodchuck chuck if a woodchuck could chuck wood?", + uri="document2.txt", + ) + + engine.create_new_context() + engine.process(doc1) + engine.process(doc2) + + repository = Repository(engine._conn, engine._settings) + repository.add_document(doc1) + repository.add_document(doc2) + + # Act + results = engine.search("quick brown fox") + + # Assert + assert len(results) > 0 + assert results[0].document.uri == "document1.txt" + + +class TestEngineSearchDocuments: def test_search_with_empty_database(self, engine: Engine): - results = engine.search("nonexistent query", "nonexistent query", top_k=5) + results = engine.search_documents(b"132456", "myquery", top_k=5) + + assert len(results) == 0 + + def 
test_search_with_invalid_query(self, engine: Engine): + results = engine.search_documents(b"", "", top_k=5) assert len(results) == 0 @@ -89,10 +122,11 @@ def test_search_with_semantic_and_fts(self, db_conn): repository.add_document(doc2) doc3_id = repository.add_document(doc3) + embedding = engine.generate_embedding("about lumberjack") engine.quantize() # Act - results = engine.search("wood lumberjack", "wood lumberjack", top_k=5) + results = engine.search_documents(embedding, "about lumberjack", top_k=5) assert len(results) > 0 assert doc3_id == results[0].document.id @@ -127,10 +161,11 @@ def test_search_semantic_result(self, db_conn): repository.add_document(doc2) doc3_id = repository.add_document(doc3) + embedding = engine.generate_embedding("about lumberjack") engine.quantize() # Act - results = engine.search("about lumberjack", "about lumberjack", top_k=5) + results = engine.search_documents(embedding, "about lumberjack", top_k=5) assert len(results) > 0 assert doc3_id == results[0].document.id @@ -165,10 +200,11 @@ def test_search_fts_results(self, db_conn): repository.add_document(doc2) repository.add_document(doc3) + embedding = engine.generate_embedding("quick brown fox") engine.quantize() # Act - results = engine.search("quick brown fox", "quick brown fox", top_k=5) + results = engine.search_documents(embedding, "quick brown fox", top_k=5) assert len(results) > 0 assert doc1_id == results[0].document.id @@ -195,8 +231,10 @@ def test_search_without_quantization(self, db_conn): repository = Repository(conn, settings) doc_id = repository.add_document(doc) + embedding = engine.generate_embedding("wood lumberjack") + # Act - results = engine.search("wood lumberjack", "wood lumberjack") + results = engine.search_documents(embedding, "wood lumberjack", top_k=5) assert len(results) > 0 assert doc_id == results[0].document.id @@ -227,12 +265,14 @@ def test_search_exact_match(self, db_conn): doc1_id = repository.add_document(doc1) repository.add_document(doc2) + embedding = engine.generate_embedding( + "The quick brown fox jumps over the lazy dog" + ) engine.quantize() # Act - results = engine.search( - "The quick brown fox jumps over the lazy dog", - "The quick brown fox jumps over the lazy dog", + results = engine.search_documents( + embedding, "The quick brown fox jumps over the lazy dog", top_k=5 ) assert len(results) > 0 @@ -268,9 +308,11 @@ def test_search_sentences(self, db_conn): cursor = conn.execute("SELECT id FROM chunks WHERE document_id = ?", (doc_id,)) chunk_id = cursor.fetchone()[0] + embedding = engine.generate_embedding("stitch time") + # Act results = engine.search_sentences( - "stitch time", + embedding, chunk_id, top_k=1, ) diff --git a/tests/models/test_document_result.py b/tests/models/test_document_result.py new file mode 100644 index 0000000..7fb80fc --- /dev/null +++ b/tests/models/test_document_result.py @@ -0,0 +1,258 @@ +from sqlite_rag.models.document import Document +from sqlite_rag.models.document_result import DocumentResult +from sqlite_rag.models.sentence_result import SentenceResult + + +class TestDocumentResult: + def test_get_preview_no_sentences(self): + doc = Document(uri="test.txt", content="test content") + chunk_content = "This is a long piece of text. 
" * 50 + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content=chunk_content, + combined_rank=1.0, + sentences=[], + ) + + preview = result.get_preview(max_chars=100) + assert len(preview) == 100 + assert preview == chunk_content[:100] + + def test_get_preview_with_sentences_no_offsets(self): + doc = Document(uri="test.txt", content="test content") + chunk_content = "First sentence. Second sentence. Third sentence." + + # Sentences without offset information + sentences = [ + SentenceResult(chunk_id=1, id=1, rank=1, distance=0.1), + SentenceResult(chunk_id=1, id=2, rank=2, distance=0.2), + ] + + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content=chunk_content, + combined_rank=1.0, + sentences=sentences, + ) + + preview = result.get_preview(max_chars=100) + assert preview == chunk_content[:100] + + def test_get_preview_with_single_sentence(self): + doc = Document(uri="test.txt", content="test content") + chunk_content = ( + "First sentence here. Second sentence there. Third sentence everywhere." + ) + + sentences = [ + SentenceResult( + chunk_id=1, + id=2, + rank=1, + distance=0.1, + start_offset=21, + end_offset=44, + ), + ] + + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content=chunk_content, + combined_rank=1.0, + sentences=sentences, + ) + + preview = result.get_preview(max_chars=400) + assert preview == "Second sentence there." + + def test_get_preview_with_gaps(self): + """Test get_preview adds [...] separator for gaps.""" + doc = Document(uri="test.txt", content="test content") + chunk_content = ( + "First sentence at the beginning." + "Some middle content that we skip over here." + "Last sentence at the end." + ) + + sentences = [ + SentenceResult( + chunk_id=1, + id=1, + rank=1, + distance=0.1, + start_offset=0, + end_offset=32, # "First sentence at the beginning." + ), + SentenceResult( + chunk_id=1, + id=3, + rank=2, + distance=0.2, + start_offset=75, + end_offset=103, # "Last sentence at the end." + ), + ] + + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content=chunk_content, + combined_rank=1.0, + sentences=sentences, + ) + + preview = result.get_preview(max_chars=400) + assert ( + "First sentence at the beginning. [...] Last sentence at the end." + == preview + ) + + def test_get_preview_respects_max_chars(self): + """Test get_preview truncates when exceeding max_chars.""" + doc = Document(uri="test.txt", content="test content") + chunk_content = ( + "A very long sentence that exceeds the maximum character limit. " * 10 + ) + + sentences = [ + SentenceResult( + chunk_id=1, + id=1, + rank=1, + distance=0.1, + start_offset=0, + end_offset=200, + ), + ] + + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content=chunk_content, + combined_rank=1.0, + sentences=sentences, + ) + + preview = result.get_preview(max_chars=50) + assert len(preview) <= 50 + + def test_get_preview_with_multiple_consecutive_and_ordered_sentences(self): + doc = Document(uri="test.txt", content="test content") + chunk_content = "First sentence. Second sentence. Third sentence." 
+
+        sentences = [
+            SentenceResult(
+                chunk_id=1,
+                id=1,
+                rank=1,
+                distance=0.1,
+                start_offset=0,
+                end_offset=15,
+            ),
+            SentenceResult(
+                chunk_id=1,
+                id=2,
+                rank=2,
+                distance=0.2,
+                start_offset=16,
+                end_offset=32,
+            ),
+        ]
+
+        result = DocumentResult(
+            document=doc,
+            chunk_id=1,
+            chunk_content=chunk_content,
+            combined_rank=1.0,
+            sentences=sentences,
+        )
+
+        preview = result.get_preview(max_chars=400)
+        assert preview == "First sentence. Second sentence."
+
+    def test_get_preview_orders_sentences_by_offset(self):
+        """Test get_preview reorders sentences by start_offset (document order)."""
+        doc = Document(uri="test.txt", content="test content")
+        chunk_content = "First sentence. " + "x" * 50 + " Third sentence."
+
+        # Sentences in reverse rank order (rank 1 is last in document)
+        sentences = [
+            SentenceResult(
+                chunk_id=1,
+                id=3,
+                rank=1,  # higher rank but appears later in the document
+                distance=0.1,
+                start_offset=66,
+                end_offset=82,  # "Third sentence."
+            ),
+            SentenceResult(
+                chunk_id=1,
+                id=1,
+                rank=2,
+                distance=0.2,
+                start_offset=0,
+                end_offset=15,  # "First sentence."
+            ),
+        ]
+
+        result = DocumentResult(
+            document=doc,
+            chunk_id=1,
+            chunk_content=chunk_content,
+            combined_rank=1.0,
+            sentences=sentences,
+        )
+
+        preview = result.get_preview(max_chars=400)
+        # Should be in document order despite rank order
+        assert "First sentence. [...] Third sentence." == preview
+
+    def test_get_preview_limits_to_top_k_sentences(self):
+        """Test get_preview respects top_k_sentences parameter."""
+        doc = Document(uri="test.txt", content="test content")
+        chunk_content = "First. Second. Third. Fourth. Fifth."
+
+        # 5 sentences, but only top 2 should be used
+        sentences = [
+            SentenceResult(
+                chunk_id=1, id=1, rank=1, distance=0.1, start_offset=0, end_offset=6
+            ),
+            SentenceResult(
+                chunk_id=1, id=2, rank=2, distance=0.2, start_offset=7, end_offset=14
+            ),
+            SentenceResult(
+                chunk_id=1, id=3, rank=3, distance=0.3, start_offset=15, end_offset=21
+            ),
+            SentenceResult(
+                chunk_id=1, id=4, rank=4, distance=0.4, start_offset=22, end_offset=29
+            ),
+            SentenceResult(
+                chunk_id=1, id=5, rank=5, distance=0.5, start_offset=30, end_offset=36
+            ),
+        ]
+
+        result = DocumentResult(
+            document=doc,
+            chunk_id=1,
+            chunk_content=chunk_content,
+            combined_rank=1.0,
+            sentences=sentences,
+        )
+
+        preview = result.get_preview(top_k_sentences=2, max_chars=400)
+        assert "First." in preview
+        assert "Second." in preview
+        assert "Third" not in preview
+        assert "Fourth" not in preview
+        assert "Fifth" not in preview
+
+        # Test with default top_k=3
+        preview_default = result.get_preview(max_chars=400)
+        assert "First." in preview_default
+        assert "Second." in preview_default
+        assert "Third."
in preview_default
+        assert "Fourth" not in preview_default
+        assert "Fifth" not in preview_default
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 7f7ead6..1cdd3f3 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -147,3 +147,115 @@ def test_process_without_sentences(self, mocker):
         # Assert
         assert len(document.chunks) == 1
         assert len(document.chunks[0].sentences) == 0
+
+    def test_search(self, mocker):
+        # Arrange
+        mock_conn = mocker.Mock()
+        settings = Settings()
+        engine = Engine(mock_conn, settings, mocker.Mock(), mocker.Mock())
+
+        mock_generate = mocker.patch.object(
+            engine, "generate_embedding", return_value=b"embedding"
+        )
+        mock_search_docs = mocker.patch.object(
+            engine,
+            "search_documents",
+            return_value=[
+                mocker.Mock(chunk_id=1, sentences=[]),
+                mocker.Mock(chunk_id=2, sentences=[]),
+            ],
+        )
+        mock_search_sents = mocker.patch.object(
+            engine, "search_sentences", return_value=[]
+        )
+
+        # Act
+        engine.search("test query", top_k=5)
+
+        # Assert
+        mock_generate.assert_called_once_with('title: "none" | text: test query')
+        mock_search_docs.assert_called_once_with(b"embedding", "test query*", top_k=5)
+        assert mock_search_sents.call_count == 2
+        mock_search_sents.assert_any_call(
+            b"embedding", 1, top_k=settings.top_k_sentences
+        )
+        mock_search_sents.assert_any_call(
+            b"embedding", 2, top_k=settings.top_k_sentences
+        )
+
+    def test_search_uses_retrieval_query_template(self, mocker):
+        # Arrange
+        template = "task: search | Do something with {content}"
+
+        settings = Settings(prompt_template_retrieval_query=template)
+
+        mock_conn = mocker.Mock()
+        engine = Engine(mock_conn, settings, mocker.Mock(), mocker.Mock())
+
+        mock_generate = mocker.patch.object(
+            engine, "generate_embedding", return_value=b"embedding"
+        )
+        mock_search_docs = mocker.patch.object(
+            engine,
+            "search_documents",
+            return_value=[
+                mocker.Mock(chunk_id=1, sentences=[]),
+            ],
+        )
+        mock_search_sents = mocker.patch.object(
+            engine, "search_sentences", return_value=[]
+        )
+
+        # Act
+        query = "test query"
+        engine.search(query, top_k=10)
+
+        expected_fts_query = query + "*"
+
+        # Assert
+        # generate_embedding is called with the formatted template
+        mock_generate.assert_called_once_with(
+            "task: search | Do something with test query"
+        )
+        mock_search_docs.assert_called_once_with(
+            b"embedding", expected_fts_query, top_k=10
+        )
+        mock_search_sents.assert_called_once_with(
+            b"embedding", 1, top_k=settings.top_k_sentences
+        )
+
+    @pytest.mark.parametrize("use_prompt_templates", [True, False])
+    def test_search_with_prompt_template(self, mocker, use_prompt_templates):
+        # Arrange
+        settings = Settings(
+            use_prompt_templates=use_prompt_templates,
+            prompt_template_retrieval_query="task: search result | query: {content}",
+        )
+
+        mock_conn = mocker.Mock()
+        engine = Engine(mock_conn, settings, mocker.Mock(), mocker.Mock())
+
+        mock_generate_embedding = mocker.patch.object(
+            engine, "generate_embedding", return_value=b"embedding"
+        )
+        mocker.patch.object(
+            engine,
+            "search_documents",
+            return_value=[
+                mocker.Mock(chunk_id=1, sentences=[]),
+            ],
+        )
+        mocker.patch.object(engine, "search_sentences", return_value=[])
+
+        # Act
+        query = "test query"
+        engine.search(query)
+
+        # Assert - verify generate_embedding was called with the formatted query
+        expected_semantic_query = (
+            "task: search result | query: test query"
+            if use_prompt_templates
+            else "test query"
+        )
+
+        mock_generate_embedding.assert_called_once_with(expected_semantic_query)
diff --git a/tests/test_sqlite_rag.py
b/tests/test_sqlite_rag.py index a04fc71..3cdddc0 100644 --- a/tests/test_sqlite_rag.py +++ b/tests/test_sqlite_rag.py @@ -6,7 +6,6 @@ import pytest from sqlite_rag import SQLiteRag -from sqlite_rag.settings import Settings class TestSQLiteRagAdd: @@ -821,61 +820,3 @@ def test_search_samples_exact_match_by_scan_type(self, quantize_scan: bool): # Second result should have distance > 0 second_result = results[1] assert second_result.vec_distance and second_result.vec_distance > 0.0 - - def test_search_uses_retrieval_query_template(self, mocker): - template = "task: search | Do something with {content}" - - settings = {"prompt_template_retrieval_query": template} - - rag = SQLiteRag.create(":memory:", settings=settings) - - mock_engine = mocker.Mock() - mock_engine.search.return_value = [] - - rag._engine = mock_engine - - query = "test query" - rag.search(query) - - # Assert that engine.search was called with the formatted template - expected_semantic_query = rag._settings.prompt_template_retrieval_query.format( - content=query - ) - expected_fts_query = query + "*" - - mock_engine.search.assert_called_once_with( - expected_semantic_query, expected_fts_query, top_k=10 - ) - - @pytest.mark.parametrize("use_prompt_templates", [True, False]) - def test_search_with_prompt_template(self, mocker, use_prompt_templates): - # Arrange - settings = Settings( - use_prompt_templates=use_prompt_templates, - prompt_template_retrieval_query="task: search result | query: {content}", - ) - - # Mock engine and its search method - mock_engine = mocker.Mock() - mock_engine.search.return_value = [] # Empty search results - - # Create SQLiteRag instance with mocked dependencies - rag = SQLiteRag(mocker.Mock(), settings) - rag._engine = mock_engine - - mocker.patch.object(rag, "_ensure_initialized") - - # Act - rag.search("test query", new_context=False) - - # Assert - verify engine.search was called with correct formatted query - expected_semantic_query = ( - "task: search result | query: test query" - if use_prompt_templates - else "test query" - ) - expected_fts_query = "test query*" - - mock_engine.search.assert_called_once_with( - expected_semantic_query, expected_fts_query, top_k=10 - ) From 6f2aa0adefe5271b5dce4c8462ee51243e2fab6f Mon Sep 17 00:00:00 2001 From: Daniele Briggi <=> Date: Mon, 20 Oct 2025 16:37:31 +0000 Subject: [PATCH 6/7] feat(sentences): extract sentence content from sql. 
Avoid fetching the entire chunk to extract the content
---
 src/sqlite_rag/database.py               |  11 +-
 src/sqlite_rag/engine.py                 |  18 +-
 src/sqlite_rag/formatters.py             | 206 ++++++----------
 src/sqlite_rag/models/document_result.py |  28 +--
 src/sqlite_rag/models/sentence_result.py |   2 +
 tests/models/test_document_result.py     | 107 ++++----
 tests/test_formatters.py                 | 299 +++++++++++++++++++++++
 7 files changed, 456 insertions(+), 215 deletions(-)
 create mode 100644 tests/test_formatters.py

diff --git a/src/sqlite_rag/database.py b/src/sqlite_rag/database.py
index c0a52d7..f15450f 100644
--- a/src/sqlite_rag/database.py
+++ b/src/sqlite_rag/database.py
@@ -76,19 +76,19 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
     )
 
     # TODO: this table is not ready for sqlite-sync, it uses the id AUTOINCREMENT
-    cursor.execute(
+    cursor.executescript(
         """
         CREATE TABLE IF NOT EXISTS chunks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            document_id TEXT,
            content TEXT,
-            embedding BLOB,
-            FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
+            embedding BLOB
        );
+        CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks (document_id);
        """
    )
 
-    cursor.execute(
+    cursor.executescript(
        """
        CREATE TABLE IF NOT EXISTS sentences (
            id TEXT PRIMARY KEY,
@@ -97,7 +97,8 @@ def _create_schema(conn: sqlite3.Connection, settings: Settings):
            embedding BLOB,
            start_offset INTEGER,
            end_offset INTEGER
-        )
+        );
+        CREATE INDEX IF NOT EXISTS idx_sentences_chunk_id ON sentences (chunk_id);
        """
    )
 
diff --git a/src/sqlite_rag/engine.py b/src/sqlite_rag/engine.py
index 1651de2..1496437 100644
--- a/src/sqlite_rag/engine.py
+++ b/src/sqlite_rag/engine.py
@@ -270,22 +270,27 @@ def search_sentences(
             SELECT
                 v.rowid AS sentence_id,
                 row_number() OVER (ORDER BY v.distance) AS rank_number,
-                v.distance,
-                sentences.start_offset as sentence_start_offset,
-                sentences.end_offset as sentence_end_offset
+                v.distance
             FROM {vector_scan_type}('sentences', 'embedding', :query_embedding) AS v
             JOIN sentences ON sentences.rowid = v.rowid
             WHERE sentences.chunk_id = :chunk_id
-            ORDER BY rank_number ASC
             LIMIT :top_k
         )
         SELECT
             sentence_id,
-            sentence_start_offset,
-            sentence_end_offset,
+            -- Extract the sentence text directly from the chunk content
+            COALESCE(
+                substr(chunks.content, sentences.start_offset + 1, sentences.end_offset - sentences.start_offset),
+                ''
+            ) AS content,
+            sentences.start_offset AS sentence_start_offset,
+            sentences.end_offset AS sentence_end_offset,
             rank_number,
             distance
         FROM vec_matches
+        JOIN sentences ON sentences.rowid = vec_matches.sentence_id
+        JOIN chunks ON chunks.id = sentences.chunk_id
+        ORDER BY rank_number ASC
         """,  # nosec B608
         {
             "query_embedding": query_embedding,
@@ -301,6 +306,7 @@ def search_sentences(
             SentenceResult(
                 id=row["sentence_id"],
                 chunk_id=chunk_id,
+                content=row["content"].strip(),
                 rank=row["rank_number"],
                 distance=row["distance"],
                 start_offset=row["sentence_start_offset"],
diff --git a/src/sqlite_rag/formatters.py b/src/sqlite_rag/formatters.py
index ff3f0d8..66a7ae9 100644
--- a/src/sqlite_rag/formatters.py
+++ b/src/sqlite_rag/formatters.py
@@ -2,12 +2,19 @@
 """Output formatters for CLI search results."""
 
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import List
 
 import typer
 
 from .models.document_result import DocumentResult
 
+# Display constants
+BOX_CONTENT_WIDTH = 75
+BOX_TOTAL_WIDTH = 77
+SNIPPET_MAX_LENGTH = 400
+SENTENCE_PREVIEW_LENGTH = 50
+MAX_SENTENCES_DISPLAY = 5
+
 
 class SearchResultFormatter(ABC):
     """Base class for search result formatters."""
 
@@
-40,7 +47,10 @@ def _get_file_icon(self, uri: str) -> str: return "📄" def _clean_and_wrap_snippet( - self, snippet: str, width: int = 75, max_length: int = 400 + self, + snippet: str, + width: int = BOX_CONTENT_WIDTH, + max_length: int = SNIPPET_MAX_LENGTH, ) -> List[str]: """Clean snippet and wrap to specified width with max length limit.""" # Clean the snippet @@ -69,7 +79,9 @@ def _clean_and_wrap_snippet( return lines - def _format_uri_display(self, uri: str, icon: str, max_width: int = 75) -> str: + def _format_uri_display( + self, uri: str, icon: str, max_width: int = BOX_CONTENT_WIDTH + ) -> str: """Format URI for display with icon and truncation.""" if not uri: return "" @@ -82,7 +94,15 @@ def _format_uri_display(self, uri: str, icon: str, max_width: int = 75) -> str: class BoxedFormatter(SearchResultFormatter): - """Base class for boxed result formatters.""" + """Boxed formatter for search results with optional debug information.""" + + def __init__(self, show_debug: bool = False): + """Initialize formatter. + + Args: + show_debug: Whether to show debug information and sentence details + """ + self.show_debug = show_debug def format_results(self, results: List[DocumentResult], query: str) -> None: if not results: @@ -98,56 +118,39 @@ def format_results(self, results: List[DocumentResult], query: str) -> None: def _format_single_result(self, doc: DocumentResult, idx: int) -> None: """Format a single result with box layout.""" icon = self._get_file_icon(doc.document.uri or "") + snippet_text = doc.get_preview(max_chars=SNIPPET_MAX_LENGTH) + snippet_lines = self._clean_and_wrap_snippet(snippet_text) - # Get snippet from DocumentResult (handles sentence-based preview automatically) - snippet_text = doc.get_preview(max_chars=400) - - snippet_lines = self._clean_and_wrap_snippet( - snippet_text, width=75, max_length=400 - ) - - # Draw the result box header - header = f"┌─ Result #{idx} " + "─" * (67 - len(str(idx))) + # Draw box header + header = f"┌─ Result #{idx} " + "─" * (BOX_TOTAL_WIDTH - 10 - len(str(idx))) typer.echo(header) - # Display URI if available + # Display URI and debug info if doc.document.uri: - uri_display = self._format_uri_display(doc.document.uri, icon, 75) - typer.echo(f"│ {uri_display:<75}│") + uri_display = self._format_uri_display(doc.document.uri, icon) + typer.echo(f"│ {uri_display:<{BOX_CONTENT_WIDTH}}│") - # Add debug info if needed - debug_line = self._get_debug_line(doc) - if debug_line: - typer.echo(debug_line) + if self.show_debug: + self._print_debug_line(doc) - typer.echo("├" + "─" * 77 + "┤") - elif self._should_show_debug(): - debug_line = self._get_debug_line(doc) - if debug_line: - typer.echo(debug_line) - typer.echo("├" + "─" * 77 + "┤") + typer.echo("├" + "─" * BOX_TOTAL_WIDTH + "┤") + elif self.show_debug: + self._print_debug_line(doc) + typer.echo("├" + "─" * BOX_TOTAL_WIDTH + "┤") # Display snippet for line in snippet_lines: - typer.echo(f"│ {line:<75} │") + typer.echo(f"│ {line:<{BOX_CONTENT_WIDTH}} │") - typer.echo("└" + "─" * 77 + "┘") - typer.echo() + # Display sentence details in debug mode + if self.show_debug and doc.sentences: + self._print_sentence_details(doc) - def _get_debug_line(self, doc: DocumentResult) -> Optional[str]: - """Get debug information line. Override in subclasses.""" - return None - - def _should_show_debug(self) -> bool: - """Whether to show debug information. 
Override in subclasses.""" - return False - - -class BoxedDebugFormatter(BoxedFormatter): - """Modern detailed formatter with debug information in boxes.""" + typer.echo("└" + "─" * BOX_TOTAL_WIDTH + "┘") + typer.echo() - def _get_debug_line(self, doc: DocumentResult) -> str: - """Format debug metrics line.""" + def _print_debug_line(self, doc: DocumentResult) -> None: + """Print debug metrics line.""" combined = ( f"{doc.combined_rank:.5f}" if doc.combined_rank is not None else "N/A" ) @@ -161,88 +164,36 @@ def _get_debug_line(self, doc: DocumentResult) -> str: if doc.fts_rank is not None else "N/A" ) - return f"│ Combined: {combined} │ Vector: {vec_info} │ FTS: {fts_info}" - - def _should_show_debug(self) -> bool: - return True - - def _format_single_result(self, doc: DocumentResult, idx: int) -> None: - """Format a single result with box layout including sentence summary.""" - icon = self._get_file_icon(doc.document.uri or "") - - # Get snippet from DocumentResult (handles sentence-based preview automatically) - snippet_text = doc.get_preview(max_chars=400) - - snippet_lines = self._clean_and_wrap_snippet( - snippet_text, width=75, max_length=400 - ) + debug_line = f"│ Combined: {combined} │ Vector: {vec_info} │ FTS: {fts_info}" + typer.echo(debug_line) - # Draw the result box header - header = f"┌─ Result #{idx} " + "─" * (67 - len(str(idx))) - typer.echo(header) + def _print_sentence_details(self, doc: DocumentResult) -> None: + """Print sentence-level details.""" + typer.echo("├" + "─" * BOX_TOTAL_WIDTH + "┤") + typer.echo(f"│ Sentences:{' ' * (BOX_CONTENT_WIDTH - 10)}│") - # Display URI if available - if doc.document.uri: - uri_display = self._format_uri_display(doc.document.uri, icon, 75) - typer.echo(f"│ {uri_display:<75}│") - - # Add debug info - debug_line = self._get_debug_line(doc) - if debug_line: - typer.echo(debug_line) - - typer.echo("├" + "─" * 77 + "┤") - elif self._should_show_debug(): - debug_line = self._get_debug_line(doc) - if debug_line: - typer.echo(debug_line) - typer.echo("├" + "─" * 77 + "┤") - - # Display snippet preview - for line in snippet_lines: - typer.echo(f"│ {line:<75} │") - - # Display sentence details if available - if doc.sentences: - typer.echo("├" + "─" * 77 + "┤") - typer.echo( - "│ Sentences: │" + for sentence in doc.sentences[:MAX_SENTENCES_DISPLAY]: + distance_str = ( + f"{sentence.distance:.6f}" if sentence.distance is not None else "N/A" ) - - for sentence in doc.sentences[:5]: # Show max 5 sentences - distance_str = ( - f"{sentence.distance:.6f}" - if sentence.distance is not None - else "N/A" - ) - rank_str = f"#{sentence.rank}" if sentence.rank is not None else "N/A" - - # Extract sentence preview (first 50 chars) - if ( - sentence.start_offset is not None - and sentence.end_offset is not None - ): - sentence_text = doc.chunk_content[ - sentence.start_offset : sentence.end_offset - ].strip() - # Truncate and clean for display - sentence_preview = sentence_text.replace("\n", " ").replace( - "\r", "" + rank_str = f"#{sentence.rank}" if sentence.rank is not None else "N/A" + + # Extract sentence preview + if sentence.start_offset is not None and sentence.end_offset is not None: + sentence_text = doc.chunk_content[ + sentence.start_offset : sentence.end_offset + ].strip() + sentence_preview = sentence_text.replace("\n", " ").replace("\r", "") + if len(sentence_preview) > SENTENCE_PREVIEW_LENGTH: + sentence_preview = ( + sentence_preview[: SENTENCE_PREVIEW_LENGTH - 3] + "..." 
) - if len(sentence_preview) > 50: - sentence_preview = sentence_preview[:47] + "..." - else: - sentence_preview = "[No offset info]" - - # Format sentence line - sentence_line = ( - f"│ {rank_str:>3} ({distance_str}) | {sentence_preview}" - ) - # Pad to 78 chars and add closing border - typer.echo(sentence_line.ljust(78) + " │") - - typer.echo("└" + "─" * 77 + "┘") - typer.echo() + else: + sentence_preview = "[No offset info]" + + # Format and print sentence line + sentence_line = f"│ {rank_str:>3} ({distance_str}) | {sentence_preview}" + typer.echo(sentence_line.ljust(BOX_TOTAL_WIDTH + 1) + " │") class TableDebugFormatter(SearchResultFormatter): @@ -312,10 +263,15 @@ def _print_table_row(self, idx: int, doc: DocumentResult) -> None: def get_formatter( debug: bool = False, table_view: bool = False ) -> SearchResultFormatter: - """Factory function to get the appropriate formatter.""" + """Factory function to get the appropriate formatter. + + Args: + debug: Show debug information and sentence details + table_view: Use table format instead of boxed format + + Returns: + SearchResultFormatter instance + """ if table_view: return TableDebugFormatter() - elif debug: - return BoxedDebugFormatter() - else: - return BoxedFormatter() + return BoxedFormatter(show_debug=debug) diff --git a/src/sqlite_rag/models/document_result.py b/src/sqlite_rag/models/document_result.py index 0776592..f0c7dc8 100644 --- a/src/sqlite_rag/models/document_result.py +++ b/src/sqlite_rag/models/document_result.py @@ -9,7 +9,6 @@ class DocumentResult: document: Document chunk_id: int - chunk_content: str combined_rank: float vec_rank: float | None = None @@ -18,6 +17,8 @@ class DocumentResult: vec_distance: float | None = None fts_score: float | None = None + chunk_content: str = "" + # highlight sentences sentences: list[SentenceResult] = field(default_factory=list) @@ -40,18 +41,8 @@ def get_preview( # Fallback: no sentences, return truncated chunk content return self.chunk_content[:max_chars] - # Filter sentences that have offset information - sentences_with_offsets = [ - s - for s in top_sentences - if s.start_offset is not None and s.end_offset is not None - ] - - if not sentences_with_offsets: - return self.chunk_content[:max_chars] - # Sort by start_offset to maintain document order - sentences_with_offsets.sort( + top_sentences.sort( key=lambda s: s.start_offset if s.start_offset is not None else -1 ) @@ -59,17 +50,8 @@ def get_preview( total_chars = 0 prev_end_offset = None - for sentence in sentences_with_offsets: - sentence_text = self.chunk_content[ - sentence.start_offset : sentence.end_offset - ].strip() - - # Calculate remaining budget including potential separator - separator_len = len("[...] 
") if preview_parts else 0 - remaining = max_chars - total_chars - separator_len - - if remaining <= 0: - break + for sentence in top_sentences: + sentence_text = sentence.content if prev_end_offset is not None and sentence.start_offset is not None: gap_size = sentence.start_offset - prev_end_offset diff --git a/src/sqlite_rag/models/sentence_result.py b/src/sqlite_rag/models/sentence_result.py index 7094efd..d2ffa1d 100644 --- a/src/sqlite_rag/models/sentence_result.py +++ b/src/sqlite_rag/models/sentence_result.py @@ -6,6 +6,8 @@ class SentenceResult: id: int | None = None chunk_id: int | None = None + content: str = "" + rank: float | None = None distance: float | None = None diff --git a/tests/models/test_document_result.py b/tests/models/test_document_result.py index 7fb80fc..1246ce6 100644 --- a/tests/models/test_document_result.py +++ b/tests/models/test_document_result.py @@ -6,61 +6,34 @@ class TestDocumentResult: def test_get_preview_no_sentences(self): doc = Document(uri="test.txt", content="test content") - chunk_content = "This is a long piece of text. " * 50 result = DocumentResult( document=doc, chunk_id=1, - chunk_content=chunk_content, combined_rank=1.0, sentences=[], ) preview = result.get_preview(max_chars=100) - assert len(preview) == 100 - assert preview == chunk_content[:100] - - def test_get_preview_with_sentences_no_offsets(self): - doc = Document(uri="test.txt", content="test content") - chunk_content = "First sentence. Second sentence. Third sentence." - - # Sentences without offset information - sentences = [ - SentenceResult(chunk_id=1, id=1, rank=1, distance=0.1), - SentenceResult(chunk_id=1, id=2, rank=2, distance=0.2), - ] - - result = DocumentResult( - document=doc, - chunk_id=1, - chunk_content=chunk_content, - combined_rank=1.0, - sentences=sentences, - ) - - preview = result.get_preview(max_chars=100) - assert preview == chunk_content[:100] + assert preview == "" def test_get_preview_with_single_sentence(self): doc = Document(uri="test.txt", content="test content") - chunk_content = ( - "First sentence here. Second sentence there. Third sentence everywhere." - ) sentences = [ SentenceResult( chunk_id=1, id=2, + content="Second sentence there.", rank=1, distance=0.1, - start_offset=21, - end_offset=44, + start_offset=15, + end_offset=36, ), ] result = DocumentResult( document=doc, chunk_id=1, - chunk_content=chunk_content, combined_rank=1.0, sentences=sentences, ) @@ -71,35 +44,31 @@ def test_get_preview_with_single_sentence(self): def test_get_preview_with_gaps(self): """Test get_preview adds [...] separator for gaps.""" doc = Document(uri="test.txt", content="test content") - chunk_content = ( - "First sentence at the beginning." - "Some middle content that we skip over here." - "Last sentence at the end." - ) sentences = [ SentenceResult( chunk_id=1, id=1, + content="First sentence at the beginning.", rank=1, distance=0.1, start_offset=0, - end_offset=32, # "First sentence at the beginning." + end_offset=32, ), SentenceResult( chunk_id=1, id=3, + content="Last sentence at the end.", rank=2, distance=0.2, start_offset=75, - end_offset=103, # "Last sentence at the end." 
+                end_offset=103,
             ),
         ]
 
         result = DocumentResult(
             document=doc,
             chunk_id=1,
-            chunk_content=chunk_content,
             combined_rank=1.0,
             sentences=sentences,
         )
@@ -113,14 +82,13 @@ def test_get_preview_with_gaps(self):
     def test_get_preview_respects_max_chars(self):
         """Test get_preview truncates when exceeding max_chars."""
         doc = Document(uri="test.txt", content="test content")
-        chunk_content = (
-            "A very long sentence that exceeds the maximum character limit. " * 10
-        )
+        content = "A very long sentence that exceeds the maximum character limit. " * 10
 
         sentences = [
             SentenceResult(
                 chunk_id=1,
                 id=1,
+                content=content,
                 rank=1,
                 distance=0.1,
                 start_offset=0,
@@ -131,7 +99,6 @@ def test_get_preview_respects_max_chars(self):
         result = DocumentResult(
             document=doc,
             chunk_id=1,
-            chunk_content=chunk_content,
             combined_rank=1.0,
             sentences=sentences,
         )
@@ -141,12 +108,12 @@ def test_get_preview_respects_max_chars(self):
 
     def test_get_preview_with_multiple_consecutive_and_ordered_sentences(self):
         doc = Document(uri="test.txt", content="test content")
-        chunk_content = "First sentence. Second sentence. Third sentence."
 
         sentences = [
             SentenceResult(
                 chunk_id=1,
                 id=1,
+                content="First sentence.",
                 rank=1,
                 distance=0.1,
                 start_offset=0,
@@ -155,6 +122,7 @@ def test_get_preview_with_multiple_consecutive_and_ordered_sentences(self):
             SentenceResult(
                 chunk_id=1,
                 id=2,
+                content="Second sentence.",
                 rank=2,
                 distance=0.2,
                 start_offset=16,
@@ -165,7 +133,6 @@ def test_get_preview_with_multiple_consecutive_and_ordered_sentences(self):
         result = DocumentResult(
             document=doc,
             chunk_id=1,
-            chunk_content=chunk_content,
             combined_rank=1.0,
             sentences=sentences,
         )
@@ -176,32 +143,32 @@ def test_get_preview_with_multiple_consecutive_and_ordered_sentences(self):
     def test_get_preview_orders_sentences_by_offset(self):
         """Test get_preview reorders sentences by start_offset (document order)."""
         doc = Document(uri="test.txt", content="test content")
-        chunk_content = "First sentence. " + "x" * 50 + " Third sentence."
 
         # Sentences in reverse rank order (rank 1 is last in document)
         sentences = [
             SentenceResult(
                 chunk_id=1,
                 id=3,
+                content="Third sentence.",
                 rank=1,  # higher rank but appears later in the document
                 distance=0.1,
                 start_offset=66,
-                end_offset=82,  # "Third sentence."
+                end_offset=82,
             ),
             SentenceResult(
                 chunk_id=1,
                 id=1,
+                content="First sentence.",
                 rank=2,
                 distance=0.2,
                 start_offset=0,
-                end_offset=15,  # "First sentence."
+                end_offset=15,
             ),
         ]
 
         result = DocumentResult(
             document=doc,
             chunk_id=1,
-            chunk_content=chunk_content,
             combined_rank=1.0,
             sentences=sentences,
         )
@@ -213,31 +180,59 @@ def test_get_preview_orders_sentences_by_offset(self):
     def test_get_preview_limits_to_top_k_sentences(self):
         """Test get_preview respects top_k_sentences parameter."""
         doc = Document(uri="test.txt", content="test content")
-        chunk_content = "First. Second. Third. Fourth. Fifth."
# 5 sentences, but only top 2 should be used sentences = [ SentenceResult( - chunk_id=1, id=1, rank=1, distance=0.1, start_offset=0, end_offset=6 + chunk_id=1, + id=1, + content="First.", + rank=1, + distance=0.1, + start_offset=0, + end_offset=6, ), SentenceResult( - chunk_id=1, id=2, rank=2, distance=0.2, start_offset=7, end_offset=14 + chunk_id=1, + id=2, + content="Second.", + rank=2, + distance=0.2, + start_offset=7, + end_offset=14, ), SentenceResult( - chunk_id=1, id=3, rank=3, distance=0.3, start_offset=15, end_offset=21 + chunk_id=1, + id=3, + content="Third.", + rank=3, + distance=0.3, + start_offset=15, + end_offset=21, ), SentenceResult( - chunk_id=1, id=4, rank=4, distance=0.4, start_offset=22, end_offset=29 + chunk_id=1, + id=4, + content="Fourth.", + rank=4, + distance=0.4, + start_offset=22, + end_offset=29, ), SentenceResult( - chunk_id=1, id=5, rank=5, distance=0.5, start_offset=30, end_offset=36 + chunk_id=1, + id=5, + content="Fifth.", + rank=5, + distance=0.5, + start_offset=30, + end_offset=36, ), ] result = DocumentResult( document=doc, chunk_id=1, - chunk_content=chunk_content, combined_rank=1.0, sentences=sentences, ) diff --git a/tests/test_formatters.py b/tests/test_formatters.py new file mode 100644 index 0000000..c738c91 --- /dev/null +++ b/tests/test_formatters.py @@ -0,0 +1,299 @@ +from sqlite_rag.formatters import ( + BoxedFormatter, + TableDebugFormatter, + get_formatter, +) +from sqlite_rag.models.document import Document +from sqlite_rag.models.document_result import DocumentResult +from sqlite_rag.models.sentence_result import SentenceResult + + +class TestGetFormatter: + """Test the get_formatter factory function.""" + + def test_get_formatter_default(self): + """Test getting formatter with default parameters.""" + formatter = get_formatter() + assert isinstance(formatter, BoxedFormatter) + assert formatter.show_debug is False + + def test_get_formatter_debug(self): + """Test getting formatter with debug=True.""" + formatter = get_formatter(debug=True) + assert isinstance(formatter, BoxedFormatter) + assert formatter.show_debug is True + + def test_get_formatter_table_view(self): + """Test getting table formatter.""" + formatter = get_formatter(table_view=True) + assert isinstance(formatter, TableDebugFormatter) + + def test_get_formatter_table_view_takes_precedence(self): + """Test that table_view takes precedence over debug.""" + formatter = get_formatter(debug=True, table_view=True) + assert isinstance(formatter, TableDebugFormatter) + # Table formatter doesn't have show_debug attribute + + +class TestSearchResultFormatter: + """Test base SearchResultFormatter methods.""" + + def setup_method(self): + """Set up test fixtures.""" + self.formatter = BoxedFormatter() + + def test_get_file_icon_python(self): + """Test getting icon for Python files.""" + assert self.formatter._get_file_icon("test.py") == "🐍" + assert self.formatter._get_file_icon("test.pyx") == "🐍" + + def test_get_file_icon_javascript(self): + """Test getting icon for JavaScript/TypeScript files.""" + assert self.formatter._get_file_icon("test.js") == "⚡" + assert self.formatter._get_file_icon("test.ts") == "⚡" + assert self.formatter._get_file_icon("test.jsx") == "⚡" + assert self.formatter._get_file_icon("test.tsx") == "⚡" + + def test_get_file_icon_markdown(self): + """Test getting icon for Markdown files.""" + assert self.formatter._get_file_icon("README.md") == "📄" + assert self.formatter._get_file_icon("doc.markdown") == "📄" + + def test_get_file_icon_case_insensitive(self): + 
"""Test that file icon detection is case insensitive.""" + assert self.formatter._get_file_icon("TEST.PY") == "🐍" + assert self.formatter._get_file_icon("Test.Js") == "⚡" + + def test_get_file_icon_empty_uri(self): + """Test getting icon for empty URI.""" + assert self.formatter._get_file_icon("") == "📝" + + def test_get_file_icon_unknown_extension(self): + """Test getting default icon for unknown extensions.""" + assert self.formatter._get_file_icon("test.xyz") == "📄" + + def test_clean_and_wrap_snippet_basic(self): + """Test basic snippet cleaning and wrapping.""" + snippet = "This is a simple test snippet." + result = self.formatter._clean_and_wrap_snippet(snippet, width=30) + assert len(result) > 0 + assert all(len(line) <= 30 for line in result) + + def test_clean_and_wrap_snippet_removes_newlines(self): + """Test that newlines and carriage returns are removed.""" + snippet = "Line 1\nLine 2\r\nLine 3" + result = self.formatter._clean_and_wrap_snippet(snippet) + combined = " ".join(result) + assert "\n" not in combined + assert "\r" not in combined + assert "Line 1 Line 2 Line 3" == combined + + def test_clean_and_wrap_snippet_truncates_long_text(self): + """Test that long snippets are truncated.""" + snippet = "A" * 500 + result = self.formatter._clean_and_wrap_snippet(snippet, max_length=100) + combined = "".join(result) + assert len(combined) <= 103 # 100 + "..." + assert combined.endswith("...") + + def test_format_uri_display_basic(self): + """Test basic URI formatting.""" + uri_display = self.formatter._format_uri_display( + "path/to/file.py", "🐍", max_width=50 + ) + assert uri_display == "🐍 path/to/file.py" + + def test_format_uri_display_truncates_long_uri(self): + """Test that long URIs are truncated.""" + long_uri = "very/long/path/" * 10 + "file.py" + uri_display = self.formatter._format_uri_display(long_uri, "🐍", max_width=50) + assert len(uri_display) <= 50 + assert uri_display.startswith("🐍 ...") + + def test_format_uri_display_empty_uri(self): + """Test formatting empty URI.""" + assert self.formatter._format_uri_display("", "🐍") == "" + + +class TestBoxedFormatter: + """Test BoxedFormatter functionality.""" + + def test_init_default(self): + """Test BoxedFormatter initialization with default parameters.""" + formatter = BoxedFormatter() + assert formatter.show_debug is False + + def test_init_with_debug(self): + """Test BoxedFormatter initialization with debug enabled.""" + formatter = BoxedFormatter(show_debug=True) + assert formatter.show_debug is True + + def test_format_results_empty(self, mocker): + """Test formatting with empty results.""" + formatter = BoxedFormatter() + mock_echo = mocker.patch("typer.echo") + formatter.format_results([], "test query") + mock_echo.assert_called_once_with("No documents found matching the query.") + + def test_format_results_with_results(self, mocker): + """Test formatting with actual results.""" + doc = Document(uri="test.py", content="test content") + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content="This is test content.", + combined_rank=0.95, + vec_rank=1, + fts_rank=2, + vec_distance=0.1, + fts_score=5.0, + ) + + formatter = BoxedFormatter() + mock_echo = mocker.patch("typer.echo") + formatter.format_results([result], "test query") + # Should print header, result box, and empty line + assert mock_echo.call_count > 3 + # Check that it prints the search results header + first_call = mock_echo.call_args_list[0][0][0] + assert "Search Results" in first_call + assert "1 matches" in first_call + + def 
test_format_results_with_debug(self, mocker): + """Test formatting with debug information.""" + doc = Document(uri="test.py", content="test content") + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content="This is test content.", + combined_rank=0.95, + vec_rank=1, + fts_rank=2, + vec_distance=0.123456, + fts_score=5.678901, + ) + + formatter = BoxedFormatter(show_debug=True) + mock_echo = mocker.patch("typer.echo") + formatter.format_results([result], "test query") + # Check that debug info is printed + output = "\n".join( + [ + str(call.args[0]) if call.args else "" + for call in mock_echo.call_args_list + ] + ) + assert "Combined:" in output + assert "Vector:" in output + assert "FTS:" in output + + def test_format_results_with_sentences_in_debug_mode(self, mocker): + """Test formatting with sentence details in debug mode.""" + doc = Document(uri="test.py", content="test content") + sentences = [ + SentenceResult( + id=1, + chunk_id=1, + content="First sentence.", + rank=1, + distance=0.1, + start_offset=0, + end_offset=15, + ), + SentenceResult( + id=2, + chunk_id=1, + content="Second sentence.", + rank=2, + distance=0.2, + start_offset=16, + end_offset=32, + ), + ] + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content="First sentence. Second sentence.", + combined_rank=0.95, + sentences=sentences, + ) + + formatter = BoxedFormatter(show_debug=True) + mock_echo = mocker.patch("typer.echo") + formatter.format_results([result], "test query") + output = "\n".join( + [ + str(call.args[0]) if call.args else "" + for call in mock_echo.call_args_list + ] + ) + assert "Sentences:" in output + + def test_format_results_without_sentences_in_non_debug_mode(self, mocker): + """Test that sentences are not shown in non-debug mode.""" + doc = Document(uri="test.py", content="test content") + sentences = [ + SentenceResult( + id=1, + chunk_id=1, + content="First sentence.", + rank=1, + distance=0.1, + start_offset=0, + end_offset=15, + ), + ] + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content="First sentence.", + combined_rank=0.95, + sentences=sentences, + ) + + formatter = BoxedFormatter(show_debug=False) + mock_echo = mocker.patch("typer.echo") + formatter.format_results([result], "test query") + output = "\n".join( + [ + str(call.args[0]) if call.args else "" + for call in mock_echo.call_args_list + ] + ) + assert "Sentences:" not in output + + +class TestTableDebugFormatter: + """Test TableDebugFormatter functionality.""" + + def test_format_results_empty(self, mocker): + """Test table formatting with empty results.""" + formatter = TableDebugFormatter() + mock_echo = mocker.patch("typer.echo") + formatter.format_results([], "test query") + mock_echo.assert_called_once_with("No documents found matching the query.") + + def test_format_results_with_results(self, mocker): + """Test table formatting with actual results.""" + doc = Document(uri="test.py", content="test content") + result = DocumentResult( + document=doc, + chunk_id=1, + chunk_content="This is test content.", + combined_rank=0.95, + vec_rank=1, + fts_rank=2, + vec_distance=0.1, + fts_score=5.0, + ) + + formatter = TableDebugFormatter() + mock_echo = mocker.patch("typer.echo") + formatter.format_results([result], "test query") + # Should print header, table header, separator, and row + assert mock_echo.call_count >= 4 + # Check that headers are printed + output = "\n".join([str(call[0][0]) for call in mock_echo.call_args_list]) + assert "Preview" in output + assert "URI" in 
output + assert "C.Rank" in output From c9ee5ddb985f9f5574b6a8bc5c6849c4d2e95664 Mon Sep 17 00:00:00 2001 From: Daniele Briggi <=> Date: Tue, 21 Oct 2025 10:06:16 +0000 Subject: [PATCH 7/7] fix(seg-fault): attempt --- .devcontainer/devcontainer.json | 20 ------------- .devcontainer/py3.11/devcontainer.json | 38 ++++++++++++++++++++++++ .github/workflows/test.yaml | 4 +++ src/sqlite_rag/models/document_result.py | 5 ++-- tests/conftest.py | 12 ++++++-- 5 files changed, 55 insertions(+), 24 deletions(-) create mode 100644 .devcontainer/py3.11/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b22273d..855ba7b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,18 +1,6 @@ { "name": "Python 3.10", "image": "mcr.microsoft.com/devcontainers/python:3.10", - "runArgs": [ - "--runtime", - "nvidia", - "--gpus", - "all", - // optional but make sure CUDA workloads are available - "--env", - "NVIDIA_VISIBLE_DEVICES=all", - // optional but make sure CUDA workloads are available - "--env", - "NVIDIA_DRIVER_CAPABILITIES=compute,utility" - ], "customizations": { "vscode": { "extensions": [ @@ -26,13 +14,5 @@ "hbenl.vscode-test-explorer" ] } - }, - "hostRequirements": { - "gpu": "optional" - }, - "remoteEnv": { - // optional but make sure CUDA workloads are available - "NVIDIA_VISIBLE_DEVICES": "all", - "NVIDIA_DRIVER_CAPABILITIES": "compute,utility" } } diff --git a/.devcontainer/py3.11/devcontainer.json b/.devcontainer/py3.11/devcontainer.json new file mode 100644 index 0000000..42bb225 --- /dev/null +++ b/.devcontainer/py3.11/devcontainer.json @@ -0,0 +1,38 @@ +{ + "name": "Python 3.11", + "image": "mcr.microsoft.com/devcontainers/python:3.11", + "runArgs": [ + "--runtime", + "nvidia", + "--gpus", + "all", + // optional but make sure CUDA workloads are available + "--env", + "NVIDIA_VISIBLE_DEVICES=all", + // optional but make sure CUDA workloads are available + "--env", + "NVIDIA_DRIVER_CAPABILITIES=compute,utility" + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.black-formatter", + "ms-python.flake8", + "ms-python.isort", + "ms-python.vscode-pylance", + "ms-python.python", + "ms-python.debugpy", + "ms-python.vscode-python-envs", + "hbenl.vscode-test-explorer" + ] + } + }, + "hostRequirements": { + "gpu": "optional" + }, + "remoteEnv": { + // optional but make sure CUDA workloads are available + "NVIDIA_VISIBLE_DEVICES": "all", + "NVIDIA_DRIVER_CAPABILITIES": "compute,utility" + } +} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 36c77c2..a929f23 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -49,6 +49,10 @@ jobs: - name: Test # Using default directory for models + # COVERAGE_CORE=pytrace: Workaround for Python 3.11 segfault with SQLite extensions + C tracer + # See: https://github.com/nedbat/coveragepy/issues/1665 + env: + COVERAGE_CORE: ${{ matrix.python-version == '3.11' && 'pytrace' || '' }} run: | pytest --cov --cov-branch --cov-report=xml -v -m "not slow" ./tests diff --git a/src/sqlite_rag/models/document_result.py b/src/sqlite_rag/models/document_result.py index f0c7dc8..07b364f 100644 --- a/src/sqlite_rag/models/document_result.py +++ b/src/sqlite_rag/models/document_result.py @@ -42,8 +42,9 @@ def get_preview( return self.chunk_content[:max_chars] # Sort by start_offset to maintain document order - top_sentences.sort( - key=lambda s: s.start_offset if s.start_offset is not None else -1 + top_sentences = sorted( + 
top_sentences,
+            key=lambda s: s.start_offset if s.start_offset is not None else -1,
         )
 
         preview_parts = []
diff --git a/tests/conftest.py b/tests/conftest.py
index 04a3596..2bd096a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,6 @@
 import sqlite3
 import tempfile
+from collections.abc import Generator
 
 import pytest
 
@@ -26,7 +27,7 @@ def db_conn():
 
 
 @pytest.fixture
-def engine(db_conn) -> Engine:
+def engine(db_conn) -> Generator[Engine, None, None]:
     conn, settings = db_conn
 
     engine = Engine(
@@ -39,4 +40,11 @@ def engine(db_conn) -> Engine:
     engine.quantize()
     engine.create_new_context()
 
-    return engine
+    yield engine
+
+    # Clean up resources to prevent segfaults in Python 3.11
+    # Must explicitly free resources before garbage collection
+    try:
+        engine.close()
+    except Exception:
+        pass
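
Note on the teardown added above: engine.close() is called by the fixture, but
its implementation is not included in this patch. Below is a minimal sketch of
what it could look like, assuming it simply reverses the fixture's setup steps
(create_new_context() and quantize()) through the engine's existing
free_context() and quantize_cleanup() helpers; the body shown is an
assumption, not the project's actual method:

    # Hypothetical Engine.close(), not part of this patch: release the
    # native resources held by the SQLite extension while the connection
    # is still open, instead of leaving them to be freed implicitly at
    # interpreter shutdown (the suspected trigger of the 3.11 segfaults).
    def close(self) -> None:
        self.free_context()      # undo create_new_context(): llm_context_free()
        self.quantize_cleanup()  # undo quantize(): vector_quantize_cleanup(...)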