Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions src/sqlite_rag/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def document_exists_by_hash(self, hash: str) -> bool:
return cursor.fetchone() is not None

def remove_document(self, document_id: str) -> bool:
"""Remove document and its chunks by document ID"""
"""Remove document and its related resources by document ID"""
cursor = self._conn.cursor()

# Check if document exists
Expand All @@ -114,11 +114,29 @@ def remove_document(self, document_id: str) -> bool:
if cursor.fetchone()["total"] == 0:
return False

# Remove chunks first
# Delete sentences
cursor.execute(
"DELETE FROM chunks_fts WHERE rowid IN (SELECT rowid FROM chunks WHERE document_id = ?)",
"""
DELETE FROM sentences
WHERE chunk_id IN (
SELECT id FROM chunks WHERE document_id = ?
)
""",
(document_id,),
)

# Delete chunks FTS
cursor.execute(
"""
DELETE FROM chunks_fts
WHERE rowid IN (
SELECT rowid FROM chunks WHERE document_id = ?
)
""",
(document_id,),
)

# Delete chunks
cursor.execute("DELETE FROM chunks WHERE document_id = ?", (document_id,))

# Remove document
Expand Down
50 changes: 44 additions & 6 deletions tests/test_repository.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from sqlite_rag.models.chunk import Chunk
from sqlite_rag.models.document import Document
from sqlite_rag.models.sentence import Sentence
from sqlite_rag.repository import Repository


Expand Down Expand Up @@ -153,35 +154,72 @@ def test_remove_document_success(self, db_conn):
conn, settings = db_conn
repo = Repository(conn, settings)

# Add a document with chunks
# Add a document with chunks and sentences
doc = Document(
content="Test document content.",
uri="test.txt",
metadata={"author": "test"},
)
doc.chunks = [
Chunk(content="Chunk 1", embedding=b"\x00" * 384),
Chunk(content="Chunk 2", embedding=b"\x00" * 384),
chunk1 = Chunk(content="Chunk 1", embedding=b"\x00" * 384)
chunk1.sentences = [
Sentence(
content="Sentence 1",
embedding=b"\x00" * 384,
start_offset=0,
end_offset=10,
),
Sentence(
content="Sentence 2",
embedding=b"\x00" * 384,
start_offset=11,
end_offset=20,
),
]
chunk2 = Chunk(content="Chunk 2", embedding=b"\x00" * 384)
chunk2.sentences = [
Sentence(
content="Sentence 3",
embedding=b"\x00" * 384,
start_offset=0,
end_offset=10,
),
]
doc.chunks = [chunk1, chunk2]
doc_id = repo.add_document(doc)

# Verify document and chunks exist
# Verify document, chunks, and sentences exist
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM documents WHERE id = ?", (doc_id,))
assert cursor.fetchone()[0] == 1
cursor.execute("SELECT COUNT(*) FROM chunks WHERE document_id = ?", (doc_id,))
assert cursor.fetchone()[0] == 2
cursor.execute(
"""
SELECT COUNT(*) FROM sentences
WHERE chunk_id IN (SELECT id FROM chunks WHERE document_id = ?)
""",
(doc_id,),
)
assert cursor.fetchone()[0] == 3

# Remove document
success = repo.remove_document(doc_id)

assert success is True

# Verify document and chunks are removed
# Verify document, chunks, and sentences are removed
cursor.execute("SELECT COUNT(*) FROM documents WHERE id = ?", (doc_id,))
assert cursor.fetchone()[0] == 0
cursor.execute("SELECT COUNT(*) FROM chunks WHERE document_id = ?", (doc_id,))
assert cursor.fetchone()[0] == 0
cursor.execute(
"""
SELECT COUNT(*) FROM sentences
WHERE chunk_id IN (SELECT id FROM chunks WHERE document_id = ?)
""",
(doc_id,),
)
assert cursor.fetchone()[0] == 0

def test_remove_document_not_found(self, db_conn):
conn, settings = db_conn
Expand Down