11import os
22import sqlite3
3- import json
43from typing import Any , Dict , List , Optional
54
5+ from config import CFG # config (keeps chunk_size etc if needed)
6+
67# Simple connection helper: we open new connections per operation so the code is robust
78# across threads. We set WAL journal mode for safer concurrency.
9+ # A short busy timeout keeps callers from blocking for long when the DB is locked.
810def _get_connection (db_path : str ) -> sqlite3 .Connection :
911 dirname = os .path .dirname (os .path .abspath (db_path ))
1012 if dirname and not os .path .isdir (dirname ):
1113 os .makedirs (dirname , exist_ok = True )
12- conn = sqlite3 .connect (db_path , check_same_thread = False )
14+ # timeout in seconds for busy sqlite; small value to avoid long blocking in web requests
15+ conn = sqlite3 .connect (db_path , check_same_thread = False , timeout = 5.0 )
1316 conn .row_factory = sqlite3 .Row
1417 try :
1518 conn .execute ("PRAGMA journal_mode = WAL;" )
@@ -23,23 +26,21 @@ def init_db(database_path: str) -> None:
2326 """
2427 Initialize database schema. Safe to call multiple times.
2528 Creates:
26- - analyses
29+ - analyses (embedding_count column kept for backward compat but not used as source of truth)
2730 - files
28- - embeddings
2931 - chunks (with embedding BLOB column for sqlite-vector)
3032 """
3133 conn = _get_connection (database_path )
3234 try :
3335 cur = conn .cursor ()
34- # analyses table
36+ # analyses table: embedding_count column kept for compatibility but will be computed live
3537 cur .execute (
3638 """
3739 CREATE TABLE IF NOT EXISTS analyses (
3840 id INTEGER PRIMARY KEY AUTOINCREMENT,
3941 name TEXT NOT NULL,
4042 path TEXT NOT NULL,
4143 status TEXT NOT NULL,
42- file_count INTEGER DEFAULT 0,
4344 embedding_count INTEGER DEFAULT 0,
4445 created_at TEXT DEFAULT (datetime('now'))
4546 )
@@ -63,22 +64,7 @@ def init_db(database_path: str) -> None:
6364 )
6465 cur .execute ("CREATE INDEX IF NOT EXISTS idx_files_analysis ON files(analysis_id);" )
6566
66- # embeddings: audit/backup store of embeddings as JSON text
67- cur .execute (
68- """
69- CREATE TABLE IF NOT EXISTS embeddings (
70- id INTEGER PRIMARY KEY AUTOINCREMENT,
71- file_id INTEGER NOT NULL,
72- vector TEXT,
73- created_at TEXT DEFAULT (datetime('now')),
74- FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
75- )
76- """
77- )
78- cur .execute ("CREATE INDEX IF NOT EXISTS idx_embeddings_file ON embeddings(file_id);" )
79-
8067 # chunks table: metadata for chunked documents; includes embedding BLOB column
81- # which sqlite-vector will operate on (via vector_as_f32 / vector_full_scan / vector_init, etc).
8268 cur .execute (
8369 """
8470 CREATE TABLE IF NOT EXISTS chunks (
@@ -122,19 +108,6 @@ def update_analysis_status(database_path: str, analysis_id: int, status: str) ->
122108 conn .close ()
123109
124110
125- def update_analysis_counts (database_path : str , analysis_id : int , file_count : int , embedding_count : int ) -> None :
126- conn = _get_connection (database_path )
127- try :
128- cur = conn .cursor ()
129- cur .execute (
130- "UPDATE analyses SET file_count = ?, embedding_count = ? WHERE id = ?" ,
131- (file_count , embedding_count , analysis_id ),
132- )
133- conn .commit ()
134- finally :
135- conn .close ()
136-
137-
138111def store_file (database_path : str , analysis_id : int , path : str , content : str , language : str ) -> int :
139112 """
140113 Insert a file row. Returns the new file id.
@@ -152,29 +125,10 @@ def store_file(database_path: str, analysis_id: int, path: str, content: str, la
152125 conn .close ()
153126
154127
155- def store_embedding (database_path : str , file_id : int , vector : Any ) -> None :
156- """
157- Store an embedding in the embeddings audit table as JSON text.
158- Keep semantics backward-compatible: external code expects this to exist.
159- """
160- conn = _get_connection (database_path )
161- try :
162- cur = conn .cursor ()
163- cur .execute (
164- "INSERT INTO embeddings (file_id, vector) VALUES (?, ?)" ,
165- (file_id , json .dumps (vector )),
166- )
167- conn .commit ()
168- finally :
169- conn .close ()
170-
171-
172128def insert_chunk_row_with_null_embedding (database_path : str , file_id : int , path : str , chunk_index : int ) -> int :
173129 """
174- Convenience to insert a chunk metadata row without populating embedding column.
130+ Insert a chunk metadata row without populating embedding column.
175131 Returns the new chunks.id.
176- (Typically you will later update chunks.embedding using the sqlite-vector API or via
177- an INSERT that uses vector_as_f32(...) as done in analyzer._insert_chunk_vector.)
178132 """
179133 conn = _get_connection (database_path )
180134 try :
@@ -190,11 +144,27 @@ def insert_chunk_row_with_null_embedding(database_path: str, file_id: int, path:
190144
191145
192146def list_analyses (database_path : str ) -> List [Dict [str , Any ]]:
147+ """
148+ Return analyses with computed file_count and computed embedding_count (from chunks.embedding).
149+ This ensures the UI shows accurate, up-to-date counts based on actual rows.
150+ """
193151 conn = _get_connection (database_path )
194152 try :
195153 cur = conn .cursor ()
196154 rows = cur .execute (
197- "SELECT id, name, path, status, file_count, embedding_count, created_at FROM analyses ORDER BY id DESC"
155+ """
156+ SELECT
157+ a.id,
158+ a.name,
159+ a.path,
160+ a.status,
161+ (SELECT COUNT(*) FROM files f WHERE f.analysis_id = a.id) AS file_count,
162+ (SELECT COUNT(*) FROM chunks ch JOIN files f2 ON ch.file_id = f2.id
163+ WHERE f2.analysis_id = a.id AND ch.embedding IS NOT NULL) AS embedding_count,
164+ a.created_at
165+ FROM analyses a
166+ ORDER BY a.id DESC
167+ """
198168 ).fetchall ()
199169 results : List [Dict [str , Any ]] = []
200170 for r in rows :
@@ -204,8 +174,8 @@ def list_analyses(database_path: str) -> List[Dict[str, Any]]:
204174 "name" : r ["name" ],
205175 "path" : r ["path" ],
206176 "status" : r ["status" ],
207- "file_count" : r ["file_count" ],
208- "embedding_count" : r ["embedding_count" ],
177+ "file_count" : int ( r ["file_count" ]) ,
178+ "embedding_count" : int ( r ["embedding_count" ]) ,
209179 "created_at" : r ["created_at" ],
210180 }
211181 )
@@ -227,18 +197,12 @@ def list_files_for_analysis(database_path: str, analysis_id: int) -> List[Dict[s
227197
228198def delete_analysis (database_path : str , analysis_id : int ) -> None :
229199 """
230- Delete an analysis and cascade-delete associated files / embeddings / chunks.
231- SQLite foreign keys require PRAGMA foreign_keys = ON for enforcement; do explicit deletes
232- for safety across SQLite builds.
200+ Delete an analysis and cascade-delete associated files / chunks.
201+ SQLite enforces foreign keys only when PRAGMA foreign_keys = ON is set on the connection, so delete child rows explicitly instead of relying on cascades.
233202 """
234203 conn = _get_connection (database_path )
235204 try :
236205 cur = conn .cursor ()
237- # delete embeddings for files in analysis
238- cur .execute (
239- "DELETE FROM embeddings WHERE file_id IN (SELECT id FROM files WHERE analysis_id = ?)" ,
240- (analysis_id ,),
241- )
242206 # delete chunks for files in analysis
243207 cur .execute (
244208 "DELETE FROM chunks WHERE file_id IN (SELECT id FROM files WHERE analysis_id = ?)" ,
0 commit comments