Skip to content

Commit 42682df

Browse files
committed
feat(error): track errors in files
1 parent 3328e32 commit 42682df

File tree

1 file changed

+76
-38
lines changed

1 file changed

+76
-38
lines changed

analyzer.py

Lines changed: 76 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import concurrent.futures
88
import sqlite3
99
import importlib.resources
10+
import logging
1011
from pathlib import Path
1112
from typing import Optional, Dict, Any, List
1213

@@ -50,6 +51,10 @@
5051
DB_LOCK_RETRY_COUNT = 6
5152
DB_LOCK_RETRY_BASE_DELAY = 0.05 # seconds, exponential backoff multiplier
5253

# configure basic logging for visibility
# NOTE(review): calling basicConfig at import time is heavy-handed for a
# library module — it is a no-op when the root logger already has handlers,
# but consider leaving log configuration to the application entry point.
logging.basicConfig(level=logging.INFO)
# module-level logger, named after this module per stdlib convention
logger = logging.getLogger(__name__)
5358

5459
def detect_language(path: str):
5560
if "LICENSE.md" in path:
@@ -73,6 +78,54 @@ async def async_get_embedding(text: str, model: Optional[str] = None):
7378
return await _run_in_executor(get_embedding_for_text, text, model)
7479

7580

81+
# helper: write error files to disk (instead of storing them in the DB)
82+
def _write_error_file_sync(database_path: str, analysis_id: Optional[int], rel_path: str, content: str, language: Optional[str] = None) -> str:
83+
"""
84+
Synchronously write an error file to disk under:
85+
<dir_of_database>/analysis_errors/<analysis_id>/<rel_path>
86+
87+
rel_path is sanitized to avoid path traversal.
88+
Returns the full written path.
89+
"""
90+
try:
91+
# base directory: directory that contains the database file; fallback to cwd
92+
base = os.path.dirname(os.path.abspath(database_path)) if database_path else os.getcwd()
93+
except Exception:
94+
base = os.getcwd()
95+
96+
base_dir = os.path.join(base, "analysis_errors", str(analysis_id or "unknown"))
97+
98+
# Sanitize rel_path: normalize, remove leading slashes, replace .. with __
99+
p = os.path.normpath(rel_path)
100+
# Normalize separators and strip any leading drive/sep
101+
p = p.replace("\\", "/")
102+
while p.startswith("../") or p.startswith("/"):
103+
if p.startswith("../"):
104+
p = p[len("../") :]
105+
elif p.startswith("/"):
106+
p = p.lstrip("/")
107+
p = p.replace("..", "__")
108+
safe_rel = p
109+
110+
full_path = os.path.join(base_dir, safe_rel)
111+
# Ensure directory exists
112+
os.makedirs(os.path.dirname(full_path), exist_ok=True)
113+
114+
# Write file (overwrite if exists)
115+
try:
116+
with open(full_path, "w", encoding="utf-8") as fh:
117+
fh.write(content or "")
118+
except Exception as e:
119+
# As a last resort, log to logger
120+
logger.exception("Failed to write error file to disk: %s (dest=%s)", e, full_path)
121+
122+
return full_path
123+
124+
125+
async def _write_error_file(database_path: str, analysis_id: Optional[int], rel_path: str, content: str, language: Optional[str] = None):
    """Async facade over _write_error_file_sync: runs the blocking disk
    write in the shared executor so the event loop is never stalled."""
    args = (database_path, analysis_id, rel_path, content, language)
    return await _run_in_executor(_write_error_file_sync, *args)
127+
128+
76129
# Simple chunker (character-based). Tunable CHUNK_SIZE, CHUNK_OVERLAP.
77130
def chunk_text(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[str]:
78131
if chunk_size <= 0:
@@ -346,46 +399,31 @@ def _insert_task(dbp, fid_local, pth, idx_local, vector_local):
346399
await _run_in_executor(_insert_task, database_path, fid, rel_path, idx, emb)
347400
embedded_any = True
348401
except Exception as e:
349-
# record an error row in files table for diagnostics but continue processing other chunks
402+
# record an error to disk (previously was stored in DB)
350403
try:
351-
await _run_in_executor(
352-
store_file,
353-
database_path,
354-
analysis_id,
355-
f"errors/{rel_path}.chunk{idx}.error.txt",
356-
f"Failed to insert chunk vector: {e}",
357-
"error",
358-
)
404+
err_content = f"Failed to insert chunk vector: {e}\n\nTraceback:\n{traceback.format_exc()}"
405+
await _write_error_file(database_path, analysis_id, f"errors/{rel_path}.chunk{idx}.error.txt", err_content, "error")
359406
except Exception:
360-
pass
407+
logger.exception("Failed to write chunk-insert error to disk for %s chunk %d", rel_path, idx)
361408
else:
362409
try:
363-
await _run_in_executor(
364-
store_file,
365-
database_path,
366-
analysis_id,
367-
f"errors/{rel_path}.chunk{idx}.error.txt",
368-
"Embedding API returned no vector for chunk.",
369-
"error",
370-
)
410+
err_content = "Embedding API returned no vector for chunk."
411+
await _write_error_file(database_path, analysis_id, f"errors/{rel_path}.chunk{idx}.error.txt", err_content, "error")
371412
except Exception:
372-
pass
413+
logger.exception("Failed to write empty-embedding error to disk for %s chunk %d", rel_path, idx)
373414

374415
return {"stored": True, "embedded": embedded_any}
375416
except Exception as e:
376417
tb = traceback.format_exc()
377418
try:
378419
error_payload = {"file": rel_path, "error": str(e), "traceback": tb[:2000]}
379-
await _run_in_executor(
380-
store_file,
381-
database_path,
382-
analysis_id,
383-
f"errors/{rel_path}.error.txt",
384-
json.dumps(error_payload, indent=2),
385-
"error",
386-
)
420+
# write the error payload to disk instead of DB
421+
try:
422+
await _write_error_file(database_path, analysis_id, f"errors/{rel_path}.error.txt", json.dumps(error_payload, indent=2), "error")
423+
except Exception:
424+
logger.exception("Failed to write exception error to disk for file %s", rel_path)
387425
except Exception:
388-
pass
426+
logger.exception("Failed while handling exception for file %s", rel_path)
389427
return {"stored": False, "embedded": False}
390428

391429

@@ -439,20 +477,16 @@ async def analyze_local_path(
439477
await _run_in_executor(update_analysis_counts, database_path, aid, file_count, emb_count)
440478
except Exception:
441479
try:
442-
await _run_in_executor(
443-
store_file,
444-
database_path,
445-
aid,
446-
f"errors/progress_update_{chunk_start}.error.txt",
447-
f"Failed to update progress at chunk_start={chunk_start}",
448-
"error",
449-
)
480+
# Previously this error was recorded to DB; now write to disk
481+
err_content = f"Failed to update progress at chunk_start={chunk_start}"
482+
await _write_error_file(database_path, aid, f"errors/progress_update_{chunk_start}.error.txt", err_content, "error")
450483
except Exception:
451-
pass
484+
logger.exception("Failed to write progress-update error to disk for analysis %s", aid)
452485

453486
# detect uv usage and deps (run in executor because it may use subprocess / file IO)
454487
uv_info = await _run_in_executor(lambda p, v: (None if p is None else p), local_path, venv_path)
455488
try:
489+
# uv_detected.json is meta information — we keep storing meta in DB as before
456490
await _run_in_executor(
457491
store_file,
458492
database_path,
@@ -462,7 +496,11 @@ async def analyze_local_path(
462496
"meta",
463497
)
464498
except Exception:
465-
pass
499+
# if storing meta fails, log to disk
500+
try:
501+
await _write_error_file(database_path, aid, "meta/uv_detected_write_failed.error.txt", "Failed to store uv_detected.json in DB", "error")
502+
except Exception:
503+
logger.exception("Failed to write uv_detected meta error to disk for analysis %s", aid)
466504

467505
# final counts & status
468506
await _run_in_executor(update_analysis_counts, database_path, aid, file_count, emb_count)

0 commit comments

Comments
 (0)