Skip to content

Commit 27b5b87

Browse files
Copilot authored and Mte90 committed
Integrate EmbeddingClient into analyzer and remove example file
- Remove analyzer_embedding_usage_example.py (not needed as per feedback)
- Import and instantiate EmbeddingClient in analyzer.py
- Replace get_embedding_for_text calls with _embedding_client.embed_text
- Pass file_path and chunk_index for detailed logging
- EmbeddingClient now actively used for all embedding operations

Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent d19ed3c commit 27b5b87

File tree

2 files changed

+11
-25
lines changed

2 files changed

+11
-25
lines changed

ai/analyzer.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@
1818
search_vectors as _search_vectors,
1919
get_chunk_text as _get_chunk_text,
2020
)
21-
from .openai import get_embedding_for_text, call_coding_api
21+
from .openai import call_coding_api
22+
from .embedding_client import EmbeddingClient
2223
from llama_index.core import Document
2324
from utils.logger import get_logger
2425
from utils import compute_file_hash, chunk_text, norm, cosine
@@ -59,15 +60,20 @@
5960

6061
logger = get_logger(__name__)
6162

63+
# Initialize EmbeddingClient for structured logging and retry logic
64+
_embedding_client = EmbeddingClient()
6265

63-
def _get_embedding_with_semaphore(semaphore: threading.Semaphore, text: str, model: Optional[str] = None):
66+
67+
def _get_embedding_with_semaphore(semaphore: threading.Semaphore, text: str, file_path: str = "<unknown>", chunk_index: int = 0, model: Optional[str] = None):
6468
"""
6569
Wrapper to acquire semaphore inside executor task to avoid deadlock.
6670
The semaphore is acquired in the worker thread, not the main thread.
71+
Now uses EmbeddingClient for better logging and error handling.
6772
"""
6873
semaphore.acquire()
6974
try:
70-
return get_embedding_for_text(text, model)
75+
# Use the embedding client with enhanced logging
76+
return _embedding_client.embed_text(text, file_path=file_path, chunk_index=chunk_index)
7177
finally:
7278
semaphore.release()
7379

@@ -192,7 +198,7 @@ def _process_file_sync(
192198
for idx, chunk_doc in batch:
193199
# Submit task to executor; semaphore will be acquired inside the worker
194200
embedding_start_time = time.time()
195-
future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, embedding_model)
201+
future = _EXECUTOR.submit(_get_embedding_with_semaphore, semaphore, chunk_doc.text, rel_path, idx, embedding_model)
196202
embedding_futures.append((idx, chunk_doc, future, embedding_start_time))
197203

198204
# Wait for batch to complete and store results
@@ -434,7 +440,7 @@ def search_semantic(query: str, database_path: str, top_k: int = 5):
434440
Uses sqlite-vector's vector_full_scan to retrieve best-matching chunks and returns
435441
a list of {file_id, path, chunk_index, score}.
436442
"""
437-
q_emb = get_embedding_for_text(query)
443+
q_emb = _embedding_client.embed_text(query, file_path="<query>", chunk_index=0)
438444
if not q_emb:
439445
return []
440446

ai/analyzer_embedding_usage_example.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments (0)