# Increase batch size for parallel processing
EMBEDDING_BATCH_SIZE = 16  # Process embeddings in batches for better throughput
PROGRESS_LOG_INTERVAL = 10  # Log progress every N completed files
EMBEDDING_TIMEOUT = 30  # Timeout in seconds for each embedding API call
# NOTE(review): the visible caller passes this to fut.result() on futures
# yielded by concurrent.futures.as_completed(), which are already finished,
# so the timeout is unlikely to ever expire there -- confirm the intent.
FILE_PROCESSING_TIMEOUT = 300  # Timeout in seconds for processing a single file (5 minutes)
# Use at least 16 workers, or EMBEDDING_CONCURRENCY + 8 when that is larger.
_THREADPOOL_WORKERS = max(16, EMBEDDING_CONCURRENCY + 8)
# Module-level thread pool capped at _THREADPOOL_WORKERS threads.
_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=_THREADPOOL_WORKERS)
5759
@@ -216,12 +218,16 @@ def _process_file_sync(
216218 if elapsed_before_result > 3.0 :
217219 logger .warning (f"Embedding API request taking too long for { rel_path } chunk { idx } : { elapsed_before_result :.2f} s elapsed, still waiting for response..." )
218220
219- emb = future .result () # This will re-raise any exception from the worker
221+ emb = future .result (timeout = EMBEDDING_TIMEOUT ) # Add timeout to prevent hanging indefinitely
220222 embedding_duration = time .time () - embedding_start_time
221223
222224 # Log slow embedding generation (> 5 seconds)
223225 if embedding_duration > 5.0 :
224226 logger .warning (f"Slow embedding API response for { rel_path } chunk { idx } : { embedding_duration :.2f} s total" )
227+ except concurrent .futures .TimeoutError :
228+ logger .error (f"Embedding API timeout ({ EMBEDDING_TIMEOUT } s) for { rel_path } chunk { idx } " )
229+ emb = None
230+ failed_count += 1
225231 except Exception as e :
226232 logger .exception ("Embedding retrieval failed for %s chunk %d: %s" , rel_path , idx , e )
227233 emb = None
@@ -355,7 +361,7 @@ def analyze_local_path_sync(
355361
356362 for fut in concurrent .futures .as_completed (futures ):
357363 try :
358- r = fut .result ()
364+ r = fut .result (timeout = FILE_PROCESSING_TIMEOUT )
359365
360366 # Increment completed counter and check for periodic logging
361367 with counters [2 ]:
@@ -374,6 +380,10 @@ def analyze_local_path_sync(
374380 # Log periodic progress updates (every 10 files)
375381 if should_log :
376382 logger .info (f"Progress: { completed_count } /{ total_files } files processed ({ file_count } stored, { emb_count } with embeddings, { skipped_count } skipped)" )
383+ except concurrent .futures .TimeoutError :
384+ logger .error (f"File processing timeout ({ FILE_PROCESSING_TIMEOUT } s exceeded)" )
385+ with counters [2 ]:
386+ counters [1 ] += 1
377387 except Exception :
378388 logger .exception ("A per-file task failed" )
379389
0 commit comments