@@ -425,6 +425,7 @@ def _process_file_sync(
425425 batch = chunk_tasks [batch_start :batch_start + EMBEDDING_BATCH_SIZE ]
426426
427427 # Log batch processing start
428+ batch_start_time = time .time ()
428429 logger .info (f"Generating embeddings for { rel_path } : batch { batch_num } /{ num_batches } ({ len (batch )} chunks)" )
429430
430431 embedding_futures = []
@@ -433,48 +434,70 @@ def _process_file_sync(
433434 # Acquire semaphore to bound concurrent embedding requests
434435 semaphore .acquire ()
435436 try :
437+ embedding_start_time = time .time ()
436438 future = _EXECUTOR .submit (get_embedding_for_text , chunk_doc .text , embedding_model )
437- embedding_futures .append ((idx , chunk_doc , future ))
439+ embedding_futures .append ((idx , chunk_doc , future , embedding_start_time ))
438440 except Exception :
439441 semaphore .release ()
440442 raise
441443
442444 # Wait for batch to complete and store results
443445 saved_count = 0
444- for idx , chunk_doc , future in embedding_futures :
446+ failed_count = 0
447+ for idx , chunk_doc , future , embedding_start_time in embedding_futures :
445448 try :
449+ # Check if request is already slow before waiting for result
450+ elapsed_before_result = time .time () - embedding_start_time
451+ if elapsed_before_result > 3.0 :
452+ logger .warning (f"Embedding API request taking too long for { rel_path } chunk { idx } : { elapsed_before_result :.2f} s elapsed, still waiting for response..." )
453+
446454 emb = future .result () # This will re-raise any exception from the worker
455+ embedding_duration = time .time () - embedding_start_time
456+
457+ # Log slow embedding generation (> 5 seconds)
458+ if embedding_duration > 5.0 :
459+ logger .warning (f"Slow embedding API response for { rel_path } chunk { idx } : { embedding_duration :.2f} s total" )
447460 except Exception as e :
448461 logger .exception ("Embedding retrieval failed for %s chunk %d: %s" , rel_path , idx , e )
449462 emb = None
463+ failed_count += 1
450464 finally :
451465 semaphore .release ()
452466
453467 if emb :
454468 try :
469+ db_start_time = time .time ()
455470 conn2 = _connect_db (database_path )
456471 try :
457472 _load_sqlite_vector_extension (conn2 )
458473 _insert_chunk_vector_with_retry (conn2 , fid , rel_path , idx , emb )
459474 saved_count += 1
475+ db_duration = time .time () - db_start_time
476+
477+ # Log slow database operations (> 2 seconds)
478+ if db_duration > 2.0 :
479+ logger .warning (f"Slow database insert for { rel_path } chunk { idx } : { db_duration :.2f} s" )
460480 finally :
461481 conn2 .close ()
462482 embedded_any = True
463483 except Exception as e :
484+ failed_count += 1
464485 try :
465486 err_content = f"Failed to insert chunk vector: { e } \n \n Traceback:\n { traceback .format_exc ()} "
466487 print (err_content )
467488 except Exception :
468489 logger .exception ("Failed to write chunk-insert error to disk for %s chunk %d" , rel_path , idx )
469490 else :
491+ failed_count += 1
470492 try :
471493 err_content = "Embedding API returned no vector for chunk."
472494 print (err_content )
473495 except Exception :
474496 logger .exception ("Failed to write empty-embedding error to disk for %s chunk %d" , rel_path , idx )
475497
476- # Log batch completion
477- logger .info (f"Saved { saved_count } /{ len (batch )} embeddings for { rel_path } batch { batch_num } /{ num_batches } " )
498+ # Log batch completion with timing and status
499+ batch_duration = time .time () - batch_start_time
500+ logger .info (f"Completed batch { batch_num } /{ num_batches } for { rel_path } : { saved_count } saved, { failed_count } failed, { batch_duration :.2f} s elapsed" )
478501
479502 return {"stored" : True , "embedded" : embedded_any , "skipped" : False }
480503 except Exception :
0 commit comments