@@ -502,7 +502,11 @@ def run_monte_carlo_analysis(
         logger.debug("Pre-computing failure exclusions for all iterations")
         pre_compute_start = time.time()

-        worker_args = []
+        worker_args: list[tuple] = []
+        iteration_index_to_key: dict[int, tuple] = {}
+        key_to_first_arg: dict[tuple, tuple] = {}
+        key_to_members: dict[tuple, list[int]] = {}
+
         for i in range(mc_iters):
             seed_offset = None
             if seed is not None:
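
For orientation: the three new mappings implement a plain group-by over iterations. `key_to_first_arg` keeps one representative worker argument per failure pattern, and `key_to_members` records which iterations share it. A minimal, self-contained sketch of the same bookkeeping pattern (toy data and names, not from this change):

    # Dicts preserve insertion order (Python 3.7+), so .values() later
    # yields one representative per group in first-seen order.
    items = ["a", "b", "a", "c", "b", "a"]
    key_to_first: dict[str, int] = {}
    key_to_members: dict[str, list[int]] = {}
    for i, key in enumerate(items):
        if key not in key_to_first:
            key_to_first[key] = i
        key_to_members.setdefault(key, []).append(i)

    assert list(key_to_first) == ["a", "b", "c"]
    assert key_to_members["a"] == [0, 2, 5]
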
@@ -520,37 +524,87 @@ def run_monte_carlo_analysis(
                 policy, seed_offset
             )

-            # Create worker arguments
-            worker_args.append(
-                (
-                    excluded_nodes,
-                    excluded_links,
-                    analysis_func,
-                    analysis_kwargs,
-                    i,  # iteration_index
-                    is_baseline,
-                    func_name,
-                )
+            arg = (
+                excluded_nodes,
+                excluded_links,
+                analysis_func,
+                analysis_kwargs,
+                i,  # iteration_index
+                is_baseline,
+                func_name,
+            )
+            worker_args.append(arg)
+
+            # Build deduplication key (excludes the iteration index)
+            dedup_key = _create_cache_key(
+                excluded_nodes, excluded_links, func_name, analysis_kwargs
             )
+            iteration_index_to_key[i] = dedup_key
+            if dedup_key not in key_to_first_arg:
+                key_to_first_arg[dedup_key] = arg
+            key_to_members.setdefault(dedup_key, []).append(i)

         pre_compute_time = time.time() - pre_compute_start
         logger.debug(
             f"Pre-computed {len(worker_args)} exclusion sets in {pre_compute_time:.2f}s"
         )

+        # Prepare unique tasks (deduplicated by failure pattern + analysis params)
+        unique_worker_args: list[tuple] = list(key_to_first_arg.values())
+        num_unique_tasks: int = len(unique_worker_args)
+        logger.info(
+            f"Monte-Carlo deduplication: {num_unique_tasks} unique patterns from {mc_iters} iterations"
+        )
+
         # Determine if we should run in parallel
-        use_parallel = parallelism > 1 and mc_iters > 1
+        use_parallel = parallelism > 1 and num_unique_tasks > 1

         start_time = time.time()

+        # Execute only unique tasks, then replicate results to original indices
         if use_parallel:
-            results, failure_patterns = self._run_parallel(
-                worker_args, mc_iters, store_failure_patterns, parallelism
+            unique_result_values, _ = self._run_parallel(
+                unique_worker_args, num_unique_tasks, False, parallelism
             )
         else:
-            results, failure_patterns = self._run_serial(
-                worker_args, store_failure_patterns
+            unique_result_values, _ = self._run_serial(unique_worker_args, False)
+
+        # Map unique task results back to their groups by zipping args with results
+        key_to_result: dict[tuple, Any] = {}
+        for arg, value in zip(unique_worker_args, unique_result_values, strict=False):
+            exc_nodes, exc_links = arg[0], arg[1]
+            dedup_key = _create_cache_key(
+                exc_nodes, exc_links, func_name, analysis_kwargs
             )
+            key_to_result[dedup_key] = value
+
+        # Build full results list in original order
+        results: list[Any] = [None] * mc_iters
+        for key, members in key_to_members.items():
+            if key not in key_to_result:
+                # Defensive: should not happen unless the parallel map returned fewer results than tasks
+                continue
+            value = key_to_result[key]
+            for idx in members:
+                results[idx] = value
+
+        # Reconstruct failure patterns per original iteration if requested
+        failure_patterns: list[dict[str, Any]] = []
+        if store_failure_patterns:
+            for key, members in key_to_members.items():
+                # Use exclusions from the representative arg
+                rep_arg = key_to_first_arg[key]
+                exc_nodes: set[str] = rep_arg[0]
+                exc_links: set[str] = rep_arg[1]
+                for idx in members:
+                    failure_patterns.append(
+                        {
+                            "iteration_index": idx,
+                            "is_baseline": bool(baseline and idx == 0),
+                            "excluded_nodes": list(exc_nodes),
+                            "excluded_links": list(exc_links),
+                        }
+                    )

         elapsed_time = time.time() - start_time

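`_create_cache_key` is called above but is not part of this diff. A plausible sketch of such a helper, assuming exclusions are sets of string IDs and `analysis_kwargs` contains hashable values (hypothetical, for illustration only):

    # Hypothetical stand-in for the _create_cache_key helper referenced in
    # the diff. Sorting makes the key order-insensitive; tuples make it
    # hashable so it can serve as a dict key.
    def _create_cache_key(
        excluded_nodes: set[str],
        excluded_links: set[str],
        func_name: str,
        analysis_kwargs: dict,
    ) -> tuple:
        return (
            tuple(sorted(excluded_nodes)),
            tuple(sorted(excluded_links)),
            func_name,
            tuple(sorted(analysis_kwargs.items())),
        )

One consequence of the replication step worth noting: `results[idx] = value` stores the same object at every member index, so all iterations in a group alias a single result; code that mutates a result in place would affect the whole group.
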
@@ -564,19 +618,14 @@ def run_monte_carlo_analysis(
                 "analysis_function": func_name,
                 "policy_name": self.policy_name,
                 "execution_time": elapsed_time,
-                "unique_patterns": len(
-                    set(
-                        (tuple(sorted(args[0])), tuple(sorted(args[1])))
-                        for args in worker_args
-                    )
-                ),
+                "unique_patterns": num_unique_tasks,
             },
         }

     def _run_parallel(
         self,
         worker_args: list[tuple],
-        mc_iters: int,
+        total_tasks: int,
         store_failure_patterns: bool,
         parallelism: int,
     ) -> tuple[list[Any], list[dict[str, Any]]]:
@@ -596,17 +645,17 @@ def _run_parallel(
         Returns:
             Tuple of (results_list, failure_patterns_list).
         """
-        workers = min(parallelism, mc_iters)
+        workers = min(parallelism, total_tasks)
         logger.info(
-            f"Running parallel analysis with {workers} workers for {mc_iters} iterations"
+            f"Running parallel analysis with {workers} workers for {total_tasks} tasks"
         )

         # Serialize network once for all workers
         network_pickle = pickle.dumps(self.network)
         logger.debug(f"Serialized network once: {len(network_pickle)} bytes")

         # Calculate optimal chunksize to minimize IPC overhead
-        chunksize = max(1, mc_iters // (workers * 4))
+        chunksize = max(1, total_tasks // (workers * 4))
         logger.debug(f"Using chunksize={chunksize} for parallel execution")

         start_time = time.time()
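
The chunksize heuristic aims for roughly four chunks per worker, which amortizes inter-process communication while keeping load reasonably balanced. A quick check of the arithmetic with assumed task counts:

    # chunksize = max(1, total_tasks // (workers * 4)): ~4 chunks per worker.
    for total_tasks, workers in [(1000, 8), (64, 8), (10, 16)]:
        chunksize = max(1, total_tasks // (workers * 4))
        print(total_tasks, workers, chunksize)
    # 1000 8 -> 31 (about 32 chunks of ~31 tasks)
    # 64 8   -> 2  (32 chunks of 2 tasks)
    # 10 16  -> 1  (per-task dispatch; min() above already caps the workers)
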
@@ -622,7 +671,7 @@ def _run_parallel(
         logger.debug(
             f"ProcessPoolExecutor created with {workers} workers and shared network"
         )
-        logger.info(f"Starting parallel execution of {mc_iters} iterations")
+        logger.info(f"Starting parallel execution of {total_tasks} tasks")

         try:
             for (
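
The surrounding log lines indicate the pickled network is shared with worker processes once rather than re-sent with every task. One standard way to achieve that is a pool initializer; a sketch under that assumption (`_init_worker` and `_worker_network` are illustrative names, not taken from this diff):

    import pickle
    from concurrent.futures import ProcessPoolExecutor

    _worker_network = None  # populated once per worker process

    def _init_worker(network_pickle: bytes) -> None:
        # Unpickle the shared network a single time per worker instead of
        # embedding it in every task's arguments.
        global _worker_network
        _worker_network = pickle.loads(network_pickle)

    # executor = ProcessPoolExecutor(
    #     max_workers=workers,
    #     initializer=_init_worker,
    #     initargs=(network_pickle,),
    # )
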
@@ -649,9 +698,9 @@ def _run_parallel(
                 )

                 # Progress logging
-                if completed_tasks % max(1, mc_iters // 10) == 0:
+                if completed_tasks % max(1, total_tasks // 10) == 0:
                     logger.info(
-                        f"Parallel analysis progress: {completed_tasks}/{mc_iters} tasks completed"
+                        f"Parallel analysis progress: {completed_tasks}/{total_tasks} tasks completed"
                     )

         except Exception as e:
@@ -664,7 +713,7 @@ def _run_parallel(
         elapsed_time = time.time() - start_time
         logger.info(f"Parallel analysis completed in {elapsed_time:.2f} seconds")
         logger.debug(
-            f"Average time per iteration: {elapsed_time / mc_iters:.3f} seconds"
+            f"Average time per task: {elapsed_time / total_tasks:.3f} seconds"
         )

         # Log exclusion pattern diversity for cache efficiency analysis
@@ -678,9 +727,9 @@ def _run_parallel(
             unique_exclusions.add(exclusion_key)

         logger.info(
-            f"Generated {len(unique_exclusions)} unique exclusion patterns from {mc_iters} iterations"
+            f"Generated {len(unique_exclusions)} unique exclusion patterns from {total_tasks} tasks"
         )
-        cache_efficiency = (mc_iters - len(unique_exclusions)) / mc_iters * 100
+        cache_efficiency = (total_tasks - len(unique_exclusions)) / total_tasks * 100
         logger.debug(
             f"Potential cache efficiency: {cache_efficiency:.1f}% (worker processes benefit from caching)"
         )