Skip to content

Commit ed81719

Browse files
committed
Add profiling support for parallel workflows
1 parent 8a24d9a commit ed81719

File tree

4 files changed

+148
-8
lines changed

4 files changed

+148
-8
lines changed

ngraph/cli.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import argparse
66
import json
77
import logging
8+
import os
89
import sys
910
from pathlib import Path
1011
from typing import Any, Dict, List, Optional
@@ -421,6 +422,12 @@ def _run_scenario(
421422

422423
logger.info("Starting scenario execution with profiling")
423424

425+
# Enable child-process profiling for parallel workflows
426+
child_profile_dir = Path("worker_profiles")
427+
child_profile_dir.mkdir(exist_ok=True)
428+
os.environ["NGRAPH_PROFILE_DIR"] = str(child_profile_dir.resolve())
429+
logger.info(f"Worker profiles will be saved to: {child_profile_dir}")
430+
424431
# Manual execution of workflow steps with profiling
425432
for step in scenario.workflow:
426433
step_name = step.name or step.__class__.__name__
@@ -429,12 +436,33 @@ def _run_scenario(
429436
with profiler.profile_step(step_name, step_type):
430437
step.execute(scenario)
431438

439+
# Merge any worker profiles generated by this step
440+
if child_profile_dir.exists():
441+
profiler.merge_child_profiles(child_profile_dir, step_name)
442+
432443
logger.info("Scenario execution completed successfully")
433444

434445
# End scenario profiling and analyze results
435446
profiler.end_scenario()
436447
profiler.analyze_performance()
437448

449+
# Clean up any remaining worker profile files
450+
if child_profile_dir.exists():
451+
remaining_files = list(child_profile_dir.glob("*.pstats"))
452+
if remaining_files:
453+
logger.debug(
454+
f"Cleaning up {len(remaining_files)} remaining profile files"
455+
)
456+
for f in remaining_files:
457+
try:
458+
f.unlink()
459+
except Exception:
460+
pass
461+
try:
462+
child_profile_dir.rmdir() # Remove dir if empty
463+
except Exception:
464+
pass
465+
438466
# Generate and display performance report
439467
reporter = PerformanceReporter(profiler.results)
440468
performance_report = reporter.generate_report()

ngraph/profiling.py

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class StepProfile:
3131
function_calls: Number of function calls during execution.
3232
memory_peak: Peak memory usage during step (if available).
3333
cprofile_stats: Detailed cProfile statistics object.
34+
worker_profiles_merged: Number of worker profiles merged into this step.
3435
"""
3536

3637
step_name: str
@@ -40,6 +41,7 @@ class StepProfile:
4041
function_calls: int
4142
memory_peak: Optional[float] = None
4243
cprofile_stats: Optional[pstats.Stats] = None
44+
worker_profiles_merged: int = 0
4345

4446

4547
@dataclass
@@ -168,6 +170,61 @@ def profile_step(
168170
f"({wall_time:.3f}s wall, {cpu_time:.3f}s CPU, {function_calls:,} calls)"
169171
)
170172

173+
def merge_child_profiles(self, profile_dir: Path, step_name: str) -> None:
    """Fold worker-process profile dumps into the matching step profile.

    Finds the step named ``step_name`` in ``self.results.step_profiles``,
    merges every ``*_worker_*.pstats`` file found in ``profile_dir`` into
    its cProfile stats, refreshes the aggregate call count, and removes
    the worker dump files on success (best effort).

    Args:
        profile_dir: Directory containing worker profile files.
        step_name: Name of the workflow step these workers belong to.
    """
    # Locate the parent step profile to merge into.
    step_profile = next(
        (p for p in self.results.step_profiles if p.step_name == step_name),
        None,
    )
    if step_profile is None or step_profile.cprofile_stats is None:
        logger.warning(f"No parent profile found for step: {step_name}")
        return

    # Collect the per-worker dump files produced for this step.
    worker_files = list(profile_dir.glob("*_worker_*.pstats"))
    if not worker_files:
        logger.debug(f"No worker profiles found in {profile_dir}")
        return

    logger.debug(f"Found {len(worker_files)} worker profiles to merge")

    try:
        merged_count = 0
        for worker_file in worker_files:
            step_profile.cprofile_stats.add(str(worker_file))
            logger.debug(f"Merged worker profile: {worker_file.name}")
            merged_count += 1

        # Recompute the call total now that worker stats are folded in.
        stats_data = getattr(step_profile.cprofile_stats, "stats", {})
        step_profile.function_calls = sum(
            stat_tuple[0] for stat_tuple in stats_data.values()
        )
        step_profile.worker_profiles_merged = merged_count

        logger.info(
            f"Merged {len(worker_files)} worker profiles into step '{step_name}'"
        )

        # Best-effort removal of the now-merged worker dumps.
        for worker_file in worker_files:
            try:
                worker_file.unlink()
            except Exception:
                pass  # Best effort cleanup

    except Exception as e:
        logger.warning(f"Failed to merge worker profiles: {type(e).__name__}: {e}")
227+
171228
def analyze_performance(self) -> None:
172229
"""Analyze profiling results and identify bottlenecks.
173230
@@ -332,8 +389,8 @@ def generate_report(self) -> str:
332389
["=" * 80, "NETGRAPH PERFORMANCE PROFILING REPORT", "=" * 80, ""]
333390
)
334391

335-
# Executive summary
336-
report_lines.extend(self._generate_executive_summary())
392+
# Summary
393+
report_lines.extend(self._generate_summary())
337394

338395
# Step-by-step timing analysis
339396
report_lines.extend(self._generate_timing_analysis())
@@ -350,8 +407,8 @@ def generate_report(self) -> str:
350407

351408
return "\n".join(report_lines)
352409

353-
def _generate_executive_summary(self) -> List[str]:
354-
"""Generate executive summary section of the report."""
410+
def _generate_summary(self) -> List[str]:
411+
"""Generate summary section of the report."""
355412
summary = self.results.analysis_summary
356413

357414
lines = [
@@ -385,7 +442,15 @@ def _generate_timing_analysis(self) -> List[str]:
385442
)
386443

387444
# Create formatted table
388-
headers = ["Step Name", "Type", "Wall Time", "CPU Time", "Calls", "% Total"]
445+
headers = [
446+
"Step Name",
447+
"Type",
448+
"Wall Time",
449+
"CPU Time",
450+
"Calls",
451+
"% Total",
452+
"Workers",
453+
]
389454

390455
# Calculate column widths
391456
col_widths = [len(h) for h in headers]
@@ -404,6 +469,9 @@ def _generate_timing_analysis(self) -> List[str]:
404469
f"{step.cpu_time:.3f}s",
405470
f"{step.function_calls:,}",
406471
f"{percentage:.1f}%",
472+
f"{step.worker_profiles_merged}"
473+
if step.worker_profiles_merged > 0
474+
else "-",
407475
]
408476
table_data.append(row)
409477

ngraph/workflow/capacity_envelope_analysis.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def _worker(args: tuple[Any, ...]) -> tuple[list[tuple[str, str, float]], float]
2929
3030
Args:
3131
args: Tuple containing (base_network, base_policy, source_regex, sink_regex,
32-
mode, shortest_path, flow_placement, seed_offset, is_baseline)
32+
mode, shortest_path, flow_placement, seed_offset, is_baseline, step_name)
3333
3434
Returns:
3535
Tuple of (flow_results, total_capacity) where:
@@ -49,8 +49,20 @@ def _worker(args: tuple[Any, ...]) -> tuple[list[tuple[str, str, float]], float]
4949
flow_placement,
5050
seed_offset,
5151
is_baseline,
52+
step_name,
5253
) = args
5354

55+
# Optional per-worker profiling -------------------------------------------------
56+
profile_dir_env = os.getenv("NGRAPH_PROFILE_DIR")
57+
collect_profile: bool = bool(profile_dir_env)
58+
59+
profiler: "cProfile.Profile | None" = None # Lazy init to avoid overhead
60+
if collect_profile:
61+
import cProfile # Local import to avoid cost when profiling disabled
62+
63+
profiler = cProfile.Profile()
64+
profiler.enable()
65+
5466
worker_pid = os.getpid()
5567
worker_logger.debug(f"Worker {worker_pid} started with seed_offset={seed_offset}")
5668

@@ -126,6 +138,28 @@ def _worker(args: tuple[Any, ...]) -> tuple[list[tuple[str, str, float]], float]
126138
worker_logger.debug(f"Worker {worker_pid} computed {len(result)} flow results")
127139
worker_logger.debug(f"Worker {worker_pid} total capacity: {total_capacity:.2f}")
128140

141+
# Dump profile if enabled ------------------------------------------------------
142+
if profiler is not None:
143+
profiler.disable()
144+
try:
145+
import pstats
146+
import uuid
147+
from pathlib import Path
148+
149+
profile_dir = Path(profile_dir_env) if profile_dir_env else None
150+
if profile_dir is not None:
151+
profile_dir.mkdir(parents=True, exist_ok=True)
152+
unique_id = uuid.uuid4().hex[:8]
153+
profile_path = (
154+
profile_dir / f"{step_name}_worker_{worker_pid}_{unique_id}.pstats"
155+
)
156+
pstats.Stats(profiler).dump_stats(profile_path)
157+
worker_logger.debug("Saved worker profile to %s", profile_path.name)
158+
except Exception as exc: # pragma: no cover – best-effort profiling
159+
worker_logger.warning(
160+
"Failed to save worker profile: %s: %s", type(exc).__name__, exc
161+
)
162+
129163
return result, total_capacity
130164

131165

@@ -172,6 +206,7 @@ def _run_single_iteration(
172206
flow_placement,
173207
seed_offset,
174208
is_baseline,
209+
"", # step_name not available in serial execution
175210
)
176211
)
177212
logger.debug(
@@ -463,6 +498,7 @@ def _run_parallel_analysis(
463498
self.flow_placement,
464499
seed_offset,
465500
is_baseline,
501+
self.name or self.__class__.__name__,
466502
)
467503
)
468504

tests/workflow/test_capacity_envelope_analysis.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ def test_initialization_with_parameters(self):
100100
def test_string_flow_placement_conversion(self):
101101
"""Test automatic conversion of string flow_placement to enum."""
102102
step = CapacityEnvelopeAnalysis(
103-
source_path="^A", sink_path="^C", flow_placement="EQUAL_BALANCED"
103+
source_path="^A",
104+
sink_path="^C",
105+
flow_placement="EQUAL_BALANCED", # type: ignore[arg-type]
104106
)
105107
assert step.flow_placement == FlowPlacement.EQUAL_BALANCED
106108

@@ -121,7 +123,9 @@ def test_validation_errors(self):
121123
# Test invalid flow_placement string
122124
with pytest.raises(ValueError, match="Invalid flow_placement"):
123125
CapacityEnvelopeAnalysis(
124-
source_path="^A", sink_path="^C", flow_placement="INVALID"
126+
source_path="^A",
127+
sink_path="^C",
128+
flow_placement="INVALID", # type: ignore[arg-type]
125129
)
126130

127131
def test_validation_iterations_without_failure_policy(self):
@@ -406,6 +410,7 @@ def test_worker_no_failures(self, simple_network):
406410
FlowPlacement.PROPORTIONAL,
407411
42, # seed
408412
False, # is_baseline
413+
"test_step", # step_name
409414
)
410415

411416
flow_results, total_capacity = _worker(args)
@@ -435,6 +440,7 @@ def test_worker_with_failures(self, simple_network, simple_failure_policy):
435440
FlowPlacement.PROPORTIONAL,
436441
42, # seed
437442
False, # is_baseline
443+
"test_step", # step_name
438444
)
439445

440446
flow_results, total_capacity = _worker(args)
@@ -656,6 +662,7 @@ def test_worker_baseline_iteration(self, simple_network, simple_failure_policy):
656662
FlowPlacement.PROPORTIONAL,
657663
42, # seed
658664
True, # is_baseline - should skip failures
665+
"test_step", # step_name
659666
)
660667

661668
flow_results, total_capacity = _worker(args)
@@ -675,6 +682,7 @@ def test_worker_baseline_iteration(self, simple_network, simple_failure_policy):
675682
FlowPlacement.PROPORTIONAL,
676683
42, # seed
677684
False, # is_baseline
685+
"test_step", # step_name
678686
)
679687

680688
baseline_results, baseline_capacity = _worker(args)

0 commit comments

Comments (0)