From c399e92f481846ea8b805eb44a6303f54c5fc1c8 Mon Sep 17 00:00:00 2001 From: Andrey Golovanov Date: Sat, 20 Dec 2025 05:28:40 +0000 Subject: [PATCH] Update to v0.14.0 - Restructured Monte Carlo results: `baseline` is returned separately, and `results` now contains deduplicated failure patterns with `occurrence_count`. - Removed `baseline` parameter from Monte Carlo APIs; baseline now runs implicitly. - Added `FlowIterationResult.occurrence_count` to track how many iterations produced each failure pattern. --- .claude/skills/netgraph-dsl/SKILL.md | 4 +- .../netgraph-dsl/references/EXAMPLES.md | 4 +- .../netgraph-dsl/references/REFERENCE.md | 12 +- CHANGELOG.md | 12 + docs/reference/api-full.md | 47 +- docs/reference/dsl.md | 1 - docs/reference/workflow.md | 64 +- ngraph/_version.py | 2 +- ngraph/exec/failure/manager.py | 458 +++++------ ngraph/explorer.py | 2 +- ngraph/model/components.py | 2 +- ngraph/model/failure/policy.py | 46 ++ ngraph/results/artifacts.py | 11 +- ngraph/results/flow.py | 22 +- ngraph/workflow/build_graph.py | 11 + ngraph/workflow/cost_power.py | 16 +- ngraph/workflow/max_flow_step.py | 61 +- .../workflow/maximum_supported_demand_step.py | 12 +- ngraph/workflow/network_stats.py | 27 +- .../workflow/traffic_matrix_placement_step.py | 102 +-- pyproject.toml | 2 +- scenarios/backbone_clos.yml | 1 - scenarios/nsfnet.yaml | 740 +++++++++++++++--- scenarios/square_mesh.yaml | 2 - tests/exec/failure/test_manager.py | 139 +++- .../exec/failure/test_manager_integration.py | 155 +++- tests/integration/scenario_3.yaml | 4 - tests/integration/scenario_4.yaml | 4 - tests/integration/test_data_templates.py | 3 - tests/integration/test_scenario_1.py | 2 +- tests/integration/test_scenario_2.py | 4 +- tests/integration/test_scenario_3.py | 47 +- tests/integration/test_scenario_4.py | 31 +- tests/integration/test_template_examples.py | 2 - tests/model/components/test_components.py | 10 +- tests/model/failure/test_failure_trace.py | 378 +++++++++ tests/model/test_risk_groups.py | 6 +- tests/model/test_selection.py | 1 - tests/profiling/test_profiling.py | 2 +- tests/workflow/test_base.py | 2 +- .../test_capacity_envelope_analysis.py | 279 +++++-- .../workflow/test_tm_analysis_perf_safety.py | 3 +- .../workflow/test_traffic_matrix_placement.py | 215 ++++- 43 files changed, 2190 insertions(+), 758 deletions(-) create mode 100644 tests/model/failure/test_failure_trace.py diff --git a/.claude/skills/netgraph-dsl/SKILL.md b/.claude/skills/netgraph-dsl/SKILL.md index 8b40468..23d8da5 100644 --- a/.claude/skills/netgraph-dsl/SKILL.md +++ b/.claude/skills/netgraph-dsl/SKILL.md @@ -250,7 +250,7 @@ workflow: mode: pairwise failure_policy: single_link iterations: 1000 - baseline: true # Include no-failure baseline + seed: 42 # Optional: for reproducibility ``` **Step types**: `BuildGraph`, `NetworkStats`, `MaxFlow`, `TrafficMatrixPlacement`, `MaximumSupportedDemand`, `CostPower` @@ -345,4 +345,4 @@ Overrides only affect entities that exist at their processing stage. 
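Seed reproducibility: with `baseline` gone, `seed` is the only knob that fixes the random failure draw; per-iteration seeds derive as `seed + i`. A minimal sketch of what this guarantees, assuming an already-constructed `FailureManager` instance `fm` (an assumed variable, not defined here):

```python
# Sketch only: seed-based reproducibility under the v0.14.0 contract.
# Assumes `fm` is an already-constructed FailureManager.
a = fm.run_max_flow_monte_carlo(source="^dc/.*", sink="^edge/.*", iterations=100, seed=42)
b = fm.run_max_flow_monte_carlo(source="^dc/.*", sink="^edge/.*", iterations=100, seed=42)

# Deduplicated patterns and their counts match exactly across runs.
assert [(r.failure_id, r.occurrence_count) for r in a["results"]] == [
    (r.failure_id, r.occurrence_count) for r in b["results"]
]
```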
## More Information - [Full DSL Reference](references/REFERENCE.md) - Complete field documentation, all operators, workflow steps -- [Working Examples](references/EXAMPLES.md) - 11 complete scenarios from simple to advanced +- [Working Examples](references/EXAMPLES.md) - 17 complete scenarios from simple to advanced diff --git a/.claude/skills/netgraph-dsl/references/EXAMPLES.md b/.claude/skills/netgraph-dsl/references/EXAMPLES.md index dc142a7..9ef93fd 100644 --- a/.claude/skills/netgraph-dsl/references/EXAMPLES.md +++ b/.claude/skills/netgraph-dsl/references/EXAMPLES.md @@ -256,7 +256,7 @@ workflow: mode: pairwise failure_policy: single_link_failure iterations: 1000 - baseline: true + seed: 42 ``` ## Example 6: Attribute-Based Selectors @@ -455,7 +455,7 @@ workflow: failure_policy: single_link iterations: 1000 parallelism: 7 - baseline: true + include_flow_details: true alpha_from_step: msd_baseline alpha_from_field: data.alpha_star ``` diff --git a/.claude/skills/netgraph-dsl/references/REFERENCE.md b/.claude/skills/netgraph-dsl/references/REFERENCE.md index b4cdeb4..f743db0 100644 --- a/.claude/skills/netgraph-dsl/references/REFERENCE.md +++ b/.claude/skills/netgraph-dsl/references/REFERENCE.md @@ -752,11 +752,15 @@ workflow: ```yaml - step_type: NetworkStats name: stats - include_disabled: false # Include disabled nodes/links in stats + include_disabled: false # Include disabled nodes/links in stats + excluded_nodes: [] # Optional: temporary node exclusions + excluded_links: [] # Optional: temporary link exclusions ``` ### MaxFlow Parameters +Baseline (no failures) is always run first as a reference. The `iterations` parameter specifies how many failure scenarios to run. + ```yaml - step_type: MaxFlow name: capacity_analysis @@ -766,7 +770,7 @@ workflow: failure_policy: policy_name iterations: 1000 parallelism: auto # or integer - baseline: true # Include baseline (no failures) iteration + seed: 42 # Optional: for reproducibility shortest_path: false # Restrict to shortest paths only require_capacity: true # Path selection considers capacity flow_placement: PROPORTIONAL # PROPORTIONAL | EQUAL_BALANCED @@ -777,6 +781,8 @@ workflow: ### TrafficMatrixPlacement Parameters +Baseline (no failures) is always run first as a reference. The `iterations` parameter specifies how many failure scenarios to run. + ```yaml - step_type: TrafficMatrixPlacement name: tm_placement @@ -785,7 +791,7 @@ workflow: iterations: 100 parallelism: auto placement_rounds: auto # or integer - baseline: false + seed: 42 # Optional: for reproducibility include_flow_details: true include_used_edges: false store_failure_patterns: false diff --git a/CHANGELOG.md b/CHANGELOG.md index e0ea978..e69a4a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.14.0] - 2025-12-20
+
+### Changed
+
+- **BREAKING**: Monte Carlo results restructured: `baseline` returned separately; `results` contains deduplicated failure patterns with `occurrence_count`
+- **BREAKING**: `baseline` parameter removed from Monte Carlo APIs; baseline always runs implicitly
+
+### Added
+
+- `FlowIterationResult.occurrence_count`: how many iterations produced this failure pattern
+- `FlowIterationResult.failure_trace`: mode/rule selection details when `store_failure_patterns=True`
+
 ## [0.13.0] - 2025-12-19
 
 ### Changed
diff --git a/docs/reference/api-full.md b/docs/reference/api-full.md
index c12c37f..2c52d53 100644
--- a/docs/reference/api-full.md
+++ b/docs/reference/api-full.md
@@ -12,7 +12,7 @@ Quick links:
 
 - [CLI Reference](cli.md)
 - [DSL Reference](dsl.md)
 
-Generated from source code on: December 20, 2025 at 00:19 UTC
+Generated from source code on: December 20, 2025 at 04:32 UTC
 
 Modules auto-discovered: 49
 
@@ -635,7 +635,7 @@ Attributes:
 
 **Methods:**
 
-- `apply_failures(self, network_nodes: 'Dict[str, Any]', network_links: 'Dict[str, Any]', network_risk_groups: 'Dict[str, Any] | None' = None, *, seed: 'Optional[int]' = None) -> 'List[str]'` - Identify which entities fail for this iteration.
+- `apply_failures(self, network_nodes: 'Dict[str, Any]', network_links: 'Dict[str, Any]', network_risk_groups: 'Dict[str, Any] | None' = None, *, seed: 'Optional[int]' = None, failure_trace: 'Optional[Dict[str, Any]]' = None) -> 'List[str]'` - Identify which entities fail for this iteration.
 - `to_dict(self) -> 'Dict[str, Any]'` - Convert to dictionary for JSON serialization.
 
 ### FailureRule
@@ -1114,6 +1114,9 @@ MaxFlow workflow step.
 
 Monte Carlo analysis of maximum flow capacity between node groups using
 FailureManager. Produces unified `flow_results` per iteration under `data.flow_results`.
 
+Baseline (no failures) is always run first as a separate reference. The `iterations`
+parameter specifies how many failure scenarios to run.
+
 YAML Configuration Example:
 
     workflow:
@@ -1130,7 +1133,6 @@ YAML Configuration Example:
           shortest_path: false
           require_capacity: true  # false for true IP/IGP semantics
           flow_placement: "PROPORTIONAL"
-          baseline: false
           seed: 42
           store_failure_patterns: false
           include_flow_details: false  # cost_distribution
@@ -1140,18 +1142,21 @@ YAML Configuration Example:
 
 Maximum flow Monte Carlo workflow step.
 
+Baseline (no failures) is always run first as a separate reference. Results are
+returned with baseline in a separate field; the flow_results list contains unique
+failure patterns (deduplicated), each with an occurrence_count of matching iterations.
+
 Attributes:
     source: Source node selector (string path or selector dict).
     sink: Sink node selector (string path or selector dict).
     mode: Flow analysis mode ("combine" or "pairwise").
     failure_policy: Name of failure policy in scenario.failure_policy_set.
-    iterations: Number of Monte Carlo trials.
+    iterations: Number of failure iterations to run.
     parallelism: Number of parallel worker processes.
     shortest_path: Whether to use shortest paths only.
     require_capacity: If True (default), path selection considers capacity.
        If False, path selection is cost-only (true IP/IGP semantics).
     flow_placement: Flow placement strategy.
-    baseline: Whether to run first iteration without failures as baseline.
     seed: Optional seed for reproducible results.
     store_failure_patterns: Whether to store failure patterns in results.
     include_flow_details: Whether to collect cost distribution per flow. 
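A minimal sketch of consuming this restructured contract from Python, assuming an already-constructed `FailureManager` instance `fm` (assumed name); only the fields documented above (`baseline`, `results`, `summary`, `occurrence_count`) are used:

```python
# Sketch only: consuming the v0.14.0 Monte Carlo contract.
res = fm.run_max_flow_monte_carlo(
    source="^dc/.*",
    sink="^edge/.*",
    mode="combine",
    iterations=1000,
    parallelism=4,
    seed=42,
)

# Baseline is its own FlowIterationResult, never mixed into `results`.
baseline_cap = res["baseline"].summary.total_placed

# `results` holds K unique failure patterns, not N iterations; weight each
# pattern by occurrence_count to recover per-iteration statistics.
total_iters = sum(r.occurrence_count for r in res["results"])
mean_cap = (
    sum(r.summary.total_placed * r.occurrence_count for r in res["results"]) / total_iters
    if total_iters
    else baseline_cap
)
print(f"baseline={baseline_cap:.1f} mean-under-failure={mean_cap:.1f}")
```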
@@ -1171,7 +1176,6 @@ Attributes:
 - `shortest_path` (bool) = False
 - `require_capacity` (bool) = True
 - `flow_placement` (FlowPlacement | str) = 1
-- `baseline` (bool) = False
 - `store_failure_patterns` (bool) = False
 - `include_flow_details` (bool) = False
 - `include_min_cut` (bool) = False
@@ -1309,17 +1313,23 @@ TrafficMatrixPlacement workflow step.
 
 Runs Monte Carlo demand placement using a named traffic matrix and produces
 unified `flow_results` per iteration under `data.flow_results`.
 
+Baseline (no failures) is always run first as a separate reference. The `iterations`
+parameter specifies how many failure scenarios to run.
+
 ### TrafficMatrixPlacement
 
 Monte Carlo demand placement using a named traffic matrix.
 
+Baseline (no failures) is always run first as a separate reference. Results are
+returned with baseline in a separate field; the flow_results list contains unique
+failure patterns (deduplicated), each with an occurrence_count of matching iterations.
+
 Attributes:
     matrix_name: Name of the traffic matrix to analyze.
     failure_policy: Optional policy name in scenario.failure_policy_set.
-    iterations: Number of Monte Carlo iterations.
+    iterations: Number of failure iterations to run.
     parallelism: Number of parallel worker processes.
     placement_rounds: Placement optimization rounds (int or "auto").
-    baseline: Include baseline iteration without failures first.
     seed: Optional seed for reproducibility.
     store_failure_patterns: Whether to store failure pattern results.
     include_flow_details: When True, include cost_distribution per flow.
@@ -1338,7 +1348,6 @@ Attributes:
 
 - `iterations` (int) = 1
 - `parallelism` (int | str) = auto
 - `placement_rounds` (int | str) = auto
-- `baseline` (bool) = False
 - `store_failure_patterns` (bool) = False
 - `include_flow_details` (bool) = False
 - `include_used_edges` (bool) = False
@@ -1968,8 +1977,14 @@ Args:
 
 Container for per-iteration analysis results.
 
 Args:
-    failure_id: Stable identifier for the failure scenario (e.g., "baseline" or a hash).
+    failure_id: Stable identifier for the failure scenario (hash of excluded
+        components, or "" for no exclusions).
     failure_state: Optional excluded components for the iteration.
+    failure_trace: Optional trace info (mode_index, selections, expansion) when
+        store_failure_patterns=True. None for baseline or when tracing disabled.
+    occurrence_count: Number of Monte Carlo iterations that produced this exact
+        failure pattern. Used with deduplication to avoid re-running identical
+        analyses. Defaults to 1.
     flows: List of flow entries for this iteration.
     summary: Aggregated summary across ``flows``.
     data: Optional per-iteration extras.
@@ -1978,6 +1993,8 @@ Args:
 
 - `failure_id` (str)
 - `failure_state` (Optional[Dict[str, List[str]]])
+- `failure_trace` (Optional[Dict[str, Any]])
+- `occurrence_count` (int) = 1
 - `flows` (List[FlowEntry]) = []
 - `summary` (FlowSummary) = FlowSummary(total_demand=0.0, total_placed=0.0, overall_ratio=1.0, dropped_flows=0, num_flows=0)
 - `data` (Dict[str, Any]) = {}
@@ -2801,12 +2818,12 @@ Attributes:
 
 **Methods:**
 
-- `compute_exclusions(self, policy: "'FailurePolicy | None'" = None, seed_offset: 'int | None' = None) -> 'tuple[set[str], set[str]]'` - Compute set of nodes and links to exclude for a failure iteration.
+- `compute_exclusions(self, policy: "'FailurePolicy | None'" = None, seed_offset: 'int | None' = None, failure_trace: 'Optional[Dict[str, Any]]' = None) -> 'tuple[set[str], set[str]]'` - Compute set of nodes and links to exclude for a failure iteration. 
 - `get_failure_policy(self) -> "'FailurePolicy | None'"` - Get failure policy for analysis.
-- `run_demand_placement_monte_carlo(self, demands_config: 'list[dict[str, Any]] | Any', iterations: 'int' = 100, parallelism: 'int' = 1, placement_rounds: 'int | str' = 'auto', baseline: 'bool' = False, seed: 'int | None' = None, store_failure_patterns: 'bool' = False, include_flow_details: 'bool' = False, include_used_edges: 'bool' = False, **kwargs) -> 'Any'` - Analyze traffic demand placement success under failures.
-- `run_max_flow_monte_carlo(self, source: 'str | dict[str, Any]', sink: 'str | dict[str, Any]', mode: 'str' = 'combine', iterations: 'int' = 100, parallelism: 'int' = 1, shortest_path: 'bool' = False, require_capacity: 'bool' = True, flow_placement: 'FlowPlacement | str' = <FlowPlacement.PROPORTIONAL: 1>, baseline: 'bool' = False, seed: 'int | None' = None, store_failure_patterns: 'bool' = False, include_flow_summary: 'bool' = False, **kwargs) -> 'Any'` - Analyze maximum flow capacity envelopes between node groups under failures.
-- `run_monte_carlo_analysis(self, analysis_func: 'AnalysisFunction', iterations: 'int' = 1, parallelism: 'int' = 1, baseline: 'bool' = False, seed: 'int | None' = None, store_failure_patterns: 'bool' = False, **analysis_kwargs) -> 'dict[str, Any]'` - Run Monte Carlo failure analysis with any analysis function.
-- `run_sensitivity_monte_carlo(self, source: 'str | dict[str, Any]', sink: 'str | dict[str, Any]', mode: 'str' = 'combine', iterations: 'int' = 100, parallelism: 'int' = 1, shortest_path: 'bool' = False, flow_placement: 'FlowPlacement | str' = <FlowPlacement.PROPORTIONAL: 1>, baseline: 'bool' = False, seed: 'int | None' = None, store_failure_patterns: 'bool' = False, **kwargs) -> 'dict[str, Any]'` - Analyze component criticality for flow capacity under failures.
+- `run_demand_placement_monte_carlo(self, demands_config: 'list[dict[str, Any]] | Any', iterations: 'int' = 100, parallelism: 'int' = 1, placement_rounds: 'int | str' = 'auto', seed: 'int | None' = None, store_failure_patterns: 'bool' = False, include_flow_details: 'bool' = False, include_used_edges: 'bool' = False, **kwargs) -> 'Any'` - Analyze traffic demand placement success under failures.
+- `run_max_flow_monte_carlo(self, source: 'str | dict[str, Any]', sink: 'str | dict[str, Any]', mode: 'str' = 'combine', iterations: 'int' = 100, parallelism: 'int' = 1, shortest_path: 'bool' = False, require_capacity: 'bool' = True, flow_placement: 'FlowPlacement | str' = <FlowPlacement.PROPORTIONAL: 1>, seed: 'int | None' = None, store_failure_patterns: 'bool' = False, include_flow_summary: 'bool' = False, **kwargs) -> 'Any'` - Analyze maximum flow capacity envelopes between node groups under failures.
+- `run_monte_carlo_analysis(self, analysis_func: 'AnalysisFunction', iterations: 'int' = 1, parallelism: 'int' = 1, seed: 'int | None' = None, store_failure_patterns: 'bool' = False, **analysis_kwargs) -> 'dict[str, Any]'` - Run Monte Carlo failure analysis with any analysis function.
+- `run_sensitivity_monte_carlo(self, source: 'str | dict[str, Any]', sink: 'str | dict[str, Any]', mode: 'str' = 'combine', iterations: 'int' = 100, parallelism: 'int' = 1, shortest_path: 'bool' = False, flow_placement: 'FlowPlacement | str' = <FlowPlacement.PROPORTIONAL: 1>, seed: 'int | None' = None, store_failure_patterns: 'bool' = False, **kwargs) -> 'dict[str, Any]'` - Analyze component criticality for flow capacity under failures.
 - `run_single_failure_scenario(self, analysis_func: 'AnalysisFunction', **kwargs) -> 'Any'` - Run a single failure scenario for convenience. 
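A short sketch of the demand-placement variant using the signatures listed above; `fm` is an assumed `FailureManager` and `demands` an assumed list of demand-config dicts:

```python
# Sketch only: demand placement Monte Carlo under the v0.14.0 contract.
out = fm.run_demand_placement_monte_carlo(
    demands_config=demands,
    iterations=500,
    parallelism=4,
    placement_rounds="auto",
    seed=7,
    store_failure_patterns=True,  # also populates failure_trace on each result
)

# Find the unique failure pattern with the worst placement ratio.
worst = min(out["results"], key=lambda r: r.summary.overall_ratio, default=None)
if worst is not None:
    print(worst.failure_id, worst.occurrence_count, worst.summary.overall_ratio)
```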
--- diff --git a/docs/reference/dsl.md b/docs/reference/dsl.md index b264a16..f98ed29 100644 --- a/docs/reference/dsl.md +++ b/docs/reference/dsl.md @@ -670,7 +670,6 @@ workflow: matrix_name: baseline_traffic_matrix failure_policy: weighted_modes iterations: 1000 - baseline: true ``` **Common Steps:** diff --git a/docs/reference/workflow.md b/docs/reference/workflow.md index 4e7720e..a00f46a 100644 --- a/docs/reference/workflow.md +++ b/docs/reference/workflow.md @@ -26,7 +26,6 @@ workflow: matrix_name: baseline_traffic_matrix failure_policy: random_failures iterations: 1000 - baseline: true ``` ## Execution Model @@ -71,7 +70,7 @@ Parameters: ### MaxFlow -Monte Carlo maximum flow analysis between node groups. +Monte Carlo maximum flow analysis between node groups. Baseline (no failures) is always run first as a separate reference. ```yaml - step_type: MaxFlow @@ -80,9 +79,8 @@ Monte Carlo maximum flow analysis between node groups. sink: "^storage/.*" mode: "combine" # combine | pairwise failure_policy: random_failures - iterations: 1000 + iterations: 1000 # Number of failure iterations parallelism: auto # or an integer - baseline: true shortest_path: false require_capacity: true # false for true IP/IGP semantics flow_placement: PROPORTIONAL # or EQUAL_BALANCED @@ -93,17 +91,16 @@ Monte Carlo maximum flow analysis between node groups. ### TrafficMatrixPlacement -Monte Carlo placement of a named traffic matrix with optional alpha scaling. +Monte Carlo placement of a named traffic matrix with optional alpha scaling. Baseline (no failures) is always run first as a separate reference. ```yaml - step_type: TrafficMatrixPlacement name: tm_placement matrix_name: default failure_policy: random_failures # Optional: policy name in failure_policy_set - iterations: 100 + iterations: 100 # Number of failure iterations parallelism: auto placement_rounds: auto # or an integer - baseline: false include_flow_details: true # cost_distribution per flow include_used_edges: false # include per-demand used edge lists store_failure_patterns: false @@ -115,7 +112,7 @@ Monte Carlo placement of a named traffic matrix with optional alpha scaling. Outputs: -- metadata: iterations, parallelism, baseline, analysis_function, policy_name, +- metadata: iterations, parallelism, analysis_function, policy_name, execution_time, unique_patterns - data.context: matrix_name, placement_rounds, include_flow_details, include_used_edges, base_demands, alpha, alpha_source @@ -266,9 +263,8 @@ source: ```yaml mode: combine # combine | pairwise (default: combine) -iterations: 1000 # Monte Carlo trials (default: 1) +iterations: 1000 # Failure iterations to run (default: 1) failure_policy: policy_name # Name in failure_policy_set (default: null) -baseline: true # Include baseline iteration first (default: false) parallelism: auto # Worker processes (default: auto) shortest_path: false # Restrict to shortest paths (default: false) require_capacity: true # Path selection considers capacity (default: true) @@ -279,6 +275,8 @@ include_flow_details: false # Emit cost_distribution per flow include_min_cut: false # Emit min-cut edge list per flow ``` +Note: Baseline (no failures) is always run first as a separate reference. The `iterations` parameter specifies the number of failure scenarios to run. + ## Results Export Shape Exported results have a fixed top-level structure. Keys under `workflow` and `steps` are step names. @@ -328,31 +326,27 @@ Exported results have a fixed top-level structure. 
Keys under `workflow` and `st } ``` -- `MaxFlow` and `TrafficMatrixPlacement` write per-iteration entries under `data.flow_results`: +- `MaxFlow` and `TrafficMatrixPlacement` write results with baseline separate from failure iterations: ```json { + "baseline": { + "failure_id": "", + "failure_state": { "excluded_nodes": [], "excluded_links": [] }, + "failure_trace": null, + "occurrence_count": 1, + "flows": [ ... ], + "summary": { "total_demand": 10.0, "total_placed": 10.0, "overall_ratio": 1.0 } + }, "flow_results": [ { - "failure_id": "baseline", - "failure_state": null, - "flows": [ - { - "source": "A", "destination": "B", "priority": 0, - "demand": 10.0, "placed": 10.0, "dropped": 0.0, - "cost_distribution": { "2": 6.0, "4": 4.0 }, - "data": { "edges": ["(u,v,k)"] } - } - ], - "summary": { - "total_demand": 10.0, "total_placed": 10.0, - "overall_ratio": 1.0, "dropped_flows": 0, "num_flows": 1 - }, - "data": { } - }, - { "failure_id": "d0eea3f4d06413a2", "failure_state": null, "flows": [], - "summary": { "total_demand": 0.0, "total_placed": 0.0, "overall_ratio": 1.0, "dropped_flows": 0, "num_flows": 0 }, - "data": {} } + "failure_id": "d0eea3f4d06413a2", + "failure_state": { "excluded_nodes": ["nodeA"], "excluded_links": [] }, + "failure_trace": { "mode_index": 0, "selections": [...], ... }, + "occurrence_count": 5, + "flows": [ ... ], + "summary": { "total_demand": 10.0, "total_placed": 8.0, "overall_ratio": 0.8 } + } ], "context": { ... } } @@ -360,9 +354,11 @@ Exported results have a fixed top-level structure. Keys under `workflow` and `st Notes: -- Baseline: when `baseline: true`, the first entry has `failure_id: "baseline"`. -- `failure_state` may be `null` or an object with `excluded_nodes` and `excluded_links` lists. -- Per-iteration `data` can include instrumentation (e.g., `iteration_metrics`). -- Per-flow `data` can include instrumentation (e.g., `policy_metrics`). +- Baseline is always returned separately in the `baseline` field. +- `flow_results` contains K unique failure patterns (deduplicated), not N iterations. +- `occurrence_count` indicates how many iterations produced each unique failure pattern. +- `failure_id` is a hash of exclusions (empty string for no exclusions). +- `failure_trace` contains policy selection details when `store_failure_patterns: true`. +- `failure_state` contains `excluded_nodes` and `excluded_links` lists. - `cost_distribution` uses string keys for JSON stability; values are numeric. - Effective `parallelism` and other execution fields are recorded in step metadata. diff --git a/ngraph/_version.py b/ngraph/_version.py index ac27bf2..8a726e6 100644 --- a/ngraph/_version.py +++ b/ngraph/_version.py @@ -2,4 +2,4 @@ __all__ = ["__version__"] -__version__ = "0.13.0" +__version__ = "0.14.0" diff --git a/ngraph/exec/failure/manager.py b/ngraph/exec/failure/manager.py index cba033c..e31a460 100644 --- a/ngraph/exec/failure/manager.py +++ b/ngraph/exec/failure/manager.py @@ -26,7 +26,7 @@ import os import time from concurrent.futures import ThreadPoolExecutor -from typing import TYPE_CHECKING, Any, Dict, Protocol, Set, TypeVar +from typing import TYPE_CHECKING, Any, Dict, Optional, Protocol, Set, TypeVar from ngraph.dsl.selectors import flatten_link_attrs, flatten_node_attrs from ngraph.logging import get_logger @@ -94,46 +94,6 @@ def _create_cache_key( return base_key + (tuple(hashable_kwargs),) -def _shallow_copy_result(value: Any) -> Any: - """Create a shallow copy of a result object for deduplication expansion. 
- - For FlowIterationResult-like objects, creates a new instance that shares - the expensive flows list and summary but has its own identity fields - (failure_id, failure_state) that can be set independently. - - This avoids the overhead of deepcopy while preventing aliasing issues - when we later mutate failure_id and failure_state per iteration. - - Args: - value: Result object to copy (typically FlowIterationResult). - - Returns: - A shallow copy suitable for independent mutation of identity fields. - """ - # Import here to avoid circular imports - from ngraph.results.flow import FlowIterationResult - - if isinstance(value, FlowIterationResult): - # Create new instance sharing flows and summary (read-only after creation) - # but with fresh identity fields for per-iteration mutation - return FlowIterationResult( - failure_id=value.failure_id, - failure_state=value.failure_state, - flows=value.flows, # Share reference - never mutated after creation - summary=value.summary, # Share reference - never mutated after creation - data=dict(value.data) if value.data else {}, # Shallow copy of data dict - ) - - # For dict-like objects with known structure, shallow copy - if isinstance(value, dict): - return dict(value) - - # Fallback: use copy.copy for shallow copy (faster than deepcopy) - from copy import copy - - return copy(value) - - def _auto_adjust_parallelism(parallelism: int, analysis_func: Any) -> int: """Adjust parallelism based on function characteristics. @@ -317,6 +277,7 @@ def compute_exclusions( self, policy: "FailurePolicy | None" = None, seed_offset: int | None = None, + failure_trace: Optional[Dict[str, Any]] = None, ) -> tuple[set[str], set[str]]: """Compute set of nodes and links to exclude for a failure iteration. @@ -327,6 +288,7 @@ def compute_exclusions( Args: policy: Failure policy to apply. If None, uses instance policy. seed_offset: Optional seed for deterministic failures. + failure_trace: Optional dict to populate with trace data from policy. Returns: Tuple of (excluded_nodes, excluded_links) containing entity IDs to exclude. @@ -358,7 +320,11 @@ def compute_exclusions( # Apply failure policy with optional deterministic seed override failed_ids = policy.apply_failures( - node_map, link_map, self.network.risk_groups, seed=seed_offset + node_map, + link_map, + self.network.risk_groups, + seed=seed_offset, + failure_trace=failure_trace, ) # Separate entity types for exclusion sets @@ -390,7 +356,6 @@ def run_monte_carlo_analysis( analysis_func: AnalysisFunction, iterations: int = 1, parallelism: int = 1, - baseline: bool = False, seed: int | None = None, store_failure_patterns: bool = False, **analysis_kwargs, @@ -401,54 +366,44 @@ def run_monte_carlo_analysis( parallel processing, worker caching, and failure policy application, while allowing flexibility in the analysis function. + Baseline is always run first as a separate reference iteration (no failures). + The ``iterations`` parameter specifies the number of failure iterations to run. + Args: analysis_func: Function that takes (network, excluded_nodes, excluded_links, **kwargs) and returns results. Must be serializable for parallel execution. - iterations: Number of Monte Carlo iterations to run. + iterations: Number of failure iterations to run (baseline is always run separately). parallelism: Number of parallel worker threads to use. - baseline: If True, first iteration runs without failures as baseline. seed: Optional seed for reproducible results across runs. 
- store_failure_patterns: If True, store detailed failure patterns in results. + store_failure_patterns: If True, populate failure_trace on each result. **analysis_kwargs: Additional arguments passed to analysis_func. Returns: Dictionary containing: - - 'results': List of results from each iteration - - 'failure_patterns': List of failure pattern details (if store_failure_patterns=True) - - 'metadata': Execution metadata (iterations, timing, etc.) - - Raises: - ValueError: If iterations > 1 without a failure policy and baseline=False. + - 'baseline': FlowIterationResult for the baseline (no failures) + - 'results': List of unique FlowIterationResult objects (deduplicated patterns). + Each result has occurrence_count indicating how many iterations matched. + - 'metadata': Execution metadata (iterations, unique_patterns, execution_time, etc.) """ policy = self.get_failure_policy() - # Validate iterations parameter based on failure policy (modes-only policies) + # Check if policy has effective rules has_effective_rules = bool( policy and any(len(m.rules) > 0 for m in policy.modes) ) - if (not has_effective_rules) and iterations > 1 and not baseline: - raise ValueError( - f"iterations={iterations} has no effect without a failure policy. " - "Without failures, all iterations produce the same results. " - "Either set iterations=1, provide a failure_policy with rules, or set baseline=True." - ) - if baseline and iterations < 2: - raise ValueError( - "baseline=True requires iterations >= 2 " - "(first iteration is baseline, remaining are with failures)" - ) + # Without effective rules, only baseline makes sense (no failure iterations) + if not has_effective_rules: + iterations = 0 # Auto-adjust parallelism based on function characteristics parallelism = _auto_adjust_parallelism(parallelism, analysis_func) - # Determine actual number of iterations to run - if not has_effective_rules: - mc_iters = 1 # No failures => single iteration - else: - mc_iters = iterations - - logger.info(f"Running {mc_iters} Monte-Carlo iterations") + logger.info( + f"Running baseline + {iterations} failure iterations" + if iterations > 0 + else "Running baseline only (no failure policy)" + ) # Pre-build context for analysis functions # This amortizes expensive graph construction across all iterations @@ -483,34 +438,39 @@ def run_monte_carlo_analysis( func_name = getattr(analysis_func, "__name__", "analysis_function") logger.debug( f"Analysis parameters: function={func_name}, " - f"parallelism={parallelism}, baseline={baseline}, policy={self.policy_name}" + f"parallelism={parallelism}, policy={self.policy_name}" ) # Pre-compute worker arguments for all iterations logger.debug("Pre-computing failure exclusions for all iterations") pre_compute_start = time.time() + # Baseline is always run first (no failures, separate from failure iterations) + baseline_arg = ( + self.network, + set(), # No excluded nodes + set(), # No excluded links + analysis_func, + analysis_kwargs, + -1, # Special index for baseline + True, # is_baseline + func_name, + ) + + # Build failure iteration arguments (indexed 0..iterations-1) worker_args: list[tuple] = [] - iteration_index_to_key: dict[int, tuple] = {} key_to_first_arg: dict[tuple, tuple] = {} - key_to_members: dict[tuple, list[int]] = {} - - for i in range(mc_iters): - seed_offset = None - if seed is not None: - seed_offset = seed + i + key_to_count: dict[tuple, int] = {} + key_to_trace: dict[tuple, dict[str, Any]] = {} - # First iteration is baseline if baseline=True (no failures) - 
is_baseline = baseline and i == 0 + for i in range(iterations): + seed_offset = seed + i if seed is not None else None - if is_baseline: - # For baseline iteration, use empty exclusion sets - excluded_nodes, excluded_links = set(), set() - else: - # Pre-compute exclusions for this iteration - excluded_nodes, excluded_links = self.compute_exclusions( - policy, seed_offset - ) + # Pre-compute exclusions for this failure iteration + trace = {} if store_failure_patterns else None + excluded_nodes, excluded_links = self.compute_exclusions( + policy, seed_offset, failure_trace=trace + ) arg = ( self.network, @@ -518,8 +478,8 @@ def run_monte_carlo_analysis( excluded_links, analysis_func, analysis_kwargs, - i, # iteration_index - is_baseline, + i, # iteration_index (0-based for failures) + False, # is_baseline func_name, ) worker_args.append(arg) @@ -528,135 +488,102 @@ def run_monte_carlo_analysis( dedup_key = _create_cache_key( excluded_nodes, excluded_links, func_name, analysis_kwargs ) - iteration_index_to_key[i] = dedup_key if dedup_key not in key_to_first_arg: key_to_first_arg[dedup_key] = arg - key_to_members.setdefault(dedup_key, []).append(i) + key_to_count[dedup_key] = 1 + # Store trace for first occurrence + if trace is not None: + key_to_trace[dedup_key] = trace + else: + key_to_count[dedup_key] += 1 pre_compute_time = time.time() - pre_compute_start logger.debug( - f"Pre-computed {len(worker_args)} exclusion sets in {pre_compute_time:.2f}s" + f"Pre-computed {len(worker_args)} failure exclusion sets in {pre_compute_time:.2f}s" ) # Prepare unique tasks (deduplicated by failure pattern + analysis params) unique_worker_args: list[tuple] = list(key_to_first_arg.values()) num_unique_tasks: int = len(unique_worker_args) - logger.info( - f"Monte-Carlo deduplication: {num_unique_tasks} unique patterns from {mc_iters} iterations" - ) - - # Determine if we should run in parallel - use_parallel = parallelism > 1 and num_unique_tasks > 1 + if iterations > 0: + logger.info( + f"Monte-Carlo deduplication: {num_unique_tasks} unique patterns from {iterations} failure iterations" + ) start_time = time.time() - # Execute only unique tasks, then replicate results to original indices - if use_parallel: - unique_result_values, _ = self._run_parallel( - unique_worker_args, num_unique_tasks, False, parallelism - ) + # Always run baseline first (separate from failure iterations) + baseline_result_raw = self._run_serial([baseline_arg]) + baseline_result = baseline_result_raw[0] if baseline_result_raw else None + + # Enrich baseline result with failure metadata + if baseline_result is not None and hasattr(baseline_result, "failure_id"): + baseline_result.failure_id = "" + baseline_result.failure_state = {"excluded_nodes": [], "excluded_links": []} + baseline_result.failure_trace = None # No policy applied for baseline + + # Execute failure iterations (deduplicated) + if iterations > 0: + use_parallel = parallelism > 1 and num_unique_tasks > 1 + if use_parallel: + unique_result_values = self._run_parallel( + unique_worker_args, num_unique_tasks, parallelism + ) + else: + unique_result_values = self._run_serial(unique_worker_args) + + # Map unique task results back to their dedup keys + key_to_result: dict[tuple, Any] = {} + for (dedup_key, _arg), value in zip( + key_to_first_arg.items(), unique_result_values, strict=False + ): + key_to_result[dedup_key] = value else: - unique_result_values, _ = self._run_serial(unique_worker_args, False) - - # Map unique task results back to their groups preserving insertion 
order - key_to_result: dict[tuple, Any] = {} - for (dedup_key, _arg), value in zip( - key_to_first_arg.items(), unique_result_values, strict=False - ): - key_to_result[dedup_key] = value - - # Build full results list in original order. Create shallow copies that share - # the expensive flows/summary data but have their own mutable identity fields. - # This avoids deepcopy overhead while preventing aliasing issues when we later - # set failure_id and failure_state per iteration. - results: list[Any] = [None] * mc_iters # type: ignore[var-annotated] - for key, members in key_to_members.items(): - if key not in key_to_result: - # Defensive: should not happen unless parallel map returned fewer tasks - continue - value = key_to_result[key] - for idx in members: - results[idx] = _shallow_copy_result(value) - - # Reconstruct failure patterns per original iteration if requested - failure_patterns: list[dict[str, Any]] = [] - if store_failure_patterns: - for key, members in key_to_members.items(): - # Use exclusions from the representative arg - rep_arg = key_to_first_arg[key] - exc_nodes: set[str] = rep_arg[1] - exc_links: set[str] = rep_arg[2] - for idx in members: - failure_patterns.append( - { - "iteration_index": idx, - "is_baseline": bool(baseline and idx == 0), - "excluded_nodes": list(exc_nodes), - "excluded_links": list(exc_links), - } - ) + key_to_result = {} elapsed_time = time.time() - start_time - # Precompute failure_id per unique pattern key (not including baseline override) - pattern_key_to_failure_id: dict[tuple, str] = {} - for key, rep_arg in key_to_first_arg.items(): + # Enrich unique failure results with metadata and occurrence_count + results: list[Any] = [] + for dedup_key, rep_arg in key_to_first_arg.items(): + result = key_to_result.get(dedup_key) + if result is None: + continue + exc_nodes: set[str] = rep_arg[1] exc_links: set[str] = rep_arg[2] + + # Compute failure_id (hash of exclusions, or "" for empty) if not exc_nodes and not exc_links: - pattern_key_to_failure_id[key] = "" - continue - payload = ",".join(sorted(exc_nodes)) + "|" + ",".join(sorted(exc_links)) - pattern_key_to_failure_id[key] = hashlib.blake2s( - payload.encode("utf-8"), digest_size=8 - ).hexdigest() - - # Optionally precompute failure_state per unique pattern when storing patterns - dedup_key_to_state: dict[tuple, dict[str, list[str]]] = {} - if store_failure_patterns: - for key, rep_arg in key_to_first_arg.items(): - exc_nodes: set[str] = rep_arg[1] - exc_links: set[str] = rep_arg[2] - dedup_key_to_state[key] = { + fid = "" + else: + payload = ( + ",".join(sorted(exc_nodes)) + "|" + ",".join(sorted(exc_links)) + ) + fid = hashlib.blake2s( + payload.encode("utf-8"), digest_size=8 + ).hexdigest() + + # Enrich FlowIterationResult-like objects + if hasattr(result, "failure_id") and hasattr(result, "summary"): + result.failure_id = fid + result.failure_state = { "excluded_nodes": list(exc_nodes), "excluded_links": list(exc_links), } + result.failure_trace = ( + key_to_trace.get(dedup_key) if store_failure_patterns else None + ) + result.occurrence_count = key_to_count[dedup_key] - # Mutate dict-like results that expose to_dict to embed failure info - enriched: list[Any] = [] - for i, iter_res in enumerate(results): - dedup_key = iteration_index_to_key[i] - is_baseline_iter = bool(baseline and i == 0) - # baseline takes precedence regardless of pattern content - fid = ( - "baseline" - if is_baseline_iter - else pattern_key_to_failure_id.get(dedup_key, "") - ) - - # ngraph.results.flow.FlowIterationResult 
like object - if hasattr(iter_res, "failure_id") and hasattr(iter_res, "summary"): - iter_res.failure_id = fid - # Only populate failure_state when storing patterns; otherwise keep None - if store_failure_patterns: - iter_res.failure_state = dedup_key_to_state.get(dedup_key) - else: - iter_res.failure_state = None - enriched.append(iter_res) - continue - - # Unknown type: keep as-is - enriched.append(iter_res) - - results = enriched + results.append(result) return { + "baseline": baseline_result, "results": results, - "failure_patterns": failure_patterns if store_failure_patterns else [], "metadata": { - "iterations": mc_iters, + "iterations": iterations, "parallelism": parallelism, - "baseline": baseline, "analysis_function": func_name, "policy_name": self.policy_name, "execution_time": elapsed_time, @@ -668,9 +595,8 @@ def _run_parallel( self, worker_args: list[tuple], total_tasks: int, - store_failure_patterns: bool, parallelism: int, - ) -> tuple[list[Any], list[dict[str, Any]]]: + ) -> list[Any]: """Run analysis in parallel using shared network approach. Network is shared by reference across all threads (zero-copy), which is @@ -681,11 +607,10 @@ def _run_parallel( Args: worker_args: Pre-computed worker arguments for all iterations. total_tasks: Number of tasks to run. - store_failure_patterns: Whether to collect failure pattern details. parallelism: Number of parallel worker threads to use. Returns: - Tuple of (results_list, failure_patterns_list). + List of analysis results. """ workers = min(parallelism, total_tasks) logger.info( @@ -702,7 +627,6 @@ def _run_parallel( start_time = time.time() completed_tasks = 0 results = [] - failure_patterns = [] with ThreadPoolExecutor( max_workers=workers, @@ -714,27 +638,14 @@ def _run_parallel( for ( result, - iteration_index, - is_baseline, - excluded_nodes, - excluded_links, + _iteration_index, + _is_baseline, + _excluded_nodes, + _excluded_links, ) in pool.map(_generic_worker, worker_args, chunksize=chunksize): completed_tasks += 1 - - # Collect results results.append(result) - # Add failure pattern if requested - if store_failure_patterns: - failure_patterns.append( - { - "iteration_index": iteration_index, - "is_baseline": is_baseline, - "excluded_nodes": list(excluded_nodes), - "excluded_links": list(excluded_links), - } - ) - # Progress logging (throttle for small N at INFO) if total_tasks >= 20: # Show approx 10% increments @@ -750,32 +661,24 @@ def _run_parallel( f"Average time per iteration: {elapsed_time / total_tasks:.3f} seconds" ) - # Note: Task deduplication was performed earlier across - # (exclusions + analysis parameters), so worker-level caches - # see only unique work items. Additional cache efficiency - # metrics here would not be meaningful and are intentionally omitted. - - return results, failure_patterns + return results def _run_serial( self, worker_args: list[tuple], - store_failure_patterns: bool, - ) -> tuple[list[Any], list[dict[str, Any]]]: + ) -> list[Any]: """Run analysis serially for single process execution. Args: worker_args: Pre-computed worker arguments for all iterations. - store_failure_patterns: Whether to collect failure pattern details. Returns: - Tuple of (results_list, failure_patterns_list). + List of analysis results. 
""" logger.info("Running serial analysis") start_time = time.time() results = [] - failure_patterns = [] # In serial mode, disable worker-level profiling in the current process # to avoid nesting profilers when the CLI has already enabled step-level @@ -793,32 +696,20 @@ def _run_serial( for i, args in enumerate(worker_args): iter_start = time.time() - is_baseline = len(args) > 6 and args[6] # is_baseline flag - baseline_msg = " (baseline)" if is_baseline else "" + is_baseline_arg = len(args) > 6 and args[6] # is_baseline flag + baseline_msg = " (baseline)" if is_baseline_arg else "" logger.debug(f"Serial iteration {i + 1}/{len(worker_args)}{baseline_msg}") ( result, - iteration_index, - is_baseline, - excluded_nodes, - excluded_links, + _iteration_index, + _is_baseline, + _excluded_nodes, + _excluded_links, ) = _generic_worker(args) - # Collect results results.append(result) - # Add failure pattern if requested - if store_failure_patterns: - failure_patterns.append( - { - "iteration_index": iteration_index, - "is_baseline": is_baseline, - "excluded_nodes": list(excluded_nodes), - "excluded_links": list(excluded_links), - } - ) - iter_time = time.time() - iter_start if len(worker_args) <= 10: logger.debug( @@ -841,7 +732,7 @@ def _run_serial( f"Average time per iteration: {elapsed_time / len(worker_args):.3f} seconds" ) - return results, failure_patterns + return results def run_single_failure_scenario( self, analysis_func: AnalysisFunction, **kwargs @@ -858,12 +749,16 @@ def run_single_failure_scenario( **kwargs: Additional arguments passed to analysis_func. Returns: - Result from the analysis function. + Result from the analysis function. Returns the first failure result if + available, otherwise the baseline result. """ result = self.run_monte_carlo_analysis( analysis_func=analysis_func, iterations=1, parallelism=1, **kwargs ) - return result["results"][0] + # Return first failure result if available, otherwise baseline + if result["results"]: + return result["results"][0] + return result["baseline"] # Convenience methods for common analysis patterns @@ -877,7 +772,6 @@ def run_max_flow_monte_carlo( shortest_path: bool = False, require_capacity: bool = True, flow_placement: FlowPlacement | str = FlowPlacement.PROPORTIONAL, - baseline: bool = False, seed: int | None = None, store_failure_patterns: bool = False, include_flow_summary: bool = False, @@ -889,6 +783,8 @@ def run_max_flow_monte_carlo( source and sink node groups across Monte Carlo failure scenarios. Results include frequency-based capacity envelopes and optional failure pattern analysis. + Baseline (no failures) is always run first as a separate reference. + Args: source: Source node selector (string path or selector dict). sink: Sink node selector (string path or selector dict). @@ -899,16 +795,16 @@ def run_max_flow_monte_carlo( require_capacity: If True (default), path selection considers available capacity. If False, path selection is cost-only (true IP/IGP semantics). flow_placement: Flow placement strategy. - baseline: Whether to include baseline (no failures) iteration. seed: Optional seed for reproducible results. - store_failure_patterns: Whether to store failure patterns in results. + store_failure_patterns: Whether to store failure trace on results. include_flow_summary: Whether to collect detailed flow summary data. 
Returns: Dictionary with keys: - - 'results': list[FlowIterationResult] for each iteration - - 'failure_patterns': list of failure pattern dicts (if store_failure_patterns=True) - - 'metadata': execution metadata (iterations, timing, etc.) + - 'baseline': FlowIterationResult for baseline (no failures) + - 'results': List of unique FlowIterationResult objects (deduplicated patterns). + Each result has occurrence_count indicating how many iterations matched. + - 'metadata': Execution metadata (iterations, unique_patterns, execution_time, etc.) """ from ngraph.exec.analysis.flow import max_flow_analysis @@ -921,7 +817,6 @@ def run_max_flow_monte_carlo( analysis_func=max_flow_analysis, iterations=iterations, parallelism=parallelism, - baseline=baseline, seed=seed, store_failure_patterns=store_failure_patterns, source=source, @@ -933,7 +828,6 @@ def run_max_flow_monte_carlo( include_flow_details=include_flow_summary, **kwargs, ) - # New contract: return the raw dict with list[FlowIterationResult] return raw_results def _process_sensitivity_results( @@ -942,7 +836,9 @@ def _process_sensitivity_results( """Process sensitivity results to aggregate component impact scores. Args: - results: List of FlowIterationResult from each iteration. + results: List of unique FlowIterationResult objects (deduplicated). + Each result has occurrence_count indicating how many iterations + produced that pattern. Returns: Dictionary mapping flow keys to component impact aggregations. @@ -951,31 +847,38 @@ def _process_sensitivity_results( from ngraph.results.flow import FlowIterationResult - # Aggregate component scores across all iterations + # Aggregate component scores weighted by occurrence_count + # Store (weighted_sum, total_count, min, max) per component flow_aggregates: dict[str, dict[str, list[float]]] = defaultdict( - lambda: defaultdict(list) + lambda: defaultdict(lambda: [0.0, 0, float("inf"), float("-inf")]) ) for result in results: if not isinstance(result, FlowIterationResult): continue + count = getattr(result, "occurrence_count", 1) for entry in result.flows: flow_key = f"{entry.source}->{entry.destination}" sensitivity = entry.data.get("sensitivity", {}) for component_key, score in sensitivity.items(): - flow_aggregates[flow_key][component_key].append(score) + agg = flow_aggregates[flow_key][component_key] + agg[0] += score * count # weighted sum + agg[1] += count # total count + agg[2] = min(agg[2], score) # min + agg[3] = max(agg[3], score) # max # Calculate statistics for each component processed_scores: dict[str, dict[str, dict[str, float]]] = {} for flow_key, components in flow_aggregates.items(): flow_stats: dict[str, dict[str, float]] = {} - for component_key, scores in components.items(): - if scores: + for component_key, agg in components.items(): + weighted_sum, total_count, min_val, max_val = agg + if total_count > 0: flow_stats[component_key] = { - "mean": sum(scores) / len(scores), - "max": max(scores), - "min": min(scores), - "count": float(len(scores)), + "mean": weighted_sum / total_count, + "max": max_val, + "min": min_val, + "count": float(total_count), } processed_scores[flow_key] = flow_stats @@ -991,7 +894,6 @@ def run_demand_placement_monte_carlo( iterations: int = 100, parallelism: int = 1, placement_rounds: int | str = "auto", - baseline: bool = False, seed: int | None = None, store_failure_patterns: bool = False, include_flow_details: bool = False, @@ -1003,20 +905,22 @@ def run_demand_placement_monte_carlo( Attempts to place traffic demands on the network across Monte 
Carlo failure scenarios and measures success rates. + Baseline (no failures) is always run first as a separate reference. + Args: demands_config: List of demand configs or TrafficMatrixSet object. iterations: Number of failure scenarios to simulate. parallelism: Number of parallel workers (auto-adjusted if needed). placement_rounds: Optimization rounds for demand placement. - baseline: Whether to include baseline (no failures) iteration. seed: Optional seed for reproducible results. - store_failure_patterns: Whether to store failure patterns in results. + store_failure_patterns: Whether to store failure trace on results. Returns: Dictionary with keys: - - 'results': list[FlowIterationResult] for each iteration - - 'failure_patterns': list of failure pattern dicts (if store_failure_patterns=True) - - 'metadata': execution metadata (iterations, timing, etc.) + - 'baseline': FlowIterationResult for baseline (no failures) + - 'results': List of unique FlowIterationResult objects (deduplicated patterns). + Each result has occurrence_count indicating how many iterations matched. + - 'metadata': Execution metadata (iterations, unique_patterns, execution_time, etc.) """ from ngraph.exec.analysis.flow import demand_placement_analysis @@ -1056,7 +960,6 @@ def run_demand_placement_monte_carlo( analysis_func=demand_placement_analysis, iterations=iterations, parallelism=parallelism, - baseline=baseline, seed=seed, store_failure_patterns=store_failure_patterns, demands_config=demands_config, @@ -1065,7 +968,6 @@ def run_demand_placement_monte_carlo( include_used_edges=include_used_edges, **kwargs, ) - # New contract: return the raw dict with list[FlowIterationResult] return raw_results def run_sensitivity_monte_carlo( @@ -1077,7 +979,6 @@ def run_sensitivity_monte_carlo( parallelism: int = 1, shortest_path: bool = False, flow_placement: FlowPlacement | str = FlowPlacement.PROPORTIONAL, - baseline: bool = False, seed: int | None = None, store_failure_patterns: bool = False, **kwargs, @@ -1088,6 +989,8 @@ def run_sensitivity_monte_carlo( capacity across Monte Carlo failure scenarios. Returns aggregated sensitivity scores showing which components have the greatest effect on network capacity. + Baseline (no failures) is always run first as a separate reference. + Args: source: Source node selector (string path or selector dict). sink: Sink node selector (string path or selector dict). @@ -1096,16 +999,16 @@ def run_sensitivity_monte_carlo( parallelism: Number of parallel workers (auto-adjusted if needed). shortest_path: Whether to use shortest paths only. flow_placement: Flow placement strategy. - baseline: Whether to include baseline (no failures) iteration. seed: Optional seed for reproducible results. - store_failure_patterns: Whether to store failure patterns in results. + store_failure_patterns: Whether to store failure trace on results. Returns: Dictionary with keys: - - 'results': list of per-iteration sensitivity dicts mapping flow keys to component scores + - 'baseline': Baseline result (no failures) + - 'results': List of unique per-iteration sensitivity dicts (deduplicated patterns). + Each result has occurrence_count indicating how many iterations matched. - 'component_scores': aggregated statistics (mean, max, min, count) per component per flow - - 'failure_patterns': list of failure pattern dicts (if store_failure_patterns=True) - - 'metadata': execution metadata (iterations, timing, source/sink patterns, etc.) 
+ - 'metadata': Execution metadata (iterations, unique_patterns, execution_time, etc.) """ from ngraph.exec.analysis.flow import sensitivity_analysis @@ -1117,7 +1020,6 @@ def run_sensitivity_monte_carlo( analysis_func=sensitivity_analysis, iterations=iterations, parallelism=parallelism, - baseline=baseline, seed=seed, store_failure_patterns=store_failure_patterns, source=source, diff --git a/ngraph/explorer.py b/ngraph/explorer.py index 6eba8c9..2d57f4d 100644 --- a/ngraph/explorer.py +++ b/ngraph/explorer.py @@ -501,7 +501,7 @@ def set_node_counts(n: TreeNode): src = link.source dst = link.target - # Resolve per-end link hardware (no legacy support) + # Resolve per-end link hardware (src_end, dst_end, per_end) = resolve_link_end_components( link.attrs, self.components_library ) diff --git a/ngraph/model/components.py b/ngraph/model/components.py index ba3839b..cf2df0f 100644 --- a/ngraph/model/components.py +++ b/ngraph/model/components.py @@ -409,5 +409,5 @@ def _from_mapping( dst_map = hw_struct.get("target", {}) return _from_mapping(src_map), _from_mapping(dst_map), True - # No legacy or flattened formats supported. + # Only structured source/target format is supported. return (None, 1.0, False), (None, 1.0, False), False diff --git a/ngraph/model/failure/policy.py b/ngraph/model/failure/policy.py index 6a53755..1c44089 100644 --- a/ngraph/model/failure/policy.py +++ b/ngraph/model/failure/policy.py @@ -166,6 +166,7 @@ def apply_failures( network_risk_groups: Dict[str, Any] | None = None, *, seed: Optional[int] = None, + failure_trace: Optional[Dict[str, Any]] = None, ) -> List[str]: """Identify which entities fail for this iteration. @@ -177,6 +178,8 @@ def apply_failures( network_links: Mapping of link_id -> flattened attribute dict. network_risk_groups: Mapping of risk_group_name -> RiskGroup or dict. seed: Optional deterministic seed for selection. + failure_trace: Optional dict to populate with trace data (mode selection, + rule selections, expansion). If provided, will be mutated in-place. Returns: Sorted list of failed entity IDs (nodes, links, and/or risk group names). 
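A minimal sketch of requesting a trace, assuming `policy` is a `FailurePolicy` and `network_nodes`/`network_links`/`network_risk_groups` are the flattened mappings described in the docstring above (assumed preexisting variables):

```python
# Sketch only: requesting a selection trace from apply_failures.
trace: dict = {}
failed_ids = policy.apply_failures(
    network_nodes,
    network_links,
    network_risk_groups,
    seed=42,
    failure_trace=trace,  # mutated in place by the call
)
# After the call, `trace` holds mode_index, mode_attrs, selections (one entry
# per rule that selected anything), and expansion (risk-group additions).
for sel in trace["selections"]:
    print(sel["rule_index"], sel["entity_scope"], len(sel["selected_ids"]))
```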
@@ -188,12 +191,26 @@ def apply_failures( failed_links: Set[str] = set() failed_risk_groups: Set[str] = set() + # Initialize trace structure if requested + if failure_trace is not None: + failure_trace.update( + { + "mode_index": None, + "mode_attrs": {}, + "selections": [], + "expansion": {"nodes": [], "links": [], "risk_groups": []}, + } + ) + # Determine rules from a selected mode (or none if no modes) rules_to_apply: Sequence[FailureRule] = [] if self.modes: effective_seed = seed if seed is not None else self.seed mode_index = self._select_mode_index(self.modes, effective_seed) rules_to_apply = self.modes[mode_index].rules + if failure_trace is not None: + failure_trace["mode_index"] = mode_index + failure_trace["mode_attrs"] = dict(self.modes[mode_index].attrs) # Collect matched from each rule, then select for idx, rule in enumerate(rules_to_apply): @@ -218,6 +235,18 @@ def apply_failures( ), ) + # Record selection in trace if non-empty + if failure_trace is not None and selected: + failure_trace["selections"].append( + { + "rule_index": idx, + "entity_scope": rule.entity_scope, + "rule_type": rule.rule_type, + "matched_count": len(matched_ids), + "selected_ids": sorted(selected), + } + ) + # Add them to the respective fail sets if rule.entity_scope == "node": failed_nodes |= set(selected) @@ -226,6 +255,15 @@ def apply_failures( elif rule.entity_scope == "risk_group": failed_risk_groups |= set(selected) + # Snapshot before expansion for trace + pre_nodes: Set[str] = set() + pre_links: Set[str] = set() + pre_rgs: Set[str] = set() + if failure_trace is not None: + pre_nodes = set(failed_nodes) + pre_links = set(failed_links) + pre_rgs = set(failed_risk_groups) + # Optionally expand by risk groups if self.fail_risk_groups: self._expand_risk_groups( @@ -238,6 +276,14 @@ def apply_failures( failed_risk_groups, network_risk_groups ) + # Capture expansion in trace + if failure_trace is not None: + failure_trace["expansion"] = { + "nodes": sorted(failed_nodes - pre_nodes), + "links": sorted(failed_links - pre_links), + "risk_groups": sorted(failed_risk_groups - pre_rgs), + } + all_failed = set(failed_nodes) | set(failed_links) | set(failed_risk_groups) return sorted(all_failed) diff --git a/ngraph/results/artifacts.py b/ngraph/results/artifacts.py index 4db5e6b..0309cc5 100644 --- a/ngraph/results/artifacts.py +++ b/ngraph/results/artifacts.py @@ -307,16 +307,19 @@ def to_dict(self) -> Dict[str, Any]: def pattern_key(self) -> str: """Generate a deterministic key for this failure pattern. - Uses a stable SHA1 hash of the sorted excluded entity list to avoid + Uses a stable BLAKE2s hash of the sorted excluded entity list to avoid Python's randomized hash() variability across processes. - """ - if self.is_baseline: - return "baseline" + Returns empty string for patterns with no exclusions (including baseline). + """ # Cache to avoid recomputation when accessed repeatedly if self._pattern_key_cache: return self._pattern_key_cache + # Empty exclusions (no failures) return empty string + if not self.excluded_nodes and not self.excluded_links: + return "" + # Create deterministic key from excluded entities using fast BLAKE2s excluded_str = ",".join(sorted(self.excluded_nodes + self.excluded_links)) digest = hashlib.blake2s( diff --git a/ngraph/results/flow.py b/ngraph/results/flow.py index 6da28fe..55578b1 100644 --- a/ngraph/results/flow.py +++ b/ngraph/results/flow.py @@ -278,8 +278,14 @@ class FlowIterationResult: """Container for per-iteration analysis results. 
Args: - failure_id: Stable identifier for the failure scenario (e.g., "baseline" or a hash). + failure_id: Stable identifier for the failure scenario (hash of excluded + components, or "" for no exclusions). failure_state: Optional excluded components for the iteration. + failure_trace: Optional trace info (mode_index, selections, expansion) when + store_failure_patterns=True. None for baseline or when tracing disabled. + occurrence_count: Number of Monte Carlo iterations that produced this exact + failure pattern. Used with deduplication to avoid re-running identical + analyses. Defaults to 1. flows: List of flow entries for this iteration. summary: Aggregated summary across ``flows``. data: Optional per-iteration extras. @@ -287,6 +293,8 @@ class FlowIterationResult: failure_id: str = "" failure_state: Optional[Dict[str, List[str]]] = None + failure_trace: Optional[Dict[str, Any]] = None + occurrence_count: int = 1 flows: List[FlowEntry] = field(default_factory=list) summary: FlowSummary = field( default_factory=lambda: FlowSummary( @@ -305,6 +313,14 @@ def __post_init__(self) -> None: Raises: ValueError: If summary/flow counts mismatch or failure_state invalid. """ + # Validate occurrence_count + if not isinstance(self.occurrence_count, int) or self.occurrence_count < 1: + logger.error( + "FlowIterationResult.occurrence_count must be a positive int: %r", + self.occurrence_count, + ) + raise ValueError("occurrence_count must be a positive int") + # Validate failure_state structure if present if self.failure_state is not None: if not isinstance(self.failure_state, dict): @@ -342,6 +358,10 @@ def to_dict(self) -> Dict[str, Any]: "failure_state": self.failure_state if self.failure_state is not None else None, + "failure_trace": self.failure_trace + if self.failure_trace is not None + else None, + "occurrence_count": self.occurrence_count, "flows": [f.to_dict() for f in self.flows], "summary": self.summary.to_dict(), "data": _ensure_json_safe(self.data), diff --git a/ngraph/workflow/build_graph.py b/ngraph/workflow/build_graph.py index 9313948..5c11407 100644 --- a/ngraph/workflow/build_graph.py +++ b/ngraph/workflow/build_graph.py @@ -29,11 +29,14 @@ import networkx as nx +from ngraph.logging import get_logger from ngraph.workflow.base import WorkflowStep, register_workflow_step if TYPE_CHECKING: from ngraph.scenario import Scenario +logger = get_logger(__name__) + @dataclass class BuildGraph(WorkflowStep): @@ -59,6 +62,7 @@ def run(self, scenario: Scenario) -> None: Returns: None """ + logger.info("Starting BuildGraph: name=%s", self.name) network = scenario.network # Build NetworkX MultiDiGraph from Network @@ -118,6 +122,13 @@ def run(self, scenario: Scenario) -> None: }, ) + logger.info( + "BuildGraph completed: name=%s nodes=%d edges=%d", + self.name, + len(graph.nodes), + len(graph.edges), + ) + # Register the class after definition to avoid decorator ordering issues register_workflow_step("BuildGraph")(BuildGraph) diff --git a/ngraph/workflow/cost_power.py b/ngraph/workflow/cost_power.py index f3d74a5..94dab81 100644 --- a/ngraph/workflow/cost_power.py +++ b/ngraph/workflow/cost_power.py @@ -91,11 +91,11 @@ def run(self, scenario: Any) -> None: scenario: Scenario with network, components_library, and results store. 
""" t0 = time.perf_counter() - logger.info( - "Starting CostPower: name=%s include_disabled=%s levels=0..%d", - self.name or self.__class__.__name__, - str(self.include_disabled), - int(self.aggregation_level), + logger.info("Starting CostPower: name=%s", self.name) + logger.debug( + "CostPower params: include_disabled=%s aggregation_level=%d", + self.include_disabled, + self.aggregation_level, ) network = scenario.network @@ -225,12 +225,10 @@ def add_values( root_items = levels_payload.get(0, []) root = root_items[0] if root_items else {} logger.info( - "CostPower complete: name=%s capex=%.3f power=%.3f platform_capex=%.3f optics_capex=%.3f duration=%.3fs", - self.name or self.__class__.__name__, + "CostPower completed: name=%s capex_total=%.2f power_total=%.2f duration=%.3fs", + self.name, float(root.get("capex_total", 0.0)), float(root.get("power_total_watts", 0.0)), - float(root.get("platform_capex", 0.0)), - float(root.get("optics_capex", 0.0)), time.perf_counter() - t0, ) diff --git a/ngraph/workflow/max_flow_step.py b/ngraph/workflow/max_flow_step.py index 922d1b8..92514d3 100644 --- a/ngraph/workflow/max_flow_step.py +++ b/ngraph/workflow/max_flow_step.py @@ -3,6 +3,9 @@ Monte Carlo analysis of maximum flow capacity between node groups using FailureManager. Produces unified `flow_results` per iteration under `data.flow_results`. +Baseline (no failures) is always run first as a separate reference. The `iterations` +parameter specifies how many failure scenarios to run. + YAML Configuration Example: workflow: @@ -17,7 +20,6 @@ shortest_path: false require_capacity: true # false for true IP/IGP semantics flow_placement: "PROPORTIONAL" - baseline: false seed: 42 store_failure_patterns: false include_flow_details: false # cost_distribution @@ -50,18 +52,22 @@ class MaxFlow(WorkflowStep): """Maximum flow Monte Carlo workflow step. + Baseline (no failures) is always run first as a separate reference. Results are + returned with baseline in a separate field. The flow_results list contains unique + failure patterns (deduplicated); each result has occurrence_count indicating how + many iterations matched that pattern. + Attributes: source: Source node selector (string path or selector dict). sink: Sink node selector (string path or selector dict). mode: Flow analysis mode ("combine" or "pairwise"). failure_policy: Name of failure policy in scenario.failure_policy_set. - iterations: Number of Monte Carlo trials. + iterations: Number of failure iterations to run. parallelism: Number of parallel worker processes. shortest_path: Whether to use shortest paths only. require_capacity: If True (default), path selection considers capacity. If False, path selection is cost-only (true IP/IGP semantics). flow_placement: Flow placement strategy. - baseline: Whether to run first iteration without failures as baseline. seed: Optional seed for reproducible results. store_failure_patterns: Whether to store failure patterns in results. include_flow_details: Whether to collect cost distribution per flow. 
@@ -77,15 +83,14 @@ class MaxFlow(WorkflowStep): shortest_path: bool = False require_capacity: bool = True flow_placement: FlowPlacement | str = FlowPlacement.PROPORTIONAL - baseline: bool = False seed: int | None = None store_failure_patterns: bool = False include_flow_details: bool = False include_min_cut: bool = False def __post_init__(self) -> None: - if self.iterations < 1: - raise ValueError("iterations must be >= 1") + if self.iterations < 0: + raise ValueError("iterations must be >= 0") if isinstance(self.parallelism, str): if self.parallelism != "auto": raise ValueError("parallelism must be an integer or 'auto'") @@ -94,29 +99,23 @@ def __post_init__(self) -> None: raise ValueError("parallelism must be >= 1") if self.mode not in {"combine", "pairwise"}: raise ValueError("mode must be 'combine' or 'pairwise'") - if self.baseline and self.iterations < 2: - raise ValueError( - "baseline=True requires iterations >= 2 " - "(first iteration is baseline, remaining are with failures)" - ) if isinstance(self.flow_placement, str): self.flow_placement = FlowPlacement.from_string(self.flow_placement) def run(self, scenario: "Scenario") -> None: t0 = time.perf_counter() - logger.info(f"Starting max-flow: {self.name}") + logger.info("Starting MaxFlow: name=%s", self.name) logger.debug( - "Parameters: source=%s, sink=%s, mode=%s, iterations=%s, parallelism=%s, " - "failure_policy=%s, baseline=%s, include_flow_details=%s, include_min_cut=%s", + "MaxFlow params: source=%s sink=%s mode=%s failure_iters=%d parallelism=%s " + "failure_policy=%s include_flow_details=%s include_min_cut=%s", self.source, self.sink, self.mode, - str(self.iterations), - str(self.parallelism), - str(self.failure_policy), - str(self.baseline), - str(self.include_flow_details), - str(self.include_min_cut), + self.iterations, + self.parallelism, + self.failure_policy, + self.include_flow_details, + self.include_min_cut, ) fm = FailureManager( @@ -134,7 +133,6 @@ def run(self, scenario: "Scenario") -> None: shortest_path=self.shortest_path, require_capacity=self.require_capacity, flow_placement=self.flow_placement, - baseline=self.baseline, seed=self.seed, store_failure_patterns=self.store_failure_patterns, include_flow_summary=self.include_flow_details, @@ -142,6 +140,17 @@ def run(self, scenario: "Scenario") -> None: ) scenario.results.put("metadata", raw.get("metadata", {})) + + # Handle baseline (separate from failure results) + baseline_result = raw.get("baseline") + baseline_dict = None + if baseline_result is not None: + if hasattr(baseline_result, "to_dict"): + baseline_dict = baseline_result.to_dict() + else: + baseline_dict = baseline_result + + # Handle failure results flow_results: list[dict] = [] for item in raw.get("results", []): if isinstance(item, FlowIterationResult): @@ -166,16 +175,20 @@ def run(self, scenario: "Scenario") -> None: scenario.results.put( "data", { + "baseline": baseline_dict, "flow_results": flow_results, "context": context, }, ) + metadata = raw.get("metadata", {}) logger.info( - "Max-flow stored: name=%s iters=%s workers=%s duration=%.3fs", + "MaxFlow completed: name=%s failure_iters=%d unique_patterns=%d " + "workers=%d duration=%.3fs", self.name, - str(raw.get("metadata", {}).get("iterations", self.iterations)), - str(raw.get("metadata", {}).get("parallelism", effective_parallelism)), + metadata.get("iterations", self.iterations), + metadata.get("unique_patterns", 0), + metadata.get("parallelism", effective_parallelism), time.perf_counter() - t0, ) diff --git 
a/ngraph/workflow/maximum_supported_demand_step.py b/ngraph/workflow/maximum_supported_demand_step.py index ee5a69e..2e7c6a0 100644 --- a/ngraph/workflow/maximum_supported_demand_step.py +++ b/ngraph/workflow/maximum_supported_demand_step.py @@ -88,9 +88,10 @@ def run(self, scenario: "Any") -> None: raise ValueError("Only 'hard' acceptance_rule is implemented") t0 = time.perf_counter() - logger.info( - "Starting MSD: name=%s matrix=%s alpha_start=%.6g growth=%.3f seeds=%d resolution=%.6g", - self.name or self.__class__.__name__, + logger.info("Starting MaximumSupportedDemand: name=%s", self.name) + logger.debug( + "MaximumSupportedDemand params: matrix=%s alpha_start=%.6g " + "growth=%.3f seeds=%d resolution=%.6g", self.matrix_name, float(self.alpha_start), float(self.growth_factor), @@ -165,9 +166,8 @@ def probe(alpha: float) -> tuple[bool, dict[str, Any]]: }, ) logger.info( - "MSD completed: name=%s matrix=%s alpha_star=%.6g probes=%d duration=%.3fs", - self.name or self.__class__.__name__, - self.matrix_name, + "MaximumSupportedDemand completed: name=%s alpha_star=%.6g probes=%d duration=%.3fs", + self.name, float(alpha_star), len(probes), time.perf_counter() - t0, diff --git a/ngraph/workflow/network_stats.py b/ngraph/workflow/network_stats.py index 18eb8e4..70b3a63 100644 --- a/ngraph/workflow/network_stats.py +++ b/ngraph/workflow/network_stats.py @@ -33,6 +33,8 @@ if TYPE_CHECKING: from ngraph.scenario import Scenario +logger = get_logger(__name__) + @dataclass class NetworkStats(WorkflowStep): @@ -63,6 +65,8 @@ def run(self, scenario: Scenario) -> None: Returns: None """ + logger.info("Starting NetworkStats: name=%s", self.name) + # Convert exclusion iterables to sets for efficient lookup excluded_nodes_set = set(self.excluded_nodes) if self.excluded_nodes else set() excluded_links_set = set(self.excluded_links) if self.excluded_links else set() @@ -142,20 +146,7 @@ def run(self, scenario: Scenario) -> None: min_degree_val = min(degree_values) max_degree_val = max(degree_values) - # INFO summary for workflow users (avoid expensive work unless needed) - total_capacity = 0.0 - if links: - total_capacity = float(sum(link.capacity for link in links.values())) - mean_deg = float(mean(degree_values)) if degree_values else 0.0 - get_logger(__name__).info( - "NetworkStats summary: name=%s nodes=%d links=%d total_capacity=%.1f mean_degree=%.2f", - self.name, - node_count, - link_count, - total_capacity, - mean_deg, - ) - # Store results in new schema + # Store results scenario.results.put("metadata", {}) # Ensure locals exist even when sets are empty if not links: @@ -186,6 +177,14 @@ def run(self, scenario: Scenario) -> None: }, ) + logger.info( + "NetworkStats completed: name=%s nodes=%d links=%d total_capacity=%.1f", + self.name, + node_count, + link_count, + float(total_capacity_val) if links else 0.0, + ) + # Register the class after definition to avoid decorator ordering issues register_workflow_step("NetworkStats")(NetworkStats) diff --git a/ngraph/workflow/traffic_matrix_placement_step.py b/ngraph/workflow/traffic_matrix_placement_step.py index 2e9287e..9afa171 100644 --- a/ngraph/workflow/traffic_matrix_placement_step.py +++ b/ngraph/workflow/traffic_matrix_placement_step.py @@ -2,6 +2,9 @@ Runs Monte Carlo demand placement using a named traffic matrix and produces unified `flow_results` per iteration under `data.flow_results`. + +Baseline (no failures) is always run first as a separate reference. The `iterations` +parameter specifies how many failure scenarios to run. 
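To make the v0.14.0 semantics concrete, here is a hypothetical instantiation of the step with the `baseline` field gone; the field names and defaults mirror the dataclass in this patch, while the `name` keyword is assumed to come from the `WorkflowStep` base class.

```python
from ngraph.workflow.traffic_matrix_placement_step import TrafficMatrixPlacement

step = TrafficMatrixPlacement(
    name="tm_placement",          # assumed base-class field
    matrix_name="default",
    failure_policy="single_link",
    iterations=100,               # failure scenarios; baseline runs implicitly
    parallelism="auto",
    placement_rounds="auto",
    seed=42,
    store_failure_patterns=True,  # also enables failure_trace on results
    include_flow_details=True,
)
```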
""" from __future__ import annotations @@ -29,13 +32,17 @@ class TrafficMatrixPlacement(WorkflowStep): """Monte Carlo demand placement using a named traffic matrix. + Baseline (no failures) is always run first as a separate reference. Results are + returned with baseline in a separate field. The flow_results list contains unique + failure patterns (deduplicated); each result has occurrence_count indicating how + many iterations matched that pattern. + Attributes: matrix_name: Name of the traffic matrix to analyze. failure_policy: Optional policy name in scenario.failure_policy_set. - iterations: Number of Monte Carlo iterations. + iterations: Number of failure iterations to run. parallelism: Number of parallel worker processes. placement_rounds: Placement optimization rounds (int or "auto"). - baseline: Include baseline iteration without failures first. seed: Optional seed for reproducibility. store_failure_patterns: Whether to store failure pattern results. include_flow_details: When True, include cost_distribution per flow. @@ -50,7 +57,6 @@ class TrafficMatrixPlacement(WorkflowStep): iterations: int = 1 parallelism: int | str = "auto" placement_rounds: int | str = "auto" - baseline: bool = False seed: int | None = None store_failure_patterns: bool = False include_flow_details: bool = False @@ -60,8 +66,8 @@ class TrafficMatrixPlacement(WorkflowStep): alpha_from_field: str = "data.alpha_star" def __post_init__(self) -> None: - if self.iterations < 1: - raise ValueError("iterations must be >= 1") + if self.iterations < 0: + raise ValueError("iterations must be >= 0") if isinstance(self.parallelism, str): if self.parallelism != "auto": raise ValueError("parallelism must be an integer or 'auto'") @@ -76,20 +82,16 @@ def run(self, scenario: "Scenario") -> None: raise ValueError("'matrix_name' is required for TrafficMatrixPlacement") t0 = time.perf_counter() - logger.info( - f"Starting traffic-matrix placement: {self.name or self.__class__.__name__}" - ) + logger.info("Starting TrafficMatrixPlacement: name=%s", self.name) logger.debug( - "Parameters: matrix_name=%s, iterations=%d, parallelism=%s, placement_rounds=%s, baseline=%s, include_flow_details=%s, include_used_edges=%s, failure_policy=%s, alpha=%s", + "TrafficMatrixPlacement params: matrix_name=%s failure_iters=%d " + "parallelism=%s placement_rounds=%s failure_policy=%s alpha=%s", self.matrix_name, self.iterations, - str(self.parallelism), - str(self.placement_rounds), - str(self.baseline), - str(self.include_flow_details), - str(self.include_used_edges), - str(self.failure_policy), - str(self.alpha), + self.parallelism, + self.placement_rounds, + self.failure_policy, + self.alpha, ) # Extract and serialize traffic matrix @@ -160,7 +162,6 @@ def run(self, scenario: "Scenario") -> None: iterations=self.iterations, parallelism=effective_parallelism, placement_rounds=self.placement_rounds, - baseline=self.baseline, seed=self.seed, store_failure_patterns=self.store_failure_patterns, include_flow_details=self.include_flow_details, @@ -168,15 +169,24 @@ def run(self, scenario: "Scenario") -> None: ) logger.debug( - "Placement MC completed: iterations=%s, parallelism=%s, baseline=%s", - str(raw.get("metadata", {}).get("iterations", 0)), - str(raw.get("metadata", {}).get("parallelism", 0)), - str(raw.get("metadata", {}).get("baseline", False)), + "TrafficMatrixPlacement MC done: failure_iters=%d unique_patterns=%d", + raw.get("metadata", {}).get("iterations", 0), + raw.get("metadata", {}).get("unique_patterns", 0), ) # Store outputs - 
step_metadata = raw.get("metadata", {}) - scenario.results.put("metadata", step_metadata) + scenario.results.put("metadata", raw.get("metadata", {})) + + # Handle baseline (separate from failure results) + baseline_result = raw.get("baseline") + baseline_dict = None + if baseline_result is not None: + if hasattr(baseline_result, "to_dict"): + baseline_dict = baseline_result.to_dict() + else: + baseline_dict = baseline_result + + # Handle failure results flow_results: list[dict] = [] for item in raw.get("results", []): if isinstance(item, FlowIterationResult): @@ -192,6 +202,7 @@ def run(self, scenario: "Scenario") -> None: scenario.results.put( "data", { + "baseline": baseline_dict, "flow_results": flow_results, "context": { "matrix_name": self.matrix_name, @@ -205,47 +216,16 @@ def run(self, scenario: "Scenario") -> None: }, ) - # Log summary - totals = [] - for item in raw.get("results", []): - if isinstance(item, FlowIterationResult): - totals.append(float(item.summary.total_placed)) - else: - summary = getattr(item, "summary", None) - if summary and hasattr(summary, "get"): - totals.append(float(summary.get("total_placed", 0.0))) - else: - totals.append(0.0) - from statistics import mean - - mean_v = float(mean(totals)) if totals else 0.0 - duration_sec = time.perf_counter() - t0 - rounds_str = str(self.placement_rounds) - seed_str = str(self.seed) if self.seed is not None else "-" - baseline_str = str(step_metadata.get("baseline", self.baseline)) - iterations = int(step_metadata.get("iterations", self.iterations)) - workers = int( - step_metadata.get("parallelism", resolve_parallelism(self.parallelism)) - ) + metadata = raw.get("metadata", {}) logger.info( - ( - "Placement summary: name=%s alpha=%.6g source=%s " - "iters=%d workers=%d rounds=%s baseline=%s seed=%s delivered_mean=%.4f duration=%.3fs" - ), + "TrafficMatrixPlacement completed: name=%s alpha=%.6g failure_iters=%d " + "unique_patterns=%d workers=%d duration=%.3fs", self.name, alpha_value, - str(alpha_source_value or "explicit"), - iterations, - workers, - rounds_str, - baseline_str, - seed_str, - mean_v, - duration_sec, - ) - - logger.info( - f"Traffic-matrix placement completed: {self.name or self.__class__.__name__}" + metadata.get("iterations", self.iterations), + metadata.get("unique_patterns", 0), + metadata.get("parallelism", effective_parallelism), + time.perf_counter() - t0, ) def _resolve_alpha(self, scenario: "Scenario") -> float: diff --git a/pyproject.toml b/pyproject.toml index 4359b83..4d5952d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" # --------------------------------------------------------------------- [project] name = "ngraph" -version = "0.13.0" +version = "0.14.0" description = "A tool and a library for network modeling and analysis." 
readme = "README.md" authors = [{ name = "Andrey Golovanov" }] diff --git a/scenarios/backbone_clos.yml b/scenarios/backbone_clos.yml index ad8a31f..91085a2 100644 --- a/scenarios/backbone_clos.yml +++ b/scenarios/backbone_clos.yml @@ -1205,7 +1205,6 @@ workflow: iterations: 1000 parallelism: 7 placement_rounds: auto - baseline: true store_failure_patterns: false include_flow_details: true include_used_edges: false diff --git a/scenarios/nsfnet.yaml b/scenarios/nsfnet.yaml index 4f6494b..e435f44 100644 --- a/scenarios/nsfnet.yaml +++ b/scenarios/nsfnet.yaml @@ -26,129 +26,647 @@ network: version: 1.1 nodes: # ----- CNSS core POPs -------------------------------------------------------- - Seattle: {attrs: {site_type: core}} - PaloAlto: {attrs: {site_type: core}} - LosAngeles: {attrs: {site_type: core}} - SaltLakeCity: {attrs: {site_type: core}} - Denver: {attrs: {site_type: core}} - Lincoln: {attrs: {site_type: core}} - StLouis: {attrs: {site_type: core}} - Chicago: {attrs: {site_type: core}} - Cleveland: {attrs: {site_type: core}} - NewYork: {attrs: {site_type: core}} - WashingtonDC: {attrs: {site_type: core}} - Greensboro: {attrs: {site_type: core}} - Atlanta: {attrs: {site_type: core}} - Houston: {attrs: {site_type: core}} - AnnArbor: {attrs: {site_type: core}} - Hartford: {attrs: {site_type: core}} + Seattle: { attrs: { site_type: core } } + PaloAlto: { attrs: { site_type: core } } + LosAngeles: { attrs: { site_type: core } } + SaltLakeCity: { attrs: { site_type: core } } + Denver: { attrs: { site_type: core } } + Lincoln: { attrs: { site_type: core } } + StLouis: { attrs: { site_type: core } } + Chicago: { attrs: { site_type: core } } + Cleveland: { attrs: { site_type: core } } + NewYork: { attrs: { site_type: core } } + WashingtonDC: { attrs: { site_type: core } } + Greensboro: { attrs: { site_type: core } } + Atlanta: { attrs: { site_type: core } } + Houston: { attrs: { site_type: core } } + AnnArbor: { attrs: { site_type: core } } + Hartford: { attrs: { site_type: core } } # ----- ENSS / super-computer & "additional" sites ----------------------- - Cambridge: {attrs: {site_type: edge}} # NEARnet - additional site - Argonne: {attrs: {site_type: edge}} # additional site - SanDiego: {attrs: {site_type: edge}} - Boulder: {attrs: {site_type: edge}} - Princeton: {attrs: {site_type: edge}} - Ithaca: {attrs: {site_type: edge}} - CollegePark: {attrs: {site_type: edge}} - Pittsburgh: {attrs: {site_type: edge}} - UrbanaChampaign: {attrs: {site_type: edge}} - MoffettField: {attrs: {site_type: edge}} # NASA Ames additional site + Cambridge: { attrs: { site_type: edge } } # NEARnet - additional site + Argonne: { attrs: { site_type: edge } } # additional site + SanDiego: { attrs: { site_type: edge } } + Boulder: { attrs: { site_type: edge } } + Princeton: { attrs: { site_type: edge } } + Ithaca: { attrs: { site_type: edge } } + CollegePark: { attrs: { site_type: edge } } + Pittsburgh: { attrs: { site_type: edge } } + UrbanaChampaign: { attrs: { site_type: edge } } + MoffettField: { attrs: { site_type: edge } } # NASA Ames additional site links: # Northern arc - - {source: NewYork, target: Cleveland, link_params: {capacity: 45000.0, cost: 4, risk_groups: [RG_Cleveland_NewYork], attrs: {circuit: A}}} - - {source: NewYork, target: Cleveland, link_params: {capacity: 45000.0, cost: 4, risk_groups: [RG_Cleveland_NewYork], attrs: {circuit: B}}} - - {source: Cleveland,target: Chicago, link_params: {capacity: 45000.0, cost: 6, risk_groups: [RG_Cleveland_Chicago], attrs: {circuit: A}}} - - {source: 
Cleveland,target: Chicago, link_params: {capacity: 45000.0, cost: 6, risk_groups: [RG_Cleveland_Chicago], attrs: {circuit: B}}} - - {source: Chicago, target: PaloAlto, link_params: {capacity: 45000.0, cost: 12, risk_groups: [RG_Chicago_PaloAlto], attrs: {circuit: A}}} - - {source: Chicago, target: PaloAlto, link_params: {capacity: 45000.0, cost: 12, risk_groups: [RG_Chicago_PaloAlto], attrs: {circuit: B}}} + - { + source: NewYork, + target: Cleveland, + link_params: + { + capacity: 45000.0, + cost: 4, + risk_groups: [RG_Cleveland_NewYork], + attrs: { circuit: A }, + }, + } + - { + source: NewYork, + target: Cleveland, + link_params: + { + capacity: 45000.0, + cost: 4, + risk_groups: [RG_Cleveland_NewYork], + attrs: { circuit: B }, + }, + } + - { + source: Cleveland, + target: Chicago, + link_params: + { + capacity: 45000.0, + cost: 6, + risk_groups: [RG_Cleveland_Chicago], + attrs: { circuit: A }, + }, + } + - { + source: Cleveland, + target: Chicago, + link_params: + { + capacity: 45000.0, + cost: 6, + risk_groups: [RG_Cleveland_Chicago], + attrs: { circuit: B }, + }, + } + - { + source: Chicago, + target: PaloAlto, + link_params: + { + capacity: 45000.0, + cost: 12, + risk_groups: [RG_Chicago_PaloAlto], + attrs: { circuit: A }, + }, + } + - { + source: Chicago, + target: PaloAlto, + link_params: + { + capacity: 45000.0, + cost: 12, + risk_groups: [RG_Chicago_PaloAlto], + attrs: { circuit: B }, + }, + } # Southern arc - - {source: NewYork, target: WashingtonDC, link_params: {capacity: 45000.0, cost: 4, risk_groups: [RG_NewYork_WashingtonDC], attrs: {circuit: A}}} - - {source: NewYork, target: WashingtonDC, link_params: {capacity: 45000.0, cost: 4, risk_groups: [RG_NewYork_WashingtonDC], attrs: {circuit: B}}} - - {source: WashingtonDC, target: Greensboro, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_WashingtonDC_Greensboro], attrs: {circuit: A}}} - - {source: WashingtonDC, target: Greensboro, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_WashingtonDC_Greensboro], attrs: {circuit: B}}} - - {source: Greensboro, target: Atlanta, link_params: {capacity: 45000.0, cost: 7, risk_groups: [RG_Greensboro_Atlanta], attrs: {circuit: A}}} - - {source: Greensboro, target: Atlanta, link_params: {capacity: 45000.0, cost: 7, risk_groups: [RG_Greensboro_Atlanta], attrs: {circuit: B}}} - - {source: Atlanta, target: Houston, link_params: {capacity: 45000.0, cost: 10, risk_groups: [RG_Atlanta_Houston], attrs: {circuit: A}}} - - {source: Atlanta, target: Houston, link_params: {capacity: 45000.0, cost: 10, risk_groups: [RG_Atlanta_Houston], attrs: {circuit: B}}} - - {source: Houston, target: LosAngeles, link_params: {capacity: 45000.0, cost: 14, risk_groups: [RG_Houston_LosAngeles], attrs: {circuit: A}}} - - {source: Houston, target: LosAngeles, link_params: {capacity: 45000.0, cost: 14, risk_groups: [RG_Houston_LosAngeles], attrs: {circuit: B}}} - - {source: LosAngeles, target: PaloAlto, link_params: {capacity: 45000.0, cost: 8, risk_groups: [RG_LosAngeles_PaloAlto], attrs: {circuit: A}}} - - {source: LosAngeles, target: PaloAlto, link_params: {capacity: 45000.0, cost: 8, risk_groups: [RG_LosAngeles_PaloAlto], attrs: {circuit: B}}} + - { + source: NewYork, + target: WashingtonDC, + link_params: + { + capacity: 45000.0, + cost: 4, + risk_groups: [RG_NewYork_WashingtonDC], + attrs: { circuit: A }, + }, + } + - { + source: NewYork, + target: WashingtonDC, + link_params: + { + capacity: 45000.0, + cost: 4, + risk_groups: [RG_NewYork_WashingtonDC], + attrs: { circuit: B }, + }, + } + - { + 
source: WashingtonDC, + target: Greensboro, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_WashingtonDC_Greensboro], + attrs: { circuit: A }, + }, + } + - { + source: WashingtonDC, + target: Greensboro, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_WashingtonDC_Greensboro], + attrs: { circuit: B }, + }, + } + - { + source: Greensboro, + target: Atlanta, + link_params: + { + capacity: 45000.0, + cost: 7, + risk_groups: [RG_Greensboro_Atlanta], + attrs: { circuit: A }, + }, + } + - { + source: Greensboro, + target: Atlanta, + link_params: + { + capacity: 45000.0, + cost: 7, + risk_groups: [RG_Greensboro_Atlanta], + attrs: { circuit: B }, + }, + } + - { + source: Atlanta, + target: Houston, + link_params: + { + capacity: 45000.0, + cost: 10, + risk_groups: [RG_Atlanta_Houston], + attrs: { circuit: A }, + }, + } + - { + source: Atlanta, + target: Houston, + link_params: + { + capacity: 45000.0, + cost: 10, + risk_groups: [RG_Atlanta_Houston], + attrs: { circuit: B }, + }, + } + - { + source: Houston, + target: LosAngeles, + link_params: + { + capacity: 45000.0, + cost: 14, + risk_groups: [RG_Houston_LosAngeles], + attrs: { circuit: A }, + }, + } + - { + source: Houston, + target: LosAngeles, + link_params: + { + capacity: 45000.0, + cost: 14, + risk_groups: [RG_Houston_LosAngeles], + attrs: { circuit: B }, + }, + } + - { + source: LosAngeles, + target: PaloAlto, + link_params: + { + capacity: 45000.0, + cost: 8, + risk_groups: [RG_LosAngeles_PaloAlto], + attrs: { circuit: A }, + }, + } + - { + source: LosAngeles, + target: PaloAlto, + link_params: + { + capacity: 45000.0, + cost: 8, + risk_groups: [RG_LosAngeles_PaloAlto], + attrs: { circuit: B }, + }, + } # Pacific NW & Rockies - - {source: Seattle, target: PaloAlto, link_params: {capacity: 45000.0, cost: 9, risk_groups: [RG_PaloAlto_Seattle], attrs: {circuit: A}}} - - {source: Seattle, target: PaloAlto, link_params: {capacity: 45000.0, cost: 9, risk_groups: [RG_PaloAlto_Seattle], attrs: {circuit: B}}} - - {source: Seattle, target: SaltLakeCity, link_params: {capacity: 45000.0, cost: 10, risk_groups: [RG_Seattle_SaltLakeCity], attrs: {circuit: A}}} - - {source: Seattle, target: SaltLakeCity, link_params: {capacity: 45000.0, cost: 10, risk_groups: [RG_Seattle_SaltLakeCity], attrs: {circuit: B}}} - - {source: SaltLakeCity, target: Denver, link_params: {capacity: 45000.0, cost: 9, risk_groups: [RG_SaltLakeCity_Denver], attrs: {circuit: A}}} - - {source: SaltLakeCity, target: Denver, link_params: {capacity: 45000.0, cost: 9, risk_groups: [RG_SaltLakeCity_Denver], attrs: {circuit: B}}} - - {source: Denver, target: Lincoln, link_params: {capacity: 45000.0, cost: 8, risk_groups: [RG_Denver_Lincoln], attrs: {circuit: A}}} - - {source: Denver, target: Lincoln, link_params: {capacity: 45000.0, cost: 8, risk_groups: [RG_Denver_Lincoln], attrs: {circuit: B}}} - - {source: Lincoln, target: StLouis, link_params: {capacity: 45000.0, cost: 6, risk_groups: [RG_Lincoln_StLouis], attrs: {circuit: A}}} - - {source: Lincoln, target: StLouis, link_params: {capacity: 45000.0, cost: 6, risk_groups: [RG_Lincoln_StLouis], attrs: {circuit: B}}} - - {source: StLouis, target: Chicago, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_StLouis_Chicago], attrs: {circuit: A}}} - - {source: StLouis, target: Chicago, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_StLouis_Chicago], attrs: {circuit: B}}} + - { + source: Seattle, + target: PaloAlto, + link_params: + { + capacity: 45000.0, + cost: 9, + 
risk_groups: [RG_PaloAlto_Seattle], + attrs: { circuit: A }, + }, + } + - { + source: Seattle, + target: PaloAlto, + link_params: + { + capacity: 45000.0, + cost: 9, + risk_groups: [RG_PaloAlto_Seattle], + attrs: { circuit: B }, + }, + } + - { + source: Seattle, + target: SaltLakeCity, + link_params: + { + capacity: 45000.0, + cost: 10, + risk_groups: [RG_Seattle_SaltLakeCity], + attrs: { circuit: A }, + }, + } + - { + source: Seattle, + target: SaltLakeCity, + link_params: + { + capacity: 45000.0, + cost: 10, + risk_groups: [RG_Seattle_SaltLakeCity], + attrs: { circuit: B }, + }, + } + - { + source: SaltLakeCity, + target: Denver, + link_params: + { + capacity: 45000.0, + cost: 9, + risk_groups: [RG_SaltLakeCity_Denver], + attrs: { circuit: A }, + }, + } + - { + source: SaltLakeCity, + target: Denver, + link_params: + { + capacity: 45000.0, + cost: 9, + risk_groups: [RG_SaltLakeCity_Denver], + attrs: { circuit: B }, + }, + } + - { + source: Denver, + target: Lincoln, + link_params: + { + capacity: 45000.0, + cost: 8, + risk_groups: [RG_Denver_Lincoln], + attrs: { circuit: A }, + }, + } + - { + source: Denver, + target: Lincoln, + link_params: + { + capacity: 45000.0, + cost: 8, + risk_groups: [RG_Denver_Lincoln], + attrs: { circuit: B }, + }, + } + - { + source: Lincoln, + target: StLouis, + link_params: + { + capacity: 45000.0, + cost: 6, + risk_groups: [RG_Lincoln_StLouis], + attrs: { circuit: A }, + }, + } + - { + source: Lincoln, + target: StLouis, + link_params: + { + capacity: 45000.0, + cost: 6, + risk_groups: [RG_Lincoln_StLouis], + attrs: { circuit: B }, + }, + } + - { + source: StLouis, + target: Chicago, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_StLouis_Chicago], + attrs: { circuit: A }, + }, + } + - { + source: StLouis, + target: Chicago, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_StLouis_Chicago], + attrs: { circuit: B }, + }, + } # Midwest shortcuts - - {source: Cleveland, target: StLouis, link_params: {capacity: 45000.0, cost: 7, risk_groups: [RG_Cleveland_StLouis], attrs: {circuit: A}}} - - {source: Cleveland, target: StLouis, link_params: {capacity: 45000.0, cost: 7, risk_groups: [RG_Cleveland_StLouis], attrs: {circuit: B}}} - - {source: Denver, target: SaltLakeCity, link_params: {capacity: 45000.0, cost: 9, risk_groups: [RG_Denver_SaltLakeCity], attrs: {circuit: A}}} - - {source: Denver, target: SaltLakeCity, link_params: {capacity: 45000.0, cost: 9, risk_groups: [RG_Denver_SaltLakeCity], attrs: {circuit: B}}} + - { + source: Cleveland, + target: StLouis, + link_params: + { + capacity: 45000.0, + cost: 7, + risk_groups: [RG_Cleveland_StLouis], + attrs: { circuit: A }, + }, + } + - { + source: Cleveland, + target: StLouis, + link_params: + { + capacity: 45000.0, + cost: 7, + risk_groups: [RG_Cleveland_StLouis], + attrs: { circuit: B }, + }, + } + - { + source: Denver, + target: SaltLakeCity, + link_params: + { + capacity: 45000.0, + cost: 9, + risk_groups: [RG_Denver_SaltLakeCity], + attrs: { circuit: A }, + }, + } + - { + source: Denver, + target: SaltLakeCity, + link_params: + { + capacity: 45000.0, + cost: 9, + risk_groups: [RG_Denver_SaltLakeCity], + attrs: { circuit: B }, + }, + } # Great-Lakes loop - - {source: Chicago, target: AnnArbor, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_Chicago_AnnArbor], attrs: {circuit: A}}} - - {source: Chicago, target: AnnArbor, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_Chicago_AnnArbor], attrs: {circuit: B}}} - - {source: AnnArbor, target: 
Cleveland, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_AnnArbor_Cleveland], attrs: {circuit: A}}} - - {source: AnnArbor, target: Cleveland, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_AnnArbor_Cleveland], attrs: {circuit: B}}} + - { + source: Chicago, + target: AnnArbor, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_Chicago_AnnArbor], + attrs: { circuit: A }, + }, + } + - { + source: Chicago, + target: AnnArbor, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_Chicago_AnnArbor], + attrs: { circuit: B }, + }, + } + - { + source: AnnArbor, + target: Cleveland, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_AnnArbor_Cleveland], + attrs: { circuit: A }, + }, + } + - { + source: AnnArbor, + target: Cleveland, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_AnnArbor_Cleveland], + attrs: { circuit: B }, + }, + } # Hartford hub - - {source: Hartford, target: NewYork, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_Hartford_NewYork], attrs: {circuit: A}}} - - {source: Hartford, target: NewYork, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_Hartford_NewYork], attrs: {circuit: B}}} - - {source: Hartford, target: WashingtonDC, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_Hartford_WashingtonDC], attrs: {circuit: A}}} - - {source: Hartford, target: WashingtonDC, link_params: {capacity: 45000.0, cost: 5, risk_groups: [RG_Hartford_WashingtonDC], attrs: {circuit: B}}} + - { + source: Hartford, + target: NewYork, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_Hartford_NewYork], + attrs: { circuit: A }, + }, + } + - { + source: Hartford, + target: NewYork, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_Hartford_NewYork], + attrs: { circuit: B }, + }, + } + - { + source: Hartford, + target: WashingtonDC, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_Hartford_WashingtonDC], + attrs: { circuit: A }, + }, + } + - { + source: Hartford, + target: WashingtonDC, + link_params: + { + capacity: 45000.0, + cost: 5, + risk_groups: [RG_Hartford_WashingtonDC], + attrs: { circuit: B }, + }, + } # Northeast spur - single circuits (no SRLG needed) - - {source: Princeton, target: Ithaca, link_params: {capacity: 45000.0, cost: 5, attrs: {circuit: A}}} - - {source: Princeton, target: WashingtonDC, link_params: {capacity: 45000.0, cost: 4, attrs: {circuit: A}}} - - {source: CollegePark, target: WashingtonDC, link_params: {capacity: 45000.0, cost: 3, attrs: {circuit: A}}} - - {source: CollegePark, target: NewYork, link_params: {capacity: 45000.0, cost: 6, attrs: {circuit: A}}} - - {source: Cambridge, target: NewYork, link_params: {capacity: 45000.0, cost: 6, attrs: {circuit: A}}} + - { + source: Princeton, + target: Ithaca, + link_params: { capacity: 45000.0, cost: 5, attrs: { circuit: A } }, + } + - { + source: Princeton, + target: WashingtonDC, + link_params: { capacity: 45000.0, cost: 4, attrs: { circuit: A } }, + } + - { + source: CollegePark, + target: WashingtonDC, + link_params: { capacity: 45000.0, cost: 3, attrs: { circuit: A } }, + } + - { + source: CollegePark, + target: NewYork, + link_params: { capacity: 45000.0, cost: 6, attrs: { circuit: A } }, + } + - { + source: Cambridge, + target: NewYork, + link_params: { capacity: 45000.0, cost: 6, attrs: { circuit: A } }, + } # ENSS & "additional site" spurs - single circuits - - {source: Argonne, target: Chicago, link_params: {capacity: 45000.0, 
cost: 4, attrs: {circuit: A}}} - - {source: SanDiego, target: LosAngeles, link_params: {capacity: 45000.0, cost: 6, attrs: {circuit: A}}} - - {source: Boulder, target: Denver, link_params: {capacity: 45000.0, cost: 4, attrs: {circuit: A}}} - - {source: Pittsburgh, target: Cleveland, link_params: {capacity: 45000.0, cost: 4, attrs: {circuit: A}}} - - {source: UrbanaChampaign, target: Chicago, link_params: {capacity: 45000.0, cost: 4, attrs: {circuit: A}}} - - {source: MoffettField, target: PaloAlto, link_params: {capacity: 45000.0, cost: 6, attrs: {circuit: A}}} + - { + source: Argonne, + target: Chicago, + link_params: { capacity: 45000.0, cost: 4, attrs: { circuit: A } }, + } + - { + source: SanDiego, + target: LosAngeles, + link_params: { capacity: 45000.0, cost: 6, attrs: { circuit: A } }, + } + - { + source: Boulder, + target: Denver, + link_params: { capacity: 45000.0, cost: 4, attrs: { circuit: A } }, + } + - { + source: Pittsburgh, + target: Cleveland, + link_params: { capacity: 45000.0, cost: 4, attrs: { circuit: A } }, + } + - { + source: UrbanaChampaign, + target: Chicago, + link_params: { capacity: 45000.0, cost: 4, attrs: { circuit: A } }, + } + - { + source: MoffettField, + target: PaloAlto, + link_params: { capacity: 45000.0, cost: 6, attrs: { circuit: A } }, + } ############################################################################### # Shared-risk groups - one per span that carried parallel A- and B-circuits ############################################################################### risk_groups: - - {name: RG_AnnArbor_Cleveland, attrs: {description: "Great-Lakes loop DS-3 pair"}} - - {name: RG_Atlanta_Houston, attrs: {description: "Southern arc DS-3 pair"}} - - {name: RG_Cleveland_Chicago, attrs: {description: "Northern arc DS-3 pair"}} - - {name: RG_Cleveland_NewYork, attrs: {description: "Northern arc DS-3 pair"}} - - {name: RG_Cleveland_StLouis, attrs: {description: "Mid-west shortcut DS-3 pair"}} - - {name: RG_Chicago_AnnArbor, attrs: {description: "Great-Lakes loop DS-3 pair"}} - - {name: RG_Chicago_PaloAlto, attrs: {description: "Trans-continental northern DS-3 pair"}} - - {name: RG_Denver_Lincoln, attrs: {description: "Rockies DS-3 pair"}} - - {name: RG_Denver_SaltLakeCity, attrs: {description: "Rockies DS-3 pair"}} - - {name: RG_Greensboro_Atlanta, attrs: {description: "Southern arc DS-3 pair"}} - - {name: RG_Hartford_NewYork, attrs: {description: "Hartford hub DS-3 pair"}} - - {name: RG_Hartford_WashingtonDC, attrs: {description: "Hartford hub DS-3 pair"}} - - {name: RG_Houston_LosAngeles, attrs: {description: "Southern arc DS-3 pair"}} - - {name: RG_Lincoln_StLouis, attrs: {description: "Rockies DS-3 pair"}} - - {name: RG_LosAngeles_PaloAlto, attrs: {description: "California DS-3 pair"}} - - {name: RG_NewYork_WashingtonDC, attrs: {description: "Southern arc DS-3 pair"}} - - {name: RG_PaloAlto_Seattle, attrs: {description: "Pacific-Northwest DS-3 pair"}} - - {name: RG_Seattle_SaltLakeCity, attrs: {description: "Pacific-Northwest DS-3 pair"}} - - {name: RG_SaltLakeCity_Denver, attrs: {description: "Rockies DS-3 pair"}} - - {name: RG_StLouis_Chicago, attrs: {description: "Rockies DS-3 pair"}} - - {name: RG_WashingtonDC_Greensboro, attrs: {description: "Southern arc DS-3 pair"}} + - { + name: RG_AnnArbor_Cleveland, + attrs: { description: "Great-Lakes loop DS-3 pair" }, + } + - { + name: RG_Atlanta_Houston, + attrs: { description: "Southern arc DS-3 pair" }, + } + - { + name: RG_Cleveland_Chicago, + attrs: { description: "Northern arc DS-3 pair" }, + } + - { 
+ name: RG_Cleveland_NewYork, + attrs: { description: "Northern arc DS-3 pair" }, + } + - { + name: RG_Cleveland_StLouis, + attrs: { description: "Mid-west shortcut DS-3 pair" }, + } + - { + name: RG_Chicago_AnnArbor, + attrs: { description: "Great-Lakes loop DS-3 pair" }, + } + - { + name: RG_Chicago_PaloAlto, + attrs: { description: "Trans-continental northern DS-3 pair" }, + } + - { name: RG_Denver_Lincoln, attrs: { description: "Rockies DS-3 pair" } } + - { + name: RG_Denver_SaltLakeCity, + attrs: { description: "Rockies DS-3 pair" }, + } + - { + name: RG_Greensboro_Atlanta, + attrs: { description: "Southern arc DS-3 pair" }, + } + - { + name: RG_Hartford_NewYork, + attrs: { description: "Hartford hub DS-3 pair" }, + } + - { + name: RG_Hartford_WashingtonDC, + attrs: { description: "Hartford hub DS-3 pair" }, + } + - { + name: RG_Houston_LosAngeles, + attrs: { description: "Southern arc DS-3 pair" }, + } + - { name: RG_Lincoln_StLouis, attrs: { description: "Rockies DS-3 pair" } } + - { + name: RG_LosAngeles_PaloAlto, + attrs: { description: "California DS-3 pair" }, + } + - { + name: RG_NewYork_WashingtonDC, + attrs: { description: "Southern arc DS-3 pair" }, + } + - { + name: RG_PaloAlto_Seattle, + attrs: { description: "Pacific-Northwest DS-3 pair" }, + } + - { + name: RG_Seattle_SaltLakeCity, + attrs: { description: "Pacific-Northwest DS-3 pair" }, + } + - { + name: RG_SaltLakeCity_Denver, + attrs: { description: "Rockies DS-3 pair" }, + } + - { name: RG_StLouis_Chicago, attrs: { description: "Rockies DS-3 pair" } } + - { + name: RG_WashingtonDC_Greensboro, + attrs: { description: "Southern arc DS-3 pair" }, + } ############################################################################### # Failure policies @@ -163,16 +681,16 @@ failure_policy_set: fail_risk_groups: false fail_risk_group_children: false modes: - - weight: 1.0 - rules: - # link reliability - random independent failures - - entity_scope: link - rule_type: random - probability: 0.001 # 0.1 % chance a given circuit is down - # node reliability - random independent router failures - - entity_scope: node - rule_type: random - probability: 0.0005 # 0.05 % chance a given node is down + - weight: 1.0 + rules: + # link reliability - random independent failures + - entity_scope: link + rule_type: random + probability: 0.001 # 0.1 % chance a given circuit is down + # node reliability - random independent router failures + - entity_scope: node + rule_type: random + probability: 0.0005 # 0.05 % chance a given node is down single_link_failure: attrs: @@ -199,7 +717,6 @@ workflow: parallelism: 8 shortest_path: false flow_placement: PROPORTIONAL - baseline: true seed: 42 store_failure_patterns: true include_flow_details: true @@ -215,7 +732,6 @@ workflow: parallelism: 8 shortest_path: false flow_placement: PROPORTIONAL - baseline: true seed: 42 store_failure_patterns: true include_flow_details: true diff --git a/scenarios/square_mesh.yaml b/scenarios/square_mesh.yaml index 358ac19..d7368bd 100644 --- a/scenarios/square_mesh.yaml +++ b/scenarios/square_mesh.yaml @@ -86,7 +86,6 @@ workflow: iterations: 1000 parallelism: 8 placement_rounds: auto - baseline: true seed: 42 store_failure_patterns: true include_flow_details: true @@ -104,7 +103,6 @@ workflow: parallelism: 8 shortest_path: false flow_placement: PROPORTIONAL - baseline: true seed: 42 store_failure_patterns: true include_flow_details: true diff --git a/tests/exec/failure/test_manager.py b/tests/exec/failure/test_manager.py index ea617e8..2503092 100644 --- 
a/tests/exec/failure/test_manager.py +++ b/tests/exec/failure/test_manager.py @@ -1,8 +1,7 @@ """High-value tests for `FailureManager` public behavior and APIs. -Focus on functional outcomes and API semantics using the new FailureManager -that works with NetGraph-Core. Tests core functionality, policy management, -exclusion computation, and convenience methods. +Focus on functional outcomes and API semantics. Tests core functionality, +policy management, exclusion computation, and convenience methods. """ from typing import Any @@ -233,10 +232,10 @@ def test_link_matching_on_capacity_attribute( class TestFailureManagerMonteCarloValidation: """Test validation logic for Monte Carlo parameters.""" - def test_validation_iterations_without_policy( + def test_iterations_without_policy_runs_baseline_only( self, simple_network: Network, failure_policy_set: FailurePolicySet ) -> None: - """Test that iterations > 1 without policy raises error.""" + """Test that iterations > 0 without policy runs baseline only.""" fm = FailureManager( network=simple_network, failure_policy_set=failure_policy_set, @@ -247,26 +246,28 @@ def test_validation_iterations_without_policy( def mock_analysis_func(*args: Any, **kwargs: Any) -> dict[str, Any]: return {"result": "mock"} - with pytest.raises( - ValueError, match="iterations=2 has no effect without a failure policy" - ): - fm.run_monte_carlo_analysis( - analysis_func=mock_analysis_func, iterations=2, baseline=False - ) + # Without policy, iterations are ignored and only baseline runs + result = fm.run_monte_carlo_analysis( + analysis_func=mock_analysis_func, iterations=10 + ) + # Baseline is always run + assert "baseline" in result + # No failure iterations without a policy + assert len(result["results"]) == 0 - def test_validation_baseline_requires_multiple_iterations( - self, failure_manager: FailureManager - ) -> None: - """Test that baseline=True requires iterations >= 2.""" + def test_baseline_always_present(self, failure_manager: FailureManager) -> None: + """Test that baseline is always present in results.""" # Mock analysis function def mock_analysis_func(*args: Any, **kwargs: Any) -> dict[str, Any]: return {"result": "mock"} - with pytest.raises(ValueError, match="baseline=True requires iterations >= 2"): - failure_manager.run_monte_carlo_analysis( - analysis_func=mock_analysis_func, iterations=1, baseline=True - ) + result = failure_manager.run_monte_carlo_analysis( + analysis_func=mock_analysis_func, iterations=3 + ) + # Baseline should always be present as separate field + assert "baseline" in result + assert result["baseline"] is not None class TestFailureManagerConvenienceMethods: @@ -279,7 +280,6 @@ def test_run_max_flow_monte_carlo_delegates( """Test run_max_flow_monte_carlo delegates to run_monte_carlo_analysis.""" mock_mc_analysis.return_value = { "results": [], - "failure_patterns": [], "metadata": {"iterations": 2}, } @@ -301,7 +301,6 @@ def test_run_demand_placement_monte_carlo_delegates( """Test run_demand_placement_monte_carlo delegates correctly.""" mock_mc_analysis.return_value = { "results": [], - "failure_patterns": [], "metadata": {"iterations": 1}, } @@ -394,3 +393,101 @@ def mock_analysis_func(*args: Any, **kwargs: Any) -> dict[str, Any]: failure_manager.run_monte_carlo_analysis( analysis_func=mock_analysis_func, iterations=2, parallelism=2 ) + + +class TestSensitivityResultsProcessing: + """Test sensitivity results processing with occurrence_count weighting.""" + + def test_process_sensitivity_results_weights_by_occurrence_count( + 
self, failure_manager: FailureManager + ) -> None: + """Verify weighted statistics calculation uses occurrence_count correctly.""" + from ngraph.results.flow import FlowEntry, FlowIterationResult, FlowSummary + + # Pattern A: score=0.8, occurred 5 times + # Pattern B: score=0.2, occurred 1 time + # Correct weighted mean = (0.8*5 + 0.2*1) / 6 = 4.2/6 = 0.7 + # Incorrect unweighted mean = (0.8 + 0.2) / 2 = 0.5 + + summary = FlowSummary( + total_demand=100.0, + total_placed=100.0, + overall_ratio=1.0, + dropped_flows=0, + num_flows=1, + ) + + entry_a = FlowEntry( + source="A", + destination="B", + priority=0, + demand=100.0, + placed=100.0, + dropped=0.0, + data={"sensitivity": {"link1": 0.8}}, + ) + result_a = FlowIterationResult( + flows=[entry_a], + summary=summary, + occurrence_count=5, + ) + + entry_b = FlowEntry( + source="A", + destination="B", + priority=0, + demand=100.0, + placed=100.0, + dropped=0.0, + data={"sensitivity": {"link1": 0.2}}, + ) + result_b = FlowIterationResult( + flows=[entry_b], + summary=summary, + occurrence_count=1, + ) + + processed = failure_manager._process_sensitivity_results([result_a, result_b]) + + stats = processed["A->B"]["link1"] + assert stats["count"] == 6.0 # 5 + 1 + assert stats["mean"] == pytest.approx(0.7) # weighted mean + assert stats["min"] == 0.2 + assert stats["max"] == 0.8 + + def test_process_sensitivity_results_single_pattern( + self, failure_manager: FailureManager + ) -> None: + """Single pattern with occurrence_count > 1 should have correct count.""" + from ngraph.results.flow import FlowEntry, FlowIterationResult, FlowSummary + + summary = FlowSummary( + total_demand=50.0, + total_placed=50.0, + overall_ratio=1.0, + dropped_flows=0, + num_flows=1, + ) + + entry = FlowEntry( + source="X", + destination="Y", + priority=0, + demand=50.0, + placed=50.0, + dropped=0.0, + data={"sensitivity": {"node1": 0.5}}, + ) + result = FlowIterationResult( + flows=[entry], + summary=summary, + occurrence_count=10, + ) + + processed = failure_manager._process_sensitivity_results([result]) + + stats = processed["X->Y"]["node1"] + assert stats["count"] == 10.0 + assert stats["mean"] == 0.5 + assert stats["min"] == 0.5 + assert stats["max"] == 0.5 diff --git a/tests/exec/failure/test_manager_integration.py b/tests/exec/failure/test_manager_integration.py index c2009ba..5119004 100644 --- a/tests/exec/failure/test_manager_integration.py +++ b/tests/exec/failure/test_manager_integration.py @@ -126,12 +126,20 @@ def test_run_monte_carlo_analysis(self, simple_network, failure_policy_set): assert "results" in results assert "metadata" in results - assert len(results["results"]) == 5 + + # Results contain K unique patterns (not N iterations) + unique_results = results["results"] + unique_patterns = results["metadata"]["unique_patterns"] + assert len(unique_results) == unique_patterns + + # Total occurrence_count should equal iterations + total_occurrences = sum(r.occurrence_count for r in unique_results) + assert total_occurrences == 5 # Each item is a FlowIterationResult; compute placed capacity capacities = [ float(iter_res.summary.total_placed) - for iter_res in results["results"] + for iter_res in unique_results if isinstance(iter_res, FlowIterationResult) ] # With the network topology (A->B->C and A->C), max flow is 15.0 without failures @@ -139,7 +147,6 @@ def test_run_monte_carlo_analysis(self, simple_network, failure_policy_set): # With single link failures (policy always fails 1 link): # - Exclude A->B or B->C: capacity is 5.0 (only direct path) # - 
Exclude A->C: capacity is 10.0 (only via B) - # The test runs 5 iterations with failures, so we see a mix of 5.0 and 10.0 assert max(capacities) == 10.0 # Best case with 1 failure assert min(capacities) == 5.0 # Worst case with 1 failure assert 5.0 in capacities # Should see some 5.0 results @@ -160,8 +167,14 @@ def test_analysis_with_parallel_execution(self, simple_network, failure_policy_s mode="combine", ) - assert len(results["results"]) == 4 + # Results contain K unique patterns assert "metadata" in results + unique_patterns = results["metadata"]["unique_patterns"] + assert len(results["results"]) == unique_patterns + + # Total occurrence_count should equal iterations + total_occurrences = sum(r.occurrence_count for r in results["results"]) + assert total_occurrences == 4 def test_baseline_iteration_handling(self, simple_network, failure_policy_set): """Test baseline iteration (no failures) behavior.""" @@ -171,23 +184,96 @@ def test_baseline_iteration_handling(self, simple_network, failure_policy_set): analysis_func=max_flow_analysis, iterations=3, parallelism=1, - baseline=True, # Include baseline seed=42, source="A", sink="C", mode="combine", ) - # Should have results from baseline + regular iterations - assert len(results["results"]) == 3 + # Baseline is always run first and stored separately + assert "baseline" in results + baseline = results["baseline"] + assert baseline is not None + assert baseline.failure_id == "" # Empty string for baseline + + # Results contain K unique patterns (total occurrence_count == 3) assert "metadata" in results + unique_patterns = results["metadata"]["unique_patterns"] + assert len(results["results"]) == unique_patterns + total_occurrences = sum(r.occurrence_count for r in results["results"]) + assert total_occurrences == 3 + + def test_failure_trace_fields_present(self, simple_network, failure_policy_set): + """Test that trace fields are present on results when store_failure_patterns=True.""" + manager = FailureManager(simple_network, failure_policy_set, "single_failures") + + results = manager.run_monte_carlo_analysis( + analysis_func=max_flow_analysis, + iterations=5, + parallelism=1, + store_failure_patterns=True, + seed=42, + source="A", + sink="C", + mode="combine", + ) + + # All unique failure results should have trace fields + for result in results["results"]: + assert isinstance(result, FlowIterationResult) + assert result.failure_id != "" # All failures have non-empty ID + assert result.failure_state is not None + assert "excluded_nodes" in result.failure_state + assert "excluded_links" in result.failure_state + assert result.occurrence_count >= 1 + + # Trace fields should be present when store_failure_patterns=True + trace = result.failure_trace + assert trace is not None, "Trace should be present" + assert "mode_index" in trace, "Trace field 'mode_index' missing" + assert "mode_attrs" in trace, "Trace field 'mode_attrs' missing" + assert "selections" in trace, "Trace field 'selections' missing" + assert "expansion" in trace, "Trace field 'expansion' missing" + + # Verify selections structure + assert isinstance(trace["selections"], list) + if trace["selections"]: + sel = trace["selections"][0] + assert "rule_index" in sel + assert "entity_scope" in sel + assert "rule_type" in sel + assert "matched_count" in sel + assert "selected_ids" in sel + + # Verify expansion structure + assert "nodes" in trace["expansion"] + assert "links" in trace["expansion"] + assert "risk_groups" in trace["expansion"] + + def 
test_failure_trace_not_present_when_disabled( + self, simple_network, failure_policy_set + ): + """Test that trace fields are NOT present when store_failure_patterns=False.""" + manager = FailureManager(simple_network, failure_policy_set, "single_failures") + + results = manager.run_monte_carlo_analysis( + analysis_func=max_flow_analysis, + iterations=5, + parallelism=1, + store_failure_patterns=False, # Disabled + seed=42, + source="A", + sink="C", + mode="combine", + ) - # Baseline should be included (enabled with baseline=True) - metadata = results["metadata"] - assert metadata["baseline"] + # Results should have failure_state but no trace + for result in results["results"]: + assert isinstance(result, FlowIterationResult) + assert result.failure_trace is None # No trace when disabled - def test_failure_pattern_storage(self, simple_network, failure_policy_set): - """Test storage of failure patterns in results.""" + def test_baseline_has_no_trace_fields(self, simple_network, failure_policy_set): + """Test that baseline result doesn't have trace fields.""" manager = FailureManager(simple_network, failure_policy_set, "single_failures") results = manager.run_monte_carlo_analysis( @@ -201,11 +287,40 @@ def test_failure_pattern_storage(self, simple_network, failure_policy_set): mode="combine", ) - assert "failure_patterns" in results - failure_patterns = results["failure_patterns"] + # Baseline is a separate result with no trace + baseline = results["baseline"] + assert baseline is not None + assert baseline.failure_trace is None # No trace for baseline + assert baseline.failure_id == "" + + def test_failure_trace_deterministic(self, simple_network, failure_policy_set): + """Test that trace is deterministic with fixed seed.""" + manager = FailureManager(simple_network, failure_policy_set, "single_failures") + + def run(): + return manager.run_monte_carlo_analysis( + analysis_func=max_flow_analysis, + iterations=5, + parallelism=1, + store_failure_patterns=True, + seed=42, + source="A", + sink="C", + mode="combine", + ) + + result1 = run() + result2 = run() - # Should have recorded failure patterns (may be empty list in this simple case) - assert isinstance(failure_patterns, list) + # Results should have same failure patterns + assert len(result1["results"]) == len(result2["results"]) + for r1, r2 in zip(result1["results"], result2["results"], strict=True): + assert r1.failure_id == r2.failure_id + assert r1.failure_state == r2.failure_state + assert r1.occurrence_count == r2.occurrence_count + if r1.failure_trace: + assert r1.failure_trace["mode_index"] == r2.failure_trace["mode_index"] + assert r1.failure_trace["selections"] == r2.failure_trace["selections"] class TestFailureManagerIntegration: @@ -262,14 +377,18 @@ def test_capacity_envelope_analysis_integration(self): assert "results" in results assert "metadata" in results - # Should have results for each iteration - assert len(results["results"]) == 10 + # Results contain K unique patterns (occurrence_count sum == 10) + unique_patterns = results["metadata"]["unique_patterns"] + assert len(results["results"]) == unique_patterns + total_occurrences = sum(r.occurrence_count for r in results["results"]) + assert total_occurrences == 10 # Each result is a FlowIterationResult; ensure flows present for iter_res in results["results"]: assert isinstance(iter_res, FlowIterationResult) assert hasattr(iter_res, "summary") assert isinstance(iter_res.flows, list) + assert iter_res.occurrence_count >= 1 def test_error_handling_in_analysis(self): 
"""Test error handling during analysis execution.""" diff --git a/tests/integration/scenario_3.yaml b/tests/integration/scenario_3.yaml index f3d2e0c..fd2ada8 100644 --- a/tests/integration/scenario_3.yaml +++ b/tests/integration/scenario_3.yaml @@ -117,7 +117,6 @@ workflow: shortest_path: true flow_placement: PROPORTIONAL iterations: 1 - baseline: false failure_policy: null # Reverse direction analysis - equivalent to capacity_probe with probe_reverse @@ -129,7 +128,6 @@ workflow: shortest_path: true flow_placement: PROPORTIONAL iterations: 1 - baseline: false failure_policy: null # Forward direction with EQUAL_BALANCED - equivalent to capacity_probe2 @@ -141,7 +139,6 @@ workflow: shortest_path: true flow_placement: EQUAL_BALANCED iterations: 1 - baseline: false failure_policy: null # Reverse direction with EQUAL_BALANCED - equivalent to capacity_probe2 with probe_reverse @@ -153,5 +150,4 @@ workflow: shortest_path: true flow_placement: EQUAL_BALANCED iterations: 1 - baseline: false failure_policy: null diff --git a/tests/integration/scenario_4.yaml b/tests/integration/scenario_4.yaml index d2088ef..735f28f 100644 --- a/tests/integration/scenario_4.yaml +++ b/tests/integration/scenario_4.yaml @@ -331,7 +331,6 @@ workflow: shortest_path: false flow_placement: "PROPORTIONAL" iterations: 1 - baseline: false failure_policy: null # Reverse intra-DC capacity analysis @@ -343,7 +342,6 @@ workflow: shortest_path: false flow_placement: "PROPORTIONAL" iterations: 1 - baseline: false failure_policy: null # Forward inter-DC capacity analysis @@ -355,7 +353,6 @@ workflow: shortest_path: false flow_placement: "EQUAL_BALANCED" iterations: 1 - baseline: false failure_policy: null # Reverse inter-DC capacity analysis @@ -367,7 +364,6 @@ workflow: shortest_path: false flow_placement: "EQUAL_BALANCED" iterations: 1 - baseline: false failure_policy: null # Failure analysis with different policies diff --git a/tests/integration/test_data_templates.py b/tests/integration/test_data_templates.py index 9748668..225bc22 100644 --- a/tests/integration/test_data_templates.py +++ b/tests/integration/test_data_templates.py @@ -480,7 +480,6 @@ def capacity_analysis_workflow( "sink": sink_pattern, "mode": mode, "iterations": 1, - "baseline": False, "failure_policy": None, "shortest_path": True, } @@ -519,7 +518,6 @@ def comprehensive_analysis_workflow( "sink": sink_pattern, "mode": "combine", "iterations": 1, - "baseline": True, }, { "step_type": "MaxFlow", @@ -529,7 +527,6 @@ def comprehensive_analysis_workflow( "mode": "pairwise", "shortest_path": True, "iterations": 1, - "baseline": True, }, { "step_type": "MaxFlow", diff --git a/tests/integration/test_scenario_1.py b/tests/integration/test_scenario_1.py index 2f678e7..38fbee7 100644 --- a/tests/integration/test_scenario_1.py +++ b/tests/integration/test_scenario_1.py @@ -39,7 +39,7 @@ def scenario_1_executed(self, scenario_1): @pytest.fixture def helper(self, scenario_1_executed): """Create test helper for scenario 1.""" - # create_scenario_helper now handles graph conversion using nx.node_link_graph + # create_scenario_helper handles graph conversion using nx.node_link_graph helper = create_scenario_helper(scenario_1_executed) return helper diff --git a/tests/integration/test_scenario_2.py b/tests/integration/test_scenario_2.py index a04299c..b9d5378 100644 --- a/tests/integration/test_scenario_2.py +++ b/tests/integration/test_scenario_2.py @@ -40,7 +40,7 @@ def scenario_2_executed(self, scenario_2): @pytest.fixture def helper(self, scenario_2_executed): """Create 
test helper for scenario 2.""" - # create_scenario_helper now handles graph conversion using nx.node_link_graph + # create_scenario_helper handles graph conversion using nx.node_link_graph helper = create_scenario_helper(scenario_2_executed) return helper @@ -79,7 +79,7 @@ def test_hierarchical_node_naming(self, helper): """Test that hierarchical node naming from blueprints works correctly.""" # Test specific expanded node names from the blueprint hierarchy expected_nodes = { - "SEA/clos_instance/spine/myspine-6", # Overridden spine with new naming + "SEA/clos_instance/spine/myspine-6", # Overridden spine with custom naming "SFO/single/single-1", # Single node blueprint "SEA/edge_nodes/edge-1", # Edge nodes from city_cloud blueprint "SEA/clos_instance/leaf/leaf-1", # Leaf nodes from nested clos_2tier diff --git a/tests/integration/test_scenario_3.py b/tests/integration/test_scenario_3.py index 70be621..3e01c45 100644 --- a/tests/integration/test_scenario_3.py +++ b/tests/integration/test_scenario_3.py @@ -41,7 +41,7 @@ def scenario_3_executed(self, scenario_3): @pytest.fixture def helper(self, scenario_3_executed): """Create test helper for scenario 3.""" - # create_scenario_helper now handles graph conversion using nx.node_link_graph + # create_scenario_helper handles graph conversion using nx.node_link_graph helper = create_scenario_helper(scenario_3_executed) return helper @@ -177,7 +177,7 @@ def test_link_overrides_application(self, helper): ) # Test general spine-spine link overrides - # Now only risk_groups remain validated at link-level; per-end HW moved under attrs.hardware + # Only risk_groups are validated at link-level; per-end HW is under attrs.hardware helper.validate_link_attributes( source_pattern=r"my_clos1/spine/t3-2$", target_pattern=r"my_clos2/spine/t3-2$", @@ -216,21 +216,26 @@ def test_no_failure_policy(self, helper): def test_capacity_envelope_proportional_flow_results(self, helper): """Test capacity envelope results with PROPORTIONAL flow placement.""" - # Test forward direction (MaxFlow now returns flow_results with summary) + # Test forward direction (MaxFlow returns baseline separately, flow_results for failures) exported = helper.scenario.results.to_dict() fwd = exported["steps"].get("capacity_analysis_forward", {}).get("data", {}) - fwd_results = fwd.get("flow_results", []) - assert fwd_results, "Forward capacity analysis should have flow_results" - fwd_total = float(fwd_results[0].get("summary", {}).get("total_placed", 0.0)) + # Without failure policy, use baseline; otherwise check flow_results + fwd_result = fwd.get("baseline") or (fwd.get("flow_results", []) or [None])[0] + assert fwd_result, ( + "Forward capacity analysis should have baseline or flow_results" + ) + fwd_total = float(fwd_result.get("summary", {}).get("total_placed", 0.0)) assert abs(fwd_total - 3200.0) < 0.1, ( f"Expected forward flow ~3200.0, got {fwd_total}" ) # Test reverse direction rev = exported["steps"].get("capacity_analysis_reverse", {}).get("data", {}) - rev_results = rev.get("flow_results", []) - assert rev_results, "Reverse capacity analysis should have flow_results" - rev_total = float(rev_results[0].get("summary", {}).get("total_placed", 0.0)) + rev_result = rev.get("baseline") or (rev.get("flow_results", []) or [None])[0] + assert rev_result, ( + "Reverse capacity analysis should have baseline or flow_results" + ) + rev_total = float(rev_result.get("summary", {}).get("total_placed", 0.0)) assert abs(rev_total - 3200.0) < 0.1, ( f"Expected reverse flow ~3200.0, got {rev_total}" 
) @@ -243,11 +248,12 @@ def test_capacity_envelope_equal_balanced_flow_results(self, helper): .get("capacity_analysis_forward_balanced", {}) .get("data", {}) ) - fwd_results = fwd.get("flow_results", []) - assert fwd_results, ( - "Forward balanced capacity analysis should have flow_results" + # Without failure policy, use baseline; otherwise check flow_results + fwd_result = fwd.get("baseline") or (fwd.get("flow_results", []) or [None])[0] + assert fwd_result, ( + "Forward balanced capacity analysis should have baseline or flow_results" ) - fwd_total = float(fwd_results[0].get("summary", {}).get("total_placed", 0.0)) + fwd_total = float(fwd_result.get("summary", {}).get("total_placed", 0.0)) assert abs(fwd_total - 3200.0) < 0.1 rev = ( @@ -255,11 +261,11 @@ def test_capacity_envelope_equal_balanced_flow_results(self, helper): .get("capacity_analysis_reverse_balanced", {}) .get("data", {}) ) - rev_results = rev.get("flow_results", []) - assert rev_results, ( - "Reverse balanced capacity analysis should have flow_results" + rev_result = rev.get("baseline") or (rev.get("flow_results", []) or [None])[0] + assert rev_result, ( + "Reverse balanced capacity analysis should have baseline or flow_results" ) - rev_total = float(rev_results[0].get("summary", {}).get("total_placed", 0.0)) + rev_total = float(rev_result.get("summary", {}).get("total_placed", 0.0)) assert abs(rev_total - 3200.0) < 0.1 def test_flow_conservation_properties(self, helper): @@ -270,10 +276,11 @@ def test_flow_conservation_properties(self, helper): def total_placed(step: str) -> float | None: data = exported["steps"].get(step, {}).get("data", {}) - res = data.get("flow_results", []) - if not res: + # Check baseline first (no failure policy), then flow_results + result = data.get("baseline") or (data.get("flow_results", []) or [None])[0] + if not result: return None - return float(res[0].get("summary", {}).get("total_placed", 0.0)) + return float(result.get("summary", {}).get("total_placed", 0.0)) fp = total_placed("capacity_analysis_forward") if fp is not None: diff --git a/tests/integration/test_scenario_4.py b/tests/integration/test_scenario_4.py index 97fd023..ac7dbdc 100644 --- a/tests/integration/test_scenario_4.py +++ b/tests/integration/test_scenario_4.py @@ -51,7 +51,7 @@ def scenario_4_executed(self, scenario_4): @pytest.fixture(scope="module") def helper(self, scenario_4_executed): """Create test helper for scenario 4.""" - # create_scenario_helper now handles graph conversion using nx.node_link_graph + # create_scenario_helper handles graph conversion using nx.node_link_graph helper = create_scenario_helper(scenario_4_executed) return helper @@ -339,30 +339,37 @@ def test_advanced_workflow_steps(self, helper): # assert graph is not None # Skipping graph check - node_link_to_graph removed after NetGraph-Core migration - # Test MaxFlow results - using flow_results key and summary totals + # Test MaxFlow results - check baseline (no failure policy) or flow_results intra_dc = ( exported["steps"].get("intra_dc_capacity_forward", {}).get("data", {}) ) - intra_results = intra_dc.get("flow_results", []) - assert intra_results, ( - "Intra-DC forward capacity analysis should have flow_results" + intra_result = ( + intra_dc.get("baseline") or (intra_dc.get("flow_results", []) or [None])[0] ) - assert float(intra_results[0]["summary"].get("total_placed", 0.0)) >= 0.0 + assert intra_result, ( + "Intra-DC forward capacity analysis should have baseline or flow_results" + ) + assert 
float(intra_result["summary"].get("total_placed", 0.0)) >= 0.0 inter_dc = ( exported["steps"].get("inter_dc_capacity_forward", {}).get("data", {}) ) - inter_results = inter_dc.get("flow_results", []) - assert inter_results, ( - "Inter-DC forward capacity analysis should have flow_results" + inter_result = ( + inter_dc.get("baseline") or (inter_dc.get("flow_results", []) or [None])[0] + ) + assert inter_result, ( + "Inter-DC forward capacity analysis should have baseline or flow_results" ) - assert float(inter_results[0]["summary"].get("total_placed", 0.0)) >= 0.0 + assert float(inter_result["summary"].get("total_placed", 0.0)) >= 0.0 rack_failure = ( exported["steps"].get("rack_failure_analysis", {}).get("data", {}) ) - rack_results = rack_failure.get("flow_results", []) - assert rack_results, "Rack failure analysis should have flow_results" + rack_result = ( + rack_failure.get("baseline") + or (rack_failure.get("flow_results", []) or [None])[0] + ) + assert rack_result, "Rack failure analysis should have baseline or flow_results" def test_network_explorer_integration(self, helper): """Test NetworkExplorer functionality with complex hierarchy.""" diff --git a/tests/integration/test_template_examples.py b/tests/integration/test_template_examples.py index fdc2ac4..52bfa3a 100644 --- a/tests/integration/test_template_examples.py +++ b/tests/integration/test_template_examples.py @@ -604,7 +604,6 @@ def test_scenario_3_template_variant(self): "shortest_path": True, "flow_placement": "PROPORTIONAL", "iterations": 1, - "baseline": False, "failure_policy": None, }, { @@ -616,7 +615,6 @@ def test_scenario_3_template_variant(self): "shortest_path": True, "flow_placement": "EQUAL_BALANCED", "iterations": 1, - "baseline": False, "failure_policy": None, }, ] diff --git a/tests/model/components/test_components.py b/tests/model/components/test_components.py index 44f235b..d8eec1d 100644 --- a/tests/model/components/test_components.py +++ b/tests/model/components/test_components.py @@ -156,11 +156,11 @@ def test_components_library_merge_override_true() -> None: ) lib1.merge(lib2, override=True) - # The "Overlap" component should now be the one from lib2 (cost=200). + # The "Overlap" component is replaced by lib2's version (cost=200). assert lib1.get("Overlap") is new_comp - # The new library should also include the previously missing component. + # The merged library includes components from lib2. assert "UniqueLib2" in lib1.components - # The old unique component remains. + # The original unique component remains. assert "UniqueLib1" in lib1.components @@ -186,9 +186,9 @@ def test_components_library_merge_override_false() -> None: ) lib1.merge(lib2, override=False) - # The "Overlap" component should remain the original_comp (cost=100). + # The "Overlap" component remains the original (cost=100). assert lib1.get("Overlap") is original_comp - # The new library should also include the previously missing component. + # The merged library includes components from lib2. 
assert "UniqueLib2" in lib1.components diff --git a/tests/model/failure/test_failure_trace.py b/tests/model/failure/test_failure_trace.py new file mode 100644 index 0000000..6a377fb --- /dev/null +++ b/tests/model/failure/test_failure_trace.py @@ -0,0 +1,378 @@ +"""Tests for failure_trace capture in FailurePolicy and FailureManager.""" + +import pytest + +from ngraph.exec.failure.manager import FailureManager +from ngraph.model.failure.policy import ( + FailureCondition, + FailureMode, + FailurePolicy, + FailureRule, +) +from ngraph.model.failure.policy_set import FailurePolicySet +from ngraph.model.network import Link, Network, Node + +# ----------------------------------------------------------------------------- +# FailurePolicy.apply_failures trace tests +# ----------------------------------------------------------------------------- + + +class TestFailureTracePolicyLevel: + """Test failure_trace capture in FailurePolicy.apply_failures.""" + + def test_trace_captures_mode_index(self) -> None: + """Test that mode_index is correctly captured.""" + rule = FailureRule(entity_scope="node", rule_type="all") + policy = FailurePolicy( + modes=[ + FailureMode(weight=0.0, rules=[]), # weight=0 never selected + FailureMode(weight=1.0, rules=[rule], attrs={"name": "mode1"}), + ] + ) + + nodes = {"N1": {}, "N2": {}} + trace: dict = {} + policy.apply_failures(nodes, {}, failure_trace=trace, seed=42) + + assert trace["mode_index"] == 1 + assert trace["mode_attrs"] == {"name": "mode1"} + + def test_trace_captures_mode_attrs(self) -> None: + """Test that mode_attrs is a copy of the selected mode's attrs.""" + attrs = {"severity": "high", "region": "west"} + rule = FailureRule(entity_scope="node", rule_type="all") + policy = FailurePolicy( + modes=[FailureMode(weight=1.0, rules=[rule], attrs=attrs)] + ) + + trace: dict = {} + policy.apply_failures({"N1": {}}, {}, failure_trace=trace) + + assert trace["mode_attrs"] == attrs + # Verify it's a copy, not a reference + assert trace["mode_attrs"] is not attrs + + def test_trace_captures_selection_fields(self) -> None: + """Test that selections contain correct fields.""" + rule = FailureRule( + entity_scope="node", + conditions=[FailureCondition(attr="type", operator="==", value="router")], + rule_type="choice", + count=1, + ) + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + + nodes = { + "N1": {"type": "router"}, + "N2": {"type": "router"}, + "N3": {"type": "server"}, + } + trace: dict = {} + policy.apply_failures(nodes, {}, failure_trace=trace, seed=42) + + assert len(trace["selections"]) == 1 + sel = trace["selections"][0] + assert sel["rule_index"] == 0 + assert sel["entity_scope"] == "node" + assert sel["rule_type"] == "choice" + assert sel["matched_count"] == 2 # N1 and N2 matched + assert len(sel["selected_ids"]) == 1 # count=1 + + def test_trace_empty_selections_when_no_match(self) -> None: + """Test that rules matching nothing are not recorded.""" + rule = FailureRule( + entity_scope="node", + conditions=[ + FailureCondition(attr="type", operator="==", value="nonexistent") + ], + rule_type="all", + ) + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + + nodes = {"N1": {"type": "router"}} + trace: dict = {} + policy.apply_failures(nodes, {}, failure_trace=trace) + + assert trace["selections"] == [] + + def test_trace_captures_expansion_nodes_links(self) -> None: + """Test expansion tracking for risk group expansion.""" + nodes = { + "N1": {"risk_groups": ["rg1"]}, + "N2": {"risk_groups": ["rg1"]}, + "N3": 
{"risk_groups": []}, + } + links = {"L1": {"risk_groups": ["rg1"]}} + + # N1 and N2 share rg1, L1 also in rg1 + # After expansion: N2 and L1 should appear in expansion + trace: dict = {} + # Pass only N1 that matches (we need to match only one node initially) + policy_choice = FailurePolicy( + modes=[ + FailureMode( + weight=1.0, + rules=[ + FailureRule(entity_scope="node", rule_type="choice", count=1) + ], + ) + ], + fail_risk_groups=True, + ) + policy_choice.apply_failures(nodes, links, failure_trace=trace, seed=42) + + # The expansion should show nodes/links added after initial selection + assert "expansion" in trace + assert "nodes" in trace["expansion"] + assert "links" in trace["expansion"] + + def test_trace_captures_expansion_risk_groups(self) -> None: + """Test expansion tracking for risk group children.""" + # Select only the parent, then expansion should add child + rule = FailureRule( + entity_scope="risk_group", + conditions=[ + FailureCondition(attr="name", operator="==", value="parent_rg") + ], + rule_type="all", + ) + policy = FailurePolicy( + modes=[FailureMode(weight=1.0, rules=[rule])], + fail_risk_group_children=True, + ) + + risk_groups = { + "parent_rg": {"name": "parent_rg", "children": [{"name": "child_rg"}]}, + "child_rg": {"name": "child_rg", "children": []}, + } + + trace: dict = {} + policy.apply_failures({}, {}, risk_groups, failure_trace=trace) + + # child_rg should appear in expansion.risk_groups (added by expansion, not selection) + assert "child_rg" in trace["expansion"]["risk_groups"] + + def test_trace_no_modes_returns_null_mode_index(self) -> None: + """Test that mode_index is None when no modes configured.""" + policy = FailurePolicy(modes=[]) + + trace: dict = {} + policy.apply_failures({}, {}, failure_trace=trace) + + assert trace["mode_index"] is None + assert trace["mode_attrs"] == {} + assert trace["selections"] == [] + + def test_trace_none_does_not_populate(self) -> None: + """Test that passing failure_trace=None doesn't cause errors.""" + rule = FailureRule(entity_scope="node", rule_type="all") + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + + # Should not raise + result = policy.apply_failures({"N1": {}}, {}, failure_trace=None) + assert result == ["N1"] + + def test_trace_deterministic_with_seed(self) -> None: + """Test that trace is deterministic with fixed seed.""" + rule = FailureRule(entity_scope="node", rule_type="choice", count=1) + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + + nodes = {"N1": {}, "N2": {}, "N3": {}} + + trace1: dict = {} + trace2: dict = {} + policy.apply_failures(nodes, {}, failure_trace=trace1, seed=42) + policy.apply_failures(nodes, {}, failure_trace=trace2, seed=42) + + assert trace1 == trace2 + + +# ----------------------------------------------------------------------------- +# FailureManager integration tests +# ----------------------------------------------------------------------------- + + +@pytest.fixture +def network_with_risk_groups() -> Network: + """Create a network with risk groups for expansion testing.""" + network = Network() + n1 = Node("N1", attrs={"type": "router"}) + n1.risk_groups = ["rg1"] + n2 = Node("N2", attrs={"type": "router"}) + n2.risk_groups = ["rg1"] + n3 = Node("N3", attrs={"type": "server"}) + network.add_node(n1) + network.add_node(n2) + network.add_node(n3) + + link = Link("N1", "N2", capacity=100.0) + link.risk_groups = ["rg1"] + network.add_link(link) + network.add_link(Link("N2", "N3", capacity=100.0)) + return network + + 
+@pytest.fixture +def simple_network() -> Network: + """Create a simple network for testing.""" + network = Network() + network.add_node(Node("N1", attrs={"type": "router"})) + network.add_node(Node("N2", attrs={"type": "router"})) + network.add_node(Node("N3", attrs={"type": "server"})) + network.add_link(Link("N1", "N2", capacity=100.0)) + network.add_link(Link("N2", "N3", capacity=100.0)) + return network + + +class TestFailureTraceManagerIntegration: + """Test failure_trace integration in FailureManager.""" + + def test_results_include_trace_fields(self, simple_network: Network) -> None: + """Test that results include trace fields when store_failure_patterns=True.""" + rule = FailureRule(entity_scope="node", rule_type="choice", count=1) + policy = FailurePolicy( + modes=[FailureMode(weight=1.0, rules=[rule], attrs={"test": "attr"})] + ) + policy_set = FailurePolicySet() + policy_set.policies["test"] = policy + + fm = FailureManager(simple_network, policy_set, policy_name="test") + + def mock_analysis(network, excluded_nodes, excluded_links, **kwargs): + return {"mock": True} + + result = fm.run_monte_carlo_analysis( + analysis_func=mock_analysis, + iterations=3, + store_failure_patterns=True, + seed=42, + ) + + # Results contain unique patterns (total occurrence_count == 3) + results = result["results"] + total_occurrences = sum(getattr(r, "occurrence_count", 1) for r in results) + assert total_occurrences == 3 + + # All results should have trace fields when store_failure_patterns=True + # Note: mock_analysis returns dict, not FlowIterationResult, so trace + # is stored differently. The key behavior is that failure_trace is captured. + + def test_baseline_has_no_trace_fields(self, simple_network: Network) -> None: + """Test that baseline result doesn't have trace fields.""" + rule = FailureRule(entity_scope="node", rule_type="choice", count=1) + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + policy_set = FailurePolicySet() + policy_set.policies["test"] = policy + + fm = FailureManager(simple_network, policy_set, policy_name="test") + + def mock_analysis(network, excluded_nodes, excluded_links, **kwargs): + return {"mock": True} + + result = fm.run_monte_carlo_analysis( + analysis_func=mock_analysis, + iterations=3, + store_failure_patterns=True, + seed=42, + ) + + # Baseline is a separate result + baseline = result["baseline"] + assert baseline is not None + + # Results contain K unique patterns (occurrence_count sum == 3) + results = result["results"] + total_occurrences = sum(getattr(r, "occurrence_count", 1) for r in results) + assert total_occurrences == 3 + + def test_deduplication_produces_unique_patterns( + self, simple_network: Network + ) -> None: + """Test that deduplicated iterations produce single unique result.""" + # Use a deterministic policy that always produces same result + rule = FailureRule( + entity_scope="node", + conditions=[FailureCondition(attr="type", operator="==", value="router")], + rule_type="all", # Always selects same nodes + ) + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + policy_set = FailurePolicySet() + policy_set.policies["test"] = policy + + fm = FailureManager(simple_network, policy_set, policy_name="test") + + def mock_analysis(network, excluded_nodes, excluded_links, **kwargs): + return {"mock": True} + + result = fm.run_monte_carlo_analysis( + analysis_func=mock_analysis, + iterations=5, + store_failure_patterns=True, + seed=42, + ) + + # All 5 iterations should produce same pattern -> 1 
unique result + results = result["results"] + assert len(results) == 1 # All deduplicated to 1 unique pattern + + # Metadata should report 1 unique pattern from 5 iterations + assert result["metadata"]["unique_patterns"] == 1 + assert result["metadata"]["iterations"] == 5 + + def test_trace_deterministic_across_runs(self, simple_network: Network) -> None: + """Test that trace is deterministic with fixed seed across runs.""" + rule = FailureRule(entity_scope="node", rule_type="choice", count=1) + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + policy_set = FailurePolicySet() + policy_set.policies["test"] = policy + + fm = FailureManager(simple_network, policy_set, policy_name="test") + + def mock_analysis(network, excluded_nodes, excluded_links, **kwargs): + return {"mock": True} + + result1 = fm.run_monte_carlo_analysis( + analysis_func=mock_analysis, + iterations=5, + store_failure_patterns=True, + seed=42, + ) + result2 = fm.run_monte_carlo_analysis( + analysis_func=mock_analysis, + iterations=5, + store_failure_patterns=True, + seed=42, + ) + + # Compare unique patterns count + assert len(result1["results"]) == len(result2["results"]) + + # Compare metadata + assert ( + result1["metadata"]["unique_patterns"] + == result2["metadata"]["unique_patterns"] + ) + + def test_no_trace_when_store_failure_patterns_false( + self, simple_network: Network + ) -> None: + """Test that trace is not captured when store_failure_patterns=False.""" + rule = FailureRule(entity_scope="node", rule_type="choice", count=1) + policy = FailurePolicy(modes=[FailureMode(weight=1.0, rules=[rule])]) + policy_set = FailurePolicySet() + policy_set.policies["test"] = policy + + fm = FailureManager(simple_network, policy_set, policy_name="test") + + def mock_analysis(network, excluded_nodes, excluded_links, **kwargs): + return {"mock": True} + + result = fm.run_monte_carlo_analysis( + analysis_func=mock_analysis, + iterations=3, + store_failure_patterns=False, + seed=42, + ) + + # Results should still be present (just without trace) + assert len(result["results"]) > 0 diff --git a/tests/model/test_risk_groups.py b/tests/model/test_risk_groups.py index 44187d1..af7484b 100644 --- a/tests/model/test_risk_groups.py +++ b/tests/model/test_risk_groups.py @@ -171,14 +171,12 @@ def test_risk_group_mixed_membership(self): # Disable group2 - affects A and B net.disable_risk_group("group2") assert net.nodes["A"].disabled is True # A still disabled (group1) - assert net.nodes["B"].disabled is True # B now disabled (group2) + assert net.nodes["B"].disabled is True # B disabled (group2) assert net.links[link.id].disabled is True # link still disabled (group1) # Enable group1 - A should be enabled because it has group1, link should be enabled net.enable_risk_group("group1") - assert ( - net.nodes["A"].disabled is False - ) # A enabled (has group1 which is now enabled) + assert net.nodes["A"].disabled is False # A enabled (group1 is enabled) assert net.nodes["B"].disabled is True # B still disabled (group2) assert ( net.links[link.id].disabled is False diff --git a/tests/model/test_selection.py b/tests/model/test_selection.py index f4f6b6d..787bb87 100644 --- a/tests/model/test_selection.py +++ b/tests/model/test_selection.py @@ -97,7 +97,6 @@ def test_select_node_groups_multiple_capture_groups(self, complex_network): # Should have groups for each combination found assert len(node_groups) >= 2 - # Note: The legacy attr: syntax has been removed. 
# For attribute-based grouping, use the unified selector system with # {"group_by": "attr_name"} dict selectors via normalize_selector/select_nodes. diff --git a/tests/profiling/test_profiling.py b/tests/profiling/test_profiling.py index d114857..5aeff09 100644 --- a/tests/profiling/test_profiling.py +++ b/tests/profiling/test_profiling.py @@ -36,7 +36,7 @@ def test_step_profiling_basic(self): assert profile.step_name == "test_step" assert profile.step_type == "TestStep" assert profile.wall_time > 0 - assert profile.cpu_time >= 0.0 # Always has CPU profiling now + assert profile.cpu_time >= 0.0 # CPU profiling is always enabled assert profile.function_calls >= 0 def test_step_profiling_with_memory(self): diff --git a/tests/workflow/test_base.py b/tests/workflow/test_base.py index 9d7f3fd..6579b1c 100644 --- a/tests/workflow/test_base.py +++ b/tests/workflow/test_base.py @@ -73,7 +73,7 @@ def run(self, scenario) -> None: assert md.step_type == "Dummy" assert md.step_name == "d1" assert isinstance(md.execution_order, int) and md.execution_order >= 0 - # New fields + # Seed fields assert hasattr(md, "scenario_seed") and md.scenario_seed == 1010 assert hasattr(md, "step_seed") assert hasattr(md, "seed_source") diff --git a/tests/workflow/test_capacity_envelope_analysis.py b/tests/workflow/test_capacity_envelope_analysis.py index eaf4f9f..f252b99 100644 --- a/tests/workflow/test_capacity_envelope_analysis.py +++ b/tests/workflow/test_capacity_envelope_analysis.py @@ -72,7 +72,6 @@ def test_initialization_defaults(self): assert step.parallelism == "auto" assert step.shortest_path is False assert step.flow_placement == FlowPlacement.PROPORTIONAL - assert step.baseline is False assert step.seed is None assert step.store_failure_patterns is False assert step.include_flow_details is False @@ -88,7 +87,6 @@ def test_initialization_custom_values(self): parallelism=4, shortest_path=True, flow_placement=FlowPlacement.EQUAL_BALANCED, - baseline=True, seed=42, store_failure_patterns=True, include_flow_details=True, @@ -102,15 +100,14 @@ def test_initialization_custom_values(self): assert step.parallelism == 4 assert step.shortest_path is True assert step.flow_placement == FlowPlacement.EQUAL_BALANCED - assert step.baseline is True assert step.seed == 42 assert step.store_failure_patterns is True assert step.include_flow_details is True def test_validation_errors(self): """Test parameter validation.""" - with pytest.raises(ValueError, match="iterations must be >= 1"): - MaxFlow(source="^A", sink="^C", iterations=0) + with pytest.raises(ValueError, match="iterations must be >= 0"): + MaxFlow(source="^A", sink="^C", iterations=-1) with pytest.raises(ValueError, match="parallelism must be >= 1"): MaxFlow(source="^A", sink="^C", parallelism=0) @@ -118,9 +115,6 @@ def test_validation_errors(self): with pytest.raises(ValueError, match="mode must be 'combine' or 'pairwise'"): MaxFlow(source="^A", sink="^C", mode="invalid") - with pytest.raises(ValueError, match="baseline=True requires iterations >= 2"): - MaxFlow(source="^A", sink="^C", baseline=True, iterations=1) - def test_flow_placement_enum_usage(self): """Test that FlowPlacement enum is used correctly.""" step = MaxFlow( @@ -138,33 +132,34 @@ def test_run_with_mock_failure_manager( mock_failure_manager_class.return_value = mock_failure_manager # Mock the convenience method results returning unified flow_results + # Baseline is separate, results contains only failure iterations mock_raw = { - "results": [ - { - "failure_id": "baseline", - "failure_state": 
{"excluded_nodes": [], "excluded_links": []}, - "flows": [ - { - "source": "A", - "destination": "C", - "priority": 0, - "demand": 5.0, - "placed": 5.0, - "dropped": 0.0, - "cost_distribution": {}, - "data": {}, - } - ], - "summary": { - "total_demand": 5.0, - "total_placed": 5.0, - "overall_ratio": 1.0, - "dropped_flows": 0, - "num_flows": 1, - }, - } - ], - "metadata": {"iterations": 1, "parallelism": 1, "baseline": False}, + "baseline": { + "failure_id": "", + "failure_state": {"excluded_nodes": [], "excluded_links": []}, + "failure_trace": None, + "flows": [ + { + "source": "A", + "destination": "C", + "priority": 0, + "demand": 5.0, + "placed": 5.0, + "dropped": 0.0, + "cost_distribution": {}, + "data": {}, + } + ], + "summary": { + "total_demand": 5.0, + "total_placed": 5.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 1, + }, + }, + "results": [], # No failure iterations for this test + "metadata": {"iterations": 1, "parallelism": 1}, } mock_failure_manager.run_max_flow_monte_carlo.return_value = mock_raw @@ -195,7 +190,6 @@ def test_run_with_mock_failure_manager( assert kwargs["parallelism"] == 1 assert kwargs["shortest_path"] is False assert kwargs["flow_placement"] == step.flow_placement - assert kwargs["baseline"] is False assert kwargs["seed"] is None assert kwargs["store_failure_patterns"] is False assert kwargs["include_flow_summary"] is False @@ -205,7 +199,8 @@ def test_run_with_mock_failure_manager( data = exported["steps"]["envelope"]["data"] assert isinstance(data, dict) assert "flow_results" in data and isinstance(data["flow_results"], list) - assert len(data["flow_results"]) == 1 + # No failure results, but baseline should be present + assert "baseline" in data @patch("ngraph.workflow.max_flow_step.FailureManager") def test_run_with_failure_patterns(self, mock_failure_manager_class, mock_scenario): @@ -214,34 +209,34 @@ def test_run_with_failure_patterns(self, mock_failure_manager_class, mock_scenar mock_failure_manager = MagicMock() mock_failure_manager_class.return_value = mock_failure_manager - # Mock raw results and patterns + # Mock raw results with failure_trace on each result mock_raw = { "results": [ - { - "failure_id": "deadbeef", - "failure_state": { - "excluded_nodes": ["node1"], - "excluded_links": [], - }, - "flows": [], - "summary": { - "total_demand": 0.0, - "total_placed": 0.0, - "overall_ratio": 1.0, - "dropped_flows": 0, - "num_flows": 0, + MagicMock( + failure_id="deadbeef", + failure_state={"excluded_nodes": ["node1"], "excluded_links": []}, + failure_trace={"mode_index": 0}, + occurrence_count=2, + to_dict=lambda: { + "failure_id": "deadbeef", + "failure_state": { + "excluded_nodes": ["node1"], + "excluded_links": [], + }, + "failure_trace": {"mode_index": 0}, + "occurrence_count": 2, + "flows": [], + "summary": { + "total_demand": 0.0, + "total_placed": 0.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 0, + }, }, - } - ], - "metadata": {"iterations": 2, "parallelism": 1, "baseline": False}, - "failure_patterns": [ - { - "iteration_index": 0, - "is_baseline": False, - "excluded_nodes": ["node1"], - "excluded_links": [], - } + ) ], + "metadata": {"iterations": 2, "parallelism": 1, "unique_patterns": 1}, } mock_failure_manager.run_max_flow_monte_carlo.return_value = mock_raw @@ -268,7 +263,6 @@ def test_capacity_envelope_with_failures_mocked(self): mode="combine", iterations=2, parallelism=1, - baseline=False, store_failure_patterns=True, ) @@ -280,11 +274,29 @@ def test_capacity_envelope_with_failures_mocked(self): ) 
# Mock the convenience method call results (unified flow_results) + # Baseline is separate, results contains only failures mock_raw = { + "baseline": { + "failure_id": "", + "failure_state": {"excluded_nodes": [], "excluded_links": []}, + "failure_trace": None, + "flows": [], + "summary": { + "total_demand": 0.0, + "total_placed": 0.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 0, + }, + }, "results": [ { - "failure_id": "", - "failure_state": {"excluded_nodes": [], "excluded_links": []}, + "failure_id": "abc123", + "failure_state": { + "excluded_nodes": [], + "excluded_links": ["link1"], + }, + "failure_trace": {"mode_index": 0}, "flows": [], "summary": { "total_demand": 0.0, @@ -295,7 +307,7 @@ def test_capacity_envelope_with_failures_mocked(self): }, } ], - "metadata": {"iterations": 2, "parallelism": 1, "baseline": False}, + "metadata": {"iterations": 2, "parallelism": 1}, } # Mock the FailureManager class and its convenience method @@ -368,4 +380,141 @@ def test_include_flow_summary_functionality( _, kwargs = mock_failure_manager.run_max_flow_monte_carlo.call_args assert kwargs["include_flow_summary"] is True - # Verify run without error; detailed stats are embedded in flow_results entries now + # Verify run without error; detailed stats are embedded in flow_results entries + + @patch("ngraph.workflow.max_flow_step.FailureManager") + def test_failure_trace_persisted_on_results( + self, mock_failure_manager_class, mock_scenario + ): + """Test that failure_trace is persisted on flow_results.""" + mock_failure_manager = mock_failure_manager_class.return_value + + # Create mock result with failure_trace + mock_result = MagicMock() + mock_result.failure_id = "abc123" + mock_result.failure_state = {"excluded_nodes": [], "excluded_links": ["link1"]} + mock_result.failure_trace = { + "mode_index": 0, + "mode_attrs": {"severity": "single"}, + "selections": [ + { + "rule_index": 0, + "entity_scope": "link", + "rule_type": "choice", + "matched_count": 3, + "selected_ids": ["link1"], + } + ], + "expansion": {"nodes": [], "links": [], "risk_groups": []}, + } + mock_result.occurrence_count = 2 + mock_result.to_dict.return_value = { + "failure_id": "abc123", + "failure_state": {"excluded_nodes": [], "excluded_links": ["link1"]}, + "failure_trace": mock_result.failure_trace, + "occurrence_count": 2, + "flows": [], + "summary": { + "total_demand": 0.0, + "total_placed": 0.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 0, + }, + } + + # Mock baseline + mock_baseline = MagicMock() + mock_baseline.to_dict.return_value = { + "failure_id": "", + "failure_state": {"excluded_nodes": [], "excluded_links": []}, + "failure_trace": None, + "occurrence_count": 1, + "flows": [], + "summary": { + "total_demand": 0.0, + "total_placed": 0.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 0, + }, + } + + mock_raw = { + "baseline": mock_baseline, + "results": [mock_result], + "metadata": {"iterations": 2, "parallelism": 1, "unique_patterns": 1}, + } + mock_failure_manager.run_max_flow_monte_carlo.return_value = mock_raw + + step = MaxFlow( + name="test_step", + source="^A", + sink="^C", + iterations=2, + store_failure_patterns=True, + parallelism=1, + ) + step.execute(mock_scenario) + + # Verify results are persisted + exported = mock_scenario.results.to_dict() + data = exported["steps"]["test_step"]["data"] + + # Verify flow_results contains failure_trace + assert len(data["flow_results"]) == 1 + result = data["flow_results"][0] + assert result["failure_id"] == "abc123" 
+ assert result["failure_trace"]["mode_index"] == 0 + assert result["occurrence_count"] == 2 + + # Verify baseline is stored separately in data + assert "baseline" in data + assert data["baseline"]["failure_id"] == "" + + @patch("ngraph.workflow.max_flow_step.FailureManager") + def test_no_failure_trace_when_disabled( + self, mock_failure_manager_class, mock_scenario + ): + """Test that failure_trace is None when store_failure_patterns=False.""" + mock_failure_manager = mock_failure_manager_class.return_value + + mock_result = MagicMock() + mock_result.failure_trace = None # No trace when disabled + mock_result.occurrence_count = 1 + mock_result.to_dict.return_value = { + "failure_id": "", + "failure_state": None, + "failure_trace": None, + "occurrence_count": 1, + "flows": [], + "summary": { + "total_demand": 0.0, + "total_placed": 0.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 0, + }, + } + + mock_raw = { + "results": [mock_result], + "metadata": {"iterations": 1, "parallelism": 1, "unique_patterns": 1}, + } + mock_failure_manager.run_max_flow_monte_carlo.return_value = mock_raw + + step = MaxFlow( + name="test_step_disabled", + source="^A", + sink="^C", + iterations=1, + store_failure_patterns=False, + parallelism=1, + ) + step.execute(mock_scenario) + + # Verify flow_results exist but have no trace + exported = mock_scenario.results.to_dict() + data = exported["steps"]["test_step_disabled"]["data"] + assert len(data["flow_results"]) == 1 + assert data["flow_results"][0]["failure_trace"] is None diff --git a/tests/workflow/test_tm_analysis_perf_safety.py b/tests/workflow/test_tm_analysis_perf_safety.py index 8d84cbf..16806f3 100644 --- a/tests/workflow/test_tm_analysis_perf_safety.py +++ b/tests/workflow/test_tm_analysis_perf_safety.py @@ -18,7 +18,7 @@ def __init__( def test_tm_basic_behavior_unchanged(monkeypatch): - # Small sanity test that the step runs end-to-end and stores new outputs + # Small sanity test that the step runs end-to-end and stores outputs from ngraph.model.demand.matrix import TrafficMatrixSet from ngraph.model.demand.spec import TrafficDemand from ngraph.model.network import Link, Network, Node @@ -64,7 +64,6 @@ class _FailurePolicySetStub: step = TrafficMatrixPlacement( matrix_name="default", iterations=2, - baseline=True, placement_rounds="auto", include_flow_details=False, ) diff --git a/tests/workflow/test_traffic_matrix_placement.py b/tests/workflow/test_traffic_matrix_placement.py index eae48bd..a16802a 100644 --- a/tests/workflow/test_traffic_matrix_placement.py +++ b/tests/workflow/test_traffic_matrix_placement.py @@ -24,8 +24,25 @@ def test_traffic_matrix_placement_stores_core_outputs( mock_td.priority = 0 mock_scenario.traffic_matrix_set.get_matrix.return_value = [mock_td] - # Mock FailureManager return value: two iterations with structured dicts + # Mock FailureManager return value: baseline separate, failure iterations in results mock_raw = { + "baseline": { + "demands": [ + { + "src": "A", + "dst": "B", + "priority": 0, + "offered_gbps": 10.0, + "placed_gbps": 10.0, + "placement_ratio": 1.0, + } + ], + "summary": { + "total_offered_gbps": 10.0, + "total_placed_gbps": 10.0, + "overall_ratio": 1.0, + }, + }, "results": [ { "demands": [ @@ -62,8 +79,7 @@ def test_traffic_matrix_placement_stores_core_outputs( }, }, ], - "metadata": {"iterations": 2}, - "failure_patterns": {}, + "metadata": {"iterations": 2, "unique_patterns": 1}, } mock_failure_manager = MagicMock() mock_failure_manager_class.return_value = mock_failure_manager @@ 
-73,12 +89,11 @@ def test_traffic_matrix_placement_stores_core_outputs( name="tm_step", matrix_name="default", iterations=2, - baseline=False, ) mock_scenario.results = Results() step.execute(mock_scenario) - # Verify new schema outputs exist and have expected shapes + # Verify schema outputs exist and have expected shapes exported = mock_scenario.results.to_dict() data = exported["steps"]["tm_step"]["data"] assert isinstance(data, dict) @@ -102,8 +117,21 @@ def test_traffic_matrix_placement_flow_details_edges( mock_td.priority = 0 mock_scenario.traffic_matrix_set.get_matrix.return_value = [mock_td] - # Mock FailureManager return value with edges used + # Mock FailureManager return value with edges used (baseline separate) mock_raw = { + "baseline": { + "failure_id": "", + "failure_state": None, + "flows": [], + "summary": { + "total_demand": 10.0, + "total_placed": 10.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 0, + }, + "data": {}, + }, "results": [ { "failure_id": "", @@ -154,8 +182,7 @@ def test_traffic_matrix_placement_flow_details_edges( "data": {}, }, ], - "metadata": {"iterations": 2}, - "failure_patterns": {}, + "metadata": {"iterations": 2, "unique_patterns": 1}, } mock_failure_manager = MagicMock() mock_failure_manager_class.return_value = mock_failure_manager @@ -165,7 +192,6 @@ def test_traffic_matrix_placement_flow_details_edges( name="tm_step", matrix_name="default", iterations=2, - baseline=False, include_flow_details=True, include_used_edges=True, ) @@ -206,8 +232,7 @@ def test_traffic_matrix_placement_alpha_scales_demands( }, } ], - "metadata": {"iterations": 1}, - "failure_patterns": {}, + "metadata": {"iterations": 1, "unique_patterns": 1}, } mock_failure_manager = MagicMock() mock_failure_manager_class.return_value = mock_failure_manager @@ -218,7 +243,6 @@ def test_traffic_matrix_placement_alpha_scales_demands( name="tm_step_alpha", matrix_name="default", iterations=1, - baseline=False, alpha=2.5, ) mock_scenario.results = Results() @@ -258,8 +282,7 @@ def test_traffic_matrix_placement_metadata_includes_alpha( }, } ], - "metadata": {"iterations": 1, "baseline": False}, - "failure_patterns": {}, + "metadata": {"iterations": 1, "baseline": False, "unique_patterns": 1}, } mock_failure_manager = MagicMock() mock_failure_manager_class.return_value = mock_failure_manager @@ -269,7 +292,6 @@ def test_traffic_matrix_placement_metadata_includes_alpha( name="tm_step_meta", matrix_name="default", iterations=1, - baseline=False, alpha=3.0, ) mock_scenario.results = Results() @@ -332,8 +354,7 @@ def test_traffic_matrix_placement_alpha_auto_uses_msd( }, } ], - "metadata": {"iterations": 1}, - "failure_patterns": {}, + "metadata": {"iterations": 1, "unique_patterns": 1}, } mock_failure_manager = MagicMock() mock_failure_manager_class.return_value = mock_failure_manager @@ -343,7 +364,6 @@ def test_traffic_matrix_placement_alpha_auto_uses_msd( name="tm_auto", matrix_name="default", iterations=1, - baseline=False, alpha_from_step="msd1", alpha_from_field="data.alpha_star", ) @@ -377,10 +397,167 @@ def test_traffic_matrix_placement_alpha_auto_missing_msd_raises( name="tm_auto", matrix_name="default", iterations=1, - baseline=False, alpha_from_step="msd1", alpha_from_field="data.alpha_star", ) mock_scenario.results = Results() with pytest.raises(ValueError): step.execute(mock_scenario) + + +@patch("ngraph.workflow.traffic_matrix_placement_step.FailureManager") +def test_traffic_matrix_placement_failure_trace_on_results( + mock_failure_manager_class, +) -> None: + 
"""Test that failure_trace is present on flow_results when store_failure_patterns=True.""" + mock_scenario = MagicMock() + mock_td = MagicMock() + mock_td.source = "A" + mock_td.sink = "B" + mock_td.demand = 10.0 + mock_td.mode = "pairwise" + mock_td.priority = 0 + mock_scenario.traffic_matrix_set.get_matrix.return_value = [mock_td] + + # Create mock result with failure_trace and occurrence_count + mock_result = MagicMock() + mock_result.failure_id = "abc123" + mock_result.failure_state = {"excluded_nodes": [], "excluded_links": ["L1"]} + mock_result.failure_trace = { + "mode_index": 0, + "mode_attrs": {"category": "link_failure"}, + "selections": [ + { + "rule_index": 0, + "entity_scope": "link", + "rule_type": "choice", + "matched_count": 5, + "selected_ids": ["L1"], + } + ], + "expansion": {"nodes": [], "links": [], "risk_groups": []}, + } + mock_result.occurrence_count = 2 + mock_result.summary = MagicMock() + mock_result.summary.total_placed = 8.0 + mock_result.to_dict.return_value = { + "failure_id": "abc123", + "failure_state": {"excluded_nodes": [], "excluded_links": ["L1"]}, + "failure_trace": mock_result.failure_trace, + "occurrence_count": 2, + "flows": [], + "summary": { + "total_demand": 10.0, + "total_placed": 8.0, + "overall_ratio": 0.8, + "dropped_flows": 0, + "num_flows": 1, + }, + } + + # Mock baseline + mock_baseline = MagicMock() + mock_baseline.to_dict.return_value = { + "failure_id": "", + "failure_state": {"excluded_nodes": [], "excluded_links": []}, + "failure_trace": None, + "occurrence_count": 1, + "flows": [], + "summary": { + "total_demand": 10.0, + "total_placed": 10.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 1, + }, + } + + mock_raw = { + "baseline": mock_baseline, + "results": [mock_result], + "metadata": {"iterations": 2, "parallelism": 1, "unique_patterns": 1}, + } + mock_failure_manager = MagicMock() + mock_failure_manager_class.return_value = mock_failure_manager + mock_failure_manager.run_demand_placement_monte_carlo.return_value = mock_raw + + step = TrafficMatrixPlacement( + name="tm_patterns", + matrix_name="default", + iterations=2, + store_failure_patterns=True, + ) + mock_scenario.results = Results() + step.execute(mock_scenario) + + # Verify flow_results contains failure_trace + exported = mock_scenario.results.to_dict() + data = exported["steps"]["tm_patterns"]["data"] + + assert len(data["flow_results"]) == 1 + result = data["flow_results"][0] + assert result["failure_id"] == "abc123" + assert result["failure_trace"]["mode_index"] == 0 + assert result["occurrence_count"] == 2 + + # Verify baseline is stored separately in data + assert "baseline" in data + assert data["baseline"]["failure_id"] == "" + + +@patch("ngraph.workflow.traffic_matrix_placement_step.FailureManager") +def test_traffic_matrix_placement_no_trace_when_disabled( + mock_failure_manager_class, +) -> None: + """Test that failure_trace is None when store_failure_patterns=False.""" + mock_scenario = MagicMock() + mock_td = MagicMock() + mock_td.source = "A" + mock_td.sink = "B" + mock_td.demand = 10.0 + mock_td.mode = "pairwise" + mock_td.priority = 0 + mock_scenario.traffic_matrix_set.get_matrix.return_value = [mock_td] + + mock_result = MagicMock() + mock_result.failure_trace = None # No trace when disabled + mock_result.occurrence_count = 1 + mock_result.summary = MagicMock() + mock_result.summary.total_placed = 10.0 + mock_result.to_dict.return_value = { + "failure_id": "", + "failure_state": None, + "failure_trace": None, + "occurrence_count": 1, + 
"flows": [], + "summary": { + "total_demand": 10.0, + "total_placed": 10.0, + "overall_ratio": 1.0, + "dropped_flows": 0, + "num_flows": 1, + }, + } + + mock_raw = { + "results": [mock_result], + "metadata": {"iterations": 1, "parallelism": 1, "unique_patterns": 1}, + } + mock_failure_manager = MagicMock() + mock_failure_manager_class.return_value = mock_failure_manager + mock_failure_manager.run_demand_placement_monte_carlo.return_value = mock_raw + + step = TrafficMatrixPlacement( + name="tm_no_patterns", + matrix_name="default", + iterations=1, + store_failure_patterns=False, + ) + mock_scenario.results = Results() + step.execute(mock_scenario) + + # Verify flow_results exist but have no trace + exported = mock_scenario.results.to_dict() + data = exported["steps"]["tm_no_patterns"]["data"] + assert len(data["flow_results"]) == 1 + assert data["flow_results"][0]["failure_trace"] is None