Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 226 additions & 9 deletions ngraph/failure_policy.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,235 @@
from dataclasses import dataclass, field
from random import random
from typing import Any, Dict, List, Literal
from random import random, sample


@dataclass
class FailureCondition:
    """
    A single attribute test applied to one entity (node or link).

    A condition names an attribute, a comparison operator, and a reference
    value. It is a plain data holder: nothing is validated at construction
    time, and the comparison itself is carried out by
    :func:`_evaluate_condition`, which raises ``ValueError`` for an operator
    it does not recognize.

    Example usage:

    .. code-block:: yaml

        conditions:
          - attr: "capacity"
            operator: "<"
            value: 100

    :param attr:
        The name of the attribute to inspect, e.g. "type", "capacity".
    :param operator:
        The comparison operator: "==", "!=", "<", "<=", ">", ">=".
    :param value:
        The value to compare against, e.g. "node", 100, True, etc.
    """

    attr: str  # e.g. "type", "capacity", "region"
    operator: str  # "==", "!=", "<", "<=", ">", ">="
    value: Any  # e.g. "node", 100, "east_coast"


@dataclass
class FailureRule:
    """
    A single rule defining how to match entities and then select them for failure.

    Matching and selection are two separate steps: ``conditions`` combined by
    ``logic`` decide which entities are candidates, then ``rule_type`` decides
    which of those candidates actually fail.

    :param conditions:
        A list of :class:`FailureCondition` to filter matching entities.
    :param logic:
        How to combine the conditions for matching: "and", "or", or "any".
        - "and": all conditions must be true
        - "or": at least one condition is true
        - "any": skip condition checks; everything is matched
    :param rule_type:
        The selection strategy. One of:
        - "random": pick each matched entity with `probability`
        - "choice": pick up to `count` from matched (fewer if fewer matched)
        - "all": pick all matched
    :param probability:
        Probability of selecting any matched entity, a float in [0, 1]
        (used only if rule_type="random").
    :param count:
        Number of matched entities to pick (used only if rule_type="choice").
    :raises ValueError:
        If ``rule_type`` is "random" and ``probability`` is outside [0, 1], or
        if ``rule_type`` is "choice" and ``count`` is negative.
    """

    conditions: List[FailureCondition] = field(default_factory=list)
    logic: Literal["and", "or", "any"] = "and"
    rule_type: Literal["random", "choice", "all"] = "all"
    probability: float = 1.0
    count: int = 1

    def __post_init__(self) -> None:
        # Validate only the parameter the chosen strategy actually reads, so a
        # rule that ignores `probability` or `count` is never rejected for it.
        # The chained comparison is also False for NaN, which is rejected too.
        if self.rule_type == "random" and not 0.0 <= self.probability <= 1.0:
            raise ValueError(
                f"probability={self.probability} must be within [0, 1] "
                f"for rule_type='random'."
            )
        if self.rule_type == "choice" and self.count < 0:
            raise ValueError(
                f"count={self.count} must be non-negative for rule_type='choice'."
            )


@dataclass
class FailurePolicy:
    """
    A container for multiple FailureRules and arbitrary metadata in `attrs`.

    The method :meth:`apply_failures` merges nodes and links into a single
    dictionary (by their unique ID), and then applies each rule in turn,
    building a union of all failed entities.

    :param rules:
        A list of :class:`FailureRule` objects to apply.
    :param attrs:
        A dictionary for storing policy-wide metadata (e.g. "name", "description").
    """

    rules: List[FailureRule] = field(default_factory=list)
    attrs: Dict[str, Any] = field(default_factory=dict)

    def apply_failures(
        self, nodes: Dict[str, Dict[str, Any]], links: Dict[str, Dict[str, Any]]
    ) -> List[str]:
        """
        Identify which entities (nodes or links) fail according to the
        defined rules.

        :param nodes:
            A mapping of node_name -> node.attrs, where node.attrs has at least
            a "type" = "node".
        :param links:
            A mapping of link_id -> link.attrs, where link.attrs has at least
            a "type" = "link".
        :returns:
            A sorted list of failed entity IDs, without duplicates. For nodes,
            that ID is typically the node's name. For links, it's the link's ID.
        """
        # Merge nodes and links into a single map of entity_id -> entity_attrs
        # e.g. { "SEA": { "type": "node", ...}, "SEA-DEN-xxx": { "type": "link", ...} }
        # NOTE: if a link ID equals a node name, the link's attrs replace the node's.
        all_entities = {**nodes, **links}

        # A set gives the union across rules: an entity picked by several
        # rules is reported once.
        failed_ids = set()
        for rule in self.rules:
            matched = self._match_entities(all_entities, rule.conditions, rule.logic)
            failed_ids.update(self._select_entities(matched, all_entities, rule))

        # Sort so the result order does not depend on set iteration order,
        # which varies between interpreter runs for string keys.
        return sorted(failed_ids)

    def _match_entities(
        self,
        all_entities: Dict[str, Dict[str, Any]],
        conditions: List[FailureCondition],
        logic: str,
    ) -> List[str]:
        """
        Find which entities (by ID) satisfy the given list of conditions
        combined by 'and'/'or' logic (or 'any' to skip checks).

        :param all_entities:
            Mapping of entity_id -> attribute dict.
        :param conditions:
            List of :class:`FailureCondition` to apply.
        :param logic:
            "and", "or", or "any".
        :returns:
            A list of entity IDs that match, in the iteration order of
            ``all_entities``.
        """
        return [
            entity_id
            for entity_id, entity_attrs in all_entities.items()
            if self._evaluate_conditions(entity_attrs, conditions, logic)
        ]

    @staticmethod
    def _evaluate_conditions(
        entity_attrs: Dict[str, Any], conditions: List[FailureCondition], logic: str
    ) -> bool:
        """
        Check if the given entity (via entity_attrs) meets all/any of the conditions.

        :param entity_attrs:
            The dictionary of attributes for a single entity (node or link).
        :param conditions:
            A list of conditions to evaluate.
        :param logic:
            "and" -> all must be true
            "or"  -> at least one true
            "any" -> skip condition checks (always true)
        :returns:
            True if conditions pass for the specified logic, else False.
        :raises ValueError:
            If ``conditions`` is non-empty and ``logic`` is not one of
            "and", "or", "any".
        """
        if logic == "any":
            return True  # means "select everything"
        if not conditions:
            return False  # no conditions => no match, unless logic='any'

        # Resolve the combinator before evaluating anything, so an unsupported
        # logic value is reported without running any condition.
        if logic == "and":
            combine = all
        elif logic == "or":
            combine = any
        else:
            raise ValueError(f"Unsupported logic: {logic}")

        # Generator input lets all()/any() stop at the first decisive condition.
        return combine(_evaluate_condition(entity_attrs, cond) for cond in conditions)

    @staticmethod
    def _select_entities(
        entity_ids: List[str],
        all_entities: Dict[str, Dict[str, Any]],
        rule: FailureRule,
    ) -> List[str]:
        """
        Select which entity IDs will fail from the matched set, based on rule_type.

        :param entity_ids:
            IDs that matched the rule's conditions.
        :param all_entities:
            The full entity dictionary (currently unused by every rule_type).
        :param rule:
            The FailureRule specifying how to pick the final subset.
        :returns:
            The final list of entity IDs that fail from this rule.
        :raises ValueError:
            If ``rule.rule_type`` is not "random", "choice", or "all".
        """
        if rule.rule_type == "random":
            # Independent draw per entity.
            return [e for e in entity_ids if random() < rule.probability]
        if rule.rule_type == "choice":
            # Never ask for more than was matched; sample() would raise.
            count = min(rule.count, len(entity_ids))
            # Use sorted(...) to ensure consistent picks when testing
            return sample(sorted(entity_ids), k=count)
        if rule.rule_type == "all":
            # Return a copy so callers cannot alias the matched list.
            return list(entity_ids)
        raise ValueError(f"Unsupported rule_type: {rule.rule_type}")


def _evaluate_condition(entity: Dict[str, Any], cond: FailureCondition) -> bool:
"""
Evaluate one condition (attr, operator, value) against an entity's attrs.

:param entity:
The entity's attribute dictionary (node.attrs or link.attrs).
:param cond:
A single :class:`FailureCondition` specifying 'attr', 'operator', 'value'.
:returns:
True if the condition passes, else False.
:raises ValueError:
If the condition's operator is not recognized.
"""
derived_value = entity.get(cond.attr, None)
op = cond.operator
if op == "==":
return derived_value == cond.value
elif op == "!=":
return derived_value != cond.value
elif op == "<":
return derived_value < cond.value
elif op == "<=":
return derived_value <= cond.value
elif op == ">":
return derived_value > cond.value
elif op == ">=":
return derived_value >= cond.value
else:
raise ValueError(f"Unsupported operator: {op}")
46 changes: 34 additions & 12 deletions ngraph/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

def new_base64_uuid() -> str:
    """
    Generate a Base64-encoded UUID without padding (a string with 22 characters).

    A random (version 4) UUID is 16 bytes, which URL-safe Base64 encodes to
    24 characters ending in two "=" padding characters; stripping the padding
    leaves 22.
    """
    raw_bytes = uuid.uuid4().bytes
    padded = base64.urlsafe_b64encode(raw_bytes).decode("ascii")
    return padded.rstrip("=")

Expand All @@ -21,7 +21,12 @@ class Node:
in the Network's node dictionary.

:param name: The unique name of the node.
:param attrs: Optional extra metadata for the node.
:param attrs: Optional extra metadata for the node. For example:
{
"type": "node", # auto-tagged upon add_node
"coords": [lat, lon], # user-provided
"region": "west_coast" # user-provided
}
"""

name: str
Expand All @@ -42,8 +47,13 @@ class Link:
:param capacity: Link capacity (default 1.0).
:param latency: Link latency (default 1.0).
:param cost: Link cost (default 1.0).
:param attrs: Optional extra metadata for the link.
:param id: Auto-generated unique link identifier.
:param attrs: Optional extra metadata for the link. For example:
{
"type": "link", # auto-tagged upon add_link
"distance_km": 1500, # user-provided
"fiber_provider": "Lumen", # user-provided
}
:param id: Auto-generated unique link identifier, e.g. "SEA-DEN-abCdEf..."
"""

source: str
Expand All @@ -67,13 +77,13 @@ class Network:
"""
A container for network nodes and links.

Nodes are stored in a dictionary keyed by their unique names.
Links are stored in a dictionary keyed by their auto-generated IDs.
Nodes are stored in a dictionary keyed by their unique names (:attr:`Node.name`).
Links are stored in a dictionary keyed by their auto-generated IDs (:attr:`Link.id`).
The 'attrs' dict allows extra network metadata.

:param nodes: Mapping from node name to Node.
:param links: Mapping from link id to Link.
:param attrs: Optional extra metadata for the network.
:param nodes: Mapping from node name -> Node object.
:param links: Mapping from link id -> Link object.
:param attrs: Optional extra metadata for the network itself.
"""

nodes: Dict[str, Node] = field(default_factory=dict)
Expand All @@ -82,21 +92,33 @@ class Network:

def add_node(self, node: Node) -> None:
    """
    Add a node to the network, keyed by its :attr:`Node.name`.

    This method also auto-tags the node with ``node.attrs["type"] = "node"``
    if it's not already set. A node rejected as a duplicate is left
    untouched.

    :param node: The Node to add.
    :raises ValueError: If a node with the same name is already in the network.
    """
    if node.name in self.nodes:
        raise ValueError(f"Node '{node.name}' already exists in the network.")

    # Tag only after the duplicate check, so a rejected node is not mutated
    # (same validate-then-tag order as add_link).
    node.attrs.setdefault("type", "node")
    self.nodes[node.name] = node

def add_link(self, link: Link) -> None:
    """
    Add a link to the network, keyed by its auto-generated :attr:`Link.id`.

    Both endpoints are checked first; only a link that passes is auto-tagged
    with ``link.attrs["type"] = "link"`` (if it's not already set) and stored.

    :param link: The Link to add.
    :raises ValueError: If the source/target node is not present in the network.
    """
    known_nodes = self.nodes
    if link.source not in known_nodes:
        raise ValueError(f"Source node '{link.source}' not found in network.")
    if link.target not in known_nodes:
        raise ValueError(f"Target node '{link.target}' not found in network.")

    link_attrs = link.attrs
    link_attrs.setdefault("type", "link")
    self.links[link.id] = link
Loading