diff --git a/README.md b/README.md index 8193bbd..5669beb 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ An implementation of Boruvka's algorithm to find a minimum spanning tree in a gr ``` Finding MST with Boruvka's algorithm: Vertices: [0, 1, 2, 3, 4, 5, 6, 7, 8] -Edges (vertex_1, vertex_2, weight): +Edges (node1, node2, weight): (0, 1, 4) (0, 6, 7) (1, 2, 9) @@ -50,7 +50,7 @@ Added edge 0 - 6 with weight 7 to MST. Added edge 2 - 3 with weight 6 to MST. MST found with Boruvka's algorithm. -MST edges (vertex_1, vertex_2, weight): +MST edges (node1, node2, weight): (0, 1, 4) (0, 6, 7) (2, 3, 6) diff --git a/docs/main.pdf b/docs/main.pdf index 158c12f..4cc548f 100644 Binary files a/docs/main.pdf and b/docs/main.pdf differ diff --git a/docs/main.tex b/docs/main.tex index a1745b7..daf872a 100644 --- a/docs/main.tex +++ b/docs/main.tex @@ -14,7 +14,7 @@ \begin{document} \title{Borůvka's Algorithm} -\author{Student Number: 690065435} +\author{Isaac Cheng} \date{December 2022} \maketitle @@ -29,12 +29,6 @@ \vspace*{\fill} \begin{center} YouTube Video Link: \url{https://www.youtube.com/watch?v=n5LNVobuBNU} - -\vspace{0.5em} -Word Count: 1,469 - -\vspace{1em} -I certify that all material in this report which is not my own work has been identified. \end{center} \vspace{1em} @@ -71,7 +65,7 @@ \section{Pseudocode} \nl Initialise a list of components $N$, where $N_k$ denotes the vertices in component $k$. - \nl \For{vertex $v \in V$}{ + \nl \For{node $v \in V$}{ \nl $N_v = v$. } @@ -158,17 +152,17 @@ \subsection{Two-Approximation for the Travelling Salesperson Problem} \begin{algorithm} \caption{Two-Approximation for the Travelling Salesperson Problem with MST-DFS \cite{andreae1995performance}} - \nl Set a vertex as the start. + \nl Set a node as the start. \nl Construct a minimum spanning tree, $T$. - \nl Create a list of vertices, $H$, that is ordered according to when they are visited in a pre-order tree walk of $T$, and add the start vertex at the end. 
+ \nl Create a list of vertices, $H$, that is ordered according to when they are visited in a pre-order tree walk of $T$, and add the start node at the end. \nl Return the path $H$. \end{algorithm} \subsection{Parallel Computation of Minimum Spanning Trees} -Several other algorithms are technically more optimal for finding a minimum spanning tree depending on the input graph -- Prim's algorithm is faster for dense graphs, and Kruskal's algorithm is faster for sparse graphs \cite{bazlamaccci2001minimum}. However, this only considers sequential implementations of the algorithms -- Borůvka's algorithm has become increasingly popular because it is easy to parallelise \cite{mariano2015generic}. This contrasts with the aforementioned algorithms, which are intrinsically serial -- they start with a single component and seek to add edges to it, making it difficult to parallelise them as we must keep and check edges in a strict order. As Borůvka's algorithm starts with multiple components and seeks to connect them with the shortest edge, it can be parallelised by distributing the edges between processors to determine the shortest connecting edge for each vertex \cite{chung1996parallel}. The parallel implementation of Borůvka's algorithm enables faster performance on multi-core or distributed systems, giving it an advantage over other classical minimum spanning tree problems when working at a large scale. +Several other algorithms are technically more optimal for finding a minimum spanning tree depending on the input graph -- Prim's algorithm is faster for dense graphs, and Kruskal's algorithm is faster for sparse graphs \cite{bazlamaccci2001minimum}. However, this only considers sequential implementations of the algorithms -- Borůvka's algorithm has become increasingly popular because it is easy to parallelise \cite{mariano2015generic}. 
This contrasts with the aforementioned algorithms, which are intrinsically serial -- they start with a single component and seek to add edges to it, making it difficult to parallelise them as we must keep and check edges in a strict order. As Borůvka's algorithm starts with multiple components and seeks to connect them with the shortest edge, it can be parallelised by distributing the edges between processors to determine the shortest connecting edge for each node \cite{chung1996parallel}. The parallel implementation of Borůvka's algorithm enables faster performance on multi-core or distributed systems, giving it an advantage over other classical minimum spanning tree algorithms when working at a large scale. \subsection{Faster Sequential Algorithms for Minimum Spanning Trees} The concepts behind Borůvka's algorithm have also been used to develop faster sequential algorithms. For example, the expected linear time minimum spanning tree algorithm proposed by Karger, Klein, and Tarjan runs in O(E) time. It involves an adaptation of Borůvka's algorithm by using the Borůvka step, which reduces the number of vertices in the graph by at least a factor of two, on graph G to create a contracted graph G' \cite{dixon1992verification, king1995simpler}. This is followed by a random sampling step that selects a subgraph H by selecting each edge in G' independently with a probability of 1/2 \cite{bazlamaccci2001minimum}. Finally, the verification step removes F-heavy edges from G' to reduce the graph further using a linear time minimum spanning tree verification algorithm \cite{dixon1992verification, king1995simpler, karger1995randomized}. 
diff --git a/src/boruvkas_algorithm/boruvka.py b/src/boruvkas_algorithm/boruvka.py index 02eed0c..82e157e 100644 --- a/src/boruvkas_algorithm/boruvka.py +++ b/src/boruvkas_algorithm/boruvka.py @@ -5,222 +5,45 @@ import matplotlib.pyplot as plt import networkx as nx -from typing import Dict, List, Optional, Tuple - class Graph: - """ - A graph that contains vertices and edges. - """ + """A graph that contains nodes and edges.""" - def __init__(self, num_vertices: int): + def __init__(self, num_nodes: int) -> None: """ + Initialises the graph with a given number of vertices. + Args: - num_vertices: The number of vertices to generate in the graph. + num_nodes: The number of nodes to generate in the graph. """ - self.vertices = list(range(num_vertices)) - # [(vertex_1, vertex_2, weight)] - self.edges = [] + self.vertices: list[int] = list(range(num_nodes)) + # [(node1, node2, weight)] + self.edges: list[tuple[int, int, int]] = [] - def add_edge(self, vertex_1: int, vertex_2: int, weight: int) -> None: + def add_edge(self, node1: int, node2: int, weight: int) -> None: """ - Add an edge to the graph. + Adds an edge to the graph. Args: - vertex_1: The first vertex of the edge. - vertex_2: The second vertex of the edge. + node1: The first node of the edge. + node2: The second node of the edge. weight: The weight of the edge. Raises: - ValueError: If either vertex does not exist in the graph. + ValueError: If either node does not exist in the graph. """ - if vertex_1 not in self.vertices or vertex_2 not in self.vertices: + if node1 not in self.vertices or node2 not in self.vertices: raise ValueError("One or both vertices not found in graph.") - self.edges.append((vertex_1, vertex_2, weight)) + self.edges.append((node1, node2, weight)) def print_graph_info(self) -> None: - """ - Print the graph's vertices and edges. 
- """ + """Print the graph's vertices and edges.""" print(f"Vertices: {self.vertices}") - print("Edges (vertex_1, vertex_2, weight):") + print("Edges (node1, node2, weight):") for edge in sorted(self.edges): print(f" {edge}") - def merge_components( - self, - vertex_to_component: Dict[int, int], - component_sizes: List[int], - vertex_1: int, - vertex_2: int, - ) -> None: - """ - Merge two components of the graph into one, ensuring that the smaller - component is merged into the larger one to optimize the merging - process. - - Args: - vertex_to_component: A mapping of vertices to their component - identifiers. - component_sizes: A list where the index represents the component - identifier and the value is the size of the - component. - vertex_1: A vertex in the first component to be merged. - vertex_2: A vertex in the second component to be merged. - """ - # Identify the components of the two vertices. - component_1 = vertex_to_component[vertex_1] - component_2 = vertex_to_component[vertex_2] - - # Determine the smaller and larger components. - if component_sizes[component_1] < component_sizes[component_2]: - smaller, larger = component_1, component_2 - else: - smaller, larger = component_2, component_1 - - # Merge the smaller component into larger component. - for vertex, component in vertex_to_component.items(): - if component == smaller: - vertex_to_component[vertex] = larger - # Update the size of the larger component. - component_sizes[larger] += component_sizes[smaller] - - def update_min_edge_per_component( - self, - vertex_to_component: Dict[int, int], - min_connecting_edge_per_component: List[Optional[Tuple]], - ): - """ - Check each edge and update the shortest edge for each vertex if it - connects two components together. - - Args: - vertex_to_component: A dictionary containing the component of each - vertex. - min_connecting_edge_per_component: A list with the shortest edge - for each vertex that connects to - a new component. 
- """ - for edge in self.edges: - vertex_1, vertex_2, weight = edge - vertex_1_component = vertex_to_component[vertex_1] - vertex_2_component = vertex_to_component[vertex_2] - - # If the vertices are in different components and the edge is - # smaller than the current minimum weight edge for either - # component, update them. - if vertex_1_component != vertex_2_component: - if ( - not min_connecting_edge_per_component[vertex_1_component] - or weight < min_connecting_edge_per_component[vertex_1_component][2] - ): - min_connecting_edge_per_component[vertex_1_component] = edge - - if ( - not min_connecting_edge_per_component[vertex_2_component] - or weight < min_connecting_edge_per_component[vertex_2_component][2] - ): - min_connecting_edge_per_component[vertex_2_component] = edge - - def connect_components_with_min_edges( - self, - component_sizes: List[int], - min_connecting_edge_per_component: List[Optional[Tuple]], - mst_edges: List[Tuple[int, int, int]], - mst_weight: int, - vertex_to_component: Dict[int, int], - num_components: int, - ) -> Tuple[int, int]: - """ - Connect components using the minimum connecting edges. - - Args: - component_sizes: List containing the sizes of each component. - min_connecting_edge_per_component: List storing the shortest edge - for each component. - mst_edges: List of edges in the minimum spanning tree. - mst_weight: Total weight of the minimum spanning tree. - vertex_to_component: Dictionary mapping vertices to their - component. - num_components: Total number of components in the graph. - - Returns: - Tuple containing the updated MST weight and number of components. 
- """ - for edge in min_connecting_edge_per_component: - if edge is not None: - vertex_1, vertex_2, weight = edge - if vertex_to_component[vertex_1] != vertex_to_component[vertex_2]: - mst_edges.append((vertex_1, vertex_2, weight)) - mst_weight += weight - self.merge_components( - vertex_to_component, component_sizes, vertex_1, vertex_2 - ) - num_components -= 1 - print( - f"Added edge {vertex_1} - {vertex_2} with " - f"weight {weight} to MST." - ) - - return mst_weight, num_components - - def initialize_components(self) -> Tuple[Dict[int, int], List[int], int]: - """ - Initialize each vertex as its own component with size 1, and set the - initial number of components equal to the number of vertices. - - Returns: - Tuple containing the mapping of vertex to its component, the list - of component sizes, and the initial number of components. - """ - vertex_to_component = {vertex: vertex for vertex in self.vertices} - component_sizes = [1] * len(self.vertices) - num_components = len(self.vertices) - return vertex_to_component, component_sizes, num_components - - def perform_iteration( - self, - vertex_to_component: Dict[int, int], - component_sizes: List[int], - num_components: int, - mst_edges: List[Tuple[int, int, int]], - mst_weight: int, - ): - """ - Perform one iteration of Boruvka's algorithm, finding the minimum - connecting edge for each component and connecting components using - these edges. - - Args: - vertex_to_component: Mapping of vertices to their component. - component_sizes: List containing the sizes of each component. - num_components: Total number of components in the graph. - mst_edges: List of edges in the minimum spanning tree so far. - mst_weight: Total weight of the minimum spanning tree so far. - - Returns: - Tuple containing the updated MST weight and number of components. - """ - # Initialize list to store minimum connecting edge for each component. 
- min_connecting_edge_per_component = [None] * len(self.vertices) - # Update the minimum connecting edge for each component. - self.update_min_edge_per_component( - vertex_to_component, min_connecting_edge_per_component - ) - # Connect components using the minimum connecting edges and update MST - # weight and number of components. - mst_weight, num_components = self.connect_components_with_min_edges( - component_sizes, - min_connecting_edge_per_component, - mst_edges, - mst_weight, - vertex_to_component, - num_components, - ) - - return mst_weight, num_components - - def draw_mst(self, mst_edges: List[Tuple[int, int, int]]) -> None: + def draw_mst(self, mst_edges: list[tuple[int, int, int]]) -> None: """ Draw the graph with the minimum spanning tree highlighted using networkx. @@ -233,8 +56,8 @@ def draw_mst(self, mst_edges: List[Tuple[int, int, int]]) -> None: G.add_nodes_from(self.vertices) # Add all edges to the graph with weights. for edge in self.edges: - vertex1, vertex2, weight = edge - G.add_edge(vertex1, vertex2, weight=weight) + node1, node2, weight = edge + G.add_edge(node1, node2, weight=weight) pos = nx.spring_layout(G) # Draw the graph edges and highlight the edges in the MST in red. nx.draw_networkx_edges( @@ -252,57 +75,128 @@ def draw_mst(self, mst_edges: List[Tuple[int, int, int]]) -> None: plt.axis("off") plt.show() - def run_boruvkas_algorithm(self): + +def find_mst_with_boruvkas_algorithm( + graph: Graph, +) -> tuple[int, list[tuple[int, int, int]]]: + """ + Finds the minimum spanning tree (MST) of a graph using Boruvka's algorithm. + + Args: + graph: The graph to find the MST of. + + Returns: + A tuple containing the total weight of the MST and a list of the + edges in the MST. + """ + + def find(node: int) -> int: """ - Find the minimum spanning tree (MST) of the graph using Boruvka's - algorithm. + Finds the root parent of the node using path compression. + + Args: + node: The node to find the root parent of. 
Returns: - A tuple containing the total weight of the MST and a list of the - edges in the MST. + The root parent of the node. """ - print("\nFinding MST with Boruvka's algorithm:") - self.print_graph_info() - mst_weight = 0 - mst_edges = [] - ( - vertex_to_component, - component_sizes, - num_components, - ) = self.initialize_components() - # Track the number of iterations. - num_iterations = 0 - - # Keep connecting components until only one component remains. - while num_components > 1: - num_iterations += 1 - print( - f"\nIteration {num_iterations}:\nCurrent MST edges: {mst_edges}\n" - f"Current MST Weight: {mst_weight}" - ) - # Perform one iteration of the algorithm. - mst_weight, num_components = self.perform_iteration( - vertex_to_component, - component_sizes, - num_components, - mst_edges, - mst_weight, - ) - - # Summarise the MST found. - print("\nMST found with Boruvka's algorithm.") - print("MST edges (vertex_1, vertex_2, weight):") - for edge in sorted(mst_edges): - print(f" {edge}") - print(f"MST weight: {mst_weight}") + cur_parent = parent[node] + while cur_parent != parent[cur_parent]: + # Compress the links as we go up the chain of parents to make + # it faster to traverse in the future - amortised O(a(n)) time, + # where a(n) is the inverse Ackermann function. + parent[cur_parent] = parent[parent[cur_parent]] + cur_parent = parent[cur_parent] + return cur_parent - return mst_weight, mst_edges + def union(node1: int, node2: int) -> bool: + """ + Combines the two nodes into the larger segment. + Args: + node1: The first node to combine. + node2: The second node to combine. -def main(): - """ - Run Boruvka's algorithm on an example graph. - """ + Returns: + True if the nodes were combined, False if they were already in the + same segment. + """ + root1 = find(node1) + root2 = find(node2) + # If they have the same root parent, they're already connected. 
+ if root1 == root2: + return False + + # Combine the two nodes into the larger segment based on the rank. + if rank[root1] > rank[root2]: + parent[root2] = root1 + rank[root1] += rank[root2] + else: + parent[root1] = root2 + rank[root2] += rank[root1] + return True + + num_vertices = len(graph.vertices) + # Each node is its own parent initially. + parent: list[int] = list(range(num_vertices)) + # Each tree has size 1 (itself) initially. + rank: list[int] = [1] * num_vertices + + print("\nFinding MST with Boruvka's algorithm:") + graph.print_graph_info() + + mst_weight = 0 + mst_edges: list[tuple[int, int, int]] = [] + num_components = num_vertices + num_iterations = 0 + + # Keep connecting components until only one component remains. + while num_components > 1: + num_iterations += 1 + print( + f"\nIteration {num_iterations}:\nCurrent MST edges: {mst_edges}\n" + f"Current MST Weight: {mst_weight}" + ) + + # Find the minimum connecting edge for each component. + min_edge_per_component: list[tuple[int, int, int] | None] = [ + None + ] * num_vertices + for edge in graph.edges: + node1, node2, weight = edge + comp1, comp2 = find(node1), find(node2) + + if comp1 != comp2: + current_min1 = min_edge_per_component[comp1] + if current_min1 is None or weight < current_min1[2]: + min_edge_per_component[comp1] = edge + current_min2 = min_edge_per_component[comp2] + if current_min2 is None or weight < current_min2[2]: + min_edge_per_component[comp2] = edge + + # Connect components using the minimum connecting edges. + for edge in min_edge_per_component: + if edge is not None: + node1, node2, weight = edge + if find(node1) != find(node2): + mst_edges.append(edge) + mst_weight += weight + union(node1, node2) + num_components -= 1 + print(f"Added edge {node1} - {node2} with weight {weight} to MST.") + + # Summarise the MST found. 
+ print("\nMST found with Boruvka's algorithm.") + print("MST edges (node1, node2, weight):") + for edge in sorted(mst_edges): + print(f" {edge}") + print(f"MST weight: {mst_weight}") + + return mst_weight, mst_edges + + +def run_boruvka_example(): + """Runs Boruvka's algorithm on an example graph.""" graph = Graph(9) graph.add_edge(0, 1, 4) graph.add_edge(0, 6, 7) @@ -319,10 +213,11 @@ def main(): graph.add_edge(5, 8, 12) graph.add_edge(6, 7, 1) graph.add_edge(7, 8, 3) - _, mst_edges = graph.run_boruvkas_algorithm() + + _, mst_edges = find_mst_with_boruvkas_algorithm(graph) # Draw the graph with the minimum spanning tree highlighted. graph.draw_mst(mst_edges) if __name__ == "__main__": - main() + run_boruvka_example() diff --git a/tests/test_boruvka.py b/tests/test_boruvka.py index 7030a70..298b159 100644 --- a/tests/test_boruvka.py +++ b/tests/test_boruvka.py @@ -1,6 +1,6 @@ import pytest -from src.boruvkas_algorithm.boruvka import Graph +from boruvkas_algorithm.boruvka import Graph, find_mst_with_boruvkas_algorithm @pytest.fixture @@ -27,20 +27,20 @@ def test_add_edge(setup_graph: Graph): def test_add_edge_invalid_vertices(setup_graph: Graph): """ - Test the addition of an edge with non-existing vertices. + Tests the addition of an edge with non-existing vertices. Expects a ValueError to be raised when trying to add an edge with at least - one non-existing vertex. + one non-existing node. """ graph = setup_graph with pytest.raises(ValueError): - # Use vertex indices that do not exist in the graph. + # Use node indices that do not exist in the graph. graph.add_edge(10, 11, 5) def test_mst(setup_graph: Graph): """ - Test that the MST has the correct total weight and structure by comparing + Tests that the MST has the correct total weight and structure by comparing to known MST values for a predefined graph. 
""" graph = setup_graph @@ -61,7 +61,7 @@ def test_mst(setup_graph: Graph): graph.add_edge(6, 7, 1) graph.add_edge(7, 8, 3) - mst_weight, mst_edges = graph.run_boruvkas_algorithm() + mst_weight, mst_edges = find_mst_with_boruvkas_algorithm(graph) expected_weight = 29 expected_edges = [ (0, 1, 4),