From f1cf808229f4213f91609000ce58642fd12a3eb8 Mon Sep 17 00:00:00 2001 From: kdt523 Date: Mon, 20 Oct 2025 12:20:19 +0530 Subject: [PATCH 1/4] Add_Federated_Averaging_FedAvg_module_with_doctests --- machine_learning/federated_averaging.py | 150 ++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 machine_learning/federated_averaging.py diff --git a/machine_learning/federated_averaging.py b/machine_learning/federated_averaging.py new file mode 100644 index 000000000000..ed38cdff83d9 --- /dev/null +++ b/machine_learning/federated_averaging.py @@ -0,0 +1,150 @@ +""" +Federated Averaging (FedAvg) +https://arxiv.org/abs/1602.05629 + +This module provides a minimal, educational implementation of the Federated +Learning paradigm using the Federated Averaging algorithm. Multiple clients +compute local model updates on their private data and the server aggregates +their updates by (weighted) averaging without collecting raw data. + +Notes +----- +- This implementation is framework-agnostic and uses NumPy arrays to represent + model parameters for simplicity and portability within this repository. +- It demonstrates the mechanics of FedAvg, not production concerns like + privacy amplification (e.g., differential privacy), robustness, or security. + +Terminology +----------- +- Global model: a list of NumPy arrays representing model parameters. +- Client update: new model parameters produced locally, or the delta from the + global model; we aggregate parameters directly here for clarity. + +Examples +-------- +Create three synthetic "clients" whose local training produces simple parameter +arrays, then aggregate them with FedAvg. + +>>> import numpy as np +>>> # Global model with two parameter tensors +>>> global_model = [np.array([0.0, 0.0]), np.array([[0.0]])] +>>> # Client models after local training +>>> client_models = [ +... [np.array([1.0, 2.0]), np.array([[1.0]])], +... [np.array([3.0, 4.0]), np.array([[3.0]])], +... [np.array([5.0, 6.0]), np.array([[5.0]])], +... ] +>>> # Equal weights -> simple average +>>> new_global = federated_average(client_models) +>>> [arr.tolist() for arr in new_global] +[[3.0, 4.0], [[3.0]]] + +Weighted averaging by client data sizes: + +>>> weights = np.array([10, 20, 30], dtype=float) +>>> new_global_w = federated_average(client_models, weights) +>>> [arr.tolist() for arr in new_global_w] +[[3.6666666666666665, 4.666666666666666], [[3.6666666666666665]]] + +Contract +-------- +Inputs: + - client_models: list[list[np.ndarray]]: each inner list mirrors model layers + - weights: Optional[np.ndarray] of shape (num_clients,), non-negative, sums to > 0 +Output: + - list[np.ndarray]: aggregated model parameters, same shapes as client models +Error modes: + - ValueError for empty clients, shape mismatch, or invalid weights +""" + +from __future__ import annotations + +from typing import Iterable, List, Sequence + +import numpy as np + + +def _validate_clients(client_models: Sequence[Sequence[np.ndarray]]) -> None: + if not client_models: + raise ValueError("client_models must be a non-empty list") + # Ensure all clients have same number of layers and shapes + ref_shapes = [tuple(arr.shape) for arr in client_models[0]] + for idx, cm in enumerate(client_models, start=1): + if len(cm) != len(ref_shapes): + raise ValueError("All clients must have the same number of tensors") + for s_ref, arr in zip(ref_shapes, cm): + if tuple(arr.shape) != s_ref: + raise ValueError( + f"Client {idx} tensor shape {tuple(arr.shape)} does not match {s_ref}" + ) + + +def _normalize_weights(weights: np.ndarray, n: int) -> np.ndarray: + if weights.shape != (n,): + raise ValueError(f"weights must have shape ({n},)") + if np.any(weights < 0): + raise ValueError("weights must be non-negative") + total = float(weights.sum()) + if total <= 0.0: + raise ValueError("weights must sum to a positive value") + return weights / total + + +def federated_average( + client_models: Sequence[Sequence[np.ndarray]], + weights: np.ndarray | None = None, +) -> List[np.ndarray]: + """ + Aggregate client model parameters using (weighted) averaging. + + Parameters + ---------- + client_models : list[list[np.ndarray]] + Model parameters for each client; all clients must have same shapes. + weights : np.ndarray | None + Optional non-negative weights per client. If None, equal weights. + + Returns + ------- + list[np.ndarray] + Aggregated model parameters (same shapes as client tensors). + + Examples + -------- + >>> import numpy as np + >>> cm = [ + ... [np.array([1.0, 2.0])], + ... [np.array([3.0, 4.0])], + ... ] + >>> [arr.tolist() for arr in federated_average(cm)] + [[2.0, 3.0]] + >>> w = np.array([1.0, 3.0]) + >>> [arr.tolist() for arr in federated_average(cm, w)] + [[2.5, 3.5]] + """ + _validate_clients(client_models) + num_clients = len(client_models) + + if weights is None: + weights_n = np.full((num_clients,), 1.0 / num_clients, dtype=float) + else: + weights = np.asarray(weights, dtype=float) + weights_n = _normalize_weights(weights, num_clients) + + num_tensors = len(client_models[0]) + aggregated: List[np.ndarray] = [] + for t_idx in range(num_tensors): + # Stack the t_idx-th tensor from each client into shape (num_clients, ...) + stacked = np.stack([np.asarray(cm[t_idx]) for cm in client_models], axis=0) + # Weighted sum across clients axis=0 + # np.tensordot weights of shape (n,) with stacked of shape (n, *dims) + agg = np.tensordot(weights_n, stacked, axes=(0, 0)) + aggregated.append(np.asarray(agg)) + + return aggregated + + +if __name__ == "__main__": + import doctest + + doctest.testmod() From 5e1b69978f27cb13f89468b41f57dffb201fac65 Mon Sep 17 00:00:00 2001 From: kdt523 Date: Mon, 20 Oct 2025 12:34:08 +0530 Subject: [PATCH 2/4] Update_FedAvg_doctests --- machine_learning/federated_averaging.py | 157 ++++++++++++------------ 1 file changed, 78 insertions(+), 79 deletions(-) diff --git a/machine_learning/federated_averaging.py b/machine_learning/federated_averaging.py index ed38cdff83d9..642e3199b13d 100644 --- a/machine_learning/federated_averaging.py +++ b/machine_learning/federated_averaging.py @@ -1,66 +1,77 @@ """ -Federated Averaging (FedAvg) -https://arxiv.org/abs/1602.05629 - -This module provides a minimal, educational implementation of the Federated -Learning paradigm using the Federated Averaging algorithm. Multiple clients -compute local model updates on their private data and the server aggregates -their updates by (weighted) averaging without collecting raw data. - -Notes ------ -- This implementation is framework-agnostic and uses NumPy arrays to represent - model parameters for simplicity and portability within this repository. -- It demonstrates the mechanics of FedAvg, not production concerns like - privacy amplification (e.g., differential privacy), robustness, or security. - -Terminology ------------ -- Global model: a list of NumPy arrays representing model parameters. -- Client update: new model parameters produced locally, or the delta from the - global model; we aggregate parameters directly here for clarity. - -Examples --------- -Create three synthetic "clients" whose local training produces simple parameter -arrays, then aggregate them with FedAvg. - ->>> import numpy as np ->>> # Global model with two parameter tensors ->>> global_model = [np.array([0.0, 0.0]), np.array([[0.0]])] ->>> # Client models after local training ->>> client_models = [ -... [np.array([1.0, 2.0]), np.array([[1.0]])], -... [np.array([3.0, 4.0]), np.array([[3.0]])], -... [np.array([5.0, 6.0]), np.array([[5.0]])], -... ] ->>> # Equal weights -> simple average ->>> new_global = federated_average(client_models) ->>> [arr.tolist() for arr in new_global] -[[3.0, 4.0], [[3.0]]] - -Weighted averaging by client data sizes: - ->>> weights = np.array([10, 20, 30], dtype=float) ->>> new_global_w = federated_average(client_models, weights) ->>> [arr.tolist() for arr in new_global_w] -[[3.6666666666666665, 4.666666666666666], [[3.6666666666666665]]] - -Contract --------- -Inputs: - - client_models: list[list[np.ndarray]]: each inner list mirrors model layers - - weights: Optional[np.ndarray] of shape (num_clients,), non-negative, sums to > 0 -Output: - - list[np.ndarray]: aggregated model parameters, same shapes as client models -Error modes: - - ValueError for empty clients, shape mismatch, or invalid weights +Federated averaging (FedAvg) utilities. + +This module provides a simple NumPy-based implementation of the FedAvg +aggregation algorithm. It supports equal weighting and custom non-negative +weights that are normalized internally. + +Doctests +======== + +Basic equal-weight averaging across two "clients" with two tensors each +(vector and 2x2 matrix): + +>>> A = [np.array([1.0, 2.0]), np.array([[1.0, 2.0], [3.0, 4.0]])] +>>> B = [np.array([3.0, 4.0]), np.array([[5.0, 6.0], [7.0, 8.0]])] +>>> eq = federated_average([A, B]) +>>> eq[0].tolist() +[2.0, 3.0] +>>> eq[1].tolist() +[[3.0, 4.0], [5.0, 6.0]] + +Weighted averaging with weights [2, 1] (normalized to [2/3, 1/3]): + +>>> w = federated_average([A, B], weights=np.array([2.0, 1.0])) +>>> w[0].tolist() +[1.6666666666666665, 2.6666666666666665] +>>> w[1].tolist() +[[2.333333333333333, 3.333333333333333], [4.333333333333333, 5.333333333333333]] + +Error cases: + +- No clients + +>>> federated_average([]) # doctest: +ELLIPSIS +Traceback (most recent call last): +... +ValueError: client_models must be a non-empty list + +- Mismatched number of tensors per client + +>>> C = [np.array([1.0, 2.0])] # only one tensor +>>> federated_average([A, C]) # doctest: +ELLIPSIS +Traceback (most recent call last): +... +ValueError: All clients must have the same number of tensors + +- Mismatched tensor shapes across clients + +>>> C2 = [np.array([1.0, 2.0]), np.array([[1.0, 2.0]])] # second tensor has different shape +>>> federated_average([A, C2]) # doctest: +ELLIPSIS +Traceback (most recent call last): +... +ValueError: Client 2 tensor shape (1, 2) does not match (2, 2) + +- Invalid weights: negative or wrong shape or zero-sum + +>>> federated_average([A, B], weights=np.array([1.0, -1.0])) # doctest: +ELLIPSIS +Traceback (most recent call last): +... +ValueError: weights must be non-negative + +>>> federated_average([A, B], weights=np.array([0.0, 0.0])) # doctest: +ELLIPSIS +Traceback (most recent call last): +... +ValueError: weights must sum to a positive value + +>>> federated_average([A, B], weights=np.array([1.0, 2.0, 3.0])) # doctest: +ELLIPSIS +Traceback (most recent call last): +... +ValueError: weights must have shape (2,) """ from __future__ import annotations - from typing import Iterable, List, Sequence - import numpy as np @@ -94,33 +105,21 @@ def federated_average( client_models: Sequence[Sequence[np.ndarray]], weights: np.ndarray | None = None, ) -> List[np.ndarray]: - """ - Aggregate client model parameters using (weighted) averaging. + """Compute the weighted average of clients' model tensors. Parameters ---------- - client_models : list[list[np.ndarray]] - Model parameters for each client; all clients must have same shapes. - weights : np.ndarray | None - Optional non-negative weights per client. If None, equal weights. + client_models : Sequence[Sequence[np.ndarray]] + A list of clients, each being a sequence of NumPy arrays (tensors). + All clients must have the same number of tensors with identical shapes. + weights : np.ndarray | None, optional + A 1-D array of non-negative weights, one per client. If None, + equal weighting is used. Weights are normalized to sum to 1. Returns ------- - list[np.ndarray] - Aggregated model parameters (same shapes as client tensors). - - Examples - -------- - >>> import numpy as np - >>> cm = [ - ... [np.array([1.0, 2.0])], - ... [np.array([3.0, 4.0])], - ... ] - >>> [arr.tolist() for arr in federated_average(cm)] - [[2.0, 3.0]] - >>> w = np.array([1.0, 3.0]) - >>> [arr.tolist() for arr in federated_average(cm, w)] - [[2.5, 3.5]] + List[np.ndarray] + The list of aggregated tensors with the same shapes as the inputs. """ _validate_clients(client_models) num_clients = len(client_models) From cd5f2dd821790ac7ea3d78e69ebd6e91db9183a9 Mon Sep 17 00:00:00 2001 From: kdt523 Date: Mon, 20 Oct 2025 12:55:09 +0530 Subject: [PATCH 3/4] Rename_normalize_weights_param_to_num_clients --- machine_learning/federated_averaging.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/machine_learning/federated_averaging.py b/machine_learning/federated_averaging.py index 642e3199b13d..0f4818d51f04 100644 --- a/machine_learning/federated_averaging.py +++ b/machine_learning/federated_averaging.py @@ -90,9 +90,9 @@ def _validate_clients(client_models: Sequence[Sequence[np.ndarray]]) -> None: ) -def _normalize_weights(weights: np.ndarray, n: int) -> np.ndarray: - if weights.shape != (n,): - raise ValueError(f"weights must have shape ({n},)") +def _normalize_weights(weights: np.ndarray, num_clients: int) -> np.ndarray: + if weights.shape != (num_clients,): + raise ValueError(f"weights must have shape ({num_clients},)") if np.any(weights < 0): raise ValueError("weights must be non-negative") total = float(weights.sum()) From 45aaf40e4fe8b348b6aaa2b3ec2da01b4ba92876 Mon Sep 17 00:00:00 2001 From: kdt523 Date: Mon, 20 Oct 2025 12:59:37 +0530 Subject: [PATCH 4/4] Fix_ruff_issues_in_FedAvg_module --- machine_learning/federated_averaging.py | 44 ++++++++++++++++++------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/machine_learning/federated_averaging.py b/machine_learning/federated_averaging.py index 0f4818d51f04..5b54f38700f6 100644 --- a/machine_learning/federated_averaging.py +++ b/machine_learning/federated_averaging.py @@ -11,8 +11,14 @@ Basic equal-weight averaging across two "clients" with two tensors each (vector and 2x2 matrix): ->>> A = [np.array([1.0, 2.0]), np.array([[1.0, 2.0], [3.0, 4.0]])] ->>> B = [np.array([3.0, 4.0]), np.array([[5.0, 6.0], [7.0, 8.0]])] +>>> A = [ +... np.array([1.0, 2.0]), +... np.array([[1.0, 2.0], [3.0, 4.0]]), +... ] +>>> B = [ +... np.array([3.0, 4.0]), +... np.array([[5.0, 6.0], [7.0, 8.0]]), +... ] >>> eq = federated_average([A, B]) >>> eq[0].tolist() [2.0, 3.0] @@ -21,7 +27,10 @@ Weighted averaging with weights [2, 1] (normalized to [2/3, 1/3]): ->>> w = federated_average([A, B], weights=np.array([2.0, 1.0])) +>>> w = federated_average( +... [A, B], +... weights=np.array([2.0, 1.0]), +... ) >>> w[0].tolist() [1.6666666666666665, 2.6666666666666665] >>> w[1].tolist() @@ -46,7 +55,10 @@ - Mismatched tensor shapes across clients ->>> C2 = [np.array([1.0, 2.0]), np.array([[1.0, 2.0]])] # second tensor has different shape +>>> C2 = [ +... np.array([1.0, 2.0]), +... np.array([[1.0, 2.0]]), +... ] # second tensor has different shape >>> federated_average([A, C2]) # doctest: +ELLIPSIS Traceback (most recent call last): ... @@ -64,14 +76,19 @@ ... ValueError: weights must sum to a positive value ->>> federated_average([A, B], weights=np.array([1.0, 2.0, 3.0])) # doctest: +ELLIPSIS +>>> federated_average( +... [A, B], +... weights=np.array([1.0, 2.0, 3.0]), +... ) # doctest: +ELLIPSIS Traceback (most recent call last): ... ValueError: weights must have shape (2,) """ from __future__ import annotations -from typing import Iterable, List, Sequence + +from collections.abc import Sequence + import numpy as np @@ -85,14 +102,17 @@ def _validate_clients(client_models: Sequence[Sequence[np.ndarray]]) -> None: raise ValueError("All clients must have the same number of tensors") for s_ref, arr in zip(ref_shapes, cm): if tuple(arr.shape) != s_ref: - raise ValueError( - f"Client {idx} tensor shape {tuple(arr.shape)} does not match {s_ref}" + msg = ( + f"Client {idx} tensor shape {tuple(arr.shape)} " + f"does not match {s_ref}" ) + raise ValueError(msg) def _normalize_weights(weights: np.ndarray, num_clients: int) -> np.ndarray: if weights.shape != (num_clients,): - raise ValueError(f"weights must have shape ({num_clients},)") + msg = f"weights must have shape ({num_clients},)" + raise ValueError(msg) if np.any(weights < 0): raise ValueError("weights must be non-negative") total = float(weights.sum()) @@ -104,7 +124,7 @@ def _normalize_weights(weights: np.ndarray, num_clients: int) -> np.ndarray: def federated_average( client_models: Sequence[Sequence[np.ndarray]], weights: np.ndarray | None = None, -) -> List[np.ndarray]: +) -> list[np.ndarray]: """Compute the weighted average of clients' model tensors. Parameters @@ -118,7 +138,7 @@ def federated_average( Returns ------- - List[np.ndarray] + list[np.ndarray] The list of aggregated tensors with the same shapes as the inputs. """ _validate_clients(client_models) @@ -131,7 +151,7 @@ def federated_average( weights_n = _normalize_weights(weights, num_clients) num_tensors = len(client_models[0]) - aggregated: List[np.ndarray] = [] + aggregated: list[np.ndarray] = [] for t_idx in range(num_tensors): # Stack the t_idx-th tensor from each client into shape (num_clients, ...) stacked = np.stack([np.asarray(cm[t_idx]) for cm in client_models], axis=0)