From 5b1d98bc5d11997589de54f5f4581bdc5ecde977 Mon Sep 17 00:00:00 2001
From: seilmast
Date: Thu, 13 Feb 2025 13:43:06 +0100
Subject: [PATCH 1/3] Changed markdown header

---
 doc/Magnus_page.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/doc/Magnus_page.md b/doc/Magnus_page.md
index 6c3ad8d..c7c1d32 100644
--- a/doc/Magnus_page.md
+++ b/doc/Magnus_page.md
@@ -1,8 +1,6 @@
 Magnus Individual Task
 ======================
 
-# Magnus Størdal Individual Task
-
 ## Task overview
 In addition to the overall task, I was tasked with implementing a three-layer linear network, a dataset loader for the SVHN dataset, and an entropy metric.

From dd5c6c6c901b08d7525162ad8e4a49f95cb3b4de Mon Sep 17 00:00:00 2001
From: seilmast
Date: Thu, 13 Feb 2025 13:48:13 +0100
Subject: [PATCH 2/3] Added test_load_model

---
 tests/test_metrics.py  | 10 +++++++--
 tests/test_wrappers.py | 50 +++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index b747a1c..4c4a96c 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -3,7 +3,13 @@
 import pytest
 
 from CollaborativeCoding.load_metric import MetricWrapper
-from CollaborativeCoding.metrics import Accuracy, F1Score, Precision, Recall
+from CollaborativeCoding.metrics import (
+    Accuracy,
+    EntropyPrediction,
+    F1Score,
+    Precision,
+    Recall,
+)
 
 
 @pytest.mark.parametrize(
@@ -17,7 +23,7 @@
         ("accuracy", randint(2, 10), True),
         ("precision", randint(2, 10), False),
         ("precision", randint(2, 10), True),
-        # TODO: Add test for EntropyPrediction
+        ("EntropyPrediction", randint(2, 10), False),
     ],
 )
 def test_metric_wrapper(metric, num_classes, macro_averaging):
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index e2ac595..db9a697 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -1,30 +1,30 @@
 from CollaborativeCoding import load_data, load_metric, load_model
 
-# def test_load_model():
-#     import torch as th
-
-#     image_shape = (1, 16, 16)
-#     num_classes = 4
-
-#     dummy_img = th.rand((1, *image_shape))
-
-#     modelnames = [
-#         "magnusmodel",
-#         "christianmodel",
-#         "janmodel",
-#         "solveigmodel",
-#         "johanmodel",
-#     ]
-
-#     for name in modelnames:
-#         print(name)
-#         model = load_model(name, image_shape=image_shape, num_classes=num_classes)
-
-#         with th.no_grad():
-#             output = model(dummy_img)
-#         assert output.size() == (1, 4), (
-#             f"Model {name} returned image of size {output}. Expected (1,4)"
-#         )
+
+def test_load_model():
+    import torch as th
+
+    image_shape = (1, 16, 16)
+    num_classes = 4
+
+    dummy_img = th.rand((1, *image_shape))
+
+    modelnames = [
+        "magnusmodel",
+        "christianmodel",
+        "janmodel",
+        "solveigmodel",
+        "johanmodel",
+    ]
+
+    for name in modelnames:
+        model = load_model(name, image_shape=image_shape, num_classes=num_classes)
+
+        with th.no_grad():
+            output = model(dummy_img)
+        assert output.size() == (1, 4), (
+            f"Model {name} returned output of size {tuple(output.size())}. Expected (1, 4)"
+        )
 
 
 def test_load_data():

From 686e443cfa2f7b55e326094dccbb23f14025f79e Mon Sep 17 00:00:00 2001
From: seilmast
Date: Thu, 13 Feb 2025 20:02:06 +0100
Subject: [PATCH 3/3] Fixed and passed all my related functions

---
 CollaborativeCoding/dataloaders/download.py | 23 ++++++++++++++-
 CollaborativeCoding/dataloaders/svhn.py     |  9 +++---
 CollaborativeCoding/load_data.py            |  3 ++
 CollaborativeCoding/load_metric.py          |  4 +--
 CollaborativeCoding/metrics/EntropyPred.py  | 18 +++++-------
 main.py                                     | 15 +++++-----
 tests/test_metrics.py                       | 32 ++++++++++++---------
 tests/test_models.py                        | 18 ++++++++----
 tests/test_wrappers.py                      |  5 +++-
 9 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/CollaborativeCoding/dataloaders/download.py b/CollaborativeCoding/dataloaders/download.py
index 59b6345..5e90beb 100644
--- a/CollaborativeCoding/dataloaders/download.py
+++ b/CollaborativeCoding/dataloaders/download.py
@@ -8,6 +8,8 @@
 
 import h5py as h5
 import numpy as np
+from scipy.io import loadmat
+from torchvision.datasets import SVHN
 
 from .datasources import MNIST_SOURCE, USPS_SOURCE
 
@@ -84,7 +86,26 @@ def _get_labels(path: Path) -> np.ndarray:
         return train_labels, test_labels
 
     def svhn(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
-        raise NotImplementedError("SVHN download not implemented yet")
+        def download_svhn(path, train: bool = True):
+            # Let torchvision fetch the requested split into `path`.
+            SVHN(root=str(path), split="train" if train else "test", download=True)
+
+        parent_path = data_dir / "SVHN"
+
+        if not parent_path.exists():
+            parent_path.mkdir(parents=True)
+
+        train_data = parent_path / "train_32x32.mat"
+        test_data = parent_path / "test_32x32.mat"
+
+        if not train_data.exists():
+            download_svhn(parent_path, train=True)
+        if not test_data.exists():
+            download_svhn(parent_path, train=False)
+
+        train_labels = loadmat(train_data)["y"]
+        test_labels = loadmat(test_data)["y"]
+
+        return train_labels, test_labels
 
     def usps(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
         """
diff --git a/CollaborativeCoding/dataloaders/svhn.py b/CollaborativeCoding/dataloaders/svhn.py
index e48b517..4d039ac 100644
--- a/CollaborativeCoding/dataloaders/svhn.py
+++ b/CollaborativeCoding/dataloaders/svhn.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 import h5py
 import numpy as np
@@ -11,10 +12,10 @@ class SVHNDataset(Dataset):
     def __init__(
         self,
-        data_path: str,
+        data_path: Path,
+        sample_ids: list,
         train: bool,
         transform=None,
-        download: bool = True,
         nr_channels=3,
     ):
         """
@@ -31,11 +32,9 @@ def __init__(
         super().__init__()
 
         self.data_path = data_path
+        self.indexes = sample_ids
         self.split = "train" if train else "test"
 
-        if download:
-            self._download_data(data_path)
-
         self.nr_channels = nr_channels
         self.transforms = transform
diff --git a/CollaborativeCoding/load_data.py b/CollaborativeCoding/load_data.py
index c3a8951..200368f 100644
--- a/CollaborativeCoding/load_data.py
+++ b/CollaborativeCoding/load_data.py
@@ -86,6 +86,7 @@ def load_data(dataset: str, *args, **kwargs) -> tuple:
         sample_ids=train_samples,
         train=True,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )
 
     val = dataset(
         data_dir,
         sample_ids=val_samples,
         train=True,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )
 
     test = dataset(
         data_dir,
         sample_ids=test_samples,
         train=False,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )
 
     return train, val, test
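For reference, the download flow that the new svhn() helper earlier in this patch relies on, as a minimal standalone sketch. The only assumption beyond the patch is the local `data` directory used as the download root; the torchvision and scipy calls are the same ones the helper makes:

    from pathlib import Path

    from scipy.io import loadmat
    from torchvision.datasets import SVHN

    parent_path = Path("data") / "SVHN"  # hypothetical root, adjust as needed
    parent_path.mkdir(parents=True, exist_ok=True)

    # torchvision downloads {split}_32x32.mat into the given root
    SVHN(root=str(parent_path), split="train", download=True)
    SVHN(root=str(parent_path), split="test", download=True)

    # the raw .mat files store labels under the "y" key as an (N, 1) array,
    # with classes 1..10 where 10 stands for the digit 0
    train_labels = loadmat(parent_path / "train_32x32.mat")["y"]
    test_labels = loadmat(parent_path / "test_32x32.mat")["y"]
    print(train_labels.shape, test_labels.shape)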
diff --git a/CollaborativeCoding/load_metric.py b/CollaborativeCoding/load_metric.py
index 11e091b..839d9c6 100644
--- a/CollaborativeCoding/load_metric.py
+++ b/CollaborativeCoding/load_metric.py
@@ -82,7 +82,7 @@ def __call__(self, y_true, y_pred):
         for key in self.metrics:
             self.metrics[key](y_true, y_pred)
 
-    def __getmetrics__(self, str_prefix: str = None):
+    def getmetrics(self, str_prefix: str = None):
         return_metrics = {}
         for key in self.metrics:
             if str_prefix is not None:
@@ -91,6 +91,6 @@
             return_metrics[key] = self.metrics[key].__returnmetric__()
         return return_metrics
 
-    def __resetmetrics__(self):
+    def resetmetric(self):
         for key in self.metrics:
             self.metrics[key].__reset__()
diff --git a/CollaborativeCoding/metrics/EntropyPred.py b/CollaborativeCoding/metrics/EntropyPred.py
index b8daf9a..b77e8d7 100644
--- a/CollaborativeCoding/metrics/EntropyPred.py
+++ b/CollaborativeCoding/metrics/EntropyPred.py
@@ -5,7 +5,7 @@
 
 
 class EntropyPrediction(nn.Module):
-    def __init__(self, averages: str = "mean"):
+    def __init__(self, num_classes, macro_averaging=None):
         """
         Initializes the EntropyPrediction module, which calculates the Shannon
         Entropy of predicted logits and aggregates the results based on the
         specified method.
@@ -17,11 +17,8 @@
         """
         super().__init__()
 
-        assert averages in ["mean", "sum", "none"], (
-            "averages must be 'mean', 'sum', or 'none'"
-        )
-        self.averages = averages
         self.stored_entropy_values = []
+        self.num_classes = num_classes
 
     def __call__(self, y_true: th.Tensor, y_logits: th.Tensor):
         """
@@ -36,6 +33,10 @@
         """
         assert len(y_logits.size()) == 2, f"y_logits shape: {y_logits.size()}"
+        assert y_logits.size(-1) == self.num_classes, (
+            f"y_logit class length: {y_logits.size(-1)}, expected: {self.num_classes}"
+        )
+
         y_pred = nn.Softmax(dim=1)(y_logits)
         print(f"y_pred: {y_pred}")
         entropy_values = entropy(y_pred, axis=1)
@@ -50,13 +51,8 @@ def __returnmetric__(self):
         stored_entropy_values = th.from_numpy(np.asarray(self.stored_entropy_values))
+        stored_entropy_values = th.mean(stored_entropy_values)
 
-        if self.averages == "mean":
-            stored_entropy_values = th.mean(stored_entropy_values)
-        elif self.averages == "sum":
-            stored_entropy_values = th.sum(stored_entropy_values)
-        elif self.averages == "none":
-            pass
         return stored_entropy_values
 
     def __reset__(self):
diff --git a/main.py b/main.py
index ba37479..aef303f 100644
--- a/main.py
+++ b/main.py
@@ -50,6 +50,7 @@ def main():
         data_dir=args.datafolder,
         transform=transform,
         val_size=args.val_size,
+        nr_channels=args.nr_channels,
     )
 
     train_metrics = MetricWrapper(
@@ -121,7 +122,7 @@ def main():
             train_metrics(y, logits)
             break
 
-        print(train_metrics.accumulate())
+        print(train_metrics.getmetrics())
         print("Dry run completed successfully.")
         exit()
@@ -169,11 +170,11 @@ def main():
                 "Train loss": np.mean(trainingloss),
                 "Validation loss": np.mean(valloss),
             }
-            | train_metrics.__getmetrics__(str_prefix="Train ")
-            | val_metrics.__getmetrics__(str_prefix="Validation ")
+            | train_metrics.getmetrics(str_prefix="Train ")
+            | val_metrics.getmetrics(str_prefix="Validation ")
         )
-        train_metrics.__resetmetrics__()
-        val_metrics.__resetmetrics__()
+        train_metrics.resetmetric()
+        val_metrics.resetmetric()
 
     testloss = []
     model.eval()
@@ -189,9 +190,9 @@ def main():
 
     wandb.log(
         {"Epoch": 1, "Test loss": np.mean(testloss)}
-        | test_metrics.__getmetrics__(str_prefix="Test ")
+        | test_metrics.getmetrics(str_prefix="Test ")
     )
-    test_metrics.__resetmetrics__()
+    test_metrics.resetmetric()
 
 
 if __name__ == "__main__":
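The EntropyPred.py changes above reduce the metric to mean aggregation and tie it to a fixed number of classes. A minimal usage sketch, assuming only the interface shown in the diff (y_true is accepted but, as the diff suggests, not used by this metric):

    import torch as th

    from CollaborativeCoding.metrics import EntropyPrediction

    metric = EntropyPrediction(num_classes=5)

    for _ in range(3):                   # accumulate over several batches
        logits = th.rand(6, 5)           # (batch_size, num_classes) raw logits
        labels = th.randint(0, 5, (6,))  # unused by the entropy computation
        metric(labels, logits)           # softmax, then per-sample Shannon entropy, stored

    print(metric.__returnmetric__())     # mean entropy over all stored values
    metric.__reset__()                   # clear the stored values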
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 4c4a96c..3107d73 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -23,7 +23,7 @@
         ("accuracy", randint(2, 10), True),
         ("precision", randint(2, 10), False),
         ("precision", randint(2, 10), True),
-        ("EntropyPrediction", randint(2, 10), False),
+        ("entropy", randint(2, 10), False),
     ],
 )
 def test_metric_wrapper(metric, num_classes, macro_averaging):
@@ -40,9 +40,9 @@
     )
 
     metrics(y_true, logits)
-    score = metrics.accumulate()
-    metrics.reset()
-    empty_score = metrics.accumulate()
+    score = metrics.getmetrics()
+    metrics.resetmetric()
+    empty_score = metrics.getmetrics()
 
     assert isinstance(score, dict), "Expected a dictionary output."
    assert metric in score, f"Expected {metric} metric in the output."
@@ -151,16 +151,22 @@ def test_accuracy():
 def test_entropypred():
     import torch as th
 
-    pred_logits = th.rand(6, 5)
     true_lab = th.rand(6, 5)
 
-    metric = EntropyPrediction(averages="mean")
-    metric2 = EntropyPrediction(averages="sum")
+    metric = EntropyPrediction(num_classes=5)
 
-    # Test for averaging metric consistency
+    # Test that the metric stores values across multiple calls
+    pred_logits = th.rand(6, 5)
     metric(true_lab, pred_logits)
-    metric2(true_lab, pred_logits)
-    assert (
-        th.abs(th.sum(6 * metric.__returnmetric__() - metric2.__returnmetric__()))
-        < 1e-5
-    )
+
+    pred_logits = th.rand(6, 5)
+    metric(true_lab, pred_logits)
+
+    pred_logits = th.rand(6, 5)
+    metric(true_lab, pred_logits)
+
+    assert isinstance(metric.__returnmetric__(), th.Tensor)
+
+    # Test that an error is raised when num_classes != the class dimension length
+    with pytest.raises(AssertionError):
+        metric(true_lab, th.rand(6, 6))
diff --git a/tests/test_models.py b/tests/test_models.py
index 0af2717..1b70987 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,7 +1,13 @@
 import pytest
 import torch
 
-from CollaborativeCoding.models import ChristianModel, JanModel, MagnusModel
+from CollaborativeCoding.models import (
+    ChristianModel,
+    JanModel,
+    JohanModel,
+    MagnusModel,
+    SolveigModel,
+)
 
 
 @pytest.mark.parametrize(
@@ -49,15 +55,17 @@ def test_solveig_model(image_shape, num_classes):
     assert y.shape == (n, num_classes), f"Shape: {y.shape}"
 
 
-@pytest.mark.parametrize("image_shape", [(3, 28, 28)])
-def test_magnus_model(image_shape):
+@pytest.mark.parametrize(
+    "image_shape, num_classes", [((3, 28, 28), 10), ((1, 16, 16), 10)]
+)
+def test_magnus_model(image_shape, num_classes):
     import torch as th
 
     n, c, h, w = 5, *image_shape
 
-    model = MagnusModel([h, w], 10, c)
+    model = MagnusModel([h, w], num_classes, c)
     x = th.rand((n, c, h, w))
 
     with th.no_grad():
         y = model(x)
 
-    assert y.shape == (n, 10), f"Shape: {y.shape}"
+    assert y.shape == (n, num_classes), f"Shape: {y.shape}"
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
index db9a697..f30176b 100644
--- a/tests/test_wrappers.py
+++ b/tests/test_wrappers.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 from CollaborativeCoding import load_data, load_metric, load_model
 
 
@@ -18,6 +20,7 @@ def test_load_model():
     ]
 
     for name in modelnames:
+        print(name)
         model = load_model(name, image_shape=image_shape, num_classes=num_classes)
 
         with th.no_grad():
@@ -51,7 +54,7 @@ def test_load_data():
     with TemporaryDirectory() as tmppath:
         for name in dataset_names:
             dataset = load_data(
-                name, train=False, data_path=tmppath, download=True, transform=trans
+                name, train=False, data_dir=Path(tmppath), transform=trans
             )
 
             im, _ = dataset.__getitem__(0)
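Taken together, the wrapper API the tests now exercise looks like the following. This is a sketch under the assumption that MetricWrapper is constructed from metric names plus num_classes and macro_averaging; the constructor itself sits outside the hunks shown above, so that call is hypothetical:

    import torch as th

    from CollaborativeCoding.load_metric import MetricWrapper

    # hypothetical constructor call; only the methods below appear in the diffs
    metrics = MetricWrapper("entropy", num_classes=5, macro_averaging=False)

    y_true = th.randint(0, 5, (8,))
    logits = th.rand(8, 5)
    metrics(y_true, logits)                        # forwards to every wrapped metric

    print(metrics.getmetrics(str_prefix="Test "))  # e.g. {"Test entropy": tensor(...)}
    metrics.resetmetric()                          # calls __reset__() on each metric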