23 changes: 22 additions & 1 deletion CollaborativeCoding/dataloaders/download.py
@@ -8,6 +8,8 @@

 import h5py as h5
 import numpy as np
+from scipy.io import loadmat
+from torchvision.datasets import SVHN

 from .datasources import MNIST_SOURCE, USPS_SOURCE

@@ -84,7 +86,26 @@ def _get_labels(path: Path) -> np.ndarray:
         return train_labels, test_labels

     def svhn(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
-        raise NotImplementedError("SVHN download not implemented yet")
+        def download_svhn(path, train: bool = True):
+            # torchvision fetches {split}_32x32.mat directly into `path`
+            SVHN(root=path, split="train" if train else "test", download=True)
+
+        parent_path = data_dir / "SVHN"
+
+        if not parent_path.exists():
+            parent_path.mkdir(parents=True)
+
+        train_data = parent_path / "train_32x32.mat"
+        test_data = parent_path / "test_32x32.mat"
+
+        if not train_data.exists():
+            download_svhn(parent_path, train=True)
+        if not test_data.exists():
+            download_svhn(parent_path, train=False)
+
+        train_labels = loadmat(train_data)["y"]
+        test_labels = loadmat(test_data)["y"]
+
+        return train_labels, test_labels

     def usps(self, data_dir: Path) -> tuple[np.ndarray, np.ndarray]:
         """
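A quick sanity check on what this downloader produces. This is a hedged sketch (the `data` root folder is illustrative): `loadmat` returns a dict whose `"y"` key holds the labels as an (N, 1) column vector, and in the raw SVHN .mat files the digit 0 is stored as label 10.

```python
# Sketch: inspecting the .mat labels the new svhn() method returns.
# Assumes the files were already fetched into data/SVHN as above.
from pathlib import Path

from scipy.io import loadmat

labels = loadmat(Path("data/SVHN") / "train_32x32.mat")["y"]
print(labels.shape)                # (73257, 1) column vector
print(labels.min(), labels.max())  # 1 10 -- digit 0 is stored as 10
```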
9 changes: 4 additions & 5 deletions CollaborativeCoding/dataloaders/svhn.py
@@ -1,4 +1,5 @@
-import os
+from pathlib import Path

 import h5py
 import numpy as np
@@ -11,10 +12,10 @@
 class SVHNDataset(Dataset):
     def __init__(
         self,
-        data_path: str,
+        data_path: Path,
         sample_ids: list,
         train: bool,
         transform=None,
-        download: bool = True,
+        nr_channels=3,
     ):
         """
@@ -31,11 +32,9 @@ def __init__(
         super().__init__()

         self.data_path = data_path
         self.indexes = sample_ids
         self.split = "train" if train else "test"

-        if download:
-            self._download_data(data_path)
-
         self.nr_channels = nr_channels
         self.transforms = transform

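With the `download` flag gone, constructing the dataset is purely a read operation. A minimal usage sketch under the new signature; the path and sample ids here are illustrative, not taken from the PR:

```python
from pathlib import Path

from CollaborativeCoding.dataloaders.svhn import SVHNDataset

# Illustrative arguments only; downloading is now handled by the
# svhn() method in download.py before this constructor runs.
ds = SVHNDataset(
    data_path=Path("data/SVHN"),
    sample_ids=list(range(128)),  # indices into the chosen split
    train=True,
    transform=None,
    nr_channels=3,
)
```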
3 changes: 3 additions & 0 deletions CollaborativeCoding/load_data.py
@@ -86,20 +86,23 @@ def load_data(dataset: str, *args, **kwargs) -> tuple:
         sample_ids=train_samples,
         train=True,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )

     val = dataset(
         data_path=data_dir,
         sample_ids=val_samples,
         train=True,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )

     test = dataset(
         data_path=data_dir,
         sample_ids=test_samples,
         train=False,
         transform=transform,
+        nr_channels=kwargs.get("nr_channels"),
     )

     return train, val, test
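One subtlety worth noting with these additions: `kwargs.get("nr_channels")` yields `None` when the caller omits the key, and that `None` then overrides any default the dataset class defines (such as `nr_channels=3` in `SVHNDataset`). A worked example of the difference, with the two-argument `get` that would preserve the default:

```python
def make_dataset(nr_channels=3):
    return nr_channels

kwargs = {}  # caller did not pass nr_channels
print(make_dataset(nr_channels=kwargs.get("nr_channels")))     # None -- default lost
print(make_dataset(nr_channels=kwargs.get("nr_channels", 3)))  # 3    -- default kept
```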
4 changes: 2 additions & 2 deletions CollaborativeCoding/load_metric.py
@@ -82,7 +82,7 @@ def __call__(self, y_true, y_pred):
         for key in self.metrics:
             self.metrics[key](y_true, y_pred)

-    def __getmetrics__(self, str_prefix: str = None):
+    def getmetrics(self, str_prefix: str = None):
         return_metrics = {}
         for key in self.metrics:
             if str_prefix is not None:
@@ -91,6 +91,6 @@ def __getmetrics__(self, str_prefix: str = None):
                 return_metrics[key] = self.metrics[key].__returnmetric__()
         return return_metrics

-    def __resetmetrics__(self):
+    def resetmetric(self):
         for key in self.metrics:
             self.metrics[key].__reset__()
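A minimal sketch of the renamed API. The constructor arguments are assumed from how the tests elsewhere in this PR build the wrapper and are not confirmed by this hunk:

```python
import torch as th

from CollaborativeCoding.load_metric import MetricWrapper

# Hypothetical construction -- the exact signature is not shown in this diff.
metrics = MetricWrapper("accuracy", num_classes=10)

y_true = th.randint(0, 10, (8,))
logits = th.rand(8, 10)

metrics(y_true, logits)                         # accumulate one batch
print(metrics.getmetrics(str_prefix="Train "))  # e.g. {"Train accuracy": ...}
metrics.resetmetric()                           # clear accumulated values
```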
18 changes: 7 additions & 11 deletions CollaborativeCoding/metrics/EntropyPred.py
@@ -5,7 +5,7 @@


 class EntropyPrediction(nn.Module):
-    def __init__(self, averages: str = "mean"):
+    def __init__(self, num_classes, macro_averaging=None):
"""
Initializes the EntropyPrediction module, which calculates the Shannon Entropy
of predicted logits and aggregates the results based on the specified method.
Expand All @@ -17,11 +17,8 @@ def __init__(self, averages: str = "mean"):
"""
super().__init__()

assert averages in ["mean", "sum", "none"], (
"averages must be 'mean', 'sum', or 'none'"
)
self.averages = averages
self.stored_entropy_values = []
self.num_classes = num_classes

     def __call__(self, y_true: th.Tensor, y_logits: th.Tensor):
         """
@@ -36,6 +33,10 @@ def __call__(self, y_true: th.Tensor, y_logits: th.Tensor):
         """

         assert len(y_logits.size()) == 2, f"y_logits shape: {y_logits.size()}"
+        assert y_logits.size(-1) == self.num_classes, (
+            f"y_logit class length: {y_logits.size(-1)}, expected: {self.num_classes}"
+        )
+
         y_pred = nn.Softmax(dim=1)(y_logits)
         print(f"y_pred: {y_pred}")
         entropy_values = entropy(y_pred, axis=1)
@@ -50,13 +51,8 @@

     def __returnmetric__(self):
         stored_entropy_values = th.from_numpy(np.asarray(self.stored_entropy_values))
+        stored_entropy_values = th.mean(stored_entropy_values)

-        if self.averages == "mean":
-            stored_entropy_values = th.mean(stored_entropy_values)
-        elif self.averages == "sum":
-            stored_entropy_values = th.sum(stored_entropy_values)
-        elif self.averages == "none":
-            pass
         return stored_entropy_values

     def __reset__(self):
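Standalone, the computation this metric performs per call is: softmax over the class dimension, then per-sample Shannon entropy via `scipy.stats.entropy`, with `__returnmetric__` reporting the mean over everything stored so far. A self-contained sketch:

```python
import torch as th
from scipy.stats import entropy

logits = th.rand(4, 10)                    # (batch, num_classes)
probs = th.softmax(logits, dim=1).numpy()  # rows sum to 1
per_sample = entropy(probs, axis=1)        # Shannon entropy, natural log
print(per_sample.shape)                    # (4,)
print(per_sample.mean())                   # what __returnmetric__ reports
```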
2 changes: 0 additions & 2 deletions doc/Magnus_page.md
@@ -1,8 +1,6 @@
 Magnus Individual Task
 ======================

-# Magnus Størdal Individual Task
-
 ## Task overview
 In addition to the overall task, I was tasked to implement a three-layer linear network, a dataset loader for the SVHN dataset, and an entropy metric.

15 changes: 8 additions & 7 deletions main.py
@@ -50,6 +50,7 @@ def main():
         data_dir=args.datafolder,
         transform=transform,
         val_size=args.val_size,
+        nr_channels=args.nr_channels,
     )

     train_metrics = MetricWrapper(
@@ -121,7 +122,7 @@ def main():
             train_metrics(y, logits)

             break
-        print(train_metrics.accumulate())
+        print(train_metrics.getmetrics())
         print("Dry run completed successfully.")
         exit()

@@ -169,11 +170,11 @@ def main():
                 "Train loss": np.mean(trainingloss),
                 "Validation loss": np.mean(valloss),
             }
-            | train_metrics.__getmetrics__(str_prefix="Train ")
-            | val_metrics.__getmetrics__(str_prefix="Validation ")
+            | train_metrics.getmetrics(str_prefix="Train ")
+            | val_metrics.getmetrics(str_prefix="Validation ")
         )
-        train_metrics.__resetmetrics__()
-        val_metrics.__resetmetrics__()
+        train_metrics.resetmetric()
+        val_metrics.resetmetric()

     testloss = []
     model.eval()
@@ -189,9 +190,9 @@ def main():

     wandb.log(
         {"Epoch": 1, "Test loss": np.mean(testloss)}
-        | test_metrics.__getmetrics__(str_prefix="Test ")
+        | test_metrics.getmetrics(str_prefix="Test ")
     )
-    test_metrics.__resetmetrics__()
+    test_metrics.resetmetric()


 if __name__ == "__main__":
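The `|` in the `wandb.log` calls above is PEP 584 dict union (Python 3.9+), merging the loss dict with the prefixed metric dict before logging:

```python
base = {"Epoch": 1, "Test loss": 0.42}
extra = {"Test accuracy": 0.91}  # illustrative values
print(base | extra)  # {'Epoch': 1, 'Test loss': 0.42, 'Test accuracy': 0.91}
```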
40 changes: 26 additions & 14 deletions tests/test_metrics.py
@@ -3,7 +3,13 @@
 import pytest

 from CollaborativeCoding.load_metric import MetricWrapper
-from CollaborativeCoding.metrics import Accuracy, F1Score, Precision, Recall
+from CollaborativeCoding.metrics import (
+    Accuracy,
+    EntropyPrediction,
+    F1Score,
+    Precision,
+    Recall,
+)


 @pytest.mark.parametrize(
@@ -17,7 +23,7 @@
         ("accuracy", randint(2, 10), True),
         ("precision", randint(2, 10), False),
         ("precision", randint(2, 10), True),
-        # TODO: Add test for EntropyPrediction
+        ("entropy", randint(2, 10), False),
     ],
 )
 def test_metric_wrapper(metric, num_classes, macro_averaging):
@@ -34,9 +40,9 @@ def test_metric_wrapper(metric, num_classes, macro_averaging):
     )

     metrics(y_true, logits)
-    score = metrics.accumulate()
-    metrics.reset()
-    empty_score = metrics.accumulate()
+    score = metrics.getmetrics()
+    metrics.resetmetric()
+    empty_score = metrics.getmetrics()

     assert isinstance(score, dict), "Expected a dictionary output."
     assert metric in score, f"Expected {metric} metric in the output."
@@ -145,16 +151,22 @@ def test_accuracy():
 def test_entropypred():
     import torch as th

-    pred_logits = th.rand(6, 5)
     true_lab = th.rand(6, 5)

-    metric = EntropyPrediction(averages="mean")
-    metric2 = EntropyPrediction(averages="sum")
+    metric = EntropyPrediction(num_classes=5)

-    # Test for averaging metric consistency
+    # Test if the metric stores multiple values
+    pred_logits = th.rand(6, 5)
     metric(true_lab, pred_logits)
-    metric2(true_lab, pred_logits)
-    assert (
-        th.abs(th.sum(6 * metric.__returnmetric__() - metric2.__returnmetric__()))
-        < 1e-5
-    )
+
+    pred_logits = th.rand(6, 5)
+    metric(true_lab, pred_logits)
+
+    pred_logits = th.rand(6, 5)
+    metric(true_lab, pred_logits)
+
+    assert type(metric.__returnmetric__()) == th.Tensor
+
+    # Test that an error is raised with num_class != class dimension length
+    with pytest.raises(AssertionError):
+        metric(true_lab, th.rand(6, 6))
18 changes: 13 additions & 5 deletions tests/test_models.py
@@ -1,7 +1,13 @@
 import pytest
 import torch

-from CollaborativeCoding.models import ChristianModel, JanModel, MagnusModel
+from CollaborativeCoding.models import (
+    ChristianModel,
+    JanModel,
+    JohanModel,
+    MagnusModel,
+    SolveigModel,
+)


 @pytest.mark.parametrize(
@@ -49,15 +55,17 @@ def test_solveig_model(image_shape, num_classes):
     assert y.shape == (n, num_classes), f"Shape: {y.shape}"


@pytest.mark.parametrize("image_shape", [(3, 28, 28)])
def test_magnus_model(image_shape):
@pytest.mark.parametrize(
"image_shape, num_classes", [((3, 28, 28), 10), ((1, 16, 16), 10)]
)
def test_magnus_model(image_shape, num_classes):
import torch as th

n, c, h, w = 5, *image_shape
model = MagnusModel([h, w], 10, c)
model = MagnusModel([h, w], num_classes, c)

x = th.rand((n, c, h, w))
with th.no_grad():
y = model(x)

assert y.shape == (n, 10), f"Shape: {y.shape}"
assert y.shape == (n, num_classes), f"Shape: {y.shape}"
45 changes: 24 additions & 21 deletions tests/test_wrappers.py
@@ -1,30 +1,33 @@
+from pathlib import Path
+
 from CollaborativeCoding import load_data, load_metric, load_model

-# def test_load_model():
-#     import torch as th
-
-#     image_shape = (1, 16, 16)
-#     num_classes = 4
-
-#     dummy_img = th.rand((1, *image_shape))
-
-#     modelnames = [
-#         "magnusmodel",
-#         "christianmodel",
-#         "janmodel",
-#         "solveigmodel",
-#         "johanmodel",
-#     ]
-
-#     for name in modelnames:
-#         print(name)
-#         model = load_model(name, image_shape=image_shape, num_classes=num_classes)
-
-#     with th.no_grad():
-#         output = model(dummy_img)
-#         assert output.size() == (1, 4), (
-#             f"Model {name} returned image of size {output}. Expected (1,4)"
-#         )
+def test_load_model():
+    import torch as th
+
+    image_shape = (1, 16, 16)
+    num_classes = 4
+
+    dummy_img = th.rand((1, *image_shape))
+
+    modelnames = [
+        "magnusmodel",
+        "christianmodel",
+        "janmodel",
+        "solveigmodel",
+        "johanmodel",
+    ]
+
+    for name in modelnames:
+        print(name)
+        model = load_model(name, image_shape=image_shape, num_classes=num_classes)
+
+        with th.no_grad():
+            output = model(dummy_img)
+            assert output.size() == (1, 4), (
+                f"Model {name} returned image of size {output}. Expected (1,4)"
+            )


 def test_load_data():
@@ -51,7 +54,7 @@ def test_load_data():
     with TemporaryDirectory() as tmppath:
         for name in dataset_names:
             dataset = load_data(
-                name, train=False, data_path=tmppath, download=True, transform=trans
+                name, train=False, data_dir=Path(tmppath), transform=trans
             )

             im, _ = dataset.__getitem__(0)