11 changes: 5 additions & 6 deletions CollaborativeCoding/dataloaders/mnist_4_9.py
@@ -43,12 +43,11 @@ def __init__(
self.labels_path = self.mnist_path / (
MNIST_SOURCE["train_labels"][1] if train else MNIST_SOURCE["test_labels"][1]
)

# Functions to map the labels from (4,9) -> (0,5) for CrossEntropyLoss to work properly.
self.label_shift = lambda x: x-4
self.label_restore = lambda x: x+4



# Functions to map the labels from (4,9) -> (0,5) for CrossEntropyLoss to work properly.
self.label_shift = lambda x: x - 4
self.label_restore = lambda x: x + 4

def __len__(self):
return len(self.samples)

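For context on the re-indented block above: torch.nn.CrossEntropyLoss expects target class indices in [0, num_classes), so digit labels in the 4..9 range have to be shifted down before computing the loss and shifted back for reporting. A minimal stand-alone sketch of that mapping (not part of the PR; the sample tensors and the 6-class head are illustrative assumptions):

    import torch
    from torch import nn

    # CrossEntropyLoss wants class indices in [0, num_classes), so map 4..9 -> 0..5.
    label_shift = lambda x: x - 4
    label_restore = lambda x: x + 4

    raw = torch.tensor([4, 9, 7])      # illustrative labels
    shifted = label_shift(raw)         # tensor([0, 5, 3])
    loss = nn.CrossEntropyLoss()(torch.randn(3, 6), shifted)
    assert torch.equal(label_restore(shifted), raw)
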
2 changes: 1 addition & 1 deletion main.py
@@ -139,7 +139,7 @@ def main():

for epoch in range(args.epoch):
# Training loop start
print(f"Epoch: {epoch+1}/{args.epoch}")
print(f"Epoch: {epoch + 1}/{args.epoch}")
trainingloss = []
model.train()
for x, y in tqdm(trainloader, desc="Training"):
20 changes: 12 additions & 8 deletions tests/test_dataloaders.py
@@ -1,6 +1,5 @@
from pathlib import Path

import numpy as np
import pytest
import torch
from torchvision import transforms
@@ -26,14 +25,19 @@
],
)
def test_load_data(data_name, expected):
print(data_name)
dataset, _, _ = load_data(
data_name,
data_dir=Path("data"),
train=False,
data_dir=Path("Data"),
transform=transforms.ToTensor(),
)
assert isinstance(dataset, expected)
assert len(dataset) > 0
assert isinstance(dataset[0], tuple)
assert isinstance(dataset[0][0], torch.Tensor)
assert isinstance(dataset[0][1], int)

sample = dataset[0]
img, label = sample

assert isinstance(dataset, expected), f"{type(dataset)} != {expected}"
assert len(dataset) > 0, "Dataset is empty"
assert isinstance(sample, tuple), f"{type(sample)} != tuple"
assert isinstance(img, torch.Tensor), f"{type(img)} != torch.Tensor"
assert isinstance(label, int), f"{type(label)} != int"
assert len(img.size()) == 3, f"{len(img.size())} != 3"
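
Regarding the new shape assertion at the end of this test: transforms.ToTensor() returns a (C, H, W) float tensor, so a single grayscale MNIST sample should have exactly three dimensions. A quick stand-alone check under that assumption (the 28x28 dummy image is illustrative, not from the repo):

    import numpy as np
    from torchvision import transforms

    # ToTensor converts an H x W x C uint8 array (or PIL image) into a
    # C x H x W float tensor scaled to [0, 1].
    fake_img = np.zeros((28, 28, 1), dtype=np.uint8)
    tensor = transforms.ToTensor()(fake_img)
    print(tensor.shape)              # torch.Size([1, 28, 28])
    assert len(tensor.size()) == 3
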
231 changes: 37 additions & 194 deletions tests/test_metrics.py
@@ -1,214 +1,57 @@
from random import randint
import itertools

import pytest

from CollaborativeCoding.load_metric import MetricWrapper
from CollaborativeCoding.metrics import (
Accuracy,
EntropyPrediction,
F1Score,
Precision,
Recall,
)


@pytest.mark.parametrize(
"metric, num_classes, macro_averaging",
[
("f1", randint(2, 10), False),
("f1", randint(2, 10), True),
("recall", randint(2, 10), False),
("recall", randint(2, 10), True),
("accuracy", randint(2, 10), False),
("accuracy", randint(2, 10), True),
("precision", randint(2, 10), False),
("precision", randint(2, 10), True),
("entropy", randint(2, 10), False),
],
)
def test_metric_wrapper(metric, num_classes, macro_averaging):
import numpy as np
import torch

y_true = torch.arange(num_classes, dtype=torch.int64)
logits = torch.rand(num_classes, num_classes)

metrics = MetricWrapper(
metric,
num_classes=num_classes,
macro_averaging=macro_averaging,
)

metrics(y_true, logits)
score = metrics.getmetrics()
metrics.resetmetric()
empty_score = metrics.getmetrics()

assert isinstance(score, dict), "Expected a dictionary output."
assert metric in score, f"Expected {metric} metric in the output."
assert score[metric] >= 0, "Expected a non-negative value."
assert np.isnan(empty_score[metric]), "Expected an empty list."


def test_recall():
import torch

y_true = torch.tensor([0, 1, 2, 3, 4, 5, 6])
logits = torch.randn(7, 7)

recall_micro = Recall(7)
recall_macro = Recall(7, macro_averaging=True)

recall_micro(y_true, logits)
recall_macro(y_true, logits)

recall_micro_score = recall_micro.__returnmetric__()
recall_macro_score = recall_macro.__returnmetric__()

assert isinstance(recall_micro_score, torch.Tensor), "Expected a tensor output."
assert isinstance(recall_macro_score, torch.Tensor), "Expected a tensor output."
assert recall_micro_score.item() >= 0, "Expected a non-negative value."
assert recall_macro_score.item() >= 0, "Expected a non-negative value."
METRICS = ["f1", "recall", "accuracy", "precision", "entropy"]


def test_f1score():
import torch

# Example case with known output
y_true = torch.tensor([0, 1, 2, 2, 1, 0]) # True labels
y_pred = torch.tensor([0, 1, 1, 2, 0, 0]) # Predicted labels

# Create F1Score object for micro and macro averaging
f1_micro = F1Score(num_classes=3, macro_averaging=False)
f1_macro = F1Score(num_classes=3, macro_averaging=True)

# Update F1 score with predictions
f1_micro(y_true, y_pred)
f1_macro(y_true, y_pred)
def _metric_combinations():
"""
Yield various combinations of metrics:
1. Single metric as a list
2. Pairs of metrics
3. All metrics
"""

# Get F1 scores
micro_f1_score = f1_micro.__returnmetric__()
macro_f1_score = f1_macro.__returnmetric__()

# Check if outputs are tensors
assert isinstance(micro_f1_score, torch.Tensor), (
"Micro F1 score should be a tensor."
)
assert isinstance(macro_f1_score, torch.Tensor), (
"Macro F1 score should be a tensor."
)
# Single metrics as lists
for m in METRICS:
yield [m]

# Check that F1 scores are between 0 and 1
assert 0 <= micro_f1_score.item() <= 1, "Micro F1 score should be between 0 and 1."
assert 0 <= macro_f1_score.item() <= 1, "Macro F1 score should be between 0 and 1."
# Pairs of metrics (2-combinations)
for combo in itertools.combinations(METRICS, 2):
yield list(combo)

print(f"Micro F1 Score: {micro_f1_score.item()}")
print(f"Macro F1 Score: {macro_f1_score.item()}")
# Also test all metrics at once
yield METRICS


def test_precision():
@pytest.mark.parametrize("metrics", _metric_combinations())
@pytest.mark.parametrize("num_classes", [2, 3, 5, 10])
@pytest.mark.parametrize("macro_averaging", [True, False])
def test_metric_wrapper(metrics, num_classes, macro_averaging):
import numpy as np
import torch
from sklearn.metrics import precision_score

C = randint(2, 10) # number of classes
N = randint(2, 10 * C) # batchsize
y_true = torch.randint(0, C, (N,))
logits = torch.randn(N, C)

# create metric objects
precision_micro = Precision(num_classes=C)
precision_macro = Precision(num_classes=C, macro_averaging=True)

# run metric object
precision_micro(y_true, logits)
precision_macro(y_true, logits)

# get metric scores
micro_precision_score = precision_micro.__returnmetric__()
macro_precision_score = precision_macro.__returnmetric__()

# check output to be tensor
assert isinstance(micro_precision_score, torch.Tensor), "Tensor output is expected."
assert isinstance(macro_precision_score, torch.Tensor), "Tensor output is expected."

# check for non-negativity
assert micro_precision_score.item() >= 0, "Expected non-negative value"
assert macro_precision_score.item() >= 0, "Expected non-negative value"

# find predictions
y_pred = logits.argmax(dim=-1)

# check dimension
assert y_true.shape == torch.Size([N])
assert logits.shape == torch.Size([N, C])
assert y_pred.shape == torch.Size([N])

# find true values with scikit learn
scikit_macro_precision = precision_score(y_true, y_pred, average="macro")
scikit_micro_precision = precision_score(y_true, y_pred, average="micro")

# check for similarity
assert np.isclose(scikit_micro_precision, micro_precision_score, atol=1e-5), (
"Score does not match scikit's score"
)
assert np.isclose(scikit_macro_precision, macro_precision_score, atol=1e-5), (
"Score does not match scikit's score"
)


def test_accuracy():
import numpy as np
import torch
y_true = torch.arange(num_classes, dtype=torch.int64)
logits = torch.rand(num_classes, num_classes)

# Test the accuracy metric
y_true = torch.tensor([0, 1, 2, 3, 4, 5])
y_pred = torch.tensor([0, 1, 2, 3, 4, 5])
accuracy = Accuracy(num_classes=6, macro_averaging=False)
accuracy(y_true, y_pred)
assert accuracy.__returnmetric__() == 1.0, "Expected accuracy to be 1.0"
accuracy.__reset__()
assert accuracy.__returnmetric__() is np.nan, "Expected accuracy to be 0.0"
y_pred = torch.tensor([0, 1, 2, 3, 4, 4])
accuracy(y_true, y_pred)
assert np.abs(accuracy.__returnmetric__() - 0.8333333134651184) < 1e-5, (
"Expected accuracy to be 0.8333333134651184"
)
accuracy.__reset__()
accuracy.macro_averaging = True
accuracy(y_true, y_pred)
y_true_1 = torch.tensor([0, 1, 2, 3, 4, 5])
y_pred_1 = torch.tensor([0, 1, 2, 3, 4, 4])
accuracy(y_true_1, y_pred_1)
assert np.abs(accuracy.__returnmetric__() - 0.8333333134651184) < 1e-5, (
"Expected accuracy to be 0.8333333134651186"
)
accuracy.macro_averaging = False
assert np.abs(accuracy.__returnmetric__() - 0.8333333134651184) < 1e-5, (
"Expected accuracy to be 0.8333333134651184"
mw = MetricWrapper(
*metrics,
num_classes=num_classes,
macro_averaging=macro_averaging,
)
accuracy.__reset__()

mw(y_true, logits)
score = mw.getmetrics()
mw.resetmetric()
empty_score = mw.getmetrics()

def test_entropypred():
import torch as th

true_lab = th.rand(6, 5)

metric = EntropyPrediction(num_classes=5)

# Test if the metric stores multiple values
pred_logits = th.rand(6, 5)
metric(true_lab, pred_logits)

pred_logits = th.rand(6, 5)
metric(true_lab, pred_logits)

pred_logits = th.rand(6, 5)
metric(true_lab, pred_logits)

assert type(metric.__returnmetric__()) == th.Tensor
assert isinstance(score, dict), "Expected a dictionary output."
for m in metrics:
assert m in score, f"Expected metric '{m}' in the output."
assert score[m] >= 0, "Expected a non-negative value."

# Test than an error is raised with num_class != class dimension length
with pytest.raises(AssertionError):
metric(true_lab, th.rand(6, 6))
assert m in empty_score, f"Expected metric '{m}' in the output."
assert np.isnan(empty_score[m]), "Expected an empty list."
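
For reference, the new parametrization stays compact but covers a lot of ground: _metric_combinations() yields each metric alone, every unordered pair, and the full list, which for five metrics is 5 + 10 + 1 = 16 metric sets, crossed with four num_classes values and two macro_averaging flags for 128 test cases. A quick sanity check of that count (same logic as the generator above, just run outside pytest):

    import itertools

    METRICS = ["f1", "recall", "accuracy", "precision", "entropy"]

    def _metric_combinations():
        for m in METRICS:                                   # single metrics
            yield [m]
        for combo in itertools.combinations(METRICS, 2):    # pairs
            yield list(combo)
        yield METRICS                                       # all metrics at once

    combos = list(_metric_combinations())
    assert len(combos) == 5 + 10 + 1    # 16 metric sets
    assert len(combos) * 4 * 2 == 128   # crossed with num_classes and macro_averaging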