diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..0e064db
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,29 @@
+name: Test
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - uses: mamba-org/setup-micromamba@v1
+        with:
+          micromamba-version: '2.0.5-0' # any version from https://github.com/mamba-org/micromamba-releases
+          environment-file: environment.yml
+          init-shell: bash
+          cache-environment: true
+          post-cleanup: 'all'
+          generate-run-shell: false
+
+      - name: Run tests
+        run: |
+          PYTHONPATH=. pytest tests
+        shell: bash -el {0}
diff --git a/environment.yml b/environment.yml
index f1df2f5..214e5fc 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,5 +18,8 @@ dependencies:
   - pytest
   - ruff
   - scalene
+  - pip:
+    - torch
+    - torchvision
 prefix: /opt/miniconda3/envs/cc-exam
diff --git a/tests/test_createfolders.py b/tests/test_createfolders.py
index e69de29..2db90aa 100644
--- a/tests/test_createfolders.py
+++ b/tests/test_createfolders.py
@@ -0,0 +1,49 @@
+from utils import createfolders
+
+
+def test_createfolders():
+    import argparse
+    from pathlib import Path
+    from tempfile import TemporaryDirectory
+
+    with TemporaryDirectory() as temp_dir:
+        temp_dir = Path(temp_dir)
+
+        parser = argparse.ArgumentParser()
+
+        # Structure related values
+        parser.add_argument(
+            "--datafolder",
+            type=Path,
+            default=temp_dir / "Data",
+            help="Path to where data will be saved during training.",
+        )
+        parser.add_argument(
+            "--resultfolder",
+            type=Path,
+            default=temp_dir / "Results",
+            help="Path to where results will be saved during evaluation.",
+        )
+        parser.add_argument(
+            "--modelfolder",
+            type=Path,
+            default=temp_dir / "Experiments",
+            help="Path to where model weights will be saved at the end of training.",
+        )
+
+        args = parser.parse_args(
+            [
+                "--datafolder",
+                str(temp_dir / "Data"),
+                "--resultfolder",
+                str(temp_dir / "Results"),
+                "--modelfolder",
+                str(temp_dir / "Experiments"),
+            ]
+        )
+
+        createfolders(args.datafolder, args.resultfolder, args.modelfolder)
+
+        assert (temp_dir / "Data").exists()
+        assert (temp_dir / "Results").exists()
+        assert (temp_dir / "Experiments").exists()
diff --git a/tests/test_dataloaders.py b/tests/test_dataloaders.py
index e69de29..16534b1 100644
--- a/tests/test_dataloaders.py
+++ b/tests/test_dataloaders.py
@@ -0,0 +1,20 @@
+from utils.dataloaders.usps_0_6 import USPSDataset0_6
+
+
+def test_uspsdataset0_6():
+    from pathlib import Path
+    from tempfile import TemporaryFile
+
+    import h5py
+    import numpy as np
+
+    with TemporaryFile() as tf:
+        with h5py.File(tf, "w") as f:
+            f["train/data"] = np.random.rand(10, 16 * 16)
+            f["train/target"] = np.array([6, 5, 4, 3, 2, 1, 0, 0, 0, 0])
+
+        dataset = USPSDataset0_6(data_path=tf, train=True)
+        assert len(dataset) == 10
+        data, target = dataset[0]
+        assert data.shape == (1, 16, 16)
+        assert all(target == np.array([0, 0, 0, 0, 0, 0, 1]))
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index e69de29..c25d861 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -0,0 +1,16 @@
+from utils.metrics import Recall
+
+
+def test_recall():
+    import torch
+
+    recall = Recall(7)
+
+    y_true = torch.tensor([0, 1, 2, 3, 4, 5, 6])
+    y_pred = torch.tensor([2, 1, 2, 1, 4, 5, 6])
+
+    recall_score = recall(y_true, y_pred)
+
+    assert recall_score.allclose(torch.tensor(0.7143), atol=1e-5), (
{recall_score.item()}" + ) diff --git a/tests/test_models.py b/tests/test_models.py index e69de29..4747490 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -0,0 +1,19 @@ +import pytest +import torch + +from utils.models import ChristianModel + + +@pytest.mark.parametrize("in_channels, num_classes", [(1, 6), (3, 6)]) +def test_christian_model(in_channels, num_classes): + n, c, h, w = 5, in_channels, 16, 16 + + model = ChristianModel(c, num_classes) + + x = torch.randn(n, c, h, w) + y = model(x) + + assert y.shape == (n, num_classes), f"Shape: {y.shape}" + assert y.sum(dim=1).allclose(torch.ones(n), atol=1e-5), ( + f"Softmax output should sum to 1, but got: {y.sum()}" + ) diff --git a/utils/createfolders.py b/utils/createfolders.py index cdc3d4b..f6374fb 100644 --- a/utils/createfolders.py +++ b/utils/createfolders.py @@ -1,6 +1,4 @@ -import argparse from pathlib import Path -from tempfile import TemporaryDirectory def createfolders(*dirs: Path) -> None: @@ -16,47 +14,3 @@ def createfolders(*dirs: Path) -> None: for dir in dirs: dir.mkdir(parents=True, exist_ok=True) - - -def test_createfolders(): - with TemporaryDirectory() as temp_dir: - temp_dir = Path(temp_dir) - - parser = argparse.ArgumentParser() - - # Structuture related values - parser.add_argument( - "--datafolder", - type=Path, - default=temp_dir / "Data", - help="Path to where data will be saved during training.", - ) - parser.add_argument( - "--resultfolder", - type=Path, - default=temp_dir / "Results", - help="Path to where results will be saved during evaluation.", - ) - parser.add_argument( - "--modelfolder", - type=Path, - default=temp_dir / "Experiments", - help="Path to where model weights will be saved at the end of training.", - ) - - args = parser.parse_args( - [ - "--datafolder", - temp_dir / "Data", - "--resultfolder", - temp_dir / "Results", - "--modelfolder", - temp_dir / "Experiments", - ] - ) - - createfolders(args.datafolder, args.resultfolder, args.modelfolder) - - assert (temp_dir / "Data").exists() - assert (temp_dir / "Results").exists() - assert (temp_dir / "Experiments").exists() diff --git a/utils/dataloaders/usps_0_6.py b/utils/dataloaders/usps_0_6.py index 4e68191..41fdc0d 100644 --- a/utils/dataloaders/usps_0_6.py +++ b/utils/dataloaders/usps_0_6.py @@ -71,7 +71,7 @@ def __init__( download: bool = False, ): super().__init__() - self.path = list(data_path.glob("*.h5"))[0] + self.path = data_path self.transform = transform self.num_classes = 7 @@ -116,19 +116,3 @@ def __getitem__(self, idx): data = self.transform(data) return data, target - - -def test_uspsdataset0_6(): - import pytest - - datapath = Path("data/USPS/usps.h5") - - dataset = USPSDataset0_6(path=datapath, mode="train") - assert len(dataset) == 5460 - data, target = dataset[0] - assert data.shape == (16, 16) - assert target == 6 - - # Test for an invalid mode - with pytest.raises(ValueError): - USPSDataset0_6(path=datapath, mode="inference") diff --git a/utils/dataloaders/uspsh5_7_9.py b/utils/dataloaders/uspsh5_7_9.py index a343554..98cbd03 100644 --- a/utils/dataloaders/uspsh5_7_9.py +++ b/utils/dataloaders/uspsh5_7_9.py @@ -1,9 +1,9 @@ -from torch.utils.data import Dataset -import numpy as np import h5py -from torchvision import transforms -from PIL import Image +import numpy as np import torch +from PIL import Image +from torch.utils.data import Dataset +from torchvision import transforms class USPSH5_Digit_7_9_Dataset(Dataset): @@ -95,14 +95,20 @@ def __getitem__(self, id): def main(): # Example Usage: - transform = 
-    transform = transforms.Compose([
-        transforms.Resize((16, 16)),  # Ensure images are 16x16
-        transforms.ToTensor(),
-        transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
-    ])
+    transform = transforms.Compose(
+        [
+            transforms.Resize((16, 16)),  # Ensure images are 16x16
+            transforms.ToTensor(),
+            transforms.Normalize((0.5,), (0.5,)),  # Normalize to [-1, 1]
+        ]
+    )
 
     # Load the dataset
-    dataset = USPSH5_Digit_7_9_Dataset(h5_path="C:/Users/Solveig/OneDrive/Dokumente/UiT PhD/Courses/Git/usps.h5", mode="train", transform=transform)
+    dataset = USPSH5_Digit_7_9_Dataset(
+        h5_path="C:/Users/Solveig/OneDrive/Dokumente/UiT PhD/Courses/Git/usps.h5",
+        mode="train",
+        transform=transform,
+    )
     data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)
     batch = next(iter(data_loader))  # grab a batch from the dataloader
     img, label = batch
@@ -112,5 +118,6 @@ def main():
 
     # Check dataset size
     print(f"Dataset size: {len(dataset)}")
-if __name__ == '__main__':
-    main()
\ No newline at end of file
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/metrics/F1.py b/utils/metrics/F1.py
index 16c87f8..d13bddb 100644
--- a/utils/metrics/F1.py
+++ b/utils/metrics/F1.py
@@ -1,40 +1,41 @@
-import torch.nn as nn
 import torch
+import torch.nn as nn
 
 
 class F1Score(nn.Module):
     """
-        F1 Score implementation with direct averaging inside the compute method.
+    F1 Score implementation with direct averaging inside the compute method.
+
+    Parameters
+    ----------
+    num_classes : int
+        Number of classes.
 
-        Parameters
-        ----------
-        num_classes : int
-            Number of classes.
+    Attributes
+    ----------
+    num_classes : int
+        The number of classes.
 
-        Attributes
-        ----------
-        num_classes : int
-            The number of classes.
+    tp : torch.Tensor
+        Tensor for True Positives (TP) for each class.
 
-        tp : torch.Tensor
-            Tensor for True Positives (TP) for each class.
+    fp : torch.Tensor
+        Tensor for False Positives (FP) for each class.
 
-        fp : torch.Tensor
-            Tensor for False Positives (FP) for each class.
+    fn : torch.Tensor
+        Tensor for False Negatives (FN) for each class.
+    """
 
-        fn : torch.Tensor
-            Tensor for False Negatives (FN) for each class.
-    """
     def __init__(self, num_classes):
         """
-            Initializes the F1Score object, setting up the necessary state variables.
+        Initializes the F1Score object, setting up the necessary state variables.
 
-            Parameters
-            ----------
-            num_classes : int
-                The number of classes in the classification task.
+        Parameters
+        ----------
+        num_classes : int
+            The number of classes in the classification task.
 
-            """
+        """
 
         super().__init__()
 
@@ -47,16 +48,16 @@ def __init__(self, num_classes):
 
     def update(self, preds, target):
         """
-            Update the variables with predictions and true labels.
+        Update the variables with predictions and true labels.
 
-            Parameters
-            ----------
-            preds : torch.Tensor
-                Predicted logits (shape: [batch_size, num_classes]).
+        Parameters
+        ----------
+        preds : torch.Tensor
+            Predicted logits (shape: [batch_size, num_classes]).
 
-            target : torch.Tensor
-                True labels (shape: [batch_size]).
-            """
+        target : torch.Tensor
+            True labels (shape: [batch_size]).
+        """
         preds = torch.argmax(preds, dim=1)
 
         # Calculate True Positives (TP), False Positives (FP), and False Negatives (FN) per class
@@ -76,17 +77,20 @@ def compute(self):
         """
 
         # Compute F1 score based on the specified averaging method
-        f1_score = 2 * torch.sum(self.tp) / (2 * torch.sum(self.tp) + torch.sum(self.fp) + torch.sum(self.fn))
+        f1_score = (
+            2
+            * torch.sum(self.tp)
+            / (2 * torch.sum(self.tp) + torch.sum(self.fp) + torch.sum(self.fn))
+        )
 
         return f1_score
 
 
 def test_f1score():
     f1_metric = F1Score(num_classes=3)
 
-    preds = torch.tensor([[0.8, 0.1, 0.1],
-                          [0.2, 0.7, 0.1],
-                          [0.2, 0.3, 0.5],
-                          [0.1, 0.2, 0.7]])
+    preds = torch.tensor(
+        [[0.8, 0.1, 0.1], [0.2, 0.7, 0.1], [0.2, 0.3, 0.5], [0.1, 0.2, 0.7]]
+    )
 
     target = torch.tensor([0, 1, 0, 2])
diff --git a/utils/metrics/recall.py b/utils/metrics/recall.py
index 4aaae43..ab9ae16 100644
--- a/utils/metrics/recall.py
+++ b/utils/metrics/recall.py
@@ -40,23 +40,3 @@ def forward(self, y_true, y_pred):
         recall = true_positives / (true_positives + false_negatives)
 
         return recall
-
-
-def test_recall():
-    recall = Recall(7)
-
-    y_true = torch.tensor([0, 1, 2, 3, 4, 5, 6])
-    y_pred = torch.tensor([2, 1, 2, 1, 4, 5, 6])
-
-    recall_score = recall(y_true, y_pred)
-
-    assert recall_score.allclose(torch.tensor(0.7143), atol=1e-5), f"Recall Score: {recall_score.item()}"
-
-
-def test_one_hot_encode():
-    num_classes = 7
-
-    y_true = torch.tensor([0, 1, 2, 3, 4, 5, 6])
-    y_onehot = one_hot_encode(y_true, num_classes)
-
-    assert y_onehot.shape == (7, 7), f"Shape: {y_onehot.shape}"
diff --git a/utils/models/christian_model.py b/utils/models/christian_model.py
index 9bdd2da..1adb76e 100644
--- a/utils/models/christian_model.py
+++ b/utils/models/christian_model.py
@@ -1,4 +1,3 @@
-import pytest
 import torch
 import torch.nn as nn
 
@@ -49,6 +48,7 @@ class ChristianModel(nn.Module):
     CNN2 Output Shape: (5, 100, 4, 4)
     FC Output Shape: (5, num_classes)
     """
+
    def __init__(self, in_channels, num_classes):
        super().__init__()

@@ -69,21 +69,7 @@ def forward(self, x):
         return x
 
 
-@pytest.mark.parametrize("in_channels, num_classes", [(1, 6), (3, 6)])
-def test_christian_model(in_channels, num_classes):
-    n, c, h, w = 5, in_channels, 16, 16
-
-    model = ChristianModel(c, num_classes)
-
-    x = torch.randn(n, c, h, w)
-    y = model(x)
-
-    assert y.shape == (n, num_classes), f"Shape: {y.shape}"
-    assert y.sum(dim=1).allclose(torch.ones(n), atol=1e-5), f"Softmax output should sum to 1, but got: {y.sum()}"
-
-
 if __name__ == "__main__":
-
     model = ChristianModel(3, 7)
 
     x = torch.randn(3, 3, 16, 16)
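To reproduce the CI job locally, the workflow above reduces to running `PYTHONPATH=. pytest tests` inside the environment defined by environment.yml. A minimal sketch, assuming micromamba is installed and the commands run from the repository root; the `cc-exam` environment name is taken from the prefix line in environment.yml:

    # Create the environment from the spec used by the workflow
    micromamba create -n cc-exam -f environment.yml
    # Run the suite the same way the workflow does (repo root on PYTHONPATH)
    micromamba run -n cc-exam bash -c "PYTHONPATH=. pytest tests"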