Skip to content
Merged

Sync #28

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
d88ef49
Dont track (personal) binary directory
c-salomonsen Jan 30, 2025
1b20dd4
Add a dataset class loading USPS images with labels 0 to 6
c-salomonsen Jan 30, 2025
1e69248
dont track pycache and notebook checkpoints
c-salomonsen Jan 30, 2025
726c9e0
add jupyter, formatters/lsp/lint pytest and h5py
c-salomonsen Jan 30, 2025
85d4ebe
Merge branch 'main' into christian/dataloader
c-salomonsen Jan 30, 2025
9427e0a
Merge pull request #16 from SFI-Visual-Intelligence/magnus-branch
Seilmast Jan 30, 2025
d131266
Add USPS HDF5 dataloader and F1 metric implementation
sot176 Jan 30, 2025
61b0e53
Remove notebook
c-salomonsen Jan 30, 2025
1a664b7
Merge pull request #18 from SFI-Visual-Intelligence/christian/refactor
c-salomonsen Jan 30, 2025
cba9b80
Simplify how metrics are parsed
c-salomonsen Jan 30, 2025
380116b
Add automatic formatting with ruff and import sorting (isort)
c-salomonsen Jan 30, 2025
85f7edf
Merge pull request #20 from SFI-Visual-Intelligence/christian/formatting
c-salomonsen Jan 30, 2025
8693d41
Merge pull request #19 from SFI-Visual-Intelligence/christian/simpler…
c-salomonsen Jan 30, 2025
53d23e3
Auto-format: Applied ruff format and isort
github-actions[bot] Jan 30, 2025
565cb80
Merge branch 'main' into christian/dataloader
c-salomonsen Jan 31, 2025
8a50e7c
Move code to utils folder
c-salomonsen Jan 31, 2025
7579ba3
USPS dataloader for 0-6 digits
c-salomonsen Jan 31, 2025
6dfd94d
Make dataloaders module
c-salomonsen Jan 31, 2025
6ad365c
Add usps dataloader as alternative
c-salomonsen Jan 31, 2025
1947e82
Format using ruff
c-salomonsen Jan 31, 2025
8b358bf
Use recommended python gitignore template
c-salomonsen Jan 30, 2025
eae20cc
add __all__ to __init__.py
c-salomonsen Jan 31, 2025
781362e
fix relative imports
c-salomonsen Jan 31, 2025
8ef502f
Modernize to use pathlib instead of os.path
c-salomonsen Jan 31, 2025
faac193
load_data now gives arguments to the datasets
c-salomonsen Jan 31, 2025
f7c2058
Fix bug where string was treated as a list in argparse due to nargs="+"
c-salomonsen Jan 31, 2025
d045a2a
Add option for setting device to mps (for mac) and a dry_run parameter
c-salomonsen Jan 31, 2025
ed1f1ee
Merge pull request #21 from SFI-Visual-Intelligence/christian/dataloader
c-salomonsen Jan 31, 2025
e5aafb0
Auto-format: Applied ruff format and isort
github-actions[bot] Jan 31, 2025
3e601f7
updated documentation of my functions
sot176 Jan 31, 2025
4f981ec
Created the folder for our tests
sot176 Jan 31, 2025
afeae2a
Delete .idea directory
sot176 Jan 31, 2025
40bb5c0
Add ChristianModel: 2 layer CNN w/maxpooling
c-salomonsen Jan 31, 2025
4159b78
Instead of formatting changes, fail the test if code needs formatting
c-salomonsen Jan 31, 2025
fc787c2
finds number of channels based on dataset. Adds num_classes to dataset
c-salomonsen Jan 31, 2025
3f79234
Merge pull request #26 from SFI-Visual-Intelligence/fix-main
c-salomonsen Jan 31, 2025
cd1e086
Add Recall metric
c-salomonsen Jan 31, 2025
e772828
Merge branch 'main' into christian/model
c-salomonsen Jan 31, 2025
79fbfed
Merge branch 'christian/model' into christian/metrics
c-salomonsen Jan 31, 2025
60abd72
Merge pull request #25 from SFI-Visual-Intelligence/christian/format-…
c-salomonsen Jan 31, 2025
efe6894
Fix bug where labels werent put on the device
c-salomonsen Jan 31, 2025
68b5616
Onehot encode labels in dataset
c-salomonsen Jan 31, 2025
970fe05
Merge pull request #22 from SFI-Visual-Intelligence/solveig-branch
sot176 Jan 31, 2025
4350664
Merge pull request #24 from SFI-Visual-Intelligence/christian/model
c-salomonsen Jan 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# CI workflow: lints the code under utils/ with Ruff and verifies import
# ordering with isort. Neither step rewrites files; a violation fails the job.
name: Format

# Trigger only for pushes and pull requests that touch files under utils/.
on:
  push:
    paths:
      - 'utils/**'
  pull_request:
    paths:
      - 'utils/**'

jobs:
  format:
    name: Run Ruff and isort
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          pip install ruff isort

      # NOTE(review): `ruff check` runs lint rules only. If the intent is to
      # fail on unformatted code, `ruff format --check utils/` would be the
      # matching command - confirm before changing.
      - name: Run Ruff check
        run: |
          ruff check utils/

      # --check-only reports incorrectly sorted imports without modifying files.
      - name: Run isort check
        run: |
          isort --check-only utils/
8 changes: 8 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ dependencies:
- sphinx-autobuild
- sphinx-rtd-theme
- pip
- h5py
- black
- isort
- jupyterlab
- numpy
- pandas
- pytest
- ruff
- scalene
prefix: /opt/miniconda3/envs/cc-exam

237 changes: 160 additions & 77 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,158 @@
import torch as th
import torch.nn as nn
from torch.utils.data import DataLoader
import argparse
import wandb
from pathlib import Path

import numpy as np
from utils import MetricWrapper, load_model, load_data, createfolders
import torch as th
import torch.nn as nn
import wandb
from torch.utils.data import DataLoader

from utils import MetricWrapper, createfolders, load_data, load_model


def main():
'''
"""

Parameters
----------

Returns
-------

Raises
------
'''

"""
parser = argparse.ArgumentParser(
prog='',
description='',
epilog='',
)
#Structuture related values
parser.add_argument('--datafolder', type=str, default='Data/', help='Path to where data will be saved during training.')
parser.add_argument('--resultfolder', type=str, default='Results/', help='Path to where results will be saved during evaluation.')
parser.add_argument('--modelfolder', type=str, default='Experiments/', help='Path to where model weights will be saved at the end of training.')
parser.add_argument('--savemodel', type=bool, default=False, help='Whether model should be saved or not.')

parser.add_argument('--download-data', type=bool, default=False, help='Whether the data should be downloaded or not. Might cause code to start a bit slowly.')

#Data/Model specific values
parser.add_argument('--modelname', type=str, default='MagnusModel',
choices = ['MagnusModel'], help="Model which to be trained on")
parser.add_argument('--dataset', type=str, default='svhn',
choices=['svhn'], help='Which dataset to train the model on.')

parser.add_argument('--EntropyPrediction', type=bool, default=True, help='Include the Entropy Prediction metric in evaluation')
parser.add_argument('--F1Score', type=bool, default=True, help='Include the F1Score metric in evaluation')
parser.add_argument('--Recall', type=bool, default=True, help='Include the Recall metric in evaluation')
parser.add_argument('--Precision', type=bool, default=True, help='Include the Precision metric in evaluation')
parser.add_argument('--Accuracy', type=bool, default=True, help='Include the Accuracy metric in evaluation')

#Training specific values
parser.add_argument('--epoch', type=int, default=20, help='Amount of training epochs the model will do.')
parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning rate parameter for model training.')
parser.add_argument('--batchsize', type=int, default=64, help='Amount of training images loaded in one go')

prog="",
description="",
epilog="",
)
# Structuture related values
parser.add_argument(
"--datafolder",
type=Path,
default="Data",
help="Path to where data will be saved during training.",
)
parser.add_argument(
"--resultfolder",
type=Path,
default="Results",
help="Path to where results will be saved during evaluation.",
)
parser.add_argument(
"--modelfolder",
type=Path,
default="Experiments",
help="Path to where model weights will be saved at the end of training.",
)
parser.add_argument(
"--savemodel",
type=bool,
default=False,
help="Whether model should be saved or not.",
)

parser.add_argument(
"--download-data",
type=bool,
default=False,
help="Whether the data should be downloaded or not. Might cause code to start a bit slowly.",
)

# Data/Model specific values
parser.add_argument(
"--modelname",
type=str,
default="MagnusModel",
choices=["MagnusModel", "ChristianModel"],
help="Model which to be trained on",
)
parser.add_argument(
"--dataset",
type=str,
default="svhn",
choices=["svhn", "usps_0-6"],
help="Which dataset to train the model on.",
)

parser.add_argument(
"--metric",
type=str,
default=["entropy"],
choices=["entropy", "f1", "recall", "precision", "accuracy"],
nargs="+",
help="Which metric to use for evaluation",
)

# Training specific values
parser.add_argument(
"--epoch",
type=int,
default=20,
help="Amount of training epochs the model will do.",
)
parser.add_argument(
"--learning_rate",
type=float,
default=0.001,
help="Learning rate parameter for model training.",
)
parser.add_argument(
"--batchsize",
type=int,
default=64,
help="Amount of training images loaded in one go",
)
parser.add_argument(
"--device",
type=str,
default="cpu",
choices=["cuda", "cpu", "mps"],
help="Which device to run the training on.",
)
parser.add_argument(
"--dry_run",
action="store_true",
help="If true, the code will not run the training loop.",
)

args = parser.parse_args()


createfolders(args)

device = 'cuda' if th.cuda.is_available() else 'cpu'

#load model
model = load_model()

createfolders(args.datafolder, args.resultfolder, args.modelfolder)

device = args.device

metrics = MetricWrapper(*args.metric)

# Dataset
traindata = load_data(
args.dataset,
train=True,
data_path=args.datafolder,
download=args.download_data,
)
validata = load_data(
args.dataset,
train=False,
data_path=args.datafolder,
)

# Find number of channels in the dataset
if len(traindata[0][0].shape) == 2:
channels = 1
else:
channels = traindata[0][0].shape[0]

# load model
model = load_model(
args.modelname,
in_channels=channels,
num_classes=traindata.num_classes,
)
model.to(device)

metrics = MetricWrapper(
EntropyPred = args.EntropyPrediction,
F1Score = args.F1Score,
Recall = args.Recall,
Precision = args.Precision,
Accuracy = args.Accuracy
)

#Dataset
traindata = load_data(args.dataset)
validata = load_data(args.dataset)


trainloader = DataLoader(traindata,
batch_size=args.batchsize,
shuffle=True,
Expand All @@ -82,48 +162,51 @@ def main():
batch_size=args.batchsize,
shuffle=False,
pin_memory=True)

criterion = nn.CrossEntropyLoss()
optimizer = th.optim.Adam(model.parameters(), lr = args.learning_rate)


optimizer = th.optim.Adam(model.parameters(), lr=args.learning_rate)

# This allows us to load all the components without running the training loop
if args.dry_run:
print("Dry run completed")
exit(0)

wandb.init(project='',
tags=[])
wandb.watch(model)

for epoch in range(args.epoch):
#Training loop start

# Training loop start
trainingloss = []
model.train()
for x, y in traindata:
for x, y in trainloader:
x, y = x.to(device), y.to(device)
pred = model.forward(x)

loss = criterion(y, pred)
loss.backward()

optimizer.step()
optimizer.zero_grad(set_to_none=True)
trainingloss.append(loss.item())

evalloss = []
#Eval loop start
# Eval loop start
model.eval()
with th.no_grad():
for x, y in valiloader:
x = x.to(device)
x, y = x.to(device), y.to(device)
pred = model.forward(x)
loss = criterion(y, pred)
evalloss.append(loss.item())

wandb.log({
'Epoch': epoch,
'Train loss': np.mean(trainingloss),
'Evaluation Loss': np.mean(evalloss)
})



if __name__ == '__main__':
main()
main()
Loading
Loading