diff --git a/doc/Jan_page.md b/doc/Jan_page.md new file mode 100644 index 0000000..442872e --- /dev/null +++ b/doc/Jan_page.md @@ -0,0 +1,54 @@ +# Jan Individual Task +====================== + +## Task Overview +In addition to the overall task, I was assigned the implementation of a multi-layer perceptron model, a dataset loader for a subset of the MNIST dataset, and an accuracy metric. + +## Network Implementation In-Depth +For the network part, I was tasked with making a simple MLP network model for image classification tasks. The model consists of two hidden layers with 100 neurons each followed by a leaky-relu activation. This implementation involves creating a custom class that inherits from the PyTorch `nn.Module` class. This allows our class to have two methods: the `__init__` method and a `forward` method. When we create an instance of the class, we can call the instance like a function, which will run the `forward` method. + +The network is initialized with the following parameters: +* `image_shape` +* `num_classes` + +The `image_shape` argument provides the shape of the input image (channels, height, width) which is used to correctly initialize the input size of the first layer. The `num_classes` argument defines the number of output neurons, corresponding to the number of classes in the dataset. + +The forward method in this class processes the input as follows: +1. Flattens the input image. +2. Passes the flattened input through the first fully connected layer (`fc1`). +3. Applies a LeakyReLU activation function. +4. Passes the result through the second fully connected layer (`fc2`). +5. Applies another LeakyReLU activation function. +6. Passes the result through the output layer (`out`). + +## MNIST Dataset In-Depth +For the dataset part, I was tasked with creating a custom dataset class for loading a subset of the MNIST dataset containing digits 0 to 3. This involved creating a class that inherits from the PyTorch `Dataset` class. 
+ +The class is initialized with the following parameters: +* `data_path` +* `sample_ids` +* `train` (optional, default is False) +* `transform` (optional, default is None) +* `nr_channels` (optional, default is 1) + +The `data_path` argument stores the path to the four binary files containing the MNIST dataset. The verification of the presence of these files and their download, if necessary, is handled by the `Downloader` class. The `sample_ids` parameter contains the indices of images and their respective labels that are to be loaded from the MNIST dataset. Filtering and random splitting of these indices is performed within the `load_data` function. `train` is a boolean flag indicating whether to load data from the training (for training and validation splits) or the testing (test split) part of the MNIST dataset. `transform` is a callable created with `torchvision.transforms.Compose` to be applied to the images. `nr_channels` is not used in this dataset; it is only included for compatibility with other functions. + +The class has two main methods: +* `__len__`: Returns the number of samples in the dataset. +* `__getitem__`: Retrieves the image and label at the specified index. + +## Accuracy Metric In-Depth +For the metric part, I was tasked with creating an accuracy metric class. The `Accuracy` class computes the accuracy of a model's predictions. The class is initialized with the following parameters: +* `num_classes` +* `macro_averaging` (optional, default is False) + +The `num_classes` argument specifies the number of classes in the classification task. The `macro_averaging` argument is a boolean flag specifying whether to compute the accuracy using micro or macro averaging. + +The class has the following methods: +* `forward`: Stores the true and predicted labels computed on a batch level. +* `_macro_acc`: Computes the macro-averaged accuracy on stored values. +* `_micro_acc`: Computes the micro-averaged accuracy on stored values. 
+* `__returnmetric__`: Returns the computed accuracy based on the averaging method for all stored predictions. +* `__reset__`: Resets the stored true and predicted labels. + +The `forward` method takes the true labels and predicted labels as input and stores them. The `_macro_acc` method computes the macro-average accuracy by averaging the accuracy for each class. The `_micro_acc` method computes the micro-average accuracy by calculating the overall accuracy. The `__returnmetric__` method returns the computed accuracy based on the averaging method. The `__reset__` method resets the stored true and predicted labels to prepare for the next epoch. \ No newline at end of file diff --git a/doc/about.md b/doc/about.md index 531269c..5da6b1a 100644 --- a/doc/about.md +++ b/doc/about.md @@ -1,3 +1,3 @@ # About this code -Work is still in progress ... +This project was created as part of a Collaborative Coding and Reproducible Research special curriculum, held at UiT in February 2025. diff --git a/doc/index.md b/doc/index.md index 9b536b8..d525fea 100644 --- a/doc/index.md +++ b/doc/index.md @@ -8,12 +8,12 @@ fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
:::{toctree} -:maxdepth: 2 -:caption: Some caption +:maxdepth: 1 +:caption: Table of contents about.md Magnus_page.md +Jan_page.md ::: -Individual Sections -=================== + diff --git a/pyproject.toml b/pyproject.toml index acf6e74..a6f05a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "h5py>=3.12.1", "isort>=6.0.0", "jupyterlab>=4.3.5", + "myst-parser>=4.0.1", "numpy>=2.2.2", "pandas>=2.2.3", "pip>=25.0", diff --git a/uv.lock b/uv.lock index 97ea764..f637107 100644 --- a/uv.lock +++ b/uv.lock @@ -329,6 +329,7 @@ dependencies = [ { name = "h5py" }, { name = "isort" }, { name = "jupyterlab" }, + { name = "myst-parser" }, { name = "numpy" }, { name = "pandas" }, { name = "pip" }, @@ -352,6 +353,7 @@ requires-dist = [ { name = "h5py", specifier = ">=3.12.1" }, { name = "isort", specifier = ">=6.0.0" }, { name = "jupyterlab", specifier = ">=4.3.5" }, + { name = "myst-parser", specifier = ">=4.0.1" }, { name = "numpy", specifier = ">=2.2.2" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "pip", specifier = ">=25.0" }, @@ -992,6 +994,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, ] +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = 
"sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316 }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -1028,6 +1042,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/e2/5d3f6ada4297caebe1a2add3b126fe800c96f56dbe5d1988a2cbe0b267aa/mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", size = 4695 }, ] +[[package]] +name = "myst-parser" +version = "4.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "jinja2" }, + { name = "markdown-it-py" }, + { name = "mdit-py-plugins" }, + { name = "pyyaml" }, + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/a5/9626ba4f73555b3735ad86247a8077d4603aa8628537687c839ab08bfe44/myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4", size = 93985 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579 }, +] + [[package]] name = "nbclient" version = "0.10.2" @@ -1198,7 +1229,7 @@ name = "nvidia-cudnn-cu12" version = "9.1.0.70" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 }, @@ -1209,7 +1240,7 @@ name = "nvidia-cufft-cu12" version = "11.2.1.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { 
name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, @@ -1228,9 +1259,9 @@ name = "nvidia-cusolver-cu12" version = "11.6.1.9" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, @@ -1241,7 +1272,7 @@ name = "nvidia-cusparse-cu12" version = "12.3.1.170" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 },