From 203ace1c3354079b077a0a5471132087c050ddb5 Mon Sep 17 00:00:00 2001 From: Solveig Date: Thu, 27 Feb 2025 11:23:40 +0100 Subject: [PATCH] Updated my documentations --- CollaborativeCoding/dataloaders/uspsh5_7_9.py | 68 +++++++--- CollaborativeCoding/metrics/F1.py | 76 ++++++++++-- CollaborativeCoding/models/solveig_model.py | 117 ++++++++++++------ 3 files changed, 192 insertions(+), 69 deletions(-) diff --git a/CollaborativeCoding/dataloaders/uspsh5_7_9.py b/CollaborativeCoding/dataloaders/uspsh5_7_9.py index 72cb268..879cca2 100644 --- a/CollaborativeCoding/dataloaders/uspsh5_7_9.py +++ b/CollaborativeCoding/dataloaders/uspsh5_7_9.py @@ -8,26 +8,45 @@ class USPSH5_Digit_7_9_Dataset(Dataset): """ - Custom USPS dataset class that loads images with digits 7-9 from an .h5 file. + This class loads a subset of the USPS dataset, specifically images of digits 7, 8, and 9, from an HDF5 file. + It allows for applying transformations to the images and provides methods to retrieve images and their corresponding labels. Parameters ---------- - h5_path : str - Path to the USPS `.h5` file. + data_path : str or Path + Path to the directory containing the USPS `.h5` file. This file should contain the data in the "train" or "test" group. + + sample_ids : list of int + A list of sample indices to be used from the dataset. This allows for filtering or selecting a subset of the full dataset. + + train : bool, optional, default=False + If `True`, the dataset is loaded in training mode (using the "train" group). If `False`, the dataset is loaded in test mode (using the "test" group). transform : callable, optional, default=None - A transform function to apply on images. If None, no transformation is applied. + A transformation function to apply to each image. If `None`, no transformation is applied. Typically used for data augmentation or normalization. + + nr_channels : int, optional, default=1 + The number of channels in the image. 
USPS images are typically grayscale, so this should generally be set to 1. This parameter allows for potential future flexibility. Attributes ---------- images : numpy.ndarray - The filtered images corresponding to digits 7-9. + Array of images corresponding to digits 7, 8, and 9 from the USPS dataset. The images are loaded from the HDF5 file and filtered based on the labels. labels : numpy.ndarray - The filtered labels corresponding to digits 7-9. + Array of labels corresponding to the images. Only labels of digits 7, 8, and 9 are retained, and they are mapped to 0, 1, and 2 for classification tasks. transform : callable, optional - A transform function to apply to the images. + A transformation function to apply to the images. This is passed as an argument during initialization. + + label_shift : function + A function to shift the labels for classification purposes. It maps the original labels (7, 8, 9) to (0, 1, 2). + + label_restore : function + A function to restore the original labels (7, 8, 9) from the shifted labels (0, 1, 2). + + num_classes : int + The number of unique labels in the dataset, which is 3 (for digits 7, 8, and 9). """ def __init__( @@ -36,14 +55,25 @@ def __init__( super().__init__() """ Initializes the USPS dataset by loading images and labels from the given `.h5` file. - + + The dataset is filtered to only include images of digits 7, 8, and 9, which are mapped to labels 0, 1, and 2 respectively for classification purposes. + Parameters ---------- - h5_path : str - Path to the USPS `.h5` file. - + data_path : str or Path + Path to the directory containing the USPS `.h5` file. + + sample_ids : list of int + List of sample indices to load from the dataset. + + train : bool, optional, default=False + If `True`, loads the training data from the HDF5 file. If `False`, loads the test data. + transform : callable, optional, default=None - A transform function to apply on images. + A function to apply transformations to the images. 
If None, no transformation is applied. + + nr_channels : int, optional, default=1 + The number of channels in the image. Defaults to 1 for grayscale images. """ self.filename = "usps.h5" path = data_path if isinstance(data_path, Path) else Path(data_path) @@ -72,27 +102,33 @@ def __len__(self): """ Returns the total number of samples in the dataset. + This method is required for PyTorch's Dataset class, as it allows PyTorch to determine the size of the dataset. + Returns ------- int - The number of images in the dataset. + The number of images in the dataset (after filtering for digits 7, 8, and 9). """ + return len(self.images) def __getitem__(self, id): """ Returns a sample from the dataset given an index. + This method is required for PyTorch's Dataset class, as it allows indexing into the dataset to retrieve specific samples. + Parameters ---------- idx : int - The index of the sample to retrieve. + The index of the sample to retrieve from the dataset. Returns ------- tuple - - image (PIL Image): The image at the specified index. - - label (int): The label corresponding to the image. + A tuple containing: + - image (PIL Image): The image at the specified index. + - label (int): The label corresponding to the image, shifted to be in the range [0, 2] for classification. """ # Convert to PIL Image (USPS images are typically grayscale 16x16) image = Image.fromarray(self.images[id].astype(np.uint8), mode="L") diff --git a/CollaborativeCoding/metrics/F1.py b/CollaborativeCoding/metrics/F1.py index 630c736..e8deb65 100644 --- a/CollaborativeCoding/metrics/F1.py +++ b/CollaborativeCoding/metrics/F1.py @@ -5,18 +5,67 @@ class F1Score(nn.Module): """ - F1 Score implementation with support for both macro and micro averaging. - This class computes the F1 score during training using either macro or micro averaging. + Computes the F1 score for classification tasks with support for both macro and micro averaging. 
+ + This class allows you to compute the F1 score during training or evaluation. You can select between two methods of averaging: + - **Micro Averaging**: Computes the F1 score globally, treating each individual prediction as equally important. + - **Macro Averaging**: Computes the F1 score for each class individually and then averages the scores. + Parameters ---------- num_classes : int The number of classes in the classification task. - macro_averaging : bool, default=False - If True, computes the macro-averaged F1 score. If False, computes the micro-averaged F1 score. + macro_averaging : bool, optional, default=False + If True, computes the macro-averaged F1 score. If False, computes the micro-averaged F1 score. Default is micro averaging. + + Attributes + ---------- + num_classes : int + The number of classes in the classification task. + + macro_averaging : bool + A flag to determine whether to compute the macro-averaged or micro-averaged F1 score. + + y_true : list + A list accumulating the true-label tensors from every `forward` call since the last reset. + + y_pred : list + A list accumulating the predicted class indices from every `forward` call since the last reset. + + Methods + ------- + forward(target, preds) + Stores predictions and true labels for computing the F1 score during training or evaluation. + + compute_f1() + Computes and returns the F1 score based on the stored predictions and true labels. + + _micro_F1(target, preds) + Computes the micro-averaged F1 score based on the global true positive, false positive, and false negative counts. + + _macro_F1(target, preds) + Computes the macro-averaged F1 score by calculating the F1 score per class and then averaging across all classes. + + __returnmetric__() + Computes and returns the F1 score (Micro or Macro) as specified. + + __reset__() + Resets the stored predictions and true labels, preparing for the next batch or epoch. """ def __init__(self, num_classes, macro_averaging=False): + """ + Initializes the F1Score object with the number of classes and averaging mode. 
+ + Parameters + ---------- + num_classes : int + The number of classes in the classification task. + + macro_averaging : bool, optional, default=False + If True, compute the macro-averaged F1 score. If False, compute the micro-averaged F1 score. + """ super().__init__() self.num_classes = num_classes self.macro_averaging = macro_averaging @@ -25,14 +74,15 @@ def __init__(self, num_classes, macro_averaging=False): def forward(self, target, preds): """ - Stores predictions and targets for computing the F1 score. + Stores the true labels and predictions to compute the F1 score. Parameters ---------- - preds : torch.Tensor - Predicted logits (shape: [batch_size, num_classes]). target : torch.Tensor True labels (shape: [batch_size]). + + preds : torch.Tensor + Predicted logits (shape: [batch_size, num_classes]). """ preds = torch.argmax(preds, dim=-1) # Convert logits to class indices self.y_true.append(target.detach()) @@ -47,7 +97,7 @@ def compute_f1(self): Returns ------- torch.Tensor - The computed F1 score. + The computed F1 score. Returns NaN if no predictions or targets are available. """ if not self.y_true or not self.y_pred: # Check if empty return torch.tensor(np.nan) @@ -63,7 +113,7 @@ def compute_f1(self): ) def _micro_F1(self, target, preds): - """Computes Micro F1 Score (global TP, FP, FN).""" + """Computes the Micro-averaged F1 score (global TP, FP, FN).""" tp = torch.sum(preds == target).float() fp = torch.sum(preds != target).float() fn = fp # Since all errors are either FP or FN @@ -75,7 +125,7 @@ def _micro_F1(self, target, preds): return f1 def _macro_F1(self, target, preds): - """Computes Macro F1 Score in a vectorized way (no loops).""" + """Computes the Macro-averaged F1 score.""" num_classes = self.num_classes target = target.long() # Ensure target is a LongTensor preds = preds.long() @@ -100,12 +150,12 @@ def _macro_F1(self, target, preds): def __returnmetric__(self): """ - Computes and returns the F1 score (Micro or Macro). 
+ Computes and returns the F1 score (Micro or Macro) based on the stored predictions and targets. Returns ------- torch.Tensor - The computed F1 score. + The computed F1 score. Returns NaN if no predictions or targets are available. """ if not self.y_true or not self.y_pred: # Check if empty return torch.tensor(np.nan) @@ -121,6 +171,6 @@ def __returnmetric__(self): ) def __reset__(self): - """Resets stored predictions and targets.""" + """Resets the stored predictions and targets for the next batch or epoch.""" self.y_true = [] self.y_pred = [] diff --git a/CollaborativeCoding/models/solveig_model.py b/CollaborativeCoding/models/solveig_model.py index 96407b0..5b0f32f 100644 --- a/CollaborativeCoding/models/solveig_model.py +++ b/CollaborativeCoding/models/solveig_model.py @@ -4,23 +4,30 @@ def find_fc_input_shape(image_shape, model): """ - Find the shape of the input to the fully connected layer after passing through the convolutional layers. + Finds the shape of the input to the fully connected layer after passing through the convolutional layers. - Code inspired by @Seilmast (https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam/issues/67#issuecomment-2651212254) + This function takes an input image shape and the model's convolutional layers and computes + the number of features passed into the first fully connected layer after the image has been processed + through the convolutional layers. + + Code inspired by @Seilmast (https://github.com/SFI-Visual-Intelligence/Collaborative-Coding-Exam/issues/67#issuecomment-2651212254). Args ---- image_shape : tuple(int, int, int) Shape of the input image (C, H, W), where C is the number of channels, - H is the height, and W is the width of the image. + H is the height, and W is the width of the image. This shape defines the input image dimensions. 
+ model : nn.Module - The CNN model containing the convolutional layers, whose output size is used to - determine the number of input features for the fully connected layer. + The CNN model containing the convolutional layers. This model is used to pass the image through its + layers to determine the output size, which is used to calculate the number of input features for the + fully connected layer. Returns ------- int - The number of elements in the input to the fully connected layer. + The number of elements in the input to the fully connected layer after the image has passed + through the convolutional layers. This value is used to initialize the size of the fully connected layer. """ dummy_img = torch.randn(1, *image_shape) @@ -35,28 +42,60 @@ def find_fc_input_shape(image_shape, model): class SolveigModel(nn.Module): """ - A Convolutional Neural Network model for classification. + A Convolutional Neural Network (CNN) model for classification. - Args - ---- - image_shape : tuple(int, int, int) - Shape of the input image (C, H, W). - num_classes : int - Number of classes in the dataset. - - Attributes: - ----------- - conv_block1 : nn.Sequential - First convolutional block containing a convolutional layer, ReLU activation, and max-pooling. - conv_block2 : nn.Sequential - Second convolutional block containing a convolutional layer and ReLU activation. - conv_block3 : nn.Sequential - Third convolutional block containing a convolutional layer and ReLU activation. - fc1 : nn.Linear - Fully connected layer that outputs the final classification scores. - """ + This model is designed for image classification tasks. It contains three convolutional blocks followed by + a fully connected layer to make class predictions. + + Args + ---- + image_shape : tuple(int, int, int) + Shape of the input image (C, H, W), where C is the number of channels, + H is the height, and W is the width of the image. 
This parameter defines the input shape of the image + that will be passed through the network. + + num_classes : int + The number of output classes for classification. This defines the size of the output layer (i.e., the + number of units in the final fully connected layer). + + Attributes + ---------- + conv_block1 : nn.Sequential + The first convolutional block consisting of a convolutional layer, ReLU activation, and max-pooling. + + conv_block2 : nn.Sequential + The second convolutional block consisting of a convolutional layer and ReLU activation. + + conv_block3 : nn.Sequential + The third convolutional block consisting of a convolutional layer and ReLU activation. + + fc1 : nn.Linear + The fully connected layer that takes the output from the convolutional blocks and outputs the final + classification logits (raw scores for each class). + + Methods + ------- + forward(x) + Defines the forward pass of the network, which passes the input through the convolutional layers + followed by the fully connected layer to produce class logits. + """ def __init__(self, image_shape, num_classes): + """ + Initializes the SolveigModel with convolutional and fully connected layers. + + The model is constructed using three convolutional blocks, followed by a fully connected layer. + The size of the input to the fully connected layer is determined dynamically based on the input image shape. + + Args + ---- + image_shape : tuple(int, int, int) + The shape of the input image (C, H, W) where C is the number of channels, + H is the height, and W is the width. + + num_classes : int + The number of classes for classification. This defines the output size of the final fully connected layer. + """ super().__init__() C, *_ = image_shape @@ -86,12 +125,19 @@ def __init__(self, image_shape, num_classes): def forward(self, x): """ - Defines the forward pass. - Args: - x (torch.Tensor): A four-dimensional tensor with shape - (Batch Size, Channels, Image Height, Image Width). 
- Returns: - torch.Tensor: The output tensor containing class logits for each input sample. + Defines the forward pass of the network. + + Args + ---- + x : torch.Tensor + A 4D tensor with shape (Batch Size, Channels, Height, Width) representing the input images. + + Returns + ------- + torch.Tensor + A 2D tensor of shape (Batch Size, num_classes) containing the logits (raw class scores) + for each input image in the batch. These logits can be passed through a softmax function + for probability values. """ x = self.conv_block1(x) x = self.conv_block2(x) @@ -102,12 +148,3 @@ def forward(self, x): return x - -if __name__ == "__main__": - x = torch.randn(1, 3, 28, 28) - - model = SolveigModel(x.shape[1:], 3) - - y = model(x) - - print(y)