From 91551907cbe5c407ce5beeca3914d432045084ec Mon Sep 17 00:00:00 2001
From: "omobayode.fagbohungbe"
Date: Wed, 16 Jul 2025 22:29:08 +0000
Subject: [PATCH 1/3] feat: enabled GPTQv2

Signed-off-by: omobayode.fagbohungbe
---
 examples/GPTQ/README.md | 81 +++++++++++++++++++++++++++--------------
 fms_mo/run_quant.py     | 25 +++++++++----
 fms_mo/training_args.py |  3 +-
 3 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/examples/GPTQ/README.md b/examples/GPTQ/README.md
index f5661a12..d173a7f2 100644
--- a/examples/GPTQ/README.md
+++ b/examples/GPTQ/README.md
@@ -7,6 +7,7 @@ For generative LLMs, very often the bottleneck of inference is no longer the com
 - [FMS Model Optimizer requirements](../../README.md#requirements)
 - `gptqmodel` is needed for this example. Use `pip install gptqmodel` or [install from source](https://github.com/ModelCloud/GPTQModel/tree/main?tab=readme-ov-file)
+  - It is advised to install from source if you plan to use `GPTQv2`
 - Optionally for the evaluation section below, install [lm-eval](https://github.com/EleutherAI/lm-evaluation-harness)
 ```
 pip install lm-eval
 ```
@@ -32,7 +33,7 @@ This end-to-end example utilizes the common set of interfaces provided by `fms_m
 > - Tokenized data will be saved in `_train` and `_test`
 > - If you have trouble downloading Llama family of models from Hugging Face ([LLama models require access](https://www.llama.com/docs/getting-the-models/hugging-face/)), you can use `ibm-granite/granite-8b-code` instead

-2. **Quantize the model** using the data generated above, the following command will kick off the quantization job (by invoking `gptqmodel` under the hood.) Additional acceptable arguments can be found here in [GPTQArguments](../../fms_mo/training_args.py#L127).
+2. **Quantize the model** using the data generated above. The following command will kick off the `GPTQv1` quantization job (by invoking `gptqmodel` under the hood). Additional acceptable arguments can be found in [GPTQArguments](../../fms_mo/training_args.py#L127).

     ```bash
     python -m fms_mo.run_quant \
         --model_name_or_path  \
         --training_data_path data_train \
         --quant_method gptq \
         --output_dir Meta-Llama-3-8B-GPTQ \
         --bits 4 \
         --group_size 128
     ```

-    The model that can be found in the specified output directory (`Meta-Llama-3-8B-GPTQ` in our case) can be deployed and inferenced via `vLLM`.
+    The quantized model in the specified output directory (`Meta-Llama-3-8B-GPTQ` in our case) can be deployed and served for inference via `vLLM`. To enable `GPTQv2`, set the `quant_method` argument to `gptqv2`.

     > [!NOTE]
     > - In GPTQ, `group_size` is a trade-off between accuracy and speed, but there is an additional constraint that `in_features` of the Linear layer to be quantized needs to be an **integer multiple** of `group_size`, i.e. some models may have to use smaller `group_size` than default.
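+
+    For a quick sanity check that the exported checkpoint loads for inference, a minimal `vLLM` snippet along the following lines can be used. This is an illustrative sketch rather than part of `fms_mo`: it assumes `vllm` is installed, that the model path matches the `output_dir` used above, and the prompt and sampling settings are only placeholders.
+
+    ```python
+    # Illustrative only: load the GPTQ-quantized checkpoint produced above with vLLM.
+    from vllm import LLM, SamplingParams
+
+    llm = LLM(model="Meta-Llama-3-8B-GPTQ")  # vLLM picks up the GPTQ settings stored in the checkpoint
+    sampling = SamplingParams(temperature=0.0, max_tokens=32)
+    outputs = llm.generate(["The capital of France is"], sampling)
+    print(outputs[0].outputs[0].text)
+    ```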
@@ -82,25 +84,33 @@ This end-to-end example utilizes the common set of interfaces provided by `fms_m
 ## Example Test Results

 - Unquantized Model
--
-|Model | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
-|------------|--------------|------:|------|-----:|----------|---|-----:|---|-----:|
-| LLAMA3-8B |lambada_openai| 1|none | 5|acc |↑ |0.7103|± |0.0063|
-| | | |none | 5|perplexity|↓ |3.7915|± |0.0727|
+
+  |Model | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
+  |------------|--------------|------:|------|-----:|----------|---|-----:|---|-----:|
+  | LLAMA3-8B |lambada_openai| 1|none | 5|acc |↑ |0.7103|± |0.0063|
+  | | | |none | 5|perplexity|↓ |3.7915|± |0.0727|

 - Quantized model with the settings showed above (`desc_act` default to False.)
--
-|Model | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
-|------------|--------------|------:|------|-----:|----------|---|------:|---|-----:|
-| LLAMA3-8B |lambada_openai| 1|none | 5|acc |↑ |0.6365 |± |0.0067|
-| | | |none | 5|perplexity|↓ |5.9307 |± |0.1830|
+  - `GPTQv1`
+
+    |Model | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
+    |------------|--------------|------:|------|-----:|----------|---|------:|---|-----:|
+    | LLAMA3-8B |lambada_openai| 1|none | 5|acc |↑ |0.6365 |± |0.0067|
+    | | | |none | 5|perplexity|↓ |5.9307 |± |0.1830|
+
+  - `GPTQv2`
+
+    |Model | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
+    |------------|--------------|------:|------|-----:|----------|---|------:|---|-----:|
+    | LLAMA3-8B |lambada_openai| 1|none | 5|acc |↑ |0.6817 |± |0.0065|
+    | | | |none | 5|perplexity|↓ |4.3994 |± |0.0995|

 - Quantized model with `desc_act` set to `True` (could improve the model quality, but at the cost of inference speed.)
--
-|Model | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
-|------------|--------------|------:|------|-----:|----------|---|------:|---|-----:|
-| LLAMA3-8B |lambada_openai| 1|none | 5|acc |↑ |0.6193 |± |0.0068|
-| | | |none | 5|perplexity|↓ |5.8879 |± |0.1546|
+  - `GPTQv1`
+
+    |Model | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
+    |------------|--------------|------:|------|-----:|----------|---|------:|---|-----:|
+    | LLAMA3-8B |lambada_openai| 1|none | 5|acc |↑ |0.6193 |± |0.0068|
+    | | | |none | 5|perplexity|↓ |5.8879 |± |0.1546|

 > [!NOTE]
 > There is some randomness in generating the model and data, the resulting accuracy may vary ~$\pm$ 0.05.
@@ -108,18 +118,33 @@ This end-to-end example utilizes the common set of interfaces provided by `fms_m

 ## Code Walk-through

-1. Command line arguments will be used to create a GPTQ quantization config. Information about the required arguments and their default values can be found [here](../../fms_mo/training_args.py)
+1. Command line arguments will be used to create a GPTQ quantization config. Information about the required arguments and their default values can be found [here](../../fms_mo/training_args.py). Both `GPTQv1` and `GPTQv2` are supported.

-    ```python
-    from gptqmodel import GPTQModel, QuantizeConfig
+    - To use `GPTQv1`, set the parameter `quant_method` to `gptq` in the command line.

-    quantize_config = QuantizeConfig(
-        bits=gptq_args.bits,
-        group_size=gptq_args.group_size,
-        desc_act=gptq_args.desc_act,
-        damp_percent=gptq_args.damp_percent,
-    )
+    ```python
+    from gptqmodel import GPTQModel, QuantizeConfig
+
+    quantize_config = QuantizeConfig(
+        bits=gptq_args.bits,
+        group_size=gptq_args.group_size,
+        desc_act=gptq_args.desc_act,
+        damp_percent=gptq_args.damp_percent,
+    )
+    ```
+    - To use `GPTQv2`, simply set `quant_method` to `gptqv2` in the command line. Under the hood, two additional arguments are added to `QuantizeConfig`, i.e. `v2=True` and `v2_memory_device='cpu'`.
+    ```python
+    from gptqmodel import GPTQModel, QuantizeConfig
+
+    quantize_config = QuantizeConfig(
+        bits=gptq_args.bits,
+        group_size=gptq_args.group_size,
+        desc_act=gptq_args.desc_act,
+        damp_percent=gptq_args.damp_percent,
+        v2=True,
+        v2_memory_device='cpu',
+    )
     ```

 2. Load the pre_trained model with `gptqmodel` class/wrapper. Tokenizer is optional because we already tokenized the data in a previous step.

@@ -158,4 +183,4 @@ This end-to-end example utilizes the common set of interfaces provided by `fms_m
     tokenizer.save_pretrained(output_dir) # optional
     ```
 > [!NOTE]
-> 1. GPTQ of a 70B model usually takes ~4-10 hours on A100.
+> 1. GPTQ of a 70B model usually takes ~4-10 hours on A100 with `GPTQv1`.
diff --git a/fms_mo/run_quant.py b/fms_mo/run_quant.py
index 06ceecdc..e9062112 100644
--- a/fms_mo/run_quant.py
+++ b/fms_mo/run_quant.py
@@ -88,7 +88,7 @@ def quantize(

     logger.info(f"{fms_mo_args}\n{opt_args.quant_method}\n")

-    if opt_args.quant_method == "gptq":
+    if opt_args.quant_method in ["gptq","gptqv2"]:
         if not available_packages["gptqmodel"]:
             raise ImportError(
                 "Quantization method has been selected as gptq but unable to use external library, "
@@ -138,12 +138,23 @@ def run_gptq(model_args, data_args, opt_args, gptq_args):

     logger = set_log_level(opt_args.log_level, "fms_mo.run_gptq")

-    quantize_config = QuantizeConfig(
-        bits=gptq_args.bits,
-        group_size=gptq_args.group_size,
-        desc_act=gptq_args.desc_act,
-        damp_percent=gptq_args.damp_percent,
-    )
+    if opt_args.quant_method == "gptq":
+        quantize_config = QuantizeConfig(
+            bits=gptq_args.bits,
+            group_size=gptq_args.group_size,
+            desc_act=gptq_args.desc_act,
+            damp_percent=gptq_args.damp_percent,
+        )
+    else:
+        quantize_config = QuantizeConfig(
+            bits=gptq_args.bits,
+            group_size=gptq_args.group_size,
+            desc_act=gptq_args.desc_act,
+            damp_percent=gptq_args.damp_percent,
+            v2=True,
+            v2_memory_device="cpu",
+        )
+
     # Add custom model_type mapping to gptqmodel LUT so GPTQModel can recognize them.
for mtype, cls in custom_gptq_classes.items(): diff --git a/fms_mo/training_args.py b/fms_mo/training_args.py index 661f72bd..ae041d59 100644 --- a/fms_mo/training_args.py +++ b/fms_mo/training_args.py @@ -138,7 +138,7 @@ class OptArguments(TypeChecker): """Dataclass for optimization related arguments.""" quant_method: str = field( - metadata={"choices": ["gptq", "fp8", "dq"], "help": "Quantization technique"} + metadata={"choices": ["gptq", "gptqv2", "fp8", "dq"], "help": "Quantization technique"} ) output_dir: str = field( metadata={ @@ -226,6 +226,7 @@ class GPTQArguments(TypeChecker): cache_examples_on_gpu: bool = True + @dataclass class FP8Arguments(TypeChecker): """Dataclass for FP8 related arguments that will be used by llm-compressor.""" From 3a069a2acfb15772ad3e263c013ee15f6da0a449 Mon Sep 17 00:00:00 2001 From: "omobayode.fagbohungbe" Date: Thu, 17 Jul 2025 11:17:47 +0000 Subject: [PATCH 2/3] fix: implementing edits for lint Signed-off-by: omobayode.fagbohungbe --- fms_mo/run_quant.py | 2 +- fms_mo/training_args.py | 5 +- tutorials/quantization_tutorial.ipynb | 133 +++++++++++++------------- 3 files changed, 72 insertions(+), 68 deletions(-) diff --git a/fms_mo/run_quant.py b/fms_mo/run_quant.py index e9062112..f13ee0bc 100644 --- a/fms_mo/run_quant.py +++ b/fms_mo/run_quant.py @@ -88,7 +88,7 @@ def quantize( logger.info(f"{fms_mo_args}\n{opt_args.quant_method}\n") - if opt_args.quant_method in ["gptq","gptqv2"]: + if opt_args.quant_method in ["gptq", "gptqv2"]: if not available_packages["gptqmodel"]: raise ImportError( "Quantization method has been selected as gptq but unable to use external library, " diff --git a/fms_mo/training_args.py b/fms_mo/training_args.py index ae041d59..9f7da8e0 100644 --- a/fms_mo/training_args.py +++ b/fms_mo/training_args.py @@ -138,7 +138,10 @@ class OptArguments(TypeChecker): """Dataclass for optimization related arguments.""" quant_method: str = field( - metadata={"choices": ["gptq", "gptqv2", "fp8", "dq"], "help": "Quantization technique"} + metadata={ + "choices": ["gptq", "gptqv2", "fp8", "dq"], + "help": "Quantization technique" + } ) output_dir: str = field( metadata={ diff --git a/tutorials/quantization_tutorial.ipynb b/tutorials/quantization_tutorial.ipynb index 387685b2..5354b3d4 100644 --- a/tutorials/quantization_tutorial.ipynb +++ b/tutorials/quantization_tutorial.ipynb @@ -162,7 +162,7 @@ "\n", "# Plotting the histogram.\n", "plt.figure(figsize=(16, 10))\n", - "plt.hist(raw_data, density=True, bins=128, alpha=0.8, label='y')\n", + "plt.hist(raw_data, density=True, bins=128, alpha=0.8, label=\"y\")\n", "#plt.legend(loc='upper right')\n", "plt.xlabel(\"Data\")\n", "plt.ylabel(\"density\")\n", @@ -220,9 +220,9 @@ "isClipped=np.logical_or(raw_data>clip_max, raw_data Date: Thu, 17 Jul 2025 15:49:00 +0000 Subject: [PATCH 3/3] fix: tutorial file restored Signed-off-by: omobayode.fagbohungbe --- tutorials/quantization_tutorial.ipynb | 133 +++++++++++++------------- 1 file changed, 66 insertions(+), 67 deletions(-) diff --git a/tutorials/quantization_tutorial.ipynb b/tutorials/quantization_tutorial.ipynb index 5354b3d4..387685b2 100644 --- a/tutorials/quantization_tutorial.ipynb +++ b/tutorials/quantization_tutorial.ipynb @@ -162,7 +162,7 @@ "\n", "# Plotting the histogram.\n", "plt.figure(figsize=(16, 10))\n", - "plt.hist(raw_data, density=True, bins=128, alpha=0.8, label=\"y\")\n", + "plt.hist(raw_data, density=True, bins=128, alpha=0.8, label='y')\n", "#plt.legend(loc='upper right')\n", "plt.xlabel(\"Data\")\n", "plt.ylabel(\"density\")\n", 
@@ -220,9 +220,9 @@ "isClipped=np.logical_or(raw_data>clip_max, raw_data