fix: correct whitespace in repo

bayo-ibm · bayo-ibm · commit 24a7205362f0 · 2025-08-20T16:53:29.000-04:00
Signed-off-by: Omobayode Fagbohungbe <omobayode.fagbohungbe@ibm.com>
diff --git a/fms_mo/dq.py b/fms_mo/dq.py
@@ -134,10 +134,12 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
         low_cpu_mem_usage=bool(model_args.device_map),
     )
 
-    inference = model.config.to_dict().get("quantization_config", None)
+    inference_qconfig = None
+    if hasattr(model, "config"):
+        inference_qconfig = model.config.to_dict().get("quantization_config", None)
 
-    if inference:
-        quant_setting = check_quantization_setting(inference)
+    if inference_qconfig:
+        quant_setting = check_quantization_setting(inference_qconfig)
         if quant_setting:
             logger.info("Quantization config settings validated ")
             model = convert_fp8_vllm_to_fms_mo(model=model)
@@ -152,7 +154,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
     logger.info(f"Model is at {model.device} after intialization")
     logger.info(f"Tokenizer is {tokenizer}, block size is {block_size}")
 
-    if not inference:
+    if not inference_qconfig:
         logger.info("quantization mode activated, initalizing the qcfg file ")
         qcfg = qconfig_init(recipe="dq", args=fms_mo_args)
     else:
@@ -198,7 +200,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
 
     qcfg["model"] = model_args.model_name_or_path
     # config layers to skip, smooth scale
-    if not inference:
+    if not inference_qconfig:
         config_quantize_smooth_layers(qcfg)
 
     use_dynamo = True
@@ -231,7 +233,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
     )
 
     # For loading or creating smoothquant scale. Sometimes we may include scales in ckpt as well.
-    if not inference and qcfg["smoothq"]:
+    if not inference_qconfig and qcfg["smoothq"]:
         scale_file = Path(f"./act_scales/{qcfg['model'].replace('/', '-')}.pt")
         if qcfg.get("act_scale_path", None):
             # user provided a scale file (or a dir)
@@ -265,12 +267,12 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
             use_layer_name_pattern_matching=use_layer_name_pattern_matching,
             use_dynamo=use_dynamo,
             dev=dev,
-            mode=inference,
+            mode=inference_qconfig,
             save_fname="dq",
         )
         logger.info(f"Quantized model {model}")
         logger.info("==" * 20)
-    if not inference:
+    if not inference_qconfig:
         if qcfg["smoothq"]:
             logger.info("Starting to apply smooth scale")
             dq_llm(model, act_scales, qcfg)
diff --git a/fms_mo/modules/linear.py b/fms_mo/modules/linear.py
@@ -293,8 +293,10 @@ def forward(self, x):
             else:
                 qweight = self.quantize_weight(self.weight * scale).to(
                     self.weight.dtype
-                )      
+                )
+
         qbias = self.bias
+
         # pylint: disable=not-callable
         output = F.linear(qinput, qweight, qbias)
 
diff --git a/fms_mo/prep.py b/fms_mo/prep.py
@@ -22,7 +22,7 @@
 
 # Third Party
 from torch import nn
-import compressed_tensors
+#import compressed_tensors as ct
 import torch
 
 # Local
@@ -392,7 +392,8 @@ def make_quant_module(module, curr_full_name, qcfg, verbose=False):
     # For nn.Linear
     elif isinstance(module, nn.Linear):
         if module.__class__ != nn.Linear:
-            if isinstance(module, compressed_tensors.linear.compressed_linear.CompressedLinear):
+            if isinstance(module, nn.Linear):
+                #module, ct.linear.compressed_linear.CompressedLinear):
                 pass
             else:
                 logger.warning(