Commit 24a7205 (parent: 3aba051)

fix: correcting whitespaces in repo

Signed-off-by: Omobayode Fagbohungbe <omobayode.fagbohungbe@ibm.com>

File tree: 3 files changed, +16 / -11 lines

fms_mo/dq.py

Lines changed: 10 additions & 8 deletions

@@ -134,10 +134,12 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
         low_cpu_mem_usage=bool(model_args.device_map),
     )

-    inference = model.config.to_dict().get("quantization_config", None)
+    inference_qconfig = None
+    if hasattr(model, "config"):
+        inference_qconfig = model.config.to_dict().get("quantization_config", None)

-    if inference:
-        quant_setting = check_quantization_setting(inference)
+    if inference_qconfig:
+        quant_setting = check_quantization_setting(inference_qconfig)
         if quant_setting:
             logger.info("Quantization config settings validated ")
             model = convert_fp8_vllm_to_fms_mo(model=model)
@@ -152,7 +154,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
     logger.info(f"Model is at {model.device} after intialization")
     logger.info(f"Tokenizer is {tokenizer}, block size is {block_size}")

-    if not inference:
+    if not inference_qconfig:
         logger.info("quantization mode activated, initalizing the qcfg file ")
         qcfg = qconfig_init(recipe="dq", args=fms_mo_args)
     else:
@@ -198,7 +200,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):

     qcfg["model"] = model_args.model_name_or_path
     # config layers to skip, smooth scale
-    if not inference:
+    if not inference_qconfig:
         config_quantize_smooth_layers(qcfg)

     use_dynamo = True
@@ -231,7 +233,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
     )

     # For loading or creating smoothquant scale. Sometimes we may include scales in ckpt as well.
-    if not inference and qcfg["smoothq"]:
+    if not inference_qconfig and qcfg["smoothq"]:
         scale_file = Path(f"./act_scales/{qcfg['model'].replace('/', '-')}.pt")
         if qcfg.get("act_scale_path", None):
             # user provided a scale file (or a dir)
@@ -265,12 +267,12 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
         use_layer_name_pattern_matching=use_layer_name_pattern_matching,
         use_dynamo=use_dynamo,
         dev=dev,
-        mode=inference,
+        mode=inference_qconfig,
         save_fname="dq",
     )
     logger.info(f"Quantized model {model}")
     logger.info("==" * 20)
-    if not inference:
+    if not inference_qconfig:
         if qcfg["smoothq"]:
             logger.info("Starting to apply smooth scale")
             dq_llm(model, act_scales, qcfg)
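Beyond the whitespace cleanup, this diff renames inference to the more descriptive inference_qconfig and guards the config lookup with hasattr(), so a model without a .config attribute no longer raises AttributeError. A minimal standalone sketch of that pattern (the helper name and dummy objects below are illustrative, not part of fms-mo):

from types import SimpleNamespace

# Hypothetical stand-in for a config object that exposes to_dict(),
# as a transformers PretrainedConfig does.
class DummyConfig:
    def to_dict(self):
        return {"quantization_config": {"quant_method": "fp8"}}

def get_inference_qconfig(model):
    """Mirror the guarded lookup from run_dq: return the checkpoint's
    quantization_config if the model carries one, else None."""
    inference_qconfig = None
    if hasattr(model, "config"):
        inference_qconfig = model.config.to_dict().get("quantization_config", None)
    return inference_qconfig

quantized = SimpleNamespace(config=DummyConfig())
plain = SimpleNamespace()  # no .config: the old code raised AttributeError here

print(get_inference_qconfig(quantized))  # {'quant_method': 'fp8'}
print(get_inference_qconfig(plain))      # None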

fms_mo/modules/linear.py

Lines changed: 3 additions & 1 deletion

@@ -293,8 +293,10 @@ def forward(self, x):
         else:
             qweight = self.quantize_weight(self.weight * scale).to(
                 self.weight.dtype
-                )
+            )
+
             qbias = self.bias
+
         # pylint: disable=not-callable
         output = F.linear(qinput, qweight, qbias)
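This hunk is purely cosmetic (re-indenting a closing parenthesis and adding blank lines), but the surrounding forward() shows the usual quantize-then-matmul flow. A self-contained sketch of that flow, with a toy symmetric fake-quantizer standing in for fms-mo's configurable quantize_weight (the class and its quantizer are assumptions for illustration, not the library's implementation):

import torch
import torch.nn.functional as F
from torch import nn

class ToyQuantLinear(nn.Linear):
    """Illustrative only: fake-quantizes the weight before F.linear,
    mirroring the qweight/qbias flow in fms_mo/modules/linear.py."""

    def quantize_weight(self, w, n_bits=8):
        # Toy symmetric fake quantization; a stand-in for fms-mo's
        # configurable quantizer, not its actual implementation.
        qmax = 2 ** (n_bits - 1) - 1
        scale = w.abs().max() / qmax
        return (w / scale).round().clamp(-qmax, qmax) * scale

    def forward(self, x):
        qweight = self.quantize_weight(self.weight).to(self.weight.dtype)
        qbias = self.bias
        # pylint: disable=not-callable
        return F.linear(x, qweight, qbias)

layer = ToyQuantLinear(4, 2)
print(layer(torch.randn(1, 4)).shape)  # torch.Size([1, 2])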

fms_mo/prep.py

Lines changed: 3 additions & 2 deletions

@@ -22,7 +22,7 @@

 # Third Party
 from torch import nn
-import compressed_tensors
+#import compressed_tensors as ct
 import torch

 # Local
@@ -392,7 +392,8 @@ def make_quant_module(module, curr_full_name, qcfg, verbose=False):
     # For nn.Linear
     elif isinstance(module, nn.Linear):
         if module.__class__ != nn.Linear:
-            if isinstance(module, compressed_tensors.linear.compressed_linear.CompressedLinear):
+            if isinstance(module, nn.Linear):
+            #module, ct.linear.compressed_linear.CompressedLinear):
                 pass
             else:
                 logger.warning(
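This hunk comments out the compressed_tensors import and widens the CompressedLinear check to plain nn.Linear, presumably to drop the hard dependency. A common alternative (a sketch of the optional-import pattern, not what the commit does; the helper name is hypothetical) keeps the narrower check when the package is available:

from torch import nn

# Optional dependency: compressed_tensors may not be installed.
try:
    from compressed_tensors.linear.compressed_linear import CompressedLinear
except ImportError:
    CompressedLinear = None

def is_handled_linear(module):
    """True for Linear variants that make_quant_module could pass through."""
    if CompressedLinear is not None and isinstance(module, CompressedLinear):
        return True
    return isinstance(module, nn.Linear)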
