@@ -134,10 +134,12 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
134134 low_cpu_mem_usage = bool (model_args .device_map ),
135135 )
136136
137- inference = model .config .to_dict ().get ("quantization_config" , None )
137+ inference_qconfig = None
138+ if hasattr (model , "config" ):
139+ inference_qconfig = model .config .to_dict ().get ("quantization_config" , None )
138140
139- if inference :
140- quant_setting = check_quantization_setting (inference )
141+ if inference_qconfig :
142+ quant_setting = check_quantization_setting (inference_qconfig )
141143 if quant_setting :
142144 logger .info ("Quantization config settings validated " )
143145 model = convert_fp8_vllm_to_fms_mo (model = model )
@@ -152,7 +154,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
152154 logger .info (f"Model is at { model .device } after intialization" )
153155 logger .info (f"Tokenizer is { tokenizer } , block size is { block_size } " )
154156
155- if not inference :
157+ if not inference_qconfig :
156158 logger .info ("quantization mode activated, initalizing the qcfg file " )
157159 qcfg = qconfig_init (recipe = "dq" , args = fms_mo_args )
158160 else :
@@ -198,7 +200,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
198200
199201 qcfg ["model" ] = model_args .model_name_or_path
200202 # config layers to skip, smooth scale
201- if not inference :
203+ if not inference_qconfig :
202204 config_quantize_smooth_layers (qcfg )
203205
204206 use_dynamo = True
@@ -231,7 +233,7 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
231233 )
232234
233235 # For loading or creating smoothquant scale. Sometimes we may include scales in ckpt as well.
234- if not inference and qcfg ["smoothq" ]:
236+ if not inference_qconfig and qcfg ["smoothq" ]:
235237 scale_file = Path (f"./act_scales/{ qcfg ['model' ].replace ('/' , '-' )} .pt" )
236238 if qcfg .get ("act_scale_path" , None ):
237239 # user provided a scale file (or a dir)
@@ -265,12 +267,12 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
265267 use_layer_name_pattern_matching = use_layer_name_pattern_matching ,
266268 use_dynamo = use_dynamo ,
267269 dev = dev ,
268- mode = inference ,
270+ mode = inference_qconfig ,
269271 save_fname = "dq" ,
270272 )
271273 logger .info (f"Quantized model { model } " )
272274 logger .info ("==" * 20 )
273- if not inference :
275+ if not inference_qconfig :
274276 if qcfg ["smoothq" ]:
275277 logger .info ("Starting to apply smooth scale" )
276278 dq_llm (model , act_scales , qcfg )
0 commit comments