diff --git a/examples/FP8_QUANT/README.md b/examples/FP8_QUANT/README.md index 976ea3f2..0fa2d839 100644 --- a/examples/FP8_QUANT/README.md +++ b/examples/FP8_QUANT/README.md @@ -92,7 +92,8 @@ This end-to-end example utilizes the common set of interfaces provided by `fms_m ```python from llmcompressor.modifiers.quantization import QuantizationModifier - from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot + from llmcompressor.transformers import SparseAutoModelForCausalLM + from llmcompressor import oneshot model = SparseAutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, torch_dtype=model_args.torch_dtype) tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path) diff --git a/fms_mo/run_quant.py b/fms_mo/run_quant.py index 8ed6084a..c65c401b 100644 --- a/fms_mo/run_quant.py +++ b/fms_mo/run_quant.py @@ -198,8 +198,9 @@ def run_fp8(model_args, data_args, opt_args, fp8_args): """ # Third Party + from llmcompressor import oneshot from llmcompressor.modifiers.quantization import QuantizationModifier - from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot + from llmcompressor.transformers import SparseAutoModelForCausalLM logger = set_log_level(opt_args.log_level, "fms_mo.run_fp8")