From 59d047c16f0aab7d3107c044660c00db798611a8 Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Wed, 17 Dec 2025 15:21:38 +0000
Subject: [PATCH 1/3] change defaults

Signed-off-by: Kyle Sayers
---
 src/llmcompressor/args/dataset_arguments.py |  2 +-
 src/llmcompressor/entrypoints/oneshot.py    | 30 ++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/llmcompressor/args/dataset_arguments.py b/src/llmcompressor/args/dataset_arguments.py
index 6f3c16fcf..4dc96f833 100644
--- a/src/llmcompressor/args/dataset_arguments.py
+++ b/src/llmcompressor/args/dataset_arguments.py
@@ -143,7 +143,7 @@ class DatasetArguments(CustomDatasetArguments):
         metadata={"help": "Number of samples to use for one-shot calibration"},
     )
     shuffle_calibration_samples: bool = field(
-        default=False,
+        default=True,
         metadata={
             "help": "whether to shuffle the dataset before selecting calibration data"
         },
diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index cd0cf6628..a1645dfe4 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -253,7 +253,7 @@ def oneshot(
     batch_size: int = 1,
     data_collator: str | Callable = "truncation",
     num_calibration_samples: int = 512,
-    shuffle_calibration_samples: bool = False,
+    shuffle_calibration_samples: bool = True,
     max_seq_length: int = 384,
     pad_to_max_length: bool = True,
     text_column: str = "text",
@@ -263,6 +263,23 @@ def oneshot(
     preprocessing_num_workers: int | None = None,
     min_tokens_per_module: float | None = None,
     moe_calibrate_all_experts: bool = True,
+    pipeline: str | None = "independent",
+    tracing_ignore: list[str] = [
+        "_update_causal_mask",
+        "create_causal_mask",
+        "_update_mamba_mask",
+        "make_causal_mask",
+        "get_causal_mask",
+        "mask_interface",
+        "mask_function",
+        "_prepare_4d_causal_attention_mask",
+        "_prepare_fsmt_decoder_inputs",
+        "_prepare_4d_causal_attention_mask_with_cache_position",
+        "_update_linear_attn_mask",
+        "project_per_layer_inputs",
+    ],
+    sequential_targets: list[str] | None = None,
+    sequential_offload_device: str = "cpu",
     quantization_aware_calibration: bool = True,
     # Miscellaneous arguments
     output_dir: str | None = None,
@@ -335,6 +352,17 @@ def oneshot(
         model calibration. When True, all experts will see all tokens during
         calibration, ensuring proper quantization statistics. When False, only
         routed experts will be used. Only relevant for MoE models. Default is True.
+    :param pipeline: Calibration pipeline used to calibrate model Options:
+        ['basic', 'datafree', 'sequential', independent]
+    :param tracing_ignore: List of functions to ignore during tracing, either
+        {module}.{method_name} or {function_name}
+    :param sequential_targets: List of layer targets for the sequential pipeline.
+        This is typically a single DecoderLayer. Not specifying this argument will
+        cause the sequential pipeline to default to using the `no_split_params`
+        specified by the HF model definition
+    :param sequential_offload_device: Device used to offload intermediate activations
+        between sequential layers. It is recommended to use `cuda:1` if using more
+        than one gpu. Default is cpu.
     :param quantization_aware_calibration: Whether to enable quantization-aware
         calibration in the sequential pipeline. When True, quantization is applied
         during forward pass in calibration. When False, quantization is disabled

From 20cba23f9403df11217ea2a36d243f85e1722fdf Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Wed, 17 Dec 2025 15:28:18 +0000
Subject: [PATCH 2/3] set gemma default

Signed-off-by: Kyle Sayers
---
 examples/multimodal_vision/gemma3_example.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/multimodal_vision/gemma3_example.py b/examples/multimodal_vision/gemma3_example.py
index b9fcb59bf..83df9d6bc 100644
--- a/examples/multimodal_vision/gemma3_example.py
+++ b/examples/multimodal_vision/gemma3_example.py
@@ -39,6 +39,7 @@
     splits=DATASET_SPLIT,
     recipe=recipe,
     batch_size=BATCH_SIZE,
+    shuffle_calibration_samples=False,
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
 )

From a43b43e45b63e09f47de47e1979ac8dd90ba065b Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Thu, 18 Dec 2025 01:30:14 -0500
Subject: [PATCH 3/3] Update src/llmcompressor/entrypoints/oneshot.py

Co-authored-by: Brian Dellabetta
Signed-off-by: Kyle Sayers
---
 src/llmcompressor/entrypoints/oneshot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index a1645dfe4..7ac023630 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -353,7 +353,7 @@ def oneshot(
         calibration, ensuring proper quantization statistics. When False, only
         routed experts will be used. Only relevant for MoE models. Default is True.
     :param pipeline: Calibration pipeline used to calibrate model Options:
-        ['basic', 'datafree', 'sequential', independent]
+        ['basic', 'datafree', 'sequential', 'independent']
     :param tracing_ignore: List of functions to ignore during tracing, either
         {module}.{method_name} or {function_name}
     :param sequential_targets: List of layer targets for the sequential pipeline.
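Usage note (not part of the patches above): a minimal sketch of how the oneshot keyword
arguments surfaced by this series might be invoked. The model id, dataset name, and recipe
below are illustrative placeholders only, not values taken from the patches.

    from llmcompressor import oneshot
    from llmcompressor.modifiers.quantization import GPTQModifier

    # Placeholder recipe; any valid modifier or recipe string works here.
    recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

    oneshot(
        model="Qwen/Qwen2.5-0.5B-Instruct",   # placeholder model id
        dataset="open_platypus",              # placeholder calibration dataset
        recipe=recipe,
        num_calibration_samples=512,
        shuffle_calibration_samples=True,     # new default from patch 1
        pipeline="independent",               # or "basic", "datafree", "sequential"
        sequential_targets=None,              # None falls back to the HF model's no-split layers
        sequential_offload_device="cpu",      # "cuda:1" suggested when more than one GPU is available
    )

Scripts that depend on a fixed sample order, such as the Gemma3 example updated in patch 2,
can opt out of the new default by passing shuffle_calibration_samples=False.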