From 59d047c16f0aab7d3107c044660c00db798611a8 Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Wed, 17 Dec 2025 15:21:38 +0000
Subject: [PATCH 1/3] change defaults

Signed-off-by: Kyle Sayers
---
 src/llmcompressor/args/dataset_arguments.py |  2 +-
 src/llmcompressor/entrypoints/oneshot.py    | 30 ++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/llmcompressor/args/dataset_arguments.py b/src/llmcompressor/args/dataset_arguments.py
index 6f3c16fcf..4dc96f833 100644
--- a/src/llmcompressor/args/dataset_arguments.py
+++ b/src/llmcompressor/args/dataset_arguments.py
@@ -143,7 +143,7 @@ class DatasetArguments(CustomDatasetArguments):
         metadata={"help": "Number of samples to use for one-shot calibration"},
     )
     shuffle_calibration_samples: bool = field(
-        default=False,
+        default=True,
         metadata={
             "help": "whether to shuffle the dataset before selecting calibration data"
         },
diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index cd0cf6628..a1645dfe4 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -253,7 +253,7 @@ def oneshot(
     batch_size: int = 1,
     data_collator: str | Callable = "truncation",
     num_calibration_samples: int = 512,
-    shuffle_calibration_samples: bool = False,
+    shuffle_calibration_samples: bool = True,
     max_seq_length: int = 384,
     pad_to_max_length: bool = True,
     text_column: str = "text",
@@ -263,6 +263,23 @@ def oneshot(
     preprocessing_num_workers: int | None = None,
     min_tokens_per_module: float | None = None,
     moe_calibrate_all_experts: bool = True,
+    pipeline: str | None = "independent",
+    tracing_ignore: list[str] = [
+        "_update_causal_mask",
+        "create_causal_mask",
+        "_update_mamba_mask",
+        "make_causal_mask",
+        "get_causal_mask",
+        "mask_interface",
+        "mask_function",
+        "_prepare_4d_causal_attention_mask",
+        "_prepare_fsmt_decoder_inputs",
+        "_prepare_4d_causal_attention_mask_with_cache_position",
+        "_update_linear_attn_mask",
+        "project_per_layer_inputs",
+    ],
+    sequential_targets: list[str] | None = None,
+    sequential_offload_device: str = "cpu",
     quantization_aware_calibration: bool = True,
     # Miscellaneous arguments
     output_dir: str | None = None,
@@ -335,6 +352,17 @@ def oneshot(
         model calibration. When True, all experts will see all tokens during
         calibration, ensuring proper quantization statistics. When False, only
         routed experts will be used. Only relevant for MoE models. Default is True.
+    :param pipeline: Calibration pipeline used to calibrate model Options:
+        ['basic', 'datafree', 'sequential', independent]
+    :param tracing_ignore: List of functions to ignore during tracing, either
+        {module}.{method_name} or {function_name}
+    :param sequential_targets: List of layer targets for the sequential pipeline.
+        This is typically a single DecoderLayer. Not specifying this argument will
+        cause the sequential pipeline to default to using the `no_split_params`
+        specified by the HF model definition
+    :param sequential_offload_device: Device used to offload intermediate activations
+        between sequential layers. It is recommended to use `cuda:1` if using more
+        than one gpu. Default is cpu.
     :param quantization_aware_calibration: Whether to enable quantization-aware
         calibration in the sequential pipeline. When True, quantization is applied
         during forward pass in calibration. When False, quantization is disabled

From 20cba23f9403df11217ea2a36d243f85e1722fdf Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Wed, 17 Dec 2025 15:28:18 +0000
Subject: [PATCH 2/3] set gemma default

Signed-off-by: Kyle Sayers
---
 examples/multimodal_vision/gemma3_example.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/multimodal_vision/gemma3_example.py b/examples/multimodal_vision/gemma3_example.py
index b9fcb59bf..83df9d6bc 100644
--- a/examples/multimodal_vision/gemma3_example.py
+++ b/examples/multimodal_vision/gemma3_example.py
@@ -39,6 +39,7 @@
     splits=DATASET_SPLIT,
     recipe=recipe,
     batch_size=BATCH_SIZE,
+    shuffle_calibration_samples=False,
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
 )

From a43b43e45b63e09f47de47e1979ac8dd90ba065b Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Thu, 18 Dec 2025 01:30:14 -0500
Subject: [PATCH 3/3] Update src/llmcompressor/entrypoints/oneshot.py

Co-authored-by: Brian Dellabetta
Signed-off-by: Kyle Sayers
---
 src/llmcompressor/entrypoints/oneshot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index a1645dfe4..7ac023630 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -353,7 +353,7 @@ def oneshot(
         calibration, ensuring proper quantization statistics. When False, only
         routed experts will be used. Only relevant for MoE models. Default is True.
     :param pipeline: Calibration pipeline used to calibrate model Options:
-        ['basic', 'datafree', 'sequential', independent]
+        ['basic', 'datafree', 'sequential', 'independent']
     :param tracing_ignore: List of functions to ignore during tracing, either
         {module}.{method_name} or {function_name}
     :param sequential_targets: List of layer targets for the sequential pipeline.
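Usage note (not part of the patches above): a minimal sketch of how the oneshot keyword
arguments surfaced by this series might be invoked. The model id, dataset name, and recipe
below are illustrative placeholders only, not values taken from the patches.

    from llmcompressor import oneshot
    from llmcompressor.modifiers.quantization import GPTQModifier

    # Placeholder recipe; any valid modifier or recipe string works here.
    recipe = GPTQModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"])

    oneshot(
        model="Qwen/Qwen2.5-0.5B-Instruct",   # placeholder model id
        dataset="open_platypus",              # placeholder calibration dataset
        recipe=recipe,
        num_calibration_samples=512,
        shuffle_calibration_samples=True,     # new default from patch 1
        pipeline="independent",               # or "basic", "datafree", "sequential"
        sequential_targets=None,              # None falls back to the HF model's no-split layers
        sequential_offload_device="cpu",      # "cuda:1" suggested when more than one GPU is available
    )

Scripts that depend on a fixed sample order, such as the Gemma3 example updated in patch 2,
can opt out of the new default by passing shuffle_calibration_samples=False.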