Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/multimodal_vision/gemma3_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
splits=DATASET_SPLIT,
recipe=recipe,
batch_size=BATCH_SIZE,
shuffle_calibration_samples=False,
max_seq_length=MAX_SEQUENCE_LENGTH,
num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)
Expand Down
2 changes: 1 addition & 1 deletion src/llmcompressor/args/dataset_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class DatasetArguments(CustomDatasetArguments):
metadata={"help": "Number of samples to use for one-shot calibration"},
)
shuffle_calibration_samples: bool = field(
default=False,
default=True,
metadata={
"help": "whether to shuffle the dataset before selecting calibration data"
},
Expand Down
30 changes: 29 additions & 1 deletion src/llmcompressor/entrypoints/oneshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def oneshot(
batch_size: int = 1,
data_collator: str | Callable = "truncation",
num_calibration_samples: int = 512,
shuffle_calibration_samples: bool = False,
shuffle_calibration_samples: bool = True,
max_seq_length: int = 384,
pad_to_max_length: bool = True,
text_column: str = "text",
Expand All @@ -263,6 +263,23 @@ def oneshot(
preprocessing_num_workers: int | None = None,
min_tokens_per_module: float | None = None,
moe_calibrate_all_experts: bool = True,
pipeline: str | None = "independent",
tracing_ignore: list[str] = [
"_update_causal_mask",
"create_causal_mask",
"_update_mamba_mask",
"make_causal_mask",
"get_causal_mask",
"mask_interface",
"mask_function",
"_prepare_4d_causal_attention_mask",
"_prepare_fsmt_decoder_inputs",
"_prepare_4d_causal_attention_mask_with_cache_position",
"_update_linear_attn_mask",
"project_per_layer_inputs",
],
sequential_targets: list[str] | None = None,
sequential_offload_device: str = "cpu",
quantization_aware_calibration: bool = True,
# Miscellaneous arguments
output_dir: str | None = None,
Expand Down Expand Up @@ -335,6 +352,17 @@ def oneshot(
model calibration. When True, all experts will see all tokens during
calibration, ensuring proper quantization statistics. When False, only
routed experts will be used. Only relevant for MoE models. Default is True.
:param pipeline: Calibration pipeline used to calibrate the model. Options:
    ['basic', 'datafree', 'sequential', 'independent']
:param tracing_ignore: List of functions to ignore during tracing, either
{module}.{method_name} or {function_name}
:param sequential_targets: List of layer targets for the sequential pipeline.
This is typically a single DecoderLayer. Not specifying this argument will
cause the sequential pipeline to default to using the `no_split_params`
specified by the HF model definition
:param sequential_offload_device: Device used to offload intermediate activations
between sequential layers. It is recommended to use `cuda:1` if using more
than one gpu. Default is cpu.
:param quantization_aware_calibration: Whether to enable quantization-aware
calibration in the sequential pipeline. When True, quantization is applied
during forward pass in calibration. When False, quantization is disabled
Expand Down