Skip to content

Commit 59d047c

Browse files
committed
change defaults
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent a5f7031 commit 59d047c

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

src/llmcompressor/args/dataset_arguments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ class DatasetArguments(CustomDatasetArguments):
143143
metadata={"help": "Number of samples to use for one-shot calibration"},
144144
)
145145
shuffle_calibration_samples: bool = field(
146-
default=False,
146+
default=True,
147147
metadata={
148148
"help": "whether to shuffle the dataset before selecting calibration data"
149149
},

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def oneshot(
253253
batch_size: int = 1,
254254
data_collator: str | Callable = "truncation",
255255
num_calibration_samples: int = 512,
256-
shuffle_calibration_samples: bool = False,
256+
shuffle_calibration_samples: bool = True,
257257
max_seq_length: int = 384,
258258
pad_to_max_length: bool = True,
259259
text_column: str = "text",
@@ -263,6 +263,23 @@ def oneshot(
263263
preprocessing_num_workers: int | None = None,
264264
min_tokens_per_module: float | None = None,
265265
moe_calibrate_all_experts: bool = True,
266+
pipeline: str | None = "independent",
267+
tracing_ignore: list[str] = [
268+
"_update_causal_mask",
269+
"create_causal_mask",
270+
"_update_mamba_mask",
271+
"make_causal_mask",
272+
"get_causal_mask",
273+
"mask_interface",
274+
"mask_function",
275+
"_prepare_4d_causal_attention_mask",
276+
"_prepare_fsmt_decoder_inputs",
277+
"_prepare_4d_causal_attention_mask_with_cache_position",
278+
"_update_linear_attn_mask",
279+
"project_per_layer_inputs",
280+
],
281+
sequential_targets: list[str] | None = None,
282+
sequential_offload_device: str = "cpu",
266283
quantization_aware_calibration: bool = True,
267284
# Miscellaneous arguments
268285
output_dir: str | None = None,
@@ -335,6 +352,17 @@ def oneshot(
335352
model calibration. When True, all experts will see all tokens during
336353
calibration, ensuring proper quantization statistics. When False, only
337354
routed experts will be used. Only relevant for MoE models. Default is True.
355+
:param pipeline: Calibration pipeline used to calibrate the model. Options:
356+
['basic', 'datafree', 'sequential', 'independent']
357+
:param tracing_ignore: List of functions to ignore during tracing, either
358+
{module}.{method_name} or {function_name}
359+
:param sequential_targets: List of layer targets for the sequential pipeline.
360+
This is typically a single DecoderLayer. Not specifying this argument will
361+
cause the sequential pipeline to default to using the `no_split_params`
362+
specified by the HF model definition
363+
:param sequential_offload_device: Device used to offload intermediate activations
364+
between sequential layers. It is recommended to use `cuda:1` if using more
365+
than one gpu. Default is cpu.
338366
:param quantization_aware_calibration: Whether to enable quantization-aware
339367
calibration in the sequential pipeline. When True, quantization is applied
340368
during forward pass in calibration. When False, quantization is disabled

0 commit comments

Comments
 (0)