diff --git a/src/llmcompressor/pipelines/sequential/pipeline.py b/src/llmcompressor/pipelines/sequential/pipeline.py index 91516f280..1bb2e8a0e 100644 --- a/src/llmcompressor/pipelines/sequential/pipeline.py +++ b/src/llmcompressor/pipelines/sequential/pipeline.py @@ -59,10 +59,6 @@ def __call__( """ session = active_session() - # prepare model for sequential onloading - dispatch_for_sequential(model) - model_device = get_execution_device(model) - # prepare to trace subgraphs modifiers = session.lifecycle.recipe.modifiers sequential_targets = get_sequential_targets(modifiers, model, dataset_args) @@ -73,6 +69,10 @@ def __call__( subgraphs = trace_subgraphs(model, sample_input, sequential_targets, ignore) num_subgraphs = len(subgraphs) + # prepare model for sequential onloading + dispatch_for_sequential(model) + model_device = get_execution_device(model) + LifecycleCallbacks.calibration_epoch_start() # TODO: remove this to enable quantization aware calibration