File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -423,7 +423,7 @@ class TrainingArguments:
423423 Whether to load checkpoint data into CPU memory first before transferring to GPU.
424424 This helps mitigate GPU memory shortage by staging data on the CPU and only moving required parts to the GPU on demand during communication.
425425 Defaults to False.
426- save_hf_steps (`int`, *optional*, defaults to 500 ):
426+ save_hf_steps (`int`, *optional*, defaults to -1 ):
427427 Number of update steps between two Hugging Face checkpoint saves if `save_strategy="steps"`.
428428 hybrid_parallel_expert_grad_scale (float, optional, defaults to None):
429429 Scaling factor for expert gradients when Expert Parallel is enabled.
@@ -1217,7 +1217,7 @@ class TrainingArguments:
12171217 },
12181218 )
12191219
1220- save_hf_steps : int = field (default = 500 , metadata = {"help" : "Save huggingface checkpoint every X updates steps." })
1220+ save_hf_steps : int = field (default = - 1 , metadata = {"help" : "Save huggingface checkpoint every X updates steps." })
12211221
12221222 hybrid_parallel_expert_grad_scale : Optional [float ] = field (
12231223 default = None ,
You can’t perform that action at this time.
0 commit comments