2 files changed, +124 -0 lines changed
File 1 of 2 (new): pre-training (PT) benchmark config for GLM-4.5-Air

### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/pt/train.jsonl
train_dataset_prob: '1.0'
eval_dataset_path: ./data/pt/eval.jsonl
eval_dataset_prob: '1.0'
max_seq_len: 8192
mix_strategy: concat

### model
model_name_or_path: /root/paddlejob/gpfs/efficient_benchmark/huggingface/GLM-4.5-Air
attn_impl: flashmask

### finetuning
# base
stage: PT
fine_tuning: full
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: 30
eval_iters: 10
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 8
logging_dir: ./vdl_log
output_dir: /root/paddlejob/tmpspace/GLM-4.5-Air-PT/outputs/checkpoint
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-05
random_shuffle: false

# performance
tensor_parallel_degree: 4
pipeline_parallel_degree: 4
use_expert_parallel: true
expert_parallel_degree: 8
sequence_parallel: true
sharding_parallel_config: split_param
amp_master_grad: true
sharding: stage1
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: false
load_via_cpu: true
save_checkpoint_format: flex_checkpoint
load_checkpoint_format: flex_checkpoint
save_to_hf: true

benchmark: true
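For reviewers sizing the job, the performance block above pins a fixed GPU layout. A minimal back-of-the-envelope sketch in Python, assuming the common Paddle convention that world size = tensor parallel x pipeline parallel x sharding degree, and that the 8-way expert parallelism is mapped onto the sharding/data-parallel ranks; neither assumption is stated in the diff:

# Back-of-the-envelope layout math for the PT config above.
# Assumptions (not in the diff): world size = tp * pp * sharding_degree,
# and the expert-parallel groups reuse the sharding/data-parallel ranks
# instead of adding a separate dimension.
tp, pp, ep = 4, 4, 8
sharding_degree = ep                    # assumed: EP laid over the sharding dim
world_size = tp * pp * sharding_degree
print(f"GPUs required: {world_size}")   # -> 128 under these assumptions

# Effective global batch, in sequences per optimizer step:
per_device_bs = 1
grad_accum = 8
dp_replicas = sharding_degree           # stage1 sharding still replicates data
global_batch = per_device_bs * grad_accum * dp_replicas
print(f"global batch: {global_batch} sequences of up to 8192 tokens")  # -> 64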
File 2 of 2 (new): supervised fine-tuning (SFT) config for GLM-4.5-Air

### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/sft/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/sft/eval.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
mix_strategy: concat

### model
model_name_or_path: /root/paddlejob/gpfs/efficient_benchmark/huggingface/GLM-4.5-Air
attn_impl: flashmask

### finetuning
# base
stage: SFT
fine_tuning: full
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: 100
eval_iters: 10
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 8
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_sft_ckpts
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-5
random_shuffle: false

# performance
tensor_parallel_degree: 4
pipeline_parallel_degree: 4
use_expert_parallel: true
expert_parallel_degree: 8
sequence_parallel: true
sharding_parallel_config: "split_param"
amp_master_grad: true
# sharding_parallel_degree: 8
sharding: stage1
recompute: true
bf16: true
fp16_opt_level: O2
# unified_checkpoint: true
load_via_cpu: true
# resume_from_checkpoint: false
save_checkpoint_format: "flex_checkpoint"
load_checkpoint_format: "flex_checkpoint"
save_to_hf: true

benchmark: true
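Both configs point at local jsonl files in the erniekit dataset format. Before launching a multi-GPU run it can be worth sanity-checking that those paths exist and that every line parses as JSON; a minimal Python sketch, where the config filename run_sft.yaml is a placeholder and PyYAML is assumed available:

import json
from pathlib import Path

import yaml  # PyYAML, assumed installed

# Placeholder filename: substitute the actual config file from this diff.
cfg = yaml.safe_load(Path("run_sft.yaml").read_text())

for key in ("train_dataset_path", "eval_dataset_path"):
    path = Path(cfg[key])
    assert path.is_file(), f"{key} does not exist: {path}"
    n = 0
    with path.open() as f:
        for n, line in enumerate(f, 1):
            json.loads(line)  # each line must be one standalone JSON object
    print(f"{key}: {path} OK ({n} records)")

This only validates JSON-lines framing, not the erniekit record schema itself, which the diff does not show.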