diff --git a/areal/api/io_struct.py b/areal/api/io_struct.py index 50cddc723..c550a8b3e 100644 --- a/areal/api/io_struct.py +++ b/areal/api/io_struct.py @@ -148,7 +148,7 @@ def from_disk( use_lora: bool = False, clear_checkpoint_after_load: bool = True, lora_name: str = "", - lora_int_id: int = 0, + lora_int_id: int = 1, base_model_name: str = "", ) -> "WeightUpdateMeta": from areal.utils.saver import Saver diff --git a/areal/experimental/trainer/rl.py b/areal/experimental/trainer/rl.py index 91a05252c..f28c10a15 100644 --- a/areal/experimental/trainer/rl.py +++ b/areal/experimental/trainer/rl.py @@ -132,14 +132,25 @@ def __init__( # Prepare weight update meta and connect to inference engine if self.config.actor.weight_update_mode == "disk": - self.weight_update_meta = WeightUpdateMeta.from_disk( - experiment_name=config.experiment_name, - trial_name=config.trial_name, - file_root=config.cluster.fileroot, - name="default", - use_lora=config.actor.use_lora, - clear_checkpoint_after_load=True, - ) + if config.actor.use_lora: + self.weight_update_meta = WeightUpdateMeta.from_disk( + experiment_name=config.experiment_name, + trial_name=config.trial_name, + file_root=config.cluster.fileroot, + name="default", + clear_checkpoint_after_load=True, + use_lora=config.actor.use_lora, + lora_name=config.gconfig.lora_name, + base_model_name=config.actor.path, + ) + else: + self.weight_update_meta = WeightUpdateMeta.from_disk( + experiment_name=config.experiment_name, + trial_name=config.trial_name, + file_root=config.cluster.fileroot, + name="default", + clear_checkpoint_after_load=True, + ) elif self.config.actor.weight_update_mode == "xccl": # NCCL/XCCL weight update if self.allocation_mode.train_backend == "megatron": diff --git a/examples/math/gsm8k_grpo_lora.yaml b/examples/math/gsm8k_grpo_lora.yaml new file mode 100644 index 000000000..b0cbdce36 --- /dev/null +++ b/examples/math/gsm8k_grpo_lora.yaml @@ -0,0 +1,192 @@ +experiment_name: gsm8k-grpo +trial_name: trial0 + 
+seed: 1 +enable_offload: false +total_train_epochs: 3 +tokenizer_path: ${actor.path} + +cluster: + n_nodes: 1 + n_gpus_per_node: 16 + fileroot: /tmp/areal/experiments + name_resolve: + type: nfs + nfs_record_root: /tmp/areal/name_resolve + +allocation_mode: vllm:d8p1t1+d8p1t1 + + +scheduler: + type: local + + +rollout: + experiment_name: ${experiment_name} + trial_name: ${trial_name} + max_concurrent_rollouts: 256 + queue_size: null + consumer_batch_size: ${train_dataset.batch_size} + max_head_offpolicyness: 2 + enable_rollout_tracing: false + use_lora: true + scheduling_spec: ${actor.scheduling_spec} + +gconfig: + n_samples: 4 + min_new_tokens: 0 + max_new_tokens: 1024 + greedy: false + temperature: 1.0 + lora_name: "lora-gsm8k" + +actor: + experiment_name: ${experiment_name} + trial_name: ${trial_name} + path: Qwen/Qwen3-0.6B + init_from_scratch: false + disable_dropout: true + gradient_checkpointing: true + dtype: bfloat16 + mb_spec: + max_tokens_per_mb: 10240 + optimizer: + type: adam + lr: 1.70e-4 + weight_decay: 0.017 + beta1: 0.9 + beta2: 0.999 + eps: 1e-8 + lr_scheduler_type: constant + gradient_clipping: 1.0 + warmup_steps_proportion: 0.001 + group_size: ${gconfig.n_samples} + eps_clip: 0.4 + temperature: ${gconfig.temperature} + reward_scaling: 10.0 + reward_bias: -0.5 + kl_ctl: 0.0 + ppo_n_minibatches: 1 + recompute_logprob: true + use_decoupled_loss: true + behav_imp_weight_cap: 5.0 + dynamic_sampling: false + reward_norm: + mean_level: group + std_level: group + group_size: ${gconfig.n_samples} + adv_norm: + mean_level: batch + std_level: batch + max_new_tokens: ${gconfig.max_new_tokens} + weight_update_mode: disk + use_lora: ${rollout.use_lora} + peft_type: lora + lora_rank: 16 + lora_alpha: 16 + target_modules: [all-linear] + scheduling_spec: + - task_type: worker + port_count: 2 + gpu: 1 + cpu: 4 + mem: 32 + cmd: python3 -m areal.scheduler.rpc.rpc_server + env_vars: {} + +ref: + experiment_name: ${experiment_name} + trial_name: ${trial_name} + 
 path: ${actor.path}
+  init_from_scratch: false
+  disable_dropout: true
+  dtype: ${actor.dtype}
+  mb_spec:
+    max_tokens_per_mb: 10240
+  optimizer: null
+  scheduling_strategy:
+    type: colocation
+    target: actor
+  scheduling_spec: ${actor.scheduling_spec}
+
+
+# SGLang
+sglang:
+  model_path: ${actor.path}
+  random_seed: ${seed}
+  skip_tokenizer_init: true
+  dtype: ${actor.dtype}
+  max_running_requests: null
+  context_length: 32768
+  mem_fraction_static: 0.8
+
+# vLLM
+vllm:
+  model: ${actor.path}
+  seed: ${seed}
+  skip_tokenizer_init: false
+  dtype: ${actor.dtype}
+  max_model_len: 32768
+  gpu_memory_utilization: 0.8
+  enable_lora: ${rollout.use_lora}
+  lora_modules: '{"name": "${gconfig.lora_name}", "path": "./model/Qwen3.0.6B-16rank", "base_model_name": "${actor.path}"}'
+  enforce_eager: true
+
+# datasets
+train_dataset:
+  batch_size: 256
+  shuffle: true
+  pin_memory: true
+  num_workers: 4
+  path: openai/gsm8k
+  type: rl
+  max_length: 1024
+
+valid_dataset:
+  batch_size: 256
+  pin_memory: true
+  num_workers: 4
+  path: openai/gsm8k
+  type: rl
+
+# Utilities
+saver:
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  fileroot: ${cluster.fileroot}
+  freq_epochs: 1
+  freq_steps: null
+  freq_secs: null
+
+recover:
+  mode: disabled
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  fileroot: ${cluster.fileroot}
+  freq_epochs: 1
+  freq_steps: null
+  freq_secs: 3600
+
+evaluator:
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  fileroot: ${cluster.fileroot}
+  freq_epochs: null
+  freq_steps: null
+  freq_secs: null
+
+stats_logger:
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  fileroot: ${cluster.fileroot}
+  wandb:
+    mode: disabled
+
+
+perf_tracer:
+  experiment_name: ${experiment_name}
+  trial_name: ${trial_name}
+  fileroot: ${cluster.fileroot}
+  enabled: false
+  session_tracer:
+    enabled: false