Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion areal/api/io_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def from_disk(
use_lora: bool = False,
clear_checkpoint_after_load: bool = True,
lora_name: str = "",
lora_int_id: int = 0,
lora_int_id: int = 1,
base_model_name: str = "",
) -> "WeightUpdateMeta":
from areal.utils.saver import Saver
Expand Down
27 changes: 19 additions & 8 deletions areal/experimental/trainer/rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,25 @@ def __init__(

# Prepare weight update meta and connect to inference engine
if self.config.actor.weight_update_mode == "disk":
self.weight_update_meta = WeightUpdateMeta.from_disk(
experiment_name=config.experiment_name,
trial_name=config.trial_name,
file_root=config.cluster.fileroot,
name="default",
use_lora=config.actor.use_lora,
clear_checkpoint_after_load=True,
)
if config.actor.use_lora:
self.weight_update_meta = WeightUpdateMeta.from_disk(
experiment_name=config.experiment_name,
trial_name=config.trial_name,
file_root=config.cluster.fileroot,
name="default",
clear_checkpoint_after_load=True,
use_lora=config.actor.use_lora,
lora_name=config.gconfig.lora_name,
base_model_name=config.actor.path,
)
else:
self.weight_update_meta = WeightUpdateMeta.from_disk(
experiment_name=config.experiment_name,
trial_name=config.trial_name,
file_root=config.cluster.fileroot,
name="default",
clear_checkpoint_after_load=True,
)
elif self.config.actor.weight_update_mode == "xccl":
# NCCL/XCCL weight update
if self.allocation_mode.train_backend == "megatron":
Expand Down
192 changes: 192 additions & 0 deletions examples/math/gsm8k_grpo_lora.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# GRPO training on GSM8K with LoRA adapters (Qwen3-0.6B base, vLLM rollout backend).
experiment_name: gsm8k-grpo
trial_name: trial0

seed: 1
enable_offload: false
total_train_epochs: 3
tokenizer_path: ${actor.path}

cluster:
  n_nodes: 1
  n_gpus_per_node: 16
  fileroot: /tmp/areal/experiments
  name_resolve:
    type: nfs
    nfs_record_root: /tmp/areal/name_resolve

# Split: 8 GPUs for vLLM inference, 8 GPUs for training (d=data, p=pipeline, t=tensor).
allocation_mode: vllm:d8p1t1+d8p1t1


scheduler:
  type: local


rollout:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  max_concurrent_rollouts: 256
  queue_size: null
  consumer_batch_size: ${train_dataset.batch_size}
  max_head_offpolicyness: 2
  enable_rollout_tracing: false
  # Master switch for LoRA; actor.use_lora and vllm.enable_lora both interpolate this.
  use_lora: true
  scheduling_spec: ${actor.scheduling_spec}

gconfig:
  n_samples: 4
  min_new_tokens: 0
  max_new_tokens: 1024
  greedy: false
  temperature: 1.0
  # Adapter name; must match the "name" field in vllm.lora_modules below.
  lora_name: "lora-gsm8k"

actor:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  path: Qwen/Qwen3-0.6B
  init_from_scratch: false
  disable_dropout: true
  gradient_checkpointing: true
  dtype: bfloat16
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer:
    type: adam
    lr: 1.70e-4
    weight_decay: 0.017
    beta1: 0.9
    beta2: 0.999
    eps: 1e-8
    lr_scheduler_type: constant
    gradient_clipping: 1.0
    warmup_steps_proportion: 0.001
  group_size: ${gconfig.n_samples}
  eps_clip: 0.4
  temperature: ${gconfig.temperature}
  reward_scaling: 10.0
  reward_bias: -0.5
  kl_ctl: 0.0
  ppo_n_minibatches: 1
  recompute_logprob: true
  use_decoupled_loss: true
  behav_imp_weight_cap: 5.0
  dynamic_sampling: false
  reward_norm:
    mean_level: group
    std_level: group
    group_size: ${gconfig.n_samples}
  adv_norm:
    mean_level: batch
    std_level: batch
  max_new_tokens: ${gconfig.max_new_tokens}
  # LoRA currently requires disk-based weight sync to the inference engine.
  weight_update_mode: disk
  use_lora: ${rollout.use_lora}
  peft_type: lora
  lora_rank: 16
  lora_alpha: 16
  target_modules: [all-linear]
  scheduling_spec:
    - task_type: worker
      port_count: 2
      gpu: 1
      cpu: 4
      mem: 32
      cmd: python3 -m areal.scheduler.rpc.rpc_server
      env_vars: {}

ref:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  path: ${actor.path}
  init_from_scratch: false
  disable_dropout: true
  dtype: ${actor.dtype}
  mb_spec:
    max_tokens_per_mb: 10240
  optimizer: null
  scheduling_strategy:
    type: colocation
    target: actor
  scheduling_spec: ${actor.scheduling_spec}


# SGLang
sglang:
  model_path: ${actor.path}
  random_seed: ${seed}
  skip_tokenizer_init: true
  dtype: ${actor.dtype}
  max_running_requests: null
  context_length: 32768
  mem_fraction_static: 0.8

# vLLM
vllm:
  model: ${actor.path}
  seed: ${seed}
  skip_tokenizer_init: false
  dtype: ${actor.dtype}
  max_model_len: 32768
  gpu_memory_utilization: 0.8
  enable_lora: ${rollout.use_lora}
  # FIX: original value was missing the opening quote before the path
  # ("path": ./model/... ), which is invalid JSON and would fail to parse.
  # NOTE(review): the adapter path is local — confirm it exists before launch.
  lora_modules: '{"name": "${gconfig.lora_name}", "path": "./model/Qwen3.0.6B-16rank", "base_model_name": "${actor.path}"}'
  enforce_eager: true

# datasets
train_dataset:
  batch_size: 256
  shuffle: true
  pin_memory: true
  num_workers: 4
  path: openai/gsm8k
  type: rl
  max_length: 1024

valid_dataset:
  batch_size: 256
  pin_memory: true
  num_workers: 4
  path: openai/gsm8k
  type: rl

# Utilities
saver:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: null

recover:
  mode: disabled
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: 1
  freq_steps: null
  freq_secs: 3600

evaluator:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  freq_epochs: null
  freq_steps: null
  freq_secs: null

stats_logger:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  wandb:
    mode: disabled


perf_tracer:
  experiment_name: ${experiment_name}
  trial_name: ${trial_name}
  fileroot: ${cluster.fileroot}
  enabled: false
  # NOTE(review): nesting inferred from key order (source indentation was lost) —
  # confirm against the experiment config schema.
  session_tracer:
    enabled: false