@@ -25,7 +25,7 @@ echo "To stop monitoring: kill $!"
2525
2626echo " Detected $N_GPUS GPUs on this machine"
2727
28- MODEL_PATH=/capacity/userdata/vcfenxd75jiv/shichenrui/ui_tars/ByteDance-Seed/UI-TARS-1.5
28+ MODEL_PATH=/root/verl/checkpoints/verl_osworld_grpo/Newenv_45_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_20250921_x0d74m8j/global_step_34/actor/huggingface
2929
3030# /root/verl/checkpoints/verl_osworld_grpo/Newenv_181_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_20250919_6oe5izen/global_step_16/actor/huggingface
3131# /root/verl/checkpoints/verl_osworld_grpo/1NODE_152_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_20250917_y7mx07hl/global_step_38/actor/huggingface
@@ -45,16 +45,16 @@ export SWAN_FS_GROUP_HOOK=https://open.feishu.cn/open-apis/bot/v2/hook/793155e5-
4545# export ROOT_DATA_DIR=rollouter/results/pass16_20250825_train152_pass16_gpu4_env36
4646# export RUN_ID=results/pass16_20250825_train152_pass16_gpu4_env36
4747
48- export ROOT_DATA_DIR=rollouter/results/ablation_novllm_logp_pass8_20250920_trainset45_gpu2_env20_vllm_logp_maxstep30_osworld_new
49- export RUN_ID=results/ablation_novllm_logp_pass8_20250920_trainset45_gpu2_env20_vllm_logp_maxstep30_osworld_new
48+ export ROOT_DATA_DIR=rollouter/results/resume_ablation_logp_pass8_20250924_trainset45_gpu2_env20_vllm_logp_maxstep30_osworld_new
49+ export RUN_ID=results/resume_ablation_logp_pass8_20250924_trainset45_gpu2_env20_vllm_logp_maxstep30_osworld_new
5050
51- export EXPERIMENT_NAME=Newenv_45_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
51+ # export EXPERIMENT_NAME=Newenv_45_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
5252# export EXPERIMENT_NAME=Newenv_181_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_20250919_6oe5izen
5353# export EXPERIMENT_NAME=1NODE_152_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_20250917_y7mx07hl
54- # export EXPERIMENT_NAME=1NODE_152_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_20250916_8ik050h8
54+ export EXPERIMENT_NAME=Newenv_45_vllm_logp_pt_w_KL_trainset_osworld_reward_script_grpo_k8s_20250921_x0d74m8j
5555
5656# export ROLLOUT_SERVER_URL=http://172.19.47.166:15959
57- export ROLLOUT_SERVER_URL=http://172.19.131.148:15959
57+ export ROLLOUT_SERVER_URL=http://172.19.131.92:15959
5858
5959# training parameters
6060adv_estimator=grpo
0 commit comments