Skip to content

Commit b5e3885

Browse files
committed
new code
1 parent 5daa12d commit b5e3885

File tree

12 files changed

+491
-63
lines changed

12 files changed

+491
-63
lines changed

examples/osworld/async/run_trainer_debug_w_rollout_stepwise_train_pt.sh

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ pip install pymysql
99
set -x
1010
ENGINE=${1:-vllm_osworld}
1111

12+
ray stop
13+
1214
cd /workspace/codes/verl
1315

1416
# Initialize Ray cluster for multi-node training
@@ -37,6 +39,8 @@ echo "To stop monitoring: kill $!"
3739
echo "Detected $N_GPUS GPUs on this machine"
3840

3941
# MODEL_PATH=/workspace/huggingface/dart-gui-7b
42+
# MODEL_PATH=/data/liuyang/ByteDance-Seed/UI-TARS-1.5-7B
43+
# MODEL_PATH=/workspace/codes/verl/checkpoints/verl_osworld_grpo/test_1115_20251115_e7gd4jr2/global_step_19/actor/huggingface
4044
MODEL_PATH=/data/liuyang/ByteDance-Seed/UI-TARS-1.5-7B
4145

4246
#/root/verl/checkpoints/verl_osworld_grpo/vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_grpo_k8s_20250906_m3ou6di7/global_step_63/actor/huggingface
@@ -61,23 +65,30 @@ export REWARD_MODEL=qwen2.5_vl_7b
6165
export SWAN_WX_GROUP_HOOK=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=a68bb693-d0a0-4510-bc56-7efa7b8b546f
6266
export SWAN_FS_GROUP_HOOK=https://open.feishu.cn/open-apis/bot/v2/hook/793155e5-f0ca-47c4-9a09-bf34cd7a8ebb
6367

64-
# export ROOT_DATA_DIR=data/traj/pass@32_trainset90
65-
# export ROOT_DATA_DIR=rollouter/results/pass16_20250825_train152_pass16_gpu4_env36
66-
# export RUN_ID=results/pass16_20250825_train152_pass16_gpu4_env36
6768

68-
export ROOT_DATA_DIR=pass32_uitars_0928
69-
export RUN_ID=pass32_uitars_0928
69+
export ROOT_DATA_DIR=rollouter/results/test_1118_no_DA_2
70+
export RUN_ID=results/test_1118_no_DA_2
71+
72+
# export ROOT_DATA_DIR=rollouter/results/test_1115
73+
# export RUN_ID=results/test_1115
7074
# export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_20250821_vxer2wco
71-
export EXPERIMENT_NAME=Fixed_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
75+
export EXPERIMENT_NAME=test_1118_no_DA_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
76+
# export EXPERIMENT_NAME=test_1115_20251115_e7gd4jr2
7277
# export EXPERIMENT_NAME=vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_grpo_k8s_20250906_m3ou6di7
7378
# export EXPERIMENT_NAME=pt_test5_w_KL_trainset15_vllm_logp_osworld_reward_script_grpo_k8s_20250905_91ww0y85
7479
# export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_20250827_2txpd14d
7580

81+
# Create logs directory if it doesn't exist
82+
mkdir -p logs
83+
84+
# Redirect all output to log file (both stdout and stderr) while still displaying on terminal
85+
exec > >(tee logs/${EXPERIMENT_NAME}_1.log) 2>&1
86+
7687
# export ROOT_DATA_DIR=tmp_async_sql_0802_max_variance
7788
# export RUN_ID=pengxiang_test_0802_max_variance
7889
# export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_0802_8_mb64_micro8
7990
# export ROLLOUT_SERVER_URL=http://172.19.47.166:15959
80-
export ROLLOUT_SERVER_URL=0.0.0.0:8888
91+
export ROLLOUT_SERVER_URL=http://172.16.0.2:15959
8192

8293
# training parameters
8394
adv_estimator=grpo
@@ -98,18 +109,18 @@ max_response_length=500
98109
loss_agg_mode="seq-mean-token-mean"
99110

100111

101-
train_bz_min=2
102-
train_bz_max=4
103-
train_prompt_bsz=8
112+
train_bz_min=6
113+
train_bz_max=8
114+
train_prompt_bsz=6
104115
rollout_n=8
105-
train_prompt_mini_bsz=32
116+
train_prompt_mini_bsz=48
106117

107118
# Performance Related Parameter
108119
sp_size=4
109120
use_dynamic_bsz=False
110121
actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) * 2))
111122
infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) * 3))
112-
offload=True
123+
offload=False
113124
gen_tp=4
114125
fsdp_size=32
115126

@@ -120,11 +131,11 @@ splitter=stepwise
120131
splitter_parallel=True
121132
window_size=5
122133
stride_size=5
123-
max_steps=15
134+
max_steps=30
124135

125136
use_vllm_logp=False
126137
use_sft_loss=False
127-
use_token_ids_from_pt=False
138+
use_token_ids_from_pt=True
128139

129140
python3 -m verl.trainer.main_ppo_async \
130141
algorithm.adv_estimator=grpo \
@@ -182,11 +193,11 @@ python3 -m verl.trainer.main_ppo_async \
182193
trainer.experiment_name=$EXPERIMENT_NAME \
183194
trainer.n_gpus_per_node=$N_GPUS_PER_NODE \
184195
trainer.nnodes=$N_NODES \
185-
trainer.save_freq=3 \
196+
trainer.save_freq=1 \
186197
trainer.test_freq=10 \
187198
trainer.val_before_train=False \
188199
trainer.total_epochs=1 \
189-
trainer.max_actor_ckpt_to_keep=2 \
200+
trainer.max_actor_ckpt_to_keep=5 \
190201
+trainer.run_id=$RUN_ID \
191202
+trainer.splitter=${splitter} \
192203
+trainer.limit_messages=${limit_messages} \
@@ -203,7 +214,7 @@ python3 -m verl.trainer.main_ppo_async \
203214
+actor_rollout_ref.rollout.max_steps=15 \
204215
+actor_rollout_ref.rollout.limit_images=5 \
205216
+actor_rollout_ref.rollout.server_url=$ROLLOUT_SERVER_URL \
206-
+actor_rollout_ref.actor.offline=false \
217+
+actor_rollout_ref.actor.offline=false
207218
# +trainer.splitter=sliding_window \
208219
#
209220
# trainer.experiment_name="osworld_all_feasible_reward_script_grpo_k8s_0802_16_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 6 | head -n 1)" \

rename.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import os
2+
import re
3+
4+
def rename_images_in_subfolders(root_folder):
    """Zero-pad the index of ``image_<n>.png`` files to four digits.

    Only the immediate sub-folders of ``root_folder`` are visited (no
    recursion). Inside each one, every file whose name matches
    ``image_<digits>.png`` is renamed to ``image_<dddd>.png``, for example
    ``image_5.png`` -> ``image_0005.png``. Files with any other name are
    ignored.

    A rename is skipped, with a message, when the zero-padded name already
    exists in the same folder, so an existing file is never overwritten.

    Args:
        root_folder: Path of the directory whose sub-folders are processed.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    print(f"开始处理根文件夹: {root_folder}\n")

    # Nothing to do when the root folder is missing.
    if not os.path.isdir(root_folder):
        print(f"错误: 文件夹 '{root_folder}' 不存在。请检查路径是否正确。")
        return

    # Compiled once; the capture group holds the numeric index.
    image_name = re.compile(r'image_(\d+)\.png$')

    # Sorted so the printed report is deterministic across runs.
    for subdir_name in sorted(os.listdir(root_folder)):
        subdir_path = os.path.join(root_folder, subdir_name)

        # Plain files sitting directly in the root folder are not touched.
        if not os.path.isdir(subdir_path):
            continue

        print(f"--- 正在进入子文件夹: {subdir_path} ---")
        files_renamed_count = 0

        for filename in sorted(os.listdir(subdir_path)):
            match = image_name.match(filename)
            if not match:
                continue

            # Re-format the index with zero padding (5 -> '0005').
            new_filename = f"image_{int(match.group(1)):04d}.png"

            if new_filename == filename:
                # Already in the target format.
                print(f" 👌 已跳过 (格式正确): {filename}")
                continue

            old_filepath = os.path.join(subdir_path, filename)
            new_filepath = os.path.join(subdir_path, new_filename)

            # os.rename silently replaces an existing file on POSIX, which
            # would destroy data when e.g. both image_7.png and
            # image_0007.png are present. Leave both files alone instead.
            if os.path.exists(new_filepath):
                print(f" ⚠️ 已跳过 (目标已存在): {filename} -> {new_filename}")
                continue

            try:
                os.rename(old_filepath, new_filepath)
                print(f" ✅ 已重命名: {filename} -> {new_filename}")
                files_renamed_count += 1
            except OSError as e:
                print(f" ❌ 重命名失败: {filename}。错误: {e}")

        if files_renamed_count == 0:
            print(" 该文件夹中没有需要重命名的文件。")
        print("-" * (len(subdir_path) + 18))
        print("\n")

    print("🎉 所有操作完成!")
63+
64+
65+
if __name__ == '__main__':
    # Root folder whose sub-folders hold the images to rename; edit this
    # path before running the script.
    rename_images_in_subfolders('rollouter/results/test_1115')
71+
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{
2+
"030eeff7-b492-4218-b312-701ec99ee0cc":

rollouter/model_service.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -816,9 +816,10 @@ async def generate(self, messages: List[Dict[str, Any]], **kwargs) -> str:
816816
task_id = kwargs.get("task_id")
817817
trace_id = kwargs.get("trace_id")
818818
step = kwargs.get("step")
819-
save_dir = os.path.join(self.save_path, f"{task_id}_trace-{trace_id}")
819+
save_dir = os.path.join(self.save_path, f"{task_id}_{trace_id}")
820820
os.makedirs(save_dir, exist_ok=True)
821-
save_path = os.path.join(save_dir, f"image_{int(step) - 1}.png")
821+
# save_path = os.path.join(save_dir, f"image_{int(step)}.png")
822+
save_path = os.path.join(save_dir, f"image_{int(step):04d}.png")
822823
with open(save_path, "wb") as f:
823824
f.write(base64.b64decode(encoded))
824825
except Exception as e:
@@ -865,9 +866,9 @@ async def generate(self, messages: List[Dict[str, Any]], **kwargs) -> str:
865866
task_id = kwargs.get("task_id")
866867
trace_id = kwargs.get("trace_id")
867868
step = kwargs.get("step")
868-
save_dir = os.path.join(self.save_path, f"{task_id}_trace-{trace_id}")
869+
save_dir = os.path.join(self.save_path, f"{task_id}_{trace_id}")
869870
os.makedirs(save_dir, exist_ok=True)
870-
save_path = os.path.join(save_dir, f"data_for_step_{int(step)}.pt")
871+
save_path = os.path.join(save_dir, f"data_for_step_{int(step)+1}.pt")
871872

872873
data_to_save = {
873874
"logp": torch.tensor(logp_list).cpu() if logp_list is not None else torch.tensor([]).cpu(),
@@ -896,7 +897,7 @@ async def save(self, messages: List[Dict], reward: float, task_id: str, trace_id
896897
return {"status": "skipped"}
897898

898899
try:
899-
save_dir = os.path.join(self.save_path, f"{task_id}_trace-{trace_id}")
900+
save_dir = os.path.join(self.save_path, f"{task_id}_{trace_id}")
900901
os.makedirs(save_dir, exist_ok=True)
901902

902903
# 保存 messages

rollouter/scripts/test_mysql.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,7 +1580,7 @@
15801580
],
15811581
"metadata": {
15821582
"kernelspec": {
1583-
"display_name": "base",
1583+
"display_name": "verl",
15841584
"language": "python",
15851585
"name": "python3"
15861586
},
@@ -1594,7 +1594,7 @@
15941594
"name": "python",
15951595
"nbconvert_exporter": "python",
15961596
"pygments_lexer": "ipython3",
1597-
"version": "3.13.2"
1597+
"version": "3.10.19"
15981598
}
15991599
},
16001600
"nbformat": 4,

rollouter/task_loader.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,3 +330,140 @@ def build_task(raw: Dict, osworld_root: Path, use_call_user: bool = False) -> Ta
330330
task_config = task_data
331331
)
332332

333+
334+
335+
def hf_processor(name_or_path, **kwargs):
    """Create a huggingface processor to process multimodal data.

    Loading is best-effort: any exception raised by
    ``AutoProcessor.from_pretrained`` is logged as a warning and turned into
    a ``None`` result instead of being propagated.

    Args:
        name_or_path (str): The name or path of the processor.
        **kwargs: Forwarded unchanged to ``AutoProcessor.from_pretrained``.

    Returns:
        transformers.ProcessorMixin | None: The pretrained processor, or
        ``None`` when loading failed or when the class name of the loaded
        object does not contain ``"Processor"``.
    """
    import logging

    from transformers import AutoProcessor

    try:
        processor = AutoProcessor.from_pretrained(name_or_path, **kwargs)
    except Exception as e:
        # TODO(haibin.lin): try-catch should be removed after adding transformer version req to setup.py to avoid
        # silent failure
        # Until then, at least leave a trace of why no processor is returned.
        logging.getLogger(__name__).warning(
            "Failed to load processor from %r: %s", name_or_path, e
        )
        processor = None
    # Avoid load tokenizer, see:
    # https://github.com/huggingface/transformers/blob/v4.49.0/src/transformers/models/auto/processing_auto.py#L344
    if processor is not None and "Processor" not in processor.__class__.__name__:
        processor = None
    return processor
357+
358+
359+
def process_text_sync(processor, messages):
    """Flatten chat messages to text and return the processor's token ids.

    Each message contributes its text followed by ``<|im_end|>`` and a
    newline; message roles are not included in the text. When a message's
    ``content`` is a list (multimodal message), only the entries whose
    ``type`` is ``"text"`` are used, joined with newlines.

    Args:
        processor: Callable invoked as
            ``processor(text=[...], images=None, return_tensors="pt")``; its
            result must support ``.pop("input_ids")``.
        messages: Iterable of dicts that each carry a ``"content"`` entry
            (either a string or a list of part dicts).

    Returns:
        The first element of the popped ``input_ids`` (presumably a 1-D
        tensor of token ids; confirm against the processor in use).
    """

    def _text_of(content):
        # Plain string content is used as-is.
        if not isinstance(content, list):
            return content
        # Multimodal content: keep only the text parts.
        return "\n".join(
            part["text"] for part in content if part.get("type") == "text"
        )

    prompt = "".join(
        f"{_text_of(message['content'])}<|im_end|>\n" for message in messages
    )
    encoded = processor(text=[prompt], images=None, return_tensors="pt")
    return encoded.pop("input_ids")[0]
377+
378+
379+
import os
380+
import json
381+
import torch
382+
383+
# 假设这些变量和函数已经在别处定义好了
384+
# def hf_processor(path, trust_remote_code, use_fast): ...
385+
# def process_text_sync(processor, messages): ...
386+
# COMPUTER_USE_PROMPT = "..."
387+
388+
def process_system_prompt(
    instruction_root='evaluation_examples/examples_linux_osworld_0912',
    model_path='/workspace/huggingface/dart-gui-7b',
    output_dir='evaluation_examples/system_prompt_token_ids',
):
    """Tokenize the prompt of every task file and save all results in one file.

    Every immediate sub-directory of ``instruction_root`` is scanned for
    ``*.json`` files. For each file the ``"instruction"`` value is inserted
    into ``COMPUTER_USE_PROMPT``, wrapped in a system + user message pair and
    tokenized with ``process_text_sync``. The token ids are collected in a
    dict keyed by task id (the file name up to its first ``.``) and written
    with ``torch.save`` to ``<output_dir>/system_prompts.pt``.

    Files that are not valid JSON, lack an instruction, or fail for any other
    reason are reported on stdout and skipped; they do not abort the run.

    Args:
        instruction_root: Directory containing one sub-directory per task
            group, each holding the task ``.json`` files.
        model_path: Name or path passed to ``hf_processor`` to load the
            processor.
        output_dir: Directory that receives ``system_prompts.pt``; created
            if it does not exist.

    Returns:
        None. No output file is written when the processor cannot be loaded
        or when no task produced token ids.
    """
    processor = hf_processor(model_path, trust_remote_code=True, use_fast=True)
    if processor is None:
        # hf_processor reports failure by returning None. Without this guard
        # every task file would fail separately inside the loop below.
        print(f"Error: could not load a processor from '{model_path}'. Nothing was processed.")
        return

    os.makedirs(output_dir, exist_ok=True)
    output_file_path = os.path.join(output_dir, 'system_prompts.pt')
    print(f"Output will be saved to '{output_file_path}'")

    # task_id -> token ids, saved as a single file at the end.
    all_token_ids = {}

    for subdir_name in sorted(os.listdir(instruction_root)):
        subdir_path = os.path.join(instruction_root, subdir_name)
        if not os.path.isdir(subdir_path):
            continue

        for file_name in sorted(os.listdir(subdir_path)):
            if not file_name.endswith('.json'):
                continue

            file_path = os.path.join(subdir_path, file_name)
            task_id = file_name.split('.')[0]

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                instruction = data.get('instruction')
                if not instruction:
                    print(f"Warning: 'instruction' key not found or empty in {file_path}. Skipping.")
                    continue

                messages = [
                    {
                        "role": "system",
                        "content": "You are a helpful assistant."
                    },
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": COMPUTER_USE_PROMPT.format(
                                    instruction=instruction,
                                    language="English"
                                ),
                            }
                        ]
                    }
                ]

                input_ids = process_text_sync(processor, messages)

                if input_ids is not None:
                    all_token_ids[task_id] = input_ids
                    print(f"Processed and collected token IDs for task: {task_id}")
                else:
                    print(f"Warning: process_text_sync returned None for task {task_id}. Skipping.")

            except json.JSONDecodeError:
                print(f"Error: Could not decode JSON from {file_path}. Skipping.")
            except Exception as e:
                # Best-effort batch job: report the failure and move on to
                # the next task file.
                print(f"An unexpected error occurred while processing {file_path}: {e}")

    if all_token_ids:
        torch.save(all_token_ids, output_file_path)
        print(f"\nSuccessfully saved {len(all_token_ids)} token ID sets to {output_file_path}")
    else:
        print("\nNo token IDs were generated. Output file was not created.")
463+
464+
465+
def main():
    """Script entry point: run ``process_system_prompt`` with its defaults."""
    process_system_prompt()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)