Computer-use-agents
diff --git a/‎examples/osworld/async/run_trainer_debug_w_rollout_stepwise_train_pt.sh‎
Lines changed: 28 additions & 17 deletions b/‎examples/osworld/async/run_trainer_debug_w_rollout_stepwise_train_pt.sh‎
Lines changed: 28 additions & 17 deletions
diff --git a/‎rename.py‎
Lines changed: 71 additions & 0 deletions b/‎rename.py‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎rollouter/evaluation_examples/system_prompt_token_ids/system_prompt_token_ids.json‎
Lines changed: 2 additions & 0 deletions b/‎rollouter/evaluation_examples/system_prompt_token_ids/system_prompt_token_ids.json‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎rollouter/model_service.py‎
Lines changed: 6 additions & 5 deletions b/‎rollouter/model_service.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎rollouter/scripts/test_mysql.ipynb‎
Lines changed: 2 additions & 2 deletions b/‎rollouter/scripts/test_mysql.ipynb‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎rollouter/task_loader.py‎
Lines changed: 137 additions & 0 deletions b/‎rollouter/task_loader.py‎
Lines changed: 137 additions & 0 deletions
@@ -9,6 +9,8 @@ pip install pymysql
 set -x
 ENGINE=${1:-vllm_osworld}
 
+ray stop
+
 cd /workspace/codes/verl
 
 # Initialize Ray cluster for multi-node training
@@ -37,6 +39,8 @@ echo "To stop monitoring: kill $!"
 echo "Detected $N_GPUS GPUs on this machine"
 
 # MODEL_PATH=/workspace/huggingface/dart-gui-7b
+# MODEL_PATH=/data/liuyang/ByteDance-Seed/UI-TARS-1.5-7B
+# MODEL_PATH=/workspace/codes/verl/checkpoints/verl_osworld_grpo/test_1115_20251115_e7gd4jr2/global_step_19/actor/huggingface
 MODEL_PATH=/data/liuyang/ByteDance-Seed/UI-TARS-1.5-7B
 
 #/root/verl/checkpoints/verl_osworld_grpo/vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_grpo_k8s_20250906_m3ou6di7/global_step_63/actor/huggingface
@@ -61,23 +65,30 @@ export REWARD_MODEL=qwen2.5_vl_7b
 export SWAN_WX_GROUP_HOOK=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=a68bb693-d0a0-4510-bc56-7efa7b8b546f
 export SWAN_FS_GROUP_HOOK=https://open.feishu.cn/open-apis/bot/v2/hook/793155e5-f0ca-47c4-9a09-bf34cd7a8ebb
 
-# export ROOT_DATA_DIR=data/traj/pass@32_trainset90
-# export ROOT_DATA_DIR=rollouter/results/pass16_20250825_train152_pass16_gpu4_env36
-# export RUN_ID=results/pass16_20250825_train152_pass16_gpu4_env36
 
-export ROOT_DATA_DIR=pass32_uitars_0928
-export RUN_ID=pass32_uitars_0928
+export ROOT_DATA_DIR=rollouter/results/test_1118_no_DA_2
+export RUN_ID=results/test_1118_no_DA_2
+
+# export ROOT_DATA_DIR=rollouter/results/test_1115
+# export RUN_ID=results/test_1115
 # export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_20250821_vxer2wco
-export EXPERIMENT_NAME=Fixed_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
+export EXPERIMENT_NAME=test_1118_no_DA_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
+# export EXPERIMENT_NAME=test_1115_20251115_e7gd4jr2
 # export EXPERIMENT_NAME=vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_grpo_k8s_20250906_m3ou6di7
 # export EXPERIMENT_NAME=pt_test5_w_KL_trainset15_vllm_logp_osworld_reward_script_grpo_k8s_20250905_91ww0y85
 # export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_20250827_2txpd14d
 
+# Create the logs directory if it doesn't exist; abort when it cannot be
+# created, because the redirect below would otherwise have nowhere to write.
+mkdir -p logs || exit 1
+
+# Redirect all output (stdout and stderr) to the log file while still showing
+# it on the terminal. The path is quoted so the expansion of EXPERIMENT_NAME
+# is never word-split or glob-expanded.
+exec > >(tee "logs/${EXPERIMENT_NAME}_1.log") 2>&1
+
 # export ROOT_DATA_DIR=tmp_async_sql_0802_max_variance 
 # export RUN_ID=pengxiang_test_0802_max_variance
 # export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_0802_8_mb64_micro8
 # export ROLLOUT_SERVER_URL=http://172.19.47.166:15959
-export ROLLOUT_SERVER_URL=0.0.0.0:8888
+export ROLLOUT_SERVER_URL=http://172.16.0.2:15959
 
 # training parameters
 adv_estimator=grpo
@@ -98,18 +109,18 @@ max_response_length=500
 loss_agg_mode="seq-mean-token-mean"
 
 
-train_bz_min=2
-train_bz_max=4
-train_prompt_bsz=8
+train_bz_min=6
+train_bz_max=8
+train_prompt_bsz=6
 rollout_n=8
-train_prompt_mini_bsz=32
+train_prompt_mini_bsz=48
 
 # Performance Related Parameter
 sp_size=4
 use_dynamic_bsz=False
 actor_ppo_max_token_len=$(((max_prompt_length + max_response_length) * 2))
 infer_ppo_max_token_len=$(((max_prompt_length + max_response_length) * 3))
-offload=True
+offload=False
 gen_tp=4
 fsdp_size=32
 
@@ -120,11 +131,11 @@ splitter=stepwise
 splitter_parallel=True
 window_size=5 
 stride_size=5
-max_steps=15
+max_steps=30
 
 use_vllm_logp=False
 use_sft_loss=False
-use_token_ids_from_pt=False
+use_token_ids_from_pt=True
 
 python3 -m verl.trainer.main_ppo_async \
     algorithm.adv_estimator=grpo \
@@ -182,11 +193,11 @@ python3 -m verl.trainer.main_ppo_async \
     trainer.experiment_name=$EXPERIMENT_NAME \
     trainer.n_gpus_per_node=$N_GPUS_PER_NODE \
     trainer.nnodes=$N_NODES \
-    trainer.save_freq=3 \
+    trainer.save_freq=1 \
     trainer.test_freq=10 \
     trainer.val_before_train=False \
     trainer.total_epochs=1 \
-    trainer.max_actor_ckpt_to_keep=2 \
+    trainer.max_actor_ckpt_to_keep=5 \
     +trainer.run_id=$RUN_ID \
     +trainer.splitter=${splitter} \
     +trainer.limit_messages=${limit_messages} \
@@ -203,7 +214,7 @@ python3 -m verl.trainer.main_ppo_async \
     +actor_rollout_ref.rollout.max_steps=15 \
     +actor_rollout_ref.rollout.limit_images=5 \
     +actor_rollout_ref.rollout.server_url=$ROLLOUT_SERVER_URL \
-    +actor_rollout_ref.actor.offline=false \
+    +actor_rollout_ref.actor.offline=false
     #  +trainer.splitter=sliding_window \
     # 
     #     trainer.experiment_name="osworld_all_feasible_reward_script_grpo_k8s_0802_16_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 6 | head -n 1)" \
 
@@ -0,0 +1,71 @@
+import os
+import re
+
+def rename_images_in_subfolders(root_folder):
+    """Zero-pad the index of ``image_<n>.png`` files to four digits.
+
+    Only the immediate subdirectories of ``root_folder`` are visited (there is
+    no recursion). Inside each one, every file whose name matches
+    ``image_<digits>.png`` is renamed to ``image_<NNNN>.png``, for example
+    ``image_5.png`` -> ``image_0005.png``. Files that already carry the padded
+    name are reported and left alone. A rename is skipped when the destination
+    name already exists, so no file is ever overwritten. Progress is printed
+    to stdout.
+
+    Args:
+        root_folder (str): Directory whose direct subdirectories are processed.
+
+    Returns:
+        None. When ``root_folder`` is not a directory an error message is
+        printed and nothing else happens.
+    """
+    print(f"开始处理根文件夹: {root_folder}\n")
+
+    # Bail out early when the root directory does not exist.
+    if not os.path.isdir(root_folder):
+        print(f"错误: 文件夹 '{root_folder}' 不存在。请检查路径是否正确。")
+        return
+
+    # Walk every entry (files and subdirectories) directly under the root.
+    for subdir_name in os.listdir(root_folder):
+        subdir_path = os.path.join(root_folder, subdir_name)
+
+        # Only subdirectories are processed.
+        if os.path.isdir(subdir_path):
+            print(f"--- 正在进入子文件夹: {subdir_path} ---")
+            files_renamed_count = 0
+
+            # os.listdir returns a snapshot list, so renaming while iterating is safe.
+            for filename in os.listdir(subdir_path):
+                # Match names of the form 'image_<digits>.png'.
+                match = re.match(r'image_(\d+)\.png$', filename)
+
+                if match:
+                    # Re-format the captured index as a 4-digit, zero-padded
+                    # integer (e.g. 5 -> '0005') and build the new file name.
+                    image_idx_int = int(match.group(1))
+                    new_filename = f"image_{image_idx_int:04d}.png"
+
+                    # Rename only when the name actually changes.
+                    if new_filename != filename:
+                        old_filepath = os.path.join(subdir_path, filename)
+                        new_filepath = os.path.join(subdir_path, new_filename)
+
+                        # os.rename silently replaces an existing destination
+                        # on POSIX; refuse to clobber a file that is already
+                        # there (e.g. both 'image_5.png' and 'image_0005.png').
+                        if os.path.exists(new_filepath):
+                            print(f"  ⚠️ 已跳过 (目标已存在): {filename} -> {new_filename}")
+                            continue
+
+                        try:
+                            os.rename(old_filepath, new_filepath)
+                            print(f"  ✅ 已重命名: {filename} -> {new_filename}")
+                            files_renamed_count += 1
+                        except OSError as e:
+                            print(f"  ❌ 重命名失败: {filename}。错误: {e}")
+                    else:
+                        # The name is already in the padded format; skip it.
+                        print(f"  👌 已跳过 (格式正确): {filename}")
+
+            if files_renamed_count == 0:
+                print("  该文件夹中没有需要重命名的文件。")
+            print("-" * (len(subdir_path) + 18))
+            print("\n")
+
+    print("🎉 所有操作完成！")
+
+
+if __name__ == '__main__':
+    # --- 请在这里设置你的目标文件夹路径 ---
+    target_directory = 'rollouter/results/test_1115'
+    
+    # 运行主函数
+    rename_images_in_subfolders(target_directory)
+
@@ -0,0 +1,2 @@
+{
+    "030eeff7-b492-4218-b312-701ec99ee0cc": 
@@ -816,9 +816,10 @@ async def generate(self, messages: List[Dict[str, Any]], **kwargs) -> str:
                         task_id = kwargs.get("task_id")
                         trace_id = kwargs.get("trace_id")
                         step = kwargs.get("step")
-                        save_dir = os.path.join(self.save_path, f"{task_id}_trace-{trace_id}")
+                        save_dir = os.path.join(self.save_path, f"{task_id}_{trace_id}")
                         os.makedirs(save_dir, exist_ok=True)
-                        save_path = os.path.join(save_dir, f"image_{int(step) - 1}.png")
+                        # save_path = os.path.join(save_dir, f"image_{int(step)}.png")
+                        save_path = os.path.join(save_dir, f"image_{int(step):04d}.png")
                         with open(save_path, "wb") as f:
                             f.write(base64.b64decode(encoded))
                     except Exception as e:
@@ -865,9 +866,9 @@ async def generate(self, messages: List[Dict[str, Any]], **kwargs) -> str:
                                 task_id = kwargs.get("task_id")
                                 trace_id = kwargs.get("trace_id")
                                 step = kwargs.get("step")
-                                save_dir = os.path.join(self.save_path, f"{task_id}_trace-{trace_id}")
+                                save_dir = os.path.join(self.save_path, f"{task_id}_{trace_id}")
                                 os.makedirs(save_dir, exist_ok=True)
-                                save_path = os.path.join(save_dir, f"data_for_step_{int(step)}.pt")
+                                save_path = os.path.join(save_dir, f"data_for_step_{int(step)+1}.pt")
 
                                 data_to_save = {
                                     "logp": torch.tensor(logp_list).cpu() if logp_list is not None else torch.tensor([]).cpu(),
@@ -896,7 +897,7 @@ async def save(self, messages: List[Dict], reward: float, task_id: str, trace_id
             return {"status": "skipped"}
 
         try:
-            save_dir = os.path.join(self.save_path, f"{task_id}_trace-{trace_id}")
+            save_dir = os.path.join(self.save_path, f"{task_id}_{trace_id}")
             os.makedirs(save_dir, exist_ok=True)
 
             # 保存 messages
 
@@ -1580,7 +1580,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "base",
+   "display_name": "verl",
    "language": "python",
    "name": "python3"
   },
@@ -1594,7 +1594,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.2"
+   "version": "3.10.19"
   }
  },
  "nbformat": 4,
 
@@ -330,3 +330,140 @@ def build_task(raw: Dict, osworld_root: Path, use_call_user: bool = False) -> Ta
         task_config = task_data
     )
 
+
+
+def hf_processor(name_or_path, **kwargs):
+    """Create a huggingface processor to process multimodal data.
+
+    Args:
+        name_or_path (str): The name of the processor.
+        **kwargs: Forwarded unchanged to ``AutoProcessor.from_pretrained``.
+
+    Returns:
+        transformers.ProcessorMixin: The pretrained processor, or ``None``
+        when loading raises or when the loaded object's class name does not
+        contain ``"Processor"``.
+    """
+    import warnings
+
+    from transformers import AutoProcessor
+
+    try:
+        processor = AutoProcessor.from_pretrained(name_or_path, **kwargs)
+    except Exception as e:
+        # Still best-effort (callers get None), but report why loading failed
+        # instead of swallowing the exception silently.
+        warnings.warn(f"Failed to load processor from {name_or_path!r}: {e}", stacklevel=2)
+        processor = None
+        # TODO(haibin.lin): try-catch should be removed after adding transformer version req to setup.py to avoid
+        # silent failure
+    # Avoid load tokenizer, see:
+    # https://github.com/huggingface/transformers/blob/v4.49.0/src/transformers/models/auto/processing_auto.py#L344
+    if processor is not None and "Processor" not in processor.__class__.__name__:
+        processor = None
+    return processor
+
+
+def process_text_sync(processor, messages):
+    """Flatten chat messages to text and return the token IDs of that text.
+
+    Each message contributes its ``content`` followed by ``<|im_end|>`` and a
+    newline; the ``role`` field is not used. When ``content`` is a list
+    (multimodal message) only the ``text`` of items whose ``type`` is
+    ``"text"`` is kept, joined with newlines.
+
+    Args:
+        processor: Callable invoked as
+            ``processor(text=[...], images=None, return_tensors="pt")``; its
+            result must support ``pop("input_ids")``.
+        messages: Iterable of dicts that each have a ``"content"`` key.
+
+    Returns:
+        The first element of the ``input_ids`` entry returned by the processor.
+    """
+
+    def _text_of(content):
+        # Plain (non-list) content is used as-is.
+        if not isinstance(content, list):
+            return content
+        # Multimodal content: keep only the text parts.
+        return "\n".join(part["text"] for part in content if part.get("type") == "text")
+
+    prompt = "".join(f"{_text_of(msg['content'])}<|im_end|>\n" for msg in messages)
+    encoded = processor(text=[prompt], images=None, return_tensors="pt")
+    return encoded.pop("input_ids")[0]
+
+
+import os
+import json
+import torch
+
+# Assumes the following variables and functions are already defined elsewhere:
+# def hf_processor(path, trust_remote_code, use_fast): ...
+# def process_text_sync(processor, messages): ...
+# COMPUTER_USE_PROMPT = "..."
+
+def process_system_prompt(instruction_root='evaluation_examples/examples_linux_osworld_0912',
+                          model_path='/workspace/huggingface/dart-gui-7b',
+                          output_dir='evaluation_examples/system_prompt_token_ids'):
+    """Tokenize the formatted system prompt of every task and save them in one file.
+
+    Every ``*.json`` file found one level below ``instruction_root`` is read.
+    Its ``instruction`` field is substituted into ``COMPUTER_USE_PROMPT``
+    (together with ``language="English"``), the resulting messages are passed
+    to ``process_text_sync`` and the returned token IDs are collected in a
+    dict keyed by task id (the file name without its ``.json`` extension).
+    If at least one task was processed, the dict is written with
+    ``torch.save`` to ``<output_dir>/system_prompts.pt``.
+
+    Args:
+        instruction_root (str): Directory whose immediate subdirectories
+            contain the task JSON files.
+        model_path (str): Passed to ``hf_processor`` to load the processor.
+        output_dir (str): Directory (created if missing) that receives
+            ``system_prompts.pt``.
+
+    Returns:
+        None. Progress, warnings and errors are printed to stdout.
+    """
+    processor = hf_processor(model_path, trust_remote_code=True, use_fast=True)
+    if processor is None:
+        # hf_processor returns None when loading fails; without this guard
+        # every task file would fail with the same "not callable" error.
+        print(f"Error: could not load a processor from '{model_path}'. Nothing was processed.")
+        return
+    # NOTE(review): COMPUTER_USE_PROMPT is not defined in the visible part of
+    # this module; it is assumed to be provided elsewhere.
+    system_prompt = COMPUTER_USE_PROMPT
+
+    # Output location: a single file holding the token IDs of all tasks.
+    os.makedirs(output_dir, exist_ok=True)
+    output_file_path = os.path.join(output_dir, 'system_prompts.pt')
+    print(f"Output will be saved to '{output_file_path}'")
+
+    # Maps task id -> token IDs.
+    all_token_ids = {}
+
+    # Visit every subdirectory of the instruction root (sorted for a stable order).
+    for subdir_name in sorted(os.listdir(instruction_root)):
+        subdir_path = os.path.join(instruction_root, subdir_name)
+        if not os.path.isdir(subdir_path):
+            continue
+
+        # Visit every JSON file in the subdirectory.
+        for file_name in sorted(os.listdir(subdir_path)):
+            if not file_name.endswith('.json'):
+                continue
+
+            file_path = os.path.join(subdir_path, file_name)
+            # Strip only the final extension so ids containing dots stay intact.
+            task_id = os.path.splitext(file_name)[0]
+
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+
+                instruction = data.get('instruction')
+                if not instruction:
+                    print(f"Warning: 'instruction' key not found or empty in {file_path}. Skipping.")
+                    continue
+
+                messages = [
+                    {
+                        "role": "system",
+                        "content": "You are a helpful assistant."
+                    },
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": system_prompt.format(
+                                    instruction=instruction,
+                                    language="English"
+                                ),
+                            }
+                        ]
+                    }
+                ]
+
+                input_ids = process_text_sync(processor, messages)
+
+                # Collect the result in the dict instead of writing one file per task.
+                if input_ids is not None:
+                    all_token_ids[task_id] = input_ids
+                    print(f"Processed and collected token IDs for task: {task_id}")
+                else:
+                    print(f"Warning: process_text_sync returned None for task {task_id}. Skipping.")
+
+            except json.JSONDecodeError:
+                print(f"Error: Could not decode JSON from {file_path}. Skipping.")
+            except Exception as e:
+                # Best-effort: report the failure and continue with the next file.
+                print(f"An unexpected error occurred while processing {file_path}: {e}")
+
+    # After the loop, write the whole dict to a single file.
+    if all_token_ids:
+        torch.save(all_token_ids, output_file_path)
+        print(f"\nSuccessfully saved {len(all_token_ids)} token ID sets to {output_file_path}")
+    else:
+        print("\nNo token IDs were generated. Output file was not created.")
+
+
+def main():
+    """Script entry point: call ``process_system_prompt`` with its defaults."""
+    process_system_prompt()
+
+if __name__ == "__main__":
+    main()
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+{`
	`2`	`+ "030eeff7-b492-4218-b312-701ec99ee0cc":`