From 0e3744c3d4f207f419772051140425e5667f88ec Mon Sep 17 00:00:00 2001
From: Oleh Shliazhko <oleh.shliazhko@servicenow.com>
Date: Tue, 29 Jul 2025 15:39:04 +0200
Subject: [PATCH] update osworld docs

---
 experiments/osworld_docker_test.py            | 37 -------------------
 .../benchmarks/{setup.md => osworld.md}       | 22 ++++++++++-
 2 files changed, 20 insertions(+), 39 deletions(-)
 delete mode 100644 experiments/osworld_docker_test.py
 rename src/agentlab/benchmarks/{setup.md => osworld.md} (62%)

diff --git a/experiments/osworld_docker_test.py b/experiments/osworld_docker_test.py
deleted file mode 100644
index 3b68db5d..00000000
--- a/experiments/osworld_docker_test.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import logging
-
-from desktop_env.desktop_env import DesktopEnv
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    handlers=[logging.StreamHandler()],
-)
-
-example = {
-    "id": "94d95f96-9699-4208-98ba-3c3119edf9c2",
-    "instruction": "I want to install Spotify on my current system. Could you please help me?",
-    "config": [
-        {
-            "type": "execute",
-            "parameters": {
-                "command": [
-                    "python",
-                    "-c",
-                    "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5);",
-                ]
-            },
-        }
-    ],
-    "evaluator": {
-        "func": "check_include_exclude",
-        "result": {"type": "vm_command_line", "command": "which spotify"},
-        "expected": {"type": "rule", "rules": {"include": ["spotify"], "exclude": ["not found"]}},
-    },
-}
-
-env = DesktopEnv(action_space="pyautogui", provider_name="docker", os_type="Ubuntu")
-
-obs = env.reset(task_config=example)
-obs, reward, done, info = env.step("pyautogui.rightClick()")
-print(obs)
diff --git a/src/agentlab/benchmarks/setup.md b/src/agentlab/benchmarks/osworld.md
similarity index 62%
rename from src/agentlab/benchmarks/setup.md
rename to src/agentlab/benchmarks/osworld.md
index 071670d1..d7e99d69 100644
--- a/src/agentlab/benchmarks/setup.md
+++ b/src/agentlab/benchmarks/osworld.md
@@ -31,7 +31,7 @@ The main entry point `experiments/run_osworld.py` is currently configured with h
 2. **Environment Variables:**
    - `AGENTLAB_DEBUG=1`: Automatically runs the debug subset (7 tasks from `osworld_debug_task_ids.json`)
 
-### Running OSWorld Tasks
+### Task subsets
 
 We provide different subsets of tasks:
 
@@ -42,10 +42,28 @@ We provide different subsets of tasks:
 ### Example Commands
 
 ```bash
-# Run with default debug subset (7 tasks)
+# Run the default debug subset using sequential execution in a VMware VM
 python experiments/run_osworld.py
 ```
 
+### Parallel Execution with Docker
+To run OSWorld in parallel using Docker, ensure you have Docker installed and configured.
+To install it, follow the section from the OSWorld README on [Docker setup](https://github.com/xlang-ai/OSWorld?tab=readme-ov-file#docker-server-with-kvm-support-for-better-performance).
+Ensure that your Docker installation supports KVM, as OSWorld requires it for running VMs.
+We also recommend pulling the latest Docker image for OSWorld before running the benchmark:
+
+```bash
+docker pull happysixd/osworld-docker
+```
+
+After setting up Docker, you can change the `use_vmware` parameter in the script to `False` and run:
+
+```bash
+python experiments/run_osworld.py
+```
+You can control the number of parallel jobs by setting the `n_jobs` parameter in the script; the default is 4.
+We recommend setting `n_jobs` to `your_number_of_cpu_cores - 2` to leave some resources for the host system and the benchmark itself.
+
 
 ### Configuration Notes