Skip to content

Commit efff826

Browse files
committed
new sql
1 parent 4059fd7 commit efff826

File tree

6 files changed

+309
-23
lines changed

6 files changed

+309
-23
lines changed

examples/osworld/async/run_trainer_debug_w_rollout_stepwise_train_pt.sh

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,21 @@ pip install cryptography
33

44
set -x
55
ENGINE=${1:-vllm_osworld}
6-
cd /root/verl
6+
7+
cd /workspace/codes/verl
78

89
# Initialize Ray cluster for multi-node training
910
# Make sure Ray is running on all nodes before executing this script
1011
# On head node: ray start --head --port=6379
1112
# On worker nodes: ray start --address='head_node_ip:6379'
1213
# Detect number of GPUs on the current machine
1314
N_NODES=1
14-
N_GPUS=$(nvidia-smi --list-gpus | wc -l)
15+
# Check if nvidia-smi is available and working
16+
if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi >/dev/null 2>&1; then
17+
N_GPUS=$(nvidia-smi --list-gpus | wc -l)
18+
else
19+
N_GPUS=0
20+
fi
1521
N_GPUS_PER_NODE=$N_GPUS
1622

1723
# # 生成带时间戳的唯一文件ID,后台运行
@@ -25,7 +31,7 @@ echo "To stop monitoring: kill $!"
2531

2632
echo "Detected $N_GPUS GPUs on this machine"
2733

28-
MODEL_PATH=/capacity/userdata/vcfenxd75jiv/shichenrui/ui_tars/ByteDance-Seed/UI-TARS-1.5
34+
MODEL_PATH=/workspace/huggingface/dart-gui-7b
2935

3036
#/root/verl/checkpoints/verl_osworld_grpo/vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_grpo_k8s_20250906_m3ou6di7/global_step_63/actor/huggingface
3137

@@ -53,10 +59,10 @@ export SWAN_FS_GROUP_HOOK=https://open.feishu.cn/open-apis/bot/v2/hook/793155e5-
5359
# export ROOT_DATA_DIR=rollouter/results/pass16_20250825_train152_pass16_gpu4_env36
5460
# export RUN_ID=results/pass16_20250825_train152_pass16_gpu4_env36
5561

56-
export ROOT_DATA_DIR=rollouter/results/pass8_20250904_train15_pass8_gpu2_env20_vllm_logp_maxstep15_tesl_vllm_logp_test6
57-
export RUN_ID=results/pass8_20250904_train15_pass8_gpu2_env20_vllm_logp_maxstep15_tesl_vllm_logp_test6
62+
export ROOT_DATA_DIR=pass32_uitars_0928
63+
export RUN_ID=pass32_uitars_0928
5864
# export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_20250821_vxer2wco
59-
export EXPERIMENT_NAME=vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_grpo_k8s_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
65+
export EXPERIMENT_NAME=Fixed_$(date +%Y%m%d)_$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)
6066
# export EXPERIMENT_NAME=vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_grpo_k8s_20250906_m3ou6di7
6167
# export EXPERIMENT_NAME=pt_test5_w_KL_trainset15_vllm_logp_osworld_reward_script_grpo_k8s_20250905_91ww0y85
6268
# export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_20250827_2txpd14d
@@ -65,7 +71,7 @@ export EXPERIMENT_NAME=vllm_logp_pt_test5_w_KL_trainset15_osworld_reward_script_
6571
# export RUN_ID=pengxiang_test_0802_max_variance
6672
# export EXPERIMENT_NAME=osworld_all_feasible_reward_script_grpo_k8s_0802_8_mb64_micro8
6773
# export ROLLOUT_SERVER_URL=http://172.19.47.166:15959
68-
export ROLLOUT_SERVER_URL=http://172.19.171.243:15959
74+
export ROLLOUT_SERVER_URL=http://0.0.0.0:8888
6975

7076
# training parameters
7177
adv_estimator=grpo

pass32_uitars_0928

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/data/liuyang/pass32_osworldnew_tmp07/pass32_osworldnew_tmp07

rollouter/requirements.txt

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
accelerate==1.6.0
2+
agentenv==0.0.1
3+
aiofiles==24.1.0
4+
aiohappyeyeballs==2.6.1
5+
aiohttp==3.11.18
6+
aiohttp-cors==0.8.1
7+
aiosignal==1.3.2
8+
airportsdata==20250224
9+
annotated-types==0.7.0
10+
anthropic==0.50.0
11+
antlr4-python3-runtime==4.9.3
12+
anyio==4.9.0
13+
astor==0.8.1
14+
asttokens==3.0.0
15+
async-timeout==5.0.1
16+
attrs==25.3.0
17+
av==15.0.0
18+
backcall==0.2.0
19+
backoff==2.2.1
20+
beautifulsoup4==4.13.4
21+
blake3==1.0.4
22+
bleach==6.2.0
23+
boto3==1.39.10
24+
botocore==1.39.10
25+
build==1.2.2.post1
26+
cachetools==5.5.2
27+
certifi==2025.1.31
28+
cffi==1.17.1
29+
charset-normalizer==3.4.1
30+
click==8.1.8
31+
cloudpickle==3.1.1
32+
codetiming==1.4.0
33+
colorful==0.5.6
34+
compressed-tensors==0.9.3
35+
cuda-bindings==12.8.0
36+
cuda-python==12.8.0
37+
cupy-cuda12x==13.4.1
38+
datasets==3.5.0
39+
decorator==5.2.1
40+
decord==0.6.0
41+
defusedxml==0.7.1
42+
deprecated==1.2.18
43+
depyf==0.18.0
44+
dill==0.3.8
45+
diskcache==5.6.3
46+
distlib==0.3.9
47+
distro==1.9.0
48+
dnspython==2.7.0
49+
docker-pycreds==0.4.0
50+
docopt==0.6.2
51+
einops==0.8.1
52+
email-validator==2.2.0
53+
exceptiongroup==1.2.2
54+
executing==2.2.0
55+
farama-notifications==0.0.4
56+
fastapi==0.115.12
57+
fastapi-cli==0.0.7
58+
fastjsonschema==2.21.1
59+
fastrlock==0.8.3
60+
filelock==3.18.0
61+
flashinfer-python==0.2.3
62+
frozenlist==1.6.0
63+
fsspec==2024.12.0
64+
gguf==0.16.3
65+
gitdb==4.0.12
66+
gitpython==3.1.44
67+
google-api-core==2.24.2
68+
google-auth==2.39.0
69+
googleapis-common-protos==1.70.0
70+
greenlet==3.2.3
71+
grpcio==1.71.0
72+
gymnasium==1.2.0
73+
h11==0.16.0
74+
hf-transfer==0.1.9
75+
hf-xet==1.0.5
76+
httpcore==1.0.9
77+
httptools==0.6.4
78+
httpx==0.28.1
79+
huggingface-hub==0.30.2
80+
hydra-core==1.3.2
81+
idna==3.10
82+
importlib-metadata==8.0.0
83+
interegular==0.3.3
84+
ipython==8.12.3
85+
jedi==0.19.2
86+
jinja2==3.1.6
87+
jiter==0.9.0
88+
jmespath==1.0.1
89+
jsonschema==4.23.0
90+
jsonschema-specifications==2025.4.1
91+
jupyter-client==8.6.3
92+
jupyter-core==5.8.1
93+
jupyterlab-pygments==0.3.0
94+
lark==1.2.2
95+
litellm==1.67.2
96+
llguidance==0.7.19
97+
llvmlite==0.44.0
98+
lm-format-enforcer==0.10.11
99+
markdown-it-py==3.0.0
100+
markupsafe==3.0.2
101+
matplotlib-inline==0.1.7
102+
mdurl==0.1.2
103+
mistral-common==1.5.4
104+
mistune==3.1.3
105+
modelscope==1.25.0
106+
mpmath==1.3.0
107+
msgpack==1.1.0
108+
msgspec==0.19.0
109+
multidict==6.4.3
110+
multiprocess==0.70.16
111+
nanobind==2.7.0
112+
nbclient==0.10.2
113+
nbconvert==7.16.6
114+
nbformat==5.10.4
115+
nest-asyncio==1.6.0
116+
networkx==3.4.2
117+
ninja==1.11.1.4
118+
numba==0.61.2
119+
nvidia-cublas-cu12==12.4.5.8
120+
nvidia-cuda-cupti-cu12==12.4.127
121+
nvidia-cuda-nvrtc-cu12==12.4.127
122+
nvidia-cuda-runtime-cu12==12.4.127
123+
nvidia-cudnn-cu12==9.1.0.70
124+
nvidia-cufft-cu12==11.2.1.3
125+
nvidia-curand-cu12==10.3.5.147
126+
nvidia-cusolver-cu12==11.6.1.9
127+
nvidia-cusparse-cu12==12.3.1.170
128+
nvidia-cusparselt-cu12==0.6.2
129+
nvidia-ml-py==12.570.86
130+
nvidia-nccl-cu12==2.21.5
131+
nvidia-nvjitlink-cu12==12.4.127
132+
nvidia-nvtx-cu12==12.4.127
133+
nvitop==1.5.0
134+
omegaconf==2.3.0
135+
openai==1.76.0
136+
opencensus==0.11.4
137+
opencensus-context==0.1.3
138+
opencv-python-headless==4.11.0.86
139+
opentelemetry-api==1.26.0
140+
opentelemetry-exporter-otlp==1.26.0
141+
opentelemetry-exporter-otlp-proto-common==1.26.0
142+
opentelemetry-exporter-otlp-proto-grpc==1.26.0
143+
opentelemetry-exporter-otlp-proto-http==1.26.0
144+
opentelemetry-proto==1.26.0
145+
opentelemetry-sdk==1.26.0
146+
opentelemetry-semantic-conventions==0.47b0
147+
opentelemetry-semantic-conventions-ai==0.4.6
148+
orjson==3.10.16
149+
outlines==0.1.11
150+
outlines-core==0.1.26
151+
packaging==25.0
152+
pandas==2.2.3
153+
pandocfilters==1.5.1
154+
parso==0.8.4
155+
partial-json-parser==0.2.1.1.post5
156+
peft==0.15.2
157+
pexpect==4.9.0
158+
pickleshare==0.7.5
159+
pillow==11.2.1
160+
pip==25.2
161+
pip-tools==7.4.1
162+
pipreqs==0.5.0
163+
platformdirs==4.3.7
164+
prettytable==3.16.0
165+
prometheus-client==0.21.1
166+
prometheus-fastapi-instrumentator==7.1.0
167+
prompt-toolkit==3.0.51
168+
propcache==0.3.1
169+
proto-plus==1.26.1
170+
protobuf
171+
psutil==7.0.0
172+
ptyprocess==0.7.0
173+
pure-eval==0.2.3
174+
py-cpuinfo==9.0.0
175+
py-spy==0.4.0
176+
pyarrow==19.0.1
177+
pyasn1==0.6.1
178+
pyasn1-modules==0.4.2
179+
pybind11==2.13.6
180+
pycountry==24.6.1
181+
pycparser==2.22
182+
pydantic==2.11.3
183+
pydantic-core==2.33.1
184+
pyecharts==2.0.8
185+
pygments==2.19.1
186+
pylatexenc==2.10
187+
pymysql==1.1.1
188+
pynvml==12.0.0
189+
pyproject-hooks==1.2.0
190+
python-dateutil==2.9.0.post0
191+
python-dotenv==1.1.0
192+
python-json-logger==3.3.0
193+
python-multipart==0.0.20
194+
pytz==2025.2
195+
pyyaml==6.0.2
196+
pyzmq==27.0.0
197+
qwen-vl-utils==0.0.11
198+
ray==2.43.0
199+
referencing==0.36.2
200+
regex==2024.11.6
201+
requests==2.32.3
202+
rich==13.9.4
203+
rich-toolkit==0.14.3
204+
rpds-py==0.24.0
205+
rsa==4.9.1
206+
s3transfer==0.13.1
207+
safetensors==0.5.3
208+
scipy==1.15.2
209+
sentencepiece==0.2.0
210+
sentry-sdk==2.27.0
211+
setproctitle==1.3.5
212+
setuptools==80.9.0
213+
shellingham==1.5.4
214+
simplejson==3.20.1
215+
six
216+
smart-open==7.1.0
217+
smmap==5.0.2
218+
sniffio==1.3.1
219+
soundfile==0.13.1
220+
soupsieve==2.7
221+
sqlalchemy==2.0.41
222+
stack-data==0.6.3
223+
starlette==0.46.2
224+
swankit==0.2.4
225+
swanlab==0.6.7
226+
sympy==1.13.1
227+
tensordict==0.6.2
228+
tiktoken==0.9.0
229+
tinycss2==1.4.0
230+
tokenizers==0.21.1
231+
tomli==2.2.1
232+
torch==2.6.0
233+
torch-memory-saver==0.0.5
234+
torchao==0.10.0
235+
torchaudio==2.6.0
236+
torchdata==0.11.0
237+
torchvision==0.21.0
238+
tornado==6.5.1
239+
tqdm==4.67.1
240+
traitlets==5.14.3
241+
transformers==4.51.1
242+
triton==3.2.0
243+
typer==0.15.2
244+
typing-extensions==4.13.2
245+
typing-inspection==0.4.0
246+
tzdata==2025.2
247+
urllib3==2.4.0
248+
uvicorn==0.34.2
249+
uvloop==0.21.0
250+
verl==0.3.0.post1
251+
virtualenv==20.30.0
252+
vllm==0.8.5.post1
253+
wandb==0.19.10
254+
watchfiles==1.0.5
255+
wcwidth==0.2.13
256+
webencodings==0.5.1
257+
websockets==15.0.1
258+
wheel==0.45.1
259+
wrapt==1.17.2
260+
xformers==0.0.29.post2
261+
xgrammar==0.1.18
262+
xxhash==3.5.0
263+
yarg==0.1.9
264+
yarl==1.20.0
265+
zipp==3.21.0
266+

sim_rollout_pass8.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def simulate_rollout(
8383
limit: Optional[int] = None,
8484
dry_run: bool = False,
8585
loops = 10,
86-
bootstrap_count = 256,
86+
bootstrap_count = 2,
8787
delete_existing: bool = False,
8888
) -> None:
8989
"""Simulate rollout by inserting rows at a controlled, steady rate."""
@@ -109,6 +109,7 @@ def item_at(global_idx: int) -> Dict[str, Any]:
109109

110110
# Initialize DB manager
111111
db_manager = create_database_manager()
112+
print(db_manager.engine.url) # 看连接到哪个库
112113

113114
# Optionally clear previous rows for this run_id
114115
if delete_existing:
@@ -155,6 +156,8 @@ def item_at(global_idx: int) -> Dict[str, Any]:
155156

156157
# 插入数据
157158
model_version = latest_model_version(db_manager, run_id)
159+
if model_version == "":
160+
model_version = "v0"
158161
payload = dict(
159162
trajectory_id=item["trajectory_id"],
160163
run_id=run_id,
@@ -201,15 +204,15 @@ def item_at(global_idx: int) -> Dict[str, Any]:
201204

202205
def main() -> None:
203206
parser = argparse.ArgumentParser(description="Simulated rollout producer for trainer unit tests.")
204-
parser.add_argument("--json", default="data/train/data_pass@8_train90.json", help="Path to the static JSON data.")
207+
parser.add_argument("--json", default="data/train/pass@32_90_trainingser.json", help="Path to the static JSON data.")
205208
# parser.add_argument("--run-id", default="pengxiang_test_0824_fixed_4_task", help="Run ID to write into DB rows.")
206-
parser.add_argument("--run-id", default="pengxiang_test_0829_stepwise_pass8_multinode", help="Run ID to write into DB rows.")
207-
parser.add_argument("--rate", type=int, default=26, help="Insert rate per minute.")
209+
parser.add_argument("--run-id", default="pass32_uitars_0928", help="Run ID to write into DB rows.")
210+
parser.add_argument("--rate", type=int, default=100, help="Insert rate per minute.")
208211
parser.add_argument("--start-index", type=int, default=0, help="Start from this index in the JSON list.")
209212
parser.add_argument("--limit", type=int, default=None, help="Only process this many items.")
210213
parser.add_argument("--dry-run", action="store_true", help="Don't write to DB; just print what would happen.")
211214
parser.add_argument("--loops", type=int, default=10000, help="Maximum number of full loops over the JSON (default: 10000).")
212-
parser.add_argument("--bootstrap", type=int, default=512, help="Number of items to insert immediately at startup (default: 256).")
215+
parser.add_argument("--bootstrap", type=int, default=200, help="Number of items to insert immediately at startup (default: 200).")
213216
parser.add_argument(
214217
"--delete-existing",
215218
action="store_true",

verl/trainer/main_ppo_async.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def run_ppo(config) -> None:
5050
runtime_env={"env_vars": {"TOKENIZERS_PARALLELISM": "true", "NCCL_DEBUG": "WARN", "VLLM_LOGGING_LEVEL": "WARN", "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "true"}},
5151
num_cpus=config.ray_init.num_cpus,
5252
dashboard_host="0.0.0.0",
53-
object_store_memory=800 * 1024**3, # 00GB for object store
53+
object_store_memory=800 * 1024**3, # 800GB for object store
5454
)
5555

5656
# Create a remote instance of the TaskRunner class, and

0 commit comments

Comments
 (0)