diff --git a/requirements.txt b/requirements.txt index 558d03d5..c598b342 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ black[jupyter]>=24.2.0 blacken-docs pre-commit pytest==7.3.2 +flaky pytest-xdist pytest-playwright dask diff --git a/tests/experiments/test_launch_exp.py b/tests/experiments/test_launch_exp.py index 782a9edc..1a58f797 100644 --- a/tests/experiments/test_launch_exp.py +++ b/tests/experiments/test_launch_exp.py @@ -8,7 +8,7 @@ from agentlab.agents.generic_agent.agent_configs import FLAGS_GPT_3_5, AGENT_4o_MINI from agentlab.agents.generic_agent.generic_agent import GenericAgentArgs from agentlab.analyze import inspect_results -from agentlab.experiments.launch_exp import find_incomplete, run_experiments, non_dummy_count +from agentlab.experiments.launch_exp import find_incomplete, non_dummy_count, run_experiments from agentlab.experiments.study import Study from agentlab.llm.chat_api import CheatMiniWoBLLMArgs @@ -91,6 +91,7 @@ def test_launch_system_ray(): _test_launch_system(backend="ray") +@pytest.mark.pricy def test_timeout_ray(): _test_launch_system(backend="ray", cause_timeout=True) diff --git a/tests/experiments/test_ray.py b/tests/experiments/test_ray.py index a509742f..28ddfa34 100644 --- a/tests/experiments/test_ray.py +++ b/tests/experiments/test_ray.py @@ -1,12 +1,15 @@ import bgym import pytest import ray -from agentlab.experiments.graph_execution_ray import execute_task_graph +from flaky import flaky + from agentlab.experiments.exp_utils import MockedExpArgs, add_dependencies +from agentlab.experiments.graph_execution_ray import execute_task_graph TASK_TIME = 3 +@flaky(max_runs=3, min_passes=1) def test_execute_task_graph(): # Define a list of ExpArgs with dependencies exp_args_list = [