diff --git a/install_fast_downward.sh b/install_fast_downward.sh new file mode 100644 index 0000000000..23a5e7da6e --- /dev/null +++ b/install_fast_downward.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# Install Fast Downward for predicators +echo "Installing Fast Downward..." + +# Create external directory if it doesn't exist +mkdir -p external + +# Clone Fast Downward if not already present +if [ ! -d "external/downward" ]; then + echo "Cloning Fast Downward repository..." + git clone https://github.com/aibasel/downward.git external/downward +fi + +# Build Fast Downward +echo "Building Fast Downward..." +cd external/downward +python build.py + +# Get the absolute path +FD_PATH=$(pwd)/fast-downward.py + +# Go back to predicators root +cd ../.. + +# Create environment setup script +cat > setup_fd_env.sh << EOF +#!/bin/bash +export FD_EXEC_PATH=$FD_PATH +echo "Fast Downward path set to: $FD_PATH" +EOF + +chmod +x setup_fd_env.sh + +echo "Fast Downward installed successfully!" +echo "To set the environment variable for your current session, run:" +echo " source ./setup_fd_env.sh" +echo "" +echo "To make it permanent, add this line to your shell profile (.bashrc, .zshrc, etc.):" +echo " export FD_EXEC_PATH=$FD_PATH" diff --git a/instruction_python3.13.md b/instruction_python3.13.md new file mode 100644 index 0000000000..f8cec54c80 --- /dev/null +++ b/instruction_python3.13.md @@ -0,0 +1,155 @@ +# Predicators Installation Instructions for Python 3.13 + +This guide provides step-by-step instructions for installing the `predicators` package on Python 3.13, addressing compatibility issues with the original setup.py dependencies. + +## Prerequisites + +- Python 3.13.x +- A virtual environment (recommended) +- Git (for Git-based dependencies) + +## Installation Steps + +### 1. Set up and activate your virtual environment + +```bash +# If you don't have a virtual environment yet: +python3 -m venv .venv + +# Activate the virtual environment +source .venv/bin/activate +``` + +### 2. Install build dependencies + +The original setup.py has strict version pins that aren't compatible with Python 3.13. First, install the required build tools: + +```bash +pip install --upgrade setuptools wheel +``` + +### 3. Install predicators without dependencies + +This avoids the dependency resolution conflicts: + +```bash +pip install --no-deps -e . +``` + +### 4. Install compatible dependencies + +Use the Python 3.13 compatible requirements file: + +```bash +pip install -r requirements-python3.13.txt +``` + +### 5. Install Git-based dependencies + +Install the remaining dependencies from Git repositories: + +```bash +pip install "git+https://github.com/sebdumancic/structure_mapping.git" "git+https://github.com/tomsilver/pg3.git" "git+https://github.com/Learning-and-Intelligent-Systems/gym-sokoban.git" +``` + +### 6. Set up environment variables + +Set the required environment variable: + +```bash +export PYTHONHASHSEED=0 +``` + +To make it permanent, add it to your shell profile: + +```bash +# For bash +echo "export PYTHONHASHSEED=0" >> ~/.bashrc + +# For zsh +echo "export PYTHONHASHSEED=0" >> ~/.zshrc +``` + +### 7. 
Install Fast Downward + +```bash +bash install_fast_downward.sh +export FD_EXEC_PATH=$(pwd)/external/downward/fast-downward.py +``` + +Add to shell profile for persistence: +```bash +echo "export FD_EXEC_PATH=$(pwd)/external/downward/fast-downward.py" >> ~/.zshrc +``` + +## Running Predicators + +After installation, you can run predicators with the environment variables set: + +```bash +# Set environment variable (required) +export PYTHONHASHSEED=0 + +# Example command +python predicators/main.py --env burger --approach vlm_open_loop --seed 0 --num_train_tasks 1 --num_test_tasks 1 --bilevel_plan_without_sim True --make_failure_videos --sesame_task_planner fdopt --debug --vlm_model_name gemini-1.5-pro-latest --vlm_open_loop_use_training_demos True +``` + +## Verification + +To verify the installation worked: + +```bash +source .venv/bin/activate +export PYTHONHASHSEED=0 +python predicators/main.py --help +``` + +You should see the help message without any import errors. + +## Known Issues and Warnings + +1. **PyBullet not available**: PyBullet has compilation issues with Python 3.13. All PyBullet-dependent environments (those with names starting with `pybullet_`) will be automatically skipped. This is expected and allows you to use non-PyBullet environments. + +2. **Gym deprecation warning**: You may see warnings about gym being unmaintained. This is expected and doesn't affect functionality. + +3. **Package version conflicts**: The dependency resolver may show warnings about version conflicts between the strict pins in setup.py and the installed versions. These are expected and generally don't cause issues. + +4. **pkg_resources deprecation**: You may see warnings about pkg_resources being deprecated. This comes from some dependencies and is not critical. + +## Troubleshooting + +### If you encounter "ModuleNotFoundError" + +Make sure: +1. Your virtual environment is activated +2. You've installed all dependencies as listed above +3. The predicators package was installed with `pip install --no-deps -e .` + +### If you encounter build errors + +Ensure you have the latest setuptools and wheel: +```bash +pip install --upgrade setuptools wheel pip +``` + +### If specific dependencies fail to install + +Some dependencies may need system-level packages. On macOS with Homebrew: +```bash +# For opencv-python issues +brew install opencv + +# For other compilation issues +xcode-select --install +``` + +## Key Differences from Original setup.py + +The main changes made for Python 3.13 compatibility: + +- **numpy**: Updated from `==1.23.5` to `>=1.24.0` (numpy 1.23.5 doesn't support Python 3.13) +- **torch/torchvision**: Updated to compatible versions +- **Other packages**: Used more flexible version constraints instead of strict pins +- **Missing dependencies**: Added `psutil` which was required but not listed + +This approach maintains compatibility while working around the Python 3.13 restrictions in the original dependency specifications. diff --git a/predicators/approaches/active_sampler_learning_approach.py b/predicators/approaches/active_sampler_learning_approach.py index 4f86ae7c60..56f7e73dc0 100644 --- a/predicators/approaches/active_sampler_learning_approach.py +++ b/predicators/approaches/active_sampler_learning_approach.py @@ -1,10 +1,10 @@ """An approach that performs active sampler learning. -The current implementation assumes for convenience that NSRTs and options are -1:1 and share the same parameters (like a PDDL environment). 
It is -straightforward conceptually to remove this assumption, because the approach -uses its own NSRTs to select options, but it is difficult implementation-wise, -so we're punting for now. +The current implementation assumes for convenience that NSRTs and +options are 1:1 and share the same parameters (like a PDDL environment). +It is straightforward conceptually to remove this assumption, because +the approach uses its own NSRTs to select options, but it is difficult +implementation-wise, so we're punting for now. See scripts/configs/active_sampler_learning.yaml for examples. """ diff --git a/predicators/approaches/bridge_policy_approach.py b/predicators/approaches/bridge_policy_approach.py index d8a1987c74..8e60912399 100644 --- a/predicators/approaches/bridge_policy_approach.py +++ b/predicators/approaches/bridge_policy_approach.py @@ -402,7 +402,7 @@ def _Can_plan(self, state: State, _: Sequence[Object]) -> bool: def call_planner_policy(self, state: State, _: Dict, __: Sequence[Object], ___: Array) -> Action: - """policy for CallPlanner option.""" + """Policy for CallPlanner option.""" self._current_control = "planner" # create a new task where the init state is our current state current_task = Task(state, self._train_tasks[0].goal) diff --git a/predicators/approaches/llm_option_renaming_approach.py b/predicators/approaches/llm_option_renaming_approach.py index 335afac2f6..0937075fbd 100644 --- a/predicators/approaches/llm_option_renaming_approach.py +++ b/predicators/approaches/llm_option_renaming_approach.py @@ -37,8 +37,9 @@ def _renaming_suffixes(self) -> List[str]: def _create_replacements(self) -> Dict[str, str]: return { - o.name: utils.generate_random_string(len(o.name), - list(string.ascii_lowercase), - self._rng) + o.name: + utils.generate_random_string(len(o.name), + list(string.ascii_lowercase), + self._rng) for o in self._initial_options } diff --git a/predicators/approaches/llm_predicate_renaming_approach.py b/predicators/approaches/llm_predicate_renaming_approach.py index 7698479efb..870983aef4 100644 --- a/predicators/approaches/llm_predicate_renaming_approach.py +++ b/predicators/approaches/llm_predicate_renaming_approach.py @@ -37,8 +37,9 @@ def _renaming_suffixes(self) -> List[str]: def _create_replacements(self) -> Dict[str, str]: return { - p.name: utils.generate_random_string(len(p.name), - list(string.ascii_lowercase), - self._rng) + p.name: + utils.generate_random_string(len(p.name), + list(string.ascii_lowercase), + self._rng) for p in self._get_current_predicates() } diff --git a/predicators/approaches/maple_q_approach.py b/predicators/approaches/maple_q_approach.py index 865a6d8b9b..bdcc2a1b21 100644 --- a/predicators/approaches/maple_q_approach.py +++ b/predicators/approaches/maple_q_approach.py @@ -132,7 +132,8 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory], for nsrt in self._nsrts: all_objects = { o - for t in self._train_tasks for o in t.init + for t in self._train_tasks + for o in t.init } all_ground_nsrts.update( utils.all_ground_nsrts(nsrt, all_objects)) diff --git a/predicators/approaches/nsrt_rl_approach.py b/predicators/approaches/nsrt_rl_approach.py index e9faaca3ba..1e9762c1bb 100644 --- a/predicators/approaches/nsrt_rl_approach.py +++ b/predicators/approaches/nsrt_rl_approach.py @@ -139,8 +139,8 @@ def _get_experience_from_result( next_state, cur_option.objects) had_sufficient_steps = ( next_state.allclose(traj.states[-1]) - and (CFG.max_num_steps_interaction_request - j > - CFG.nsrt_rl_valid_reward_steps_threshold)) + and 
(CFG.max_num_steps_interaction_request - j + > CFG.nsrt_rl_valid_reward_steps_threshold)) if terminate: option_to_data[parent_option].append(experience) cur_option_idx += 1 diff --git a/predicators/approaches/online_nsrt_learning_approach.py b/predicators/approaches/online_nsrt_learning_approach.py index 5d3fcd1e80..0af8e4ff89 100644 --- a/predicators/approaches/online_nsrt_learning_approach.py +++ b/predicators/approaches/online_nsrt_learning_approach.py @@ -136,11 +136,12 @@ def _create_explorer(self) -> BaseExplorer: def _score_atoms_novelty(self, atoms: Set[GroundAtom]) -> float: """Score the novelty of a ground atom set, with higher better. - Score based on the number of times that this atom set has been seen in - the data, with object identities ignored (i.e., this is lifted). + Score based on the number of times that this atom set has been + seen in the data, with object identities ignored (i.e., this is + lifted). - Assumes that the size of the atom set is between CFG.glib_min_goal_size - and CFG.glib_max_goal_size (inclusive). + Assumes that the size of the atom set is between + CFG.glib_min_goal_size and CFG.glib_max_goal_size (inclusive). """ assert CFG.glib_min_goal_size <= len(atoms) <= CFG.glib_max_goal_size canonical_atoms = self._get_canonical_lifted_atoms(atoms) @@ -160,9 +161,10 @@ def _get_canonical_lifted_atoms( This is a helper for novelty scoring for GLIB. - This is an efficient approximation of what we really care about, which - is whether two atom sets unify. It's an approximation because there are - tricky cases where the sorting procedure is ambiguous. + This is an efficient approximation of what we really care about, + which is whether two atom sets unify. It's an approximation + because there are tricky cases where the sorting procedure is + ambiguous. """ # Create a "signature" for each object, which will be used to break # ties when sorting based on predicates alone is not enough. 
diff --git a/predicators/approaches/pp_param_learning_approach.py b/predicators/approaches/pp_param_learning_approach.py index 98045d4c4d..3e58424a45 100644 --- a/predicators/approaches/pp_param_learning_approach.py +++ b/predicators/approaches/pp_param_learning_approach.py @@ -380,7 +380,8 @@ def elbo_torch( use_sparse_trajectory: bool = True, debug_log: bool = False, ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - """*Differentiable* ELBO computation with efficient, cached condition checks.""" + """*Differentiable* ELBO computation with efficient, cached condition + checks.""" trajectory = atom_option_trajectory num_time_steps = len(trajectory.states) diff --git a/predicators/approaches/sme_pg3_analogy_approach.py b/predicators/approaches/sme_pg3_analogy_approach.py index 1c105fb0e3..e8609af412 100644 --- a/predicators/approaches/sme_pg3_analogy_approach.py +++ b/predicators/approaches/sme_pg3_analogy_approach.py @@ -242,7 +242,8 @@ def _create_name_to_instances(env: BaseEnv, nsrt_name_to_nsrt = {n.name: n for n in nsrts} var_name_to_nsrt_variables = { _variable_to_s_exp(v, n.name): (n, v) - for n in nsrts for v in n.parameters + for n in nsrts + for v in n.parameters } names_to_instances: Dict[str, Dict[str, Any]] = { "predicates": pred_name_to_pred, diff --git a/predicators/approaches/vlm_open_loop_approach.py b/predicators/approaches/vlm_open_loop_approach.py index 4245a92850..402525736b 100644 --- a/predicators/approaches/vlm_open_loop_approach.py +++ b/predicators/approaches/vlm_open_loop_approach.py @@ -18,7 +18,7 @@ from __future__ import annotations -from typing import Callable, List, Sequence, Set +from typing import Callable, Dict, List, Sequence, Set import numpy as np import PIL @@ -28,10 +28,12 @@ from predicators.approaches import ApproachFailure from predicators.approaches.bilevel_planning_approach import \ BilevelPlanningApproach +from predicators.approaches.pp_online_predicate_invention_approach import \ + _get_transition_str from predicators.nsrt_learning.segmentation import segment_trajectory from predicators.settings import CFG -from predicators.structs import Action, Box, Dataset, ParameterizedOption, \ - Predicate, State, Task, Type, _Option +from predicators.structs import Action, Box, Dataset, LowLevelTrajectory, \ + ParameterizedOption, Predicate, Segment, State, Task, Type, _Option class VLMOpenLoopApproach(BilevelPlanningApproach): # pragma: no cover @@ -106,39 +108,104 @@ def _append_to_prompt_state_imgs_list(state: State) -> None: draw._image) # type: ignore[attr-defined] # pylint: enable=protected-access + def _generate_string_demonstration(segmented_traj: List[Segment], + ll_traj: LowLevelTrajectory, + traj_num: int) -> str: + """Generate string-based demonstration similar to + _get_transition_str in + pp_online_predicate_invention_approach.py.""" + demo_str = "" + traj_goal = self._train_tasks[ll_traj.train_task_idx].goal + demo_str += f"Demonstration {traj_num}, Goal: {str(sorted(traj_goal))}\n" + + state_hash_to_id: Dict[int, int] = {} + + for state_num, seg in enumerate(segmented_traj): + # Get initial state info + init_state = seg.states[0] + init_state_hash = hash(init_state) + if init_state_hash not in state_hash_to_id: + state_hash_to_id[init_state_hash] = len(state_hash_to_id) + init_state_id = state_hash_to_id[init_state_hash] + state_name = f"state_{init_state_id}" + + # Add state description + if state_num == 0: + demo_str += f"Starting at {state_name} with state info:\n" + else: + demo_str += f"Now at {state_name} with state info:\n" + + 
state_str = init_state.dict_str( + indent=2, use_object_id=CFG.rgb_observation) + demo_str += f"{state_str}\n" + + # Add action + action = seg.get_option() + action_str = action.simple_str( + use_object_id=CFG.rgb_observation) + demo_str += f"Action {state_num}: {action_str} was executed\n" + + # Add resulting state + end_state = seg.states[-1] + end_state_hash = hash(end_state) + if end_state_hash not in state_hash_to_id: + state_hash_to_id[end_state_hash] = len(state_hash_to_id) + end_state_id = state_hash_to_id[end_state_hash] + end_state_name = f"state_{end_state_id}" + demo_str += f"This resulted in {end_state_name} with state info:\n" + end_state_str = end_state.dict_str( + indent=2, use_object_id=CFG.rgb_observation) + demo_str += f"{end_state_str}\n\n" + + return demo_str + if not CFG.vlm_open_loop_use_training_demos: return None - # Crawl thru the dataset and pull out all the images. - # For each image, add text to it in the bototm left indicating the - # trajectory and timestep it's from. - assert dataset.trajectories[0].states[0].simulator_state is not None - assert isinstance( - dataset.trajectories[0].states[0].simulator_state["images"], List) - num_imgs_per_state = len( - dataset.trajectories[0].states[0].simulator_state["images"]) + + # Handle both image and string-based demonstrations + if not CFG.vlm_open_loop_no_image: + # Original image-based demonstration logic + assert dataset.trajectories[0].states[ + 0].simulator_state is not None + assert isinstance( + dataset.trajectories[0].states[0].simulator_state["images"], + List) + num_imgs_per_state = len( + dataset.trajectories[0].states[0].simulator_state["images"]) + segmented_trajs = [ segment_trajectory(traj, self._initial_predicates) for traj in dataset.trajectories ] self._prompt_demos_str = "" + for traj_num, seg_traj in enumerate( zip(segmented_trajs, dataset.trajectories)): segment_traj, ll_traj = seg_traj if not ll_traj.is_demo: continue - traj_goal = self._train_tasks[ll_traj.train_task_idx].goal - self._prompt_demos_str += f"Demonstration {traj_num}, " + \ - f"Goal: {str(sorted(traj_goal))}\n" - assert len(segment_traj) > 0 - for state_num, seg in enumerate(segment_traj): - state = seg.states[0] + + if CFG.vlm_open_loop_no_image: + # Use string-based demonstrations with detailed state information + self._prompt_demos_str += _generate_string_demonstration( + segment_traj, ll_traj, traj_num) + else: + # Both image and string-based demonstrations + traj_goal = self._train_tasks[ll_traj.train_task_idx].goal + self._prompt_demos_str += f"Demonstration {traj_num}, " + \ + f"Goal: {str(sorted(traj_goal))}\n" + self._prompt_demos_str += _generate_string_demonstration( + segment_traj, ll_traj, traj_num) + assert len(segment_traj) > 0 + for state_num, seg in enumerate(segment_traj): + state = seg.states[0] + _append_to_prompt_state_imgs_list(state) + action = seg.get_option() + self._prompt_demos_str += f"Action {state_num}, from " + \ + f"state {state_num} is {action}\n" + # Make sure to append the final state of the final segment! + state = seg.states[-1] # pylint:disable=undefined-loop-variable _append_to_prompt_state_imgs_list(state) - action = seg.get_option() - self._prompt_demos_str += f"Action {state_num}, from " + \ - f"state {state_num} is {action}\n" - # Make sure to append the final state of the final segment! 
- state = seg.states[-1] # pylint:disable=undefined-loop-variable - _append_to_prompt_state_imgs_list(state) return None def _get_current_nsrts(self) -> Set[utils.NSRT]: @@ -240,6 +307,8 @@ def _query_vlm_for_option_plan(self, task: Task) -> Sequence[_Option]: parsable_plan_prediction, objects_list, self._types, self._initial_options, True) for option_tuple in parsed_option_plan: - option_plan.append(option_tuple[0].ground( - option_tuple[1], np.array(option_tuple[2]))) + # Convert empty params to list to avoid numpy boolean evaluation issues in domino + params = [] if len(option_tuple[2]) == 0 else np.array( + option_tuple[2]) + option_plan.append(option_tuple[0].ground(option_tuple[1], params)) return option_plan diff --git a/predicators/approaches/vlm_planning_prompts/few_shot_oc.txt b/predicators/approaches/vlm_planning_prompts/few_shot_oc.txt new file mode 100644 index 0000000000..9e986dcca9 --- /dev/null +++ b/predicators/approaches/vlm_planning_prompts/few_shot_oc.txt @@ -0,0 +1,39 @@ +You are highly skilled in robotic task planning, breaking down intricate and long-term tasks into distinct primitive actions. +Consider the following skills a robotic agent can perform. Note that each of these skills takes the form of a `ParameterizedOption` and may have both discrete arguments (indicated by the `types` field, referring to objects of particular types), +as well as continuous arguments (indicated by `params_space` field, which is formatted as `Box([, , ...], [, , ...], (,), )`). + +{options} + + +Here are a number of example demonstrations of solving a range of goals from an expert demonstrator. For each of these examples, you are provided +with the goal the demonstrator was trying to achieve, the plan (i.e., sequence of skills with discrete and continuous arguments specified), as well +as images corresponding to the state of the environment at every timestep (starting at 0, when the 0th skill is executed). Note that the state images +are annotated with text indicating which demonstration and timestep they belong to. Additionally, note that a state might have more than one image +associated with it. + +{demonstration_trajs} + + +To solve the task presented below, you are only allowed to use the provided skills. It's essential to stick to the format of these basic skills. When creating a plan, replace +the arguments of each skill with specific items or continuous parameters. You can first describe the provided scene and what it indicates about the provided +task objects to help you come up with a plan. + +Here is a list of objects present in this scene for this task, along with their type (formatted as : ): +{typed_objects} + +And here are the available types (formatted in PDDL style as ` ... - `). You can infer a hierarchy of types via this: +{type_hierarchy} + +Finally, here is an expression corresponding to the current task goal that must be achieved: +{goal_str} + +Please return a plan that achieves the provided goal from an initial state depicted by the image(s) below. +Please provide your output in the following format (excluding the angle brackets and ellipsis, which are just for illustration purposes). +Be sure to include the parens '(' and ')', as well as square brackets '[' and ']' even if there are no objects/continuous parameters. +Do not bold or italicize or otherwise apply any extra formaating to the plan text. Do not provide any numbers for steps in the plan, or +any reasoning for each step below the 'Plan:' heading: + +Plan: +(:, :, ...)[, , ...] +(:, :, ...)[, , ...] 
+... diff --git a/predicators/cogman.py b/predicators/cogman.py index d068d1cef6..9c16835562 100644 --- a/predicators/cogman.py +++ b/predicators/cogman.py @@ -1,10 +1,12 @@ """Cognitive manager (CogMan). -A wrapper around an approach that manages interaction with the environment. +A wrapper around an approach that manages interaction with the +environment. -Implements a perception module, which produces a State at each time step based -on the history of observations, and an execution monitor, which determines -whether to re-query the approach at each time step based on the states. +Implements a perception module, which produces a State at each time step +based on the history of observations, and an execution monitor, which +determines whether to re-query the approach at each time step based on +the states. The name "CogMan" is due to Leslie Kaelbling. """ diff --git a/predicators/envs/__init__.py b/predicators/envs/__init__.py index f2a22474af..bd275f7dd9 100644 --- a/predicators/envs/__init__.py +++ b/predicators/envs/__init__.py @@ -39,9 +39,9 @@ def get_or_create_env(name: str) -> BaseEnv: relies on the environment's internal state (i.e., you should not call reset() or step()). - Also note that the GUI is always turned off for environments that are - newly created by this function. If you want to use the GUI, you should - create the environment explicitly through create_new_env(). + Also note that the GUI is always turned off for environments that + are newly created by this function. If you want to use the GUI, you + should create the environment explicitly through create_new_env(). """ if name not in _MOST_RECENT_ENV_INSTANCE: logging.warning( diff --git a/predicators/envs/base_env.py b/predicators/envs/base_env.py index b77cab93ee..b3d76e8de9 100644 --- a/predicators/envs/base_env.py +++ b/predicators/envs/base_env.py @@ -49,8 +49,9 @@ def simulate(self, state: State, action: Action) -> State: representation is a member of self.action_space), NOT an option. This function is primarily used in the default option model, and - for implementing the default self.step(action). It is not meant to - be part of the "final system", where the environment is the real world. + for implementing the default self.step(action). It is not meant + to be part of the "final system", where the environment is the + real world. """ raise NotImplementedError("Override me!") @@ -136,8 +137,8 @@ def render_state(self, just for convenience, e.g., in test coverage. By default, calls render_state_plt, but subclasses may override, - e.g. if they do not use Matplotlib for rendering, and thus do not - define a render_state_plt() function. + e.g. if they do not use Matplotlib for rendering, and thus do + not define a render_state_plt() function. """ fig = self.render_state_plt(state, task, action, caption) img = utils.fig2data(fig, dpi=CFG.render_state_dpi) @@ -217,25 +218,15 @@ def _load_task_from_json(self, json_file: Path) -> EnvironmentTask: By default, we assume JSON files are in the following format: - { - "objects": { - : - } - "init": { - : { - : - } - } - "goal": { - : [ - [] - ] - } - } + { "objects": { : } + "init": { : { : + } } "goal": { : + [ [] ] } } Instead of "goal", "language_goal" can also be used. - Environments can override this method to handle different formats. + Environments can override this method to handle different + formats. 
""" with open(json_file, "r", encoding="utf-8") as f: json_dict = json.load(f) @@ -352,12 +343,12 @@ def reset(self, train_or_test: str, task_idx: int) -> Observation: def step(self, action: Action) -> Observation: """Apply the action, update the state, and return an observation. - Note that this action is a low-level action (i.e., action.arr - is a member of self.action_space), NOT an option. + Note that this action is a low-level action (i.e., action.arr is + a member of self.action_space), NOT an option. By default, this function just calls self.simulate. However, - environments that maintain a more complicated internal state, - or that don't implement simulate(), may override this method. + environments that maintain a more complicated internal state, or + that don't implement simulate(), may override this method. """ assert isinstance(self._current_observation, State) self._current_observation = self.simulate(self._current_observation, diff --git a/predicators/envs/cover.py b/predicators/envs/cover.py index dc9f75a99a..87ef493cc3 100644 --- a/predicators/envs/cover.py +++ b/predicators/envs/cover.py @@ -437,15 +437,16 @@ class CoverEnvRegrasp(CoverEnv): grasp on the initially held object sometimes requires placing and regrasping. - This environment also has two different oracle NSRTs for placing, one for - placing a target and one for placing on the table. + This environment also has two different oracle NSRTs for placing, + one for placing a target and one for placing on the table. - This environment also has a Clear predicate, to prevent placing on already - covered targets. + This environment also has a Clear predicate, to prevent placing on + already covered targets. - Finally, to allow placing on the table, we need to change the allowed - hand regions. We implement it so that there is a relatively small hand - region centered at each target, but then everywhere else is allowed. + Finally, to allow placing on the table, we need to change the + allowed hand regions. We implement it so that there is a relatively + small hand region centered at each target, but then everywhere else + is allowed. """ _allow_free_space_placing: ClassVar[bool] = True _initial_pick_offsets: ClassVar[List[float]] = [-0.95, 0.0, 0.95] @@ -496,15 +497,16 @@ class CoverMultistepOptions(CoverEnvTypedOptions): """Cover domain with a lower level action space. Useful for using and learning multistep options. - The action space is (dx, dy, dgrip). The last dimension - controls the gripper "magnet" or "vacuum". The state space is updated to - track x, y, grip. + The action space is (dx, dy, dgrip). The last dimension controls the + gripper "magnet" or "vacuum". The state space is updated to track x, + y, grip. - The robot can move anywhere as long as it, and the block it may be holding, - does not collide with another block. Picking up a block is allowed when the - robot gripper is empty, when the robot is in the allowable hand region, and - when the robot is sufficiently close to the block in the y-direction. - Placing is allowed anywhere. Collisions are handled in simulate(). + The robot can move anywhere as long as it, and the block it may be + holding, does not collide with another block. Picking up a block is + allowed when the robot gripper is empty, when the robot is in the + allowable hand region, and when the robot is sufficiently close to + the block in the y-direction. Placing is allowed anywhere. + Collisions are handled in simulate(). 
""" grasp_thresh: ClassVar[float] = 0.0 initial_block_y: ClassVar[float] = 0.1 diff --git a/predicators/envs/doors.py b/predicators/envs/doors.py index f95a5c4ce3..bcd9f4efb0 100644 --- a/predicators/envs/doors.py +++ b/predicators/envs/doors.py @@ -82,7 +82,7 @@ def get_name(cls) -> str: return "doors" def simulate_moving(self, state: State, action: Action) -> State: - """helper function to simulate moving.""" + """Helper function to simulate moving.""" assert self.action_space.contains(action.arr) dx, dy, _ = action.arr x = state.get(self._robot, "x") diff --git a/predicators/envs/narrow_passage.py b/predicators/envs/narrow_passage.py index c13477d56a..c8464f2a3c 100644 --- a/predicators/envs/narrow_passage.py +++ b/predicators/envs/narrow_passage.py @@ -20,12 +20,12 @@ class NarrowPassageEnv(BaseEnv): by passing through a narrow passage, or by opening a door and passing through a wider passageway. - The action space is 3D, specifying (dx, dy, door). - (dx, dy) defines a robot movement, where the magnitude of the movement - in each direction is constrained by action_magnitude. - door indicates a door-opening action. If door > 0, any attempted - movement is ignored (i.e. treat dx and dy as 0) and the robot will open - a closed door if nearby it. + The action space is 3D, specifying (dx, dy, door). (dx, dy) defines + a robot movement, where the magnitude of the movement in each + direction is constrained by action_magnitude. door indicates a door- + opening action. If door > 0, any attempted movement is ignored (i.e. + treat dx and dy as 0) and the robot will open a closed door if + nearby it. Based on the TouchPoint and Doors environments. """ diff --git a/predicators/envs/pddl_env.py b/predicators/envs/pddl_env.py index bead287858..d76d48d142 100644 --- a/predicators/envs/pddl_env.py +++ b/predicators/envs/pddl_env.py @@ -65,11 +65,11 @@ def copy(self) -> State: class _PDDLEnv(BaseEnv): """An environment induced by PDDL. - The state space is mostly unused. The continuous vectors are dummies. What - is actually used is state.simulator_state, which holds the current ground - atoms. Note that we need to use this pattern, as opposed to just - maintaining the ground atoms internally in the env, because the predicate - classifiers need access to the ground atoms. + The state space is mostly unused. The continuous vectors are + dummies. What is actually used is state.simulator_state, which holds + the current ground atoms. Note that we need to use this pattern, as + opposed to just maintaining the ground atoms internally in the env, + because the predicate classifiers need access to the ground atoms. The action space is hacked to conform to our convention that actions are fixed-dimensional vectors. 
Users of this class should not need @@ -102,7 +102,8 @@ def __init__(self, use_gui: bool = True) -> None: tasks = self._pregenerated_train_tasks + self._pregenerated_test_tasks self._goal_predicates = { a.predicate - for t in tasks for a in t.task.goal + for t in tasks + for a in t.task.goal } @classmethod diff --git a/predicators/envs/pybullet_coffee.py b/predicators/envs/pybullet_coffee.py index 435708ccec..13bf7a3d6d 100644 --- a/predicators/envs/pybullet_coffee.py +++ b/predicators/envs/pybullet_coffee.py @@ -131,7 +131,7 @@ class PyBulletCoffeeEnv(PyBulletEnv, CoffeeEnv): @classmethod def jug_height(cls) -> float: - """use class method to allow for dynamic changes.""" + """Use class method to allow for dynamic changes.""" if CFG.coffee_use_pixelated_jug: return cls.jug_new_height return cls.jug_old_height @@ -152,7 +152,7 @@ def jug_height(cls) -> float: @classmethod def jug_handle_height(cls) -> float: - """use class method to allow for dynamic changes.""" + """Use class method to allow for dynamic changes.""" if CFG.coffee_use_pixelated_jug: return cls.jug_new_handle_height return cls.jug_old_handle_height @@ -579,8 +579,8 @@ def _handle_machine_on_and_jug_filling(self, state: State) -> None: self._jug_current_liquid + self.coffee_machine_fill_speed) self._jug_liquid_id = self._create_liquid_for_jug() - if (not self._jug_filled and self._jug_current_liquid > - self.coffee_filled_threshold): + if (not self._jug_filled and self._jug_current_liquid + > self.coffee_filled_threshold): self._jug_filled = True else: # Instant filling diff --git a/predicators/envs/pybullet_domino.py b/predicators/envs/pybullet_domino.py index af448ceacc..64d13b34f9 100644 --- a/predicators/envs/pybullet_domino.py +++ b/predicators/envs/pybullet_domino.py @@ -1007,12 +1007,13 @@ def _AdjacentTo_holds(cls, atoms: Set[GroundAtom], objects: Sequence[Object]) -> bool: """Check if a position is adjacent to a domino in cardinal directions. - This is similar to _InFrontDirection_holds but checks if a position - is adjacent to any position where the domino could be placed, considering - that the domino can be in multiple positions during heuristic computation. + This is similar to _InFrontDirection_holds but checks if a + position is adjacent to any position where the domino could be + placed, considering that the domino can be in multiple positions + during heuristic computation. - Adjacent positions are those that are exactly one grid step away in - cardinal directions (up, down, left, right) but not diagonal. + Adjacent positions are those that are exactly one grid step away + in cardinal directions (up, down, left, right) but not diagonal. """ position, domino = objects diff --git a/predicators/envs/repeated_nextto.py b/predicators/envs/repeated_nextto.py index 825d6c05e9..a63411287a 100644 --- a/predicators/envs/repeated_nextto.py +++ b/predicators/envs/repeated_nextto.py @@ -157,8 +157,8 @@ def _get_tasks(self, num: int, def _NextTo_holds(self, state: State, objects: Sequence[Object]) -> bool: robot, dot = objects return (state.get(dot, "grasped") < self.grasped_thresh - and abs(state.get(robot, "x") - state.get(dot, "x")) < - self._nextto_thresh) + and abs(state.get(robot, "x") - state.get(dot, "x")) + < self._nextto_thresh) def _NextToNothing_holds(self, state: State, objects: Sequence[Object]) -> bool: @@ -276,8 +276,9 @@ def _get_tasks_simple(self, num: int, rng: np.random.Generator, goals = [goal1, goal2] # NOTE: 2.0 would be the exactly correct multiplier. 2.1 is just to # give some extra spacing. 
- assert (self.env_ub - self.env_lb) > ( - self._nextto_thresh * CFG.repeated_nextto_num_dots * 2.1) + assert (self.env_ub - + self.env_lb) > (self._nextto_thresh * + CFG.repeated_nextto_num_dots * 2.1) for i in range(num): data: Dict[Object, Array] = {} for d_num, dot in enumerate(dots): diff --git a/predicators/envs/sticky_table.py b/predicators/envs/sticky_table.py index f69fe7ea5a..6b57ab9d94 100644 --- a/predicators/envs/sticky_table.py +++ b/predicators/envs/sticky_table.py @@ -18,17 +18,17 @@ class StickyTableEnv(BaseEnv): """An environment where a cube must be transported between tables. - Most of the tables are flat, but one is half is smooth and half is sticky. - When placing on the smooth side, the cube usually falls off; when placing - on the sticky side, it usually stays. When it falls off, it falls onto the - floor. It can be picked up from the floor. + Most of the tables are flat, but one is half is smooth and half is + sticky. When placing on the smooth side, the cube usually falls off; + when placing on the sticky side, it usually stays. When it falls + off, it falls onto the floor. It can be picked up from the floor. Note that unlike almost all of our other environments, there is real stochasticity in the outcomes of placing. - The action space is 2D. When the robot is holding nothing, the only action - that changes anything is clicking on the cube. When the robot is holding - the cube, the action places the cube at that location. + The action space is 2D. When the robot is holding nothing, the only + action that changes anything is clicking on the cube. When the robot + is holding the cube, the action places the cube at that location. """ x_lb: ClassVar[float] = 0.0 x_ub: ClassVar[float] = 1.0 diff --git a/predicators/explorers/greedy_lookahead_explorer.py b/predicators/explorers/greedy_lookahead_explorer.py index 87d39e05ca..fe6c55116b 100644 --- a/predicators/explorers/greedy_lookahead_explorer.py +++ b/predicators/explorers/greedy_lookahead_explorer.py @@ -16,8 +16,8 @@ class GreedyLookaheadExplorer(BaseExplorer): """GreedyLookaheadExplorer implementation. - Sample a certain number of max-length trajectories and pick the one that - has the highest cumulative score. + Sample a certain number of max-length trajectories and pick the one + that has the highest cumulative score. The score function takes the atoms and state as input and returns a score, with higher better. diff --git a/predicators/explorers/random_nsrts_explorer.py b/predicators/explorers/random_nsrts_explorer.py index 909d90d022..adf13c3f43 100644 --- a/predicators/explorers/random_nsrts_explorer.py +++ b/predicators/explorers/random_nsrts_explorer.py @@ -16,30 +16,28 @@ class RandomNSRTsExplorer(BaseExplorer): """RandomNSRTsExplorer implementation. - Similar to RandomOptionsExplorer in that it chooses - uniformly at random out of whichever ground NSRTs are - applicable in the current state, but different in that - the continuous parameter of the parameterized option is - generated by the NSRT's sampler rather than by sampling - uniformly at random from the option's parameter space. - - This explorer is intended to be used when learning options - via reinforcement learning with oracle samplers. In this - setting, planning (e.g. using BilevelPlanningExplorer) is - not feasible because refinement is slow or fails when the - learned options aren't good enough yet. - - Note that the sampler requires the current low-level state - as input. 
An alternative approach would generate a plan of - options by planning towards the goal with the ground NSRTs - and avoid refinement by (1) inferring the terminal low-level - state of each option (the initial low-level state of the - subsequent option) from the option's sample, which is - proposing a low-level subgoal, and (2) setting the simulator - state directly to that state. Because applying dimensionality - reduction techniques on the sample makes this approach infeasible, - and this is something we may want to do, we avoid this approach - for now. + Similar to RandomOptionsExplorer in that it chooses uniformly at + random out of whichever ground NSRTs are applicable in the current + state, but different in that the continuous parameter of the + parameterized option is generated by the NSRT's sampler rather than + by sampling uniformly at random from the option's parameter space. + + This explorer is intended to be used when learning options via + reinforcement learning with oracle samplers. In this setting, + planning (e.g. using BilevelPlanningExplorer) is not feasible + because refinement is slow or fails when the learned options aren't + good enough yet. + + Note that the sampler requires the current low-level state as input. + An alternative approach would generate a plan of options by planning + towards the goal with the ground NSRTs and avoid refinement by (1) + inferring the terminal low-level state of each option (the initial + low-level state of the subsequent option) from the option's sample, + which is proposing a low-level subgoal, and (2) setting the + simulator state directly to that state. Because applying + dimensionality reduction techniques on the sample makes this + approach infeasible, and this is something we may want to do, we + avoid this approach for now. """ def __init__(self, predicates: Set[Predicate], diff --git a/predicators/ground_truth_models/__init__.py b/predicators/ground_truth_models/__init__.py index de024c34d3..2bcd8ba371 100644 --- a/predicators/ground_truth_models/__init__.py +++ b/predicators/ground_truth_models/__init__.py @@ -210,8 +210,9 @@ def parse_config_included_options(env: BaseEnv) -> Set[ParameterizedOption]: Return the set of included oracle options. - Note that "all" is not implemented because setting the option_learner flag - to "no_learning" is the preferred way to include all options. + Note that "all" is not implemented because setting the + option_learner flag to "no_learning" is the preferred way to include + all options. 
""" if not CFG.included_options: return set() diff --git a/predicators/ground_truth_models/domino/options.py b/predicators/ground_truth_models/domino/options.py index 625ad4b2ce..e1e621f43c 100644 --- a/predicators/ground_truth_models/domino/options.py +++ b/predicators/ground_truth_models/domino/options.py @@ -256,7 +256,7 @@ def _create_domino_move_to_push_domino_option( def _get_current_and_target_pose_and_finger_status( state: State, objects: Sequence[Object], params: Array) -> \ Tuple[Pose, Pose, str]: - assert not params + assert len(params) == 0 robot, domino = objects current_position = (state.get(robot, "x"), state.get(robot, "y"), state.get(robot, "z")) @@ -296,7 +296,7 @@ def _create_domino_move_to_domino_option( def _get_current_and_target_pose_and_finger_status( state: State, objects: Sequence[Object], params: Array) -> \ Tuple[Pose, Pose, str]: - assert not params + assert len(params) == 0 robot, domino = objects current_position = (state.get(robot, "x"), state.get(robot, "y"), state.get(robot, "z")) @@ -336,7 +336,7 @@ def _create_domino_place_option(cls, name: str, z_func: Callable[[float], def _get_current_and_target_pose_and_finger_status( state: State, objects: Sequence[Object], params: Array) -> \ Tuple[Pose, Pose, str]: - assert not params + assert len(params) == 0 robot, domino_f, domino_b, tgt_pos, rotation = objects current_position = (state.get(robot, "x"), state.get(robot, "y"), state.get(robot, "z")) @@ -374,8 +374,8 @@ def _get_current_and_target_pose_and_finger_status( # Get constants from the environment class gap = cls.env_cls.pos_gap - target_angle_is_cardinal = abs(np.sin( - 2 * target_rot_rad)) < angle_tol + target_angle_is_cardinal = abs(np.sin(2 * + target_rot_rad)) < angle_tol # Case 1: Place straight ahead if dir_value == 0.0 or target_angle_is_cardinal: # straight diff --git a/predicators/ground_truth_models/tools/options.py b/predicators/ground_truth_models/tools/options.py index dfc30e3ab4..60d3afaff6 100644 --- a/predicators/ground_truth_models/tools/options.py +++ b/predicators/ground_truth_models/tools/options.py @@ -159,8 +159,8 @@ def policy(state: State, memory: Dict, objects: Sequence[Object], # while all other Fasten options have 4 parameters. _, item, contraption = objects # For fastening by hand, we don't want to be holding any tool. - tool_is_correct = (ToolsEnv.get_held_item_or_tool(state) is - None) + tool_is_correct = (ToolsEnv.get_held_item_or_tool(state) + is None) else: _, item, tool, contraption = objects # For fastening with a tool, we should be holding it. diff --git a/predicators/image_patch_wrapper.py b/predicators/image_patch_wrapper.py index a2e8c28b0d..354eb2d15b 100644 --- a/predicators/image_patch_wrapper.py +++ b/predicators/image_patch_wrapper.py @@ -1,4 +1,4 @@ -"""adapted from SoM.""" +"""Adapted from SoM.""" import os from typing import TYPE_CHECKING, Dict, List, Optional, Sequence @@ -290,6 +290,7 @@ def crop(self, left: int, lower: int, right: int, upper: int) -> 'ImagePatch': """Returns a new ImagePatch containing a crop of the original image at the given coordinates. 
+ Returns ------- ImagePatch diff --git a/predicators/main.py b/predicators/main.py index f1c9114ef1..46a9607b44 100644 --- a/predicators/main.py +++ b/predicators/main.py @@ -710,9 +710,9 @@ def _execute_policy( num_solved if num_solved > 0 else float("inf")) # Skeleton / sample info - metrics["min_num_samples"] = ( - cogman.metrics["min_num_samples"] - if cogman.metrics["min_num_samples"] < float("inf") else 0) + metrics["min_num_samples"] = (cogman.metrics["min_num_samples"] + if cogman.metrics["min_num_samples"] + < float("inf") else 0) metrics["max_num_samples"] = cogman.metrics["max_num_samples"] metrics["min_skeletons_optimized"] = ( cogman.metrics["min_num_skeletons_optimized"] @@ -733,8 +733,9 @@ def _execute_policy( "num_failures_discovered" ]: total = cogman.metrics[f"total_{metric_name}"] - metrics[f"avg_{metric_name}"] = ( - total / num_found_policy if num_found_policy > 0 else float("inf")) + metrics[f"avg_{metric_name}"] = (total / + num_found_policy if num_found_policy + > 0 else float("inf")) return metrics diff --git a/predicators/ml_models.py b/predicators/ml_models.py index 5972e7a783..392bbe01a4 100644 --- a/predicators/ml_models.py +++ b/predicators/ml_models.py @@ -303,7 +303,8 @@ class _NormalizingBinaryClassifier(BinaryClassifier): Also infers the dimensionality of the inputs and outputs from fit(). - Also implements data balancing (optionally) and single-class prediction. + Also implements data balancing (optionally) and single-class + prediction. """ def __init__(self, seed: int, balance_data: bool) -> None: diff --git a/predicators/nsrt_learning/nsrt_learning_main.py b/predicators/nsrt_learning/nsrt_learning_main.py index d13ef054b6..ce080a00dd 100644 --- a/predicators/nsrt_learning/nsrt_learning_main.py +++ b/predicators/nsrt_learning/nsrt_learning_main.py @@ -85,14 +85,14 @@ def learn_nsrts_from_data( # produce PNAD objects. Each PNAD # contains a STRIPSOperator, Datastore, and OptionSpec. The # samplers will be filled in on a later step. - pnads = learn_strips_operators( - trajectories, - train_tasks, - predicates, - segmented_trajs, - verify_harmlessness=True, - verbose=(CFG.option_learner != "no_learning"), - annotations=annotations) + pnads = learn_strips_operators(trajectories, + train_tasks, + predicates, + segmented_trajs, + verify_harmlessness=True, + verbose=(CFG.option_learner + != "no_learning"), + annotations=annotations) # Save least complex learned PNAD set across data orderings. pnads_complexity = sum(pnad.op.get_complexity() for pnad in pnads) diff --git a/predicators/nsrt_learning/option_learning.py b/predicators/nsrt_learning/option_learning.py index 8a900c7efb..41d88ee287 100644 --- a/predicators/nsrt_learning/option_learning.py +++ b/predicators/nsrt_learning/option_learning.py @@ -515,16 +515,18 @@ def get_rel_option_param_from_state(self, state: State, memory: Dict, class _BehaviorCloningOptionLearner(_OptionLearnerBase): """Learn _LearnedNeuralParameterizedOption objects by behavior cloning. - See the docstring for _LearnedNeuralParameterizedOption for a description - of the option structure. - - In this paradigm, the option initiable and termination are determined from - the operators, so the main thing that needs to be learned is the option - policy. We learn this policy by behavior cloning (fitting a regressor - via supervised learning) in learn_option_specs(). - - The is_parameterized kwarg is for a baseline that learns a policy without - continuous parameters. If it is False, the parameter space is null. 
+ See the docstring for _LearnedNeuralParameterizedOption for a + description of the option structure. + + In this paradigm, the option initiable and termination are + determined from the operators, so the main thing that needs to be + learned is the option policy. We learn this policy by behavior + cloning (fitting a regressor via supervised learning) in + learn_option_specs(). + + The is_parameterized kwarg is for a baseline that learns a policy + without continuous parameters. If it is False, the parameter space + is null. """ def __init__(self, diff --git a/predicators/nsrt_learning/segmentation.py b/predicators/nsrt_learning/segmentation.py index 6a54e196fc..d2b49ad916 100644 --- a/predicators/nsrt_learning/segmentation.py +++ b/predicators/nsrt_learning/segmentation.py @@ -153,9 +153,10 @@ def _segment_with_oracle(ll_traj: LowLevelTrajectory, If options are known, just uses _segment_with_option_changes(). - Otherwise, starting at the beginning of the trajectory, keeps track of - which oracle ground NSRTs are applicable. When any of them have their - effects achieved, that marks the switch point between segments. + Otherwise, starting at the beginning of the trajectory, keeps track + of which oracle ground NSRTs are applicable. When any of them have + their effects achieved, that marks the switch point between + segments. """ if ll_traj.actions and ll_traj.actions[0].has_option(): assert CFG.option_learner == "no_learning" diff --git a/predicators/nsrt_learning/strips_learning/clustering_learner.py b/predicators/nsrt_learning/strips_learning/clustering_learner.py index 224abffb25..a5b630dd7c 100644 --- a/predicators/nsrt_learning/strips_learning/clustering_learner.py +++ b/predicators/nsrt_learning/strips_learning/clustering_learner.py @@ -164,9 +164,10 @@ def _learn(self) -> List[PNAD]: segment_param_option, segment_option_objs, pnads) if suc: - sub = cast(VarToObjSub, - {v: o - for o, v in ent_to_ent_sub.items()}) + sub = cast(VarToObjSub, { + v: o + for o, v in ent_to_ent_sub.items() + }) # Add to this PNAD. if CFG.exogenous_process_learner_do_intersect: # Find the largest conditions that unifies the init @@ -203,7 +204,8 @@ def _learn(self) -> List[PNAD]: # anything in the init atoms of the segment. objects |= { o - for atom in segment.init_atoms for o in atom.objects + for atom in segment.init_atoms + for o in atom.objects } objects_lst = sorted(objects) @@ -270,10 +272,10 @@ def _learn(self) -> List[PNAD]: segment_param_option, segment_option_objs, pnads) if suc: - sub = cast( - VarToObjSub, - {v: o - for o, v in ent_to_ent_sub.items()}) + sub = cast(VarToObjSub, { + v: o + for o, v in ent_to_ent_sub.items() + }) # Add to this PNAD. 
if CFG.exogenous_process_learner_do_intersect: # Find the largest conditions that unifies the init @@ -578,8 +580,8 @@ def _learn_pnad_preconditions(self, pnads: List[PNAD]) -> List[PNAD]: # Get the objects in the init atoms additional_objects = { o - for atom in init_atoms for o in atom.objects - if o not in existing_objs + for atom in init_atoms + for o in atom.objects if o not in existing_objs } # Create a new var_to_obj mapping for the objects objects_lst = sorted(additional_objects) @@ -1541,8 +1543,8 @@ def _is_unique_pnad(self, precon: FrozenSet[LiftedAtom], pnad: PNAD, """Check if a PNAD with given preconditions is unique.""" for final_pnad in final_pnads: # Quick size checks first for efficiency - if (len(precon) != len(final_pnad.op.preconditions) or - len(pnad.op.add_effects) != len(final_pnad.op.add_effects) + if (len(precon) != len(final_pnad.op.preconditions) or len( + pnad.op.add_effects) != len(final_pnad.op.add_effects) or len(pnad.op.delete_effects) != len( final_pnad.op.delete_effects)): continue diff --git a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py index f7ebf9bd21..5057d5f9bf 100644 --- a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py +++ b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py @@ -259,9 +259,10 @@ def recompute_pnads_from_effects(self, pnads: List[PNAD]) -> List[PNAD]: self._compute_pnad_delete_effects(pnad) self._compute_pnad_ignore_effects(pnad) # Fix naming. - pnad_map: Dict[ParameterizedOption, - List[PNAD]] = {p.option_spec[0]: [] - for p in new_pnads} + pnad_map: Dict[ParameterizedOption, List[PNAD]] = { + p.option_spec[0]: [] + for p in new_pnads + } for p in new_pnads: p.op = p.op.copy_with(name=p.option_spec[0].name) pnad_map[p.option_spec[0]].append(p) diff --git a/predicators/planning.py b/predicators/planning.py index 27bb60301b..54bf299127 100644 --- a/predicators/planning.py +++ b/predicators/planning.py @@ -327,13 +327,14 @@ def task_plan( convenient wrapper around _skeleton_generator below (which IS used by SeSamE) that takes in only the minimal necessary arguments. - This method is tightly coupled with task_plan_grounding -- the reason they - are separate methods is that it is sometimes possible to ground only once - and then plan multiple times (e.g. from different initial states, or to - different goals). To run task planning once, call task_plan_grounding to - get ground_nsrts and reachable_atoms; then create a heuristic using - utils.create_task_planning_heuristic; then call this method. See the tests - in tests/test_planning for usage examples. + This method is tightly coupled with task_plan_grounding -- the + reason they are separate methods is that it is sometimes possible to + ground only once and then plan multiple times (e.g. from different + initial states, or to different goals). To run task planning once, + call task_plan_grounding to get ground_nsrts and reachable_atoms; + then create a heuristic using utils.create_task_planning_heuristic; + then call this method. See the tests in tests/test_planning for + usage examples. """ if not goal.issubset(reachable_atoms): logging.info(f"Detected goal unreachable. Goal: {goal}") @@ -370,16 +371,17 @@ def _skeleton_generator( sesame_max_policy_guided_rollout: int = 0, use_visited_state_set: bool = False ) -> Iterator[Tuple[List[_GroundNSRT], List[Set[GroundAtom]]]]: - """A* search over skeletons (sequences of ground NSRTs). 
- Iterates over pairs of (skeleton, atoms sequence). + """A* search over skeletons (sequences of ground NSRTs). Iterates over + pairs of (skeleton, atoms sequence). Note that we can't use utils.run_astar() here because we want to - yield multiple skeletons, whereas that utility method returns only - a single solution. Furthermore, it's easier to track and update our + yield multiple skeletons, whereas that utility method returns only a + single solution. Furthermore, it's easier to track and update our metrics dictionary if we re-implement the search here. If - use_visited_state_set is False (which is the default), then we may revisit - the same abstract states multiple times, unlike in typical A*. See - Issue #1117 for a discussion on why this is False by default. + use_visited_state_set is False (which is the default), then we may + revisit the same abstract states multiple times, unlike in typical + A*. See Issue #1117 for a discussion on why this is False by + default. """ start_time = time.perf_counter() @@ -919,11 +921,11 @@ def task_plan_with_option_plan_constraint( """Turn an option plan into a plan of ground NSRTs that achieves the goal from the initial atoms. - If atoms_seq is not None, the ground NSRT plan must also match up with - the given sequence of atoms. Otherwise, atoms are not checked. + If atoms_seq is not None, the ground NSRT plan must also match up + with the given sequence of atoms. Otherwise, atoms are not checked. - If no goal-achieving sequence of ground NSRTs corresponds to - the option plan, return None. + If no goal-achieving sequence of ground NSRTs corresponds to the + option plan, return None. """ dummy_nsrts = utils.ops_and_specs_to_dummy_nsrts(strips_ops, option_specs) ground_nsrts, _ = task_plan_grounding(init_atoms, diff --git a/predicators/planning_with_processes.py b/predicators/planning_with_processes.py index 6b1ffd9aef..db7fca27e0 100644 --- a/predicators/planning_with_processes.py +++ b/predicators/planning_with_processes.py @@ -781,13 +781,14 @@ def task_plan( convenient wrapper around _skeleton_generator below (which IS used by SeSamE) that takes in only the minimal necessary arguments. - This method is tightly coupled with task_plan_grounding -- the reason they - are separate methods is that it is sometimes possible to ground only once - and then plan multiple times (e.g. from different initial states, or to - different goals). To run task planning once, call task_plan_grounding to - get ground_nsrts and reachable_atoms; then create a heuristic using - utils.create_task_planning_heuristic; then call this method. See the tests - in tests/test_planning for usage examples. + This method is tightly coupled with task_plan_grounding -- the + reason they are separate methods is that it is sometimes possible to + ground only once and then plan multiple times (e.g. from different + initial states, or to different goals). To run task planning once, + call task_plan_grounding to get ground_nsrts and reachable_atoms; + then create a heuristic using utils.create_task_planning_heuristic; + then call this method. See the tests in tests/test_planning for + usage examples. """ if CFG.planning_check_dr_reachable and not goal.issubset(reachable_atoms): logging.info(f"Detected goal unreachable. 
Goal: {goal}") diff --git a/predicators/pybullet_compatibility.py b/predicators/pybullet_compatibility.py new file mode 100644 index 0000000000..5ab829184b --- /dev/null +++ b/predicators/pybullet_compatibility.py @@ -0,0 +1,69 @@ +""" +PyBullet compatibility layer for Python 3.13. + +This module provides a compatibility layer that allows predicators to run +without PyBullet when it's not available (e.g., on Python 3.13 where +PyBullet has compilation issues). +""" + +import warnings +from typing import Any, Optional + +# Try to import PyBullet and handle gracefully if not available +try: + import pybullet as _pybullet + PYBULLET_AVAILABLE = True +except ImportError: + PYBULLET_AVAILABLE = False + _pybullet = None + warnings.warn( + "PyBullet is not available. PyBullet-dependent environments will be skipped. " + "This is expected on Python 3.13 due to compilation issues.", + UserWarning, + stacklevel=2 + ) + +# Try to import pybullet_utils +try: + from pybullet_utils.transformations import euler_from_quaternion as _euler_from_quaternion + from pybullet_utils.transformations import quaternion_from_euler as _quaternion_from_euler + PYBULLET_UTILS_AVAILABLE = True +except ImportError: + PYBULLET_UTILS_AVAILABLE = False + _euler_from_quaternion = None + _quaternion_from_euler = None + +def get_pybullet() -> Optional[Any]: + """Get the PyBullet module if available, None otherwise.""" + return _pybullet if PYBULLET_AVAILABLE else None + +def euler_from_quaternion(*args, **kwargs) -> Any: + """Wrapper for pybullet_utils.transformations.euler_from_quaternion.""" + if not PYBULLET_UTILS_AVAILABLE: + raise NotImplementedError("PyBullet utils not available") + return _euler_from_quaternion(*args, **kwargs) + +def quaternion_from_euler(*args, **kwargs) -> Any: + """Wrapper for pybullet_utils.transformations.quaternion_from_euler.""" + if not PYBULLET_UTILS_AVAILABLE: + raise NotImplementedError("PyBullet utils not available") + return _quaternion_from_euler(*args, **kwargs) + +# Dummy JointPositions class for when PyBullet is not available +class JointPositions: + """Dummy JointPositions class for when PyBullet is not available.""" + + def __init__(self, *args, **kwargs): + if not PYBULLET_AVAILABLE: + raise NotImplementedError("PyBullet not available") + +# Export the actual JointPositions if PyBullet is available +if PYBULLET_AVAILABLE: + try: + from predicators.pybullet_helpers.joint import JointPositions as _RealJointPositions + JointPositions = _RealJointPositions + except ImportError: + # Keep the dummy class if the real one can't be imported + pass + + diff --git a/predicators/refinement_estimators/per_skeleton_estimator.py b/predicators/refinement_estimators/per_skeleton_estimator.py index 2b374b76fa..5e1e84597a 100644 --- a/predicators/refinement_estimators/per_skeleton_estimator.py +++ b/predicators/refinement_estimators/per_skeleton_estimator.py @@ -13,9 +13,10 @@ # Type of the (skeleton, atoms_sequence) key for model dictionary # which converts both of them to be immutable -ModelDictKey = Tuple[Tuple[_GroundNSRT, ...], # skeleton converted to tuple - Tuple[FrozenSet[GroundAtom], ...] # atoms_sequence - ] +ModelDictKey = Tuple[ + Tuple[_GroundNSRT, ...], # skeleton converted to tuple + Tuple[FrozenSet[GroundAtom], ...] 
# atoms_sequence +] Model = TypeVar('Model') diff --git a/predicators/structs.py b/predicators/structs.py index e0103a58cb..1b8a3059c1 100644 --- a/predicators/structs.py +++ b/predicators/structs.py @@ -1288,7 +1288,8 @@ def effect_to_ignore_effect(self, effect: LiftedAtom, remaining_params = { p for atom in self.preconditions | new_add_effects - | new_delete_effects for p in atom.variables + | new_delete_effects + for p in atom.variables } | set(option_vars) new_params = [p for p in self.parameters if p in remaining_params] return STRIPSOperator(self.name, new_params, self.preconditions, diff --git a/predicators/utils.py b/predicators/utils.py index 6778f42263..3ccc923a5b 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -911,18 +911,19 @@ class LinearChainParameterizedOption(ParameterizedOption): This class is meant to help ParameterizedOption manual design. - The children are executed in order starting with the first in the sequence - and transitioning when the terminal function of each child is hit. + The children are executed in order starting with the first in the + sequence and transitioning when the terminal function of each child + is hit. - The children are assumed to chain together, so the initiable of the next - child should always be True when the previous child terminates. If this - is not the case, an AssertionError is raised. + The children are assumed to chain together, so the initiable of the + next child should always be True when the previous child terminates. + If this is not the case, an AssertionError is raised. - The children must all have the same types and params_space, which in turn - become the types and params_space for this ParameterizedOption. + The children must all have the same types and params_space, which in + turn become the types and params_space for this ParameterizedOption. - The LinearChainParameterizedOption has memory, which stores the current - child index. + The LinearChainParameterizedOption has memory, which stores the + current child index. """ def __init__(self, name: str, @@ -2406,13 +2407,15 @@ def run_policy_guided_astar( Stop the rollout prematurely if the policy returns None. - Note that unlike the other search functions, which take get_successors as - input, this function takes get_valid_actions and get_next_state as two - separate inputs. This is necessary because we need to anticipate the next - state conditioned on the action output by the policy. + Note that unlike the other search functions, which take + get_successors as input, this function takes get_valid_actions and + get_next_state as two separate inputs. This is necessary because we + need to anticipate the next state conditioned on the action output + by the policy. - The get_valid_actions generates (action, cost) tuples. For policy-generated - transitions, the costs are ignored, and rollout_step_cost is used instead. + The get_valid_actions generates (action, cost) tuples. For policy- + generated transitions, the costs are ignored, and rollout_step_cost + is used instead. """ # Create a new successor function that rolls out the policy first. @@ -3819,9 +3822,10 @@ def create_pddl_types_str(types: Collection[Type]) -> str: types_str = " ".join(t.name for t in sorted(types)) # Case 2: type hierarchy. 
else: - parent_to_children_types: Dict[Type, - List[Type]] = {t: [] - for t in types} + parent_to_children_types: Dict[Type, List[Type]] = { + t: [] + for t in types + } for t in sorted(types): if t.parent: parent_to_children_types[t.parent].append(t) @@ -4569,11 +4573,11 @@ def query_ldl( ) -> Optional[_GroundNSRT]: """Queries a lifted decision list representing a goal-conditioned policy. - Given an abstract state and goal, the rules are grounded in order. The - first applicable ground rule is used to return a ground NSRT. + Given an abstract state and goal, the rules are grounded in order. + The first applicable ground rule is used to return a ground NSRT. - If static_predicates is provided, it is used to avoid grounding rules with - nonsense preconditions like IsBall(robot). + If static_predicates is provided, it is used to avoid grounding + rules with nonsense preconditions like IsBall(robot). If no rule is applicable, returns None. """ diff --git a/requirements-python3.13.txt b/requirements-python3.13.txt new file mode 100644 index 0000000000..2f2573b715 --- /dev/null +++ b/requirements-python3.13.txt @@ -0,0 +1,70 @@ +# Requirements file for Python 3.13 compatibility +# This file provides alternative dependencies that work with Python 3.13 +# while maintaining compatibility with the predicators package + +# Core scientific computing +numpy>=1.24.0 +scipy>=1.10.0 +pandas>=1.5.0 + +# Machine learning +torch>=2.0.0 +torchvision>=0.15.0 +scikit-learn>=1.3.0 + +# Environment and RL +gym>=0.26.0 +gymnasium>=0.27.0 # Modern replacement for gym + +# Image processing +opencv-python>=4.5.0 +pillow>=10.0.0 +imageio>=2.30.0 +imageio-ffmpeg>=0.6.0 + +# Plotting and visualization +matplotlib>=3.6.0 +seaborn>=0.12.0 + +# Development and testing +pytest>=7.0.0 +mypy>=1.0.0 +pylint>=2.15.0 + +# Utility libraries +pyyaml>=6.0 +requests>=2.28.0 +tqdm>=4.64.0 +tabulate>=0.9.0 +dill>=0.3.6 +pathos>=0.3.0 +colorlog>=6.0.0 + +# AI/ML APIs +openai>=1.19.0 +google-generativeai>=0.8.0 +tenacity>=8.0.0 +httpx>=0.27.0 + +# Planning +pyperplan>=2.0 +graphlib-backport>=1.0.0 + +# Additional dependencies +slack_bolt>=1.0.0 +wandb>=0.15.0 +types-PyYAML>=6.0.0 +psutil>=5.9.0 +lisdf>=0.1.0 +ImageHash>=4.3.0 + +# Git-based dependencies +# Install these separately with: +# pip install git+https://github.com/sebdumancic/structure_mapping.git +# pip install git+https://github.com/tomsilver/pg3.git +# pip install git+https://github.com/Learning-and-Intelligent-Systems/gym-sokoban.git + +# Note: PyBullet is excluded as it has compilation issues with Python 3.13 +# PyBullet-dependent environments will be skipped during execution + + diff --git a/scripts/configs/mara/causal_predicator_baselines.yaml b/scripts/configs/mara/causal_predicator_baselines.yaml index 0a9cd1013a..51a4ff03c7 100644 --- a/scripts/configs/mara/causal_predicator_baselines.yaml +++ b/scripts/configs/mara/causal_predicator_baselines.yaml @@ -5,20 +5,21 @@ APPROACHES: # oracle_nsrt: # NAME: "oracle" - oracle: - NAME: "oracle_process_planning" - FLAGS: - demonstrator: "oracle_process_planning" - terminate_on_goal_reached_and_option_terminated: True - bilevel_plan_without_sim: True - # vlm_plan: # ViLa baseline - # NAME: "vlm_open_loop" + # oracle: + # NAME: "oracle_process_planning" # FLAGS: # demonstrator: "oracle_process_planning" - # num_train_tasks: 0 + # terminate_on_goal_reached_and_option_terminated: True # bilevel_plan_without_sim: True - # llm_model_name: "google/gemini-2.5-pro" # "openai/gpt-4o", "gpt-4.1", "gpt-4o" - # vlm_open_loop_no_image: 
True + vlm_plan: # ViLa baseline + NAME: "vlm_open_loop" + FLAGS: + demonstrator: "oracle_process_planning" + num_train_tasks: 0 + bilevel_plan_without_sim: True + llm_model_name: "google/gemini-2.5-pro" # "openai/gpt-4o", "gpt-4.1", "gpt-4o" + vlm_open_loop_no_image: True + vlm_open_loop_use_training_demos: True # param_learning: # NAME: "param_learning_process_planning" # ARGS: diff --git a/scripts/lisdf_pybullet_visualizer.py b/scripts/lisdf_pybullet_visualizer.py index 63d5185e2c..f10377d4c8 100644 --- a/scripts/lisdf_pybullet_visualizer.py +++ b/scripts/lisdf_pybullet_visualizer.py @@ -2,8 +2,8 @@ Currently specific to the blocks environment. -For safety, this script should be run before executing an LISDF plan on the -real robot. If the plan looks jerky, fast, etc., don't execute it. +For safety, this script should be run before executing an LISDF plan on +the real robot. If the plan looks jerky, fast, etc., don't execute it. Quit early with the "q" key. """ diff --git a/scripts/local/launch.py b/scripts/local/launch.py index cbbdccad38..0175e19c3d 100644 --- a/scripts/local/launch.py +++ b/scripts/local/launch.py @@ -2,7 +2,7 @@ Run experiments sequentially, not in parallel. - python scripts/local/launch.py --config example_basic.yaml +python scripts/local/launch.py --config example_basic.yaml The default branch can be overridden with the --branch flag. """ diff --git a/setup.py b/setup.py index 387307e63f..e226a8981d 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ "opencv-python>=4.5.0", "colorlog", "wandb", + "psutil>=5.9.0", ], include_package_data=True, extras_require={ diff --git a/tests/envs/test_cluttered_table.py b/tests/envs/test_cluttered_table.py index 571ae5520f..af04d4a608 100644 --- a/tests/envs/test_cluttered_table.py +++ b/tests/envs/test_cluttered_table.py @@ -55,8 +55,8 @@ def test_cluttered_table(place_version=False): pose_x2 = state.get(can2, "pose_x") pose_y2 = state.get(can2, "pose_y") rad2 = state.get(can2, "radius") - assert np.linalg.norm([pose_y2 - pose_y1, pose_x2 - pose_x1 - ]) > rad1 + rad2 + assert np.linalg.norm([pose_y2 - pose_y1, + pose_x2 - pose_x1]) > rad1 + rad2 can = list(state)[0] act = Action(env.action_space.sample()) if i == 0: diff --git a/tests/pybullet_helpers/test_motion_planning.py b/tests/pybullet_helpers/test_motion_planning.py index ed8ca30fa3..1a068bb6b8 100644 --- a/tests/pybullet_helpers/test_motion_planning.py +++ b/tests/pybullet_helpers/test_motion_planning.py @@ -139,7 +139,8 @@ def test_move_to_shelf(): to forward-facing, so motion planning must be in position and orientation. - Also notably, the held object must be collision-checked like the robot. + Also notably, the held object must be collision-checked like the + robot. """ utils.reset_config({"pybullet_control_mode": "reset"})
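For illustration, the new `predicators/pybullet_compatibility.py` module added above is intended to be consumed at call sites rather than at import time, so that non-PyBullet environments keep working when PyBullet cannot be compiled (e.g. on Python 3.13). Below is a minimal sketch of such a call site. Only `PYBULLET_AVAILABLE`, `get_pybullet()`, and the module path come from the file in this diff; the helper name `_make_sim_client` and the DIRECT-mode connection are illustrative assumptions, not code from the repository.

```python
# Hypothetical consumer of predicators/pybullet_compatibility.py.
# Only PYBULLET_AVAILABLE and get_pybullet() are defined in the diff above;
# the function below and its DIRECT-mode connection are an illustrative
# assumption of how an environment module might guard PyBullet usage.
from predicators.pybullet_compatibility import PYBULLET_AVAILABLE, get_pybullet


def _make_sim_client() -> int:
    """Connect to a headless PyBullet simulation, failing loudly when the
    compatibility layer reports that PyBullet could not be imported."""
    if not PYBULLET_AVAILABLE:
        raise NotImplementedError(
            "This environment requires PyBullet, which is unavailable; "
            "use a non-pybullet_* environment instead.")
    p = get_pybullet()
    # DIRECT mode runs the physics server without opening a GUI window.
    return p.connect(p.DIRECT)
```

Guarding at the call site like this matches the intent stated in the module docstring: PyBullet-dependent environments are skipped with a clear error, while the rest of the package imports cleanly.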