diff --git a/colla/README.md b/colla/README.md new file mode 100644 index 0000000000..a2173a9f0c --- /dev/null +++ b/colla/README.md @@ -0,0 +1,87 @@ +# MiniBehavior Gridworld for CoLLA 2026 + +This directory contains the MiniBehavior gridworld environment setup for CoLLA 2026. + +## Setup Instructions + +### 1. Install Requirements + +Navigate to the `colla/` directory and install the required packages: + +```bash +cd colla/ +pip install -r requirements.txt +``` + +### 2. Run the Main Script + +From the `colla/` directory, run: + +```bash +python main.py +``` +(should take ~3min to complete) + +### 3. Fix Known Package Issues + +After installation, you may encounter two errors that require manual fixes in the installed packages: + +#### Error 1: Gymnasium Package + +**Location:** In your gymnasium package installation (typically in `site-packages/gymnasium/envs/registration.py`) + +**Find this code:** +```python +# Update the env spec kwargs with the `make` kwargs + env_spec_kwargs = copy.deepcopy(env_spec.kwargs) + env_spec_kwargs.update(kwargs) +``` + +**Replace with:** +```python +# Update the env spec kwargs with the `make` kwargs + env_spec_kwargs = copy.deepcopy(env_spec.kwargs) + env_spec_kwargs = {} + env_spec_kwargs.update(kwargs) +``` + +#### Error 2: Minigrid Environment + +**Location:** In your minigrid package installation (typically in `site-packages/minigrid/minigrid_env.py`) + +**Find this code:** +```python + assert isinstance( + max_steps, int + ), f"The argument max_steps must be an integer, got: {type(max_steps)}" + self.max_steps = max_steps +``` + +**Replace with:** +```python +max_steps = int(max_steps) + assert isinstance( + max_steps, int + ), f"The argument max_steps must be an integer, got: {type(max_steps)}" + self.max_steps = max_steps +``` + +## Notes + +- These fixes are temporary workarounds for compatibility issues between the packages +- Make sure to apply these fixes in your Python environment's site-packages directory +- You can 
find your site-packages location by running: `python -c "import site; print(site.getsitepackages())"` + +## Output and Results + +- The most recent frame is saved to `output_image.jpeg` +- Results including NSRTs, CSVs, and logs are saved in the `results/` directory +- Demos are located in `../demos/` (relative to the `colla/` directory) + - To view demos, run: `python view_demos.py` + +## Next Steps + +1. Fix remaining MiniBehavior environments (should be 20 total) +2. Implement LLM baseline operator learner +3. Add predicate invention +4. Generate results for CoLLA diff --git a/colla/main.py b/colla/main.py new file mode 100644 index 0000000000..8dfaa114bf --- /dev/null +++ b/colla/main.py @@ -0,0 +1,866 @@ +# TODO +# - Ours vs. BC vs. CI - single demo (no prior operators or demos) +# - Ours vs. BC vs. CI - multiple demos same task to get Groundtruth operators (no prior operators or demos) +# - (Zero-Shot - Many-to-One) Re-combination of operators to zero-shot new tasks [One Example] +# - (One-Shot - One-to-Many) Use previous operators to one-shot new tasks it could not solve before [One Example] +# - Groundtruth operators working on 20 envs + +# TODO LATER: +# - Lifelong Learning Table (on easy tasks) [Demos Ours vs. BC vs. 
CI][curriculum vs random] +# - Lifelong Learning Table (on all 20 tasks) [Final Thesis graph][curriculum vs random] +# - Entire Zero-Shot - Many-to-One Table +# - Entire One-Shot - One-to-Many Table + +# (1) implement the evaluation function evaluate(agent) returns dictionary of results +# (2) implment evaluation visualization visualize(results) +# (3) do whatever it takes to make results better (CI, BC, FF+BC, FF+BC+LLMs) + +results_folder = "results/" + +from minibehavior_env import MiniBehaviorEnv +from minibehavior_helpers import Box, LowLevelTrajectory, State, Task, \ + demo_files, get_demo_traj, learn_nsrts_from_data, parse_nsrt_block +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import random +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout, PlanningFailure +from predicators import utils +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, STRIPSOperator +import numpy as np +from collections import Counter + +from predicators.nsrt_learning.strips_learning.gen_to_spec_learner import parse_objs_preds_and_options + +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from predicators.nsrt_learning.segmentation import segment_trajectory + +opname_to_key = { + 'Actions.pickup_0': '0', + 'Actions.pickup_1': '1', + 'Actions.pickup_2': '2', + 'Actions.drop_0': '3', + 'Actions.drop_1': '4', + 'Actions.drop_2': '5', + 'Actions.drop_in': 'i', + 'Actions.toggle': 't', + 'Actions.close': 'c', + 'Actions.open': 'o', + 'Actions.cook': 'k', + 'Actions.slice': '6' +} + +class RandomAgent(): + def __init__(self, name): + self.name = name + self.actions = None + + def reset(self, task_name, learn=False): + pass + + def policy(self, obs, env): + #print(env.get_lifted_state()) + return env.key_to_action[random.choice(list(env.key_to_action.keys()))] + +def evaluation(agents, 
tasks, num_iterations=10, start_seed=100, short_task=True, learn=True): + results = {} + task_i = 0 + + # Create results file name + results_filename = results_folder + f"intermediate_results_{'short' if short_task else 'long'}_seed{start_seed}.log" + + for i in range(num_iterations): + for task in tasks: + for agent in agents: + env = MiniBehaviorEnv(env_id=task, seed=i+start_seed) + env.short_task = short_task + observation, _ = env.reset() + agent.short_task = short_task + agent.reset(env.env_id, learn=learn) + steps = 0 + for _ in range(50): + action = agent.policy(observation, env) + observation, reward, terminated, truncated, info = env.step(action) + steps += 1 + env.show() + + if reward != 0: + break + + if terminated or truncated: + break + found_plan = 0 + plan_diff = -1 + plan_length = 0 + num_preconditions_removed = 0 + num_replanned = 0 + goal_size = 0 + goal_atoms_achieved = 0 + + # Get goal size and achieved atoms + if hasattr(agent, 'goal') and agent.goal is not None: + goal_size = len(agent.goal) + # Get final state atoms + final_state = info['final_state'] + # Count how many goal atoms are achieved + goal_atoms_achieved = len(agent.parse_goal(env.env_id, final_state)) + + if agent.found_initial_plan: + found_plan = 1 + plan_length = agent.initial_plan_length + num_preconditions_removed = agent.initial_num_preconditions_removed + num_replanned = agent.num_replanned + + key_to_opname = {v:k for k,v in opname_to_key.items()} + plan = [key_to_opname[action] if not action.startswith("moveto") else "Move" for action in agent.initial_plan] + dataset_plan = agent.dataset[0].actions + + def differing_reoccurring_counts(list1, list2): + count1 = Counter(list1) + count2 = Counter(list2) + all_keys = set(count1.keys()) | set(count2.keys()) + result = {} + total_diff = 0 + for key in all_keys: + c1 = count1.get(key, 0) + c2 = count2.get(key, 0) + if (c1 > 1 or c2 > 1) and c1 != c2: + diff = abs(c1 - c2) + result[key] = diff + total_diff += diff + 
result['total'] = total_diff + return result + plan_diff = differing_reoccurring_counts(plan, dataset_plan)['total'] + + # if agent.version_space: + # assert found_plan == 1, f"Agent {agent.name} failed to find a plan for task {task}" + results[str(task_i) + "_" + task + "_" + agent.name] = (steps, reward, i, found_plan, plan_diff, plan_length, num_preconditions_removed, num_replanned, goal_size, goal_atoms_achieved) + # Save intermediate results after each iteration + with open(results_filename, 'a') as f: + f.write(f"\n{'='*80}\n") + f.write(f"Iteration {i} (seed={i+start_seed}) - {'SHORT' if short_task else 'LONG'} task\n") + f.write(f"{'='*80}\n") + for key, (steps, reward, iteration, found_plan, plan_diff, plan_length, num_preconditions_removed, num_replanned, goal_size, goal_atoms_achieved) in results.items(): + if iteration == i: # Only write results from current iteration + task_idx, task_name, agent_name = key.split("_", 2) + success = "SUCCESS" if reward > 0 else "FAIL" + goal_completion = f"{goal_atoms_achieved}/{goal_size}" if goal_size > 0 else "N/A" + f.write(f"\nTask {task_idx}: {task_name}\n") + f.write(f" Agent: {agent_name}\n") + f.write(f" Result: {success} (reward={reward}, steps={steps})\n") + f.write(f" Goal completion: {goal_completion}\n") + f.write(f" Plan found: {bool(found_plan)}, length={plan_length}, diff={plan_diff}\n") + f.write(f" Preconditions removed: {num_preconditions_removed}, Replanned: {num_replanned}\n") + f.write(f"\n") + task_i += 1 + + return results + +def structure_results(results_dict): + data = [] + for key, (steps, reward, iteration, found_plan, plan_diff, plan_length, num_preconditions_removed, num_replanned, goal_size, goal_atoms_achieved) in results_dict.items(): + task_idx, task_name, agent_name = key.split("_", 2) + data.append({ + "task_name": task_name, + "task_idx": int(task_idx), + "iteration": int(iteration), + "found_plan": int(found_plan), + "plan_diff": int(plan_diff), + "plan_length": int(plan_length), + 
"num_preconditions_removed": int(num_preconditions_removed), + "num_replanned": int(num_replanned), + "goal_size": int(goal_size), + "goal_atoms_achieved": int(goal_atoms_achieved), + "steps": steps, + "reward": reward, + "success": 1 if reward > 0 else 0, + "agent": agent_name + }) + df = pd.DataFrame(data) + df["task_order"] = df["task_idx"] + df["goal_completion_rate"] = df.apply(lambda row: row["goal_atoms_achieved"] / row["goal_size"] if row["goal_size"] > 0 else 0, axis=1) + return df.sort_values(["agent", "iteration", "task_order"]) + +def plot_lifelong_success(df): + plt.figure(figsize=(14, 5)) + + sns.lineplot( + data=df, + x="task_order", + y="success", + hue="agent", + marker="o" + ) + + # Set up x-ticks with task names, spaced across iterations + xticks = df["task_order"] + xticklabels = df["task_name"] + plt.xticks(ticks=xticks, labels=xticklabels, rotation=45, ha='right') + + plt.ylim(-0.1, 1.1) + plt.yticks([0, 1], ["Fail", "Success"]) + plt.ylabel("Success") + plt.xlabel("Tasks over Lifelong Iterations") + plt.title("Lifelong Learning Success per Task") + plt.tight_layout() + plt.grid(True, linestyle='--', alpha=0.3) + plt.savefig(results_folder + df["agent"].iloc[0] + "_lifelong_learning_success.png", dpi=200) + +class OperatorLearningAgent(): + def __init__(self, name, strips_learner, single_grounding=False, version_space=False): + self.name = name + self.num_demos = 1 + self.version_space = version_space + + # Initialized once; populated in get_data() + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.goal = None + + # Runtime variables + self.nsrts = None + self.actions = None + self.i = 0 + self.seed_i = 0 + self.short_task = True + self.num_preconditions_removed = 0 + self.initial_num_preconditions_removed = 0 + self.num_replanned = 0 + self.found_initial_plan = False + self.initial_plan 
= None + self.initial_plan_length = 0 + + # Learning Params + self.strips_learner = strips_learner + self.single_grounding = single_grounding + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + def reset(self, task_name, learn=True): + if False: + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.action_space = Box(0, 7, (1,)) + self.seed_i = 0 + self.num_replanned = 0 + self.initial_num_preconditions_removed = 0 + + # Learning Params + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + if learn: + self.nsrts = self.learn_nsrts(task_name) + else: + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + segmented_trajs = [segment_trajectory(traj, self.preds, atom_seq=atom_seq) for traj, atom_seq in self.ground_atom_dataset] + self.nsrts = [] + for nsrt_str in nsrt_strs: + nsrt = parse_nsrt_block(nsrt_str, segmented_trajs) + if nsrt is not None: + self.nsrts.append(nsrt) + assert len(self.nsrts) > 0, "No NSRTs loaded!" 
+ + # Save NSRTS to results_folder/ as text file + with open(results_folder + self.name + "_" + task_name + "_NSRTs.txt", "w") as file: + for nsrt in self.nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt) + "\n") + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + self.num_replanned = 0 + self.initial_num_preconditions_removed = 0 + self.found_initial_plan = False + self.initial_plan = None + self.initial_plan_length = 0 + + def parse_goal(self, task_name, ground_atoms_state): + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and "towel" in str(atom)]) + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + 
) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + 
elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PreparingSalad-16x16-N2-v0": + import ipdb; ipdb.set_trace() + raise NotImplementedError("parse_goal not implemented for PreparingSalad") + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ]) + + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and 
"sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == "MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + else: + import ipdb; ipdb.set_trace() + + + def get_plan(self, state, seed): + objs, _, _, ground_atoms_traj, all_atoms = parse_objs_preds_and_options(self.dataset[-1], train_task_idx=len(self.dataset)) + task = Task(State({}, None), self.goal) + + _, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options(LowLevelTrajectory([state], [], _is_demo=True, _train_task_idx=0), train_task_idx=0, all_atoms=all_atoms) + init_atoms = ground_atoms_traj[1][0] + plan = self.plan(init_atoms, objs, self.preds, self.nsrts, task, seed) + return plan + + def policy(self, obs, env): + if self.actions is None: + seed = self.seed_i + self.seed_i += 1 + num_remove_pre = 0 + is_replanning = self.num_replanned > 0 + safety_counter = 0 + while self.actions is None or self.actions == []: + print('planning', num_remove_pre) + try: + safety_counter += 1 + if safety_counter > 10: + print(env.env_id, self.goal) + import ipdb; ipdb.set_trace() + self.actions = self.get_plan(env.get_lifted_state(), seed) + self.i = 0 + self.num_preconditions_removed = num_remove_pre + if not is_replanning: + self.initial_num_preconditions_removed = 
num_remove_pre + self.found_initial_plan = True + self.initial_plan = list(self.actions) + self.initial_plan_length = len(self.actions) + except (StopIteration, _SkeletonSearchTimeout, PlanningFailure): + num_remove_pre += 1 + if self.initial_num_preconditions_removed > 19: + #TODO fix + break + # Planning failed, remove preconditions randomly and retry if version_space is enabled + if not self.version_space: + # No version space search, just fail + break + new_nsrts = set() + for nsrt in self.nsrts: + pre = set() + tot_pre = len(nsrt.op.preconditions) - num_remove_pre + if tot_pre > 0: + pre = set(random.sample(list(nsrt.op.preconditions), tot_pre)) + ignore_effects = nsrt.op.ignore_effects + del_effs = nsrt.op.delete_effects + new_nsrts.add( + nsrt.op.copy_with(preconditions=pre, + ignore_effects=ignore_effects, + delete_effects=del_effs).make_nsrt( + nsrt.option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32))) + self.nsrts = new_nsrts + ## TODO: This is a hack to prevent infinite loops when removing preconditions + if num_remove_pre > 20: + # remove all delete effects and ignore effects + new_nsrts = set() + for nsrt in self.nsrts: + new_nsrts.add( + nsrt.op.copy_with(preconditions=set(), + ignore_effects=set(), + delete_effects=set()).make_nsrt( + nsrt.option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32))) + self.nsrts = new_nsrts + self.initial_num_preconditions_removed = num_remove_pre + + self.i += 1 + if self.actions is not None and self.i-1 < len(self.actions): + return env.key_to_action[self.actions[self.i-1]] + else: + self.actions = None + self.i = 0 + self.num_replanned += 1 + return env.key_to_action["0"] + + def clean_action_plan(self, action_plan): + plan = [] + for step in action_plan: + name = step[0] + objs = step[1] + if len(objs) > 0: + obj_name = objs[0].name + if name.startswith("Move"): + plan.append(f"moveto-{obj_name}") + else: + for opname, key in opname_to_key.items(): + if 
opname in name: + plan.append(key) + break + return plan + + def plan(self, init_atoms, objects, predicates, nsrts, task, seed): + ground_nsrts, reachable_atoms = task_plan_grounding(init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic("hadd", init_atoms, + task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan(init_atoms, + task.goal, + ground_nsrts, + reachable_atoms, + heuristic, + timeout=1, + seed=seed, + max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + + action_plan = [] + for step in skeleton: + action_plan.append((step.option.name, step.objects)) + return self.clean_action_plan(action_plan) + + def get_data(self, task_name): + for demo_file in demo_files: + if task_name in demo_file: + demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + + if self.short_task: + # Shorten demos for all 20 tasks + if task_name == 'MiniGrid-OpeningPackages-16x16-N2-v0': # 2 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:3], demo_traj.actions[:2], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-InstallingAPrinter-16x16-N2-v0': # 5 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:6], demo_traj.actions[:5], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': # 4 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0': # 4 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-SettingUpCandles-16x16-N2-v0': # 4 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-WateringHouseplants-16x16-N2-v0': # 6 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:7], 
demo_traj.actions[:6], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-LayingWoodFloors-16x16-N2-v0': # 9 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:10], demo_traj.actions[:9], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': # 4 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-SortingBooks-16x16-N2-v0': # 4 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-CleaningShoes-16x16-N2-v0': # 9 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:10], demo_traj.actions[:9], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0': # 5 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:6], demo_traj.actions[:5], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-StoringFood-16x16-N2-v0': # 6 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:7], demo_traj.actions[:6], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0': # 6 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:7], demo_traj.actions[:6], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-CollectMisplacedItems-16x16-N2-v0': # 4 actions + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-CleaningACar-16x16-N2-v0': # 24 actions + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-MakingTea-16x16-N2-v0': # 27 actions + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-WashingPotsAndPans-16x16-N2-v0': # 9 actions + demo_traj = 
LowLevelTrajectory(demo_traj.states[:10], demo_traj.actions[:9], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0': # 41 actions + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=0) + else: + import ipdb; ipdb.set_trace() + + idx = len(self.dataset) + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + + self.dataset.append(demo_traj) + new_objs, new_preds, new_options, self.ground_atoms_traj, _ = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) + self.objs |= new_objs + self.preds |= new_preds + self.options |= new_options + self.ground_atom_dataset.append(self.ground_atoms_traj) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.tasks.append(Task(State({}, None), goal)) + # if len(self.dataset) >= self.num_demos: + # break + # assert len(self.dataset) == self.num_demos + return self.dataset, self.tasks, self.preds, self.options, self.action_space, self.ground_atom_dataset + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + nsrts, _, _ = learn_nsrts_from_data(dataset, + tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + return nsrts + +class DummyAgent(OperatorLearningAgent): + def __init__(self, name="dummy", strips_learner="dummy"): + super().__init__(name=name, strips_learner=strips_learner, single_grounding=True) + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + obj_to_var = {obj:obj.type("?" 
+ obj.name) for obj in self.objs} + lifted_goal = {atom.lift(obj_to_var) for atom in goal} + + nsrts = set() + name_i = 0 + for option in options: + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=[], + preconditions=set(), + add_effects=set(), + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + + params = [] + for sublist in [lifted_atom.variables for lifted_atom in lifted_goal]: + params += sublist + params = [x for x in set(params)] + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=params, + preconditions=set(), + add_effects=lifted_goal, + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + return nsrts + +class GroundTruthAgent(OperatorLearningAgent): + def __init__(self, name): + super().__init__(name=name, strips_learner="NONE") + self.name = name + self.ground_truth_trajs = {} + self.i = 0 + self.actions = None + + def reset(self, task_name, learn=False): + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + action_plan = [] + for i, step in enumerate(self.ground_atoms_traj[0].actions): + curr_state = self.ground_atoms_traj[1][i] + next_state = self.ground_atoms_traj[1][i+1] + def count_object_occurrences(atom_set): + counter = Counter() + for atom in atom_set: + for obj in atom.objects: + if not atom.predicate.name.startswith("~inreachofrobot"): + counter[obj] += 1 + return 
counter + counter = count_object_occurrences(next_state - curr_state) + def get_max_count_object(counter, exclude_types=("table", "shelf")): + max_count = max(counter.values()) + candidates = [ + obj for obj, count in counter.items() + if count == max_count and all(ex_type not in str(obj) for ex_type in exclude_types) + ] + + if candidates: + return candidates[0] + else: + return None + + try: + if get_max_count_object(counter) is None: + objs = [max(counter, key=counter.get)] + else: + objs = [get_max_count_object(counter)] + except: + objs = random.sample(self.objs, 1) + action_plan.append((step._option.name, objs)) + self.ground_truth_trajs[task_name] = self.clean_action_plan(action_plan) + + def policy(self, obs, env): + #print(env.get_lifted_state()) + try: + assert env.env_id in self.ground_truth_trajs.keys() + except: + import ipdb; ipdb.set_trace() + self.i += 1 + if self.i-1 < len(self.ground_truth_trajs[env.env_id]): + return env.key_to_action[self.ground_truth_trajs[env.env_id][self.i-1]] + else: + return env.key_to_action["0"] + + def learn_nsrts(self, task_name): + return None + +# i = 0 +# curriculum = [] +# for k,v in sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects +# i+=1 +# print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) +# curriculum.append(k.split("_")[0]) + +import time +start_time = time.time() + +# Tasks ordered from easiest to hardest based on SHORTENED plan length (when short_task=True) and goal complexity +# Format: (shortened_actions, goal_count, task_name) +tasks = [ + 'MiniGrid-OpeningPackages-16x16-N2-v0', # 2 actions, 2 goals + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', # 4 actions, 1 goal + 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', # 4 actions, 3 goals + 'MiniGrid-SortingBooks-16x16-N2-v0', # 4 actions, 4 goals + + # TODO the tasks below take longer, the ones with two # + # are not working with our current approach and should be fixed! 
+
+    # 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', # 4 actions, 5 goals
+    # 'MiniGrid-SettingUpCandles-16x16-N2-v0', # 4 actions, 6 goals
+    # # 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', # 4 actions, 7 goals
+    # 'MiniGrid-InstallingAPrinter-16x16-N2-v0', # 5 actions, 2 goals
+    # # 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', # 5 actions, 7 goals
+    # 'MiniGrid-WateringHouseplants-16x16-N2-v0', # 6 actions, 3 goals
+    # # 'MiniGrid-StoringFood-16x16-N2-v0', # 6 actions, 8 goals
+    # # 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', # 6 actions, 8 goals
+    # 'MiniGrid-LayingWoodFloors-16x16-N2-v0', # 9 actions, 2 goals
+    # # 'MiniGrid-CleaningShoes-16x16-N2-v0', # 9 actions, 1 goal
+    # 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', # 9 actions, 5 goals
+    # # 'MiniGrid-CleaningACar-16x16-N2-v0', # 24 actions, 2 goals
+    # # 'MiniGrid-MakingTea-16x16-N2-v0', # 27 actions, 6 goals
+    # # 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', # 41 actions, 7 goals
+
+    # # TODO: two further tasks (not listed here) are also broken and need fixing.
+] + +#tasks = tasks + tasks + tasks +# tasks = curriculum +print("#"*30) +print(tasks) + +all_agents = [ + GroundTruthAgent("ground-truth"), + # DummyAgent("dummy", strips_learner="dummy"), + OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), + # OperatorLearningAgent("backchaining", strips_learner="backchaining"), + # OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), + # OperatorLearningAgent("llm", strips_learner="llm"), # TODO needs to be implemented + # OperatorLearningAgent("backward-forward", strips_learner="backward-forward", version_space=True), + ] + +num_seeds = 1 #10 +for i in range(num_seeds): + for j, agent in enumerate(all_agents): + with open("test_saved.NSRTs.txt", "w") as file: + file.write("""NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [~inreachofrobot(?x0:obj_type)] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [inreachofrobot, ~inreachofrobot] + Option Spec: Move()""") + seed = (i+1) + (i+1)*(j+1) + 42 + + # TODO here we evaluate shorten tasks + results = evaluation([agent], tasks, num_iterations=1, start_seed=seed) + df = structure_results(results) + plot_lifelong_success(df) + end_time = time.time() + print("time elasped", end_time - start_time) + df["seed"] = seed + df.to_csv(results_folder + agent.name + '_short_output_{}.csv'.format(seed)) + + # TODO here we evaluate full tasks + # results = evaluation([agent], tasks, num_iterations=1, start_seed=seed, short_task=False)#, learn=False) + # df2 = structure_results(results) + # plot_lifelong_success(df2) + # end_time = time.time() + # print("time elasped", end_time - start_time) + # df2["seed"] = seed + # df2.to_csv(results_folder + agent.name + '_long_output_{}.csv'.format(seed)) + + diff --git a/colla/minibehavior_env.py b/colla/minibehavior_env.py new file mode 100644 index 0000000000..21770e2b5f --- /dev/null +++ b/colla/minibehavior_env.py @@ -0,0 +1,446 
@@ +import gym +import numpy as np +from PIL import Image +from minigrid.wrappers import * +from mini_behavior.window import Window +from mini_behavior.utils.save import get_step, save_demo +from mini_behavior.grid import GridDimension +from mini_behavior.states import * +from collections import deque +import random + +TILE_PIXELS = 32 + +class MiniBehaviorEnv: + def __init__(self, env_id='MiniGrid-InstallingAPrinter-8x8-N2-v0', seed=-1, tile_size=32, + agent_view=False, save_demo_flag=False, load_path=None): + + self.env_id = env_id + self.seed = seed + self.tile_size = tile_size + self.agent_view = agent_view + self.save_demo_flag = save_demo_flag + self.load_path = load_path + self.show_furniture = False + self.all_steps = {} + + self.env = gym.make(env_id) + self.env.teleop_mode() + self.key_to_action = { + '0': self.env.actions.pickup_0, + '1': self.env.actions.pickup_1, + '2': self.env.actions.pickup_2, + '3': self.env.actions.drop_0, + '4': self.env.actions.drop_1, + '5': self.env.actions.drop_2, + 't': self.env.actions.toggle, + 'o': self.env.actions.open, + 'c': self.env.actions.close, + 'k': self.env.actions.cook, + '6': self.env.actions.slice, + 'i': self.env.actions.drop_in, + } + for obj_type, obj_list in self.env.objs.items(): + for obj in obj_list: + self.key_to_action["moveto-" + obj.name] = "moveto-" + obj.name + + + if self.agent_view: + self.env = RGBImgPartialObsWrapper(self.env) + self.env = ImgObsWrapper(self.env) + + self.window = Window('mini_behavior - ' + env_id) + self.window.no_closeup() + + if self.load_path is not None: + self._load_state() + + self.nav_sampler_cache = {} + self.short_task = True + + def redraw(self, img): + if not self.agent_view: + img = self.env.render() + self.window.set_inventory(self.env) + self.window.show_img(img) + self.window.save_img("output_image.jpeg") + + def render_furniture(self): + self.show_furniture = not self.show_furniture + if self.show_furniture: + img = np.copy(self.env.furniture_view) + i, j = 
self.env.agent_pos + ymin = j * TILE_PIXELS + ymax = (j + 1) * TILE_PIXELS + xmin = i * TILE_PIXELS + xmax = (i + 1) * TILE_PIXELS + img[ymin:ymax, xmin:xmax, :] = GridDimension.render_agent( + img[ymin:ymax, xmin:xmax, :], self.env.agent_dir) + img = self.env.render_furniture_states(img) + self.window.show_img(img) + else: + obs = self.env.gen_obs() + self.redraw(obs) + + def show_states(self): + imgs = self.env.render_states() + self.window.show_closeup(imgs) + + def switch_dim(self, dim): + self.env.switch_dim(dim) + print(f'switching to dim: {self.env.render_dim}') + obs = self.env.gen_obs() + self.redraw(obs) + + def _load_state(self): + if self.seed != -1: + self.env.seed(self.seed) + self.env.reset() + obs = self.env.load_state(self.load_path) + if hasattr(self.env, 'mission'): + print('Mission: %s' % self.env.mission) + self.window.set_caption(self.env.mission) + self.redraw(obs) + + def reset(self): + if self.seed != -1: + self.env.seed(self.seed) + obs = self.env.reset() + if hasattr(self.env, 'mission'): + print('Mission: %s' % self.env.mission) + self.window.set_caption(self.env.mission) + self.redraw(obs) + return obs + + def get_lifted_state(self): + objs = self.env.objs + obj_instances = {} + for obj_type, obj_list in objs.items(): + for obj in obj_list: + obj_instances[obj.name] = obj + + ground_atoms = [] + for k, o in obj_instances.items(): + for pred_name, pred in o.states.items(): + if isinstance(pred, (AbsoluteObjectState, AbilityState, ObjectProperty)): + if pred.get_value(self.env): + ground_atoms.append(f"{pred_name}({k})") + elif isinstance(pred, RelativeObjectState): + for k2, o2 in obj_instances.items(): + if o.check_rel_state(self.env, o2, pred_name): + ground_atoms.append(f"{pred_name}({k},{k2})") + return ground_atoms + + def step(self, action): + prev_obs = self.env.gen_obs() + prev_state = self.get_lifted_state() + if isinstance(action, str) and action.startswith("moveto-"): + self.move_in_front_of(action.replace("moveto-","")) + obs 
= self.env.gen_obs() + reward = 0.0 + done = False + terminated = False + info = {} + else: + obs, reward, done, terminated, info = self.env.step(action) + if self.short_task: + if self.env_id == 'MiniGrid-OpeningPackages-16x16-N2-v0': + for package in self.env.objs.get('package', []): + if package.check_abs_state(self.env, 'openable'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-InstallingAPrinter-16x16-N2-v0': + printer = self.env.objs.get('printer', [None])[0] + table = self.env.objs.get('table', [None])[0] + if printer and table and printer.check_rel_state(self.env, table, 'onTop') and printer.check_abs_state(self.env, 'toggleable'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0': + for carton in self.env.objs.get('carton', []): + for shelf in self.env.objs.get('shelf', []): + if carton.check_rel_state(self.env, shelf, 'onTop'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-SettingUpCandles-16x16-N2-v0': + for candle in self.env.objs.get('candle', []): + for table in self.env.objs.get('table', []): + if candle.check_rel_state(self.env, table, 'onTop'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': + for hamburger in self.env.objs['hamburger']: + is_inside = [hamburger.check_rel_state(self.env, ashcan, 'inside') for ashcan in self.env.objs['ashcan']] + if True in is_inside: + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-CollectMisplacedItems-16x16-N2-v0': + items = self.env.objs.get('gym_shoe', []) + self.env.objs.get('necklace', []) + \ + self.env.objs.get('notebook', []) + self.env.objs.get('sock', []) + for item in items: + for table in self.env.objs.get('table', []): + if item.check_rel_state(self.env, table, 'onTop') and 'table' not in item.name: + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-SortingBooks-16x16-N2-v0': + book = self.env.objs['book'] + hardback = self.env.objs['hardback'] + shelf = 
self.env.objs['shelf'][0] + for obj in book + hardback: + if obj.check_rel_state(self.env, shelf, 'onTop'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': + book = self.env.objs['book'] + box = self.env.objs['box'][0] + for obj in book: + if obj.check_rel_state(self.env, box, 'inside'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-LayingWoodFloors-16x16-N2-v0': + plywoods = self.env.objs.get('plywood', []) + if len(plywoods) >= 2: + for i, plywood1 in enumerate(plywoods): + for plywood2 in plywoods: + if plywood1.check_rel_state(self.env, plywood2, 'nextto'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0': + items = self.env.objs.get('document', []) + self.env.objs.get('folder', []) + for item in items: + for cabinet in self.env.objs.get('cabinet', []): + if item.check_rel_state(self.env, cabinet, 'inside'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-WateringHouseplants-16x16-N2-v0': + pot_plants = self.env.objs['pot_plant'] + for plant in pot_plants: + if plant.check_abs_state(self.env, 'soakable'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-StoringFood-16x16-N2-v0': + food_items = self.env.objs.get('oatmeal', []) + self.env.objs.get('chip', []) + \ + self.env.objs.get('vegetable_oil', []) + self.env.objs.get('sugar', []) + for food in food_items: + for cabinet in self.env.objs.get('cabinet', []): + if food.check_rel_state(self.env, cabinet, 'inside'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0': + for plate in self.env.objs.get('plate', []): + for cabinet in self.env.objs.get('cabinet', []): + if plate.check_rel_state(self.env, cabinet, 'inside'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-CleaningShoes-16x16-N2-v0': + for towel in self.env.objs.get('towel', []): + if towel.check_abs_state(self.env, 'onfloor'): + reward = 1.0 + done = 1.0 + 
elif self.env_id == 'MiniGrid-WashingPotsAndPans-16x16-N2-v0': + cookware = self.env.objs.get('pan', []) + self.env.objs.get('kettle', []) + self.env.objs.get('teapot', []) + for item in cookware: + for cabinet in self.env.objs.get('cabinet', []): + if item.check_rel_state(self.env, cabinet, 'inside'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-CleaningACar-16x16-N2-v0': + items = self.env.objs.get('rag', []) + self.env.objs.get('soap', []) + for item in items: + for bucket in self.env.objs.get('bucket', []): + if item.check_rel_state(self.env, bucket, 'inside'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-MakingTea-16x16-N2-v0': + for lemon in self.env.objs.get('lemon', []): + if lemon.check_abs_state(self.env, 'sliceable'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0': + blender = self.env.objs.get('blender', [None])[0] + countertop = self.env.objs.get('countertop', [None])[0] + if blender and countertop and blender.check_rel_state(self.env, countertop, 'onTop'): + reward = 1.0 + done = 1.0 + state = self.get_lifted_state() + + print(f'env_id={self.env_id}, step={self.env.step_count}, reward={reward:.2f}') + # for atom in state: + # print(atom) + + if self.save_demo_flag: + self.all_steps[self.env.step_count] = (prev_obs, prev_state, action, obs, state) + + if done: + print('done!') + if self.save_demo_flag: + save_demo(self.all_steps, self.env_id, self.env.episode) + self.reset() + else: + self.redraw(obs) + + info = {'final_state': state} + return obs, reward, done, terminated, info + + def show(self): + self.window.show(block=False) + + def key_handler_primitive(self, event): + print('pressed', event.key) + action_map = { + 'left': self.env.actions.left, + 'right': self.env.actions.right, + 'up': self.env.actions.forward, + '0': self.env.actions.pickup_0, + '1': self.env.actions.pickup_1, + '2': self.env.actions.pickup_2, + '3': self.env.actions.drop_0, + '4': 
self.env.actions.drop_1, + '5': self.env.actions.drop_2, + 't': self.env.actions.toggle, + 'o': self.env.actions.open, + 'c': self.env.actions.close, + 'k': self.env.actions.cook, + '6': self.env.actions.slice, + 'i': self.env.actions.drop_in + } + + if event.key == 'escape': + self.window.close() + elif event.key in action_map: + self.step(action_map[event.key]) + elif event.key == 'pagedown': + self.show_states() + + def bfs_path(self, start, goal): + grid = self.env.grid + width, height = grid.width, grid.height + visited = set() + queue = deque([(start, [])]) + + while queue: + current_pos, path = queue.popleft() + if current_pos == goal: + return path + + for dx, dy in [(-1,0), (1,0), (0,-1), (0,1)]: + nx, ny = current_pos[0] + dx, current_pos[1] + dy + next_pos = (nx, ny) + + if not (0 <= nx < width and 0 <= ny < height): + continue + if next_pos in visited: + continue + if grid.get(nx, ny) != [[None, None], [None, None], [None, None]]: + if grid.get(nx, ny)[0][0] is None or grid.get(nx, ny)[0][0].name != "door": + continue # Obstacle + + visited.add(next_pos) + queue.append((next_pos, path + [next_pos])) + + return None # No path found + + def move_in_front_of(self, target_name): + # Find the target object + target_obj = None + for obj_list in self.env.objs.values(): + for obj in obj_list: + if obj.name == target_name: + target_obj = obj + break + if target_obj: + break + + if target_obj is None: + print(f"[Error] Object '{target_name}' not found.") + return + + reachable = [] + start_pos = tuple(self.env.agent_pos) + target_pos = target_obj.cur_pos + adjacents = [ + (target_pos[0] + 1, target_pos[1]), + (target_pos[0] - 1, target_pos[1]), + (target_pos[0], target_pos[1] + 1), + (target_pos[0], target_pos[1] - 1) + ] + pos_to_target = {} + for adj in adjacents: + pos_to_target[adj] = target_pos + if hasattr(target_obj, 'all_pos'): + adjacents = [] + for target_pos in target_obj.all_pos: + if 'cabinet' not in target_name: + if target_name in 
self.nav_sampler_cache: + if target_pos in self.nav_sampler_cache[target_name]: + continue + new_adjacents = [ + (target_pos[0] + 1, target_pos[1]), + (target_pos[0] - 1, target_pos[1]), + (target_pos[0], target_pos[1] + 1), + (target_pos[0], target_pos[1] - 1) + ] + for adj in new_adjacents: + pos_to_target[adj] = target_pos + adjacents += new_adjacents + # Choose a reachable adjacent position + random.shuffle(adjacents) + for pos in adjacents: + if (0 <= pos[0] < self.env.grid.width and 0 <= pos[1] < self.env.grid.height): + if self.env.grid.get(*pos) == [[None, None], [None, None], [None, None]]: + path = self.bfs_path(start_pos, pos) + if path: + reachable.append((pos, path)) + elif self.env.grid.get(*pos)[0][0] is not None: + if self.env.grid.get(*pos)[0][0].name == "door": + path = self.bfs_path(start_pos, pos) + if path: + reachable.append((pos, path)) + else: + pass + + if not reachable: + print(f"[Error] No accessible position next to '{target_name}'") + return + + # Choose shortest reachable + goal_pos, path = min(reachable, key=lambda x: len(x[1])) + + # Follow path + for next_pos in path: + dx = next_pos[0] - self.env.agent_pos[0] + dy = next_pos[1] - self.env.agent_pos[1] + + desired_dir = { + (1, 0): 0, + (0, 1): 1, + (-1, 0): 2, + (0, -1): 3 + }.get((dx, dy)) + + if desired_dir is None: + continue + + while self.env.agent_dir != desired_dir: + self.step(self.env.actions.right) + self.step(self.env.actions.forward) + + # Face the object + target_pos = pos_to_target[tuple(self.env.agent_pos)] + face_dir = (target_pos[0] - self.env.agent_pos[0], target_pos[1] - self.env.agent_pos[1]) + target_dir = { + (1, 0): 0, + (0, 1): 1, + (-1, 0): 2, + (0, -1): 3 + }.get(face_dir) + + if target_dir is not None: + while self.env.agent_dir != target_dir: + self.step(self.env.actions.right) + + print(f"[Success] Reached position in front of '{target_name}', facing it.") + if target_name in self.nav_sampler_cache: + 
self.nav_sampler_cache[target_name].append(target_pos) + else: + self.nav_sampler_cache[target_name] = [target_pos] + diff --git a/colla/minibehavior_helpers.py b/colla/minibehavior_helpers.py new file mode 100644 index 0000000000..675b42404a --- /dev/null +++ b/colla/minibehavior_helpers.py @@ -0,0 +1,331 @@ +import numpy as np +from gym.spaces import Box +import re +import pickle as pkl + +from predicators import utils +from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, Variable, LiftedAtom, NSRT, Set +import glob + +name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 +} + +demo_files = sorted([filename for filename in glob.glob("/Users/shashlik/Documents/GitHub/predicators/demos/*/*")]) +demo_tasks = set([demo_file.split("/")[-1].split("_")[0] for demo_file in demo_files]) + +# Load and do this from MiniBeahvior Demo + + +def get_demo_traj(demo_file, verbose=True): + with open(demo_file, 'rb') as f: + data = pkl.load(f) + + last_skill = "Move" + state = [a for a in data[1][1] if "infovofrobot" not in a] + states = [state] + actions = [] + for step in data.keys(): + obs = data[step][0]['image'] + direction = data[step][0]['direction'] + action = data[step][2] + skill = None + + if "forward" in str(action) or \ + "left" in str(action) or \ + "right" in str(action): + + skill = "Move" + else: + skill = str(action) + + has_effect = True + try: + next_obs = data[step][3]['image'] + next_direction = data[step][3]['direction'] + if np.allclose(obs, next_obs) and (direction == next_direction): + has_effect = False + except: + pass + + if has_effect: + if last_skill != skill: + if 
verbose: + print("#") + print(last_skill) + try: + next_state = [a for a in data[step][1] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + actions.append(last_skill) + states.append(state) + except: + pass + last_skill = skill + else: + if verbose: + print("#") + print(last_skill) + next_state = [a for a in data[step][4] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + if verbose: + print("#") + actions.append(last_skill) + states.append(state) + + return LowLevelTrajectory(states, actions, _is_demo=True, _train_task_idx=0) + +def parse_objs_preds_and_options(trajectory, train_task_idx=0): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_type = Type("obj_type", ["is_obj"]) + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + obj = obj_type(arg.strip()) + choice.append(obj) + objs.add(obj) + if len(args) == 1: + pred = Predicate(func_name, [obj_type], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + pred = Predicate(func_name, [obj_type, obj_type], lambda s, o: True) + preds.add(pred) + else: + NotImplementedError("") + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + name_to_actions = actions_dict = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + 
"Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 + } + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + return objs, preds, options, (LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + + +def parse_nsrt_from_string(block: str, predicates: dict) -> NSRT: + """Parses a single NSRT block string into an NSRT object. + + Args: + block: String containing NSRT definition + predicates: Dict mapping predicate name (str) to Predicate object + + Returns: + NSRT object + """ + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + # Assume all types are obj_type + obj_type = Type("obj_type", ["is_obj"]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + # Extract with optional ~ prefix + return set(re.findall(r"~?\w+\(.*?\)", line)) + return set() + + preconditions_raw = extract_effects("Preconditions") + add_effects_raw = extract_effects("Add Effects") + delete_effects_raw = extract_effects("Delete Effects") + ignore_effects_raw = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + def get_predicate(name, entities): + if name not in predicates: + raise ValueError(f"Predicate '{name}' not found in provided predicates") + pred = predicates[name] + if name == "onfloor": + 
entities = entities[:1] + if entities is not None and pred.arity != len(entities): + raise ValueError(f"Predicate '{pred.name}' has arity {pred.arity} but got {len(entities)} entities") + return pred + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") + matches = parameter_pattern.findall(predicate) + if "onfloor" in predicate: + return matches[:1] + return matches + + # Create parameters with obj_type + parameters = [Variable(param.split(":")[0], obj_type) for param in parameters] + + # Process preconditions: only include positive ones (strip ~ ones) + preconditions = set() + for pre in preconditions_raw: + if not pre.startswith('~'): + preconditions.add(LiftedAtom( + get_predicate(pre.split("(")[0], + [Variable(param.split(":")[0], obj_type) + for param in extract_parameters(pre)]), + [Variable(param.split(":")[0], obj_type) + for param in extract_parameters(pre)])) + + + # Process add effects: only positive ones + add_effects = set() + for add in add_effects_raw: + if not add.startswith('~'): + add_effects.add(LiftedAtom( + get_predicate(add.split("(")[0], + [Variable(param.split(":")[0], obj_type) + for param in extract_parameters(add)]), + [Variable(param.split(":")[0], obj_type) + for param in extract_parameters(add)])) + + # Process delete effects: strip ~ and create atoms + delete_effects = set() + for dle in delete_effects_raw: + pred_str = dle.lstrip('~') + delete_effects.add(LiftedAtom( + get_predicate(pred_str.split("(")[0], + [Variable(param.split(":")[0], obj_type) + for param in extract_parameters(pred_str)]), + [Variable(param.split(":")[0], obj_type) + for param in extract_parameters(pred_str)])) + + # Process ignore effects: strip ~ and get predicates + ignore_effects = set() + for ige in ignore_effects_raw: + pred_name = ige.lstrip('~').split("(")[0] + ignore_effects.add(get_predicate(pred_name, None)) + + a_name = option_spec.split("(")[0] + option_spec = utils.SingletonParameterizedOption( + 
a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + + return NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) + +def parse_nsrt_block(block, segmented_trajs) -> NSRT: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = 
parameter_pattern.findall(predicate) + return matches + + try: + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + except KeyError: + return None + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) for ige in ignore_effects]) + a_name = option_spec.split("(")[0] + option_spec = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + + return NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) \ No newline at end of file diff --git a/colla/output_image.jpeg b/colla/output_image.jpeg new file mode 100644 index 0000000000..a93fe297fc Binary files /dev/null and b/colla/output_image.jpeg differ diff --git a/colla/requirements.txt b/colla/requirements.txt new file mode 100644 index 0000000000..823a34d2b8 --- /dev/null +++ b/colla/requirements.txt @@ -0,0 +1,173 @@ +aiodns==3.2.0 +aiohappyeyeballs==2.4.4 +aiohttp==3.10.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +apriltag==0.0.16 +astroid==2.11.7 +asttokens==3.0.0 +async-timeout==5.0.1 +attrs==24.3.0 
+beautifulsoup4==4.12.3 +bosdyn-api==4.1.1 +bosdyn-client==4.1.1 +bosdyn-core==4.1.1 +cachetools==5.5.2 +ccxt==4.4.42 +certifi==2024.12.14 +cffi==1.17.1 +charset-normalizer==3.4.0 +click==8.1.8 +cloudpickle==3.1.1 +-e git+https://github.com/concepts-ai/Concepts.git@44ecfd7aff3d48f967a92e455d78cc87f6f97971#egg=concepts +contourpy==1.3.1 +cryptography==44.0.0 +cycler==0.12.1 +Cython==3.0.12 +decorator==4.4.2 +Deprecated==1.2.18 +dill==0.3.5.1 +distro==1.9.0 +exceptiongroup==1.2.2 +executing==2.1.0 +Farama-Notifications==0.0.4 +filelock==3.17.0 +fonttools==4.56.0 +frozendict==2.4.6 +frozenlist==1.5.0 +fsspec==2025.2.0 +google-ai-generativelanguage==0.6.15 +google-api-core==2.24.2 +google-api-python-client==2.167.0 +google-auth==2.39.0 +google-auth-httplib2==0.2.0 +google-generativeai==0.8.5 +googleapis-common-protos==1.70.0 +graphlib_backport==1.1.0 +grpcio==1.71.0 +grpcio-status==1.62.3 +gym==0.26.2 +gym-minigrid==1.0.3 +gym-notices==0.0.8 +gymnasium==0.29.1 +h11==0.14.0 +h5py==3.13.0 +html5lib==1.1 +httpcore==1.0.8 +httplib2==0.22.0 +httpx==0.27.0 +idna==3.10 +ImageHash==4.3.2 +imageio==2.22.2 +imageio-ffmpeg==0.6.0 +iniconfig==2.1.0 +ipdb==0.13.13 +ipython==8.31.0 +isort==5.13.2 +jedi==0.19.2 +Jinja2==3.1.6 +joblib==1.4.2 +kiwisolver==1.4.8 +lark==1.2.2 +lazy-object-proxy==1.11.0 +lisdf==0.1.1 +lxml==5.3.0 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.6.2 +matplotlib-inline==0.1.7 +mccabe==0.7.0 +mdurl==0.1.2 +-e git+https://github.com/wmcclinton/mini_behavior.git@f5450eae2f632f880c1c11fcdbf1b6b816ff77f9#egg=mini_behavior +minigrid==3.0.0 +moviepy==1.0.3 +mpmath==1.3.0 +multidict==6.1.0 +multiprocess==0.70.13 +multitasking==0.0.11 +mypy==1.8.0 +mypy_extensions==1.1.0 +mysql-connector-python==9.1.0 +networkx==3.4.2 +nltk==3.9.1 +numpy==1.23.5 +openai==1.19.0 +opencv-python==4.7.0.72 +packaging==24.2 +pandas==1.5.1 +pandasql==0.7.3 +parso==0.8.4 +pathos==0.2.9 +peewee==3.17.8 +pexpect==4.9.0 +pillow==10.3.0 +pkgconfig==1.5.5 +platformdirs==4.3.6 
+pluggy==1.5.0 +pox==0.3.6 +ppft==1.7.7 +proglog==0.1.11 +prompt_toolkit==3.0.48 +propcache==0.2.1 +proto-plus==1.26.1 +protobuf==4.22.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +py==1.11.0 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pybullet==3.2.7 +pycares==4.5.0 +pycparser==2.22 +pydantic==2.11.3 +pydantic_core==2.33.1 +pygame==2.6.1 +Pygments==2.18.0 +PyJWT==2.10.1 +pylint==2.14.5 +pynmea2==1.19.0 +pyparsing==3.2.1 +pyperplan==2.1 +pytest==7.1.3 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.2 +PyWavelets==1.8.0 +PyYAML==6.0 +recordclass==0.23.1 +regex==2024.11.6 +requests==2.32.3 +rich==14.0.0 +rsa==4.9.1 +scikit-image==0.19.3 +scikit-learn==1.1.2 +scipy==1.9.3 +seaborn==0.12.1 +six==1.17.0 +slack_bolt==1.23.0 +slack_sdk==3.35.0 +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.36 +stack-data==0.6.3 +sympy==1.13.1 +tabulate==0.9.0 +tenacity==9.1.2 +threadpoolctl==3.6.0 +tifffile==2025.3.30 +tomli==2.2.1 +tomlkit==0.13.2 +tqdm==4.67.1 +traitlets==5.14.3 +types-PyYAML==6.0.12.20250402 +typing-inspection==0.4.0 +typing_extensions==4.12.2 +tzdata==2024.2 +uritemplate==4.1.1 +urllib3==2.2.3 +wcwidth==0.2.13 +webencodings==0.5.1 +wrapt==1.17.2 +yarl==1.18.3 +yfinance==0.2.51 diff --git a/colla/test_saved.NSRTs.txt b/colla/test_saved.NSRTs.txt new file mode 100644 index 0000000000..74490bd924 --- /dev/null +++ b/colla/test_saved.NSRTs.txt @@ -0,0 +1,91 @@ +NSRT-Op4: + Parameters: [?x0:obj_type, ?x1:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x1:obj_type, ?x1:obj_type), onfloor(?x0:obj_type), onfloor(?x1:obj_type), ~inreachofrobot(?x1:obj_type), ~insameroomasrobot(?x0:obj_type), ~insameroomasrobot(?x1:obj_type)] + Add Effects: [inreachofrobot(?x1:obj_type), insameroomasrobot(?x0:obj_type), insameroomasrobot(?x1:obj_type)] + Delete Effects: [~inreachofrobot(?x1:obj_type), ~insameroomasrobot(?x0:obj_type), ~insameroomasrobot(?x1:obj_type)] + Ignore Effects: [] + Option Spec: Move() +NSRT-Op5: + Parameters: 
[?x0:obj_type, ?x1:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x1:obj_type, ?x1:obj_type), inhandofrobot(?x0:obj_type), inreachofrobot(?x0:obj_type), inreachofrobot(?x1:obj_type), insameroomasrobot(?x0:obj_type), insameroomasrobot(?x1:obj_type), onfloor(?x1:obj_type), ~atsamelocation(?x0:obj_type, ?x1:obj_type), ~atsamelocation(?x1:obj_type, ?x0:obj_type), ~onTop(?x0:obj_type, ?x1:obj_type), ~onfloor(?x0:obj_type)] + Add Effects: [atsamelocation(?x0:obj_type, ?x1:obj_type), atsamelocation(?x1:obj_type, ?x0:obj_type), handempty(), onTop(?x0:obj_type, ?x1:obj_type), onfloor(?x0:obj_type), ~inhandofrobot(?x0:obj_type)] + Delete Effects: [inhandofrobot(?x0:obj_type), ~atsamelocation(?x0:obj_type, ?x1:obj_type), ~atsamelocation(?x1:obj_type, ?x0:obj_type), ~onTop(?x0:obj_type, ?x1:obj_type), ~onfloor(?x0:obj_type)] + Ignore Effects: [] + Option Spec: Actions.drop_2() +NSRT-Op1: + Parameters: [?x0:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), handempty(), inreachofrobot(?x0:obj_type), insameroomasrobot(?x0:obj_type), onfloor(?x0:obj_type), ~openable(?x0:obj_type)] + Add Effects: [openable(?x0:obj_type)] + Delete Effects: [~openable(?x0:obj_type)] + Ignore Effects: [] + Option Spec: Actions.open() +NSRT-Op11: + Parameters: [?x0:obj_type, ?x1:obj_type, ?x2:surface_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x0:obj_type, ?x2:surface_type), atsamelocation(?x1:obj_type, ?x1:obj_type), atsamelocation(?x1:obj_type, ?x2:surface_type), atsamelocation(?x2:surface_type, ?x0:obj_type), atsamelocation(?x2:surface_type, ?x1:obj_type), atsamelocation(?x2:surface_type, ?x2:surface_type), handempty(), inreachofrobot(?x0:obj_type), inreachofrobot(?x2:surface_type), insameroomasrobot(?x0:obj_type), insameroomasrobot(?x1:obj_type), insameroomasrobot(?x2:surface_type), nextto(?x0:obj_type, ?x1:obj_type), nextto(?x1:obj_type, ?x0:obj_type), onTop(?x0:obj_type, ?x2:surface_type), 
onTop(?x1:obj_type, ?x2:surface_type), onfloor(?x0:obj_type), onfloor(?x1:obj_type), onfloor(?x2:surface_type), ~inhandofrobot(?x0:obj_type)] + Add Effects: [inhandofrobot(?x0:obj_type), ~atsamelocation(?x0:obj_type, ?x2:surface_type), ~atsamelocation(?x2:surface_type, ?x0:obj_type), ~nextto(?x0:obj_type, ?x1:obj_type), ~nextto(?x1:obj_type, ?x0:obj_type), ~onTop(?x0:obj_type, ?x2:surface_type), ~onfloor(?x0:obj_type)] + Delete Effects: [atsamelocation(?x0:obj_type, ?x2:surface_type), atsamelocation(?x2:surface_type, ?x0:obj_type), handempty(), nextto(?x0:obj_type, ?x1:obj_type), nextto(?x1:obj_type, ?x0:obj_type), onTop(?x0:obj_type, ?x2:surface_type), onfloor(?x0:obj_type), ~inhandofrobot(?x0:obj_type)] + Ignore Effects: [] + Option Spec: Actions.pickup_2() +NSRT-Op12: + Parameters: [?x0:obj_type, ?x1:surface_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x1:surface_type, ?x1:surface_type), inreachofrobot(?x1:surface_type), insameroomasrobot(?x0:obj_type), insameroomasrobot(?x1:surface_type), onfloor(?x0:obj_type), onfloor(?x1:surface_type), ~inreachofrobot(?x0:obj_type)] + Add Effects: [inreachofrobot(?x0:obj_type), ~inreachofrobot(?x1:surface_type)] + Delete Effects: [inreachofrobot(?x1:surface_type), ~inreachofrobot(?x0:obj_type)] + Ignore Effects: [] + Option Spec: Move() +NSRT-Op2: + Parameters: [?x0:obj_type, ?x1:obj_type, ?x2:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x1:obj_type, ?x1:obj_type), atsamelocation(?x2:obj_type, ?x2:obj_type), handempty(), insameroomasrobot(?x1:obj_type), insameroomasrobot(?x2:obj_type), onfloor(?x0:obj_type), onfloor(?x1:obj_type), onfloor(?x2:obj_type), ~atsamelocation(?x0:obj_type, ?x2:obj_type), ~atsamelocation(?x2:obj_type, ?x0:obj_type), ~inhandofrobot(?x0:obj_type), ~inreachofrobot(?x0:obj_type), ~inreachofrobot(?x2:obj_type), ~insameroomasrobot(?x0:obj_type), ~onTop(?x0:obj_type, ?x2:obj_type)] + Add Effects: [inreachofrobot(?x0:obj_type), 
insameroomasrobot(?x0:obj_type), ~insameroomasrobot(?x1:obj_type), ~insameroomasrobot(?x2:obj_type)] + Delete Effects: [insameroomasrobot(?x1:obj_type), insameroomasrobot(?x2:obj_type), ~inreachofrobot(?x0:obj_type), ~insameroomasrobot(?x0:obj_type)] + Ignore Effects: [] + Option Spec: Move() +NSRT-Op3: + Parameters: [?x0:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), handempty(), inreachofrobot(?x0:obj_type), insameroomasrobot(?x0:obj_type), onfloor(?x0:obj_type), ~inhandofrobot(?x0:obj_type)] + Add Effects: [inhandofrobot(?x0:obj_type), ~onfloor(?x0:obj_type)] + Delete Effects: [handempty(), onfloor(?x0:obj_type), ~inhandofrobot(?x0:obj_type)] + Ignore Effects: [] + Option Spec: Actions.pickup_0() +NSRT-Op6: + Parameters: [?x0:obj_type, ?x1:obj_type, ?x2:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x0:obj_type, ?x1:obj_type), atsamelocation(?x0:obj_type, ?x2:obj_type), atsamelocation(?x1:obj_type, ?x0:obj_type), atsamelocation(?x1:obj_type, ?x1:obj_type), atsamelocation(?x1:obj_type, ?x2:obj_type), atsamelocation(?x2:obj_type, ?x0:obj_type), atsamelocation(?x2:obj_type, ?x1:obj_type), atsamelocation(?x2:obj_type, ?x2:obj_type), handempty(), insameroomasrobot(?x0:obj_type), insameroomasrobot(?x1:obj_type), insameroomasrobot(?x2:obj_type), onTop(?x1:obj_type, ?x0:obj_type), onTop(?x1:obj_type, ?x2:obj_type), onTop(?x2:obj_type, ?x0:obj_type), onfloor(?x0:obj_type), onfloor(?x1:obj_type), onfloor(?x2:obj_type), under(?x2:obj_type, ?x1:obj_type), ~inhandofrobot(?x1:obj_type), ~inreachofrobot(?x0:obj_type), ~inreachofrobot(?x1:obj_type), ~inreachofrobot(?x2:obj_type)] + Add Effects: [inreachofrobot(?x0:obj_type), inreachofrobot(?x1:obj_type), inreachofrobot(?x2:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type), ~inreachofrobot(?x1:obj_type), ~inreachofrobot(?x2:obj_type)] + Ignore Effects: [] + Option Spec: Move() +NSRT-Op9: + Parameters: [?x0:surface_type, ?x1:obj_type] + Preconditions: 
[atsamelocation(?x0:surface_type, ?x0:surface_type), atsamelocation(?x1:obj_type, ?x1:obj_type), inhandofrobot(?x1:obj_type), inreachofrobot(?x0:surface_type), inreachofrobot(?x1:obj_type), insameroomasrobot(?x0:surface_type), insameroomasrobot(?x1:obj_type), onfloor(?x0:surface_type), ~atsamelocation(?x0:surface_type, ?x1:obj_type), ~atsamelocation(?x1:obj_type, ?x0:surface_type), ~inside(?x1:obj_type, ?x0:surface_type), ~onTop(?x0:surface_type, ?x1:obj_type), ~onfloor(?x1:obj_type)] + Add Effects: [atsamelocation(?x0:surface_type, ?x1:obj_type), atsamelocation(?x1:obj_type, ?x0:surface_type), handempty(), inside(?x1:obj_type, ?x0:surface_type), onTop(?x0:surface_type, ?x1:obj_type), onfloor(?x1:obj_type), ~inhandofrobot(?x1:obj_type)] + Delete Effects: [inhandofrobot(?x1:obj_type), ~atsamelocation(?x0:surface_type, ?x1:obj_type), ~atsamelocation(?x1:obj_type, ?x0:surface_type), ~inside(?x1:obj_type, ?x0:surface_type), ~onTop(?x0:surface_type, ?x1:obj_type), ~onfloor(?x1:obj_type)] + Ignore Effects: [] + Option Spec: Actions.drop_in() +NSRT-Op0: + Parameters: [?x0:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), insameroomasrobot(?x0:obj_type), onfloor(?x0:obj_type), ~inreachofrobot(?x0:obj_type)] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [] + Option Spec: Move() +NSRT-Op10: + Parameters: [?x0:obj_type, ?x1:surface_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x0:obj_type, ?x1:surface_type), atsamelocation(?x1:surface_type, ?x0:obj_type), atsamelocation(?x1:surface_type, ?x1:surface_type), handempty(), insameroomasrobot(?x0:obj_type), insameroomasrobot(?x1:surface_type), onTop(?x0:obj_type, ?x1:surface_type), onfloor(?x0:obj_type), onfloor(?x1:surface_type), ~inhandofrobot(?x0:obj_type), ~inreachofrobot(?x0:obj_type), ~inreachofrobot(?x1:surface_type)] + Add Effects: [inreachofrobot(?x0:obj_type), inreachofrobot(?x1:surface_type)] + 
Delete Effects: [~inreachofrobot(?x0:obj_type), ~inreachofrobot(?x1:surface_type)] + Ignore Effects: [] + Option Spec: Move() +NSRT-Op8: + Parameters: [?x0:surface_type, ?x1:obj_type, ?x2:obj_type] + Preconditions: [atsamelocation(?x0:surface_type, ?x0:surface_type), atsamelocation(?x1:obj_type, ?x1:obj_type), atsamelocation(?x1:obj_type, ?x2:obj_type), atsamelocation(?x2:obj_type, ?x1:obj_type), atsamelocation(?x2:obj_type, ?x2:obj_type), inreachofrobot(?x1:obj_type), inreachofrobot(?x2:obj_type), insameroomasrobot(?x0:surface_type), insameroomasrobot(?x1:obj_type), insameroomasrobot(?x2:obj_type), onTop(?x2:obj_type, ?x1:obj_type), onfloor(?x0:surface_type), onfloor(?x1:obj_type), onfloor(?x2:obj_type), ~inreachofrobot(?x0:surface_type)] + Add Effects: [inreachofrobot(?x0:surface_type), ~inreachofrobot(?x1:obj_type), ~inreachofrobot(?x2:obj_type)] + Delete Effects: [inreachofrobot(?x1:obj_type), inreachofrobot(?x2:obj_type), ~inreachofrobot(?x0:surface_type)] + Ignore Effects: [] + Option Spec: Move() +NSRT-Op7: + Parameters: [?x0:obj_type, ?x1:obj_type, ?x2:obj_type, ?x3:obj_type, ?x4:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), atsamelocation(?x0:obj_type, ?x1:obj_type), atsamelocation(?x0:obj_type, ?x2:obj_type), atsamelocation(?x0:obj_type, ?x3:obj_type), atsamelocation(?x0:obj_type, ?x4:obj_type), atsamelocation(?x1:obj_type, ?x0:obj_type), atsamelocation(?x1:obj_type, ?x1:obj_type), atsamelocation(?x1:obj_type, ?x4:obj_type), atsamelocation(?x2:obj_type, ?x0:obj_type), atsamelocation(?x2:obj_type, ?x2:obj_type), atsamelocation(?x2:obj_type, ?x3:obj_type), atsamelocation(?x3:obj_type, ?x0:obj_type), atsamelocation(?x3:obj_type, ?x2:obj_type), atsamelocation(?x3:obj_type, ?x3:obj_type), atsamelocation(?x4:obj_type, ?x0:obj_type), atsamelocation(?x4:obj_type, ?x1:obj_type), atsamelocation(?x4:obj_type, ?x4:obj_type), handempty(), inreachofrobot(?x0:obj_type), inreachofrobot(?x2:obj_type), inreachofrobot(?x3:obj_type), 
insameroomasrobot(?x0:obj_type), insameroomasrobot(?x1:obj_type), insameroomasrobot(?x2:obj_type), insameroomasrobot(?x3:obj_type), insameroomasrobot(?x4:obj_type), nextto(?x1:obj_type, ?x2:obj_type), nextto(?x1:obj_type, ?x3:obj_type), nextto(?x2:obj_type, ?x1:obj_type), nextto(?x2:obj_type, ?x4:obj_type), nextto(?x3:obj_type, ?x1:obj_type), nextto(?x3:obj_type, ?x4:obj_type), nextto(?x4:obj_type, ?x2:obj_type), nextto(?x4:obj_type, ?x3:obj_type), onTop(?x1:obj_type, ?x0:obj_type), onTop(?x1:obj_type, ?x4:obj_type), onTop(?x2:obj_type, ?x0:obj_type), onTop(?x2:obj_type, ?x3:obj_type), onTop(?x3:obj_type, ?x0:obj_type), onTop(?x4:obj_type, ?x0:obj_type), onfloor(?x0:obj_type), onfloor(?x1:obj_type), onfloor(?x2:obj_type), onfloor(?x3:obj_type), onfloor(?x4:obj_type), under(?x3:obj_type, ?x2:obj_type), under(?x4:obj_type, ?x1:obj_type), ~inhandofrobot(?x2:obj_type)] + Add Effects: [inhandofrobot(?x2:obj_type), ~atsamelocation(?x0:obj_type, ?x2:obj_type), ~atsamelocation(?x2:obj_type, ?x0:obj_type), ~atsamelocation(?x2:obj_type, ?x3:obj_type), ~atsamelocation(?x3:obj_type, ?x2:obj_type), ~nextto(?x1:obj_type, ?x2:obj_type), ~nextto(?x2:obj_type, ?x1:obj_type), ~nextto(?x2:obj_type, ?x4:obj_type), ~nextto(?x4:obj_type, ?x2:obj_type), ~onTop(?x2:obj_type, ?x0:obj_type), ~onTop(?x2:obj_type, ?x3:obj_type), ~onfloor(?x2:obj_type), ~under(?x3:obj_type, ?x2:obj_type)] + Delete Effects: [atsamelocation(?x0:obj_type, ?x2:obj_type), atsamelocation(?x2:obj_type, ?x0:obj_type), atsamelocation(?x2:obj_type, ?x3:obj_type), atsamelocation(?x3:obj_type, ?x2:obj_type), handempty(), nextto(?x1:obj_type, ?x2:obj_type), nextto(?x2:obj_type, ?x1:obj_type), nextto(?x2:obj_type, ?x4:obj_type), nextto(?x4:obj_type, ?x2:obj_type), onTop(?x2:obj_type, ?x0:obj_type), onTop(?x2:obj_type, ?x3:obj_type), onfloor(?x2:obj_type), under(?x3:obj_type, ?x2:obj_type), ~inhandofrobot(?x2:obj_type)] + Ignore Effects: [] + Option Spec: Actions.pickup_2() diff --git a/colla/view_demos.py 
b/colla/view_demos.py new file mode 100644 index 0000000000..fc277005ba --- /dev/null +++ b/colla/view_demos.py @@ -0,0 +1,230 @@ +"""View demos with task, goal, action sequence, and state changes.""" + +import glob +import os +import pickle +from typing import Set, Any + + +def parse_goal(task_name: str, ground_atoms_state: Set) -> Set: + """Parse goal atoms from ground atoms state for each task.""" + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and "towel" in str(atom)]) + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + 
str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if 
str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and "sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == 
"MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + + else: + return set() + + +def normalize_action(action: str) -> str: + """Normalize movement actions to 'move'.""" + action_str = str(action).replace("Actions.", "") + if action_str in ['forward', 'left', 'right']: + return 'move' + return action_str.split('_')[0] + + +def visualize_demos(path: str = '../demos/'): + """Load and visualize all demos with state changes.""" + csv_files = glob.glob(os.path.join(path, "*/*")) + + if not csv_files: + print(f"No demo files found in {path}") + return + + # Load all pickle files into a dictionary + demos = {os.path.basename(f): pickle.load(open(f, 'rb')) for f in csv_files} + + print("=" * 80) + print("MiniBehavior Demo Visualization") + print("=" * 80) + + for demo_idx, (demo_name, demo_data) in enumerate(demos.items(), 1): + task_name = demo_name.split("_")[0] + + print(f"\n{'=' * 80}") + print(f"Demo #{demo_idx}: {demo_name}") + print(f"{'=' * 80}") + + # Get final state to determine goal + final_state = list(demo_data.values())[-1][4] + goal_atoms = parse_goal(task_name, final_state) + + print(f"\nšŸ“‹ TASK: {task_name}") + print(f"\nšŸŽÆ GOAL ({len(goal_atoms)} atoms):") + for atom in sorted(goal_atoms, key=str): + print(f" • {atom}") + + # Collect action sequence with state changes + action_sequence_parts = [] + prev_action = None + prev_state = None + + for step_idx, traj in demo_data.items(): + action = 
normalize_action(traj[2]) + current_state = set(traj[1]) + + # Calculate state changes when action changes + if action != prev_action: + if prev_action is not None: + action_sequence_parts.append(prev_action) + + # Add state change info if we have a previous state + if prev_state is not None and action != prev_action: + added = len(current_state - prev_state) + deleted = len(prev_state - current_state) + action_sequence_parts.append(f"(+{added} / -{deleted})") + + prev_action = action + + prev_state = current_state + + # Add final action + if prev_action is not None: + action_sequence_parts.append(prev_action) + + print(f"\nšŸ”„ ACTION SEQUENCE ({len([p for p in action_sequence_parts if not p.startswith('(')])} actions):") + print(f" {' → '.join(action_sequence_parts)}") + + print(f"\n{'=' * 80}\n") + + +if __name__ == "__main__": + visualize_demos() diff --git a/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0 b/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0 new file mode 100644 index 0000000000..fc13cbf17c Binary files /dev/null and b/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0 b/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0 new file mode 100644 index 0000000000..39451f2a89 Binary files /dev/null and b/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0 b/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0 new file mode 100644 index 0000000000..2a68cf6c7b Binary files /dev/null and b/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0 differ diff --git 
a/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0 b/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0 new file mode 100644 index 0000000000..77c13ec731 Binary files /dev/null and b/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 b/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 new file mode 100644 index 0000000000..3a312b34d9 Binary files /dev/null and b/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0 b/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0 new file mode 100644 index 0000000000..7020128144 Binary files /dev/null and b/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0 b/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0 new file mode 100644 index 0000000000..ea8b5874a3 Binary files /dev/null and b/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0 b/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0 new file mode 100644 index 0000000000..ab28d8d64b Binary files /dev/null and b/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0 
b/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0 new file mode 100644 index 0000000000..382c43cea0 Binary files /dev/null and b/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0 b/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0 new file mode 100644 index 0000000000..96b0eecb02 Binary files /dev/null and b/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0 b/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0 new file mode 100644 index 0000000000..77701ff4b9 Binary files /dev/null and b/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0 b/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0 new file mode 100644 index 0000000000..bf751d78b9 Binary files /dev/null and b/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0 b/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0 new file mode 100644 index 0000000000..26a152ccdb Binary files /dev/null and b/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0 new file mode 100644 index 
0000000000..77ae2a429d Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1 new file mode 100644 index 0000000000..e8ae981608 Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1 differ diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2 new file mode 100644 index 0000000000..b160d06854 Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2 differ diff --git a/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0 b/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0 new file mode 100644 index 0000000000..90acc25406 Binary files /dev/null and b/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 b/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 new file mode 100644 index 0000000000..6a0bb3279a Binary files /dev/null and b/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0 b/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0 new file mode 100644 index 0000000000..7884ebdaf6 Binary files /dev/null and b/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0 differ diff --git a/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0 
b/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0 new file mode 100644 index 0000000000..966f1a9653 Binary files /dev/null and b/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0 differ diff --git a/extra/MiniGrid-CleaningACar-16x16-N2-v0_0 b/extra/MiniGrid-CleaningACar-16x16-N2-v0_0 new file mode 100644 index 0000000000..0cda03cceb Binary files /dev/null and b/extra/MiniGrid-CleaningACar-16x16-N2-v0_0 differ diff --git a/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 b/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 new file mode 100644 index 0000000000..525d1b37cb Binary files /dev/null and b/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 differ diff --git a/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0 b/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0 new file mode 100644 index 0000000000..70a54c0248 Binary files /dev/null and b/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0 differ diff --git a/extra/MiniGrid-SortingBooks-16x16-N2-v0_0 b/extra/MiniGrid-SortingBooks-16x16-N2-v0_0 new file mode 100644 index 0000000000..40139ef049 Binary files /dev/null and b/extra/MiniGrid-SortingBooks-16x16-N2-v0_0 differ diff --git a/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 b/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 new file mode 100644 index 0000000000..3deaefae46 Binary files /dev/null and b/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 differ diff --git a/predicators/approaches/minigrid_controller_approach.py b/predicators/approaches/minigrid_controller_approach.py new file mode 100644 index 0000000000..de2748db29 --- /dev/null +++ b/predicators/approaches/minigrid_controller_approach.py @@ -0,0 +1,30 @@ +"""An approach that just takes random low-level actions.""" + +from typing import Callable + +from predicators.approaches import BaseApproach +from predicators.structs import Action, State, Task + + +class MinigridControllerApproach(BaseApproach): + """Samples 
random low-level actions.""" + + @classmethod + def get_name(cls) -> str: + return "minigrid_controller" + + @property + def is_learning_based(self) -> bool: + return False + + def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]: + zero_vec = self._action_space.low + + def _policy(_: State) -> Action: + action_vec = zero_vec.copy() + print(task.goal) + action_vec[int(input("Action: "))] = 1.0 + print(action_vec) + return Action(action_vec) + + return _policy diff --git a/predicators/envs/mini_behavior_env.py b/predicators/envs/mini_behavior_env.py new file mode 100644 index 0000000000..56467cf84f --- /dev/null +++ b/predicators/envs/mini_behavior_env.py @@ -0,0 +1,386 @@ +"""A MiniBehavior environment wrapping https://github.com/StanfordVL/mini_behavior.""" +import sys +from typing import ClassVar, Dict, List, Optional, Sequence, Set + +import gymnasium as gym +import matplotlib +import numpy as np +from gym.spaces import Box + +from predicators import utils +from predicators.envs import BaseEnv +from predicators.settings import CFG +from predicators.structs import Action, EnvironmentTask, Image, Object, \ + Observation, Predicate, State, Type, Video + +from minigrid.wrappers import * +from mini_behavior.window import Window +from mini_behavior.utils.save import get_step, save_demo +from mini_behavior.grid import GridDimension +from mini_behavior.utils.wrappers import MiniBHFullyObsWrapper +from mini_behavior.utils.save import all_state_values + +class MiniBehavior(BaseEnv): + """MiniBehavior environment wrapping gym-sokoban.""" + + name_to_enum: ClassVar[Dict[str, int]] = OBJECT_TO_IDX + + object_type = Type("obj", ["row", "column", "type", "state", "color"]) + + def __init__(self, use_gui: bool = True) -> None: + super().__init__(use_gui) + + # Predicates + self._IsLoc = Predicate("IsLoc", [self.object_type], self._IsLoc_holds) + self._Above = Predicate("Above", [self.object_type, self.object_type], + self._Above_holds) + self._Below = 
Predicate("Below", [self.object_type, self.object_type], + self._Below_holds) + self._RightOf = Predicate("RightOf", + [self.object_type, self.object_type], + self._RightOf_holds) + self._LeftOf = Predicate("LeftOf", + [self.object_type, self.object_type], + self._LeftOf_holds) + self._IsFacingUp = Predicate("IsFacingUp", [self.object_type], + self._IsFacingUp_holds) + self._IsFacingDown = Predicate("IsFacingDown", [self.object_type], + self._IsFacingDown_holds) + self._IsFacingLeft = Predicate("IsFacingLeft", [self.object_type], + self._IsFacingLeft_holds) + self._IsFacingRight = Predicate("IsFacingRight", [self.object_type], + self._IsFacingRight_holds) + self._IsNonGoalLoc = Predicate("IsNonGoalLoc", [self.object_type], + self._IsNonGoalLoc_holds) + self._Unknown = Predicate("Unknown", [self.object_type], + self._Unknown_holds) + self._Found = Predicate("Found", [self.object_type], + self._Found_holds) + self._IsAgent, self._At, self._IsGoal, self._IsBall, \ + self._IsKey, self._IsBox, self._IsRed, self._IsGreen, \ + self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, \ + self._Holding, self._Near = self.get_goal_predicates() + + self.last_action = None + + # NOTE: we can change the level by modifying what we pass + + # into gym.make here. 
+ if CFG.mini_behavior_env_fully_observable: + self._gym_env = MiniBHFullyObsWrapper(gym.make(CFG.mini_behavior_env_name)) + else: + NotImplementedError("Partial Observability Not implemented yet") + + @classmethod + def get_goal_predicates(cls) -> list[Predicate]: + """Defined public so that the perceiver can use it.""" + return [Predicate("IsAgent", [cls.object_type], cls._IsAgent_holds), + Predicate("At", [cls.object_type, cls.object_type], cls._At_holds), + Predicate("IsGoal", [cls.object_type], cls._IsGoal_holds), + Predicate("IsBall", [cls.object_type], cls._IsBall_holds), + Predicate("IsKey", [cls.object_type], cls._IsKey_holds), + Predicate("IsBox", [cls.object_type], cls._IsBox_holds), + Predicate("IsRed", [cls.object_type], cls._IsRed_holds), + Predicate("IsGreen", [cls.object_type], cls._IsGreen_holds), + Predicate("IsBlue", [cls.object_type], cls._IsBlue_holds), + Predicate("IsPurple", [cls.object_type], cls._IsPurple_holds), + Predicate("IsYellow", [cls.object_type], cls._IsYellow_holds), + Predicate("IsGrey", [cls.object_type], cls._IsGrey_holds), + Predicate("Holding", [cls.object_type], cls._Holding_holds), + Predicate("Near", [cls.object_type, cls.object_type], cls._Near_holds)] + + + def _generate_train_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_train_tasks, train_or_test="train") + + def _generate_test_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_test_tasks, train_or_test="test") + + @classmethod + def get_name(cls) -> str: + return "mini_behavior_env" + + def get_observation(self) -> Observation: + return self._copy_observation(self._current_observation) + + def render_state_plt( + self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> matplotlib.figure.Figure: + raise NotImplementedError("This env does not use Matplotlib") + + def render_state(self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = 
None, + caption: Optional[str] = None) -> Video: + raise NotImplementedError("A gym environment cannot render " + "arbitrary states.") + + def render(self, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + assert caption is None + arr: Image = self._gym_env.get_frame() + return [arr] + + @property + def predicates(self) -> Set[Predicate]: + return { + self._At, self._IsLoc, self._Above, self._Below, + self._RightOf, self._LeftOf, self._IsAgent, self._IsGoal, self._IsNonGoalLoc, + self._IsFacingUp, self._IsFacingDown, self._IsFacingLeft, self._IsFacingRight, + self._Unknown, self._Found, self._IsBall, self._IsKey, self._IsBox, self._IsRed, + self._IsGreen, self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, + self._Holding, self._Near + } + + @property + def goal_predicates(self) -> Set[Predicate]: + return {self._IsAgent, self._At, self._IsGoal} + + @property + def types(self) -> Set[Type]: + return {self.object_type} + + @property + def action_space(self) -> Box: + # One-hot encoding of discrete action space. + num_actions = 15 + assert self._gym_env.action_space.n == num_actions # type: ignore + lowers = np.zeros(num_actions, dtype=np.float32) + uppers = np.ones(num_actions, dtype=np.float32) + return Box(lowers, uppers) + + def reset(self, train_or_test: str, task_idx: int) -> Observation: + """Resets the current state to the train or test task initial state.""" + self._current_task = self.get_task(train_or_test, task_idx) + self._current_observation = self._current_task.init_obs + # We now need to reset the underlying gym environment to the correct + # state. + seed = utils.get_task_seed(train_or_test, task_idx) + self._reset_initial_state_from_seed(seed) + return self._copy_observation(self._current_observation) + + def simulate(self, state: State, action: Action) -> State: + raise NotImplementedError("Simulate not implemented for gym envs. 
" + + "Try using --bilevel_plan_without_sim True") + + def step(self, action: Action) -> Observation: + # Convert our actions to their discrete action space. + discrete_action = np.argmax(action.arr) + + goal_position = [ + y.cur_pos for x, y in enumerate(self._gym_env.grid.grid) if isinstance(y, Goal) + ] + self._current_observation = self._gym_env.step(discrete_action) + self._gym_env.render() + self.last_action = discrete_action + self._current_observation[4]['last_action'] = self.last_action + + if CFG.mini_behavior_gym_render: + # save frame to png + visual = self._gym_env.get_frame() + import matplotlib.pyplot as plt + plt.imsave('render.png', visual.astype('uint8')) + + + return self._copy_observation(self._current_observation) + + def goal_reached(self) -> bool: + if len(self._current_observation) == 5: + return self._current_observation[2] + return False + + def _get_tasks(self, num: int, + train_or_test: str) -> List[EnvironmentTask]: + tasks = [] + for task_idx in range(num): + seed = utils.get_task_seed(train_or_test, task_idx) + init_obs = self._reset_initial_state_from_seed(seed) + goal_description = self._gym_env.mission + task = EnvironmentTask(init_obs, goal_description) + tasks.append(task) + return tasks + + def _reset_initial_state_from_seed(self, seed: int) -> Observation: + self._gym_env.reset(seed=seed) + return self._gym_env.gen_full_obs() + + @classmethod + def _IsLoc_holds(cls, state: State, objects: Sequence[Object]) -> bool: + # Free spaces and goals are locations. 
+ loc, = objects + obj_type = int(state.get(loc, "type")) + return obj_type in {cls.name_to_enum["empty"], cls.name_to_enum["goal"]} + + @classmethod + def _IsGoal_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "goal") + + @classmethod + def _IsAgent_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "agent") + + @classmethod + def _IsBall_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "ball") + + @classmethod + def _IsKey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "key") + + @classmethod + def _IsBox_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "box") + + @classmethod + def _IsRed_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'red' + + @classmethod + def _IsGreen_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'green' + + @classmethod + def _IsBlue_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'blue' + + @classmethod + def _IsPurple_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'purple' + + @classmethod + def _IsYellow_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'yellow' + + @classmethod + def _IsGrey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'grey' + + @classmethod + def _IsNonGoalLoc_holds(cls, state: State, + objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "empty") + + @classmethod + def _At_holds(cls, state: State, objects: Sequence[Object]) -> bool: + 
obj1, _ = objects + if cls._check_enum(state, [obj1], "agent"): + return cls._check_spatial_relation(state, objects, 0, 0) + return False + + @classmethod + def _Above_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 1, 0) + + @classmethod + def _Below_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, -1, 0) + + @classmethod + def _RightOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, -1) + + @classmethod + def _LeftOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, 1) + + @classmethod + def _IsFacingRight_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 0 + return False + + @classmethod + def _IsFacingDown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 1 + return False + + @classmethod + def _IsFacingLeft_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 2 + return False + + @classmethod + def _IsFacingUp_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 3 + return False + + @classmethod + def _Holding_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == 3 + + @classmethod + def _Near_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, ob2 = objects + return cls._Above_holds(state, [obj1, ob2]) or \ + cls._Below_holds(state, [obj1, ob2]) or \ + cls._RightOf_holds(state, [obj1, ob2]) or \ 
+ cls._LeftOf_holds(state, [obj1, ob2]) + + @classmethod + def _Unknown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == -1 + + @classmethod + def _Found_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) != -1 + + @classmethod + def get_objects_of_enum(cls, state: State, enum_name: str) -> Set[Object]: + """Made public for use by perceiver.""" + return { + o + for o in state + if int(state.get(o, "type")) == int(cls.name_to_enum[enum_name]) + } + + @classmethod + def _check_spatial_relation(cls, state: State, objects: Sequence[Object], + dr: int, dc: int) -> bool: + obj1, obj2 = objects + obj1_r = int(state.get(obj1, "row")) + obj1_c = int(state.get(obj1, "column")) + obj2_r = int(state.get(obj2, "row")) + obj2_c = int(state.get(obj2, "column")) + if obj1_r == sys.maxsize or obj2_r == sys.maxsize or obj1_c == sys.maxsize or obj2_c == sys.maxsize: + return False + return ((obj1_r + dr) == obj2_r) and ((obj1_c + dc) == obj2_c) + + @classmethod + def _check_enum(cls, state: State, objects: Sequence[Object], + enum_name: str) -> bool: + obj, = objects + obj_type = state.get(obj, "type") + return int(obj_type) == int(cls.name_to_enum[enum_name]) + + @classmethod + def _is_static(cls, obj: Object, state: State) -> bool: + return cls._IsGoal_holds(state, [obj]) or \ + cls._IsNonGoalLoc_holds(state, [obj]) + + @classmethod + def _is_dynamic(cls, obj: Object, state: State) -> bool: + return not cls._is_static(obj, state) + + def _copy_observation(self, obs: Observation) -> Observation: + return tuple(m.copy() if type(m) not in [bool, int, float] else m for m in obs) diff --git a/predicators/envs/minigrid_env.py b/predicators/envs/minigrid_env.py new file mode 100644 index 0000000000..9f86008e15 --- /dev/null +++ b/predicators/envs/minigrid_env.py @@ -0,0 +1,386 @@ +"""A MiniGrid environment wrapping 
https://github.com/mpSchrader/gym-sokoban.""" +import sys +from typing import ClassVar, Dict, List, Optional, Sequence, Set + +import gymnasium as gym +import matplotlib +import numpy as np +from gym.spaces import Box + +from predicators import utils +from predicators.envs import BaseEnv +from predicators.settings import CFG +from predicators.structs import Action, EnvironmentTask, Image, Object, \ + Observation, Predicate, State, Type, Video + +from minigrid.core.constants import ( + OBJECT_TO_IDX, +) +from minigrid.core.world_object import Ball as BallObj, Goal, Key as KeyObj, Box as BoxObj +from minigrid.wrappers import FullyObsWrapper + +class MiniGridEnv(BaseEnv): + """MiniGrid environment wrapping gym-sokoban.""" + + name_to_enum: ClassVar[Dict[str, int]] = OBJECT_TO_IDX + + object_type = Type("obj", ["row", "column", "type", "state", "color"]) + + def __init__(self, use_gui: bool = True) -> None: + super().__init__(use_gui) + + # Predicates + self._IsLoc = Predicate("IsLoc", [self.object_type], self._IsLoc_holds) + self._Above = Predicate("Above", [self.object_type, self.object_type], + self._Above_holds) + self._Below = Predicate("Below", [self.object_type, self.object_type], + self._Below_holds) + self._RightOf = Predicate("RightOf", + [self.object_type, self.object_type], + self._RightOf_holds) + self._LeftOf = Predicate("LeftOf", + [self.object_type, self.object_type], + self._LeftOf_holds) + self._IsFacingUp = Predicate("IsFacingUp", [self.object_type], + self._IsFacingUp_holds) + self._IsFacingDown = Predicate("IsFacingDown", [self.object_type], + self._IsFacingDown_holds) + self._IsFacingLeft = Predicate("IsFacingLeft", [self.object_type], + self._IsFacingLeft_holds) + self._IsFacingRight = Predicate("IsFacingRight", [self.object_type], + self._IsFacingRight_holds) + self._IsNonGoalLoc = Predicate("IsNonGoalLoc", [self.object_type], + self._IsNonGoalLoc_holds) + self._Unknown = Predicate("Unknown", [self.object_type], + self._Unknown_holds) + 
self._Found = Predicate("Found", [self.object_type], + self._Found_holds) + self._IsAgent, self._At, self._IsGoal, self._IsBall, \ + self._IsKey, self._IsBox, self._IsRed, self._IsGreen, \ + self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, \ + self._Holding, self._Near = self.get_goal_predicates() + + self.last_action = None + + # NOTE: we can change the level by modifying what we pass + + # into gym.make here. + if CFG.minigrid_gym_fully_observable: + self._gym_env = FullyObsWrapper(gym.make(CFG.minigrid_gym_name)) + else: + self._gym_env = gym.make(CFG.minigrid_gym_name) + + @classmethod + def get_goal_predicates(cls) -> list[Predicate]: + """Defined public so that the perceiver can use it.""" + return [Predicate("IsAgent", [cls.object_type], cls._IsAgent_holds), + Predicate("At", [cls.object_type, cls.object_type], cls._At_holds), + Predicate("IsGoal", [cls.object_type], cls._IsGoal_holds), + Predicate("IsBall", [cls.object_type], cls._IsBall_holds), + Predicate("IsKey", [cls.object_type], cls._IsKey_holds), + Predicate("IsBox", [cls.object_type], cls._IsBox_holds), + Predicate("IsRed", [cls.object_type], cls._IsRed_holds), + Predicate("IsGreen", [cls.object_type], cls._IsGreen_holds), + Predicate("IsBlue", [cls.object_type], cls._IsBlue_holds), + Predicate("IsPurple", [cls.object_type], cls._IsPurple_holds), + Predicate("IsYellow", [cls.object_type], cls._IsYellow_holds), + Predicate("IsGrey", [cls.object_type], cls._IsGrey_holds), + Predicate("Holding", [cls.object_type], cls._Holding_holds), + Predicate("Near", [cls.object_type, cls.object_type], cls._Near_holds)] + + + def _generate_train_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_train_tasks, train_or_test="train") + + def _generate_test_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_test_tasks, train_or_test="test") + + @classmethod + def get_name(cls) -> str: + return "minigrid_env" + + def get_observation(self) -> Observation: + 
return self._copy_observation(self._current_observation) + + def render_state_plt( + self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> matplotlib.figure.Figure: + raise NotImplementedError("This env does not use Matplotlib") + + def render_state(self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + raise NotImplementedError("A gym environment cannot render " + "arbitrary states.") + + def render(self, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + assert caption is None + arr: Image = self._gym_env.get_frame() + import matplotlib.pyplot as plt + plt.imsave('visual_image.png', arr.astype('uint8')) + return [arr] + + @property + def predicates(self) -> Set[Predicate]: + return { + self._At, self._IsLoc, self._Above, self._Below, + self._RightOf, self._LeftOf, self._IsAgent, self._IsGoal, self._IsNonGoalLoc, + self._IsFacingUp, self._IsFacingDown, self._IsFacingLeft, self._IsFacingRight, + self._Unknown, self._Found, self._IsBall, self._IsKey, self._IsBox, self._IsRed, + self._IsGreen, self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, + self._Holding, self._Near + } + + @property + def goal_predicates(self) -> Set[Predicate]: + return {self._IsAgent, self._At, self._IsGoal} + + @property + def types(self) -> Set[Type]: + return {self.object_type} + + @property + def action_space(self) -> Box: + # One-hot encoding of discrete action space. 
+ num_actions = 7 + assert self._gym_env.action_space.n == num_actions # type: ignore + lowers = np.zeros(num_actions, dtype=np.float32) + uppers = np.ones(num_actions, dtype=np.float32) + return Box(lowers, uppers) + + def reset(self, train_or_test: str, task_idx: int) -> Observation: + """Resets the current state to the train or test task initial state.""" + self._current_task = self.get_task(train_or_test, task_idx) + self._current_observation = self._current_task.init_obs + # We now need to reset the underlying gym environment to the correct + # state. + seed = utils.get_task_seed(train_or_test, task_idx) + self._reset_initial_state_from_seed(seed) + return self._copy_observation(self._current_observation) + + def simulate(self, state: State, action: Action) -> State: + raise NotImplementedError("Simulate not implemented for gym envs. " + + "Try using --bilevel_plan_without_sim True") + + def step(self, action: Action) -> Observation: + # Convert our actions to their discrete action space. 
+ discrete_action = np.argmax(action.arr) + + goal_position = [ + y.cur_pos for x, y in enumerate(self._gym_env.grid.grid) if isinstance(y, Goal) + ] + self._current_observation = self._gym_env.step(discrete_action) + self._gym_env.render() + self.last_action = discrete_action + self._current_observation[4]['last_action'] = self.last_action + + if CFG.minigrid_gym_render: + # save frame to png + visual = self._gym_env.get_frame() + import matplotlib.pyplot as plt + plt.imsave('render.png', visual.astype('uint8')) + + + return self._copy_observation(self._current_observation) + + def goal_reached(self) -> bool: + if len(self._current_observation) == 5: + return self._current_observation[2] + return False + + def _get_tasks(self, num: int, + train_or_test: str) -> List[EnvironmentTask]: + tasks = [] + for task_idx in range(num): + seed = utils.get_task_seed(train_or_test, task_idx) + init_obs = self._reset_initial_state_from_seed(seed) + goal_description = self._gym_env.mission + task = EnvironmentTask(init_obs, goal_description) + tasks.append(task) + return tasks + + def _reset_initial_state_from_seed(self, seed: int) -> Observation: + return self._gym_env.reset(seed=seed) + + @classmethod + def _IsLoc_holds(cls, state: State, objects: Sequence[Object]) -> bool: + # Free spaces and goals are locations. 
+ loc, = objects + obj_type = int(state.get(loc, "type")) + return obj_type in {cls.name_to_enum["empty"], cls.name_to_enum["goal"]} + + @classmethod + def _IsGoal_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "goal") + + @classmethod + def _IsAgent_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "agent") + + @classmethod + def _IsBall_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "ball") + + @classmethod + def _IsKey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "key") + + @classmethod + def _IsBox_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "box") + + @classmethod + def _IsRed_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'red' + + @classmethod + def _IsGreen_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'green' + + @classmethod + def _IsBlue_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'blue' + + @classmethod + def _IsPurple_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'purple' + + @classmethod + def _IsYellow_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'yellow' + + @classmethod + def _IsGrey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'grey' + + @classmethod + def _IsNonGoalLoc_holds(cls, state: State, + objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "empty") + + @classmethod + def _At_holds(cls, state: State, objects: Sequence[Object]) -> bool: + 
obj1, _ = objects + if cls._check_enum(state, [obj1], "agent"): + return cls._check_spatial_relation(state, objects, 0, 0) + return False + + @classmethod + def _Above_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 1, 0) + + @classmethod + def _Below_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, -1, 0) + + @classmethod + def _RightOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, -1) + + @classmethod + def _LeftOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, 1) + + @classmethod + def _IsFacingRight_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 0 + return False + + @classmethod + def _IsFacingDown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 1 + return False + + @classmethod + def _IsFacingLeft_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 2 + return False + + @classmethod + def _IsFacingUp_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 3 + return False + + @classmethod + def _Holding_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == 3 + + @classmethod + def _Near_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, ob2 = objects + return cls._Above_holds(state, [obj1, ob2]) or \ + cls._Below_holds(state, [obj1, ob2]) or \ + cls._RightOf_holds(state, [obj1, ob2]) or \ 
+ cls._LeftOf_holds(state, [obj1, ob2]) + + @classmethod + def _Unknown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == -1 + + @classmethod + def _Found_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) != -1 + + @classmethod + def get_objects_of_enum(cls, state: State, enum_name: str) -> Set[Object]: + """Made public for use by perceiver.""" + return { + o + for o in state + if int(state.get(o, "type")) == int(cls.name_to_enum[enum_name]) + } + + @classmethod + def _check_spatial_relation(cls, state: State, objects: Sequence[Object], + dr: int, dc: int) -> bool: + obj1, obj2 = objects + obj1_r = int(state.get(obj1, "row")) + obj1_c = int(state.get(obj1, "column")) + obj2_r = int(state.get(obj2, "row")) + obj2_c = int(state.get(obj2, "column")) + if obj1_r == sys.maxsize or obj2_r == sys.maxsize or obj1_c == sys.maxsize or obj2_c == sys.maxsize: + return False + return ((obj1_r + dr) == obj2_r) and ((obj1_c + dc) == obj2_c) + + @classmethod + def _check_enum(cls, state: State, objects: Sequence[Object], + enum_name: str) -> bool: + obj, = objects + obj_type = state.get(obj, "type") + return int(obj_type) == int(cls.name_to_enum[enum_name]) + + @classmethod + def _is_static(cls, obj: Object, state: State) -> bool: + return cls._IsGoal_holds(state, [obj]) or \ + cls._IsNonGoalLoc_holds(state, [obj]) + + @classmethod + def _is_dynamic(cls, obj: Object, state: State) -> bool: + return not cls._is_static(obj, state) + + def _copy_observation(self, obs: Observation) -> Observation: + return tuple(m.copy() if type(m) not in [bool, int, float] else m for m in obs) diff --git a/predicators/ground_truth_models/mini_behavior_env/__init__.py b/predicators/ground_truth_models/mini_behavior_env/__init__.py new file mode 100644 index 0000000000..2a8f9dca73 --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/__init__.py @@ -0,0 
+1,6 @@ +"""Ground truth models for MiniBehavior gym environment.""" + +from .nsrts import MiniBehaviorGroundTruthNSRTFactory +from .options import MiniBehaviorGroundTruthOptionFactory + +__all__ = ["MiniBehaviorGroundTruthOptionFactory", "MiniBehaviorGroundTruthNSRTFactory"] diff --git a/predicators/ground_truth_models/mini_behavior_env/nsrts.py b/predicators/ground_truth_models/mini_behavior_env/nsrts.py new file mode 100644 index 0000000000..24f6825af4 --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/nsrts.py @@ -0,0 +1,319 @@ +"""Ground-truth NSRTs for the cover environment.""" + +from typing import Dict, List, Set + +from predicators.ground_truth_models import GroundTruthNSRTFactory +from predicators.structs import NSRT, LiftedAtom, ParameterizedOption, \ + Predicate, Type, Variable +from predicators.utils import null_sampler + + +class MiniBehaviorGroundTruthNSRTFactory(GroundTruthNSRTFactory): + """Ground-truth NSRTs for the MiniBehavior environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"mini_behavior_env"} + + @staticmethod + def get_nsrts(env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + options: Dict[str, ParameterizedOption]) -> Set[NSRT]: + # Types + object_type = types["obj"] + + # Objects + obj1 = Variable("?obj1", object_type) + obj2 = Variable("?obj2", object_type) + obj3 = Variable("?obj3", object_type) + + # Predicates + At = predicates["At"] + IsLoc = predicates["IsLoc"] + Above = predicates["Above"] + Below = predicates["Below"] + RightOf = predicates["RightOf"] + LeftOf = predicates["LeftOf"] + IsAgent = predicates["IsAgent"] + IsGoal = predicates["IsGoal"] + IsFacingUp = predicates["IsFacingUp"] + IsFacingDown = predicates["IsFacingDown"] + IsFacingLeft = predicates["IsFacingLeft"] + IsFacingRight = predicates["IsFacingRight"] + Unknown = predicates["Unknown"] + Found = predicates["Found"] + Holding = predicates["Holding"] + Near = predicates["Near"] + + # Options + 
MoveForward = options["Forward"] + TurnLeft = options["Left"] + TurnRight = options["Right"] + Pickup = options["Pickup_0"] + Drop = options["Drop_0"] + Toggle = options["Toggle"] + FindObj = options["FindObj"] + ReplanToObj = options["ReplanToObj"] + + nsrts = set() + + # MoveUp + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Above, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingUp, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars: List[Variable] = [] # dummy - not used + move_up_nsrt = NSRT("MoveUp", parameters, preconditions, add_effects, + delete_effects, set(), option, option_vars, + null_sampler) + nsrts.add(move_up_nsrt) + + # MoveDown + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Below, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingDown, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_down_nsrt = NSRT("MoveDown", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_down_nsrt) + + # MoveRight + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(RightOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingRight, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_right_nsrt = 
NSRT("MoveRight", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_right_nsrt) + + # MoveLeft + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(LeftOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingLeft, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_left_nsrt = NSRT("MoveLeft", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_left_nsrt) + + # TurnRight + turn_right_from_up_nsrt = NSRT("TurnRightFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_up_nsrt) + + turn_right_from_down_nsrt = NSRT("TurnRightFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_down_nsrt) + + turn_right_from_left_nsrt = NSRT("TurnRightFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_left_nsrt) + + turn_right_from_right_nsrt = NSRT("TurnRightFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_right_nsrt) + + # TurnLeft + turn_left_from_up_nsrt = NSRT("TurnLeftFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + 
set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_up_nsrt) + + turn_left_from_down_nsrt = NSRT("TurnLeftFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_down_nsrt) + + turn_left_from_left_nsrt = NSRT("TurnLeftFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_left_nsrt) + + turn_left_from_right_nsrt = NSRT("TurnLeftFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_right_nsrt) + + # Pickup Left + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(LeftOf, [obj2, obj3]), + LiftedAtom(IsFacingLeft, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(LeftOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_left_nsrt = NSRT("Pickup_Left", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_left_nsrt) + + # Pickup Right + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(RightOf, [obj2, obj3]), + LiftedAtom(IsFacingRight, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(RightOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_right_nsrt = NSRT("Pickup_Right", parameters, preconditions, + add_effects, 
delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_right_nsrt) + + # Pickup Up + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Above, [obj2, obj3]), + LiftedAtom(IsFacingUp, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Above, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_up_nsrt = NSRT("Pickup_Up", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_up_nsrt) + + # Pickup Down + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Below, [obj2, obj3]), + LiftedAtom(IsFacingDown, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Below, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_down_nsrt = NSRT("Pickup_Down", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_down_nsrt) + + # Drop + # TODO + + # Toggle + # TODO + + # For Partial Observability + # Find Object + find_obj_nsrt = NSRT("FindObj", [obj1], + {LiftedAtom(Unknown, [obj1])}, + {LiftedAtom(Found, [obj1])}, + set(), + {LeftOf, RightOf, Above, Below}, + FindObj, [obj1], null_sampler) + nsrts.add(find_obj_nsrt) + + # Replan With Obj Known + replan_to_obj_nsrt = NSRT("ReplanToObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(IsLoc, [obj2]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(At, [obj1, obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_obj_nsrt) + + replan_to_pickable_obj_nsrt = 
NSRT("ReplanToPickableObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(Holding, [obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_pickable_obj_nsrt) + + return nsrts diff --git a/predicators/ground_truth_models/mini_behavior_env/options.py b/predicators/ground_truth_models/mini_behavior_env/options.py new file mode 100644 index 0000000000..e664a4b6ff --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/options.py @@ -0,0 +1,92 @@ +"""Ground-truth options for the sokoban environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np +from gym.spaces import Box + +from enum import IntEnum +from predicators import utils +from predicators.ground_truth_models import GroundTruthOptionFactory +from predicators.structs import Action, Array, Object, ParameterizedOption, \ + ParameterizedPolicy, Predicate, State, Type + +class Actions(IntEnum): + left = 0 + right = 1 + forward = 2 + toggle = 3 + open = 4 + close = 5 + slice = 6 + cook = 7 + drop_in = 8 + pickup_0 = 9 + pickup_1 = 10 + pickup_2 = 11 + drop_0 = 12 + drop_1 = 13 + drop_2 = 14 + +class MiniBehaviorGroundTruthOptionFactory(GroundTruthOptionFactory): + """Ground-truth options for the MiniBehavior environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"mini_behavior_env"} + + @classmethod + def get_options(cls, env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + action_space: Box) -> Set[ParameterizedOption]: + + # Reformat names for consistency with other option naming. 
+ def _format_name(name: str) -> str: + return "".join([n.capitalize() for n in name.split(" ")]) + + options: Set[ParameterizedOption] = { + utils.SingletonParameterizedOption( + _format_name(name), cls._create_policy(discrete_action=i)) + for i, name in {value: key for key, value in Actions.__members__.items()}.items() + } + + # FindObj option. + object_type = types["obj"] + FindObjOption = ParameterizedOption( + "FindObj", + [object_type], + Box(low=np.array([]), high=np.array([]), shape=(0, )), + policy=cls._create_find_obj_policy(), + initiable=lambda s, m, o, p: True, + terminal=lambda s, m, o, p: s.get(o[0], "type") == 8 and s.get(o[0], "state") != -1) # 8 is the goal enum type + options.add(FindObjOption) + + # ReplanToObj option. + ReplanToObj = utils.SingletonParameterizedOption("ReplanToObj", cls._create_policy(discrete_action=6)) + options.add(ReplanToObj) + + return options + + @classmethod + def _create_policy(cls, discrete_action: int) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[discrete_action] = 1 + return Action(arr) + + return policy + + @classmethod + def _create_find_obj_policy(cls) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. 
+ arr = np.zeros(7, dtype=np.float32) + arr[np.random.choice([0, 1, 2], 1, p=[0.2, 0.2, 0.6])[0]] = 1 + return Action(arr) + + return policy diff --git a/predicators/ground_truth_models/minigrid_env/__init__.py b/predicators/ground_truth_models/minigrid_env/__init__.py new file mode 100644 index 0000000000..63ddf1fab2 --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/__init__.py @@ -0,0 +1,6 @@ +"""Ground truth models for MiniGrid gym environment.""" + +from .nsrts import MiniGridGroundTruthNSRTFactory +from .options import MiniGridGroundTruthOptionFactory + +__all__ = ["MiniGridGroundTruthOptionFactory", "MiniGridGroundTruthNSRTFactory"] diff --git a/predicators/ground_truth_models/minigrid_env/nsrts.py b/predicators/ground_truth_models/minigrid_env/nsrts.py new file mode 100644 index 0000000000..31304111de --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/nsrts.py @@ -0,0 +1,320 @@ +"""Ground-truth NSRTs for the cover environment.""" + +from typing import Dict, List, Set + +from predicators.ground_truth_models import GroundTruthNSRTFactory +from predicators.structs import NSRT, LiftedAtom, ParameterizedOption, \ + Predicate, Type, Variable +from predicators.utils import null_sampler + + +class MiniGridGroundTruthNSRTFactory(GroundTruthNSRTFactory): + """Ground-truth NSRTs for the MiniGrid environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"minigrid_env"} + + @staticmethod + def get_nsrts(env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + options: Dict[str, ParameterizedOption]) -> Set[NSRT]: + # Types + object_type = types["obj"] + + # Objects + obj1 = Variable("?obj1", object_type) + obj2 = Variable("?obj2", object_type) + obj3 = Variable("?obj3", object_type) + + # Predicates + At = predicates["At"] + IsLoc = predicates["IsLoc"] + Above = predicates["Above"] + Below = predicates["Below"] + RightOf = predicates["RightOf"] + LeftOf = predicates["LeftOf"] + IsAgent = 
predicates["IsAgent"] + IsGoal = predicates["IsGoal"] + IsFacingUp = predicates["IsFacingUp"] + IsFacingDown = predicates["IsFacingDown"] + IsFacingLeft = predicates["IsFacingLeft"] + IsFacingRight = predicates["IsFacingRight"] + Unknown = predicates["Unknown"] + Found = predicates["Found"] + Holding = predicates["Holding"] + Near = predicates["Near"] + + # Options + MoveForward = options["Forward"] + TurnLeft = options["Left"] + TurnRight = options["Right"] + Pickup = options["Pickup"] + Drop = options["Drop"] + Toggle = options["Toggle"] + Done = options["Done"] + FindObj = options["FindObj"] + ReplanToObj = options["ReplanToObj"] + + nsrts = set() + + # MoveUp + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Above, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingUp, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars: List[Variable] = [] # dummy - not used + move_up_nsrt = NSRT("MoveUp", parameters, preconditions, add_effects, + delete_effects, set(), option, option_vars, + null_sampler) + nsrts.add(move_up_nsrt) + + # MoveDown + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Below, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingDown, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_down_nsrt = NSRT("MoveDown", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_down_nsrt) + + # MoveRight + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + 
LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(RightOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingRight, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_right_nsrt = NSRT("MoveRight", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_right_nsrt) + + # MoveLeft + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(LeftOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingLeft, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_left_nsrt = NSRT("MoveLeft", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_left_nsrt) + + # TurnRight + turn_right_from_up_nsrt = NSRT("TurnRightFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_up_nsrt) + + turn_right_from_down_nsrt = NSRT("TurnRightFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_down_nsrt) + + turn_right_from_left_nsrt = NSRT("TurnRightFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_left_nsrt) + + turn_right_from_right_nsrt = NSRT("TurnRightFromRight", [obj1], + 
{LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_right_nsrt) + + # TurnLeft + turn_left_from_up_nsrt = NSRT("TurnLeftFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_up_nsrt) + + turn_left_from_down_nsrt = NSRT("TurnLeftFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_down_nsrt) + + turn_left_from_left_nsrt = NSRT("TurnLeftFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_left_nsrt) + + turn_left_from_right_nsrt = NSRT("TurnLeftFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_right_nsrt) + + # Pickup Left + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(LeftOf, [obj2, obj3]), + LiftedAtom(IsFacingLeft, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(LeftOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_left_nsrt = NSRT("Pickup_Left", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_left_nsrt) + + # Pickup Right + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), 
+ LiftedAtom(At, [obj1, obj3]), + LiftedAtom(RightOf, [obj2, obj3]), + LiftedAtom(IsFacingRight, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(RightOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_right_nsrt = NSRT("Pickup_Right", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_right_nsrt) + + # Pickup Up + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Above, [obj2, obj3]), + LiftedAtom(IsFacingUp, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Above, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_up_nsrt = NSRT("Pickup_Up", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_up_nsrt) + + # Pickup Down + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Below, [obj2, obj3]), + LiftedAtom(IsFacingDown, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Below, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_down_nsrt = NSRT("Pickup_Down", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_down_nsrt) + + # Drop + # TODO + + # Toggle + # TODO + + # For Partial Observability + # Find Object + find_obj_nsrt = NSRT("FindObj", [obj1], + {LiftedAtom(Unknown, [obj1])}, + {LiftedAtom(Found, [obj1])}, + set(), + {LeftOf, RightOf, Above, Below}, + FindObj, [obj1], null_sampler) + nsrts.add(find_obj_nsrt) + + # 
Replan With Obj Known + replan_to_obj_nsrt = NSRT("ReplanToObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(IsLoc, [obj2]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(At, [obj1, obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_obj_nsrt) + + replan_to_pickable_obj_nsrt = NSRT("ReplanToPickableObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(Holding, [obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_pickable_obj_nsrt) + + return nsrts diff --git a/predicators/ground_truth_models/minigrid_env/options.py b/predicators/ground_truth_models/minigrid_env/options.py new file mode 100644 index 0000000000..64b0a43291 --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/options.py @@ -0,0 +1,76 @@ +"""Ground-truth options for the sokoban environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np +from gym.spaces import Box +from minigrid.core.actions import Actions + +from predicators import utils +from predicators.ground_truth_models import GroundTruthOptionFactory +from predicators.structs import Action, Array, Object, ParameterizedOption, \ + ParameterizedPolicy, Predicate, State, Type + + +class MiniGridGroundTruthOptionFactory(GroundTruthOptionFactory): + """Ground-truth options for the minigrid environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"minigrid_env"} + + @classmethod + def get_options(cls, env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + action_space: Box) -> Set[ParameterizedOption]: + + # Reformat names for consistency with other option naming. 
+ def _format_name(name: str) -> str: + return "".join([n.capitalize() for n in name.split(" ")]) + + options: Set[ParameterizedOption] = { + utils.SingletonParameterizedOption( + _format_name(name), cls._create_policy(discrete_action=i)) + for i, name in {value: key for key, value in Actions.__members__.items()}.items() + } + + # FindObj option. + object_type = types["obj"] + FindObjOption = ParameterizedOption( + "FindObj", + [object_type], + Box(low=np.array([]), high=np.array([]), shape=(0, )), + policy=cls._create_find_obj_policy(), + initiable=lambda s, m, o, p: True, + terminal=lambda s, m, o, p: s.get(o[0], "type") == 8 and s.get(o[0], "state") != -1) # 8 is the goal enum type + options.add(FindObjOption) + + # ReplanToObj option. + ReplanToObj = utils.SingletonParameterizedOption("ReplanToObj", cls._create_policy(discrete_action=6)) + options.add(ReplanToObj) + + return options + + @classmethod + def _create_policy(cls, discrete_action: int) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[discrete_action] = 1 + return Action(arr) + + return policy + + @classmethod + def _create_find_obj_policy(cls) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. 
+ arr = np.zeros(7, dtype=np.float32) + arr[np.random.choice([0, 1, 2], 1, p=[0.2, 0.2, 0.6])[0]] = 1 + return Action(arr) + + return policy diff --git a/predicators/nsrt_learning/nsrt_learning_main.py b/predicators/nsrt_learning/nsrt_learning_main.py index d13ef054b6..72abeb4ac2 100644 --- a/predicators/nsrt_learning/nsrt_learning_main.py +++ b/predicators/nsrt_learning/nsrt_learning_main.py @@ -119,7 +119,8 @@ def learn_nsrts_from_data( if CFG.strips_learner != "oracle" or CFG.sampler_learner != "oracle" or \ CFG.option_learner != "no_learning": # Updates the PNADs in-place. - _learn_pnad_options(pnads, known_options, action_space) + if CFG.option_learner != "no_learning": + _learn_pnad_options(pnads, known_options, action_space) # STEP 4: Learn samplers (sampler_learning.py) and update PNADs. _learn_pnad_samplers(pnads, sampler_learner) # in-place update diff --git a/predicators/nsrt_learning/strips_learning/base_strips_learner.py b/predicators/nsrt_learning/strips_learning/base_strips_learner.py index 5d3aa998ac..876fde7503 100644 --- a/predicators/nsrt_learning/strips_learning/base_strips_learner.py +++ b/predicators/nsrt_learning/strips_learning/base_strips_learner.py @@ -162,7 +162,11 @@ def _check_single_demo_preservation( traj_goal, option_plan, atoms_seq) return ground_nsrt_plan is not None - def _recompute_datastores_from_segments(self, pnads: List[PNAD]) -> None: + def _recompute_datastores_from_segments(self, + pnads: List[PNAD], + check_only_preconditions: bool = False, + check_assertion: bool = True, + any_matching: bool = False) -> None: """For the given PNADs, wipe and recompute the datastores. 
Uses a "rationality" heuristic, where for each segment, we @@ -181,19 +185,24 @@ def _recompute_datastores_from_segments(self, pnads: List[PNAD]) -> None: continue objects = set(seg_traj[0].states[0]) for segment in seg_traj: + print(f"Finding best matching PNAD for segment") best_pnad, best_sub = self._find_best_matching_pnad_and_sub( - segment, objects, pnads) + segment, objects, pnads, check_only_preconditions, check_assertion, any_matching) + print(f"Best matching Substitution: {best_sub}") if best_pnad is not None: assert best_sub is not None best_pnad.add_to_datastore((segment, best_sub), check_effect_equality=False) + print("Finished recomputing datastores...") def _find_best_matching_pnad_and_sub( self, segment: Segment, objects: Set[Object], pnads: List[PNAD], - check_only_preconditions: bool = False + check_only_preconditions: bool = False, + check_assertion: bool = True, + any_matching: bool = False, ) -> Tuple[Optional[PNAD], Optional[Dict[Variable, Object]]]: """Find the best matching PNAD (if any) given our rationality-based score function, and return the PNAD and substitution necessary to @@ -226,7 +235,12 @@ def _find_best_matching_pnad_and_sub( for pnad in pnads: param_opt, opt_vars = pnad.option_spec if param_opt != segment_param_option: + if not any_matching: + continue + ##### + if len(pnad.op.parameters) > CFG.max_operator_arity: continue + ##### isub = dict(zip(opt_vars, segment_option_objs)) if segment in pnad.seg_to_keep_effects_sub: # If there are any variables only in the keep effects, @@ -244,7 +258,8 @@ def _find_best_matching_pnad_and_sub( # If the preconditions don't hold in the segment's # initial atoms, skip. 
if not ground_op.preconditions.issubset(segment.init_atoms): - continue + if not any_matching: + continue next_atoms = utils.apply_operator(ground_op, segment.init_atoms) if not check_only_preconditions: @@ -265,7 +280,8 @@ def _find_best_matching_pnad_and_sub( # with a most-general PNAD that has no add effects # and all other predicates sidelined, and thus this # assertion must hold. - assert next_atoms.issubset(segment.final_atoms) + if check_assertion: + assert next_atoms.issubset(segment.final_atoms) # This ground PNAD covers this segment. Score it! score = self._score_segment_ground_op_match(segment, ground_op) if score < best_score: # we want a closer match diff --git a/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py b/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py index 59906cd49a..b080b65dec 100644 --- a/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py +++ b/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py @@ -3,15 +3,32 @@ import functools import itertools -from typing import Dict, List, Set +from typing import Dict, List, Set, Optional, Sequence from predicators import utils from predicators.nsrt_learning.strips_learning import BaseSTRIPSLearner from predicators.settings import CFG from predicators.structs import PNAD, GroundAtom, Object, \ ParameterizedOption, Segment, STRIPSOperator, Variable, \ - _GroundSTRIPSOperator + _GroundSTRIPSOperator, _Atom, LowLevelTrajectory, Predicate, Type, Action, LiftedAtom, NSRT +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout +import re +name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 +} class GeneralToSpecificSTRIPSLearner(BaseSTRIPSLearner): """Base 
class for a general-to-specific STRIPS learner.""" @@ -73,11 +90,68 @@ def spawn_new_pnad(self, segment: Segment) -> PNAD: # parameters. all_objs = {o for eff in necessary_add_effects for o in eff.objects} missing_objs = sorted(all_objs - set(obj_to_var)) + + # ####### + # # Check if adding missing objects would exceed max_operator_arity + # total_params = len(obj_to_var) + len(missing_objs) + # if total_params > CFG.max_operator_arity: + # # Strategy: Filter necessary_add_effects to stay within limit + # # Priority 1: Keep effects involving option objects + # option_objs = set(segment.get_option().objects) + # filtered_effects = {eff for eff in necessary_add_effects + # if set(eff.objects).issubset(option_objs | set(obj_to_var.keys()))} + + # # If still too many, prioritize effects with fewer objects + # if filtered_effects: + # all_objs = {o for eff in filtered_effects for o in eff.objects} + # missing_objs_filtered = sorted(all_objs - set(obj_to_var)) + + # if len(obj_to_var) + len(missing_objs_filtered) > CFG.max_operator_arity: + # # Take effects with fewest new objects first + # effects_by_new_objs = sorted(filtered_effects, + # key=lambda eff: len(set(eff.objects) - set(obj_to_var.keys()))) + + # # Greedily add effects until we hit the parameter limit + # kept_effects = set() + # current_objs = set(obj_to_var.keys()) + # for eff in effects_by_new_objs: + # new_objs = set(eff.objects) - current_objs + # if len(current_objs) + len(new_objs) <= CFG.max_operator_arity: + # kept_effects.add(eff) + # current_objs.update(new_objs) + + # necessary_add_effects = kept_effects + # else: + # necessary_add_effects = filtered_effects + # else: + # # If no effects involve option objects, take first N effects + # effects_sorted = sorted(necessary_add_effects, + # key=lambda eff: len(eff.objects)) + # kept_effects = set() + # current_objs = set(obj_to_var.keys()) + # for eff in effects_sorted: + # new_objs = set(eff.objects) - current_objs + # if len(current_objs) + 
len(new_objs) <= CFG.max_operator_arity: + # kept_effects.add(eff) + # current_objs.update(new_objs) + # necessary_add_effects = kept_effects + + # # Recalculate after filtering + # all_objs = {o for eff in necessary_add_effects for o in eff.objects} + # missing_objs = sorted(all_objs - set(obj_to_var)) + # ####### + new_vars = utils.create_new_variables([o.type for o in missing_objs], existing_vars=pnad.op.parameters) obj_to_var.update(dict(zip(missing_objs, new_vars))) # Finally, we can lift necessary_add_effects. updated_params = sorted(obj_to_var.values()) + + # # TODO Assert that we stay within the arity limit + # assert len(updated_params) <= CFG.max_operator_arity, \ + # f"Operator would have {len(updated_params)} parameters " \ + # f"(max {CFG.max_operator_arity} allowed). Filtering failed." + updated_add_effects = { a.lift(obj_to_var) for a in necessary_add_effects @@ -138,7 +212,6 @@ def get_pnads_with_keep_effects(pnad: PNAD) -> Set[PNAD]: # Remember to copy seg_to_keep_effects_sub into the new_pnad! new_pnad.seg_to_keep_effects_sub = pnad.seg_to_keep_effects_sub new_pnads_with_keep_effects.add(new_pnad) - return new_pnads_with_keep_effects def _reset_all_segment_necessary_add_effs(self) -> None: @@ -499,3 +572,913 @@ def _assert_all_data_in_exactly_one_datastore(self, continue for segment in seg_traj: assert segment in all_segs_in_data + +class BackwardForwardSTRIPSLearner(GeneralToSpecificSTRIPSLearner): + """Learn STRIPS operators by backchaining and forward search.""" + + def _learn(self) -> List[PNAD]: + # Initialize the most general PNADs by merging self._initial_pnads. + # As a result, we will have one very general PNAD per option. + param_opt_to_nec_pnads: Dict[ParameterizedOption, List[PNAD]] = {} + # Extract all parameterized options from the data. 
+ parameterized_options = set() + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: + continue + for segment in seg_traj: + parameterized_options.add(segment.get_option().parent) + + # Set up the param_opt_to_nec_pnads dictionary. + for param_opt in parameterized_options: + param_opt_to_nec_pnads[param_opt] = [] + + prev_itr_ops: Set[STRIPSOperator] = set() + + # Load initial pnad set + if CFG.backward_forward_load_initial: + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + pnads = [self.parse_nsrt_block(nsrt_str) for nsrt_str in nsrt_strs] + self._recompute_datastores_from_segments(pnads) + for pnad in pnads: + param_opt_to_nec_pnads[pnad.option_spec[0]].append(pnad) + # TODO + # if pnad.option_spec[0] in param_opt_to_nec_pnads: + # param_opt_to_nec_pnads[pnad.option_spec[0]].append(pnad) + # else: + # print(f"Warning: PNAD with option {pnad.option_spec[0]} not in param_opt_to_nec_pnads") + ### + + # We loop until the harmless PNADs induced by our procedure + # converge to a fixed point (i.e, they don't change after two + # subsequent iterations). + while True: + # Run multiple passes of backchaining over the data until + # convergence to a fixed point. Note that this process creates + # operators with only parameters, preconditions, and add effects. + print("Backward-Forward STRIPS Learning Iteration") + + # Step 1: Run backchaining + self._backchain_multipass(param_opt_to_nec_pnads) + print("Backchaining multipass completed") + + # Induce delete effects, ignore effects and potentially + # keep effects. + self._induce_delete_side_keep(param_opt_to_nec_pnads) + print("Inducing delete, ignore, and keep effects") + + # Harmlessness should now hold, but it's slow to check. 
+ if CFG.backchaining_check_intermediate_harmlessness: + assert self._check_harmlessness( + self._get_uniquely_named_nec_pnads(param_opt_to_nec_pnads)) + print("Intermediate harmlessness check passed") + print("Recomputing datastores and filtering out PNADs that don't have datastores") + + # Recompute datastores and filter out PNADs that don't have datastores. + cur_itr_pnads_unfiltered = [ + pnad for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads + ] + self._recompute_datastores_from_segments(cur_itr_pnads_unfiltered) + print("Finished recomputing datastores", len(cur_itr_pnads_unfiltered)) + cur_itr_pnads_filtered = [] + for pnad in cur_itr_pnads_unfiltered: + if len(pnad.datastore) > 0: + # new_pre = self._induce_preconditions_via_intersection(pnad) + # NOTE: this implicitly changes param_opt_to_nec_pnads + # as well, since we're directly modifying the PNAD objects. + # nad.op = pnad.op.copy_with(preconditions=new_pre) + cur_itr_pnads_filtered.append(pnad) + else: + param_opt_to_nec_pnads[pnad.option_spec[0]].remove(pnad) + del cur_itr_pnads_unfiltered # should be unused after this + print("Current iteration PNADs filtered:", len(cur_itr_pnads_filtered)) + + # Check if the PNAD set has converged. If so, break. + if {pnad.op for pnad in cur_itr_pnads_filtered} == prev_itr_ops: + print("No changes in this pass, backchaining has reached a fixed point") + break + + prev_itr_ops = {pnad.op for pnad in cur_itr_pnads_filtered} + + ###### + # Step 2 & 3: Fixed forward refinement (strips and re-adds preconditions/ignore_effects) + self._fixed_forward_one_pass(param_opt_to_nec_pnads) + + # # Recompute datastores. + # cur_itr_pnads_unfiltered = [ + # pnad for pnads in param_opt_to_nec_pnads.values() + # for pnad in pnads + # ] + # self._recompute_datastores_from_segments(cur_itr_pnads_unfiltered, check_only_preconditions=True, check_assertion=False) + ###### + + # Assign a unique name to each PNAD. 
+ final_pnads = self._get_uniquely_named_nec_pnads( + param_opt_to_nec_pnads) + # Assert data has been correctly partitioned amongst PNADs. + # self._assert_all_data_in_exactly_one_datastore(final_pnads) + return final_pnads + + def parse_nsrt_block(self, block: str) -> PNAD: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in self._segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like 
?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) for ige in ignore_effects]) + if option_spec.split("(")[0] in option_specs: + option_spec = (option_specs[option_spec.split("(")[0]], []) + else: + a_name = option_spec.split("(")[0] + option_spec = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + print("ADDED OPTION", a_name) + + nsrt = NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) + return PNAD(nsrt.op, [], option_spec) + + def _forward_one_pass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, List[PNAD]] + ) -> None: + """Perform one forward search passes to refine PNAD preconditions + """ + + for ll_traj, seg_traj in zip(self._trajectories, self._segmented_trajs): + if not ll_traj.is_demo: + continue + task = self._train_tasks[ll_traj.train_task_idx] + + # Get initial atoms and object list + objects, _, _, 
ground_atoms_traj, _ = parse_objs_preds_and_options( + ll_traj, train_task_idx=ll_traj.train_task_idx) + + while True: + # TODO continue until plans match demo from start + init_atoms = ground_atoms_traj[1][0] + + # plan with current nsrts + nsrts = [pnad.op for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads] + nsrt_to_option = {pnad.op:pnad.option_spec for pnads in param_opt_to_nec_pnads.values() for pnad in pnads} + predicates = self._predicates + + # Plan using current operators + ground_nsrts, reachable_atoms = task_plan_grounding( + init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic( + "hadd", init_atoms, task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan( + init_atoms, task.goal, ground_nsrts, + reachable_atoms, heuristic, + timeout=100, seed=123, max_skeletons_optimized=3) + + skeleton, _, _ = next(task_plan_generator) + + # Check if plan matches the actual low-level trajectory + planned_options = [] + for step in skeleton: + planned_options.append(nsrt_to_option[step.parent][0]) + + last_mistakes = set() + for i, planned_option in enumerate(planned_options): + curr_traj = seg_traj[i] + print("GT vs Our Plan") + print(i, curr_traj.get_option().name, "=?=", planned_option.name) + if curr_traj.get_option().name != planned_option.name: + # skip repeated mistakes + print(i, curr_traj.get_option().name, "is not", planned_option.name) + if (i, curr_traj.get_option().name, planned_option.name) in last_mistakes: + print("Skipping repeated mistake") + continue + last_mistakes.add((i, curr_traj.get_option().name, planned_option.name)) + # TODO should not just be the first + pnad = None + for option_pnad in param_opt_to_nec_pnads[planned_option]: + if pnad is None: + pnad = option_pnad + if len(option_pnad.op.preconditions) < len(pnad.op.preconditions): + pnad = option_pnad + positive_data = pnad.datastore + diff_atoms = [] + diff_preds = [] + non_nec_diff_atoms = [] + 
non_nec_diff_preds = [] + necessary_effects = set.union(*[seg.necessary_add_effects for seg in seg_traj]) + + #### + # Lift atoms from each positive example using their substitutions + lifted_atoms_list = [] + for pos_seg in positive_data: + segment, var_to_obj = pos_seg + obj_to_var = {v: k for k, v in var_to_obj.items()} + + # Lift the init_atoms by substituting objects with variables + lifted_atoms = set() + for atom in segment.init_atoms: + #print(atom) + lifted_objs = [obj_to_var.get(obj, obj) for obj in atom.objects] + # Only include if all objects were successfully mapped to variables + if all(isinstance(o, Variable) for o in lifted_objs): + lifted_atoms.add(LiftedAtom(atom.predicate, lifted_objs)) + lifted_atoms_list.append(lifted_atoms) + + # Find intersection of lifted atoms across all positive examples + if lifted_atoms_list: + common_lifted_atoms = set.intersection(*lifted_atoms_list) if lifted_atoms_list else set() + + # Separate into necessary and non-necessary based on predicates + necessary_lifted = {atom for atom in common_lifted_atoms + if any(atom.predicate == nec_atom.predicate for nec_atom in necessary_effects)} + non_necessary_lifted = common_lifted_atoms - necessary_lifted + + diff_atoms.append(necessary_lifted) + diff_preds.append({atom.predicate for atom in necessary_lifted}) + non_nec_diff_atoms.append(non_necessary_lifted) + non_nec_diff_preds.append({atom.predicate for atom in non_necessary_lifted}) + + #### + + new_pre = set() + new_params = [] + print() + print(planned_option, set.intersection(*[s for s in diff_preds])) + new_preds = set.intersection(*[s for s in diff_preds]) + if len(new_preds) <= 0: + new_preds = set.intersection(*[s for s in non_nec_diff_preds]) + if new_preds != set(): + for pred in new_preds: + best_pnad, best_sub = self._find_best_matching_pnad_and_sub(positive_data[0][0], objects, param_opt_to_nec_pnads[planned_option], check_only_preconditions=True, check_assertion=False, any_matching=True) + pred_objs = 
[atom.objects for atom in positive_data[0][0].init_atoms if atom.predicate == pred][0] + print(pred_objs) + obj_vars = {v:k for k,v in best_sub.items()} + if best_pnad is not None: + params = [] + for obj in pred_objs: + if obj in obj_vars: + params.append(obj_vars[obj]) + else: + params.append(Variable("?x" + str(len(obj_vars.keys())), obj.type)) + new_pre.add(LiftedAtom(pred, params)) + new_params += params + print(params) + print(new_params) + print(pnad) + if len(new_pre) > len(pnad.op.preconditions): + # randomly/incrementally add one of the different predicates to new pnad + import random + single_new_pre = random.choice(list(new_pre - pnad.op.preconditions)) + updated_params = list(set(pnad.op.parameters + single_new_pre.variables)) + updated_preconditions = set(list(pnad.op.preconditions) + [single_new_pre]) + pnad.op = pnad.op.copy_with(parameters=updated_params,preconditions=updated_preconditions) + else: + # TODO No new predicates to differentiate + pass + print("Updated PNAD:", pnad) + else: + break + + + # # Check for convergence + # cur_op_set = {pnad.op for pnads in param_opt_to_nec_pnads.values() + # for pnad in pnads} + # if cur_op_set == prev_op_set: + # break + # prev_op_set = cur_op_set + + def _fixed_forward_one_pass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, List[PNAD]] + ) -> None: + """Simplified forward pass: strips preconditions and adds them back + until replanned trajectories match demos. 
+ """ + import random + + # Step 1: Save original preconditions and strip all preconditions + original_pnads = {} + for option, pnads in param_opt_to_nec_pnads.items(): + original_pnads[option] = [] + for pnad in pnads: + original_pnads[option].append({ + 'preconditions': set(pnad.op.preconditions), + 'ignore_effects': set(pnad.op.ignore_effects) + }) + pnad.op = pnad.op.copy_with(preconditions=set()) + + # Step 2: Ensure all operators have at least one precondition + print("\n=== Ensuring all operators have at least one precondition ===") + for option, pnads in param_opt_to_nec_pnads.items(): + for idx, pnad in enumerate(pnads): + if len(pnad.op.preconditions) == 0: + original = original_pnads[option][idx] + if len(original['preconditions']) > 0: + new_pre = random.choice(list(original['preconditions'])) + updated_params = list(set(pnad.op.parameters + list(new_pre.variables))) + updated_preconditions = {new_pre} + pnad.op = pnad.op.copy_with( + parameters=updated_params, + preconditions=updated_preconditions + ) + print(f" Added minimal precondition {new_pre} to {option.name}") + + # Step 3: Iteratively add preconditions back until all plans match demos + max_iterations = 100 + print(f"\n=== Forward Refinement: Adding preconditions until plans match demos ===") + + for iteration in range(max_iterations): + print(f"\nIteration {iteration + 1}") + all_match = True + + # Check each demo trajectory + for traj_idx, (ll_traj, seg_traj) in enumerate(zip(self._trajectories, self._segmented_trajs)): + if not ll_traj.is_demo: + continue + + task = self._train_tasks[ll_traj.train_task_idx] + objects, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options( + ll_traj, train_task_idx=ll_traj.train_task_idx) + + init_atoms = ground_atoms_traj[1][0] + + # Recompute datastores and filter out PNADs that don't have datastores. 
+ cur_itr_pnads_unfiltered = [ + pnad for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads + ] + self._recompute_datastores_from_segments(cur_itr_pnads_unfiltered) + cur_itr_pnads_filtered = [] + for pnad in cur_itr_pnads_unfiltered: + if len(pnad.datastore) > 0: + # new_pre = self._induce_preconditions_via_intersection(pnad) + # NOTE: this implicitly changes param_opt_to_nec_pnads + # as well, since we're directly modifying the PNAD objects. + # nad.op = pnad.op.copy_with(preconditions=new_pre) + cur_itr_pnads_filtered.append(pnad) + else: + param_opt_to_nec_pnads[pnad.option_spec[0]].remove(pnad) + del cur_itr_pnads_unfiltered # should be unused after this + # + + # Plan from initial state to goal + nsrts = [pnad.op for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads] + nsrt_to_pnad = {pnad.op: pnad for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads} + nsrt_to_option = {pnad.op: pnad.option_spec[0] + for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads} + + try: + ground_nsrts, reachable_atoms = task_plan_grounding( + init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic( + "hadd", init_atoms, task.goal, ground_nsrts, + self._predicates, objects) + task_plan_generator = task_plan( + init_atoms, task.goal, ground_nsrts, + reachable_atoms, heuristic, + timeout=100, seed=123, max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + except (StopIteration, Exception) as e: + print(f" Trajectory {traj_idx}: Failed to plan - {e}") + all_match = False + continue + + # Compare plan to demo + planned_options = [nsrt_to_option[ground_nsrt.parent] for ground_nsrt in skeleton] + demo_options = [seg.get_option().parent for seg in seg_traj] + + # Find first mismatch + mismatch_idx = None + for i, (planned_opt, demo_opt) in enumerate(zip(planned_options, demo_options)): + if planned_opt != demo_opt: + mismatch_idx = i + break + + if mismatch_idx is not None or 
len(planned_options) != len(demo_options): + all_match = False + + # Add a precondition to the wrongly chosen operator + if mismatch_idx is not None and mismatch_idx < len(skeleton): + wrong_ground_nsrt = skeleton[mismatch_idx] + wrong_pnad = nsrt_to_pnad[wrong_ground_nsrt.parent] + wrong_option = nsrt_to_option[wrong_ground_nsrt.parent] + demo_option = demo_options[mismatch_idx] + + print(f" Trajectory {traj_idx}, Step {mismatch_idx}: {wrong_option.name} != {demo_option.name}") + + # Get original preconditions for this operator + option_idx = list(param_opt_to_nec_pnads[wrong_option]).index(wrong_pnad) + original = original_pnads[wrong_option][option_idx] + + # Find preconditions to add (ones that aren't already added) + available_pres = original['preconditions'] - wrong_pnad.op.preconditions + + if available_pres: + # Add one random precondition + new_pre = random.choice(list(available_pres)) + updated_params = list(set(wrong_pnad.op.parameters + list(new_pre.variables))) + updated_preconditions = wrong_pnad.op.preconditions | {new_pre} + wrong_pnad.op = wrong_pnad.op.copy_with( + parameters=updated_params, + preconditions=updated_preconditions + ) + print(f" Added precondition {new_pre} to {wrong_option.name}") + # self._induce_delete_side_keep(param_opt_to_nec_pnads) + + break # Only fix one mismatch per iteration + else: + print(f" No more preconditions available for {wrong_option.name}") + print(f" Restoring all original preconditions for all operators") + # Restore original preconditions for all PNADs + for option, pnads in param_opt_to_nec_pnads.items(): + for idx, pnad in enumerate(pnads): + orig = original_pnads[option][idx] + # Get all variables from original preconditions + all_vars = set(pnad.op.parameters) + for pre in orig['preconditions']: + all_vars.update(pre.variables) + pnad.op = pnad.op.copy_with( + parameters=sorted(all_vars), + preconditions=orig['preconditions'], + ignore_effects=orig['ignore_effects'] + ) + # 
self._induce_delete_side_keep(param_opt_to_nec_pnads) + all_match = True # Exit loop since we've restored originals + break + break # Move to next iteration after finding first trajectory mismatch + + if all_match: + print(f"\nāœ“ All trajectories match demos after {iteration + 1} iterations!") + break + + # # Final verification: replan from init to goal and assert equivalence to demos + # print("\n=== Final Verification: Checking plans match demos ===") + # for ll_traj, seg_traj in zip(self._trajectories, self._segmented_trajs): + # if not ll_traj.is_demo: + # continue + + # task = self._train_tasks[ll_traj.train_task_idx] + # objects, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options( + # ll_traj, train_task_idx=ll_traj.train_task_idx) + + # init_atoms = ground_atoms_traj[1][0] + + # # Plan with final operators + # nsrts = [pnad.op for pnads in param_opt_to_nec_pnads.values() + # for pnad in pnads] + # nsrt_to_option = {pnad.op: pnad.option_spec[0] + # for pnads in param_opt_to_nec_pnads.values() + # for pnad in pnads} + + # try: + # ground_nsrts, reachable_atoms = task_plan_grounding( + # init_atoms, objects, nsrts, allow_noops=True) + # heuristic = utils.create_task_planning_heuristic( + # "hadd", init_atoms, task.goal, ground_nsrts, + # self._predicates, objects) + # task_plan_generator = task_plan( + # init_atoms, task.goal, ground_nsrts, + # reachable_atoms, heuristic, + # timeout=100, seed=123, max_skeletons_optimized=3) + # skeleton, _, _ = next(task_plan_generator) + # except (StopIteration, Exception) as e: + # print(f"Failed to plan for trajectory: {e}") + # assert False, f"Could not generate plan for demo trajectory" + + # # Compare planned options to demo options + # planned_options = [nsrt_to_option[ground_nsrt.parent] for ground_nsrt in skeleton] + # demo_options = [seg.get_option().parent for seg in seg_traj] + + # print(f"\nDemo trajectory {ll_traj.train_task_idx}:") + # print(f" Demo options: {[opt.name for opt in demo_options]}") + # 
print(f" Planned options: {[opt.name for opt in planned_options]}") + + # # Assert equivalence + # assert len(planned_options) == len(demo_options), \ + # f"Plan length mismatch: {len(planned_options)} vs {len(demo_options)}" + + # for i, (planned_opt, demo_opt) in enumerate(zip(planned_options, demo_options)): + # assert planned_opt == demo_opt, \ + # f"Step {i}: planned {planned_opt.name} != demo {demo_opt.name}" + + # print(f" āœ“ Plan matches demo!") + + # print("\n=== All plans match demos successfully! ===\n") + + def _try_lift_atom(self, ground_atom: GroundAtom, ground_objects: Sequence[Object], + parameters: Sequence[Variable]) -> Optional[LiftedAtom]: + """Try to lift a ground atom using a mapping from objects to parameters.""" + # Create object to variable mapping + obj_to_var = {} + for i, (obj, param) in enumerate(zip(ground_objects, parameters)): + if obj.type == param.type: + obj_to_var[obj] = param + + # Try to lift the atom + lifted_objs = [] + for obj in ground_atom.objects: + if obj in obj_to_var: + lifted_objs.append(obj_to_var[obj]) + else: + # Can't lift this atom with current parameters + return None + + return LiftedAtom(ground_atom.predicate, lifted_objs) + + def _backchain_multipass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> None: + """Take multiple passes through the demonstrations, running + self._backchain_one_pass() each time. + + Keep going until the PNADs reach a fixed point. Note that this + process creates operators with only parameters, preconditions, + and add effects. + """ + while True: + # Before each pass, clear the poss_keep_effects + # of all the PNADs. We do this because we only want the + # poss_keep_effects of the final pass, where the PNADs did + # not change. However, we cannot simply clear the + # pnad.seg_to_keep_effects_sub because some of these + # substitutions might be necessary if this happens to be + # a PNAD that already has keep effects. 
Thus, we call a + # method that handles this correctly. + for pnads in param_opt_to_nec_pnads.values(): + for pnad in pnads: + self.clear_unnecessary_keep_effs(pnad) + # Run one pass of backchaining. + nec_pnad_set_changed = self._backchain_one_pass( + param_opt_to_nec_pnads) + + print("inner pass of backchaining") + if not nec_pnad_set_changed: + print("no changes in this pass, backchaining has reached a fixed point") + break + + def _backchain_one_pass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> bool: + """Take one pass through the demonstrations in the given order. + + Go through each one from the end back to the start, making the + PNADs more specific whenever needed. Return whether any PNAD was + changed. + """ + # Reset all segments' necessary_add_effects so that they aren't + # accidentally used from a previous iteration of backchaining. + self._reset_all_segment_necessary_add_effs() + nec_pnad_set_changed = False + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: + continue + traj_goal = self._train_tasks[ll_traj.train_task_idx].goal + atoms_seq = utils.segment_trajectory_to_atoms_sequence(seg_traj) + assert traj_goal.issubset(atoms_seq[-1]) + # This variable, necessary_image, gets updated as we + # backchain. It always holds the set of ground atoms that + # are necessary for the remainder of the plan to reach the + # goal. At the start, necessary_image is simply the goal. + necessary_image = set(traj_goal) + for t in range(len(atoms_seq) - 2, -1, -1): + segment = seg_traj[t] + option = segment.get_option() + # Find the necessary PNADs associated with this option. If + # there are none, then use the general PNAD associated with + # this option. (But make sure to use a copy of it, because we + # don't want the general PNAD to get mutated when we mutate + # necessary PNADs!) 
+ if len(param_opt_to_nec_pnads[option.parent]) == 0: + general_pnad = self._create_general_pnad_for_option( + option.parent) + pnads_for_option = [ + PNAD(general_pnad.op, list(general_pnad.datastore), + general_pnad.option_spec) + ] + else: + pnads_for_option = param_opt_to_nec_pnads[option.parent] + + # Compute the ground atoms that must be added on this timestep. + # They must be a subset of the current PNAD's add effects. + necessary_add_effects = necessary_image - atoms_seq[t] + necessary_objects = set() + if len(necessary_add_effects) > 0: + necessary_objects = set.union(*[set(a.objects) for a in (list(necessary_add_effects))]) + if len(necessary_objects) > CFG.max_operator_arity: + from collections import Counter + new_necessary_objects = set([item for item, count in Counter([next(iter(a.objects)) for a in necessary_add_effects]).most_common(CFG.max_operator_arity)]) + necessary_add_effects = set([a for a in necessary_add_effects if set(a.objects).issubset(new_necessary_objects)]) + if not necessary_add_effects.issubset(segment.add_effects): + necessary_add_effects = segment.add_effects & necessary_add_effects + assert necessary_add_effects.issubset(segment.add_effects) + # Update the segment's necessary_add_effects. + segment.necessary_add_effects = necessary_add_effects + + # We start by checking if any of the PNADs associated with the + # demonstrated option are able to match this transition. 
+ objects = set(segment.states[0]) + pnad, var_to_obj = self._find_best_matching_pnad_and_sub( + segment, objects, pnads_for_option) + if pnad is not None: + assert var_to_obj is not None + obj_to_var = {v: k for k, v in var_to_obj.items()} + assert len(var_to_obj) == len(obj_to_var) + ground_op = pnad.op.ground( + tuple(var_to_obj[var] for var in pnad.op.parameters)) + if len(param_opt_to_nec_pnads[option.parent]) == 0: + param_opt_to_nec_pnads[option.parent].append(pnad) + segs_in_pnad = { + datapoint[0] + for datapoint in pnad.datastore + } + # In this case, we want to move the segment from + # another PNAD into the current PNAD. Note that + # we don't have to recompute the PNAD's add + # effects or preconditions because of the fact that + # this PNAD was found by the _find_best_matching + # function (which internally checks that the + # preconditions and add effects are all correct). + if segment not in segs_in_pnad: + # Find PNAD that the segment is currently in. + for seg_pnad in pnads_for_option: + segs_in_seg_pnad = [ + datapoint[0] + for datapoint in seg_pnad.datastore + ] + if segment in set(segs_in_seg_pnad): + seg_idx = segs_in_seg_pnad.index(segment) + seg_pnad.datastore.pop(seg_idx) + break + pnad.datastore.append((segment, var_to_obj)) + self._remove_empty_datastore_pnads( + param_opt_to_nec_pnads, option.parent) + + # If we weren't able to find a substitution (i.e, the above + # _find_best_matching call didn't yield a PNAD), we need to + # spawn a new PNAD from the most general PNAD to cover + # these necessary add effects. + else: + nec_pnad_set_changed = True + pnad = self.spawn_new_pnad(segment) + param_opt_to_nec_pnads[option.parent].append(pnad) + + # Recompute datastores for ALL PNADs associated with this + # option. We need to do this because the new PNAD may now + # be a better match for some transition that we previously + # matched to another PNAD. 
+ self._recompute_datastores_from_segments( + param_opt_to_nec_pnads[option.parent]) + # Now that we have done this, certain PNADs may be + # left with empty datastores. Remove these. + self._remove_empty_datastore_pnads(param_opt_to_nec_pnads, + option.parent) + + # Recompute all preconditions, now that we have recomputed + # the datastores. + for nec_pnad in param_opt_to_nec_pnads[option.parent]: + if len(nec_pnad.datastore) > 0: + pre = self._induce_preconditions_via_intersection( + nec_pnad) + nec_pnad.op = nec_pnad.op.copy_with( + preconditions=pre) + + # # assert that op arity is less than max_arity + # assert len(pnad.op.parameters) <= CFG.max_operator_arity + + # After all this, the unification call that failed earlier + # (leading us into the current else statement) should work. + best_score_pnad, var_to_obj = \ + self._find_best_matching_pnad_and_sub( + segment, objects, + param_opt_to_nec_pnads[option.parent]) + + assert var_to_obj is not None + # TODO #assert best_score_pnad == pnad + # Also, since this segment caused us to induce the new + # PNAD, it should appear in this new PNAD's datastore. + segs_in_pnad = { + datapoint[0] + for datapoint in pnad.datastore + } + if segment not in segs_in_pnad: + import ipdb; ipdb.set_trace() + assert segment in segs_in_pnad + obj_to_var = {v: k for k, v in var_to_obj.items()} + assert len(var_to_obj) == len(obj_to_var) + ground_op = pnad.op.ground( + tuple(var_to_obj[var] for var in pnad.op.parameters)) + + + self._update_pnad_seg_to_keep_effs(pnad, necessary_image, + ground_op, obj_to_var, + segment) + + # Update necessary_image for this timestep. It no longer + # needs to include the ground add effects of this PNAD, but + # must now include its ground preconditions. 
+ necessary_image -= { + a.ground(var_to_obj) + for a in pnad.op.add_effects + } + necessary_image |= { + a.ground(var_to_obj) + for a in pnad.op.preconditions + } + return nec_pnad_set_changed + + @staticmethod + def _remove_empty_datastore_pnads(param_opt_to_nec_pnads: Dict[ + ParameterizedOption, List[PNAD]], + param_opt: ParameterizedOption) -> None: + """Removes all PNADs associated with the given param_opt that have + empty datastores from the input param_opt_to_nec_pnads dict.""" + pnads_to_rm = [] + for pnad in param_opt_to_nec_pnads[param_opt]: + if len(pnad.datastore) == 0: + pnads_to_rm.append(pnad) + for rm_pnad in pnads_to_rm: + param_opt_to_nec_pnads[param_opt].remove(rm_pnad) + + def _induce_delete_side_keep( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> None: + """Given the current PNADs where add effects and preconditions are + correct, learn the remaining components: delete effects, side + predicates, and keep_effects. + + Note that this may require spawning new PNADs with keep effects. 
+ """ + for option, nec_pnad_list in sorted(param_opt_to_nec_pnads.items(), + key=str): + pnads_with_keep_effects = set() + for pnad in nec_pnad_list: + self._compute_pnad_delete_effects(pnad) + self._compute_pnad_ignore_effects(pnad) + pnads_with_keep_effects |= self.get_pnads_with_keep_effects( + pnad) + param_opt_to_nec_pnads[option].extend( + list(pnads_with_keep_effects)) + + @classmethod + def get_name(cls) -> str: + return "backward-forward" + + def _assert_all_data_in_exactly_one_datastore(self, + pnads: List[PNAD]) -> None: + """Assert that every demo datapoint appears in exactly one datastore + among the given PNADs' datastores.""" + all_segs_in_data_lst = [ + seg for pnad in pnads for seg, _ in pnad.datastore + ] + all_segs_in_data = set(all_segs_in_data_lst) + assert len(all_segs_in_data_lst) == len(all_segs_in_data) + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: # ignore non-demo data + continue + for segment in seg_traj: + assert segment in all_segs_in_data + +def parse_objs_preds_and_options(trajectory, train_task_idx=0, all_atoms=None): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_types = {"obj_type": Type("obj_type", ["is_obj"]), "surface_type": Type("surface_type", ["is_obj"])} + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + base_name = arg.strip().split("_")[0] + if base_name in ['box','cabinet','table','sink','bucket', 'ashcan']: + obj_types[base_name] = Type("surface_type", ["is_obj"]) + else: + obj_types[base_name] = Type("obj_type", ["is_obj"]) #Type(base_name, ["is_obj"]) + obj = obj_types[base_name](arg.strip()) + choice.append(obj) + 
objs.add(obj) + if len(args) == 1: + base_name = args[0].strip().split("_")[0] + pred = Predicate(func_name, [obj_types[base_name]], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + base_name1 = args[0].strip().split("_")[0] + base_name2 = args[1].strip().split("_")[0] + pred = Predicate(func_name, [obj_types[base_name1], obj_types[base_name2]], lambda s, o: True) + if not(func_name == 'atsamelocation' and base_name1 == base_name2): + preds.add(pred) + else: + NotImplementedError("") + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + def get_all_atoms_in_traj(ground_atoms_traj): + all_atoms = set() + for timestep_atoms in ground_atoms_traj: + all_atoms.update(timestep_atoms) + return all_atoms + + def add_neg_atoms(preds, lltraj, all_atoms): + ground_atoms = [] + neg_pred_table = {str(atom):GroundAtom(Predicate("~" + atom.predicate.name, atom.predicate.types, lambda s, o: True), atom.objects) for atom in all_atoms} + neg_pred_table["HandEmpty"] = GroundAtom(Predicate("handempty", [], lambda s, o: True), []) + for timestep_atoms in lltraj[1]: + missing_atoms = all_atoms - timestep_atoms + neg_atoms = set([neg_pred_table[str(atom)] for atom in missing_atoms]) + handempty = True + for atom in timestep_atoms: + if "inhandofrobot" in str(atom): + handempty = False + if handempty: + neg_atoms |= set([neg_pred_table["HandEmpty"]]) + ground_atoms.append(timestep_atoms | neg_atoms) + lltraj = (lltraj[0], ground_atoms) + return preds | set([v.predicate for v in neg_pred_table.values()]) | set([atom.predicate for atom in all_atoms]), lltraj + + lltraj = 
(LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + if all_atoms is None: + all_atoms = get_all_atoms_in_traj(ground_atoms_traj) + preds, lltraj = add_neg_atoms(preds, lltraj, all_atoms) + else: + preds, lltraj = add_neg_atoms(preds, lltraj, all_atoms) + + return objs, preds, options, lltraj, all_atoms diff --git a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py index f7ebf9bd21..6c9b795a12 100644 --- a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py +++ b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py @@ -11,7 +11,8 @@ GeneralToSpecificSTRIPSLearner from predicators.settings import CFG from predicators.structs import PNAD, GroundAtom, LowLevelTrajectory, \ - ParameterizedOption, Predicate, Segment, Task, _GroundSTRIPSOperator + ParameterizedOption, Predicate, Segment, Task, _GroundSTRIPSOperator, NSRT, Variable, LiftedAtom +import re class _PNADSearchOperator(abc.ABC): @@ -101,6 +102,7 @@ def _append_new_pnad_and_keep_effects( # that are unnecessary. 
new_pnads = self._learner.recompute_pnads_from_effects( sorted(new_pnads)) + print(len(new_pnads)) return new_pnads def _get_backchaining_results( @@ -267,6 +269,70 @@ def recompute_pnads_from_effects(self, pnads: List[PNAD]) -> List[PNAD]: pnad_map[p.option_spec[0]].append(p) new_pnads = self._get_uniquely_named_nec_pnads(pnad_map) return new_pnads + + + def parse_nsrt_block(self, block: str) -> PNAD: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in self._segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + 
+ def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) for ige in ignore_effects]) + option_spec = (option_specs[option_spec.split("(")[0]], []) + + nsrt = NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) + return PNAD(nsrt.op, [], option_spec) def _learn(self) -> List[PNAD]: # Set up hill-climbing search over PNAD sets. 
@@ -285,6 +351,16 @@ def get_successors( for i, child in enumerate(op.get_successors(pnads)): yield (op, i), child, 1.0 # cost always 1 + # Load initial pnad set + if CFG.pnad_search_load_initial: + initial_state = None + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + pnads = [self.parse_nsrt_block(nsrt_str) for nsrt_str in nsrt_strs] + self._recompute_datastores_from_segments(pnads) + initial_state = frozenset(pnads) + # Run hill-climbing search. path, _, _ = utils.run_hill_climbing(initial_state=initial_state, check_goal=lambda _: False, @@ -296,6 +372,7 @@ def get_successors( # Extract the best PNADs set. final_pnads = path[-1] sorted_final_pnads = sorted(final_pnads) + # Fix naming. pnad_map: Dict[ParameterizedOption, List[PNAD]] = { p.option_spec[0]: [] diff --git a/predicators/perception/mini_behavior_env_perceiver.py b/predicators/perception/mini_behavior_env_perceiver.py new file mode 100644 index 0000000000..eb43c38a23 --- /dev/null +++ b/predicators/perception/mini_behavior_env_perceiver.py @@ -0,0 +1,227 @@ +"""A mini_behavior-specific perceiver.""" + +import sys +from typing import Dict, Tuple + +import numpy as np + +from predicators import utils +from predicators.settings import CFG +from predicators.envs.mini_behavior_env import MiniBehavior +from predicators.perception.base_perceiver import BasePerceiver +from predicators.structs import EnvironmentTask, GroundAtom, Object, \ + Observation, State, Task, Video +from mini_behavior.grid import BehaviorGrid +from mini_bddl import DEFAULT_STATES, STATE_FUNC_MAPPING, DEFAULT_ACTIONS, OBJECT_TO_IDX, IDX_TO_OBJECT, OBJECTS, ABILITIES + +class MiniBehaviorPerceiver(BasePerceiver): + """A mini_behavior-specific perceiver.""" + + def __init__(self) -> None: + super().__init__() + self.grid = BehaviorGrid(16, 16) + + @classmethod + def get_name(cls) -> str: + return "mini_behavior_env" + + def 
parse_mini_behavior_task(self, env_task: EnvironmentTask) -> Task: + state = self._observation_to_state(env_task.init_obs) + if env_task.goal_description == "Get to the goal": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniBehavior.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif "go to the " in env_task.goal_description: + color, obj_type = env_task.goal_description.split("go to the ")[1].split(" ")[0:2] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + for obj in MiniBehavior.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj]), + } + elif env_task.goal_description == "get to the green goal square": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = 
MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniBehavior.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif env_task.goal_description.startswith("get a") or \ + env_task.goal_description.startswith("go get a") or \ + env_task.goal_description.startswith("fetch a") or \ + env_task.goal_description.startswith("go fetch a") or \ + env_task.goal_description.startswith("you must fetch a") or \ + env_task.goal_description.startswith("pick up the"): + color, obj_type = env_task.goal_description.split(" ")[-2:] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + for obj in MiniBehavior.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(Holding, [goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj])} + else: + raise NotImplementedError(f"Goal description {env_task.goal_description} not supported") + return Task(state, goal) + + def reset(self, env_task: EnvironmentTask) -> Task: + return self.parse_mini_behavior_task(env_task) + + def step(self, observation: 
Observation) -> State: + return self._observation_to_state(observation) + + def _observation_to_objects(self, obs: Observation) -> Dict[str, Tuple[int, int]]: + objs = [] + # NOTE(review): removed stray `import ipdb; ipdb.set_trace()` debug breakpoint here + visual = obs['image'] + self.grid.decode(visual) + for r in range(visual.shape[0]): + for c in range(visual.shape[1]): + obj = [IDX_TO_OBJECT[visual[r, c][0]], IDX_TO_COLOR[visual[r, c][1]], visual[r, c][2], r - self.agent_pov_pos[0], c - self.agent_pov_pos[1]] + if obj[0] == 'empty': + obj[1] = 'black' + objs.append(tuple(obj)) + return objs + + def transform_point(self, x1, y1, o1, x2, y2): + # Compute global coordinates directly + x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1) + y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1) + return x_prime, y_prime + + def _globalize_coords(self, r: int, c: int) -> Tuple[int, int]: + # Adjusted direction-to-radian mapping + direction_to_radian = { + 0: 0, # right + 1: -np.pi / 2, # down + 2: np.pi, # left + 3: np.pi / 2 # up + } + o1 = direction_to_radian[self.direction] + x1, y1 = self.agent_pos[0], self.agent_pos[1] + x2, y2 = r, -c # Use c directly + x_prime, y_prime = self.transform_point(x1, y1, o1, x2, y2) + return int(round(x_prime)), int(round(y_prime)) + + def _observation_to_state(self, obs: Observation) -> State: + import numpy as np + self.last_obs = obs + self.agent_pos = None + + objs = self._observation_to_objects(obs) + + def _get_object_name(r: int, c: int, type_name: str, color: str) -> str: + # Put the location of the static objects in their names for easier + # debugging. 
+ if type_name == "agent": + return "agent" + if type_name in ["empty", "wall"]: + return f"{type_name}_{r}_{c}" + else: + return f"{color}_{type_name}" + + for type_name, color, obj_state, r, c in objs: + enum = MiniBehavior.name_to_enum[type_name] + if CFG.mini_behavior_gym_fully_observable: + global_r, global_c = r, c + else: + global_r, global_c = self._globalize_coords(r, c) + if type_name in ["goal", "agent"]: + object_name = type_name + if type_name == "agent" and not CFG.mini_behavior_gym_fully_observable: + assert (global_r, global_c) == self.agent_pos + else: + object_name = _get_object_name(global_r, global_c, type_name, color) + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": global_r, + "column": global_c, + "type": enum, + "state": obj_state, + "color": color, + } + + if all([val["type"] != MiniBehavior.name_to_enum['goal'] for key, val in self.state_dict.items()]): + enum = MiniBehavior.name_to_enum["goal"] + object_name = "goal" + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": 'green', + } + + for color in ['blue', 'green', 'grey', 'purple', 'red', 'yellow']: + for obj_type in ['key', 'ball', 'box']: + if all([not (val["type"] == MiniBehavior.name_to_enum[obj_type] and val["color"] == color) for key, val in self.state_dict.items()]): + enum = MiniBehavior.name_to_enum[obj_type] + object_name = f"{color}_{obj_type}" + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": color, + } + + state = utils.create_state_from_dict(self.state_dict) + return state + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for mini_behavior") diff --git 
a/predicators/perception/minigrid_env_perceiver.py b/predicators/perception/minigrid_env_perceiver.py new file mode 100644 index 0000000000..8be4309d88 --- /dev/null +++ b/predicators/perception/minigrid_env_perceiver.py @@ -0,0 +1,258 @@ +"""A minigrid-specific perceiver.""" + +import sys +from typing import Dict, Tuple + +import numpy as np + +from predicators import utils +from predicators.settings import CFG +from predicators.envs.minigrid_env import MiniGridEnv +from predicators.perception.base_perceiver import BasePerceiver +from predicators.structs import EnvironmentTask, GroundAtom, Object, \ + Observation, State, Task, Video + +from minigrid.core.constants import ( + COLORS, + IDX_TO_COLOR, + IDX_TO_OBJECT, +) + +class MiniGridPerceiver(BasePerceiver): + """A minigrid-specific perceiver.""" + + def __init__(self) -> None: + super().__init__() + self.state_dict = {} + self.agent_pov_pos = (3,6) # agent's point of view is always at (3,6) + self.agent_pos = (0,0) # starts at origin + self.direction = 0 # directions (right, down, left, up) + self.last_obs = None + + @classmethod + def get_name(cls) -> str: + return "minigrid_env" + + def parse_minigrid_task(self, env_task: EnvironmentTask) -> Task: + state = self._observation_to_state(env_task.init_obs) + if env_task.goal_description == "Get to the goal": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniGridEnv.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif "go to the " in env_task.goal_description: + color, obj_type = env_task.goal_description.split("go to the 
")[1].split(" ")[0:2] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + for obj in MiniGridEnv.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj]), + } + elif env_task.goal_description == "get to the green goal square": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniGridEnv.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif env_task.goal_description.startswith("get a") or \ + env_task.goal_description.startswith("go get a") or \ + env_task.goal_description.startswith("fetch a") or \ + env_task.goal_description.startswith("go fetch a") or \ + env_task.goal_description.startswith("you must fetch a") or \ + env_task.goal_description.startswith("pick up the"): + color, obj_type = env_task.goal_description.split(" ")[-2:] + obj_name = 
f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + for obj in MiniGridEnv.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(Holding, [goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj])} + else: + raise NotImplementedError(f"Goal description {env_task.goal_description} not supported") + return Task(state, goal) + + def reset(self, env_task: EnvironmentTask) -> Task: + self.state_dict.clear() + return self.parse_minigrid_task(env_task) + + def step(self, observation: Observation) -> State: + return self._observation_to_state(observation) + + def _observation_to_objects(self, obs: Observation) -> Dict[str, Tuple[int, int]]: + objs = [] + visual = obs[0]['image'] + direction = obs[0]['direction'] + objs.append(('agent', + None, + direction, + 0, + 0)) + objs.append(('empty', + 'black', + 0, + 0, + 0)) + for r in range(visual.shape[0]): + for c in range(visual.shape[1]): + obj = [IDX_TO_OBJECT[visual[r, c][0]], IDX_TO_COLOR[visual[r, c][1]], visual[r, c][2], r - self.agent_pov_pos[0], c - self.agent_pov_pos[1]] + if obj[0] == 'empty': + obj[1] = 'black' + objs.append(tuple(obj)) + return objs + + def transform_point(self, x1, y1, o1, x2, y2): + # Compute global coordinates directly + x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1) + y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1) + return x_prime, 
y_prime + + def _globalize_coords(self, r: int, c: int) -> Tuple[int, int]: + # Adjusted direction-to-radian mapping + direction_to_radian = { + 0: 0, # right + 1: -np.pi / 2, # down + 2: np.pi, # left + 3: np.pi / 2 # up + } + o1 = direction_to_radian[self.direction] + x1, y1 = self.agent_pos[0], self.agent_pos[1] + x2, y2 = r, -c # Use c directly + x_prime, y_prime = self.transform_point(x1, y1, o1, x2, y2) + return int(round(x_prime)), int(round(y_prime)) + + def _observation_to_state(self, obs: Observation) -> State: + import numpy as np + + self.direction = obs[0]['direction'] + if len(obs) == 5: + if obs[4]['last_action'] == 2: # Moved Forward + if (not np.array_equal(self.last_obs[0]['image'], obs[0]['image'])) or \ + not np.array_equal(obs[0]['image'][self.agent_pov_pos[0], self.agent_pov_pos[1]-1], np.array([2, 5, 0], dtype=np.uint8)): + if self.direction == 0: # right (0, 1) + self.agent_pos = (self.agent_pos[0], self.agent_pos[1] + 1) + elif self.direction == 1: # down (1, 0) + self.agent_pos = (self.agent_pos[0] + 1, self.agent_pos[1]) + elif self.direction == 2: # left (0, -1) + self.agent_pos = (self.agent_pos[0], self.agent_pos[1] - 1) + elif self.direction == 3: # up (-1, 0) + self.agent_pos = (self.agent_pos[0] - 1, self.agent_pos[1]) + self.last_obs = obs + + objs = self._observation_to_objects(obs) + + def _get_object_name(r: int, c: int, type_name: str, color: str) -> str: + # Put the location of the static objects in their names for easier + # debugging. 
+ if type_name == "agent": + return "agent" + if type_name in ["empty", "wall"]: + return f"{type_name}_{r}_{c}" + else: + return f"{color}_{type_name}" + + for type_name, color, obj_state, r, c in objs: + enum = MiniGridEnv.name_to_enum[type_name] + if CFG.minigrid_gym_fully_observable: + global_r, global_c = r, c + else: + global_r, global_c = self._globalize_coords(r, c) + if type_name in ["goal", "agent"]: + object_name = type_name + if type_name == "agent" and not CFG.minigrid_gym_fully_observable: + assert (global_r, global_c) == self.agent_pos + else: + object_name = _get_object_name(global_r, global_c, type_name, color) + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": global_r, + "column": global_c, + "type": enum, + "state": obj_state, + "color": color, + } + + if all([val["type"] != MiniGridEnv.name_to_enum['goal'] for key, val in self.state_dict.items()]): + enum = MiniGridEnv.name_to_enum["goal"] + object_name = "goal" + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": 'green', + } + + for color in ['blue', 'green', 'grey', 'purple', 'red', 'yellow']: + for obj_type in ['key', 'ball', 'box']: + if all([not (val["type"] == MiniGridEnv.name_to_enum[obj_type] and val["color"] == color) for key, val in self.state_dict.items()]): + enum = MiniGridEnv.name_to_enum[obj_type] + object_name = f"{color}_{obj_type}" + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": color, + } + + state = utils.create_state_from_dict(self.state_dict) + return state + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for minigrid") diff --git a/predicators/planning.py b/predicators/planning.py index 
123323ff0a..74f4603ef3 100644 --- a/predicators/planning.py +++ b/predicators/planning.py @@ -284,8 +284,10 @@ def task_plan_grounding( for nsrt in sorted(nsrts): for ground_nsrt in utils.all_ground_nsrts(nsrt, objects): if allow_noops or (ground_nsrt.add_effects - | ground_nsrt.delete_effects): + | ground_nsrt.delete_effects): ground_nsrts.append(ground_nsrt) + if CFG.single_grounding: + break reachable_atoms = utils.get_reachable_atoms(ground_nsrts, init_atoms) reachable_nsrts = [ nsrt for nsrt in ground_nsrts @@ -1208,15 +1210,15 @@ def run_task_plan_once( timeout -= duration plan, atoms_seq, metrics = next( task_plan(init_atoms, - goal, - ground_nsrts, - reachable_atoms, - heuristic, - seed, - timeout, - max_skeletons_optimized=1, - use_visited_state_set=True, - **kwargs)) + goal, + ground_nsrts, + reachable_atoms, + heuristic, + seed, + timeout, + max_skeletons_optimized=1, + use_visited_state_set=True, + **kwargs)) if len(plan) > max_horizon: raise PlanningFailure( "Skeleton produced by A-star exceeds horizon!") @@ -1243,7 +1245,7 @@ def run_task_plan_once( alias_flag = "--alias lama-first" else: raise ValueError("Unrecognized sesame_task_planner: " - f"{CFG.sesame_task_planner}") + f"{CFG.sesame_task_planner}") sas_file = generate_sas_file_for_fd(task, nsrts, preds, types, timeout, timeout_cmd, alias_flag, exec_str, @@ -1262,11 +1264,10 @@ def run_task_plan_once( list(objects), init_atoms, nsrts, float(max_horizon)) else: raise ValueError("Unrecognized sesame_task_planner: " - f"{CFG.sesame_task_planner}") + f"{CFG.sesame_task_planner}") necessary_atoms_seq = utils.compute_necessary_atoms_seq( plan, atoms_seq, goal) - return plan, necessary_atoms_seq, metrics diff --git a/predicators/settings.py b/predicators/settings.py index 4dc482e377..ebfbfb7f84 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -331,6 +331,22 @@ class GlobalSettings: # initialization and resetting. 
use Sokoban-small-v0 for tests sokoban_gym_name = "Sokoban-v0" + # minigrid env parameters + # Currently tested envs: + # "MiniGrid-Empty-5x5-v0" + # "MiniGrid-Empty-8x8-v0" + # "MiniGrid-Empty-16x16-v0" + # "MiniGrid-GoToObject-8x8-N2-v0" + # "MiniGrid-Fetch-8x8-N3-v0" + minigrid_gym_name = "MiniGrid-Fetch-8x8-N3-v0" + minigrid_gym_render = False + minigrid_gym_fully_observable = False + + # mini_behavior env parameters + mini_behavior_env_name = "MiniGrid-SortingBooks-16x16-N2-v0" + mini_behavior_env_render = False + mini_behavior_env_fully_observable = True + # kitchen env parameters kitchen_use_perfect_samplers = False kitchen_goals = "all" @@ -496,6 +512,10 @@ class GlobalSettings: enable_harmless_op_pruning = False # some methods may want this to be True precondition_soft_intersection_threshold_percent = 0.8 # between 0 and 1 backchaining_check_intermediate_harmlessness = False + backward_forward_load_initial = False + single_grounding = False + pnad_search_load_initial = False + max_operator_arity = 4 # maximum number of parameters allowed per operator pnad_search_without_del = False pnad_search_timeout = 10.0 compute_sidelining_objective_value = False @@ -714,7 +734,9 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]: # The method used for perception: now only "trivial" or "sokoban". perceiver=defaultdict(lambda: "trivial", { "sokoban": "sokoban", - "kitchen": "kitchen", + "minigrid_env": "minigrid_env", + "mini_behavior_env": "mini_behavior_env", + "kitchen": "kitchen" })[args.get("env", "")], # Horizon for each environment. When checking if a policy solves a # task, we run the policy for at most this many steps. @@ -728,6 +750,7 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]: "doors": 1000, "coffee": 1000, "kitchen": 1000, + "minigrid_env": 1000, # For the very simple touch point environment, restrict # the horizon to be shorter. 
"touch_point": 15, diff --git a/predicators/utils.py b/predicators/utils.py index 3500562b56..e2f3fddaac 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -1473,7 +1473,8 @@ def _get_entity_combinations( this_choices.append(ent) choices.append(this_choices) for choice in itertools.product(*choices): - yield list(choice) + if len(set(choice)) == len(choice): + yield list(choice) def get_object_combinations(objects: Collection[Object], diff --git a/run_autoformat.sh b/run_autoformat.sh deleted file mode 100755 index 0b541348f4..0000000000 --- a/run_autoformat.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -yapf -i -r --style .style.yapf --exclude '**/third_party' predicators -yapf -i -r --style .style.yapf scripts -yapf -i -r --style .style.yapf tests -yapf -i -r --style .style.yapf setup.py -docformatter -i -r . --exclude venv predicators/third_party -isort .