diff --git a/.gitignore b/.gitignore index f9082380e..cadae2196 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,6 @@ pufferlib/ocean/impulse_wars/*-release/ pufferlib/ocean/impulse_wars/debug-*/ pufferlib/ocean/impulse_wars/release-*/ pufferlib/ocean/impulse_wars/benchmark/ + +# dsym files +*.dSYM/ \ No newline at end of file diff --git a/pufferlib/config/ocean/overcooked.ini b/pufferlib/config/ocean/overcooked.ini new file mode 100644 index 000000000..7026e0fb7 --- /dev/null +++ b/pufferlib/config/ocean/overcooked.ini @@ -0,0 +1,60 @@ +[base] +package = ocean +env_name = puffer_overcooked +policy_name = Policy +rnn_name = Recurrent + +[env] +num_envs = 4096 +num_agents = 2 +layout = cramped_room +reward_dish_served_whole_team = 1.0 +reward_dish_served_agent = 0.0 +reward_pot_started = 0.15 +reward_ingredient_added = 0.15 +reward_ingredient_picked = 0.05 +reward_plate_picked = 0.05 +reward_soup_plated = 0.20 +reward_wrong_dish_served = 0.0 +reward_step_penalty = 0.0 + +[train] +total_timesteps = 100_000_000 +learning_rate = 0.01 +minibatch_size = 32768 +gamma = 0.99 +ent_coef = 0.02 +gae_lambda = 0.97 +clip_coef = 0.15 +anneal_lr = True + +[sweep] +method = Protein +metric = n +goal = maximize +downsample = 1 + +[sweep.train.learning_rate] +type = log_normal +min = 0.0001 +max = 0.01 + +[sweep.train.ent_coef] +type = log_normal +min = 0.01 +max = 0.30 + +[sweep.train.clip_coef] +type = log_normal +min = 0.05 +max = 0.30 + +[sweep.train.gamma] +type = logit_normal +min = 0.90 +max = 0.999 + +[sweep.train.gae_lambda] +type = logit_normal +min = 0.90 +max = 0.999 \ No newline at end of file diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index 6c56a4ea2..a97b83e77 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -156,6 +156,7 @@ def make_multiagent(buf=None, **kwargs): 'checkers': 'Checkers', 'asteroids': 'Asteroids', 'whisker_racer': 'WhiskerRacer', + 'overcooked': 'Overcooked', 'onestateworld': 
'World', 'onlyfish': 'OnlyFish', 'chain_mdp': 'Chain', diff --git a/pufferlib/ocean/overcooked/README.md b/pufferlib/ocean/overcooked/README.md new file mode 100644 index 000000000..bcb146c56 --- /dev/null +++ b/pufferlib/ocean/overcooked/README.md @@ -0,0 +1,247 @@ +# Overcooked Environment + +A multi-agent cooking coordination environment where agents cooperate to prepare and serve onion soup. Based on the popular Overcooked video game, this environment tests agents' ability to coordinate, divide labor, and work together efficiently. + +## File Structure + +``` +overcooked/ +├── overcooked.h # Main entry point (init, reset, step, close) +├── overcooked_types.h # Constants, enums, and struct definitions +├── overcooked_items.h # Item and cooking pot management +├── overcooked_obs.h # Observation computation +├── overcooked_logic.h # Game logic (interaction, movement, cooking) +├── overcooked_render.h # Rendering and texture management +├── binding.c # Python bindings +└── overcooked.py # Python environment wrapper +``` + +## Observation Space + +**43-dimensional vector per agent** — *see [compute_observations](overcooked_obs.h#L81)* + +### Player Features (38 dims) +- **Orientation** (4): One-hot encoding of facing direction — [overcooked_obs.h:101-103](overcooked_obs.h#L101-L103) +- **Held Object** (4): One-hot encoding (onion, plated_soup, plate, empty) — [overcooked_obs.h:105-116](overcooked_obs.h#L105-L116) +- **Proximity Features** (16): Normalized (dx, dy) to the nearest of 8 object types, 2 values each (six are listed below; see `overcooked_obs.h` for the full set) — [overcooked_obs.h:118-167](overcooked_obs.h#L118-L167): + - Onion source (ingredient box) + - Dish source (plate box) + - Plated soup on counter + - Serving area + - Empty counter + - Pot (stove) +- **Nearest Soup Ingredients** (2): Onion/tomato counts in nearest plated soup or held soup (normalized) — [overcooked_obs.h:169-179](overcooked_obs.h#L169-L179) +- **Pot Soup Ingredients** (2): Onion/tomato counts in nearest pot (normalized) — [overcooked_obs.h:181-202](overcooked_obs.h#L181-L202) 
+- **Pot Existence** (1): Binary flag for reachable pot — [overcooked_obs.h:205](overcooked_obs.h#L205) +- **Pot State** (4): Binary flags (empty, full, cooking, ready) — [overcooked_obs.h:207-215](overcooked_obs.h#L207-L215) +- **Cooking Time** (1): Remaining cook time (normalized) — [overcooked_obs.h:217-223](overcooked_obs.h#L217-L223) +- **Wall Detection** (4): Binary flags for walls/obstacles (up, down, left, right) — [overcooked_obs.h:225-235](overcooked_obs.h#L225-L235) + +### Spatial Features (4 dims) +- **Teammate Relative Position** (2): Normalized (dx, dy) to other agent — [overcooked_obs.h:237-248](overcooked_obs.h#L237-L248) +- **Absolute Position** (2): Normalized (x, y) coordinates — [overcooked_obs.h:250-252](overcooked_obs.h#L250-L252) + +### Context (1 dim) +- **Reward** (1): Current step reward — [overcooked_obs.h:255](overcooked_obs.h#L255) + +## Action Space + +**6 discrete actions** — *see [c_step](overcooked.h#L77)* +- 0: No-op — [ACTION_NOOP](overcooked_types.h#L43) +- 1: Move up — [ACTION_UP](overcooked_types.h#L44) +- 2: Move down — [ACTION_DOWN](overcooked_types.h#L45) +- 3: Move left — [ACTION_LEFT](overcooked_types.h#L46) +- 4: Move right — [ACTION_RIGHT](overcooked_types.h#L47) +- 5: Interact (pick up/place items, use equipment) — [ACTION_INTERACT](overcooked_types.h#L48) + +## Reward System + +*See [evaluate_dish_served](overcooked_logic.h#L229) and [handle_interaction](overcooked_logic.h#L106)* + +### Main Rewards +- **Correct dish served** (3 onions): +1.0 (shared), +0.0 (server bonus) — [overcooked_logic.h:237-241](overcooked_logic.h#L237-L241) +- **Wrong dish served** (incorrect recipe): +0.0 (shared) — [overcooked_logic.h:252-258](overcooked_logic.h#L252-L258) +- **Step penalty**: 0.0 — [overcooked.h:80](overcooked.h#L80) + +### Intermediate Rewards +- **Pick up ingredient**: +0.05 — [overcooked_logic.h:221](overcooked_logic.h#L221) +- **Add onion to pot**: +0.15 — [overcooked_logic.h:133](overcooked_logic.h#L133) +- **Start 
cooking** (3 onions in pot): +0.15 — [overcooked_logic.h:145-147](overcooked_logic.h#L145-L147) +- **Plate cooked soup**: +0.20 — [overcooked_logic.h:159](overcooked_logic.h#L159) + +## Recipe + +The correct recipe requires **exactly 3 onions** in the soup. Agents must: +1. Pick up onions from ingredient boxes +2. Add 3 onions to a pot +3. Start cooking (interact with pot when empty-handed) +4. Wait for soup to cook (20 steps) +5. Pick up a plate from plate box +6. Plate the cooked soup (interact with pot while holding plate) +7. Deliver plated soup to serving area + +## Configuration + +*See [Overcooked class](overcooked.py#L14)* + +```python +env = Overcooked( + num_envs=1, # Number of parallel environments + layout="cramped_room", # Layout name (see Available Layouts) + num_agents=2, # Agents per environment + render_mode=None, # Set to enable rendering + log_interval=128, # Steps between log aggregation + grid_size=32, # Render tile size in pixels + + # Reward configuration (from config/ocean/overcooked.ini) + reward_dish_served_whole_team=1.0, # Shared reward for correct dish + reward_dish_served_agent=0.0, # Bonus for serving agent + reward_pot_started=0.15, # Starting correct recipe + reward_ingredient_added=0.15, # Adding onion to pot + reward_ingredient_picked=0.05, # Picking up ingredient + reward_soup_plated=0.20, # Plating cooked soup + reward_wrong_dish_served=0.0, # Serving incorrect dish + reward_step_penalty=0.0, # Per-step penalty +) +``` + +## Game Constants + +- **Cooking time**: 20 steps — [COOKING_TIME](overcooked_types.h#L39) +- **Max ingredients per pot**: 3 — [MAX_INGREDIENTS](overcooked_types.h#L40) +- **Max episode steps**: 400 (default) +- **Max dynamic items**: 20 — [overcooked.h:19](overcooked.h#L19) + +## Available Layouts + +*See [LAYOUTS](overcooked_types.h#L244-L259)* + +### cramped_room (5x5) + +``` ++---+---+---+---+---+ +| W | C | P | C | W | W = Wall ++---+---+---+---+---+ C = Counter +| I | | | | I | P = Pot (Stove) 
++---+---+---+---+---+ I = Ingredient Box (Onions) +| C | | | | C | D = Dish/Plate Box ++---+---+---+---+---+ S = Serving Area +| C | | | | C | ++---+---+---+---+---+ +| W | D | C | S | W | ++---+---+---+---+---+ +``` +Spawns: (1,2) and (3,2) + +### asymmetric_advantages (9x5) + +``` ++---+---+---+---+---+---+---+---+---+ +| W | C | W | W | W | W | W | C | W | ++---+---+---+---+---+---+---+---+---+ +| I | | C | S | W | I | C | | S | ++---+---+---+---+---+---+---+---+---+ +| C | | | | P | | | | C | ++---+---+---+---+---+---+---+---+---+ +| C | | | | P | | | | C | ++---+---+---+---+---+---+---+---+---+ +| W | C | C | D | W | D | C | C | W | ++---+---+---+---+---+---+---+---+---+ +``` +Spawns: (1,2) and (7,2) + +### forced_coordination (5x5) + +``` ++---+---+---+---+---+ +| W | C | W | P | W | W = Wall ++---+---+---+---+---+ C = Counter +| I | | C | | P | P = Pot (Stove) ++---+---+---+---+---+ I = Ingredient Box (Onions) +| I | | C | | C | D = Dish/Plate Box ++---+---+---+---+---+ S = Serving Area +| D | | C | | C | ++---+---+---+---+---+ +| W | C | W | S | W | ++---+---+---+---+---+ +``` +Spawns: (1,2) and (3,2) + +A challenging layout with a center wall dividing the kitchen. Agents must coordinate through limited passage points. + +### coordination_ring (5x5) + +``` ++---+---+---+---+---+ +| W | C | C | P | W | W = Wall ++---+---+---+---+---+ C = Counter +| C | | | | P | P = Pot (Stove) ++---+---+---+---+---+ I = Ingredient Box (Onions) +| D | | C | | C | D = Dish/Plate Box ++---+---+---+---+---+ S = Serving Area +| I | | | | C | ++---+---+---+---+---+ +| W | I | S | C | W | ++---+---+---+---+---+ +``` +Spawns: (1,2) and (3,2) + +Ring-shaped layout with a center counter obstacle. Agents must navigate around the center to coordinate ingredient pickup and soup delivery. 
+ +### counter_circuit (8x5) + +``` ++---+---+---+---+---+---+---+---+ +| W | C | C | P | P | C | C | W | ++---+---+---+---+---+---+---+---+ +| C | | | | | | | C | ++---+---+---+---+---+---+---+---+ +| D | | C | C | C | C | | S | ++---+---+---+---+---+---+---+---+ +| C | | | | | | | C | ++---+---+---+---+---+---+---+---+ +| W | C | C | I | I | C | C | W | ++---+---+---+---+---+---+---+---+ +``` +Spawns: (1,1) and (6,3) + +Circuit-shaped layout with a center counter island. Agents must coordinate around the obstacle to efficiently transport ingredients and serve dishes. Features dual pots and dual ingredient boxes for parallel cooking. + +## Logging Metrics + +*See [Log struct](overcooked_types.h#L65-L78)* + +| Metric | Description | +|--------|-------------| +| perf | Delivery-speed score: 25 / `ticks_since_reward` of the serving agent, accumulated per correct dish | +| score | Same quantity as perf (25 / `ticks_since_reward` per correct dish) | +| episode_return | Sum of all agents' per-step rewards | +| episode_length | `ticks_since_reward` of the serving agent, accumulated per correct dish | +| dishes_served | Total dishes served (correct + wrong) | +| correct_dishes | Number of 3-onion dishes served | +| wrong_dishes | Number of incorrect dishes served | +| ingredients_picked | Total ingredients picked up | +| pots_started | Number of cooking sessions started | +| items_dropped | Number of items put down (placed on a counter or cutting board, or discarded onto an empty floor tile) | +| agent_collisions | Number of agent collision attempts | + +## Agent Reset Mechanism + +Each time an agent's `ticks_since_reward` counter reaches a multiple of 512 (the counter is cleared when the agent serves a correct dish, not by intermediate shaping rewards), it is automatically reset to its starting position with no held item. 
This prevents agents from getting stuck — [c_step](overcooked.h#L114-L133) + +## Building + +```bash +# Build the environment +python setup.py build_overcooked --inplace + +# Run standalone test +python pufferlib/ocean/overcooked/overcooked.py + +# Run standalone demo with specific layout +./overcooked cramped_room +./overcooked asymmetric_advantages +./overcooked forced_coordination +./overcooked coordination_ring +./overcooked counter_circuit +``` diff --git a/pufferlib/ocean/overcooked/binding.c b/pufferlib/ocean/overcooked/binding.c new file mode 100644 index 000000000..fbc5662de --- /dev/null +++ b/pufferlib/ocean/overcooked/binding.c @@ -0,0 +1,37 @@ +#include "overcooked.h" + +#define Env Overcooked +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->layout_id = (LayoutType)unpack(kwargs, "layout"); + env->num_agents = unpack(kwargs, "num_agents"); + env->grid_size = unpack(kwargs, "grid_size"); + env->observation_size = unpack(kwargs, "observation_size"); + env->rewards_config.dish_served_whole_team = unpack(kwargs, "reward_dish_served_whole_team"); + env->rewards_config.dish_served_agent = unpack(kwargs, "reward_dish_served_agent"); + env->rewards_config.pot_started = unpack(kwargs, "reward_pot_started"); + env->rewards_config.ingredient_added = unpack(kwargs, "reward_ingredient_added"); + env->rewards_config.ingredient_picked = unpack(kwargs, "reward_ingredient_picked"); + env->rewards_config.plate_picked = unpack(kwargs, "reward_plate_picked"); + env->rewards_config.soup_plated = unpack(kwargs, "reward_soup_plated"); + env->rewards_config.wrong_dish_served = unpack(kwargs, "reward_wrong_dish_served"); + env->rewards_config.step_penalty = unpack(kwargs, "reward_step_penalty"); + init(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "episode_return", log->episode_return); + 
assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "dishes_served", log->dishes_served); + assign_to_dict(dict, "correct_dishes", log->correct_dishes); + assign_to_dict(dict, "wrong_dishes", log->wrong_dishes); + assign_to_dict(dict, "ingredients_picked", log->ingredients_picked); + assign_to_dict(dict, "pots_started", log->pots_started); + assign_to_dict(dict, "items_dropped", log->items_dropped); + assign_to_dict(dict, "agent_collisions", log->agent_collisions); + return 0; +} diff --git a/pufferlib/ocean/overcooked/overcooked.c b/pufferlib/ocean/overcooked/overcooked.c new file mode 100644 index 000000000..8df2f7008 --- /dev/null +++ b/pufferlib/ocean/overcooked/overcooked.c @@ -0,0 +1,79 @@ +#include +#include "overcooked.h" +#include "puffernet.h" + +int main(int argc, char** argv) { + LayoutType layout_id = LAYOUT_CRAMPED_ROOM; + if (argc > 1) { + layout_id = get_layout_by_name(argv[1]); + } + + int num_agents = 2; + int num_obs = 43; + + // Select weights file and size based on layout + const char* weights_file; + int weights_size; + if (layout_id == LAYOUT_ASYMMETRIC_ADVANTAGES) { + weights_file = "resources/overcooked/puffer_overcooked_weights_aa.bin"; + weights_size = 138631; + } else if (layout_id == LAYOUT_FORCED_COORDINATION) { + weights_file = "resources/overcooked/puffer_overcooked_weights_fc.bin"; + weights_size = 138631; + } else if (layout_id == LAYOUT_COORDINATION_RING) { + weights_file = "resources/overcooked/puffer_overcooked_weights_cor.bin"; + weights_size = 138631; + } else if (layout_id == LAYOUT_COUNTER_CIRCUIT) { + weights_file = "resources/overcooked/puffer_overcooked_weights_cc.bin"; + weights_size = 138631; + } else { + weights_file = "resources/overcooked/puffer_overcooked_weights_cr.bin"; + weights_size = 138631; + } + + Weights* weights = load_weights(weights_file, weights_size); + int logit_sizes[] = {6}; + LinearLSTM* net = make_linearlstm(weights, num_agents, num_obs, logit_sizes, 1); + + 
Overcooked env = { + .layout_id = layout_id, + .num_agents = num_agents, + .grid_size = 100, + .rewards_config = { + .dish_served_whole_team = 1.0f, + .dish_served_agent = 0.0f, + .pot_started = 0.15f, + .ingredient_added = 0.15f, + .ingredient_picked = 0.05f, + .plate_picked = 0.05f, + .soup_plated = 0.20f, + .wrong_dish_served = 0.0f, + .step_penalty = 0.0f + }, + .observation_size = num_obs + }; + + env.observations = (float*)calloc(num_obs * num_agents, sizeof(float)); + env.actions = (int*)calloc(num_agents, sizeof(int)); + env.rewards = (float*)calloc(num_agents, sizeof(float)); + env.terminals = (unsigned char*)calloc(num_agents, sizeof(unsigned char)); + + init(&env); + c_reset(&env); + c_render(&env); + + while (!WindowShouldClose()) { + forward_linearlstm(net, env.observations, env.actions); + c_step(&env); + c_render(&env); + } + + free_linearlstm(net); + free(env.observations); + free(env.actions); + free(env.rewards); + free(env.terminals); + c_close(&env); + + return 0; +} diff --git a/pufferlib/ocean/overcooked/overcooked.h b/pufferlib/ocean/overcooked/overcooked.h new file mode 100644 index 000000000..6ff3cdf7f --- /dev/null +++ b/pufferlib/ocean/overcooked/overcooked.h @@ -0,0 +1,154 @@ +/* Overcooked: a multi-agent cooking coordination environment. + * Agents can walk around, pick up items, and put down items. 
+ */ + +#ifndef OVERCOOKED_H +#define OVERCOOKED_H + +#include "overcooked_types.h" +#include "overcooked_items.h" +#include "overcooked_obs.h" +#include "overcooked_logic.h" +#include "overcooked_render.h" + +static void init(Overcooked* env) { + const LayoutInfo* layout = get_layout_info(env->layout_id); + env->width = layout->width; + env->height = layout->height; + env->grid = calloc(env->width * env->height, sizeof(char)); + env->max_items = 20; + env->items = calloc(env->max_items, sizeof(Item)); + env->num_items = 0; + env->agents = calloc(env->num_agents, sizeof(Agent)); + parse_grid(env); + init_static_cache(env); + init_cooking_pots(env); + init_pot_indices(env); + init_item_grid(env); + env->client = NULL; + + memset(&env->log, 0, sizeof(Log)); +} + +void c_reset(Overcooked* env) { + env->num_items = 0; + reset_item_grid(env); + parse_grid(env); + + for (int i = 0; i < env->num_stoves; i++) { + CookingPot* pot = &env->cooking_pots[i]; + pot->cooking_state = NOT_COOKING; + pot->cooking_progress = 0; + pot->ingredient_count = 0; + pot->num_onions = 0; + pot->num_tomatoes = 0; + for (int j = 0; j < MAX_INGREDIENTS; j++) { + pot->ingredient_types[j] = NO_ITEM; + } + } + + const LayoutInfo* layout = get_layout_info(env->layout_id); + for (int i = 0; i < env->num_agents; i++) { + if (i < layout->num_spawns) { + env->agents[i].x = layout->spawn_positions[i * 2]; + env->agents[i].y = layout->spawn_positions[i * 2 + 1]; + } else { + env->agents[i].x = 1 + (i % (env->width - 2)); + env->agents[i].y = 1 + (i / (env->width - 2)); + } + env->agents[i].held_item = NO_ITEM; + env->agents[i].facing_direction = 0; + env->agents[i].held_soup_onions = 0; + env->agents[i].held_soup_tomatoes = 0; + env->agents[i].held_soup_total = 0; + env->agents[i].ticks_since_reward = 0; + + env->rewards[i] = 0.0f; + env->terminals[i] = 0; + } + + env->agent_position_mask = 0; + for (int i = 0; i < env->num_agents; i++) { + set_agent_position(env, env->agents[i].x, env->agents[i].y); + } 
+ + compute_observations(env); +} + +void c_step(Overcooked* env) { + for (int i = 0; i < env->num_agents; i++) { + int action = env->actions[i]; + env->rewards[i] = env->rewards_config.step_penalty; + env->agents[i].ticks_since_reward++; + + Agent* agent = &env->agents[i]; + int new_x = agent->x; + int new_y = agent->y; + + switch (action) { + case ACTION_UP: new_y -= 1; agent->facing_direction = 0; break; + case ACTION_DOWN: new_y += 1; agent->facing_direction = 1; break; + case ACTION_LEFT: new_x -= 1; agent->facing_direction = 2; break; + case ACTION_RIGHT: new_x += 1; agent->facing_direction = 3; break; + case ACTION_INTERACT: handle_interaction(env, i); break; + } + + if (action != ACTION_INTERACT && action != ACTION_NOOP) { + if (is_valid_position(env, new_x, new_y, i)) { + clear_agent_position(env, agent->x, agent->y); + agent->x = new_x; + agent->y = new_y; + set_agent_position(env, new_x, new_y); + } else { + for (int j = 0; j < env->num_agents; j++) { + if (j != i && (int)env->agents[j].x == new_x && (int)env->agents[j].y == new_y) { + env->log.agent_collisions++; + break; + } + } + } + } + } + + update_cooking(env); + + const LayoutInfo* layout = get_layout_info(env->layout_id); + for (int i = 0; i < env->num_agents; i++) { + if (env->agents[i].ticks_since_reward % 512 == 0 && env->agents[i].ticks_since_reward > 0) { + clear_agent_position(env, env->agents[i].x, env->agents[i].y); + if (i < layout->num_spawns) { + env->agents[i].x = layout->spawn_positions[i * 2]; + env->agents[i].y = layout->spawn_positions[i * 2 + 1]; + } else { + env->agents[i].x = 1 + (i % (env->width - 2)); + env->agents[i].y = 1 + (i / (env->width - 2)); + } + set_agent_position(env, env->agents[i].x, env->agents[i].y); + env->agents[i].held_item = NO_ITEM; + env->agents[i].held_soup_onions = 0; + env->agents[i].held_soup_tomatoes = 0; + env->agents[i].held_soup_total = 0; + } + } + + for (int i = 0; i < env->num_agents; i++) { + env->log.episode_return += env->rewards[i]; + } + + 
compute_observations(env); +} + +void c_close(Overcooked* env) { + free(env->grid); + free(env->items); + free(env->agents); + free(env->cooking_pots); + free(env->pot_index_grid); + free(env->item_grid); + if (env->client != NULL) { + unload_textures(env->client); + free(env->client); + } +} + +#endif // OVERCOOKED_H \ No newline at end of file diff --git a/pufferlib/ocean/overcooked/overcooked.py b/pufferlib/ocean/overcooked/overcooked.py new file mode 100644 index 000000000..147e25f7e --- /dev/null +++ b/pufferlib/ocean/overcooked/overcooked.py @@ -0,0 +1,141 @@ +'''Overcooked: A multi-agent cooking coordination environment.''' + +import gymnasium +import numpy as np + +import pufferlib +from pufferlib.ocean.overcooked import binding + +LAYOUTS = { + "cramped_room": 0, + "asymmetric_advantages": 1, + "forced_coordination": 2, + "coordination_ring": 3, + "counter_circuit": 4, +} + +class Overcooked(pufferlib.PufferEnv): + def __init__(self, num_envs=1, layout="cramped_room", num_agents=2, + render_mode=None, log_interval=128, buf=None, seed=0, + grid_size=32, + reward_dish_served_whole_team=20.0, + reward_dish_served_agent=5.0, + reward_pot_started=0.1, + reward_ingredient_added=0.1, + reward_ingredient_picked=0.0, + reward_plate_picked=0.0, + reward_soup_plated=0.1, + reward_wrong_dish_served=0.1, + reward_step_penalty=0.0): + + # Define observation space - 43-dimensional vector per agent + # Structure: + # - Player features: 38 dims + # * Orientation (one-hot): 4 + # * Held object (one-hot): 4 + # * Proximity to objects (dx,dy): 16 (8 objects × 2) + # * Nearest soup ingredients: 2 (onions, tomatoes in plated soup or held) + # * Pot soup ingredients: 2 (onions, tomatoes in nearest pot) + # * Pot existence: 1 + # * Pot state flags: 4 + # * Cooking time: 1 + # * Wall detection: 4 (up, down, left, right) + # - Teammate relative position: 2 dims + # - Absolute position: 2 dims + # - Reward: 1 dim + # Total: 43 dimensions + + observation_size = 43 + + 
self.single_observation_space = gymnasium.spaces.Box( + low=-1, high=1, + shape=(observation_size,), + dtype=np.float32 + ) + + # Action space: 6 discrete actions (noop, up, down, left, right, interact) + self.single_action_space = gymnasium.spaces.Discrete(6) + + self.render_mode = render_mode + self.num_agents = num_envs * num_agents # Multiple agents per env + self.log_interval = log_interval + + super().__init__(buf) + layout_id = LAYOUTS.get(layout, 0) + c_envs = [] + for i in range(num_envs): + c_env = binding.env_init( + self.observations[i*num_agents:(i+1)*num_agents], + self.actions[i*num_agents:(i+1)*num_agents], + self.rewards[i*num_agents:(i+1)*num_agents], + self.terminals[i*num_agents:(i+1)*num_agents], + self.truncations[i*num_agents:(i+1)*num_agents], + seed + i, + layout=layout_id, + num_agents=num_agents, + grid_size=grid_size, + observation_size=observation_size, + reward_dish_served_whole_team=reward_dish_served_whole_team, + reward_dish_served_agent=reward_dish_served_agent, + reward_pot_started=reward_pot_started, + reward_ingredient_added=reward_ingredient_added, + reward_ingredient_picked=reward_ingredient_picked, + reward_plate_picked=reward_plate_picked, + reward_soup_plated=reward_soup_plated, + reward_wrong_dish_served=reward_wrong_dish_served, + reward_step_penalty=reward_step_penalty + ) + c_envs.append(c_env) + + self.c_envs = binding.vectorize(*c_envs) + + def reset(self, seed=0): + binding.vec_reset(self.c_envs, seed) + self.tick = 0 + return self.observations, [] + + def step(self, actions): + self.tick += 1 + self.actions[:] = actions + binding.vec_step(self.c_envs) + + info = [] + if self.tick % self.log_interval == 0: + log = binding.vec_log(self.c_envs) + if log: + info.append(log) + + return (self.observations, self.rewards, + self.terminals, self.truncations, info) + + def render(self): + binding.vec_render(self.c_envs, 0) + + def close(self): + binding.vec_close(self.c_envs) + +if __name__ == '__main__': + # Test with single 
environment, 2 agents + num_agents = 2 + env = Overcooked(num_envs=1, num_agents=num_agents) + env.reset() + steps = 0 + + import time + start = time.time() + + # Run for 10 seconds with random actions + while time.time() - start < 10: + # Random action from action space for both agents + actions = np.random.randint(0, 6, size=(num_agents,)) + obs, rewards, dones, truncs, info = env.step(actions) + + if env.render_mode: + env.render() + + steps += num_agents + + if any(dones): + env.reset() + + print('Overcooked SPS:', int(steps / (time.time() - start))) \ No newline at end of file diff --git a/pufferlib/ocean/overcooked/overcooked_items.h b/pufferlib/ocean/overcooked/overcooked_items.h new file mode 100644 index 000000000..980bef2c8 --- /dev/null +++ b/pufferlib/ocean/overcooked/overcooked_items.h @@ -0,0 +1,123 @@ +/* Overcooked Items: Item and cooking pot management functions. + */ + +#ifndef OVERCOOKED_ITEMS_H +#define OVERCOOKED_ITEMS_H + +#include "overcooked_types.h" + +static inline Item* get_item_at(Overcooked* env, int x, int y) { + int idx = env->item_grid[y * env->width + x]; + return (idx >= 0) ? 
&env->items[idx] : NULL; +} + +static void add_item(Overcooked* env, int type, int x, int y) { + if (env->num_items < env->max_items) { + int idx = env->num_items; + env->items[idx].type = type; + env->items[idx].x = x; + env->items[idx].y = y; + env->items[idx].state = 0; + env->items[idx].num_onions = 0; + env->items[idx].num_tomatoes = 0; + env->items[idx].total_ingredients = 0; + env->item_grid[y * env->width + x] = idx; + env->num_items++; + } +} + +static void remove_item(Overcooked* env, int x, int y) { + int idx = env->item_grid[y * env->width + x]; + if (idx < 0) return; + + env->item_grid[y * env->width + x] = -1; + + if (idx < env->num_items - 1) { + Item* last = &env->items[env->num_items - 1]; + env->items[idx] = *last; + env->item_grid[last->y * env->width + last->x] = idx; + } + env->num_items--; +} + +static void init_cooking_pots(Overcooked* env) { + env->num_stoves = 0; + for (int i = 0; i < env->width * env->height; i++) { + if (env->grid[i] == STOVE) { + env->num_stoves++; + } + } + + env->cooking_pots = calloc(env->num_stoves, sizeof(CookingPot)); + + int pot_index = 0; + for (int y = 0; y < env->height; y++) { + for (int x = 0; x < env->width; x++) { + if (env->grid[y * env->width + x] == STOVE) { + CookingPot* pot = &env->cooking_pots[pot_index]; + pot->cooking_state = NOT_COOKING; + pot->cooking_progress = 0; + pot->ingredient_count = 0; + pot->num_onions = 0; + pot->num_tomatoes = 0; + for (int i = 0; i < MAX_INGREDIENTS; i++) { + pot->ingredient_types[i] = NO_ITEM; + } + pot_index++; + } + } + } +} + +static void init_pot_indices(Overcooked* env) { + // Allocate pot index grid (same size as main grid) + env->pot_index_grid = calloc(env->width * env->height, sizeof(int)); + + // Initialize all cells to -1 (not a stove) + for (int i = 0; i < env->width * env->height; i++) { + env->pot_index_grid[i] = -1; + } + + // Map stove cells to their pot indices (same order as init_cooking_pots) + int pot_idx = 0; + for (int y = 0; y < env->height; 
y++) { + for (int x = 0; x < env->width; x++) { + if (env->grid[y * env->width + x] == STOVE) { + env->pot_index_grid[y * env->width + x] = pot_idx++; + } + } + } +} + +// O(1) pot lookup using precomputed index grid +static inline CookingPot* get_pot_at(Overcooked* env, int x, int y) { + int idx = env->pot_index_grid[y * env->width + x]; + return (idx >= 0) ? &env->cooking_pots[idx] : NULL; +} + +static void init_item_grid(Overcooked* env) { + env->item_grid = calloc(env->width * env->height, sizeof(int)); + for (int i = 0; i < env->width * env->height; i++) { + env->item_grid[i] = -1; + } +} + +static void reset_item_grid(Overcooked* env) { + for (int i = 0; i < env->width * env->height; i++) { + env->item_grid[i] = -1; + } +} + +static void update_cooking(Overcooked* env) { + for (int i = 0; i < env->num_stoves; i++) { + CookingPot* pot = &env->cooking_pots[i]; + if (pot->cooking_state == COOKING) { + pot->cooking_progress++; + if (pot->cooking_progress >= COOKING_TIME) { + pot->cooking_state = COOKED; + } + } + } +} + +#endif // OVERCOOKED_ITEMS_H diff --git a/pufferlib/ocean/overcooked/overcooked_logic.h b/pufferlib/ocean/overcooked/overcooked_logic.h new file mode 100644 index 000000000..a2f5a58a6 --- /dev/null +++ b/pufferlib/ocean/overcooked/overcooked_logic.h @@ -0,0 +1,255 @@ +/* Overcooked Logic: Game logic functions (parsing, interaction, movement). 
+ */ + +#ifndef OVERCOOKED_LOGIC_H +#define OVERCOOKED_LOGIC_H + +#include "overcooked_types.h" +#include "overcooked_items.h" + +// Forward declaration for circular dependency +static void evaluate_dish_served(Overcooked* env, Agent* agent, int agent_idx); + +static void parse_grid(Overcooked* env) { + const LayoutInfo* layout = get_layout_info(env->layout_id); + for (int y = 0; y < env->height; y++) { + for (int x = 0; x < env->width; x++) { + char tile = get_layout_tile(layout, x, y); + int idx = y * env->width + x; + switch (tile) { + case '#': env->grid[idx] = WALL; break; + case '1': env->grid[idx] = COUNTER; break; + case '2': env->grid[idx] = STOVE; break; + case '3': env->grid[idx] = CUTTING_BOARD; break; + case '4': env->grid[idx] = INGREDIENT_BOX; break; + case '5': env->grid[idx] = SERVING_AREA; break; + case '6': env->grid[idx] = WALL; break; + case '7': env->grid[idx] = PLATE_BOX; break; + default: env->grid[idx] = EMPTY; break; + } + } + } +} + +static void init_static_cache(Overcooked* env) { + // Precompute normalization factors + env->cache.inv_width = 1.0f / env->width; + env->cache.inv_height = 1.0f / env->height; + + // Reset counts + env->cache.ingredient_box_count = 0; + env->cache.plate_box_count = 0; + env->cache.serving_area_count = 0; + env->cache.stove_count = 0; + env->cache.counter_count = 0; + + // Scan grid once and cache all static tile positions + for (int y = 0; y < env->height; y++) { + for (int x = 0; x < env->width; x++) { + int tile = env->grid[y * env->width + x]; + switch (tile) { + case INGREDIENT_BOX: + env->cache.ingredient_box_positions[env->cache.ingredient_box_count * 2] = x; + env->cache.ingredient_box_positions[env->cache.ingredient_box_count * 2 + 1] = y; + env->cache.ingredient_box_count++; + break; + case PLATE_BOX: + env->cache.plate_box_positions[env->cache.plate_box_count * 2] = x; + env->cache.plate_box_positions[env->cache.plate_box_count * 2 + 1] = y; + env->cache.plate_box_count++; + break; + case 
SERVING_AREA:
+                    env->cache.serving_area_positions[env->cache.serving_area_count * 2] = x;
+                    env->cache.serving_area_positions[env->cache.serving_area_count * 2 + 1] = y;
+                    env->cache.serving_area_count++;
+                    break;
+                case STOVE:
+                    env->cache.stove_positions[env->cache.stove_count * 2] = x;
+                    env->cache.stove_positions[env->cache.stove_count * 2 + 1] = y;
+                    env->cache.stove_count++;
+                    break;
+                case COUNTER:
+                    env->cache.counter_positions[env->cache.counter_count * 2] = x;
+                    env->cache.counter_positions[env->cache.counter_count * 2 + 1] = y;
+                    env->cache.counter_count++;
+                    break;
+            }
+        }
+    }
+}
+
+// Agent occupancy is kept as a bitmask: cell (x, y) maps to bit (y * width + x).
+// NOTE(review): the 1ULL shift is only defined for bit indices below 64, so this
+// presumably relies on width * height <= 64 for every layout - confirm.
+
+// Mark cell (x, y) as occupied by an agent.
+static inline void set_agent_position(Overcooked* env, int x, int y) {
+    env->agent_position_mask |= (1ULL << (y * env->width + x));
+}
+
+// Mark cell (x, y) as no longer occupied by an agent.
+static inline void clear_agent_position(Overcooked* env, int x, int y) {
+    env->agent_position_mask &= ~(1ULL << (y * env->width + x));
+}
+
+// Return 1 if the occupancy bit for cell (x, y) is set, 0 otherwise.
+static inline int is_agent_at(Overcooked* env, int x, int y) {
+    return (env->agent_position_mask >> (y * env->width + x)) & 1;
+}
+
+// Return 1 if (x, y) is inside the grid, is an EMPTY tile and holds no agent; 0 otherwise.
+// excluding_agent is accepted for interface compatibility but is not used.
+static int is_valid_position(Overcooked* env, int x, int y, int excluding_agent) {
+    (void)excluding_agent;
+    if (x < 0 || x >= env->width || y < 0 || y >= env->height) {
+        return 0;
+    }
+    if (env->grid[y * env->width + x] != EMPTY) {
+        return 0;
+    }
+    if (is_agent_at(env, x, y)) {
+        return 0;
+    }
+    return 1;
+}
+
+// Resolve an INTERACT action for agent agent_idx against the tile it is facing.
+// Cases are tried in this order, and the first one that matches ends the call:
+//   1. stove with a pot: add ingredient / start cooking / plate the cooked soup
+//   2. serving area while holding plated soup: score the dish
+//   3. holding something: place it on a free counter or cutting board, or discard it on an empty tile
+//   4. empty-handed: pick up an item lying on the tile, or take from an ingredient/plate box
+// Shaping rewards are added to env->rewards[agent_idx]; counters go to env->log.
+static void handle_interaction(Overcooked* env, int agent_idx) {
+    Agent* agent = &env->agents[agent_idx];
+    int target_x = agent->x;
+    int target_y = agent->y;
+
+    switch (agent->facing_direction) {
+        case 0: target_y -= 1; break;  // Up
+        case 1: target_y += 1; break;  // Down
+        case 2: target_x -= 1; break;  // Left
+        case 3: target_x += 1; break;  // Right
+    }
+
+    // Facing off the edge of the grid: nothing to interact with
+    if (target_x < 0 || target_x >= env->width || target_y < 0 || target_y >= env->height) {
+        return;
+    }
+
+    int tile = env->grid[target_y * env->width + target_x];
+    Item* item = get_item_at(env, target_x, target_y);
+    CookingPot* pot = get_pot_at(env, target_x, target_y);
+
+    if (tile == STOVE && pot != NULL) {
+        if (agent->held_item == ONION || agent->held_item == TOMATO) {
+            // Ingredients only go into a pot that is not cooking and not full
+            if (pot->cooking_state == NOT_COOKING && pot->ingredient_count < MAX_INGREDIENTS) {
+                pot->ingredient_types[pot->ingredient_count] = agent->held_item;
+                pot->ingredient_count++;
+                if (agent->held_item == ONION) {
+                    pot->num_onions++;
+                    // Only onions earn the ingredient_added shaping reward
+                    env->rewards[agent_idx] += env->rewards_config.ingredient_added;
+                } else if (agent->held_item == TOMATO) {
+                    pot->num_tomatoes++;
+                }
+                agent->held_item = NO_ITEM;
+            }
+        }
+        else if (agent->held_item == NO_ITEM && pot->ingredient_count > 0) {
+            if (pot->cooking_state == NOT_COOKING) {
+                // Empty-handed interaction starts cooking whatever is in the pot
+                pot->cooking_state = COOKING;
+                pot->cooking_progress = 0;
+                env->log.pots_started++;
+                // The pot_started reward is paid only for a 3-onion pot
+                if (pot->num_onions == 3) {
+                    env->rewards[agent_idx] += env->rewards_config.pot_started;
+                }
+            }
+            else if (pot->cooking_state == COOKED) {
+                // A cooked soup cannot be taken without a plate
+                return;
+            }
+        }
+        else if (agent->held_item == PLATE && pot->cooking_state == COOKED) {
+            // Plate the soup: the agent carries the pot's ingredient counts with it
+            agent->held_item = PLATED_SOUP;
+            agent->held_soup_onions = pot->num_onions;
+            agent->held_soup_tomatoes = pot->num_tomatoes;
+            agent->held_soup_total = pot->ingredient_count;
+
+            env->rewards[agent_idx] += env->rewards_config.soup_plated;
+
+            // Reset the pot so it can be refilled
+            pot->cooking_state = NOT_COOKING;
+            pot->cooking_progress = 0;
+            pot->ingredient_count = 0;
+            pot->num_onions = 0;
+            pot->num_tomatoes = 0;
+            for (int i = 0; i < MAX_INGREDIENTS; i++) {
+                pot->ingredient_types[i] = NO_ITEM;
+            }
+        }
+        return;
+    }
+
+    if (tile == SERVING_AREA && agent->held_item == PLATED_SOUP) {
+        evaluate_dish_served(env, agent, agent_idx);
+
+        agent->held_item = NO_ITEM;
+        agent->held_soup_onions = 0;
+        agent->held_soup_tomatoes = 0;
+        agent->held_soup_total = 0;
+        return;
+    }
+
+    if (agent->held_item != NO_ITEM) {
+        if ((tile == COUNTER || tile == CUTTING_BOARD) && item == NULL) {
+            if (agent->held_item == PLATED_SOUP) {
+                // The placed soup item inherits the ingredient counts the agent was carrying
+                add_item(env, agent->held_item, target_x, target_y);
+                Item* placed_soup = get_item_at(env, target_x, target_y);
+                if (placed_soup) {
+                    placed_soup->num_onions = agent->held_soup_onions;
+                    placed_soup->num_tomatoes = agent->held_soup_tomatoes;
+                    placed_soup->total_ingredients = agent->held_soup_total;
+                }
+                agent->held_soup_onions = 0;
+                agent->held_soup_tomatoes = 0;
+                agent->held_soup_total = 0;
+            } else {
+                add_item(env, agent->held_item, target_x, target_y);
+            }
+            agent->held_item = NO_ITEM;
+            env->log.items_dropped++;
+        } else if ((tile == EMPTY) && item == NULL) {
+            // Dropping onto a floor tile discards the item: no add_item call is made
+            agent->held_item = NO_ITEM;
+            env->log.items_dropped++;
+        }
+    }
+    else {
+        if (item != NULL) {
+            if (item->type == PLATED_SOUP) {
+                agent->held_soup_onions = item->num_onions;
+                agent->held_soup_tomatoes = item->num_tomatoes;
+                agent->held_soup_total = item->total_ingredients;
+            }
+            // Read the type before remove_item, which may invalidate the item pointer
+            agent->held_item = item->type;
+            remove_item(env, target_x, target_y);
+        }
+        else if (tile == INGREDIENT_BOX) {
+            // TODO @mmbajo: What if we have Tomatoes as well?
+            // Add logs for each ingredient type
+            agent->held_item = ONION;  // Always gives onions for now
+            env->log.ingredients_picked++;
+            env->rewards[agent_idx] += env->rewards_config.ingredient_picked;
+        }
+        else if (tile == PLATE_BOX) {
+            agent->held_item = PLATE;
+            env->rewards[agent_idx] += env->rewards_config.plate_picked;
+        }
+    }
+}
+
+// Score a soup delivered by agent_idx. The recipe is correct iff the soup holds exactly 3 onions.
+// Correct: the server gets dish_served_agent, every agent gets dish_served_whole_team, and the
+// log gains 25 / ticks_since_reward in score and perf before the tick counter is reset.
+// Wrong: wrong_dish_served is applied. dishes_served is incremented in both cases.
+static void evaluate_dish_served(Overcooked* env, Agent* agent, int agent_idx) {
+    int is_correct_recipe = (agent->held_soup_onions == 3);
+
+    if (is_correct_recipe) {
+        env->rewards[agent_idx] += env->rewards_config.dish_served_agent;
+        for (int i = 0; i < env->num_agents; i++) {
+            env->rewards[i] += env->rewards_config.dish_served_whole_team;
+        }
+        // Clamp the divisor: a zero tick counter would otherwise put inf into
+        // log.score / log.perf and poison every aggregate computed from them.
+        double ticks = (agent->ticks_since_reward > 0) ? (double)agent->ticks_since_reward : 1.0;
+        env->log.episode_length += agent->ticks_since_reward;
+        env->log.score += 25.0 / ticks;
+        env->log.perf += 25.0 / ticks;
+        agent->ticks_since_reward = 0;
+        env->log.correct_dishes++;
+        env->log.n++;
+    } else {
+        // NOTE(review): the serving agent receives wrong_dish_served twice (once here,
+        // once in the team loop below) - confirm whether that is intended.
+        env->rewards[agent_idx] += env->rewards_config.wrong_dish_served;
+        for (int i = 0; i < env->num_agents; i++) {
+            env->rewards[i] += env->rewards_config.wrong_dish_served;
+        }
+        env->log.wrong_dishes++;
+    }
+    env->log.dishes_served++;
+}
+
+#endif // OVERCOOKED_LOGIC_H
diff --git a/pufferlib/ocean/overcooked/overcooked_obs.h b/pufferlib/ocean/overcooked/overcooked_obs.h
new file mode 100644
index 000000000..c349a71f1
--- /dev/null
+++ b/pufferlib/ocean/overcooked/overcooked_obs.h
@@ -0,0 +1,292 @@
+/* Overcooked Observations: Observation computation functions.
+ */
+
+#ifndef OVERCOOKED_OBS_H
+#define OVERCOOKED_OBS_H
+
+#include "overcooked_types.h"
+#include "overcooked_items.h"
+
+// Find the PLATED_SOUP item closest to the agent by Manhattan distance.
+// Writes the offset to it, scaled by cache.inv_width / cache.inv_height, into *dx / *dy
+// and returns the item. Returns NULL with (0, 0) when the agent already holds a
+// plated soup or none is within distance 1000.
+static Item* find_nearest_plated_soup(Overcooked* env, Agent* agent, float* dx, float* dy) {
+    *dx = 0.0f;
+    *dy = 0.0f;
+
+    if (agent->held_item == PLATED_SOUP) return NULL;
+
+    Item* nearest = NULL;
+    float min_dist = 1000.0f;
+    for (int i = 0; i < env->num_items; i++) {
+        if (env->items[i].type == PLATED_SOUP) {
+            float dist = (float)(abs(env->items[i].x - (int)agent->x) + abs(env->items[i].y - (int)agent->y));
+            if (dist < min_dist) {
+                min_dist = dist;
+                nearest = &env->items[i];
+                *dx = (env->items[i].x - agent->x) * env->cache.inv_width;
+                *dy = (env->items[i].y - agent->y) * env->cache.inv_height;
+            }
+        }
+    }
+    return nearest;
+}
+
+// Same search as find_nearest_plated_soup but for an arbitrary item_type, and without
+// returning the item. Leaves (0, 0) in *dx / *dy when the agent already holds that type
+// or no such item is found.
+static void find_nearest_item_by_type(Overcooked* env, Agent* agent,
+                                      int item_type, float* dx, float* dy) {
+    *dx = 0.0f;
+    *dy = 0.0f;
+
+    if (agent->held_item == item_type) return;
+
+    float min_dist = 1000.0f;
+    for (int i = 0; i < env->num_items; i++) {
+        if (env->items[i].type == item_type) {
+            float dist = (float)(abs(env->items[i].x - (int)agent->x) +
+                                 abs(env->items[i].y - (int)agent->y));
+            if (dist < min_dist) {
+                min_dist = dist;
+                *dx = (env->items[i].x - agent->x) * env->cache.inv_width;
+                *dy = (env->items[i].y - agent->y) * env->cache.inv_height;
+            }
+        }
+    }
+}
+
+// Cached version: iterate over precomputed tile positions instead of scanning grid
+static void compute_tile_proximity_cached(Overcooked* env, Agent* agent,
+                                          int* positions, int count,
+                                          float* dx, float* dy) {
+    *dx = 0.0f;
+    *dy = 0.0f;
+
+    
int min_dist = 1000;
+    int best_x = 0, best_y = 0;
+
+    // positions is a flat array of (x, y) pairs; pick the pair with the smallest Manhattan distance
+    for (int i = 0; i < count; i++) {
+        int x = positions[i * 2];
+        int y = positions[i * 2 + 1];
+        int dist = abs(x - (int)agent->x) + abs(y - (int)agent->y);
+        if (dist < min_dist) {
+            min_dist = dist;
+            best_x = x;
+            best_y = y;
+        }
+    }
+
+    // min_dist stays at 1000 when count is 0, leaving (dx, dy) at (0, 0)
+    if (min_dist < 1000) {
+        *dx = (best_x - agent->x) * env->cache.inv_width;
+        *dy = (best_y - agent->y) * env->cache.inv_height;
+    }
+}
+
+
+// Write into *dx / *dy the scaled offset from (agent_x, agent_y) to the nearest cached
+// counter whose item_grid entry is negative. Leaves (0, 0) when no such counter exists.
+// NOTE(review): a negative item_grid entry presumably means "no item on this cell" - confirm.
+static void find_nearest_empty_counter(Overcooked* env, int agent_x, int agent_y, float* dx, float* dy) {
+    *dx = 0.0f;
+    *dy = 0.0f;
+    int min_dist = 1000;
+
+    // Iterate cached counter positions instead of scanning entire grid
+    for (int i = 0; i < env->cache.counter_count; i++) {
+        int x = env->cache.counter_positions[i * 2];
+        int y = env->cache.counter_positions[i * 2 + 1];
+
+        if (env->item_grid[y * env->width + x] < 0) {
+            int dist = abs(x - agent_x) + abs(y - agent_y);
+            if (dist < min_dist) {
+                min_dist = dist;
+                *dx = (x - agent_x) * env->cache.inv_width;
+                *dy = (y - agent_y) * env->cache.inv_height;
+            }
+        }
+    }
+}
+
+// Fill env->observations with one observation vector per agent.
+// Each agent's slice starts at agent_idx * env->observation_size and is zeroed before being
+// written, so slots that are skipped below read as 0.
+static void compute_observations(Overcooked* env) {
+    // 43-dimensional observation vector for each agent
+    // Structure per agent:
+    // - Player features: 38 dims (4 orientation + 4 held + 16 proximity + 2 nearest soup ingredients + 2 pot soup ingredients + 1 pot exist + 4 pot state + 1 cook time + 4 walls)
+    // - Teammate relative position: 2 dims
+    // - Absolute position: 2 dims
+    // - Reward: 1 dim
+    // Total: 43 dims
+    // Proximity: onion box, plate box, plated soup, serving, empty counter, pot, pickable onion, pickable plate
+
+    for (int agent_idx = 0; agent_idx < env->num_agents; agent_idx++) {
+        Agent* agent = &env->agents[agent_idx];
+        float* obs = &env->observations[agent_idx * env->observation_size];
+        int obs_idx = 0;
+
+        memset(obs, 0, env->observation_size * sizeof(float));
+
+        // === PLAYER-SPECIFIC FEATURES (38 dims) ===
+
+        // 1. Orientation (one-hot, 4 dims)
+        obs[obs_idx + agent->facing_direction] = 1.0f;
+        obs_idx += 4;
+
+        // 2. Held object (one-hot: onion, soup, dish, tomato, empty - 5 dims but we use 4)
+        if (agent->held_item == NO_ITEM) {
+            obs[obs_idx + 3] = 1.0f;  // Empty
+        } else if (agent->held_item == ONION) {
+            obs[obs_idx + 0] = 1.0f;
+        } else if (agent->held_item == PLATED_SOUP) {
+            obs[obs_idx + 1] = 1.0f;  // Soup
+        } else if (agent->held_item == PLATE) {
+            obs[obs_idx + 2] = 1.0f;  // Dish
+        }
+        // Note: We don't use tomatoes in this version, keeping slot for compatibility
+        // (a held TOMATO therefore leaves all four slots at 0)
+        obs_idx += 4;
+
+        // 3. Proximity to key objects (dx, dy for each, 16 dims total)
+        float dx, dy;
+
+        // Nearest onion source (ingredient box) - returns (0,0) if holding onion
+        if (agent->held_item == ONION) {
+            dx = 0.0f;
+            dy = 0.0f;
+        } else {
+            compute_tile_proximity_cached(env, agent,
+                env->cache.ingredient_box_positions, env->cache.ingredient_box_count,
+                &dx, &dy);
+        }
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // Nearest dish (plate box) - returns (0,0) if holding plate
+        if (agent->held_item == PLATE) {
+            dx = 0.0f;
+            dy = 0.0f;
+        } else {
+            compute_tile_proximity_cached(env, agent,
+                env->cache.plate_box_positions, env->cache.plate_box_count,
+                &dx, &dy);
+        }
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // Nearest soup (plated soup) - returns (0,0) if holding soup or none exists
+        // The returned item is reused in section 4 for its ingredient counts
+        Item* nearest_soup = find_nearest_plated_soup(env, agent, &dx, &dy);
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // Nearest serving area
+        compute_tile_proximity_cached(env, agent,
+            env->cache.serving_area_positions, env->cache.serving_area_count,
+            &dx, &dy);
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // Nearest empty counter - special case, needs custom handling
+        find_nearest_empty_counter(env, agent->x, agent->y, &dx, &dy);
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // Nearest pot (stove)
+        compute_tile_proximity_cached(env, agent,
+            env->cache.stove_positions, env->cache.stove_count,
+            &dx, &dy);
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // Nearest pickable onion on counter (not in box)
+        find_nearest_item_by_type(env, agent, ONION, &dx, &dy);
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // Nearest pickable plate on counter (not in box)
+        find_nearest_item_by_type(env, agent, PLATE, &dx, &dy);
+        obs[obs_idx++] = dx;
+        obs[obs_idx++] = dy;
+
+        // 4. Nearest soup ingredients (2 dims: onions, tomatoes in nearest plated soup or held soup)
+        if (agent->held_item == PLATED_SOUP) {
+            obs[obs_idx++] = agent->held_soup_onions / (float)MAX_INGREDIENTS;
+            obs[obs_idx++] = agent->held_soup_tomatoes / (float)MAX_INGREDIENTS;
+        } else if (nearest_soup) {
+            obs[obs_idx++] = nearest_soup->num_onions / (float)MAX_INGREDIENTS;
+            obs[obs_idx++] = nearest_soup->num_tomatoes / (float)MAX_INGREDIENTS;
+        } else {
+            obs[obs_idx++] = 0.0f;
+            obs[obs_idx++] = 0.0f;
+        }
+
+        // 5. Pot soup ingredients (2 dims: onion count, always 0 for tomatoes in nearest pot)
+        // Find nearest pot using cached stove positions
+        int min_pot_dist = 1000;
+        CookingPot* nearest_pot = NULL;
+
+        for (int i = 0; i < env->cache.stove_count; i++) {
+            int x = env->cache.stove_positions[i * 2];
+            int y = env->cache.stove_positions[i * 2 + 1];
+            int dist = abs(x - (int)agent->x) + abs(y - (int)agent->y);
+            if (dist < min_pot_dist) {
+                min_pot_dist = dist;
+                nearest_pot = get_pot_at(env, x, y);
+            }
+        }
+
+        if (nearest_pot) {
+            obs[obs_idx++] = nearest_pot->num_onions / (float)MAX_INGREDIENTS;
+            obs[obs_idx++] = 0.0f;  // No tomatoes in our version
+        } else {
+            obs[obs_idx++] = 0.0f;
+            obs[obs_idx++] = 0.0f;
+        }
+
+        // 6. Reachable pot existence (1 dim)
+        obs[obs_idx++] = (nearest_pot != NULL) ? 1.0f : 0.0f;
+
+        // 7. Pot state flags (4 dims: empty, full, cooking, ready)
+        if (nearest_pot) {
+            obs[obs_idx++] = (nearest_pot->ingredient_count == 0) ? 1.0f : 0.0f;  // Empty
+            obs[obs_idx++] = (nearest_pot->ingredient_count == MAX_INGREDIENTS) ? 1.0f : 0.0f;  // Full (exactly MAX_INGREDIENTS)
+            obs[obs_idx++] = (nearest_pot->cooking_state == COOKING) ? 1.0f : 0.0f;  // Cooking
+            obs[obs_idx++] = (nearest_pot->cooking_state == COOKED) ? 1.0f : 0.0f;  // Ready
+        } else {
+            obs_idx += 4;  // Skip pot state if no pot found
+        }
+
+        // 8. Remaining cooking time (1 dim)
+        if (nearest_pot && nearest_pot->cooking_state == COOKING) {
+            float remaining = (COOKING_TIME - nearest_pot->cooking_progress) / (float)COOKING_TIME;
+            obs[obs_idx++] = remaining;
+        } else {
+            obs[obs_idx++] = 0.0f;
+        }
+
+        // 9. Wall detection (4 dims: up, down, left, right)
+        // Check each direction for any non-EMPTY tile (walls, stoves, counters, serving area, ingredient box, plate box, cutting board - all are non-walkable)
+        // Neighbours outside the grid are treated as WALL
+        int wall_up = (agent->y > 0) ? env->grid[((int)agent->y - 1) * env->width + (int)agent->x] : WALL;
+        int wall_down = (agent->y < env->height - 1) ? env->grid[((int)agent->y + 1) * env->width + (int)agent->x] : WALL;
+        int wall_left = (agent->x > 0) ? env->grid[(int)agent->y * env->width + ((int)agent->x - 1)] : WALL;
+        int wall_right = (agent->x < env->width - 1) ? env->grid[(int)agent->y * env->width + ((int)agent->x + 1)] : WALL;
+
+        obs[obs_idx++] = (wall_up != EMPTY) ? 1.0f : 0.0f;
+        obs[obs_idx++] = (wall_down != EMPTY) ? 1.0f : 0.0f;
+        obs[obs_idx++] = (wall_left != EMPTY) ? 1.0f : 0.0f;
+        obs[obs_idx++] = (wall_right != EMPTY) ? 1.0f : 0.0f;
+
+        // === TEAMMATE RELATIVE POSITION (2 dims) ===
+        // Find teammate (other agent): agent 0 looks at agent 1, every other agent looks at agent 0
+        int teammate_idx = (agent_idx == 0) ? 1 : 0;
+        if (teammate_idx < env->num_agents) {
+            Agent* teammate = &env->agents[teammate_idx];
+            obs[obs_idx++] = (teammate->x - agent->x) / (float)env->width;
+            obs[obs_idx++] = (teammate->y - agent->y) / (float)env->height;
+        } else {
+            // No teammate, set relative position to 0
+            obs[obs_idx++] = 0.0f;
+            obs[obs_idx++] = 0.0f;
+        }
+
+        // === ABSOLUTE POSITION (2 dims) ===
+        obs[obs_idx++] = agent->x / (float)env->width;
+        obs[obs_idx++] = agent->y / (float)env->height;
+
+        // === REWARD (1 dim) ===
+        obs[obs_idx++] = env->rewards[agent_idx];
+
+        // Total should be 43 dims (38 player features + 2 teammate relative position + 2 absolute position + 1 reward)
+        // Debug check removed - was only useful on first step
+    }
+}
+
+#endif // OVERCOOKED_OBS_H
diff --git a/pufferlib/ocean/overcooked/overcooked_render.h b/pufferlib/ocean/overcooked/overcooked_render.h
new file mode 100644
index 000000000..971577350
--- /dev/null
+++ b/pufferlib/ocean/overcooked/overcooked_render.h
@@ -0,0 +1,511 @@
+/* Overcooked Render: All rendering and texture management functions.
+ */
+ 
+ #ifndef OVERCOOKED_RENDER_H
+ #define OVERCOOKED_RENDER_H
+ 
+ #include "overcooked_types.h"
+ #include "overcooked_items.h"
+ 
+ // Fallback colour for an agent drawn as a plain rectangle (used when its chef texture is unavailable).
+ static Color get_agent_color(int held_item) {
+     switch (held_item) {
+         case NO_ITEM:
+             return BLUE;  // Blue when empty-handed
+         case TOMATO:
+             return (Color){200, 50, 50, 255};  // Dark red when holding tomato
+         case ONION:
+             return (Color){255, 200, 100, 255};  // Light orange when holding onion
+         case PLATE:
+             return (Color){200, 200, 220, 255};  // Light blue-gray when holding plate
+         case SOUP:
+             return (Color){255, 140, 0, 255};  // Orange when holding soup
+         case PLATED_SOUP:
+             return (Color){255, 165, 0, 255};  // Brighter orange when holding plated soup
+         default:
+             return BLUE;  // Default to blue
+     }
+ }
+ 
+ // Unload every texture held by the client, then close the raylib window.
+ // The Client struct itself is not freed here.
+ static void unload_textures(Client* client) {
+     UnloadTexture(client->floor);
+     UnloadTexture(client->counter);
+     UnloadTexture(client->pot);
+     UnloadTexture(client->serve);
+     UnloadTexture(client->onions_box);
+     UnloadTexture(client->tomatoes_box);
+     UnloadTexture(client->dishes_box);
+     UnloadTexture(client->wall);
+ 
+     UnloadTexture(client->onion);
+     UnloadTexture(client->tomato);
+     UnloadTexture(client->dish);
+     UnloadTexture(client->soup_onion);
+     UnloadTexture(client->soup_tomato);
+     UnloadTexture(client->soup_onion_dish);
+     UnloadTexture(client->soup_tomato_dish);
+ 
+     UnloadTexture(client->soup_onion_cooking_1);
+     UnloadTexture(client->soup_onion_cooking_2);
+     UnloadTexture(client->soup_onion_cooking_3);
+     UnloadTexture(client->soup_onion_cooked);
+     UnloadTexture(client->soup_tomato_cooking_1);
+     UnloadTexture(client->soup_tomato_cooking_2);
+     UnloadTexture(client->soup_tomato_cooking_3);
+     UnloadTexture(client->soup_tomato_cooked);
+ 
+     UnloadTexture(client->chef_north);
+     UnloadTexture(client->chef_south);
+     UnloadTexture(client->chef_east);
+     UnloadTexture(client->chef_west);
+     UnloadTexture(client->chef_north_onion);
+     UnloadTexture(client->chef_south_onion);
+     UnloadTexture(client->chef_east_onion);
+     UnloadTexture(client->chef_west_onion);
+     UnloadTexture(client->chef_north_tomato);
+     UnloadTexture(client->chef_south_tomato);
+     UnloadTexture(client->chef_east_tomato);
+     UnloadTexture(client->chef_west_tomato);
+     UnloadTexture(client->chef_north_dish);
+     UnloadTexture(client->chef_south_dish);
+     UnloadTexture(client->chef_east_dish);
+     UnloadTexture(client->chef_west_dish);
+     UnloadTexture(client->chef_north_soup_onion);
+     UnloadTexture(client->chef_south_soup_onion);
+     UnloadTexture(client->chef_east_soup_onion);
+     UnloadTexture(client->chef_west_soup_onion);
+     UnloadTexture(client->chef_north_soup_tomato);
+     UnloadTexture(client->chef_south_soup_tomato);
+     UnloadTexture(client->chef_east_soup_tomato);
+     UnloadTexture(client->chef_west_soup_tomato);
+ 
+     CloseWindow();
+ }
+ 
+ // Draw one frame: score header, grid tiles, pots, loose items, agents, and a debug
+ // panel listing agent 0's observation vector. On the first call (env->client == NULL)
+ // the window is created and all textures are loaded. Pressing ESC exits the process.
+ // Anything whose texture id is 0 falls back to plain shapes or is skipped.
+ void c_render(Overcooked* env) {
+     if (env->client == NULL) {
+         // Extra 350 px on the right for the observation panel, 80 px on top for the header
+         int window_width = env->width * env->grid_size + 350;
+         int window_height = env->height * env->grid_size + 80;
+         InitWindow(window_width, window_height, "PufferLib Overcooked");
+         SetTargetFPS(16);
+         env->client = (Client*)calloc(1, sizeof(Client));
+ 
+         env->client->floor = LoadTexture("pufferlib/resources/overcooked/terrain/floor.png");
+         env->client->counter = LoadTexture("pufferlib/resources/overcooked/terrain/counter.png");
+         env->client->pot = LoadTexture("pufferlib/resources/overcooked/terrain/pot.png");
+         env->client->serve = LoadTexture("pufferlib/resources/overcooked/terrain/serve.png");
+         env->client->onions_box = LoadTexture("pufferlib/resources/overcooked/terrain/onions.png");
+         env->client->tomatoes_box = LoadTexture("pufferlib/resources/overcooked/terrain/tomatoes.png");
+         env->client->dishes_box = LoadTexture("pufferlib/resources/overcooked/terrain/dishes.png");
+         // Walls reuse the counter artwork
+         env->client->wall = LoadTexture("pufferlib/resources/overcooked/terrain/counter.png");
+ 
+         env->client->onion = LoadTexture("pufferlib/resources/overcooked/objects/onion.png");
+         env->client->tomato = LoadTexture("pufferlib/resources/overcooked/objects/tomato.png");
+         env->client->dish = LoadTexture("pufferlib/resources/overcooked/objects/dish.png");
+         env->client->soup_onion = LoadTexture("pufferlib/resources/overcooked/objects/soup-onion-cooked.png");
+         env->client->soup_tomato = LoadTexture("pufferlib/resources/overcooked/objects/soup-tomato-cooked.png");
+         env->client->soup_onion_dish = LoadTexture("pufferlib/resources/overcooked/objects/soup-onion-dish.png");
+         env->client->soup_tomato_dish = LoadTexture("pufferlib/resources/overcooked/objects/soup-tomato-dish.png");
+ 
+         env->client->soup_onion_cooking_1 = LoadTexture("pufferlib/resources/overcooked/objects/soup-onion-1-cooking.png");
+         env->client->soup_onion_cooking_2 = LoadTexture("pufferlib/resources/overcooked/objects/soup-onion-2-cooking.png");
+         env->client->soup_onion_cooking_3 = LoadTexture("pufferlib/resources/overcooked/objects/soup-onion-3-cooking.png");
+         env->client->soup_onion_cooked = LoadTexture("pufferlib/resources/overcooked/objects/soup-onion-cooked.png");
+         env->client->soup_tomato_cooking_1 = LoadTexture("pufferlib/resources/overcooked/objects/soup-tomato-1-cooking.png");
+         env->client->soup_tomato_cooking_2 = LoadTexture("pufferlib/resources/overcooked/objects/soup-tomato-2-cooking.png");
+         env->client->soup_tomato_cooking_3 = LoadTexture("pufferlib/resources/overcooked/objects/soup-tomato-3-cooking.png");
+         env->client->soup_tomato_cooked = LoadTexture("pufferlib/resources/overcooked/objects/soup-tomato-cooked.png");
+ 
+         env->client->chef_north = LoadTexture("pufferlib/resources/overcooked/chefs/NORTH.png");
+         env->client->chef_south = LoadTexture("pufferlib/resources/overcooked/chefs/SOUTH.png");
+         env->client->chef_east = LoadTexture("pufferlib/resources/overcooked/chefs/EAST.png");
+         env->client->chef_west = LoadTexture("pufferlib/resources/overcooked/chefs/WEST.png");
+         env->client->chef_north_onion = LoadTexture("pufferlib/resources/overcooked/chefs/NORTH-onion.png");
+         env->client->chef_south_onion = LoadTexture("pufferlib/resources/overcooked/chefs/SOUTH-onion.png");
+         env->client->chef_east_onion = LoadTexture("pufferlib/resources/overcooked/chefs/EAST-onion.png");
+         env->client->chef_west_onion = LoadTexture("pufferlib/resources/overcooked/chefs/WEST-onion.png");
+         env->client->chef_north_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/NORTH-tomato.png");
+         env->client->chef_south_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/SOUTH-tomato.png");
+         env->client->chef_east_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/EAST-tomato.png");
+         env->client->chef_west_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/WEST-tomato.png");
+         env->client->chef_north_dish = LoadTexture("pufferlib/resources/overcooked/chefs/NORTH-dish.png");
+         env->client->chef_south_dish = LoadTexture("pufferlib/resources/overcooked/chefs/SOUTH-dish.png");
+         env->client->chef_east_dish = LoadTexture("pufferlib/resources/overcooked/chefs/EAST-dish.png");
+         env->client->chef_west_dish = LoadTexture("pufferlib/resources/overcooked/chefs/WEST-dish.png");
+ 
+         env->client->chef_north_soup_onion = LoadTexture("pufferlib/resources/overcooked/chefs/NORTH-soup-onion.png");
+         env->client->chef_south_soup_onion = LoadTexture("pufferlib/resources/overcooked/chefs/SOUTH-soup-onion.png");
+         env->client->chef_east_soup_onion = LoadTexture("pufferlib/resources/overcooked/chefs/EAST-soup-onion.png");
+         env->client->chef_west_soup_onion = LoadTexture("pufferlib/resources/overcooked/chefs/WEST-soup-onion.png");
+         env->client->chef_north_soup_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/NORTH-soup-tomato.png");
+         env->client->chef_south_soup_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/SOUTH-soup-tomato.png");
+         env->client->chef_east_soup_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/EAST-soup-tomato.png");
+         env->client->chef_west_soup_tomato = LoadTexture("pufferlib/resources/overcooked/chefs/WEST-soup-tomato.png");
+     }
+ 
+     if (IsKeyDown(KEY_ESCAPE)) exit(0);
+ 
+     BeginDrawing();
+     ClearBackground((Color){240, 240, 240, 255});
+ 
+     DrawText(TextFormat("Correct Dishes: %d", (int)env->log.n), 10, 10, 20, BLACK);
+     DrawText(TextFormat("Total Dishes: %d", (int)env->log.dishes_served), 10, 35, 20, BLACK);
+     DrawText("Recipe: 3 Onions", 10, 60, 16, DARKGRAY);
+ 
+     // The grid is drawn below the 80 px text header
+     int grid_offset_y = 80;
+     for (int y = 0; y < env->height; y++) {
+         for (int x = 0; x < env->width; x++) {
+             int idx = y * env->width + x;
+             Rectangle dest = {x * env->grid_size, y * env->grid_size + grid_offset_y, env->grid_size, env->grid_size};
+ 
+             // Floor goes under every cell; the tile texture (if any) is drawn on top
+             if (env->client->floor.id != 0) {
+                 DrawTexturePro(env->client->floor,
+                     (Rectangle){0, 0, env->client->floor.width, env->client->floor.height},
+                     dest, (Vector2){0, 0}, 0, WHITE);
+             }
+ 
+             Texture2D* texture = NULL;
+             switch (env->grid[idx]) {
+                 case COUNTER:
+                     texture = &env->client->counter;
+                     break;
+                 case STOVE:
+                     texture = &env->client->pot;
+                     break;
+                 case CUTTING_BOARD:
+                     texture = &env->client->counter;
+                     break;
+                 case INGREDIENT_BOX:
+                     texture = &env->client->onions_box;
+                     break;
+                 case SERVING_AREA:
+                     texture = &env->client->serve;
+                     break;
+                 case PLATE_BOX:
+                     texture = &env->client->dishes_box;
+                     break;
+                 case WALL:
+                     texture = &env->client->wall;
+                     break;
+             }
+ 
+             if (texture && texture->id != 0) {
+                 DrawTexturePro(*texture,
+                     (Rectangle){0, 0, texture->width, texture->height},
+                     dest, (Vector2){0, 0}, 0, WHITE);
+             }
+ 
+             if (env->grid[idx] == STOVE) {
+                 CookingPot* pot = get_pot_at(env, x, y);
+                 if (pot && pot->ingredient_count > 0) {
+                     Texture2D* cooking_texture = NULL;
+ 
+                     // Ties between onions and tomatoes are drawn as onion soup
+                     bool is_onion_soup = (pot->num_onions >= pot->num_tomatoes);
+                     if (is_onion_soup) {
+                         if (pot->ingredient_count <= 1) {
+                             cooking_texture = &env->client->soup_onion_cooking_1;
+                         } else if (pot->ingredient_count == 2) {
+                             cooking_texture = &env->client->soup_onion_cooking_2;
+                         } else {
+                             cooking_texture = &env->client->soup_onion_cooking_3;
+                         }
+                     } else {
+                         if (pot->ingredient_count <= 1) {
+                             cooking_texture = &env->client->soup_tomato_cooking_1;
+                         } else if (pot->ingredient_count == 2) {
+                             cooking_texture = &env->client->soup_tomato_cooking_2;
+                         } else {
+                             cooking_texture = &env->client->soup_tomato_cooking_3;
+                         }
+                     }
+ 
+                     if (pot->cooking_state == COOKING) {
+                         // Progress bar along the bottom of the cell
+                         float progress = (float)pot->cooking_progress / COOKING_TIME;
+ 
+                         DrawRectangle(x * env->grid_size + 5,
+                                     y * env->grid_size + grid_offset_y + env->grid_size - 10,
+                                     (env->grid_size - 10) * progress, 3, GREEN);
+                         DrawRectangleLines(x * env->grid_size + 5,
+                                         y * env->grid_size + grid_offset_y + env->grid_size - 10,
+                                         env->grid_size - 10, 3, BLACK);
+                     }
+                     else if (pot->cooking_state == COOKED) {
+                         cooking_texture = is_onion_soup ? &env->client->soup_onion_cooked :
+                                                         &env->client->soup_tomato_cooked;
+                         DrawText("READY!", x * env->grid_size + 5,
+                                 y * env->grid_size + grid_offset_y + env->grid_size - 10,
+                                 8, GREEN);
+                     }
+ 
+                     if (cooking_texture && cooking_texture->id != 0) {
+                         Rectangle pot_dest = {
+                             x * env->grid_size,
+                             y * env->grid_size + grid_offset_y,
+                             env->grid_size,
+                             env->grid_size
+                         };
+                         DrawTexturePro(*cooking_texture,
+                             (Rectangle){0, 0, cooking_texture->width, cooking_texture->height},
+                             pot_dest, (Vector2){0, 0}, 0, WHITE);
+                     }
+                 }
+             }
+         }
+     }
+ 
+     // Loose items are drawn at half cell size, centred in their cell
+     for (int i = 0; i < env->num_items; i++) {
+         Texture2D* texture = NULL;
+         switch (env->items[i].type) {
+             case TOMATO:
+                 texture = &env->client->tomato;
+                 break;
+             case ONION:
+                 texture = &env->client->onion;
+                 break;
+             case PLATE:
+                 texture = &env->client->dish;
+                 break;
+             case SOUP:
+                 texture = &env->client->soup_onion;
+                 break;
+             case PLATED_SOUP:
+                 if (env->items[i].num_onions >= env->items[i].num_tomatoes) {
+                     texture = &env->client->soup_onion_dish;
+                 } else {
+                     texture = &env->client->soup_tomato_dish;
+                 }
+                 break;
+         }
+ 
+         if (texture && texture->id != 0) {
+             Rectangle dest = {
+                 env->items[i].x * env->grid_size + env->grid_size/4,
+                 env->items[i].y * env->grid_size + grid_offset_y + env->grid_size/4,
+                 env->grid_size/2,
+                 env->grid_size/2
+             };
+             DrawTexturePro(*texture,
+                 (Rectangle){0, 0, texture->width, texture->height},
+                 dest, (Vector2){0, 0}, 0, WHITE);
+         } else {
+             // No texture: draw a coloured circle instead
+             Color item_color = GRAY;
+             switch (env->items[i].type) {
+                 case TOMATO: item_color = RED; break;
+                 case ONION: item_color = YELLOW; break;
+                 case PLATE: item_color = WHITE; break;
+                 case SOUP: item_color = ORANGE; break;
+                 case PLATED_SOUP: item_color = ORANGE; break;
+             }
+             DrawCircle(
+                 env->items[i].x * env->grid_size + env->grid_size/2,
+                 env->items[i].y * env->grid_size + grid_offset_y + env->grid_size/2,
+                 env->grid_size/4,
+                 item_color
+             );
+         }
+     }
+ 
+     // Agents: sprite chosen by held item and facing direction (0 up, 1 down, 2 left, 3 right)
+     for (int agent_idx = 0; agent_idx < env->num_agents; agent_idx++) {
+         Agent* agent = &env->agents[agent_idx];
+         Texture2D* chef_texture = NULL;
+ 
+         if (agent->held_item == NO_ITEM) {
+             switch (agent->facing_direction) {
+                 case 0: chef_texture = &env->client->chef_north; break;
+                 case 1: chef_texture = &env->client->chef_south; break;
+                 case 2: chef_texture = &env->client->chef_west; break;
+                 case 3: chef_texture = &env->client->chef_east; break;
+             }
+         } else if (agent->held_item == ONION) {
+             switch (agent->facing_direction) {
+                 case 0: chef_texture = &env->client->chef_north_onion; break;
+                 case 1: chef_texture = &env->client->chef_south_onion; break;
+                 case 2: chef_texture = &env->client->chef_west_onion; break;
+                 case 3: chef_texture = &env->client->chef_east_onion; break;
+             }
+         } else if (agent->held_item == TOMATO) {
+             switch (agent->facing_direction) {
+                 case 0: chef_texture = &env->client->chef_north_tomato; break;
+                 case 1: chef_texture = &env->client->chef_south_tomato; break;
+                 case 2: chef_texture = &env->client->chef_west_tomato; break;
+                 case 3: chef_texture = &env->client->chef_east_tomato; break;
+             }
+         } else if (agent->held_item == PLATE) {
+             switch (agent->facing_direction) {
+                 case 0: chef_texture = &env->client->chef_north_dish; break;
+                 case 1: chef_texture = &env->client->chef_south_dish; break;
+                 case 2: chef_texture = &env->client->chef_west_dish; break;
+                 case 3: chef_texture = &env->client->chef_east_dish; break;
+             }
+         } else if (agent->held_item == PLATED_SOUP) {
+             bool is_onion_soup = (agent->held_soup_onions >= agent->held_soup_tomatoes);
+             if (is_onion_soup) {
+                 switch (agent->facing_direction) {
+                     case 0: chef_texture = &env->client->chef_north_soup_onion; break;
+                     case 1: chef_texture = &env->client->chef_south_soup_onion; break;
+                     case 2: chef_texture = &env->client->chef_west_soup_onion; break;
+                     case 3: chef_texture = &env->client->chef_east_soup_onion; break;
+                 }
+             } else {
+                 switch (agent->facing_direction) {
+                     case 0: chef_texture = &env->client->chef_north_soup_tomato; break;
+                     case 1: chef_texture = &env->client->chef_south_soup_tomato; break;
+                     case 2: chef_texture = &env->client->chef_west_soup_tomato; break;
+                     case 3: chef_texture = &env->client->chef_east_soup_tomato; break;
+                 }
+             }
+         }
+ 
+         if (chef_texture && chef_texture->id != 0) {
+             Rectangle dest = {
+                 agent->x * env->grid_size,
+                 agent->y * env->grid_size + grid_offset_y,
+                 env->grid_size,
+                 env->grid_size
+             };
+             Color tint = WHITE;
+             if (agent_idx == 0) {
+                 tint = (Color){255, 255, 255, 255};  // White for player 1
+             } else if (agent_idx == 1) {
+                 tint = (Color){200, 200, 255, 255};  // Light blue tint for player 2
+             } else {
+                 tint = (Color){255, 200, 200, 255};  // Light red tint for other players
+             }
+             DrawTexturePro(*chef_texture,
+                 (Rectangle){0, 0, chef_texture->width, chef_texture->height},
+                 dest, (Vector2){0, 0}, 0, tint);
+         } else {
+             // No sprite: coloured square, a facing line, and the 1-based agent number
+             Color agent_color = get_agent_color(agent->held_item);
+             if (agent_idx == 1) {
+                 agent_color = (Color){agent_color.r * 0.8, agent_color.g * 0.8, agent_color.b, agent_color.a};
+             }
+             DrawRectangle(
+                 agent->x * env->grid_size + env->grid_size/4,
+                 agent->y * env->grid_size + grid_offset_y + env->grid_size/4,
+                 env->grid_size/2,
+                 env->grid_size/2,
+                 agent_color
+             );
+ 
+             int dir_x = agent->x * env->grid_size + env->grid_size/2;
+             int dir_y = agent->y * env->grid_size + grid_offset_y + env->grid_size/2;
+             int end_x = dir_x, end_y = dir_y;
+             switch (agent->facing_direction) {
+                 case 0: end_y -= env->grid_size/4; break;  // Up
+                 case 1: end_y += env->grid_size/4; break;  // Down
+                 case 2: end_x -= env->grid_size/4; break;  // Left
+                 case 3: end_x += env->grid_size/4; break;  // Right
+             }
+             DrawLine(dir_x, dir_y, end_x, end_y, BLACK);
+ 
+             DrawText(TextFormat("%d", agent_idx + 1),
+                     agent->x * env->grid_size + 2,
+                     agent->y * env->grid_size + grid_offset_y + 2,
+                     10, BLACK);
+         }
+     }
+ 
+     // Debug panel to the right of the grid: agent 0's observation vector, slot by slot
+     int obs_panel_x = env->width * env->grid_size + 10;
+     int obs_panel_y = grid_offset_y;
+ 
+     if (env->num_agents > 0) {
+         float* obs = &env->observations[0];
+ 
+         DrawText("=== OBSERVATION ARRAY (43 dims) ===", obs_panel_x, obs_panel_y, 11, BLACK);
+         obs_panel_y += 18;
+ 
+         // The player section listed below runs through index 37 (walls are 34-37)
+         DrawText("-- PLAYER (0-37) --", obs_panel_x, obs_panel_y, 10, DARKGREEN);
+         obs_panel_y += 13;
+ 
+         DrawText(TextFormat("[0-3] Orient: %.0f %.0f %.0f %.0f",
+                 obs[0], obs[1], obs[2], obs[3]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 11;
+ 
+         DrawText(TextFormat("[4-7] Held: %.0f %.0f %.0f %.0f",
+                 obs[4], obs[5], obs[6], obs[7]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 11;
+ 
+         DrawText(TextFormat("[8-9] Onion: %.2f, %.2f", obs[8], obs[9]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+         DrawText(TextFormat("[10-11] Dish: %.2f, %.2f", obs[10], obs[11]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+         DrawText(TextFormat("[12-13] Soup: %.2f, %.2f", obs[12], obs[13]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+         DrawText(TextFormat("[14-15] Serve: %.2f, %.2f", obs[14], obs[15]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+         DrawText(TextFormat("[16-17] Empty: %.2f, %.2f", obs[16], obs[17]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+         DrawText(TextFormat("[18-19] Pot: %.2f, %.2f", obs[18], obs[19]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[20-21] PickOnion: %.2f, %.2f", obs[20], obs[21]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[22-23] PickPlate: %.2f, %.2f", obs[22], obs[23]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[24-25] SoupIngr: %.2f, %.2f", obs[24], obs[25]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[26-27] PotIngr: %.2f, %.2f", obs[26], obs[27]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[28] PotExists: %.0f", obs[28]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[29-32] PotState: %.0f %.0f %.0f %.0f",
+                 obs[29], obs[30], obs[31], obs[32]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[33] CookTime: %.2f", obs[33]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[34-37] Walls: %.0f %.0f %.0f %.0f",
+                 obs[34], obs[35], obs[36], obs[37]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 13;
+ 
+         DrawText("-- TEAMMATE (38-39) --", obs_panel_x, obs_panel_y, 10, DARKBLUE);
+         obs_panel_y += 13;
+ 
+         if (env->num_agents > 1) {
+             DrawText(TextFormat("[38-39] T.RelPos: %.2f, %.2f", obs[38], obs[39]),
+                     obs_panel_x, obs_panel_y, 9, BLACK);
+             obs_panel_y += 10;
+         } else {
+             DrawText("No teammate", obs_panel_x, obs_panel_y, 9, GRAY);
+             obs_panel_y += 10;
+         }
+ 
+         obs_panel_y += 3;
+         DrawText("-- MISC (40-42) --", obs_panel_x, obs_panel_y, 10, DARKGRAY);
+         obs_panel_y += 13;
+ 
+         DrawText(TextFormat("[40-41] AbsPos: %.3f, %.3f", obs[40], obs[41]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+ 
+         DrawText(TextFormat("[42] Reward: %.2f", obs[42]),
+                 obs_panel_x, obs_panel_y, 9, BLACK);
+         obs_panel_y += 10;
+     }
+ 
+     EndDrawing();
+ }
+ 
+ #endif // OVERCOOKED_RENDER_H
+ 
\ No newline at end of file
diff --git 
a/pufferlib/ocean/overcooked/overcooked_types.h b/pufferlib/ocean/overcooked/overcooked_types.h
new file mode 100644
index 000000000..24b3c44bd
--- /dev/null
+++ b/pufferlib/ocean/overcooked/overcooked_types.h
@@ -0,0 +1,359 @@
+/* Overcooked Types: Constants, enums, and struct definitions.
+ */
+
+#ifndef OVERCOOKED_TYPES_H
+#define OVERCOOKED_TYPES_H
+
+// NOTE(review): the five system header names were missing from this copy of
+// the patch. <stdint.h> (uint64_t) and <string.h> (strcmp, NULL) are needed by
+// the code below; the other three are assumed -- confirm against the original.
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "raylib.h"
+
+// Tile types. The layout grids below appear to spell tiles as the matching
+// ASCII digit ('1' COUNTER, '6' WALL, ...) with ' ' for open floor -- verify against the loader.
+#define EMPTY 0
+#define COUNTER 1
+#define STOVE 2
+#define CUTTING_BOARD 3
+#define INGREDIENT_BOX 4
+#define SERVING_AREA 5
+#define WALL 6
+#define PLATE_BOX 7
+#define AGENT 8
+
+// Item types
+#define NO_ITEM 10
+#define TOMATO 11
+#define ONION 12
+#define PLATE 13
+#define SOUP 14
+#define PLATED_SOUP 15
+
+// Cooking states
+#define NOT_COOKING 0
+#define COOKING 1
+#define COOKED 2
+
+// Cooking parameters
+#define COOKING_TIME 20
+#define MAX_INGREDIENTS 3
+
+// Actions
+#define ACTION_NOOP 0
+#define ACTION_UP 1
+#define ACTION_DOWN 2
+#define ACTION_LEFT 3
+#define ACTION_RIGHT 4
+#define ACTION_INTERACT 5
+
+// Agent states
+#define AGENT_EMPTY_HANDED 0
+#define AGENT_HOLDING_ITEM 1
+
+// Capacity (in ints) of LayoutInfo.spawn_positions
+#define MAX_SPAWN_POSITIONS 8
+
+// Built-in layouts. Values double as indices into LAYOUTS, so the enum and
+// the table must stay in the same order. LAYOUT_COUNT is the table length.
+typedef enum {
+    LAYOUT_CRAMPED_ROOM = 0,
+    LAYOUT_ASYMMETRIC_ADVANTAGES = 1,
+    LAYOUT_FORCED_COORDINATION = 2,
+    LAYOUT_COORDINATION_RING = 3,
+    LAYOUT_COUNTER_CIRCUIT = 4,
+    LAYOUT_COUNT
+} LayoutType;
+
+// Static description of one built-in layout
+typedef struct {
+    const char* name; // Name matched by get_layout_by_name
+    int width; // Grid columns
+    int height; // Grid rows
+    const char* grid; // Row-major tile characters, width * height entries
+    int spawn_positions[MAX_SPAWN_POSITIONS]; // Presumably x,y pairs, one per agent -- verify
+    int num_spawns; // Presumably the number of pairs in use -- verify
+} LayoutInfo;
+
+// Reward magnitudes, one per event. Field names mirror the reward_* keys in
+// config/ocean/overcooked.ini.
+typedef struct {
+    float dish_served_whole_team;
+    float dish_served_agent;
+    float pot_started;
+    float ingredient_added;
+    float ingredient_picked;
+    float plate_picked;
+    float soup_plated;
+    float wrong_dish_served;
+    float step_penalty;
+} RewardConfig;
+
+// Logged metrics. Every field is a float and n stays last (see its comment).
+typedef struct {
+    float perf; // Recommended 0-1 normalized single real number perf metric
+    float score; // Recommended unnormalized single real number perf metric
+    float episode_return; // Recommended metric: sum of agent rewards over episode
+    float episode_length; // Recommended metric: number of steps of agent episode
+    float dishes_served; // Number of dishes successfully served
+    float correct_dishes; // Number of correct 3-onion dishes
+    float wrong_dishes; // Number of wrong dishes submitted
+    float ingredients_picked; // Total ingredients picked up
+    float pots_started; // Number of cooking sessions started
+    float items_dropped; // Number of items dropped/placed
+    float agent_collisions; // Number of times agents tried to move to same spot
+    float n; // Required as the last field
+} Log;
+
+// raylib texture handles for terrain, items, soups and chef sprites
+typedef struct {
+    Texture2D floor;
+    Texture2D counter;
+    Texture2D pot;
+    Texture2D serve;
+    Texture2D onions_box;
+    Texture2D tomatoes_box;
+    Texture2D dishes_box;
+    Texture2D wall;
+
+    Texture2D onion;
+    Texture2D tomato;
+    Texture2D dish;
+    Texture2D soup_onion;
+    Texture2D soup_tomato;
+
+    Texture2D soup_onion_cooking_1;
+    Texture2D soup_onion_cooking_2;
+    Texture2D soup_onion_cooking_3;
+    Texture2D soup_onion_cooked;
+    Texture2D soup_tomato_cooking_1;
+    Texture2D soup_tomato_cooking_2;
+    Texture2D soup_tomato_cooking_3;
+    Texture2D soup_tomato_cooked;
+
+    Texture2D chef_north;
+    Texture2D chef_south;
+    Texture2D chef_east;
+    Texture2D chef_west;
+    Texture2D chef_north_onion;
+    Texture2D chef_south_onion;
+    Texture2D chef_east_onion;
+    Texture2D chef_west_onion;
+    Texture2D chef_north_tomato;
+    Texture2D chef_south_tomato;
+    Texture2D chef_east_tomato;
+    Texture2D chef_west_tomato;
+    Texture2D chef_north_dish;
+    Texture2D chef_south_dish;
+    Texture2D chef_east_dish;
+    Texture2D chef_west_dish;
+    Texture2D chef_north_soup_onion;
+    Texture2D chef_south_soup_onion;
+    Texture2D chef_east_soup_onion;
+    Texture2D chef_west_soup_onion;
+    Texture2D chef_north_soup_tomato;
+    Texture2D chef_south_soup_tomato;
+    Texture2D chef_east_soup_tomato;
+    Texture2D chef_west_soup_tomato;
+
+    Texture2D soup_onion_dish;
+    Texture2D soup_tomato_dish;
+} Client;
+
+// One chef. facing_direction is 0 = up, 1 = down, 2 = left, 3 = right (the
+// mapping the debug renderer draws).
+typedef struct __attribute__((aligned(32))) {
+    float x;
+    float y;
+    int facing_direction;
+    int held_item;
+    int held_soup_onions;
+    int held_soup_tomatoes;
+    int held_soup_total;
+    int ticks_since_reward;
+} Agent;
+
+// One dynamic item in the kitchen (element type of Overcooked.items)
+typedef struct __attribute__((aligned(32))) {
+    int x;
+    int y;
+    int type;
+    int state;
+    int num_onions;
+    int num_tomatoes;
+    int total_ingredients;
+} Item;
+
+// State of one cooking pot (element type of Overcooked.cooking_pots)
+typedef struct {
+    int cooking_state; // NOT_COOKING, COOKING, COOKED
+    int cooking_progress; // Steps since cooking started
+    int ingredient_types[MAX_INGREDIENTS]; // Types of ingredients added
+    int ingredient_count; // Number of ingredients in pot
+    int num_onions; // Count of onions
+    int num_tomatoes; // Count of tomatoes
+} CookingPot;
+
+// Cache for static tile positions (computed once at init, never changes)
+typedef struct {
+    // Static tile positions stored as x,y pairs: [x0, y0, x1, y1, ...]
+    int ingredient_box_positions[20]; // Max 10 ingredient boxes
+    int ingredient_box_count;
+    int plate_box_positions[20]; // Max 10 plate boxes
+    int plate_box_count;
+    int serving_area_positions[20]; // Max 10 serving areas
+    int serving_area_count;
+    int stove_positions[20]; // Max 10 stoves
+    int stove_count;
+    int counter_positions[100]; // Max 50 counters
+    int counter_count;
+
+    // Precomputed normalization factors
+    float inv_width; // 1.0f / width
+    float inv_height; // 1.0f / height
+} StaticCache;
+
+// Full state of one environment instance
+typedef struct {
+    Log log; // Required field. Env binding code uses this to aggregate logs
+    Client* client;
+    LayoutType layout_id;
+    char* grid;
+    Item* items; // Dynamic items in the kitchen
+    int num_items;
+    int max_items;
+    Agent* agents; // Array of agents
+    int num_agents;
+    // 64-bit mask, so width * height must not exceed 64 (largest built-in layout is 9 x 5)
+    uint64_t agent_position_mask; // Bit (y * width + x) set if agent present
+    CookingPot* cooking_pots; // Array of cooking pots (one per stove)
+    int num_stoves;
+    int* pot_index_grid; // Maps grid cell to pot index (-1 if not a stove)
+    int* item_grid; // Maps grid cell to item index (-1 if empty)
+    float* observations; // Required. You can use any obs type, but make sure it matches in Python!
+    int* actions; // Required. int* for discrete/multidiscrete, float* for box
+    float* rewards; // Required
+    unsigned char* terminals; // Required. We don't have truncations as standard yet
+    int width;
+    int height;
+    int grid_size;
+    RewardConfig rewards_config;
+    int observation_size;
+    StaticCache cache; // Cached static tile positions for O(1) lookup
+} Overcooked;
+
+// Grid layout
+static const char CRAMPED_ROOM[5][5] = {
+    {'6', '1', '2', '1', '6'},
+    {'4', ' ', ' ', ' ', '4'},
+    {'1', ' ', ' ', ' ', '1'},
+    {'1', ' ', ' ', ' ', '1'},
+    {'6', '7', '1', '5', '6'}
+};
+
+static const char ASYMMETRIC_ADVANTAGES[5][9] = {
+    {'6','1','6','6','6','6','6','1','6'},
+    {'4',' ','1','5','6','4','1',' ','5'},
+    {'1',' ',' ',' ','2',' ',' ',' ','1'},
+    {'1',' ',' ',' ','2',' ',' ',' ','1'},
+    {'6','1','1','7','6','7','1','1','6'}
+};
+
+static const char COORDINATION_RING[5][5] = {
+    {'6', '1', '1', '2', '6'},
+    {'1', ' ', ' ', ' ', '2'},
+    {'7', ' ', '1', ' ', '1'},
+    {'4', ' ', ' ', ' ', '1'},
+    {'6', '4', '5', '1', '6'}
+};
+
+static const char FORCED_COORDINATION[5][5] = {
+    {'6', '1', '6', '2', '6'},
+    {'4', ' ', '1', ' ', '2'},
+    {'4', ' ', '1', ' ', '1'},
+    {'7', ' ', '1', ' ', '1'},
+    {'6', '1', '6', '5', '6'}
+};
+
+static const char COUNTER_CIRCUIT[5][8] = {
+    {'6','1','1','2','2','1','1','6'},
+    {'1',' ',' ',' ',' ',' ',' ','1'},
+    {'7',' ','1','1','1','1',' ','5'},
+    {'1',' ',' ',' ',' ',' ',' ','1'},
+    {'6','1','1','4','4','1','1','6'}
+};
+
+// Layout table, indexed by LayoutType -- entries must stay in enum order
+static const LayoutInfo LAYOUTS[LAYOUT_COUNT] = {
+    {
+        "cramped_room",
+        5, 5,
+        (const char*)CRAMPED_ROOM,
+        {1, 2, 3, 2},
+        2
+    },
+    {
+        "asymmetric_advantages",
+        9, 5,
+        (const char*)ASYMMETRIC_ADVANTAGES,
+        {1, 2, 7, 2},
+        2
+    },
+    {
+        "forced_coordination",
+        5, 5,
+        (const char*)FORCED_COORDINATION,
+        {1, 2, 3, 2},
+        2
+    },
+    {
+        "coordination_ring",
+        5, 5,
+        (const char*)COORDINATION_RING,
+        {1, 2, 3, 2},
+        2
+    },
+    {
+        "counter_circuit",
+        8, 5,
+        (const char*)COUNTER_CIRCUIT,
+        {1, 1, 6, 3},
+        2
+    }
+};
+
+// Return the descriptor for `id`. Ids outside [0, LAYOUT_COUNT) fall back to
+// the first table entry (cramped_room) instead of indexing out of bounds.
+static inline const LayoutInfo* get_layout_info(LayoutType id) {
+    // Compare as int: the enum's underlying type may be unsigned, which would
+    // make a bare `id < 0` test vacuous.
+    int index = (int)id;
+    if (index < 0 || index >= LAYOUT_COUNT) return &LAYOUTS[0];
+    return &LAYOUTS[index];
+}
+
+// Return the tile character at column x, row y of a layout (row-major grid).
+// A NULL layout or a coordinate outside the grid yields the wall character
+// ('0' + WALL, as used at the grid corners) rather than reading past the array.
+static inline char get_layout_tile(const LayoutInfo* info, int x, int y) {
+    if (info == NULL || x < 0 || y < 0 || x >= info->width || y >= info->height) {
+        return (char)('0' + WALL);
+    }
+    return info->grid[y * info->width + x];
+}
+
+// Look up a layout by name (exact, case-sensitive strcmp match). NULL or
+// unknown names fall back to LAYOUT_CRAMPED_ROOM.
+static inline LayoutType get_layout_by_name(const char* name) {
+    if (name == NULL) return LAYOUT_CRAMPED_ROOM; // strcmp on NULL is undefined
+    for (int i = 0; i < LAYOUT_COUNT; i++) {
+        if (strcmp(LAYOUTS[i].name, name) == 0) return (LayoutType)i;
+    }
+    return LAYOUT_CRAMPED_ROOM;
+}
+
+#endif // OVERCOOKED_TYPES_H
diff --git a/pufferlib/resources/overcooked/chefs/EAST-bluehat.png b/pufferlib/resources/overcooked/chefs/EAST-bluehat.png
new file mode 100644
index 000000000..aa03a1994
Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-bluehat.png differ
diff --git a/pufferlib/resources/overcooked/chefs/EAST-dish.png b/pufferlib/resources/overcooked/chefs/EAST-dish.png
new file mode 100644
index 000000000..85574475b
Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-dish.png differ
diff --git a/pufferlib/resources/overcooked/chefs/EAST-greenhat.png b/pufferlib/resources/overcooked/chefs/EAST-greenhat.png
new file mode 100644
index 000000000..38fb8f8ae
Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-greenhat.png differ
diff --git a/pufferlib/resources/overcooked/chefs/EAST-onion.png
b/pufferlib/resources/overcooked/chefs/EAST-onion.png new file mode 100644 index 000000000..2c11958d3 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/EAST-orangehat.png b/pufferlib/resources/overcooked/chefs/EAST-orangehat.png new file mode 100644 index 000000000..00d8ce4c4 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-orangehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/EAST-purplehat.png b/pufferlib/resources/overcooked/chefs/EAST-purplehat.png new file mode 100644 index 000000000..f508f309d Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-purplehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/EAST-redhat.png b/pufferlib/resources/overcooked/chefs/EAST-redhat.png new file mode 100644 index 000000000..ac823b5a6 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-redhat.png differ diff --git a/pufferlib/resources/overcooked/chefs/EAST-soup-onion.png b/pufferlib/resources/overcooked/chefs/EAST-soup-onion.png new file mode 100644 index 000000000..ba70009c9 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-soup-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/EAST-soup-tomato.png b/pufferlib/resources/overcooked/chefs/EAST-soup-tomato.png new file mode 100644 index 000000000..3fe0ea6d0 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-soup-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/EAST-tomato.png b/pufferlib/resources/overcooked/chefs/EAST-tomato.png new file mode 100644 index 000000000..ba15181ea Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/EAST-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/EAST.png b/pufferlib/resources/overcooked/chefs/EAST.png new file mode 100644 index 000000000..4b704154e Binary files /dev/null and 
b/pufferlib/resources/overcooked/chefs/EAST.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-bluehat.png b/pufferlib/resources/overcooked/chefs/NORTH-bluehat.png new file mode 100644 index 000000000..0ba8726a5 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-bluehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-dish.png b/pufferlib/resources/overcooked/chefs/NORTH-dish.png new file mode 100644 index 000000000..515ce18c7 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-dish.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-greenhat.png b/pufferlib/resources/overcooked/chefs/NORTH-greenhat.png new file mode 100644 index 000000000..7ab3199a8 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-greenhat.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-onion.png b/pufferlib/resources/overcooked/chefs/NORTH-onion.png new file mode 100644 index 000000000..adfbcde7b Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-orangehat.png b/pufferlib/resources/overcooked/chefs/NORTH-orangehat.png new file mode 100644 index 000000000..44dfd3878 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-orangehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-purplehat.png b/pufferlib/resources/overcooked/chefs/NORTH-purplehat.png new file mode 100644 index 000000000..cb204236f Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-purplehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-redhat.png b/pufferlib/resources/overcooked/chefs/NORTH-redhat.png new file mode 100644 index 000000000..5ac895a1a Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-redhat.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-soup-onion.png 
b/pufferlib/resources/overcooked/chefs/NORTH-soup-onion.png new file mode 100644 index 000000000..74d5731c2 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-soup-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-soup-tomato.png b/pufferlib/resources/overcooked/chefs/NORTH-soup-tomato.png new file mode 100644 index 000000000..5dfb3bee5 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-soup-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH-tomato.png b/pufferlib/resources/overcooked/chefs/NORTH-tomato.png new file mode 100644 index 000000000..e4a56aa59 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/NORTH.png b/pufferlib/resources/overcooked/chefs/NORTH.png new file mode 100644 index 000000000..2c2360006 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/NORTH.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-bluehat.png b/pufferlib/resources/overcooked/chefs/SOUTH-bluehat.png new file mode 100644 index 000000000..8f4e95e22 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-bluehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-dish.png b/pufferlib/resources/overcooked/chefs/SOUTH-dish.png new file mode 100644 index 000000000..0bda2437d Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-dish.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-greenhat.png b/pufferlib/resources/overcooked/chefs/SOUTH-greenhat.png new file mode 100644 index 000000000..5bf1cf7d2 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-greenhat.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-onion.png b/pufferlib/resources/overcooked/chefs/SOUTH-onion.png new file mode 100644 index 000000000..d12a0383b Binary files /dev/null and 
b/pufferlib/resources/overcooked/chefs/SOUTH-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-orangehat.png b/pufferlib/resources/overcooked/chefs/SOUTH-orangehat.png new file mode 100644 index 000000000..82ffa7a5a Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-orangehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-purplehat.png b/pufferlib/resources/overcooked/chefs/SOUTH-purplehat.png new file mode 100644 index 000000000..885ebf4ae Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-purplehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-redhat.png b/pufferlib/resources/overcooked/chefs/SOUTH-redhat.png new file mode 100644 index 000000000..f610cc20b Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-redhat.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-soup-onion.png b/pufferlib/resources/overcooked/chefs/SOUTH-soup-onion.png new file mode 100644 index 000000000..f7beeea30 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-soup-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-soup-tomato.png b/pufferlib/resources/overcooked/chefs/SOUTH-soup-tomato.png new file mode 100644 index 000000000..e524822fd Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-soup-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH-tomato.png b/pufferlib/resources/overcooked/chefs/SOUTH-tomato.png new file mode 100644 index 000000000..f4ccd62d4 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/SOUTH.png b/pufferlib/resources/overcooked/chefs/SOUTH.png new file mode 100644 index 000000000..90a5c1a5c Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/SOUTH.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-bluehat.png 
b/pufferlib/resources/overcooked/chefs/WEST-bluehat.png new file mode 100644 index 000000000..0ecd023ab Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-bluehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-dish.png b/pufferlib/resources/overcooked/chefs/WEST-dish.png new file mode 100644 index 000000000..33ea8e127 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-dish.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-greenhat.png b/pufferlib/resources/overcooked/chefs/WEST-greenhat.png new file mode 100644 index 000000000..4ac02a9c2 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-greenhat.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-onion.png b/pufferlib/resources/overcooked/chefs/WEST-onion.png new file mode 100644 index 000000000..d27241684 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-orangehat.png b/pufferlib/resources/overcooked/chefs/WEST-orangehat.png new file mode 100644 index 000000000..c72b8677c Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-orangehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-purplehat.png b/pufferlib/resources/overcooked/chefs/WEST-purplehat.png new file mode 100644 index 000000000..6486365f8 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-purplehat.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-redhat.png b/pufferlib/resources/overcooked/chefs/WEST-redhat.png new file mode 100644 index 000000000..95ba881ad Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-redhat.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-soup-onion.png b/pufferlib/resources/overcooked/chefs/WEST-soup-onion.png new file mode 100644 index 000000000..60d6082ba Binary files /dev/null and 
b/pufferlib/resources/overcooked/chefs/WEST-soup-onion.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-soup-tomato.png b/pufferlib/resources/overcooked/chefs/WEST-soup-tomato.png new file mode 100644 index 000000000..43245b120 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-soup-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST-tomato.png b/pufferlib/resources/overcooked/chefs/WEST-tomato.png new file mode 100644 index 000000000..44cbfe3a6 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST-tomato.png differ diff --git a/pufferlib/resources/overcooked/chefs/WEST.png b/pufferlib/resources/overcooked/chefs/WEST.png new file mode 100644 index 000000000..960b95303 Binary files /dev/null and b/pufferlib/resources/overcooked/chefs/WEST.png differ diff --git a/pufferlib/resources/overcooked/individual/arrow.png b/pufferlib/resources/overcooked/individual/arrow.png new file mode 100644 index 000000000..e2452c029 Binary files /dev/null and b/pufferlib/resources/overcooked/individual/arrow.png differ diff --git a/pufferlib/resources/overcooked/individual/interact.png b/pufferlib/resources/overcooked/individual/interact.png new file mode 100644 index 000000000..c94e0c951 Binary files /dev/null and b/pufferlib/resources/overcooked/individual/interact.png differ diff --git a/pufferlib/resources/overcooked/individual/stay.png b/pufferlib/resources/overcooked/individual/stay.png new file mode 100644 index 000000000..ae20d854a Binary files /dev/null and b/pufferlib/resources/overcooked/individual/stay.png differ diff --git a/pufferlib/resources/overcooked/objects/dish.png b/pufferlib/resources/overcooked/objects/dish.png new file mode 100644 index 000000000..1f2fa2ae2 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/dish.png differ diff --git a/pufferlib/resources/overcooked/objects/onion.png b/pufferlib/resources/overcooked/objects/onion.png new file mode 100644 index 
000000000..f637b305c Binary files /dev/null and b/pufferlib/resources/overcooked/objects/onion.png differ diff --git a/pufferlib/resources/overcooked/objects/pot-explosion.png b/pufferlib/resources/overcooked/objects/pot-explosion.png new file mode 100644 index 000000000..a802d15e7 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/pot-explosion.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-onion-1-cooking.png b/pufferlib/resources/overcooked/objects/soup-onion-1-cooking.png new file mode 100644 index 000000000..5e91a1912 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-onion-1-cooking.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-onion-2-cooking.png b/pufferlib/resources/overcooked/objects/soup-onion-2-cooking.png new file mode 100644 index 000000000..668d70fdd Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-onion-2-cooking.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-onion-3-cooking.png b/pufferlib/resources/overcooked/objects/soup-onion-3-cooking.png new file mode 100644 index 000000000..3c35aeb71 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-onion-3-cooking.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-onion-cooked.png b/pufferlib/resources/overcooked/objects/soup-onion-cooked.png new file mode 100644 index 000000000..664e81ab4 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-onion-cooked.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-onion-dish.png b/pufferlib/resources/overcooked/objects/soup-onion-dish.png new file mode 100644 index 000000000..7f6e4f81c Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-onion-dish.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-tomato-1-cooking.png b/pufferlib/resources/overcooked/objects/soup-tomato-1-cooking.png new file mode 100644 index 000000000..77bbfe4b2 
Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-tomato-1-cooking.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-tomato-2-cooking.png b/pufferlib/resources/overcooked/objects/soup-tomato-2-cooking.png new file mode 100644 index 000000000..81b75494a Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-tomato-2-cooking.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-tomato-3-0.png b/pufferlib/resources/overcooked/objects/soup-tomato-3-0.png new file mode 100644 index 000000000..efe89b922 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-tomato-3-0.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-tomato-3-cooking.png b/pufferlib/resources/overcooked/objects/soup-tomato-3-cooking.png new file mode 100644 index 000000000..efe89b922 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-tomato-3-cooking.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-tomato-cooked.png b/pufferlib/resources/overcooked/objects/soup-tomato-cooked.png new file mode 100644 index 000000000..a022d2df6 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-tomato-cooked.png differ diff --git a/pufferlib/resources/overcooked/objects/soup-tomato-dish.png b/pufferlib/resources/overcooked/objects/soup-tomato-dish.png new file mode 100644 index 000000000..cce15c841 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/soup-tomato-dish.png differ diff --git a/pufferlib/resources/overcooked/objects/tomato.png b/pufferlib/resources/overcooked/objects/tomato.png new file mode 100644 index 000000000..69c4681c6 Binary files /dev/null and b/pufferlib/resources/overcooked/objects/tomato.png differ diff --git a/pufferlib/resources/overcooked/puffer_overcooked_weights_aa.bin b/pufferlib/resources/overcooked/puffer_overcooked_weights_aa.bin new file mode 100644 index 000000000..8bf8843d6 Binary files /dev/null and 
b/pufferlib/resources/overcooked/puffer_overcooked_weights_aa.bin differ diff --git a/pufferlib/resources/overcooked/puffer_overcooked_weights_cc.bin b/pufferlib/resources/overcooked/puffer_overcooked_weights_cc.bin new file mode 100644 index 000000000..e7b9ed674 Binary files /dev/null and b/pufferlib/resources/overcooked/puffer_overcooked_weights_cc.bin differ diff --git a/pufferlib/resources/overcooked/puffer_overcooked_weights_cor.bin b/pufferlib/resources/overcooked/puffer_overcooked_weights_cor.bin new file mode 100644 index 000000000..44d3fc0bf Binary files /dev/null and b/pufferlib/resources/overcooked/puffer_overcooked_weights_cor.bin differ diff --git a/pufferlib/resources/overcooked/puffer_overcooked_weights_cr.bin b/pufferlib/resources/overcooked/puffer_overcooked_weights_cr.bin new file mode 100644 index 000000000..a1d25b5fa Binary files /dev/null and b/pufferlib/resources/overcooked/puffer_overcooked_weights_cr.bin differ diff --git a/pufferlib/resources/overcooked/puffer_overcooked_weights_fc.bin b/pufferlib/resources/overcooked/puffer_overcooked_weights_fc.bin new file mode 100644 index 000000000..ffd675e30 Binary files /dev/null and b/pufferlib/resources/overcooked/puffer_overcooked_weights_fc.bin differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_1.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_1.png new file mode 100644 index 000000000..a5fef1e6e Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_2.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_2.png new file mode 100644 index 000000000..bdbbc2518 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_2.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_3.png 
b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_3.png new file mode 100644 index 000000000..1f098a222 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_0_onion_3.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_0.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_0.png new file mode 100644 index 000000000..624efb6f6 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_1.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_1.png new file mode 100644 index 000000000..540f45f5b Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_2.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_2.png new file mode 100644 index 000000000..1efb46a48 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_1_onion_2.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_2_onion_0.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_2_onion_0.png new file mode 100644 index 000000000..eb20c2d5b Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_2_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_2_onion_1.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_2_onion_1.png new file mode 100644 index 000000000..2e08737ff Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_2_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_cooked_tomato_3_onion_0.png b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_3_onion_0.png new file mode 100644 index 000000000..001a69b9b Binary files /dev/null 
and b/pufferlib/resources/overcooked/soups/soup_cooked_tomato_3_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_1.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_1.png new file mode 100644 index 000000000..d328ae781 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_2.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_2.png new file mode 100644 index 000000000..80a75afb6 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_2.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_3.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_3.png new file mode 100644 index 000000000..ec8a96ea8 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_0_onion_3.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_0.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_0.png new file mode 100644 index 000000000..26e89b1a7 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_1.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_1.png new file mode 100644 index 000000000..bad4b3e29 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_2.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_2.png new file mode 100644 index 000000000..e505229f2 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_1_onion_2.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_2_onion_0.png 
b/pufferlib/resources/overcooked/soups/soup_done_tomato_2_onion_0.png new file mode 100644 index 000000000..1eff0838f Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_2_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_2_onion_1.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_2_onion_1.png new file mode 100644 index 000000000..af8a48814 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_2_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_done_tomato_3_onion_0.png b/pufferlib/resources/overcooked/soups/soup_done_tomato_3_onion_0.png new file mode 100644 index 000000000..84f2293d1 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_done_tomato_3_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_1.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_1.png new file mode 100644 index 000000000..f1d5d3c1b Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_2.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_2.png new file mode 100644 index 000000000..f6fb62ee4 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_2.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_3.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_3.png new file mode 100644 index 000000000..d5f45df47 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_0_onion_3.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_0.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_0.png new file mode 100644 index 000000000..2e09293be Binary files /dev/null and 
b/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_1.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_1.png new file mode 100644 index 000000000..4f44298c7 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_2.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_2.png new file mode 100644 index 000000000..60b2fa84d Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_1_onion_2.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_2_onion_0.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_2_onion_0.png new file mode 100644 index 000000000..091f166ea Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_2_onion_0.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_2_onion_1.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_2_onion_1.png new file mode 100644 index 000000000..f0df61f83 Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_2_onion_1.png differ diff --git a/pufferlib/resources/overcooked/soups/soup_idle_tomato_3_onion_0.png b/pufferlib/resources/overcooked/soups/soup_idle_tomato_3_onion_0.png new file mode 100644 index 000000000..f8179877a Binary files /dev/null and b/pufferlib/resources/overcooked/soups/soup_idle_tomato_3_onion_0.png differ diff --git a/pufferlib/resources/overcooked/terrain/counter.png b/pufferlib/resources/overcooked/terrain/counter.png new file mode 100644 index 000000000..3da65f04e Binary files /dev/null and b/pufferlib/resources/overcooked/terrain/counter.png differ diff --git a/pufferlib/resources/overcooked/terrain/dishes.png b/pufferlib/resources/overcooked/terrain/dishes.png new file mode 100644 index 
000000000..908d4f5a6 Binary files /dev/null and b/pufferlib/resources/overcooked/terrain/dishes.png differ diff --git a/pufferlib/resources/overcooked/terrain/floor.png b/pufferlib/resources/overcooked/terrain/floor.png new file mode 100644 index 000000000..2b40ab1b9 Binary files /dev/null and b/pufferlib/resources/overcooked/terrain/floor.png differ diff --git a/pufferlib/resources/overcooked/terrain/onions.png b/pufferlib/resources/overcooked/terrain/onions.png new file mode 100644 index 000000000..8e3147ebd Binary files /dev/null and b/pufferlib/resources/overcooked/terrain/onions.png differ diff --git a/pufferlib/resources/overcooked/terrain/pot.png b/pufferlib/resources/overcooked/terrain/pot.png new file mode 100644 index 000000000..2c6caac01 Binary files /dev/null and b/pufferlib/resources/overcooked/terrain/pot.png differ diff --git a/pufferlib/resources/overcooked/terrain/serve.png b/pufferlib/resources/overcooked/terrain/serve.png new file mode 100644 index 000000000..0f83fe0ba Binary files /dev/null and b/pufferlib/resources/overcooked/terrain/serve.png differ diff --git a/pufferlib/resources/overcooked/terrain/tomatoes.png b/pufferlib/resources/overcooked/terrain/tomatoes.png new file mode 100644 index 000000000..ff2ccf470 Binary files /dev/null and b/pufferlib/resources/overcooked/terrain/tomatoes.png differ