diff --git a/.gitignore b/.gitignore
index f9082380e..8bf7e335a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@ MANIFEST
 
 # Mac files
 .DS_Store
+*.dSYM/
 
 # PyInstaller
 # Usually these files are written by a python script from a template
diff --git a/pufferlib/config/ocean/cubed.ini b/pufferlib/config/ocean/cubed.ini
new file mode 100644
index 000000000..01a83de38
--- /dev/null
+++ b/pufferlib/config/ocean/cubed.ini
@@ -0,0 +1,15 @@
+[base]
+package = ocean
+env_name = puffer_cubed
+policy_name = Policy
+rnn_name = Recurrent
+
+[env]
+num_envs = 4096
+size = 5
+
+[train]
+total_timesteps = 20_000_000
+gamma = 0.95
+learning_rate = 0.05
+minibatch_size = 32768
diff --git a/pufferlib/ocean/cubed/binding.c b/pufferlib/ocean/cubed/binding.c
new file mode 100644
index 000000000..f89382cff
--- /dev/null
+++ b/pufferlib/ocean/cubed/binding.c
@@ -0,0 +1,20 @@
+// Python binding glue for the Cubed environment (built on env_binding.h).
+#include "cubed.h"
+
+#define Env Cubed
+#include "../env_binding.h"
+
+// Reads the "size" keyword argument into the environment.
+static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
+  env->size = unpack(kwargs, "size");
+  return 0;
+}
+
+// Copies the Log fields into the Python dict under matching keys.
+static int my_log(PyObject *dict, Log *log) {
+  assign_to_dict(dict, "perf", log->perf);
+  assign_to_dict(dict, "score", log->score);
+  assign_to_dict(dict, "episode_return", log->episode_return);
+  assign_to_dict(dict, "episode_length", log->episode_length);
+  return 0;
+}
diff --git a/pufferlib/ocean/cubed/cubed.c b/pufferlib/ocean/cubed/cubed.c
new file mode 100644
index 000000000..d5178e99c
--- /dev/null
+++ b/pufferlib/ocean/cubed/cubed.c
@@ -0,0 +1,58 @@
+// Standalone interactive demo for the Cubed environment.
+//
+// Hold LEFT SHIFT to steer by hand (arrows/WASD move along x and y, Q/E along
+// z); otherwise a random action is drawn every step.
+#include "cubed.h"
+
+int main() {
+  Cubed env = {.size = 11};
+  env.observations = (unsigned char *)calloc(env.size * env.size * env.size,
+                                             sizeof(unsigned char));
+  env.actions = (int *)calloc(1, sizeof(int));
+  env.rewards = (float *)calloc(1, sizeof(float));
+  env.terminals = (unsigned char *)calloc(1, sizeof(unsigned char));
+  if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
+    // free(NULL) is a no-op, so a partial allocation is released safely.
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    return 1;
+  }
+
+  c_reset(&env);
+  c_render(&env);
+
+  while (!WindowShouldClose()) {
+    if (IsKeyDown(KEY_LEFT_SHIFT)) {
+      // Manual control: stay put unless a movement key is held. When several
+      // keys are held, the last matching check below wins.
+      env.actions[0] = NOOP;
+      if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W))
+        env.actions[0] = UP;
+      if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S))
+        env.actions[0] = DOWN;
+      if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A))
+        env.actions[0] = LEFT;
+      if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D))
+        env.actions[0] = RIGHT;
+      if (IsKeyDown(KEY_Q))
+        env.actions[0] = FRONT;
+      if (IsKeyDown(KEY_E))
+        env.actions[0] = BACK;
+    } else {
+      // 7 actions: NOOP plus the six axis-aligned moves.
+      env.actions[0] = rand() % 7;
+    }
+    c_step(&env);
+    c_render(&env);
+  }
+
+  // Close the window first, then release the buffers the env points into.
+  c_close(&env);
+  free(env.observations);
+  free(env.actions);
+  free(env.rewards);
+  free(env.terminals);
+  return 0;
+}
diff --git a/pufferlib/ocean/cubed/cubed.h b/pufferlib/ocean/cubed/cubed.h
new file mode 100644
index 000000000..b30510059
--- /dev/null
+++ b/pufferlib/ocean/cubed/cubed.h
@@ -0,0 +1,234 @@
+// Cubed: a minimal 3D grid world. The agent starts in the centre cell of a
+// size x size x size cube and has to reach a randomly placed target cell.
+#include "raylib.h"
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Action codes read from actions[0].
+const unsigned char NOOP = 0;
+const unsigned char UP = 1;
+const unsigned char DOWN = 2;
+const unsigned char LEFT = 3;
+const unsigned char RIGHT = 4;
+const unsigned char FRONT = 5;
+const unsigned char BACK = 6;
+
+// Cell values stored in the observation grid.
+const unsigned char EMPTY = 0;
+const unsigned char AGENT = 1;
+const unsigned char TARGET = 2;
+
+// Running sums over finished episodes; n counts how many were added.
+typedef struct {
+  float perf;
+  float score;
+  float episode_return;
+  float episode_length;
+  float n;
+} Log;
+
+// Environment state. The buffers are supplied by the caller; observations
+// holds size^3 cells addressed as x * size * size + y * size + z.
+typedef struct {
+  Log log;
+  unsigned char *observations;
+  int *actions;
+  float *rewards;
+  unsigned char *terminals;
+  int size; // edge length of the cube in cells; expected to be >= 1
+  int tick; // steps taken in the current episode
+  int x;    // agent position
+  int y;
+  int z;
+} Cubed;
+
+// Adds the episode that just ended to env->log. Call it once rewards[0] is
+// final and before c_reset() zeroes the tick counter.
+void add_log(Cubed *env) {
+  env->log.perf += (env->rewards[0] > 0) ? 1 : 0;
+  env->log.score += env->rewards[0];
+  env->log.episode_length += env->tick;
+  env->log.episode_return += env->rewards[0];
+  env->log.n++;
+}
+
+// Starts a new episode: clears the grid, puts the agent in the centre cell
+// and drops the target on a random different cell.
+void c_reset(Cubed *env) {
+  int cubes = env->size * env->size * env->size;
+  memset(env->observations, 0, cubes * sizeof(unsigned char));
+  env->x = env->size / 2;
+  env->y = env->size / 2;
+  env->z = env->size / 2;
+  int agent_idx = env->x * env->size * env->size + env->y * env->size + env->z;
+  env->observations[agent_idx] = AGENT;
+  env->tick = 0;
+
+  // With a single cell the agent fills the whole grid, so the rejection loop
+  // below could never find a free cell and would spin forever. Such a grid
+  // simply has no target.
+  if (cubes <= 1)
+    return;
+
+  int target_idx;
+  do {
+    target_idx = rand() % cubes;
+  } while (target_idx == agent_idx);
+  env->observations[target_idx] = TARGET;
+}
+
+// Advances the episode by one action. Leaving the cube or running past
+// 5 * size steps ends the episode with reward -1; reaching the target ends it
+// with reward +1. Either way the environment is reset before returning.
+void c_step(Cubed *env) {
+  env->tick += 1;
+
+  int action = env->actions[0];
+  env->terminals[0] = 0;
+  env->rewards[0] = 0;
+
+  // Lift the agent off its current cell; it is written back after the move.
+  env->observations[env->x * env->size * env->size + env->y * env->size +
+                    env->z] = EMPTY;
+
+  if (action == UP)
+    env->y += 1;
+  if (action == DOWN)
+    env->y -= 1;
+  if (action == LEFT)
+    env->x -= 1;
+  if (action == RIGHT)
+    env->x += 1;
+  if (action == FRONT)
+    env->z += 1;
+  if (action == BACK)
+    env->z -= 1;
+
+  // The failure check runs first, so timing out on the very step that reaches
+  // the target still counts as a loss.
+  if (env->tick > 5 * env->size || env->x < 0 || env->y < 0 || env->z < 0 ||
+      env->x >= env->size || env->y >= env->size || env->z >= env->size) {
+    env->terminals[0] = 1;
+    env->rewards[0] = -1.0;
+    add_log(env);
+    c_reset(env);
+    return;
+  }
+
+  int pos = env->x * env->size * env->size + env->y * env->size + env->z;
+  if (env->observations[pos] == TARGET) {
+    env->terminals[0] = 1;
+    env->rewards[0] = 1.0;
+    add_log(env);
+    c_reset(env);
+    return;
+  }
+
+  env->observations[pos] = AGENT;
+}
+
+// Draws the cube outline and every non-empty cell. The window and camera are
+// set up on the first call; dragging with the left mouse button orbits the
+// camera around its target, and holding ESC exits the process.
+void c_render(Cubed *env) {
+  static Camera3D camera = {0};
+
+  if (!IsWindowReady()) {
+    InitWindow(1024, 768, "PufferLib Cubed");
+    SetTargetFPS(5);
+
+    float center = env->size / 2.0f;
+    camera.position =
+        (Vector3){env->size * 2.5f, env->size * 2.0f, env->size * 2.5f};
+    camera.target = (Vector3){center, center, center};
+    camera.up = (Vector3){0.0f, 1.0f, 0.0f};
+    camera.fovy = 60.0f;
+    camera.projection = CAMERA_PERSPECTIVE;
+  }
+
+  if (IsKeyDown(KEY_ESCAPE)) {
+    exit(0);
+  }
+
+  if (IsMouseButtonDown(MOUSE_LEFT_BUTTON)) {
+    Vector2 mouseDelta = GetMouseDelta();
+
+    float rotSpeed = 0.003f;
+
+    // Convert the camera offset to distance/yaw/pitch around the target.
+    Vector3 offset = {camera.position.x - camera.target.x,
+                      camera.position.y - camera.target.y,
+                      camera.position.z - camera.target.z};
+
+    float distance =
+        sqrtf(offset.x * offset.x + offset.y * offset.y + offset.z * offset.z);
+    float yaw = atan2f(offset.x, offset.z);
+    float pitch = asinf(offset.y / distance);
+
+    yaw -= mouseDelta.x * rotSpeed;
+    pitch += mouseDelta.y * rotSpeed;
+
+    // Clamp the pitch to +-1.4 rad (about 80 degrees).
+    if (pitch > 1.4f)
+      pitch = 1.4f;
+    if (pitch < -1.4f)
+      pitch = -1.4f;
+
+    camera.position.x = camera.target.x + distance * sinf(yaw) * cosf(pitch);
+    camera.position.y = camera.target.y + distance * sinf(pitch);
+    camera.position.z = camera.target.z + distance * cosf(yaw) * cosf(pitch);
+  }
+
+  BeginDrawing();
+  ClearBackground((Color){6, 24, 24, 255});
+
+  BeginMode3D(camera);
+
+  float s = (float)env->size;
+  Color edge = (Color){128, 128, 128, 255};
+  // Bottom square
+  DrawLine3D((Vector3){0, 0, 0}, (Vector3){s, 0, 0}, edge);
+  DrawLine3D((Vector3){0, 0, 0}, (Vector3){0, 0, s}, edge);
+  DrawLine3D((Vector3){s, 0, 0}, (Vector3){s, 0, s}, edge);
+  DrawLine3D((Vector3){0, 0, s}, (Vector3){s, 0, s}, edge);
+  // Top square
+  DrawLine3D((Vector3){0, s, 0}, (Vector3){s, s, 0}, edge);
+  DrawLine3D((Vector3){0, s, 0}, (Vector3){0, s, s}, edge);
+  DrawLine3D((Vector3){s, s, 0}, (Vector3){s, s, s}, edge);
+  DrawLine3D((Vector3){0, s, s}, (Vector3){s, s, s}, edge);
+  // Vertical edges
+  DrawLine3D((Vector3){0, 0, 0}, (Vector3){0, s, 0}, edge);
+  DrawLine3D((Vector3){s, 0, 0}, (Vector3){s, s, 0}, edge);
+  DrawLine3D((Vector3){0, 0, s}, (Vector3){0, s, s}, edge);
+  DrawLine3D((Vector3){s, 0, s}, (Vector3){s, s, s}, edge);
+
+  for (int x = 0; x < env->size; x++) {
+    for (int y = 0; y < env->size; y++) {
+      for (int z = 0; z < env->size; z++) {
+        int idx = x * env->size * env->size + y * env->size + z;
+        int tex = env->observations[idx];
+        if (tex == EMPTY) {
+          continue;
+        }
+        Color color = (tex == AGENT) ? (Color){0, 187, 187, 255}
+                                     : (Color){187, 0, 0, 255};
+
+        // Cells are unit cubes, so each centre sits half a cell in.
+        Vector3 pos = {(float)x + 0.5f, (float)y + 0.5f, (float)z + 0.5f};
+
+        DrawCube(pos, 1.0f, 1.0f, 1.0f, color);
+      }
+    }
+  }
+
+  EndMode3D();
+  EndDrawing();
+}
+
+// Closes the render window if one is open. env is not used.
+void c_close(Cubed *env) {
+  if (IsWindowReady()) {
+    CloseWindow();
+  }
+}
diff --git a/pufferlib/ocean/cubed/cubed.py b/pufferlib/ocean/cubed/cubed.py
new file mode 100644
index 000000000..490c6237b
--- /dev/null
+++ b/pufferlib/ocean/cubed/cubed.py
@@ -0,0 +1,70 @@
+"""Python wrapper around the C implementation of the Cubed environment."""
+
+import gymnasium
+import numpy as np
+
+import pufferlib
+from pufferlib.ocean.cubed import binding
+
+class Cubed(pufferlib.PufferEnv):
+    """Batch of `num_envs` Cubed grids (size^3 cells each) stepped through the C binding."""
+
+    def __init__(self, num_envs=1, render_mode=None, log_interval=128, size=11, buf=None, seed=0):
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=2, shape=(size*size*size,), dtype=np.uint8)
+        self.single_action_space = gymnasium.spaces.Discrete(7)
+        self.render_mode = render_mode
+        self.num_agents = num_envs
+        self.log_interval = log_interval
+
+        super().__init__(buf)
+        self.c_envs = binding.vec_init(self.observations, self.actions, self.rewards, self.terminals, self.truncations, num_envs, seed, size=size)
+        # Initialised here too so step() works before the first reset().
+        self.tick = 0
+
+    def reset(self, seed=0):
+        """Reset all environments through the binding; returns (observations, [])."""
+        binding.vec_reset(self.c_envs, seed)
+        self.tick = 0
+        return self.observations, []
+
+    def step(self, actions):
+        """Step all environments once; a log dict is appended to info every log_interval steps."""
+        self.tick += 1
+
+        self.actions[:] = actions
+        binding.vec_step(self.c_envs)
+
+        info = []
+        if self.tick % self.log_interval == 0:
+            info.append(binding.vec_log(self.c_envs))
+
+        return (self.observations, self.rewards, self.terminals, self.truncations, info)
+
+    def render(self):
+        """Render through the binding (index 0 is passed to vec_render)."""
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        """Close the C environments through the binding."""
+        binding.vec_close(self.c_envs)
+
+if __name__ == "__main__":
+    # Quick throughput benchmark: random actions for about 10 seconds.
+    N = 4096
+
+    env = Cubed(num_envs=N)
+    env.reset()
+    steps = 0
+
+    CACHE = 1024
+    actions = np.random.randint(0, 7, (CACHE, N))
+
+    i = 0
+    import time
+    start = time.time()
+    while time.time() - start < 10:
+        env.step(actions[i % CACHE])
+        steps += N
+        i += 1
+
+    print('Cubed SPS:', int(steps / (time.time() - start)))
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
index 6c56a4ea2..5ac20502b 100644
--- a/pufferlib/ocean/environment.py
+++ b/pufferlib/ocean/environment.py
@@ -134,6 +134,7 @@ def make_multiagent(buf=None, **kwargs):
     'drone': 'Drone',
     'nmmo3': 'NMMO3',
     'snake': 'Snake',
+    'cubed': 'Cubed',
     'squared': 'Squared',
     'pysquared': 'PySquared',
     'connect4': 'Connect4',