Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ MANIFEST

# Mac files
.DS_Store
*.dSYM/

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
15 changes: 15 additions & 0 deletions pufferlib/config/ocean/cubed.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[base]
package = ocean
env_name = puffer_cubed
policy_name = Policy
rnn_name = Recurrent

[env]
num_envs = 4096
size = 5

[train]
total_timesteps = 20_000_000
gamma = 0.95
learning_rate = 0.05
minibatch_size = 32768
17 changes: 17 additions & 0 deletions pufferlib/ocean/cubed/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "cubed.h"

#define Env Cubed
#include "../env_binding.h"

/* Per-environment initialiser hook: copies the "size" keyword argument
 * into the environment struct. No other kwargs are read here. Always
 * returns 0. */
static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
    (void)args; /* positional arguments are not consulted */

    int size = unpack(kwargs, "size");
    env->size = size;

    return 0;
}

/* Logging hook: publishes four fields of the aggregate Log into `dict`
 * under the keys "perf", "score", "episode_return" and "episode_length".
 * The `n` field of Log is not exported here. Always returns 0.
 * NOTE(review): assign_to_dict is defined outside this file (presumably in
 * env_binding.h); whether the values are averaged by `n` before reaching
 * this function cannot be seen from here - confirm against the binding. */
static int my_log(PyObject *dict, Log *log) {
assign_to_dict(dict, "perf", log->perf);
assign_to_dict(dict, "score", log->score);
assign_to_dict(dict, "episode_return", log->episode_return);
assign_to_dict(dict, "episode_length", log->episode_length);
return 0;
}
40 changes: 40 additions & 0 deletions pufferlib/ocean/cubed/cubed.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include "cubed.h"

int main() {
Cubed env = {.size = 11};
env.observations = (unsigned char *)calloc(env.size * env.size * env.size,
sizeof(unsigned char));
env.actions = (int *)calloc(1, sizeof(int));
env.rewards = (float *)calloc(1, sizeof(float));
env.terminals = (unsigned char *)calloc(1, sizeof(unsigned char));

c_reset(&env);
c_render(&env);

while (!WindowShouldClose()) {
if (IsKeyDown(KEY_LEFT_SHIFT)) {
env.actions[0] = 0;
if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W))
env.actions[0] = UP;
if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S))
env.actions[0] = DOWN;
if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A))
env.actions[0] = LEFT;
if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D))
env.actions[0] = RIGHT;
if (IsKeyDown(KEY_Q))
env.actions[0] = FRONT;
if (IsKeyDown(KEY_E))
env.actions[0] = BACK;
} else {
env.actions[0] = rand() % 7;
}
c_step(&env);
c_render(&env);
}
free(env.observations);
free(env.actions);
free(env.rewards);
free(env.terminals);
c_close(&env);
}
203 changes: 203 additions & 0 deletions pufferlib/ocean/cubed/cubed.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#include "raylib.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>

// Action codes, compared against env->actions[0] in c_step.
// NOOP leaves the agent in place; any value outside 0..6 also moves nothing.
const unsigned char NOOP = 0;
// y + 1
const unsigned char UP = 1;
// y - 1
const unsigned char DOWN = 2;
// x - 1
const unsigned char LEFT = 3;
// x + 1
const unsigned char RIGHT = 4;
// z + 1
const unsigned char FRONT = 5;
// z - 1
const unsigned char BACK = 6;

// Cell contents stored in the observation grid (one byte per cell).
const unsigned char EMPTY = 0;
const unsigned char AGENT = 1;
const unsigned char TARGET = 2;

// Running totals over finished episodes, accumulated by add_log.
// NOTE(review): the field layout is presumably relied on by the shared
// binding code - confirm before reordering or retyping fields.
typedef struct {
// Number of logged episodes whose final reward was positive.
float perf;
// Sum of the final rewards of logged episodes.
float score;
// Sum of the final rewards of logged episodes (same quantity as score,
// since the only non-zero reward is given on the terminal step).
float episode_return;
// Sum of the tick counts at which logged episodes ended.
float episode_length;
// Number of episodes logged.
float n;
} Log;

// State of one Cubed environment: an agent moving through a
// size x size x size grid towards a single target cell.
typedef struct {
// Aggregate episode statistics, updated by add_log.
Log log;
// Grid contents, size^3 bytes holding EMPTY / AGENT / TARGET.
// Cell (x, y, z) lives at index x*size*size + y*size + z.
unsigned char *observations;
// Only actions[0] is read: the action code for the next c_step.
int *actions;
// Only rewards[0] is written: the reward produced by the last c_step.
float *rewards;
// Only terminals[0] is written: 1 when the last c_step ended an episode.
unsigned char *terminals;
// Edge length of the cube, in cells.
int size;
// Steps taken since the last c_reset.
int tick;
// Agent position along each axis.
int x;
int y;
int z;
} Cubed;

/* Fold the episode that just ended into the running totals in env->log.
 * Reads the final reward from env->rewards[0] and the episode length from
 * env->tick, so it must be called before the environment is reset. */
void add_log(Cubed *env) {
    Log *totals = &env->log;
    const float final_reward = env->rewards[0];

    /* A positive final reward counts as a success. */
    totals->perf += (final_reward > 0) ? 1 : 0;
    totals->score += final_reward;
    totals->episode_length += env->tick;
    totals->episode_return += final_reward;
    totals->n++;
}

/* Start a new episode.
 *
 * Clears the grid, puts the agent in the centre cell (size / 2 on every
 * axis), zeroes the tick counter and drops the target on a uniformly random
 * cell other than the agent's.
 *
 * Degenerate sizes are handled instead of misbehaving:
 *   - size <= 0: there is no grid, so only the scalar state is reset
 *     (previously this reached rand() % 0, which is undefined behaviour);
 *   - size == 1: the single cell holds the agent, so no target is placed
 *     (previously the rejection loop below never terminated).
 * For size >= 2 the behaviour is unchanged. */
void c_reset(Cubed *env) {
    env->tick = 0;
    env->x = env->size / 2;
    env->y = env->size / 2;
    env->z = env->size / 2;

    if (env->size <= 0) {
        return;
    }

    int cubes = env->size * env->size * env->size;
    memset(env->observations, 0, (size_t)cubes * sizeof(unsigned char));

    int agent_idx = env->x * env->size * env->size + env->y * env->size + env->z;
    env->observations[agent_idx] = AGENT;

    if (cubes < 2) {
        /* No free cell left for a target. */
        return;
    }

    /* Rejection-sample until the target lands on a cell the agent is not on. */
    int target_idx;
    do {
        target_idx = rand() % cubes;
    } while (target_idx == agent_idx);
    env->observations[target_idx] = TARGET;
}

/* Advance the environment by one step using the action in env->actions[0].
 *
 * The agent moves one cell along a single axis (or stays put for NOOP and
 * unrecognised codes). The episode ends with reward -1 if the agent leaves
 * the grid or more than 5 * size ticks have elapsed, and with reward +1 if
 * it lands on the target. On either ending the episode is logged and the
 * environment is reset immediately, so the observation seen afterwards
 * belongs to the next episode. Non-terminal steps give reward 0. */
void c_step(Cubed *env) {
    const int n = env->size;

    env->tick += 1;
    const int action = env->actions[0];
    env->terminals[0] = 0;
    env->rewards[0] = 0;

    /* Lift the agent off its current cell before moving it. */
    env->observations[(env->x * n + env->y) * n + env->z] = EMPTY;

    if (action == UP) {
        env->y += 1;
    } else if (action == DOWN) {
        env->y -= 1;
    } else if (action == LEFT) {
        env->x -= 1;
    } else if (action == RIGHT) {
        env->x += 1;
    } else if (action == FRONT) {
        env->z += 1;
    } else if (action == BACK) {
        env->z -= 1;
    }

    const int timed_out = env->tick > 5 * n;
    const int outside = env->x < 0 || env->y < 0 || env->z < 0 ||
                        env->x >= n || env->y >= n || env->z >= n;

    /* 0 means the episode continues; otherwise this is the terminal reward. */
    float outcome = 0.0f;
    int pos = -1;
    if (timed_out || outside) {
        outcome = -1.0f;
    } else {
        /* Only index the grid once the position is known to be inside it. */
        pos = (env->x * n + env->y) * n + env->z;
        if (env->observations[pos] == TARGET) {
            outcome = 1.0f;
        }
    }

    if (outcome != 0.0f) {
        env->terminals[0] = 1;
        env->rewards[0] = outcome;
        add_log(env);
        c_reset(env);
        return;
    }

    env->observations[pos] = AGENT;
}

/* Draw one frame of the environment with raylib.
 *
 * The window and the orbit camera are created lazily on the first call; the
 * camera is a function-local static, so it is shared by every environment
 * rendered through this function. Holding the left mouse button and
 * dragging orbits the camera around the cube centre. Pressing ESC exits the
 * process. The frame shows the cube outline in grey, the agent as a cyan
 * unit cube and the target as a red unit cube. */
void c_render(Cubed *env) {
    static Camera3D cam = {0};
    const int n = env->size;

    if (!IsWindowReady()) {
        InitWindow(1024, 768, "PufferLib Cubed");
        SetTargetFPS(5);

        const float mid = n / 2.0f;
        cam.position = (Vector3){n * 2.5f, n * 2.0f, n * 2.5f};
        cam.target = (Vector3){mid, mid, mid};
        cam.up = (Vector3){0.0f, 1.0f, 0.0f};
        cam.fovy = 60.0f;
        cam.projection = CAMERA_PERSPECTIVE;
    }

    if (IsKeyDown(KEY_ESCAPE)) {
        exit(0);
    }

    if (IsMouseButtonDown(MOUSE_LEFT_BUTTON)) {
        const Vector2 drag = GetMouseDelta();
        const float sensitivity = 0.003f;

        /* Camera position relative to the point it orbits. */
        const float dx = cam.position.x - cam.target.x;
        const float dy = cam.position.y - cam.target.y;
        const float dz = cam.position.z - cam.target.z;

        /* Convert to spherical coordinates, nudge the angles by the mouse
         * movement, then convert back. */
        const float radius = sqrtf(dx * dx + dy * dy + dz * dz);
        float yaw = atan2f(dx, dz);
        float pitch = asinf(dy / radius);

        yaw -= drag.x * sensitivity;
        pitch += drag.y * sensitivity;

        /* Keep the camera away from the poles. */
        if (pitch > 1.4f) {
            pitch = 1.4f;
        } else if (pitch < -1.4f) {
            pitch = -1.4f;
        }

        cam.position.x = cam.target.x + radius * sinf(yaw) * cosf(pitch);
        cam.position.y = cam.target.y + radius * sinf(pitch);
        cam.position.z = cam.target.z + radius * cosf(yaw) * cosf(pitch);
    }

    BeginDrawing();
    ClearBackground((Color){6, 24, 24, 255});
    BeginMode3D(cam);

    /* Wireframe of the bounding cube: each row is one edge, given as two
     * corners in units of the edge length. */
    static const unsigned char edges[12][2][3] = {
        /* bottom square */
        {{0, 0, 0}, {1, 0, 0}},
        {{0, 0, 0}, {0, 0, 1}},
        {{1, 0, 0}, {1, 0, 1}},
        {{0, 0, 1}, {1, 0, 1}},
        /* top square */
        {{0, 1, 0}, {1, 1, 0}},
        {{0, 1, 0}, {0, 1, 1}},
        {{1, 1, 0}, {1, 1, 1}},
        {{0, 1, 1}, {1, 1, 1}},
        /* vertical edges */
        {{0, 0, 0}, {0, 1, 0}},
        {{1, 0, 0}, {1, 1, 0}},
        {{0, 0, 1}, {0, 1, 1}},
        {{1, 0, 1}, {1, 1, 1}},
    };
    const float side = (float)n;
    const Color outline = (Color){128, 128, 128, 255};
    for (int e = 0; e < 12; e++) {
        const Vector3 from = {edges[e][0][0] * side, edges[e][0][1] * side,
                              edges[e][0][2] * side};
        const Vector3 to = {edges[e][1][0] * side, edges[e][1][1] * side,
                            edges[e][1][2] * side};
        DrawLine3D(from, to, outline);
    }

    /* Walk the flat grid in storage order and recover (x, y, z) from the
     * index; empty cells are not drawn. */
    const int total = n * n * n;
    for (int idx = 0; idx < total; idx++) {
        const int cell = env->observations[idx];
        if (cell == EMPTY) {
            continue;
        }

        const int x = idx / (n * n);
        const int y = (idx / n) % n;
        const int z = idx % n;

        Color fill;
        if (cell == AGENT) {
            fill = (Color){0, 187, 187, 255};
        } else {
            fill = (Color){187, 0, 0, 255};
        }

        /* DrawCube takes the cube centre, hence the half-cell offset. */
        const Vector3 centre = {(float)x + 0.5f, (float)y + 0.5f,
                                (float)z + 0.5f};
        DrawCube(centre, 1.0f, 1.0f, 1.0f, fill);
    }

    EndMode3D();
    EndDrawing();
}

/* Release rendering resources: closes the raylib window if one is open.
 * The environment's buffers are not touched; the caller owns them. */
void c_close(Cubed *env) {
    (void)env; /* nothing per-environment to release */

    if (!IsWindowReady()) {
        return;
    }
    CloseWindow();
}
59 changes: 59 additions & 0 deletions pufferlib/ocean/cubed/cubed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import gymnasium
import numpy as np

import pufferlib
from pufferlib.ocean.cubed import binding

class Cubed(pufferlib.PufferEnv):
    """Vectorised Python front end for the C Cubed environment.

    Each of the ``num_envs`` environments exposes a flat uint8 observation
    of ``size**3`` cells with values in [0, 2] and a 7-way discrete action
    space. All stepping is delegated to the compiled ``binding`` module.
    """

    def __init__(self, num_envs=1, render_mode=None, log_interval=128, size=11, buf=None, seed=0):
        n_cells = size * size * size
        self.single_observation_space = gymnasium.spaces.Box(
            low=0,
            high=2,
            shape=(n_cells,),
            dtype=np.uint8,
        )
        self.single_action_space = gymnasium.spaces.Discrete(7)
        self.render_mode = render_mode
        self.num_agents = num_envs
        self.log_interval = log_interval

        # The base class sets up the shared buffers handed to the binding below.
        super().__init__(buf)
        self.c_envs = binding.vec_init(
            self.observations,
            self.actions,
            self.rewards,
            self.terminals,
            self.truncations,
            num_envs,
            seed,
            size=size,
        )

    def reset(self, seed=0):
        """Reset every environment and return ``(observations, infos)``."""
        binding.vec_reset(self.c_envs, seed)
        self.tick = 0
        return self.observations, []

    def step(self, actions):
        """Apply ``actions`` to all environments and advance them one step.

        Returns ``(observations, rewards, terminals, truncations, info)``;
        ``info`` holds one log entry on every ``log_interval``-th step and
        is empty otherwise.
        """
        self.tick += 1

        self.actions[:] = actions
        binding.vec_step(self.c_envs)

        log_due = self.tick % self.log_interval == 0
        info = [binding.vec_log(self.c_envs)] if log_due else []

        return (self.observations, self.rewards, self.terminals, self.truncations, info)

    def render(self):
        """Render the environment at index 0."""
        binding.vec_render(self.c_envs, 0)

    def close(self):
        """Release the C-side environments."""
        binding.vec_close(self.c_envs)

if __name__ == "__main__":
    # Throughput benchmark: step N environments with pre-generated random
    # actions for ten seconds and report steps per second.
    import time

    N = 4096
    CACHE = 1024

    env = Cubed(num_envs=N)
    env.reset()

    # Generate the actions up front so sampling cost is not measured.
    actions = np.random.randint(0, 7, (CACHE, N))

    steps = 0
    i = 0
    start = time.time()
    while time.time() - start < 10:
        batch = actions[i % CACHE]
        env.step(batch)
        steps += N
        i += 1

    elapsed = time.time() - start
    print('Cubed SPS:', int(steps / elapsed))
1 change: 1 addition & 0 deletions pufferlib/ocean/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def make_multiagent(buf=None, **kwargs):
'drone': 'Drone',
'nmmo3': 'NMMO3',
'snake': 'Snake',
'cubed': 'Cubed',
'squared': 'Squared',
'pysquared': 'PySquared',
'connect4': 'Connect4',
Expand Down