Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions pufferlib/config/ocean/nonogram.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
[base]
package = ocean
env_name = puffer_nonogram
policy_name = Nonogram
rnn_name = Recurrent

[env]
num_envs = 4096
min_size = 4
max_size = 8
easy_learn = 1

[sweep]
metric = score

[train]
name = pufferai
seed = 42
gamma = 0.99965
device = cuda
compile = False
project = ablations
use_rnn = True
vf_coef = 2.365
adam_eps = 1.566e-10
data_dir = experiments
ent_coef = 0.01554
anneal_lr = True
clip_coef = 0.1267
optimizer = muon
precision = float32
adam_beta1 = 0.7912
adam_beta2 = 0.999949
batch_size = auto
gae_lambda = 0.9007
prio_alpha = 0.7441
prio_beta0 = 0.7365
cpu_offload = False
bptt_horizon = 64
compile_mode = max-autotune-no-cudagraphs
vf_clip_coef = 1.598
learning_rate = 0.007103
max_grad_norm = 1.275
update_epochs = 1
vtrace_c_clip = 0.8692
minibatch_size = 32768
total_timesteps = 2e10
vtrace_rho_clip = 0.9074
compile_fullgraph = True
max_minibatch_size = 32768
checkpoint_interval = 200
torch_deterministic = True
1 change: 1 addition & 0 deletions pufferlib/ocean/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def make_multiagent(buf=None, **kwargs):
'freeway': 'Freeway',
'enduro': 'Enduro',
'tetris': 'Tetris',
'nonogram': 'Nonogram',
'cartpole': 'Cartpole',
'moba': 'Moba',
'matsci': 'Matsci',
Expand Down
76 changes: 76 additions & 0 deletions pufferlib/ocean/nonogram/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#include "nonogram.h"
#include <Python.h>

// Forward declare custom methods
// (MY_METHODS below names them before their definitions appear in this file).
static PyObject *vec_get_solutions(PyObject *self, PyObject *args);
static PyObject *vec_get_size(PyObject *self, PyObject *args);

// Env is an alias for this environment's struct; my_init() below takes Env *.
#define Env Nonogram
// Two extra method-table entries: Python-visible name, C function,
// calling convention, and docstring.
// NOTE(review): presumably spliced into the module's method table by
// ../env_binding.h, which is included right after these defines -- confirm there.
#define MY_METHODS \
{"vec_get_solutions", vec_get_solutions, METH_VARARGS, \
"Get solutions from all environments"}, \
{"vec_get_size", vec_get_size, METH_VARARGS, "Get current board size"}

#include "../env_binding.h"

// Populate a Nonogram environment from the keyword arguments given at
// construction time.
//
// Reads "min_size", "max_size" and "easy_learn" from kwargs via unpack(),
// then derives the step budget (four times the cell count of a max_size
// board) and sets the current board size to max_size.
// `args` is not used. Always returns 0.
static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
    (void)args;

    // Values supplied by the caller.
    env->min_size = unpack(kwargs, "min_size");
    env->max_size = unpack(kwargs, "max_size");
    env->easy_learn = unpack(kwargs, "easy_learn");

    // Values derived from max_size.
    env->max_steps = 4 * env->max_size * env->max_size;
    env->size = env->max_size;

    return 0;
}

// Copy the fields of `log` into the Python dict `dict` under the keys
// "score", "episode_return", "episode_length" and "solved", in that order.
// Always returns 0.
// NOTE(review): assign_to_dict() is defined outside this file; any error it
// may report is not inspected here.
static int my_log(PyObject *dict, Log *log) {
assign_to_dict(dict, "score", log->score);
assign_to_dict(dict, "episode_return", log->episode_return);
assign_to_dict(dict, "episode_length", log->episode_length);
assign_to_dict(dict, "solved", log->solved);
return 0;
}

// Custom method to get solutions from all environments.
//
// Python call: vec_get_solutions(vec_handle, solutions) -> None
//   args[0]  handle decoded by unpack_vecenv()
//   args[1]  NumPy array that receives the data; it must be contiguous,
//            writeable, and hold at least num_envs * MAX_SIZE * MAX_SIZE bytes
//
// Environment i's solution is copied to byte offset i * MAX_SIZE * MAX_SIZE,
// always using the full MAX_SIZE * MAX_SIZE stride regardless of the
// environment's current board size.
//
// Raises TypeError for a wrong argument count or a non-array argument, and
// ValueError for an array that is non-contiguous, read-only, or too small.
static PyObject *vec_get_solutions(PyObject *self, PyObject *args) {
    (void)self;

    if (PyTuple_Size(args) != 2) {
        PyErr_SetString(PyExc_TypeError, "vec_get_solutions requires 2 arguments");
        return NULL;
    }

    VecEnv *vec = unpack_vecenv(args);
    if (!vec) {
        return NULL;
    }

    PyObject *solutions_obj = PyTuple_GetItem(args, 1);
    if (!PyObject_TypeCheck(solutions_obj, &PyArray_Type)) {
        PyErr_SetString(PyExc_TypeError, "solutions must be a NumPy array");
        return NULL;
    }
    PyArrayObject *solutions = (PyArrayObject *)solutions_obj;
    if (!PyArray_ISCONTIGUOUS(solutions)) {
        PyErr_SetString(PyExc_ValueError, "solutions must be contiguous");
        return NULL;
    }
    if (!PyArray_ISWRITEABLE(solutions)) {
        PyErr_SetString(PyExc_ValueError, "solutions must be writeable");
        return NULL;
    }

    // Refuse to write past the end of the caller's buffer: the loop below
    // copies a fixed-size grid per environment no matter how large the
    // array actually is.
    const size_t grid_bytes = (size_t)MAX_SIZE * MAX_SIZE;
    const size_t needed_bytes = (size_t)vec->num_envs * grid_bytes;
    if ((size_t)PyArray_NBYTES(solutions) < needed_bytes) {
        PyErr_SetString(PyExc_ValueError,
                        "solutions is too small: need num_envs * MAX_SIZE * MAX_SIZE bytes");
        return NULL;
    }

    // Copy solutions from each environment (always use max_size for buffer)
    unsigned char *sol_ptr = PyArray_DATA(solutions);
    for (int i = 0; i < vec->num_envs; i++) {
        Nonogram *env = vec->envs[i];
        memcpy(sol_ptr + (size_t)i * grid_bytes, env->solution, grid_bytes);
    }

    Py_RETURN_NONE;
}

// Get current board size from first environment.
//
// Python call: vec_get_size(vec_handle) -> int
//   args[0]  handle decoded by unpack_vecenv()
//
// Only environment 0 is consulted; the sizes of the other environments are
// not examined.
//
// Raises TypeError when called without a handle argument and ValueError when
// the vectorized env holds no environments.
static PyObject *vec_get_size(PyObject *self, PyObject *args) {
    (void)self;

    // Validate the argument tuple before decoding it, as vec_get_solutions
    // does; extra positional arguments are still tolerated.
    if (PyTuple_Size(args) < 1) {
        PyErr_SetString(PyExc_TypeError, "vec_get_size requires 1 argument");
        return NULL;
    }

    VecEnv *vec = unpack_vecenv(args);
    if (!vec) {
        return NULL;
    }

    // envs[0] is only valid when at least one environment exists.
    if (vec->num_envs < 1) {
        PyErr_SetString(PyExc_ValueError, "vec_get_size requires at least one environment");
        return NULL;
    }

    Nonogram *env = vec->envs[0];
    return PyLong_FromLong(env->size);
}
32 changes: 32 additions & 0 deletions pufferlib/ocean/nonogram/nonogram.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* Pure C demo file for Nonogram. Build it with:
* bash scripts/build_ocean.sh nonogram local (debug)
* bash scripts/build_ocean.sh nonogram fast
*/

#include "nonogram.h"

// Standalone demo: builds one 8x8 Nonogram environment, then steps it with
// uniformly random cell actions and renders every frame until the window is
// closed.
// Returns 0 on a normal exit and 1 if a buffer could not be allocated.
int main(void) {
    // The Python binding's my_init() sets min_size and max_size alongside
    // size; set them here too so the demo does not run with those bounds
    // left at zero.
    // NOTE(review): how c_reset()/c_step() use the bounds is not visible
    // from this file -- confirm 8/8 is the intended demo configuration.
    Nonogram env = {.size = 8, .min_size = 8, .max_size = 8};
    int max_clues = env.size / 2;
    // One byte per grid cell, plus max_clues bytes for each of the
    // 2 * size clue lines.
    int obs_size = env.size * env.size + 2 * env.size * max_clues;

    env.max_steps = 4 * env.size * env.size;
    env.observations = calloc(obs_size, sizeof *env.observations);
    env.actions = calloc(1, sizeof *env.actions);
    env.rewards = calloc(1, sizeof *env.rewards);
    env.terminals = calloc(1, sizeof *env.terminals);

    // Bail out cleanly instead of handing NULL buffers to the environment.
    if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
        free(env.observations);
        free(env.actions);
        free(env.rewards);
        free(env.terminals);
        return 1;
    }

    c_reset(&env);
    c_render(&env);
    while (!WindowShouldClose()) {
        env.actions[0] = rand() % (env.size * env.size);
        c_step(&env);
        c_render(&env);
    }

    // Tear the environment down while its buffers are still valid, and only
    // then release them.
    c_close(&env);
    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    return 0;
}
Loading
Loading