PufferAI · eitanporat · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025
diff --git a/pufferlib/config/ocean/nonogram.ini b/pufferlib/config/ocean/nonogram.ini
@@ -0,0 +1,52 @@
+[base]
+package = ocean
+env_name = puffer_nonogram
+policy_name = Nonogram
+rnn_name = Recurrent
+
+[env]
+# min_size, max_size and easy_learn match the kwargs unpacked by my_init()
+# in pufferlib/ocean/nonogram/binding.c. There, max_size also sets the
+# initial board size and the step budget (4 * max_size * max_size).
+num_envs = 4096
+min_size = 4
+max_size = 8
+easy_learn = 1
+
+[sweep]
+metric = score
+
+[train]
+name = pufferai
+seed = 42
+gamma = 0.99965
+device = cuda
+compile = False
+project = ablations
+use_rnn = True
+vf_coef = 2.365
+adam_eps = 1.566e-10
+data_dir = experiments
+ent_coef = 0.01554
+anneal_lr = True
+clip_coef = 0.1267
+optimizer = muon
+precision = float32
+adam_beta1 = 0.7912
+adam_beta2 = 0.999949
+batch_size = auto
+gae_lambda = 0.9007
+prio_alpha = 0.7441
+prio_beta0 = 0.7365
+cpu_offload = False
+bptt_horizon = 64
+compile_mode = max-autotune-no-cudagraphs
+vf_clip_coef = 1.598
+learning_rate = 0.007103
+max_grad_norm = 1.275
+update_epochs = 1
+vtrace_c_clip = 0.8692
+minibatch_size = 32768
+total_timesteps = 2e10
+vtrace_rho_clip = 0.9074
+compile_fullgraph = True
+max_minibatch_size = 32768
+checkpoint_interval = 200
+torch_deterministic = True
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
@@ -126,6 +126,7 @@ def make_multiagent(buf=None, **kwargs):
     'freeway': 'Freeway',
     'enduro': 'Enduro',
     'tetris': 'Tetris',
+    'nonogram': 'Nonogram',
     'cartpole': 'Cartpole',
     'moba': 'Moba',
     'matsci': 'Matsci',

diff --git a/pufferlib/ocean/nonogram/binding.c b/pufferlib/ocean/nonogram/binding.c
@@ -0,0 +1,76 @@
+#include "nonogram.h"
+#include <Python.h>
+
+// Forward declare custom methods so that MY_METHODS below can name them
+// before their definitions further down in this file.
+static PyObject *vec_get_solutions(PyObject *self, PyObject *args);
+static PyObject *vec_get_size(PyObject *self, PyObject *args);
+
+// Env and MY_METHODS are defined ahead of the env_binding.h include below;
+// presumably that header consumes them (Env as the environment struct type,
+// MY_METHODS as extra method-table entries) -- confirm against env_binding.h.
+// Each MY_METHODS entry is laid out as {Python-visible name, C function,
+// calling-convention flags, docstring}.
+#define Env Nonogram
+#define MY_METHODS                                                             \
+  {"vec_get_solutions", vec_get_solutions, METH_VARARGS,                       \
+   "Get solutions from all environments"},                                     \
+      {"vec_get_size", vec_get_size, METH_VARARGS, "Get current board size"}
+
+#include "../env_binding.h"
+
+// Populate a newly created environment from the Python keyword arguments.
+//
+// Reads "min_size", "max_size" and "easy_learn" through unpack(), starts the
+// board at max_size and allows 4 * max_size * max_size steps per episode.
+//
+// Returns 0 on success. Returns -1 with a Python exception set when a kwarg
+// could not be unpacked or when the sizes violate
+// 1 <= min_size <= max_size <= MAX_SIZE.
+static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
+  (void)args; // positional arguments are not used by this environment
+
+  env->min_size = unpack(kwargs, "min_size");
+  env->max_size = unpack(kwargs, "max_size");
+  env->easy_learn = unpack(kwargs, "easy_learn");
+
+  // NOTE(review): assumes unpack() leaves a Python exception pending when a
+  // key is missing or malformed -- propagate it instead of masking it with
+  // the range error below.
+  if (PyErr_Occurred()) {
+    return -1;
+  }
+
+  // vec_get_solutions() copies MAX_SIZE * MAX_SIZE bytes per environment, so
+  // the per-env grids are presumably fixed at that capacity; a larger board
+  // would run past them. An empty or inverted size range is never playable.
+  if (env->min_size < 1 || env->max_size < env->min_size ||
+      env->max_size > MAX_SIZE) {
+    PyErr_Format(PyExc_ValueError,
+                 "nonogram: need 1 <= min_size <= max_size <= %d "
+                 "(got min_size=%d, max_size=%d)",
+                 (int)MAX_SIZE, (int)env->min_size, (int)env->max_size);
+    return -1;
+  }
+
+  env->size = env->max_size;
+  env->max_steps = 4 * env->max_size * env->max_size;
+  return 0;
+}
+
+static int my_log(PyObject *dict, Log *log) {
+  assign_to_dict(dict, "score", log->score);
+  assign_to_dict(dict, "episode_return", log->episode_return);
+  assign_to_dict(dict, "episode_length", log->episode_length);
+  assign_to_dict(dict, "solved", log->solved);
+  return 0;
+}
+
+// Custom method: copy the solution grid of every environment into a
+// caller-supplied NumPy array.
+//
+// args must be a 2-tuple. The first item is resolved by unpack_vecenv(); the
+// second is the destination array, which must be contiguous, writeable and
+// at least num_envs * MAX_SIZE * MAX_SIZE bytes large. Environment i is
+// written at byte offset i * MAX_SIZE * MAX_SIZE (the full MAX_SIZE grid is
+// always copied, regardless of the current board size).
+//
+// Returns None on success, or NULL with a Python exception set
+// (TypeError / ValueError) when the arguments are unusable.
+static PyObject *vec_get_solutions(PyObject *self, PyObject *args) {
+  if (PyTuple_Size(args) != 2) {
+    PyErr_SetString(PyExc_TypeError, "vec_get_solutions requires 2 arguments");
+    return NULL;
+  }
+
+  VecEnv *vec = unpack_vecenv(args);
+  if (!vec) {
+    return NULL;
+  }
+
+  PyObject *solutions_obj = PyTuple_GetItem(args, 1);
+  if (!PyObject_TypeCheck(solutions_obj, &PyArray_Type)) {
+    PyErr_SetString(PyExc_TypeError, "solutions must be a NumPy array");
+    return NULL;
+  }
+  PyArrayObject *solutions = (PyArrayObject *)solutions_obj;
+  if (!PyArray_ISCONTIGUOUS(solutions)) {
+    PyErr_SetString(PyExc_ValueError, "solutions must be contiguous");
+    return NULL;
+  }
+  // The loop below writes straight into the array's buffer, so a read-only
+  // array (e.g. a read-only memory map) must be rejected up front.
+  if (!PyArray_ISWRITEABLE(solutions)) {
+    PyErr_SetString(PyExc_ValueError, "solutions must be writeable");
+    return NULL;
+  }
+
+  // Make sure the destination can hold one MAX_SIZE x MAX_SIZE grid per
+  // environment before any byte is written; otherwise memcpy would overrun
+  // the caller's buffer.
+  size_t max_grid_size = (size_t)MAX_SIZE * MAX_SIZE;
+  size_t required = (size_t)vec->num_envs * max_grid_size;
+  if ((size_t)PyArray_NBYTES(solutions) < required) {
+    PyErr_Format(PyExc_ValueError,
+                 "solutions must hold at least %zu bytes (got %zd)", required,
+                 (Py_ssize_t)PyArray_NBYTES(solutions));
+    return NULL;
+  }
+
+  // Copy solutions from each environment (always use max_size for buffer)
+  unsigned char *sol_ptr = PyArray_DATA(solutions);
+  for (int i = 0; i < vec->num_envs; i++) {
+    Nonogram *env = vec->envs[i];
+    memcpy(sol_ptr + (size_t)i * max_grid_size, env->solution, max_grid_size);
+  }
+
+  Py_RETURN_NONE;
+}
+
+// Get current board size from first environment.
+//
+// args is resolved by unpack_vecenv(). Returns the first environment's
+// `size` field as a Python int, or NULL with a Python exception set when the
+// vec env cannot be unpacked or contains no environments.
+static PyObject *vec_get_size(PyObject *self, PyObject *args) {
+  VecEnv *vec = unpack_vecenv(args);
+  if (!vec) {
+    return NULL;
+  }
+
+  // envs[0] is only valid when at least one environment exists.
+  if (vec->num_envs < 1) {
+    PyErr_SetString(PyExc_ValueError,
+                    "vec_get_size requires at least one environment");
+    return NULL;
+  }
+
+  Nonogram *env = vec->envs[0];
+  return PyLong_FromLong(env->size);
+}
diff --git a/pufferlib/ocean/nonogram/nonogram.c b/pufferlib/ocean/nonogram/nonogram.c
@@ -0,0 +1,32 @@
+/* Pure C demo file for Nonogram. Build it with:
+ * bash scripts/build_ocean.sh nonogram local (debug)
+ * bash scripts/build_ocean.sh nonogram fast
+ */
+
+#include "nonogram.h"
+
+// Standalone demo: runs a single 8x8 Nonogram with uniformly random cell
+// actions and renders every step until the window is closed.
+// Returns 0 on normal exit, 1 if a buffer could not be allocated.
+int main(void) {
+  // min_size/max_size are set alongside size to mirror my_init() in
+  // binding.c, which always populates them and derives size from max_size.
+  // NOTE(review): whether c_reset() reads them is not visible from this
+  // file -- confirm against nonogram.h.
+  Nonogram env = {.size = 8, .min_size = 8, .max_size = 8};
+  int max_clues = env.size / 2;
+  // Observation buffer: the size*size grid cells plus max_clues clue slots
+  // for each of the 2 * size rows and columns.
+  int obs_size = env.size * env.size + 2 * env.size * max_clues;
+
+  env.max_steps = 4 * env.size * env.size;
+  env.observations = calloc((size_t)obs_size, sizeof *env.observations);
+  env.actions = calloc(1, sizeof *env.actions);
+  env.rewards = calloc(1, sizeof *env.rewards);
+  env.terminals = calloc(1, sizeof *env.terminals);
+  if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
+    // free(NULL) is a no-op, so releasing all four unconditionally is safe.
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    return 1;
+  }
+
+  c_reset(&env);
+  c_render(&env);
+  while (!WindowShouldClose()) {
+    // Pick a random cell index on the current board.
+    env.actions[0] = rand() % (env.size * env.size);
+    c_step(&env);
+    c_render(&env);
+  }
+
+  free(env.observations);
+  free(env.actions);
+  free(env.rewards);
+  free(env.terminals);
+  c_close(&env);
+  return 0;
+}