diff --git a/pufferlib/config/ocean/nonogram.ini b/pufferlib/config/ocean/nonogram.ini new file mode 100644 index 000000000..a503cf3d8 --- /dev/null +++ b/pufferlib/config/ocean/nonogram.ini @@ -0,0 +1,52 @@ +[base] +package = ocean +env_name = puffer_nonogram +policy_name = Nonogram +rnn_name = Recurrent + +[env] +num_envs = 4096 +min_size = 4 +max_size = 8 +easy_learn = 1 + +[sweep] +metric = score + +[train] +name = pufferai +seed = 42 +gamma = 0.99965 +device = cuda +compile = False +project = ablations +use_rnn = True +vf_coef = 2.365 +adam_eps = 1.566e-10 +data_dir = experiments +ent_coef = 0.01554 +anneal_lr = True +clip_coef = 0.1267 +optimizer = muon +precision = float32 +adam_beta1 = 0.7912 +adam_beta2 = 0.999949 +batch_size = auto +gae_lambda = 0.9007 +prio_alpha = 0.7441 +prio_beta0 = 0.7365 +cpu_offload = False +bptt_horizon = 64 +compile_mode = max-autotune-no-cudagraphs +vf_clip_coef = 1.598 +learning_rate = 0.007103 +max_grad_norm = 1.275 +update_epochs = 1 +vtrace_c_clip = 0.8692 +minibatch_size = 32768 +total_timesteps = 2e10 +vtrace_rho_clip = 0.9074 +compile_fullgraph = True +max_minibatch_size = 32768 +checkpoint_interval = 200 +torch_deterministic = True diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py index 93df76506..ea304bd69 100644 --- a/pufferlib/ocean/environment.py +++ b/pufferlib/ocean/environment.py @@ -126,6 +126,7 @@ def make_multiagent(buf=None, **kwargs): 'freeway': 'Freeway', 'enduro': 'Enduro', 'tetris': 'Tetris', + 'nonogram': 'Nonogram', 'cartpole': 'Cartpole', 'moba': 'Moba', 'matsci': 'Matsci', diff --git a/pufferlib/ocean/nonogram/binding.c b/pufferlib/ocean/nonogram/binding.c new file mode 100644 index 000000000..910f5e90f --- /dev/null +++ b/pufferlib/ocean/nonogram/binding.c @@ -0,0 +1,76 @@ +#include "nonogram.h" +#include + +// Forward declare custom methods +static PyObject *vec_get_solutions(PyObject *self, PyObject *args); +static PyObject *vec_get_size(PyObject *self, PyObject *args); + +#define Env Nonogram +#define MY_METHODS \ + {"vec_get_solutions", vec_get_solutions, METH_VARARGS, \ + "Get solutions from all environments"}, \ + {"vec_get_size", vec_get_size, METH_VARARGS, "Get current board size"} + +#include "../env_binding.h" + +static int my_init(Env *env, PyObject *args, PyObject *kwargs) { + env->min_size = unpack(kwargs, "min_size"); + env->max_size = unpack(kwargs, "max_size"); + env->easy_learn = unpack(kwargs, "easy_learn"); + env->size = env->max_size; + env->max_steps = 4 * env->max_size * env->max_size; + return 0; +} + +static int my_log(PyObject *dict, Log *log) { + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "solved", log->solved); + return 0; +} + +// Custom method to get solutions from all environments +static PyObject *vec_get_solutions(PyObject *self, PyObject *args) { + if (PyTuple_Size(args) != 2) { + PyErr_SetString(PyExc_TypeError, "vec_get_solutions requires 2 arguments"); + return NULL; + } + + VecEnv *vec = unpack_vecenv(args); + if (!vec) { + return NULL; + } + + PyObject *solutions_obj = PyTuple_GetItem(args, 1); + if (!PyObject_TypeCheck(solutions_obj, &PyArray_Type)) { + PyErr_SetString(PyExc_TypeError, "solutions must be a NumPy array"); + return NULL; + } + PyArrayObject *solutions = (PyArrayObject *)solutions_obj; + if (!PyArray_ISCONTIGUOUS(solutions)) { + PyErr_SetString(PyExc_ValueError, "solutions must be contiguous"); + 
return NULL; + } + + // Copy solutions from each environment (always use max_size for buffer) + unsigned char *sol_ptr = PyArray_DATA(solutions); + int max_grid_size = MAX_SIZE * MAX_SIZE; + for (int i = 0; i < vec->num_envs; i++) { + Nonogram *env = vec->envs[i]; + memcpy(sol_ptr + i * max_grid_size, env->solution, max_grid_size); + } + + Py_RETURN_NONE; +} + + // Get current board size from first environment +static PyObject *vec_get_size(PyObject *self, PyObject *args) { + VecEnv *vec = unpack_vecenv(args); + if (!vec) { + return NULL; + } + + Nonogram *env = vec->envs[0]; + return PyLong_FromLong(env->size); +} diff --git a/pufferlib/ocean/nonogram/nonogram.c b/pufferlib/ocean/nonogram/nonogram.c new file mode 100644 index 000000000..91e07bbd4 --- /dev/null +++ b/pufferlib/ocean/nonogram/nonogram.c @@ -0,0 +1,32 @@ +/* Pure C demo file for Nonogram. Build it with: + * bash scripts/build_ocean.sh nonogram local (debug) + * bash scripts/build_ocean.sh nonogram fast + */ + +#include "nonogram.h" + +int main() { + Nonogram env = {.size = 8, .min_size = 8, .max_size = 8}; /* c_reset re-samples size from [min_size, max_size] */ + int max_clues = env.size / 2; + int obs_size = env.size * env.size + 2 * env.size * max_clues + 1; /* +1 for the board-size byte written by c_reset */ + + env.max_steps = 4 * env.size * env.size; + env.observations = (unsigned char *)calloc(obs_size, sizeof(unsigned char)); + env.actions = (int *)calloc(1, sizeof(int)); + env.rewards = (float *)calloc(1, sizeof(float)); + env.terminals = (unsigned char *)calloc(1, sizeof(unsigned char)); + + c_reset(&env); + c_render(&env); + while (!WindowShouldClose()) { + env.actions[0] = rand() % (env.size * env.size); + c_step(&env); + c_render(&env); + } + + free(env.observations); + free(env.actions); + free(env.rewards); + free(env.terminals); + c_close(&env); +} diff --git a/pufferlib/ocean/nonogram/nonogram.h b/pufferlib/ocean/nonogram/nonogram.h new file mode 100644 index 000000000..e14ced293 --- /dev/null +++ b/pufferlib/ocean/nonogram/nonogram.h @@ -0,0 +1,605 @@ +/* Nonogram: A logic puzzle environment + * Players fill cells based on row and column clues (run-length encoding) + */ + +#include "raylib.h" +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#define MAX_SIZE 8 +#define MAX_CLUES (MAX_SIZE / 2) + +const unsigned char CELL_EMPTY = 0; +const unsigned char CELL_WHITE = 1; +const unsigned char CELL_BLACK = 2; +const unsigned char CELL_PADDING = 3; + +const float REWARD_WIN = 1.0; +const float REWARD_INVALID_MOVE = -0.2; +const float REWARD_OUT_OF_BOUNDS = -0.2; +const float REWARD_TIMEOUT = -0.1; +const float REWARD_COMPLETE_LINE = 0.02; +const float REWARD_EASY_LEARN_CORRECT = 0.01; +const float REWARD_EASY_LEARN_INCORRECT = -0.01; +const float REWARD_NO_MATCH = -0.05; + +typedef struct { + float score; + float episode_return; + float episode_length; + float solved; + float n; +} Log; + +typedef struct { + Log log; + unsigned char *observations; + int *actions; + float *rewards; + unsigned char *terminals; + + int size; + int min_size; + int max_size; + int max_steps; + int steps_taken; + int filled_total; + int target_total; + int easy_learn; + + unsigned char solution[MAX_SIZE * MAX_SIZE]; + + unsigned char rows_clues[MAX_SIZE * MAX_CLUES]; + unsigned char cols_clues[MAX_SIZE * MAX_CLUES]; + unsigned char rows_num_runs[MAX_SIZE]; + unsigned char cols_num_runs[MAX_SIZE]; + unsigned char rows_target_sum[MAX_SIZE]; + unsigned char cols_target_sum[MAX_SIZE]; + unsigned char rows_max_clue[MAX_SIZE]; + unsigned char cols_max_clue[MAX_SIZE]; + + unsigned char rows_totals[MAX_SIZE]; + unsigned char cols_totals[MAX_SIZE]; + + unsigned char rows_completed[MAX_SIZE]; + unsigned
char cols_completed[MAX_SIZE]; + + float episode_reward; +} Nonogram; + +void add_log(Nonogram *env) { + env->log.score += env->rewards[0]; + env->log.episode_length += env->steps_taken; + env->log.episode_return += env->episode_reward; + env->log.solved += (env->rewards[0] > 0) ? 1 : 0; + env->log.n++; +} + +int get_row_run_length(Nonogram *env, int row, int col) { + int row_start = row * MAX_SIZE; + int run_length = 1; + + for (int c = col - 1; c >= 0; c--) { + if (env->observations[row_start + c] == CELL_BLACK) { + run_length++; + } else { + break; + } + } + + for (int c = col + 1; c < env->size; c++) { + if (env->observations[row_start + c] == CELL_BLACK) { + run_length++; + } else { + break; + } + } + + return run_length; +} + +int get_col_run_length(Nonogram *env, int row, int col) { + int run_length = 1; + + for (int r = row - 1; r >= 0; r--) { + if (env->observations[r * MAX_SIZE + col] == CELL_BLACK) { + run_length++; + } else { + break; + } + } + + for (int r = row + 1; r < env->size; r++) { + if (env->observations[r * MAX_SIZE + col] == CELL_BLACK) { + run_length++; + } else { + break; + } + } + + return run_length; +} + +int check_line_matches(unsigned char *line_data, unsigned char *clues, + int num_runs, int size) { + int run_idx = 0; + int count = 0; + + for (int i = 0; i < size; i++) { + if (line_data[i] == CELL_BLACK) { + count++; + } else if (line_data[i] == CELL_EMPTY || line_data[i] == CELL_WHITE) { + if (count > 0) { + if (clues[run_idx] != count) { + return 0; + } + run_idx++; + count = 0; + } + } + } + + if (count > 0) { + if (clues[run_idx] != count) { + return 0; + } + run_idx++; + } + + return (run_idx == num_runs); +} + +float rand_uniform() { return (float)rand() / (float)RAND_MAX; } + +void c_reset(Nonogram *env) { + env->size = env->min_size + (rand() % (env->max_size - env->min_size + 1)); + env->max_steps = env->size * env->size; + + int full_grid_size = MAX_SIZE * MAX_SIZE; + int max_clues = MAX_SIZE / 2; + + memset(env->observations, CELL_PADDING, full_grid_size); + for (int r = 0; r < env->size; r++) { + for (int c = 0; c < env->size; c++) { + env->observations[r * MAX_SIZE + c] = CELL_EMPTY; + } + } + memset(env->observations + full_grid_size, 0, 2 * MAX_SIZE * max_clues); + + float fill_prob = rand_uniform(); + memset(env->solution, CELL_WHITE, MAX_SIZE * MAX_SIZE); + int has_filled = 0; + for (int i = 0; i < env->size; i++) { + for (int j = 0; j < env->size; j++) { + if (rand_uniform() < fill_prob) { + env->solution[i * MAX_SIZE + j] = CELL_BLACK; + has_filled = 1; + } + } + } + + if (!has_filled) { + int rand_row = rand() % env->size; + int rand_col = rand() % env->size; + env->solution[rand_row * MAX_SIZE + rand_col] = CELL_BLACK; + } + + memset(env->rows_clues, 0, MAX_SIZE * MAX_CLUES); + memset(env->cols_clues, 0, MAX_SIZE * MAX_CLUES); + + for (int i = 0; i < env->size; i++) { + int clue_idx = 0; + int count = 0; + for (int j = 0; j < env->size; j++) { + if (env->solution[i * MAX_SIZE + j] == CELL_BLACK) { + count++; + } else if (count > 0) { + env->rows_clues[i * MAX_CLUES + clue_idx] = count; + clue_idx++; + count = 0; + } + } + if (count > 0) { + env->rows_clues[i * MAX_CLUES + clue_idx] = count; + clue_idx++; + } + env->rows_num_runs[i] = clue_idx; + } + + for (int j = 0; j < env->size; j++) { + int clue_idx = 0; + int count = 0; + for (int i = 0; i < env->size; i++) { + if (env->solution[i * MAX_SIZE + j] == CELL_BLACK) { + count++; + } else if (count > 0) { + env->cols_clues[j * MAX_CLUES + clue_idx] = count; + clue_idx++; + count = 0; + } + 
} + if (count > 0) { + env->cols_clues[j * MAX_CLUES + clue_idx] = count; + clue_idx++; + } + env->cols_num_runs[j] = clue_idx; + } + + memcpy(env->observations + full_grid_size, env->rows_clues, + MAX_SIZE * max_clues); + memcpy(env->observations + full_grid_size + MAX_SIZE * max_clues, + env->cols_clues, MAX_SIZE * max_clues); + + env->observations[full_grid_size + 2 * MAX_SIZE * max_clues] = env->size; + + memset(env->rows_totals, 0, MAX_SIZE); + memset(env->cols_totals, 0, MAX_SIZE); + memset(env->rows_completed, 0, MAX_SIZE); + memset(env->cols_completed, 0, MAX_SIZE); + env->filled_total = 0; + + for (int i = 0; i < env->size; i++) { + int max_clue = 0; + int sum = 0; + for (int j = 0; j < max_clues; j++) { + int clue = env->rows_clues[i * MAX_CLUES + j]; + if (clue > max_clue) { + max_clue = clue; + } + sum += clue; + } + env->rows_max_clue[i] = max_clue; + env->rows_target_sum[i] = sum; + + max_clue = 0; + sum = 0; + for (int j = 0; j < max_clues; j++) { + int clue = env->cols_clues[i * MAX_CLUES + j]; + if (clue > max_clue) { + max_clue = clue; + } + sum += clue; + } + env->cols_max_clue[i] = max_clue; + env->cols_target_sum[i] = sum; + } + + env->target_total = 0; + for (int i = 0; i < env->size; i++) { + env->target_total += env->rows_target_sum[i]; + } + + env->steps_taken = 0; + env->episode_reward = 0; +} + +void c_step(Nonogram *env) { + int action = env->actions[0]; + + env->terminals[0] = 0; + env->rewards[0] = 0; + + env->steps_taken++; + + if (env->steps_taken > env->max_steps) { + env->terminals[0] = 1; + env->rewards[0] = REWARD_TIMEOUT; + env->episode_reward += REWARD_TIMEOUT; + add_log(env); + c_reset(env); + return; + } + + int mark_black = action >= (MAX_SIZE * MAX_SIZE); + int pos = action % (MAX_SIZE * MAX_SIZE); + + int row = pos / MAX_SIZE; + int col = pos % MAX_SIZE; + + if (row >= env->size || col >= env->size) { + env->terminals[0] = 1; + env->rewards[0] = REWARD_OUT_OF_BOUNDS; + env->episode_reward += REWARD_OUT_OF_BOUNDS; + add_log(env); + c_reset(env); + return; + } + + unsigned char current = env->observations[pos]; + + if (current != CELL_EMPTY) { + env->terminals[0] = 1; + env->rewards[0] = REWARD_INVALID_MOVE; + env->episode_reward += REWARD_INVALID_MOVE; + add_log(env); + c_reset(env); + return; + } + + if (mark_black) { + if (env->rows_totals[row] == env->rows_target_sum[row] || + env->cols_totals[col] == env->cols_target_sum[col]) { + env->terminals[0] = 1; + env->rewards[0] = REWARD_INVALID_MOVE; + env->episode_reward += REWARD_INVALID_MOVE; + add_log(env); + c_reset(env); + return; + } + + int row_run = get_row_run_length(env, row, col); + + if (row_run > env->rows_max_clue[row]) { + env->terminals[0] = 1; + env->rewards[0] = REWARD_INVALID_MOVE; + env->episode_reward += REWARD_INVALID_MOVE; + add_log(env); + c_reset(env); + return; + } + + int col_run = get_col_run_length(env, row, col); + + if (col_run > env->cols_max_clue[col]) { + env->terminals[0] = 1; + env->rewards[0] = REWARD_INVALID_MOVE; + env->episode_reward += REWARD_INVALID_MOVE; + add_log(env); + c_reset(env); + return; + } + + int row_completed = 0; + int col_completed = 0; + + if (env->rows_totals[row] == env->rows_target_sum[row] - 1) { + env->observations[pos] = CELL_BLACK; + int row_start = row * MAX_SIZE; + int matches = check_line_matches(env->observations + row_start, + env->rows_clues + row * MAX_CLUES, + env->rows_num_runs[row], env->size); + if (!matches) { + env->observations[pos] = CELL_EMPTY; + env->terminals[0] = 1; + env->rewards[0] = REWARD_NO_MATCH; + 
env->episode_reward += REWARD_NO_MATCH; + add_log(env); + c_reset(env); + return; + } + env->observations[pos] = CELL_EMPTY; + row_completed = 1; + } + + if (env->cols_totals[col] == env->cols_target_sum[col] - 1) { + env->observations[pos] = CELL_BLACK; + unsigned char col_data[MAX_SIZE]; + for (int i = 0; i < env->size; i++) { + col_data[i] = env->observations[i * MAX_SIZE + col]; + } + int matches = + check_line_matches(col_data, env->cols_clues + col * MAX_CLUES, + env->cols_num_runs[col], env->size); + if (!matches) { + env->observations[pos] = CELL_EMPTY; + env->terminals[0] = 1; + env->rewards[0] = REWARD_NO_MATCH; + env->episode_reward += REWARD_NO_MATCH; + add_log(env); + c_reset(env); + return; + } + env->observations[pos] = CELL_EMPTY; + col_completed = 1; + } + + env->observations[pos] = CELL_BLACK; + env->rows_totals[row]++; + env->cols_totals[col]++; + env->filled_total++; + + int row_newly_completed = row_completed && !env->rows_completed[row]; + int col_newly_completed = col_completed && !env->cols_completed[col]; + + if (row_newly_completed) + env->rows_completed[row] = 1; + if (col_newly_completed) + env->cols_completed[col] = 1; + + float line_reward = + (row_newly_completed + col_newly_completed) * REWARD_COMPLETE_LINE; + env->rewards[0] += line_reward; + env->episode_reward += line_reward; + } else { + env->observations[pos] = CELL_WHITE; + } + + if (env->easy_learn) { + unsigned char solution_cell = env->solution[pos]; + unsigned char actual = env->observations[pos]; + + if (solution_cell == actual) { + env->rewards[0] += REWARD_EASY_LEARN_CORRECT; + env->episode_reward += REWARD_EASY_LEARN_CORRECT; + } else { + env->rewards[0] += REWARD_EASY_LEARN_INCORRECT; + env->episode_reward += REWARD_EASY_LEARN_INCORRECT; + env->terminals[0] = 1; + add_log(env); + c_reset(env); + return; + } + } + + if (env->filled_total == env->target_total) { + env->terminals[0] = 1; + env->rewards[0] = REWARD_WIN; + env->episode_reward += REWARD_WIN; + add_log(env); + c_reset(env); + return; + } +} + +void c_render(Nonogram *env) { + if (!IsWindowReady()) { + int board_width = 120 + MAX_SIZE * 40; + int board_height = 120 + MAX_SIZE * 40; + int screen_width = board_width * 2 + 60 + 40; + int screen_height = board_height + 140; + InitWindow(screen_width, screen_height, "Nonogram (C)"); + SetTargetFPS(60); + } + + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } + + BeginDrawing(); + ClearBackground((Color){0, 0, 0, 255}); + + int cell_size = 40; + int clue_area = 120; + int board_spacing = 60; + int font_size = 20; + + // Draw titles + DrawText("CURRENT BOARD", 20, 20, 24, RAYWHITE); + int solution_x = clue_area + env->size * cell_size + board_spacing + 20; + DrawText("SOLUTION", solution_x, 20, 24, RAYWHITE); + + // Draw current board + int offset_x = 20; + int offset_y = 60; + + // Draw column clues for current board + for (int clue_row = 0; clue_row < MAX_CLUES; clue_row++) { + for (int c = 0; c < env->size; c++) { + int clue = env->cols_clues[c * MAX_CLUES + clue_row]; + if (clue > 0) { + char text[4]; + snprintf(text, sizeof(text), "%d", clue); + int x = offset_x + clue_area + c * cell_size + cell_size / 2; + int y = offset_y + clue_row * 20 + 10; + int text_width = MeasureText(text, font_size); + DrawText(text, x - text_width / 2, y, font_size, RAYWHITE); + } + } + } + + // Draw row clues for current board + for (int r = 0; r < env->size; r++) { + int clue_x = offset_x + 10; + for (int clue_idx = 0; clue_idx < MAX_CLUES; clue_idx++) { + int clue = env->rows_clues[r * MAX_CLUES + clue_idx]; + 
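+ // Clue slots are zero-padded out to MAX_CLUES; only non-zero entries are drawn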
if (clue > 0) { + char text[4]; + snprintf(text, sizeof(text), "%d", clue); + int y = offset_y + clue_area + r * cell_size + cell_size / 2 - + font_size / 2; + DrawText(text, clue_x, y, font_size, RAYWHITE); + clue_x += MeasureText(text, font_size) + 5; + } + } + } + + // Draw current grid + for (int r = 0; r < env->size; r++) { + for (int c = 0; c < env->size; c++) { + int x = offset_x + clue_area + c * cell_size; + int y = offset_y + clue_area + r * cell_size; + int pos = r * MAX_SIZE + c; + + if (env->observations[pos] == CELL_BLACK) { + DrawRectangle(x, y, cell_size, cell_size, + (Color){50, 50, 50, 255}); // Dark gray for BLACK + } else if (env->observations[pos] == CELL_WHITE) { + DrawRectangle(x, y, cell_size, cell_size, + (Color){240, 240, 240, 255}); // Light gray for WHITE + } else { + DrawRectangle(x, y, cell_size, cell_size, + (Color){120, 120, 120, 255}); // Medium gray for EMPTY + } + DrawRectangleLines(x, y, cell_size, cell_size, LIGHTGRAY); + } + } + + // Draw solution board + offset_x = solution_x; + + // Draw column clues for solution + for (int clue_row = 0; clue_row < MAX_CLUES; clue_row++) { + for (int c = 0; c < env->size; c++) { + int clue = env->cols_clues[c * MAX_CLUES + clue_row]; + if (clue > 0) { + char text[4]; + snprintf(text, sizeof(text), "%d", clue); + int x = offset_x + clue_area + c * cell_size + cell_size / 2; + int y = offset_y + clue_row * 20 + 10; + int text_width = MeasureText(text, font_size); + DrawText(text, x - text_width / 2, y, font_size, RAYWHITE); + } + } + } + + // Draw row clues for solution + for (int r = 0; r < env->size; r++) { + int clue_x = offset_x + 10; + for (int clue_idx = 0; clue_idx < MAX_CLUES; clue_idx++) { + int clue = env->rows_clues[r * MAX_CLUES + clue_idx]; + if (clue > 0) { + char text[4]; + snprintf(text, sizeof(text), "%d", clue); + int y = offset_y + clue_area + r * cell_size + cell_size / 2 - + font_size / 2; + DrawText(text, clue_x, y, font_size, RAYWHITE); + clue_x += MeasureText(text, font_size) + 5; + } + } + } + + // Draw solution grid + for (int r = 0; r < env->size; r++) { + for (int c = 0; c < env->size; c++) { + int x = offset_x + clue_area + c * cell_size; + int y = offset_y + clue_area + r * cell_size; + int pos = r * MAX_SIZE + c; + + if (env->solution[pos] == CELL_BLACK) { + DrawRectangle(x, y, cell_size, cell_size, GREEN); + } else { + DrawRectangle(x, y, cell_size, cell_size, (Color){200, 200, 200, 255}); + } + DrawRectangleLines(x, y, cell_size, cell_size, LIGHTGRAY); + } + } + + // Draw status + int board_height = clue_area + env->size * cell_size; + int status_y = board_height + 80; + char status[128]; + snprintf(status, sizeof(status), "Steps: %d/%d | Filled: %d/%d | Size: %dx%d", + env->steps_taken, env->max_steps, env->filled_total, + env->target_total, env->size, env->size); + DrawText(status, 20, status_y, 20, RAYWHITE); + + // Draw reward info + char reward_info[128]; + snprintf(reward_info, sizeof(reward_info), + "Last Reward: %.3f | Episode Return: %.3f", env->rewards[0], + env->episode_reward); + DrawText(reward_info, 20, status_y + 25, 20, RAYWHITE); + + // Draw instructions + DrawText("Click cells to toggle | Press R to reset | ESC to quit", 20, + status_y + 60, 16, LIGHTGRAY); + + EndDrawing(); +} + +void c_close(Nonogram *env) { + if (IsWindowReady()) { + CloseWindow(); + } +} diff --git a/pufferlib/ocean/nonogram/nonogram.py b/pufferlib/ocean/nonogram/nonogram.py new file mode 100644 index 000000000..c58aa1180 --- /dev/null +++ b/pufferlib/ocean/nonogram/nonogram.py @@ -0,0 +1,85 @@ 
+'''Nonogram logic puzzle environment''' + +import gymnasium +import numpy as np + +import pufferlib +from pufferlib.ocean.nonogram import binding + +MAX_SIZE = 8 +MIN_SIZE = 4 +MAX_CLUES = MAX_SIZE // 2 +OBS_SIZE = MAX_SIZE * MAX_SIZE + 2 * MAX_SIZE * MAX_CLUES + 1 # +1 for board size + +class Nonogram(pufferlib.PufferEnv): + def __init__(self, num_envs=1, render_mode=None, log_interval=128, + min_size=4, max_size=8, easy_learn=0, buf=None, seed=0): + # Observation space: grid cells (0-3: EMPTY/WHITE/BLACK/PADDING), clues (0-max_size), size encoding (0-1) + # Using max_size as high covers all values + self.single_observation_space = gymnasium.spaces.Box(low=0, high=max_size, + shape=(OBS_SIZE,), dtype=np.uint8) + # Action space: 0-63 = mark WHITE, 64-127 = mark BLACK + self.single_action_space = gymnasium.spaces.Discrete(MAX_SIZE * MAX_SIZE * 2) + self.render_mode = render_mode + self.num_agents = num_envs + self.log_interval = log_interval + + super().__init__(buf) + self.c_envs = binding.vec_init(self.observations, self.actions, self.rewards, + self.terminals, self.truncations, num_envs, seed, + min_size=min_size, max_size=max_size, easy_learn=easy_learn) + + self.solutions = np.zeros((num_envs, max_size * max_size), dtype=np.uint8) + + def reset(self, seed=0): + binding.vec_reset(self.c_envs, seed) + self.tick = 0 + return self.observations, [] + + def step(self, actions): + self.tick += 1 + + self.actions[:] = actions + binding.vec_step(self.c_envs) + + info = [] + if self.tick % self.log_interval == 0: + info.append(binding.vec_log(self.c_envs)) + + return (self.observations, self.rewards, + self.terminals, self.truncations, info) + + def render(self): + binding.vec_render(self.c_envs, 0) + + def close(self): + binding.vec_close(self.c_envs) + + def get_solutions(self): + """Get the solution grids for all environments""" + binding.vec_get_solutions(self.c_envs, self.solutions) + return self.solutions + + def get_size(self): + """Get current board size""" + return binding.vec_get_size(self.c_envs) + +if __name__ == '__main__': + N = 4096 + + env = Nonogram(num_envs=N, min_size=2, max_size=8) + env.reset() + steps = 0 + + CACHE = 1024 + actions = np.random.randint(0, 64, (CACHE, N)) + + i = 0 + import time + start = time.time() + while time.time() - start < 10: + env.step(actions[i % CACHE]) + steps += N + i += 1 + + print('Nonogram SPS:', int(steps / (time.time() - start))) diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index c414acde2..726e9c6b9 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -942,6 +942,85 @@ def decode_actions(self, hidden): value = self.value_fn(hidden) # (B, 1) return action, value +class NonogramLSTM(pufferlib.models.LSTMWrapper): + def __init__(self, env, policy, input_size=256, hidden_size=256): + super().__init__(env, policy, input_size, hidden_size) + + +class Nonogram(nn.Module): + def __init__(self, env, cnn_channels=32, input_size=128, hidden_size=128, **kwargs): + super().__init__() + self.hidden_size = hidden_size + self.is_continuous = False + + self.conv_grid = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Conv2d(4, cnn_channels, kernel_size=3, stride=1, padding=1)), + nn.ReLU(), + pufferlib.pytorch.layer_init(nn.Conv2d(cnn_channels, cnn_channels, kernel_size=3, stride=2, padding=1)), + nn.ReLU(), + pufferlib.pytorch.layer_init(nn.Conv2d(cnn_channels, cnn_channels, kernel_size=3, stride=2, padding=1)), + nn.ReLU(), + nn.Flatten(), + pufferlib.pytorch.layer_init(nn.Linear(cnn_channels * 2 * 2, input_size)), 
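+ # 8x8 grid -> 4x4 -> 2x2 after the two stride-2 convs, so the flatten yields cnn_channels * 2 * 2 features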
+ ) + + self.fc_row_clues = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(8 * 4 * 9, input_size // 2)), + nn.ReLU(), + ) + + self.fc_col_clues = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(8 * 4 * 9, input_size // 2)), + nn.ReLU(), + ) + + self.fc_size = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(9, input_size // 4)), + nn.ReLU(), + ) + + self.proj = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(input_size + input_size // 2 + input_size // 2 + input_size // 4, hidden_size)), + nn.ReLU(), + ) + + self.actor = pufferlib.pytorch.layer_init( + nn.Linear(hidden_size, env.single_action_space.n), std=0.01) + self.value_fn = pufferlib.pytorch.layer_init( + nn.Linear(hidden_size, 1), std=1) + + def forward(self, observations, state=None): + hidden = self.encode_observations(observations) + actions, value = self.decode_actions(hidden) + return actions, value + + def forward_train(self, x, state=None): + return self.forward(x, state) + + def encode_observations(self, observations, state=None): + B = observations.shape[0] + + grid = F.one_hot(observations[:, :64].view(B, 8, 8).long(), 4).permute(0, 3, 1, 2).float() + row_clues = F.one_hot(observations[:, 64:96].view(B, 8, 4).long(), 9).float() + col_clues = F.one_hot(observations[:, 96:128].view(B, 8, 4).long(), 9).float() + board_size = F.one_hot(observations[:, 128].long(), 9).float() + + grid_feat = self.conv_grid(grid) + row_feat = self.fc_row_clues(row_clues.reshape(B, -1)) + col_feat = self.fc_col_clues(col_clues.reshape(B, -1)) + size_feat = self.fc_size(board_size) + + combined = torch.cat([grid_feat, row_feat, col_feat, size_feat], dim=-1) + features = self.proj(combined) + + return features + + def decode_actions(self, flat_hidden): + action = self.actor(flat_hidden) + value = self.value_fn(flat_hidden) + return action, value + + class Drone(nn.Module): ''' Drone policy. Flattens obs and applies a linear layer. '''
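
Observation layout reference: each flat observation produced by nonogram.h is 129 uint8 values: a zero-padded 8x8 grid (indices 0-63, values CELL_EMPTY/WHITE/BLACK/PADDING), row clues (indices 64-95, 8 rows x 4 zero-padded slots), column clues (indices 96-127), and the active board size at index 128. The Nonogram policy's encode_observations slices the tensor at exactly these offsets. The sketch below decodes a single observation with NumPy; decode_observation is an illustrative helper, not part of this diff.

import numpy as np

MAX_SIZE = 8
MAX_CLUES = MAX_SIZE // 2
GRID = MAX_SIZE * MAX_SIZE      # 64 grid cells
CLUES = MAX_SIZE * MAX_CLUES    # 32 clue slots per axis

def decode_observation(obs):
    """Split one flat (129,) uint8 observation into grid, clues, and board size."""
    obs = np.asarray(obs, dtype=np.uint8)
    grid = obs[:GRID].reshape(MAX_SIZE, MAX_SIZE)                    # 0=EMPTY, 1=WHITE, 2=BLACK, 3=PADDING
    row_clues = obs[GRID:GRID + CLUES].reshape(MAX_SIZE, MAX_CLUES)  # run lengths per row, zero-padded
    col_clues = obs[GRID + CLUES:GRID + 2 * CLUES].reshape(MAX_SIZE, MAX_CLUES)
    size = int(obs[GRID + 2 * CLUES])                                # active board size (min_size..max_size)
    return grid, row_clues, col_clues, size

# Usage sketch (assumes the nonogram binding has been built):
# from pufferlib.ocean.nonogram.nonogram import Nonogram
# env = Nonogram(num_envs=1)
# obs, _ = env.reset()
# grid, row_clues, col_clues, size = decode_observation(obs[0])
# Actions follow c_step: action = row * MAX_SIZE + col marks WHITE,
# and action + MAX_SIZE * MAX_SIZE marks BLACK at the same cell.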