From 7fcfdf2e566454f5ecae28a496164ac49df4b3ce Mon Sep 17 00:00:00 2001
From: burtenshaw <ben.burtenshaw@gmail.com>
Date: Tue, 17 Feb 2026 21:51:38 +0100
Subject: [PATCH 1/4] add skills files

---
 .gitignore                                    |  1 +
 AGENTS.md                                     | 10 ++++++
 templates/setup/AGENTS.md                     | 10 ++++++
 .../popcorn-submission-workflow/SKILL.md      | 32 +++++++++++++++++++
 templates/setup/submission.py                 | 20 ++++++++++++
 5 files changed, 73 insertions(+)
 create mode 100644 AGENTS.md
 create mode 100644 templates/setup/AGENTS.md
 create mode 100644 templates/setup/skills/popcorn-submission-workflow/SKILL.md
 create mode 100644 templates/setup/submission.py

diff --git a/.gitignore b/.gitignore
index 300478e..baaef0e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 submission.*
+!templates/setup/submission.py
 target/
 scratch.md
 *claude
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..1ab8e15
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,10 @@
+## Skills
+A skill is a local instruction bundle stored in `SKILL.md`.
+
+### Available skills
+- popcorn-submission-workflow: Helps with Popcorn CLI registration, submission setup, submission modes, and file directives. (file: .popcorn/skills/popcorn-submission-workflow/SKILL.md)
+
+### How to use skills
+- Load the skill by reading its `SKILL.md` file when user requests match the description.
+- Follow progressive disclosure: read only relevant referenced files/scripts as needed.
+- Keep the workspace setup aligned with `popcorn setup`.
diff --git a/templates/setup/AGENTS.md b/templates/setup/AGENTS.md
new file mode 100644
index 0000000..a0f8e5c
--- /dev/null
+++ b/templates/setup/AGENTS.md
@@ -0,0 +1,10 @@
+## Skills
+A skill is a local instruction bundle stored in `SKILL.md`.
+
+### Available skills
+- {{SKILL_NAME}}: Helps with Popcorn CLI registration, submission setup, submission modes, and file directives. (file: {{SKILL_PATH}})
+
+### How to use skills
+- Load the skill by reading its `SKILL.md` file when user requests match the description.
+- Follow progressive disclosure: read only relevant referenced files/scripts as needed.
+- Keep the workspace setup aligned with `popcorn setup`.
diff --git a/templates/setup/skills/popcorn-submission-workflow/SKILL.md b/templates/setup/skills/popcorn-submission-workflow/SKILL.md
new file mode 100644
index 0000000..2b794df
--- /dev/null
+++ b/templates/setup/skills/popcorn-submission-workflow/SKILL.md
@@ -0,0 +1,32 @@
+---
+name: {{SKILL_NAME}}
+description: Helps prepare and submit popcorn-cli GPU Mode solutions. Use when users ask to set up a project, create a submission template, or run/register submissions.
+compatibility: Intended for popcorn-cli repositories with README.md and shell access.
+---
+
+# Popcorn Submission Workflow
+
+Use this skill when the user is working on Popcorn CLI submissions and needs a reliable flow from setup to submit.
+
+## Recommended workflow
+1. Ensure the project has a `submission.py` file with POPCORN directives.
+2. Register once with `popcorn register discord` (or `github`) if `.popcorn.yaml` is missing.
+3. Use `popcorn submit submission.py` for interactive mode, or `popcorn submit --no-tui ...` for scripts/CI.
+4. Use `popcorn submissions list/show/delete` to inspect or remove previous runs.
+
+## Reference: Authentication (from README)
+
+{{AUTHENTICATION_SECTION}}
+
+## Reference: Commands (from README)
+
+{{COMMANDS_SECTION}}
+
+## Reference: Submission Format (from README)
+
+{{SUBMISSION_FORMAT_SECTION}}
+
+## Guardrails
+- Keep submissions as a single Python file.
+- Prefer POPCORN directives (`#!POPCORN leaderboard ...`, `#!POPCORN gpu ...`) so defaults are embedded.
+- Use `test` or `benchmark` mode before `leaderboard` submissions when iterating.
diff --git a/templates/setup/submission.py b/templates/setup/submission.py
new file mode 100644
index 0000000..2112cf0
--- /dev/null
+++ b/templates/setup/submission.py
@@ -0,0 +1,20 @@
+#!POPCORN leaderboard grayscale
+#!POPCORN gpu A100
+
+"""
+Popcorn submission template generated by `popcorn setup`.
+
+README-aligned notes:
+- Submissions are a single Python file.
+- You can install extra dependencies at runtime with `pip` if needed.
+- Submit with: `popcorn submit submission.py`
+"""
+
+
+def solution():
+    # Replace with your kernel implementation.
+    return "hello from popcorn"
+
+
+if __name__ == "__main__":
+    print(solution())

From 46e89d24568409b6f59c37315fc0c191bc1f5b01 Mon Sep 17 00:00:00 2001
From: burtenshaw <ben.burtenshaw@gmail.com>
Date: Tue, 17 Feb 2026 21:52:27 +0100
Subject: [PATCH 2/4] implement a setup command to add skills and symlink

---
 src/cmd/mod.rs   |   8 ++
 src/cmd/setup.rs | 278 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 286 insertions(+)
 create mode 100644 src/cmd/setup.rs

diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
index 148e74f..b092769 100644
--- a/src/cmd/mod.rs
+++ b/src/cmd/mod.rs
@@ -6,6 +6,7 @@ use std::path::PathBuf;
 
 mod admin;
 mod auth;
+mod setup;
 mod submissions;
 mod submit;
 
@@ -103,6 +104,12 @@ enum SubmissionsAction {
 
 #[derive(Subcommand, Debug)]
 enum Commands {
+    /// Bootstrap this project with Popcorn agent skills and a submission template
+    Setup {
+        /// Overwrite files if they already exist
+        #[arg(long)]
+        force: bool,
+    },
     Reregister {
         #[command(subcommand)]
         provider: AuthProvider,
@@ -149,6 +156,7 @@ enum Commands {
 
 pub async fn execute(cli: Cli) -> Result<()> {
     match cli.command {
+        Some(Commands::Setup { force }) => setup::run_setup(force),
         Some(Commands::Reregister { provider }) => {
             let provider_str = match provider {
                 AuthProvider::Discord => "discord",
diff --git a/src/cmd/setup.rs b/src/cmd/setup.rs
new file mode 100644
index 0000000..c9348d6
--- /dev/null
+++ b/src/cmd/setup.rs
@@ -0,0 +1,278 @@
+use anyhow::{Context, Result};
+use serde_json::json;
+use std::env;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+const SKILL_NAME: &str = "popcorn-submission-workflow";
+const SUBMISSION_FILENAME: &str = "submission.py";
+const SKILL_TEMPLATE: &str =
+    include_str!("../../templates/setup/skills/popcorn-submission-workflow/SKILL.md");
+const AGENTS_TEMPLATE: &str = include_str!("../../templates/setup/AGENTS.md");
+const SUBMISSION_TEMPLATE: &str = include_str!("../../templates/setup/submission.py");
+
+#[derive(Clone, Copy)]
+enum ActionStatus {
+    Created,
+    Updated,
+    Skipped,
+}
+
+impl ActionStatus {
+    fn label(self) -> &'static str {
+        match self {
+            Self::Created => "created",
+            Self::Updated => "updated",
+            Self::Skipped => "skipped",
+        }
+    }
+}
+
+pub fn run_setup(force: bool) -> Result<()> {
+    let cwd = env::current_dir().context("Failed to determine current directory")?;
+    let popcorn_dir = cwd.join(".popcorn");
+    let skill_dir = popcorn_dir.join("skills").join(SKILL_NAME);
+    let skill_path = skill_dir.join("SKILL.md");
+    let manifest_path = popcorn_dir.join("setup.json");
+    let submission_path = cwd.join(SUBMISSION_FILENAME);
+    let agents_path = cwd.join("AGENTS.md");
+
+    fs::create_dir_all(&skill_dir).with_context(|| {
+        format!(
+            "Failed to create skill directory at {}",
+            skill_dir.to_string_lossy()
+        )
+    })?;
+
+    let readme_path = cwd.join("README.md");
+    let readme_content = fs::read_to_string(&readme_path).unwrap_or_default();
+    let skill_markdown = build_skill_markdown(&readme_content);
+    let skill_status = write_text_file(&skill_path, &skill_markdown, force)?;
+
+    let manifest = json!({
+        "schema_version": 1,
+        "setup_source": "popcorn setup",
+        "skills": [{
+            "name": SKILL_NAME,
+            "path": format!(".popcorn/skills/{SKILL_NAME}")
+        }],
+        "agents": ["codex", "claude"]
+    });
+    let manifest_text = serde_json::to_string_pretty(&manifest)?;
+    let manifest_status = write_text_file(&manifest_path, &manifest_text, force)?;
+
+    let agents_md = build_agents_markdown(&skill_path);
+    let agents_status = write_text_file(&agents_path, &agents_md, force)?;
+
+    let codex_link_status = create_agent_skill_view(&cwd, "codex", &skill_dir, force)?;
+    let claude_link_status = create_agent_skill_view(&cwd, "claude", &skill_dir, force)?;
+
+    let submission_status = write_text_file(
+        &submission_path,
+        &build_submission_template(),
+        force,
+    )?;
+
+    println!(
+        "{} {}",
+        skill_status.label(),
+        relative_display(&cwd, &skill_path)
+    );
+    println!(
+        "{} {}",
+        manifest_status.label(),
+        relative_display(&cwd, &manifest_path)
+    );
+    println!(
+        "{} {}",
+        agents_status.label(),
+        relative_display(&cwd, &agents_path)
+    );
+    println!(
+        "{} {}",
+        codex_link_status.label(),
+        relative_display(
+            &cwd,
+            &cwd.join(".codex").join("skills").join(SKILL_NAME)
+        )
+    );
+    println!(
+        "{} {}",
+        claude_link_status.label(),
+        relative_display(
+            &cwd,
+            &cwd.join(".claude").join("skills").join(SKILL_NAME)
+        )
+    );
+    println!(
+        "{} {}",
+        submission_status.label(),
+        relative_display(&cwd, &submission_path)
+    );
+
+    Ok(())
+}
+
+fn relative_display(cwd: &Path, target: &Path) -> String {
+    match target.strip_prefix(cwd) {
+        Ok(relative) => relative.to_string_lossy().to_string(),
+        Err(_) => target.to_string_lossy().to_string(),
+    }
+}
+
+fn write_text_file(path: &Path, content: &str, force: bool) -> Result<ActionStatus> {
+    let existed_before = path_exists(path);
+    if existed_before && !force {
+        return Ok(ActionStatus::Skipped);
+    }
+
+    if existed_before {
+        remove_existing_path(path)?;
+    }
+
+    if let Some(parent) = path.parent() {
+        fs::create_dir_all(parent)?;
+    }
+
+    fs::write(path, content)?;
+    if existed_before {
+        Ok(ActionStatus::Updated)
+    } else {
+        Ok(ActionStatus::Created)
+    }
+}
+
+fn create_agent_skill_view(
+    cwd: &Path,
+    agent_name: &str,
+    skill_source_dir: &Path,
+    force: bool,
+) -> Result<ActionStatus> {
+    let agent_skills_dir = cwd.join(format!(".{}", agent_name)).join("skills");
+    fs::create_dir_all(&agent_skills_dir)?;
+
+    let link_path = agent_skills_dir.join(SKILL_NAME);
+    let existed_before = path_exists(&link_path);
+    if existed_before && !force {
+        return Ok(ActionStatus::Skipped);
+    }
+
+    if existed_before {
+        remove_existing_path(&link_path)?;
+    }
+
+    let relative_target = PathBuf::from("../../.popcorn/skills").join(SKILL_NAME);
+    let symlink_result = create_symlink_dir(&relative_target, &link_path);
+    if symlink_result.is_err() {
+        copy_dir_all(skill_source_dir, &link_path)?;
+    }
+
+    if existed_before {
+        Ok(ActionStatus::Updated)
+    } else {
+        Ok(ActionStatus::Created)
+    }
+}
+
+fn path_exists(path: &Path) -> bool {
+    fs::symlink_metadata(path).is_ok()
+}
+
+fn remove_existing_path(path: &Path) -> Result<()> {
+    let metadata = fs::symlink_metadata(path)?;
+    let file_type = metadata.file_type();
+    if file_type.is_symlink() || file_type.is_file() {
+        fs::remove_file(path)?;
+    } else if file_type.is_dir() {
+        fs::remove_dir_all(path)?;
+    }
+    Ok(())
+}
+
+fn copy_dir_all(src: &Path, dst: &Path) -> Result<()> {
+    fs::create_dir_all(dst)?;
+    for entry in fs::read_dir(src)? {
+        let entry = entry?;
+        let file_type = entry.file_type()?;
+        let from = entry.path();
+        let to = dst.join(entry.file_name());
+        if file_type.is_dir() {
+            copy_dir_all(&from, &to)?;
+        } else {
+            fs::copy(from, to)?;
+        }
+    }
+    Ok(())
+}
+
+#[cfg(unix)]
+fn create_symlink_dir(target: &Path, link_path: &Path) -> std::io::Result<()> {
+    std::os::unix::fs::symlink(target, link_path)
+}
+
+#[cfg(windows)]
+fn create_symlink_dir(target: &Path, link_path: &Path) -> std::io::Result<()> {
+    std::os::windows::fs::symlink_dir(target, link_path)
+}
+
+fn extract_top_level_section(content: &str, heading: &str) -> Option<String> {
+    let lines: Vec<&str> = content.lines().collect();
+    let start = lines
+        .iter()
+        .position(|line| line.trim() == heading)
+        .map(|idx| idx + 1)?;
+
+    let mut end = lines.len();
+    for (idx, line) in lines.iter().enumerate().skip(start) {
+        if line.trim_start().starts_with("## ") {
+            end = idx;
+            break;
+        }
+    }
+
+    let section = lines[start..end].join("\n").trim().to_string();
+    if section.is_empty() {
+        None
+    } else {
+        Some(section)
+    }
+}
+
+fn build_skill_markdown(readme_content: &str) -> String {
+    let authentication = extract_top_level_section(readme_content, "## Authentication")
+        .unwrap_or_else(|| "See project README for authentication details.".to_string());
+    let commands = extract_top_level_section(readme_content, "## Commands")
+        .unwrap_or_else(|| "See project README for command usage.".to_string());
+    let submission_format = extract_top_level_section(readme_content, "## Submission Format")
+        .unwrap_or_else(|| "Submissions are expected as a single Python file.".to_string());
+
+    render_template(
+        SKILL_TEMPLATE,
+        &[
+            ("{{SKILL_NAME}}", SKILL_NAME),
+            ("{{AUTHENTICATION_SECTION}}", &authentication),
+            ("{{COMMANDS_SECTION}}", &commands),
+            ("{{SUBMISSION_FORMAT_SECTION}}", &submission_format),
+        ],
+    )
+}
+
+fn build_agents_markdown(skill_path: &Path) -> String {
+    let skill_path_text = skill_path.to_string_lossy().to_string();
+    render_template(
+        AGENTS_TEMPLATE,
+        &[("{{SKILL_NAME}}", SKILL_NAME), ("{{SKILL_PATH}}", &skill_path_text)],
+    )
+}
+
+fn build_submission_template() -> String {
+    SUBMISSION_TEMPLATE.to_string()
+}
+
+fn render_template(template: &str, replacements: &[(&str, &str)]) -> String {
+    let mut output = template.to_string();
+    for (needle, value) in replacements {
+        output = output.replace(needle, value);
+    }
+    output
+}

From 456d19b8788fc7679ba7aa03aa69f809a02d1ded Mon Sep 17 00:00:00 2001
From: burtenshaw <ben.burtenshaw@gmail.com>
Date: Tue, 17 Feb 2026 21:52:40 +0100
Subject: [PATCH 3/4] update readme with new command

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index 888b958..2ea85a6 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,17 @@ We regularly run competitions with clear due dates but for beginners we will alw
 
 ## Commands
 
+### Setup
+
+Bootstrap a project with Popcorn skill scaffolding and a submission template. You can overwrite existing files with `--force`.
+
+```bash
+# Create project skill scaffolding + submission.py
+popcorn setup
+```
+
+This will create a new agent skill based on the [templates](templates/setup) and make it available in both your `.claude/skills` and `.codex/skills` directories.
+
 ### Submit
 
 Submit a solution to a leaderboard. Supports both TUI (interactive) and plain modes.

From b60801fe78dfd4f4a0e3db9776413a2a5e435df4 Mon Sep 17 00:00:00 2001
From: Mark Saroufim <marksaroufim@meta.com>
Date: Tue, 17 Feb 2026 14:38:11 -0800
Subject: [PATCH 4/4] Make `popcorn setup` pull templates from
 reference-kernels

Instead of writing a hardcoded submission template, setup now fetches
the competition index YAMLs from gpu-mode/reference-kernels, lets the
user interactively pick a competition, problem, and GPU, then downloads
the real submission.py with the correct #!POPCORN directives injected.

Setup always overwrites its own files on re-run (removed --force flag).

Also adds a load-inline-native-code skill with CUDA and HIP templates
for writing kernels via torch.utils.cpp_extension.load_inline().
---
 .gitignore                                    |   4 +-
 src/cmd/mod.rs                                |   8 +-
 src/cmd/setup.rs                              | 267 ++++++++++++++++--
 templates/setup/AGENTS.md                     |   1 +
 .../skills/load-inline-native-code/SKILL.md   | 143 ++++++++++
 templates/setup/submission.py                 |  20 --
 6 files changed, 388 insertions(+), 55 deletions(-)
 create mode 100644 templates/setup/skills/load-inline-native-code/SKILL.md
 delete mode 100644 templates/setup/submission.py

diff --git a/.gitignore b/.gitignore
index baaef0e..47e7497 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 submission.*
-!templates/setup/submission.py
 target/
 scratch.md
 *claude
 *.zip
+.codex/
+.popcorn/
+.DS_Store
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
index b092769..35c3e2b 100644
--- a/src/cmd/mod.rs
+++ b/src/cmd/mod.rs
@@ -105,11 +105,7 @@ enum SubmissionsAction {
 #[derive(Subcommand, Debug)]
 enum Commands {
     /// Bootstrap this project with Popcorn agent skills and a submission template
-    Setup {
-        /// Overwrite files if they already exist
-        #[arg(long)]
-        force: bool,
-    },
+    Setup,
     Reregister {
         #[command(subcommand)]
         provider: AuthProvider,
@@ -156,7 +152,7 @@ enum Commands {
 
 pub async fn execute(cli: Cli) -> Result<()> {
     match cli.command {
-        Some(Commands::Setup { force }) => setup::run_setup(force),
+        Some(Commands::Setup) => setup::run_setup().await,
         Some(Commands::Reregister { provider }) => {
             let provider_str = match provider {
                 AuthProvider::Discord => "discord",
diff --git a/src/cmd/setup.rs b/src/cmd/setup.rs
index c9348d6..2368a78 100644
--- a/src/cmd/setup.rs
+++ b/src/cmd/setup.rs
@@ -1,15 +1,43 @@
-use anyhow::{Context, Result};
+use anyhow::{anyhow, Context, Result};
+use serde::Deserialize;
 use serde_json::json;
 use std::env;
 use std::fs;
+use std::io::{self, Write};
 use std::path::{Path, PathBuf};
 
 const SKILL_NAME: &str = "popcorn-submission-workflow";
+const NATIVE_SKILL_NAME: &str = "load-inline-native-code";
 const SUBMISSION_FILENAME: &str = "submission.py";
 const SKILL_TEMPLATE: &str =
     include_str!("../../templates/setup/skills/popcorn-submission-workflow/SKILL.md");
+const NATIVE_SKILL_TEMPLATE: &str =
+    include_str!("../../templates/setup/skills/load-inline-native-code/SKILL.md");
 const AGENTS_TEMPLATE: &str = include_str!("../../templates/setup/AGENTS.md");
-const SUBMISSION_TEMPLATE: &str = include_str!("../../templates/setup/submission.py");
+
+const COMPETITION_YAMLS: &[&str] = &[
+    "pmpp_v2.yaml",
+    "nvidia.yaml",
+    "amd.yaml",
+    "amd_distributed.yaml",
+    "bioml.yaml",
+];
+
+const RAW_GITHUB_BASE: &str =
+    "https://raw.githubusercontent.com/gpu-mode/reference-kernels/main/problems";
+
+#[derive(Deserialize)]
+struct CompetitionIndex {
+    name: String,
+    problems: Vec<ProblemEntry>,
+}
+
+#[derive(Deserialize)]
+struct ProblemEntry {
+    directory: String,
+    name: String,
+    gpus: Vec<String>,
+}
 
 #[derive(Clone, Copy)]
 enum ActionStatus {
@@ -28,11 +56,161 @@ impl ActionStatus {
     }
 }
 
-pub fn run_setup(force: bool) -> Result<()> {
+async fn fetch_competition_index(client: &reqwest::Client) -> Result<Vec<(String, ProblemEntry)>> {
+    let mut entries = Vec::new();
+    for filename in COMPETITION_YAMLS {
+        let url = format!("{}/{}", RAW_GITHUB_BASE, filename);
+        let resp = client
+            .get(&url)
+            .send()
+            .await
+            .with_context(|| format!("Failed to fetch {}", url))?;
+        if !resp.status().is_success() {
+            eprintln!(
+                "Warning: could not fetch {} (status {})",
+                filename,
+                resp.status()
+            );
+            continue;
+        }
+        let text = resp.text().await?;
+        let index: CompetitionIndex =
+            serde_yaml::from_str(&text).with_context(|| format!("Failed to parse {}", filename))?;
+        let comp_name = index.name.clone();
+        for problem in index.problems {
+            entries.push((comp_name.clone(), problem));
+        }
+    }
+    if entries.is_empty() {
+        return Err(anyhow!(
+            "No competitions found. Check your network connection."
+        ));
+    }
+    Ok(entries)
+}
+
+async fn download_submission(
+    client: &reqwest::Client,
+    directory: &str,
+    leaderboard_name: &str,
+    gpu: &str,
+) -> Result<String> {
+    let url = format!("{}/{}/submission.py", RAW_GITHUB_BASE, directory);
+    let resp = client
+        .get(&url)
+        .send()
+        .await
+        .with_context(|| format!("Failed to fetch {}", url))?;
+    if !resp.status().is_success() {
+        return Err(anyhow!(
+            "Failed to download submission.py from {} (status {})",
+            url,
+            resp.status()
+        ));
+    }
+    let body = resp.text().await?;
+
+    // Strip existing #!POPCORN directives and leading blank lines
+    let content: String = body
+        .lines()
+        .skip_while(|line| line.starts_with("#!POPCORN") || line.trim().is_empty())
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    Ok(format!(
+        "#!POPCORN leaderboard {}\n#!POPCORN gpu {}\n\n{}\n",
+        leaderboard_name, gpu, content
+    ))
+}
+
+fn prompt_choice(prompt_text: &str, max: usize) -> Result<usize> {
+    loop {
+        print!("{}", prompt_text);
+        io::stdout().flush()?;
+        let mut input = String::new();
+        io::stdin().read_line(&mut input)?;
+        match input.trim().parse::<usize>() {
+            Ok(n) if n >= 1 && n <= max => return Ok(n - 1),
+            _ => println!("Please enter a number between 1 and {}", max),
+        }
+    }
+}
+
+pub async fn run_setup() -> Result<()> {
     let cwd = env::current_dir().context("Failed to determine current directory")?;
+
+    // Fetch competitions from GitHub
+    println!("Fetching competitions from gpu-mode/reference-kernels...");
+    let client = reqwest::Client::new();
+    let entries = fetch_competition_index(&client).await?;
+
+    // Build unique competition list preserving order
+    let mut comp_names: Vec<String> = Vec::new();
+    for (name, _) in &entries {
+        if !comp_names.contains(name) {
+            comp_names.push(name.clone());
+        }
+    }
+
+    // Select competition
+    println!("\nAvailable competitions:");
+    for (i, name) in comp_names.iter().enumerate() {
+        println!("  {}. {}", i + 1, name);
+    }
+    let comp_idx = prompt_choice(
+        &format!("\nSelect a competition [1-{}]: ", comp_names.len()),
+        comp_names.len(),
+    )?;
+    let chosen_comp = &comp_names[comp_idx];
+
+    // Filter problems for chosen competition
+    let problems: Vec<&ProblemEntry> = entries
+        .iter()
+        .filter(|(name, _)| name == chosen_comp)
+        .map(|(_, p)| p)
+        .collect();
+
+    // Select problem
+    println!("\nProblems in \"{}\":", chosen_comp);
+    for (i, p) in problems.iter().enumerate() {
+        println!("  {}. {}", i + 1, p.name);
+    }
+    let prob_idx = prompt_choice(
+        &format!("\nSelect a problem [1-{}]: ", problems.len()),
+        problems.len(),
+    )?;
+    let chosen_problem = problems[prob_idx];
+
+    // Select GPU
+    println!("\nAvailable GPUs for \"{}\":", chosen_problem.name);
+    for (i, gpu) in chosen_problem.gpus.iter().enumerate() {
+        println!("  {}. {}", i + 1, gpu);
+    }
+    let gpu_idx = prompt_choice(
+        &format!("\nSelect a GPU [1-{}]: ", chosen_problem.gpus.len()),
+        chosen_problem.gpus.len(),
+    )?;
+    let chosen_gpu = &chosen_problem.gpus[gpu_idx];
+
+    // Download submission template
+    println!(
+        "\nDownloading submission template for {} on {}...",
+        chosen_problem.name, chosen_gpu
+    );
+    let submission_content = download_submission(
+        &client,
+        &chosen_problem.directory,
+        &chosen_problem.name,
+        chosen_gpu,
+    )
+    .await?;
+
+    // Write scaffolding files
     let popcorn_dir = cwd.join(".popcorn");
     let skill_dir = popcorn_dir.join("skills").join(SKILL_NAME);
     let skill_path = skill_dir.join("SKILL.md");
+    let native_skill_dir = popcorn_dir.join("skills").join(NATIVE_SKILL_NAME);
+    let native_skill_path = native_skill_dir.join("SKILL.md");
     let manifest_path = popcorn_dir.join("setup.json");
     let submission_path = cwd.join(SUBMISSION_FILENAME);
     let agents_path = cwd.join("AGENTS.md");
@@ -43,41 +221,59 @@ pub fn run_setup(force: bool) -> Result<()> {
             skill_dir.to_string_lossy()
         )
     })?;
+    fs::create_dir_all(&native_skill_dir).with_context(|| {
+        format!(
+            "Failed to create skill directory at {}",
+            native_skill_dir.to_string_lossy()
+        )
+    })?;
 
     let readme_path = cwd.join("README.md");
     let readme_content = fs::read_to_string(&readme_path).unwrap_or_default();
     let skill_markdown = build_skill_markdown(&readme_content);
-    let skill_status = write_text_file(&skill_path, &skill_markdown, force)?;
+    let skill_status = write_text_file(&skill_path, &skill_markdown, true)?;
+
+    let native_skill_status = write_text_file(&native_skill_path, NATIVE_SKILL_TEMPLATE, true)?;
 
     let manifest = json!({
         "schema_version": 1,
         "setup_source": "popcorn setup",
-        "skills": [{
-            "name": SKILL_NAME,
-            "path": format!(".popcorn/skills/{SKILL_NAME}")
-        }],
+        "skills": [
+            {
+                "name": SKILL_NAME,
+                "path": format!(".popcorn/skills/{SKILL_NAME}")
+            },
+            {
+                "name": NATIVE_SKILL_NAME,
+                "path": format!(".popcorn/skills/{NATIVE_SKILL_NAME}")
+            }
+        ],
         "agents": ["codex", "claude"]
     });
     let manifest_text = serde_json::to_string_pretty(&manifest)?;
-    let manifest_status = write_text_file(&manifest_path, &manifest_text, force)?;
+    let manifest_status = write_text_file(&manifest_path, &manifest_text, true)?;
 
-    let agents_md = build_agents_markdown(&skill_path);
-    let agents_status = write_text_file(&agents_path, &agents_md, force)?;
+    let agents_md = build_agents_markdown(&skill_path, &native_skill_path);
+    let agents_status = write_text_file(&agents_path, &agents_md, true)?;
 
-    let codex_link_status = create_agent_skill_view(&cwd, "codex", &skill_dir, force)?;
-    let claude_link_status = create_agent_skill_view(&cwd, "claude", &skill_dir, force)?;
+    let codex_link_status = create_agent_skill_view(&cwd, "codex", &skill_dir, true)?;
+    let claude_link_status = create_agent_skill_view(&cwd, "claude", &skill_dir, true)?;
+    let codex_native_link_status = create_agent_skill_view(&cwd, "codex", &native_skill_dir, true)?;
+    let claude_native_link_status =
+        create_agent_skill_view(&cwd, "claude", &native_skill_dir, true)?;
 
-    let submission_status = write_text_file(
-        &submission_path,
-        &build_submission_template(),
-        force,
-    )?;
+    let submission_status = write_text_file(&submission_path, &submission_content, true)?;
 
     println!(
         "{} {}",
         skill_status.label(),
         relative_display(&cwd, &skill_path)
     );
+    println!(
+        "{} {}",
+        native_skill_status.label(),
+        relative_display(&cwd, &native_skill_path)
+    );
     println!(
         "{} {}",
         manifest_status.label(),
@@ -91,17 +287,27 @@ pub fn run_setup(force: bool) -> Result<()> {
     println!(
         "{} {}",
         codex_link_status.label(),
+        relative_display(&cwd, &cwd.join(".codex").join("skills").join(SKILL_NAME))
+    );
+    println!(
+        "{} {}",
+        codex_native_link_status.label(),
         relative_display(
             &cwd,
-            &cwd.join(".codex").join("skills").join(SKILL_NAME)
+            &cwd.join(".codex").join("skills").join(NATIVE_SKILL_NAME)
         )
     );
     println!(
         "{} {}",
         claude_link_status.label(),
+        relative_display(&cwd, &cwd.join(".claude").join("skills").join(SKILL_NAME))
+    );
+    println!(
+        "{} {}",
+        claude_native_link_status.label(),
         relative_display(
             &cwd,
-            &cwd.join(".claude").join("skills").join(SKILL_NAME)
+            &cwd.join(".claude").join("skills").join(NATIVE_SKILL_NAME)
         )
     );
     println!(
@@ -148,10 +354,13 @@ fn create_agent_skill_view(
     skill_source_dir: &Path,
     force: bool,
 ) -> Result<ActionStatus> {
+    let skill_dir_name = skill_source_dir
+        .file_name()
+        .ok_or_else(|| anyhow!("skill source dir has no file name"))?;
     let agent_skills_dir = cwd.join(format!(".{}", agent_name)).join("skills");
     fs::create_dir_all(&agent_skills_dir)?;
 
-    let link_path = agent_skills_dir.join(SKILL_NAME);
+    let link_path = agent_skills_dir.join(skill_dir_name);
     let existed_before = path_exists(&link_path);
     if existed_before && !force {
         return Ok(ActionStatus::Skipped);
@@ -161,7 +370,7 @@ fn create_agent_skill_view(
         remove_existing_path(&link_path)?;
     }
 
-    let relative_target = PathBuf::from("../../.popcorn/skills").join(SKILL_NAME);
+    let relative_target = PathBuf::from("../../.popcorn/skills").join(skill_dir_name);
     let symlink_result = create_symlink_dir(&relative_target, &link_path);
     if symlink_result.is_err() {
         copy_dir_all(skill_source_dir, &link_path)?;
@@ -257,18 +466,20 @@ fn build_skill_markdown(readme_content: &str) -> String {
     )
 }
 
-fn build_agents_markdown(skill_path: &Path) -> String {
+fn build_agents_markdown(skill_path: &Path, native_skill_path: &Path) -> String {
     let skill_path_text = skill_path.to_string_lossy().to_string();
+    let native_skill_path_text = native_skill_path.to_string_lossy().to_string();
     render_template(
         AGENTS_TEMPLATE,
-        &[("{{SKILL_NAME}}", SKILL_NAME), ("{{SKILL_PATH}}", &skill_path_text)],
+        &[
+            ("{{SKILL_NAME}}", SKILL_NAME),
+            ("{{SKILL_PATH}}", &skill_path_text),
+            ("{{NATIVE_SKILL_NAME}}", NATIVE_SKILL_NAME),
+            ("{{NATIVE_SKILL_PATH}}", &native_skill_path_text),
+        ],
     )
 }
 
-fn build_submission_template() -> String {
-    SUBMISSION_TEMPLATE.to_string()
-}
-
 fn render_template(template: &str, replacements: &[(&str, &str)]) -> String {
     let mut output = template.to_string();
     for (needle, value) in replacements {
diff --git a/templates/setup/AGENTS.md b/templates/setup/AGENTS.md
index a0f8e5c..353790d 100644
--- a/templates/setup/AGENTS.md
+++ b/templates/setup/AGENTS.md
@@ -3,6 +3,7 @@ A skill is a local instruction bundle stored in `SKILL.md`.
 
 ### Available skills
 - {{SKILL_NAME}}: Helps with Popcorn CLI registration, submission setup, submission modes, and file directives. (file: {{SKILL_PATH}})
+- {{NATIVE_SKILL_NAME}}: Helps write CUDA and HIP kernels using torch.utils.cpp_extension.load_inline(). Use when writing native GPU code inside a Python submission. (file: {{NATIVE_SKILL_PATH}})
 
 ### How to use skills
 - Load the skill by reading its `SKILL.md` file when user requests match the description.
diff --git a/templates/setup/skills/load-inline-native-code/SKILL.md b/templates/setup/skills/load-inline-native-code/SKILL.md
new file mode 100644
index 0000000..ee23351
--- /dev/null
+++ b/templates/setup/skills/load-inline-native-code/SKILL.md
@@ -0,0 +1,143 @@
+---
+name: load-inline-native-code
+description: Helps write CUDA and HIP kernels using torch.utils.cpp_extension.load_inline(). Use when users want to write native GPU code (CUDA/HIP) inside a Python submission file.
+compatibility: Intended for popcorn-cli submissions targeting NVIDIA or AMD GPUs with native kernel code.
+---
+
+# Writing Native GPU Kernels with load_inline()
+
+Use this skill when the user wants to write a custom CUDA or HIP kernel inside their Python submission file using `torch.utils.cpp_extension.load_inline()`.
+
+## Overview
+
+`load_inline()` compiles C++/CUDA/HIP source code at runtime and loads it as a Python module. This lets you write raw GPU kernels directly in your `submission.py` without a separate build system.
+
+## CUDA Template (NVIDIA GPUs)
+
+```python
+import torch
+from torch.utils.cpp_extension import load_inline
+from task import input_t, output_t
+
+CUDA_SRC = """
+template <typename scalar_t>
+__global__ void my_kernel(const scalar_t* __restrict__ input,
+                          scalar_t* __restrict__ output,
+                          int N) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx < N) {
+        output[idx] = input[idx];
+    }
+}
+
+torch::Tensor my_op(torch::Tensor input, torch::Tensor output) {
+    int N = input.numel();
+    const int threads = 256;
+    const int blocks = (N + threads - 1) / threads;
+
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "my_kernel", ([&] {
+        my_kernel<scalar_t><<<blocks, threads>>>(
+            input.data_ptr<scalar_t>(),
+            output.data_ptr<scalar_t>(),
+            N
+        );
+    }));
+
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess) {
+        throw std::runtime_error(cudaGetErrorString(err));
+    }
+    return output;
+}
+"""
+
+CPP_SRC = """
+torch::Tensor my_op(torch::Tensor input, torch::Tensor output);
+"""
+
+module = load_inline(
+    name='my_module',
+    cpp_sources=[CPP_SRC],
+    cuda_sources=[CUDA_SRC],
+    functions=['my_op'],
+    verbose=True,
+)
+
+def custom_kernel(data: input_t) -> output_t:
+    input, output = data
+    return module.my_op(input, output)
+```
+
+## HIP Template (AMD GPUs)
+
+```python
+import os
+os.environ['PYTORCH_ROCM_ARCH'] = 'gfx942'
+os.environ['CXX'] = 'clang++'
+
+import torch
+from torch.utils.cpp_extension import load_inline
+from task import input_t, output_t
+
+CUDA_SRC = """
+#include <hip/amd_detail/amd_hip_bf16.h>
+
+__global__ void my_kernel(const float* input, float* output, int N) {
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx < N) {
+        output[idx] = input[idx];
+    }
+}
+
+void my_op(torch::Tensor input, torch::Tensor output) {
+    int N = input.numel();
+    const int threads = 256;
+    const int blocks = (N + threads - 1) / threads;
+    my_kernel<<<blocks, threads>>>(
+        input.data_ptr<float>(),
+        output.data_ptr<float>(),
+        N
+    );
+}
+"""
+
+CPP_SRC = """
+void my_op(torch::Tensor input, torch::Tensor output);
+"""
+
+module = load_inline(
+    name='my_module',
+    cpp_sources=[CPP_SRC],
+    cuda_sources=[CUDA_SRC],
+    functions=['my_op'],
+    verbose=True,
+    extra_cuda_cflags=["--offload-arch=gfx942", "-std=c++20"],
+)
+
+def custom_kernel(data: input_t) -> output_t:
+    input, output = data
+    module.my_op(input, output)
+    return output
+```
+
+## Key Points
+
+- **cpp_sources**: C++ declarations (prototypes) of the functions you want to call from Python. `load_inline()` generates the Python bindings from these for each name listed in `functions`.
+- **cuda_sources**: The actual CUDA/HIP kernel code and the C++ wrapper that launches it.
+- **functions**: List of function names to expose to Python. Each name must exactly match a function declared in `cpp_sources` and defined in `cuda_sources`.
+- **verbose=True**: Prints compilation output so you can debug build errors.
+- **extra_cuda_cflags**: Pass extra compiler flags. Needed for AMD HIP (`--offload-arch=gfx942`) or C++ standard selection.
+
+## Common Patterns
+
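+- Use `AT_DISPATCH_FLOATING_TYPES_AND_HALF` to handle multiple dtypes in CUDA kernels; it instantiates the kernel for `float`, `double`, and `half` and defines `scalar_t` inside the lambda.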
+- For AMD/HIP, set `PYTORCH_ROCM_ARCH` and `CXX` env vars **before** importing torch.
+- Always check `cudaGetLastError()` after kernel launches for NVIDIA targets.
+- The `load_inline` call compiles on first run and caches the result. Subsequent runs reuse the cache unless the source changes.
+- Keep the module-level `load_inline()` call **outside** `custom_kernel()` so compilation happens once at import time, not on every call.
+
+## Guardrails
+- The `custom_kernel` function signature must match `def custom_kernel(data: input_t) -> output_t:`.
+- The module is compiled at import time. Do not call `load_inline()` inside `custom_kernel()`.
+- For AMD GPUs, always set `PYTORCH_ROCM_ARCH` (and `CXX`, as in the HIP template) before any torch import.
+- Use `torch::Tensor` in C++ signatures for seamless Python-C++ tensor passing.
diff --git a/templates/setup/submission.py b/templates/setup/submission.py
deleted file mode 100644
index 2112cf0..0000000
--- a/templates/setup/submission.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!POPCORN leaderboard grayscale
-#!POPCORN gpu A100
-
-"""
-Popcorn submission template generated by `popcorn setup`.
-
-README-aligned notes:
-- Submissions are a single Python file.
-- You can install extra dependencies at runtime with `pip` if needed.
-- Submit with: `popcorn submit submission.py`
-"""
-
-
-def solution():
-    # Replace with your kernel implementation.
-    return "hello from popcorn"
-
-
-if __name__ == "__main__":
-    print(solution())