diff --git a/sandbox-sidecar/scripts/Dockerfile.runner b/sandbox-sidecar/scripts/Dockerfile.runner new file mode 100644 index 000000000..098aba9de --- /dev/null +++ b/sandbox-sidecar/scripts/Dockerfile.runner @@ -0,0 +1,24 @@ +# Dockerfile.runner +# Minimal Terraform runner image - similar to what you'd use in Kubernetes +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y \ + curl \ + unzip \ + ca-certificates \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install Terraform (match the version we benchmark) +ARG TF_VERSION=1.5.7 +RUN curl -fsSL https://releases.hashicorp.com/terraform/${TF_VERSION}/terraform_${TF_VERSION}_linux_amd64.zip -o /tmp/terraform.zip \ + && unzip /tmp/terraform.zip -d /usr/local/bin \ + && rm /tmp/terraform.zip \ + && terraform version + +WORKDIR /workspace + +CMD ["bash"] + diff --git a/sandbox-sidecar/scripts/debug-terraform-e2b.ts b/sandbox-sidecar/scripts/debug-terraform-e2b.ts new file mode 100644 index 000000000..677662f24 --- /dev/null +++ b/sandbox-sidecar/scripts/debug-terraform-e2b.ts @@ -0,0 +1,198 @@ +// debug-terraform-e2b.ts +// +// Run a 10k-null Terraform apply inside an E2B sandbox with +// *no* stdout/stderr callbacks, just final output. +// +// Usage: +// cd sandbox-sidecar +// npx tsx scripts/debug-terraform-e2b.ts +// +// This isolates whether the slowness is: +// A) E2B's virtualization/environment +// B) Our sidecar's SDK callback overhead +// C) Our log handling code + +import "dotenv/config"; +import { Sandbox } from "@e2b/code-interpreter"; + +// Use our pre-built template with terraform + providers cached +const TEMPLATE_ID = "terraform-1-5-7--tpl-0-2-2"; +const WORK_DIR = "/home/user/benchmark"; + +// The benchmark Terraform config - 10k null resources +const MAIN_TF = ` +# Benchmark: 10,000 Null Resources +# Purpose: Test performance with large number of resources + +terraform { + required_providers { + null = { + source = "hashicorp/null" + version = "~> 3.0" + } + } +} + +resource "null_resource" "massive" { + count = 10000 + + triggers = { + index = count.index + } +} +`; + +async function main() { + const apiKey = process.env.E2B_API_KEY; + if (!apiKey) { + console.error("E2B_API_KEY environment variable is required"); + console.error("Set it in .env or export E2B_API_KEY=..."); + process.exit(1); + } + + console.log("=".repeat(60)); + console.log("E2B Terraform Performance Debug Script"); + console.log("=".repeat(60)); + console.log(`Template: ${TEMPLATE_ID}`); + console.log(`Resources: 10,000 null_resource`); + console.log("=".repeat(60)); + + console.log("\n[1/6] Creating E2B sandbox..."); + const startCreate = Date.now(); + + const sandbox = await Sandbox.create(TEMPLATE_ID, { + apiKey, + timeoutMs: 30 * 60 * 1000, // 30 minutes + }); + + const createTime = Date.now() - startCreate; + console.log(`Sandbox created: ${sandbox.sandboxId}`); + console.log(`Creation time: ${createTime}ms`); + + try { + // 2) Create the benchmark directory and write main.tf + console.log("\n[2/6] Creating benchmark terraform config..."); + await sandbox.commands.run(`mkdir -p ${WORK_DIR}`); + await sandbox.files.write(`${WORK_DIR}/main.tf`, MAIN_TF); + + let result = await sandbox.commands.run(`cat ${WORK_DIR}/main.tf`); + console.log("Created main.tf:"); + console.log(result.stdout); + + // 3) Run terraform init + console.log("\n[3/6] Running terraform init..."); + const startInit = Date.now(); + result = await sandbox.commands.run( + `cd ${WORK_DIR} && terraform init -input=false -no-color 
-plugin-dir=/usr/share/terraform/providers`, + { timeoutMs: 300000 } + ); + const initTime = Date.now() - startInit; + console.log(`Init time: ${initTime}ms`); + console.log(result.stdout.slice(-500)); + if (result.exitCode !== 0) { + console.error("terraform init failed:", result.stderr); + return; + } + + // 4) TIME TEST: Run apply WITHOUT streaming callbacks + console.log("\n[4/6] Running terraform apply (NO streaming callbacks)..."); + console.log("This will take a while. No output until complete."); + console.log("Started at:", new Date().toISOString()); + + const startApply = Date.now(); + result = await sandbox.commands.run( + `cd ${WORK_DIR} && terraform apply -auto-approve -input=false -no-color -parallelism=30`, + { + timeoutMs: 60 * 60 * 1000, // 1 hour + // NO onStdout/onStderr callbacks! + } + ); + const applyTime = Date.now() - startApply; + + console.log(`\nApply completed at: ${new Date().toISOString()}`); + console.log(`Apply time: ${applyTime}ms (${(applyTime/1000).toFixed(1)}s) = ${(applyTime/60000).toFixed(2)} minutes`); + console.log(`\nLast 500 chars of output:`); + console.log(result.stdout.slice(-500)); + + if (result.exitCode !== 0) { + console.error("terraform apply failed:", result.stderr); + return; + } + + // 5) Destroy and re-apply with output redirected to file + console.log("\n[5/6] Destroying resources for second test..."); + const startDestroy = Date.now(); + await sandbox.commands.run( + `cd ${WORK_DIR} && terraform destroy -auto-approve -input=false -no-color -parallelism=30 > /tmp/destroy.log 2>&1`, + { timeoutMs: 60 * 60 * 1000 } + ); + const destroyTime = Date.now() - startDestroy; + console.log(`Destroy time: ${destroyTime}ms (${(destroyTime/1000).toFixed(1)}s)`); + + // 6) Apply with file redirect - completely bypasses SDK stdout handling + console.log("\n[6/6] Running apply with output redirected to file..."); + console.log("(This tests if SDK stdout collection has overhead)"); + const startApplyFile = Date.now(); + result = await sandbox.commands.run( + `cd ${WORK_DIR} && terraform apply -auto-approve -input=false -no-color -parallelism=30 > /tmp/apply.log 2>&1; echo "EXIT_CODE=$?"`, + { timeoutMs: 60 * 60 * 1000 } + ); + const applyFileTime = Date.now() - startApplyFile; + + console.log(`Apply (file redirect) time: ${applyFileTime}ms (${(applyFileTime/1000).toFixed(1)}s) = ${(applyFileTime/60000).toFixed(2)} minutes`); + + // Read last part of log + result = await sandbox.commands.run(`tail -10 /tmp/apply.log`); + console.log("Last 10 lines of apply.log:"); + console.log(result.stdout); + + // Summary + console.log("\n" + "=".repeat(60)); + console.log("SUMMARY"); + console.log("=".repeat(60)); + console.log(`Sandbox creation: ${createTime}ms (${(createTime/1000).toFixed(1)}s)`); + console.log(`Terraform init: ${initTime}ms (${(initTime/1000).toFixed(1)}s)`); + console.log(`Apply (SDK collects): ${applyTime}ms (${(applyTime/1000).toFixed(1)}s) = ${(applyTime/60000).toFixed(2)} min`); + console.log(`Apply (file redirect): ${applyFileTime}ms (${(applyFileTime/1000).toFixed(1)}s) = ${(applyFileTime/60000).toFixed(2)} min`); + console.log(`Destroy: ${destroyTime}ms (${(destroyTime/1000).toFixed(1)}s)`); + console.log("=".repeat(60)); + + console.log("\nDIAGNOSIS:"); + if (applyTime > 600000) { // > 10 minutes + console.log("āŒ Apply took > 10 minutes."); + console.log(" The E2B environment itself is slow."); + console.log(" This is NOT a sidecar code issue - it's E2B's VM performance."); + console.log(" Options:"); + console.log(" - Contact E2B 
about VM performance"); + console.log(" - Try a different sandbox provider"); + console.log(" - Accept this as the baseline for E2B"); + } else if (applyTime > 300000) { // > 5 minutes + console.log("āš ļø Apply took 5-10 minutes."); + console.log(" This is similar to Spacelift's performance."); + console.log(" E2B is comparable but not faster."); + } else if (applyTime > 60000) { // > 1 minute + console.log("🟔 Apply took 1-5 minutes."); + console.log(" E2B is reasonably fast."); + console.log(" Our sidecar overhead might be adding to this."); + } else { + console.log("āœ… Apply took < 1 minute!"); + console.log(" E2B is fast - any slowness is in our sidecar code."); + } + + const sdkOverhead = applyTime - applyFileTime; + if (Math.abs(sdkOverhead) > 10000) { // > 10 second difference + console.log(`\nšŸ“Š SDK stdout collection overhead: ${(sdkOverhead/1000).toFixed(1)}s`); + } + + } finally { + console.log("\nKilling sandbox..."); + await sandbox.kill(); + console.log("Done."); + } +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); + diff --git a/sandbox-sidecar/scripts/debug-terraform-local.sh b/sandbox-sidecar/scripts/debug-terraform-local.sh new file mode 100755 index 000000000..4f5207ee3 --- /dev/null +++ b/sandbox-sidecar/scripts/debug-terraform-local.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# +# debug-terraform-local.sh +# +# Run the same 10k null_resource benchmark locally to establish a baseline. +# Compare this time against E2B to see the overhead. +# +# Usage: +# cd sandbox-sidecar/scripts +# chmod +x debug-terraform-local.sh +# ./debug-terraform-local.sh +# + +set -e + +WORK_DIR="/tmp/terraform-benchmark-$$" +echo "============================================================" +echo "Local Terraform Performance Benchmark" +echo "============================================================" +echo "Work directory: $WORK_DIR" +echo "Terraform version: $(terraform version -json | jq -r '.terraform_version' 2>/dev/null || terraform version | head -1)" +echo "============================================================" + +# Create work directory +mkdir -p "$WORK_DIR" +cd "$WORK_DIR" + +# Create the benchmark terraform config +cat > main.tf << 'EOF' +# Benchmark: 10,000 Null Resources +# Purpose: Test performance with large number of resources + +terraform { + required_providers { + null = { + source = "hashicorp/null" + version = "~> 3.0" + } + } +} + +resource "null_resource" "massive" { + count = 10000 + + triggers = { + index = count.index + } +} +EOF + +echo "" +echo "[1/4] Created main.tf with 10,000 null_resources" +cat main.tf +echo "" + +# Init +echo "[2/4] Running terraform init..." +INIT_START=$(date +%s%3N) +terraform init -input=false -no-color > /dev/null 2>&1 +INIT_END=$(date +%s%3N) +INIT_TIME=$((INIT_END - INIT_START)) +echo "Init time: ${INIT_TIME}ms" + +# Apply +echo "" +echo "[3/4] Running terraform apply -parallelism=30..." +echo "Started at: $(date)" +APPLY_START=$(date +%s%3N) +terraform apply -auto-approve -input=false -no-color -parallelism=30 > /tmp/apply-local.log 2>&1 +APPLY_END=$(date +%s%3N) +APPLY_TIME=$((APPLY_END - APPLY_START)) +echo "Completed at: $(date)" +echo "" +echo "Last 10 lines of output:" +tail -10 /tmp/apply-local.log +echo "" + +# Destroy +echo "[4/4] Running terraform destroy..." 
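+# NB: the millisecond timestamps in this script use GNU date's %N; on macOS/BSD
+# date, %N is unsupported (it prints a literal N), so install coreutils (gdate)
+# or fall back to whole-second precision when running this baseline on a Mac.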
+DESTROY_START=$(date +%s%3N) +terraform destroy -auto-approve -input=false -no-color -parallelism=30 > /dev/null 2>&1 +DESTROY_END=$(date +%s%3N) +DESTROY_TIME=$((DESTROY_END - DESTROY_START)) + +# Cleanup +cd / +rm -rf "$WORK_DIR" + +# Summary +echo "" +echo "============================================================" +echo "SUMMARY - LOCAL MACHINE BASELINE" +echo "============================================================" +echo "Terraform init: ${INIT_TIME}ms ($((INIT_TIME / 1000))s)" +echo "Terraform apply: ${APPLY_TIME}ms ($((APPLY_TIME / 1000))s) = $(echo "scale=2; $APPLY_TIME / 60000" | bc) minutes" +echo "Terraform destroy: ${DESTROY_TIME}ms ($((DESTROY_TIME / 1000))s)" +echo "============================================================" +echo "" +echo "Compare this to E2B results to see the overhead." +echo "" + +if [ $APPLY_TIME -lt 60000 ]; then + echo "āœ… Local apply took < 1 minute - this is the target for E2B" +elif [ $APPLY_TIME -lt 120000 ]; then + echo "🟔 Local apply took 1-2 minutes" +else + echo "āš ļø Local apply took > 2 minutes - your machine might be slow too" +fi + diff --git a/sandbox-sidecar/scripts/docker-runner-benchmark.ts b/sandbox-sidecar/scripts/docker-runner-benchmark.ts new file mode 100644 index 000000000..0dd0e9d29 --- /dev/null +++ b/sandbox-sidecar/scripts/docker-runner-benchmark.ts @@ -0,0 +1,331 @@ +// docker-runner-benchmark.ts +// +// Run the 10k null_resource benchmark in a Docker container +// with the SAME log batching as our production sidecar. +// +// This simulates what a Kubernetes-based runner (like Atlantis) would do. +// +// Usage: +// cd sandbox-sidecar/scripts +// npx tsx docker-runner-benchmark.ts +// +// Compare results to E2B to show: +// "Our own Kubernetes runners could be Nx faster than E2B" + +import { spawn } from "child_process"; +import * as path from "path"; +import * as fs from "fs"; +import * as os from "os"; +import { fileURLToPath } from "url"; + +// ES module compatibility +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +const IMAGE_NAME = "tf-runner:benchmark"; +const TF_VERSION = "1.5.7"; + +// The benchmark Terraform config - 10k null resources +const MAIN_TF = ` +# Benchmark: 10,000 Null Resources +# Purpose: Test performance with large number of resources + +terraform { + required_providers { + null = { + source = "hashicorp/null" + version = "~> 3.0" + } + } +} + +resource "null_resource" "massive" { + count = 10000 + + triggers = { + index = count.index + } +} +`; + +// ============================================================ +// Log batching - SAME as our production sidecar +// ============================================================ +class LogBuffer { + private chunks: string[] = []; + private totalBytes = 0; + private flushTimeout: NodeJS.Timeout | null = null; + private readonly FLUSH_INTERVAL_MS = 100; + private readonly FLUSH_SIZE_BYTES = 4096; + + constructor(private onFlush: (batch: string) => void) {} + + append(chunk: string) { + if (!chunk) return; + this.chunks.push(chunk); + this.totalBytes += chunk.length; + + if (this.totalBytes >= this.FLUSH_SIZE_BYTES) { + this.flush(); + } else if (!this.flushTimeout) { + this.flushTimeout = setTimeout(() => { + this.flushTimeout = null; + this.flush(); + }, this.FLUSH_INTERVAL_MS); + } + } + + flush() { + if (this.flushTimeout) { + clearTimeout(this.flushTimeout); + this.flushTimeout = null; + } + if (this.chunks.length > 0) { + const batch = this.chunks.join(""); + this.chunks = []; + 
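+      // reset the running byte count along with the chunk list so the size threshold restarts cleanly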
this.totalBytes = 0;
+      this.onFlush(batch);
+    }
+  }
+
+  getAll(): string {
+    this.flush();
+    return ""; // everything was already handed to onFlush in batches
+  }
+}
+
+// ============================================================
+// Docker helpers
+// ============================================================
+
+function runCommand(
+  cmd: string,
+  args: string[],
+  options: {
+    cwd?: string;
+    onStdout?: (chunk: string) => void;
+    onStderr?: (chunk: string) => void;
+  } = {}
+): Promise<{ code: number; stdout: string; stderr: string }> {
+  return new Promise((resolve, reject) => {
+    const child = spawn(cmd, args, {
+      cwd: options.cwd,
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+
+    let stdout = "";
+    let stderr = "";
+
+    child.stdout.on("data", (data: Buffer) => {
+      const chunk = data.toString();
+      stdout += chunk;
+      options.onStdout?.(chunk);
+    });
+
+    child.stderr.on("data", (data: Buffer) => {
+      const chunk = data.toString();
+      stderr += chunk;
+      options.onStderr?.(chunk);
+    });
+
+    child.on("error", reject);
+    child.on("close", (code) => {
+      resolve({ code: code ?? 1, stdout, stderr });
+    });
+  });
+}
+
+async function dockerImageExists(image: string): Promise<boolean> {
+  const { code } = await runCommand("docker", ["image", "inspect", image]);
+  return code === 0;
+}
+
+async function buildDockerImage(): Promise<void> {
+  console.log(`Building Docker image: ${IMAGE_NAME}`);
+  const dockerfilePath = path.join(__dirname, "Dockerfile.runner");
+
+  if (!fs.existsSync(dockerfilePath)) {
+    throw new Error(`Dockerfile not found at ${dockerfilePath}`);
+  }
+
+  const { code, stderr } = await runCommand(
+    "docker",
+    ["build", "-t", IMAGE_NAME, "-f", dockerfilePath, "--build-arg", `TF_VERSION=${TF_VERSION}`, __dirname],
+    {
+      onStdout: (chunk) => process.stdout.write(chunk),
+      onStderr: (chunk) => process.stderr.write(chunk),
+    }
+  );
+
+  if (code !== 0) {
+    throw new Error(`Docker build failed: ${stderr}`);
+  }
+}
+
+// ============================================================
+// Main benchmark
+// ============================================================
+
+async function main() {
+  console.log("=".repeat(60));
+  console.log("Docker Runner Benchmark (K8s-style)");
+  console.log("=".repeat(60));
+  console.log(`Image: ${IMAGE_NAME}`);
+  console.log(`Terraform: ${TF_VERSION}`);
+  console.log(`Resources: 10,000 null_resource`);
+  console.log(`Log batching: YES (same as production sidecar)`);
+  console.log("=".repeat(60));
+
+  // Create temp directory for terraform files
+  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "tf-benchmark-"));
+  console.log(`\nWork directory: ${workDir}`);
+
+  try {
+    // Write main.tf
+    fs.writeFileSync(path.join(workDir, "main.tf"), MAIN_TF);
+    console.log("Created main.tf with 10k null_resources");
+
+    // Build Docker image if needed
+    console.log("\n[1/5] Checking Docker image...");
+    const imageExists = await dockerImageExists(IMAGE_NAME);
+    if (!imageExists) {
+      await buildDockerImage();
+    } else {
+      console.log(`Image ${IMAGE_NAME} already exists, skipping build`);
+    }
+
+    // Collect all logs with batching (like production)
+    const allLogs: string[] = [];
+    const logBuffer = new LogBuffer((batch) => {
+      allLogs.push(batch);
+      // Simulate what production does - we collect but don't print every line
+    });
+
+    // Run terraform init
+    console.log("\n[2/5] Running terraform init in Docker...");
+    const initStart = Date.now();
+    let result = await runCommand(
+      "docker",
+      [
+        "run",
+        "--rm",
+        "-v", `${workDir}:/workspace`,
+        IMAGE_NAME,
+        "terraform", "init", "-input=false", "-no-color",
+      ],
+      {
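+        // route both streams through the batching buffer, mirroring the production sidecar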
+ onStdout: (chunk) => logBuffer.append(chunk), + onStderr: (chunk) => logBuffer.append(chunk), + } + ); + logBuffer.flush(); + const initTime = Date.now() - initStart; + console.log(`Init time: ${initTime}ms (${(initTime / 1000).toFixed(1)}s)`); + + if (result.code !== 0) { + console.error("terraform init failed"); + console.error(result.stderr); + process.exit(1); + } + + // Run terraform apply with log batching + console.log("\n[3/5] Running terraform apply in Docker (with log batching)..."); + console.log("Started at:", new Date().toISOString()); + + const applyStart = Date.now(); + result = await runCommand( + "docker", + [ + "run", + "--rm", + "-v", `${workDir}:/workspace`, + IMAGE_NAME, + "terraform", "apply", "-auto-approve", "-input=false", "-no-color", "-parallelism=30", + ], + { + onStdout: (chunk) => logBuffer.append(chunk), + onStderr: (chunk) => logBuffer.append(chunk), + } + ); + logBuffer.flush(); + const applyTime = Date.now() - applyStart; + + console.log(`\nApply completed at: ${new Date().toISOString()}`); + console.log(`Apply time: ${applyTime}ms (${(applyTime / 1000).toFixed(1)}s) = ${(applyTime / 60000).toFixed(2)} minutes`); + + if (result.code !== 0) { + console.error("terraform apply failed"); + console.error(result.stderr.slice(-1000)); + process.exit(1); + } + + // Show last bit of logs + const combinedLogs = allLogs.join(""); + console.log("\nLast 500 chars of logs:"); + console.log(combinedLogs.slice(-500)); + + // Run terraform destroy + console.log("\n[4/5] Running terraform destroy in Docker..."); + const destroyStart = Date.now(); + result = await runCommand( + "docker", + [ + "run", + "--rm", + "-v", `${workDir}:/workspace`, + IMAGE_NAME, + "terraform", "destroy", "-auto-approve", "-input=false", "-no-color", "-parallelism=30", + ], + { + onStdout: (chunk) => logBuffer.append(chunk), + onStderr: (chunk) => logBuffer.append(chunk), + } + ); + logBuffer.flush(); + const destroyTime = Date.now() - destroyStart; + console.log(`Destroy time: ${destroyTime}ms (${(destroyTime / 1000).toFixed(1)}s)`); + + // Summary + console.log("\n[5/5] Cleanup..."); + + console.log("\n" + "=".repeat(60)); + console.log("SUMMARY - DOCKER RUNNER (K8s-style)"); + console.log("=".repeat(60)); + console.log(`Terraform init: ${initTime}ms (${(initTime / 1000).toFixed(1)}s)`); + console.log(`Terraform apply: ${applyTime}ms (${(applyTime / 1000).toFixed(1)}s) = ${(applyTime / 60000).toFixed(2)} min`); + console.log(`Terraform destroy: ${destroyTime}ms (${(destroyTime / 1000).toFixed(1)}s)`); + console.log(`Total logs collected: ${combinedLogs.length} bytes`); + console.log("=".repeat(60)); + + console.log("\nCOMPARISON:"); + console.log("ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”"); + console.log("│ Environment │ Apply Time │ vs Docker │"); + console.log("ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤"); + console.log(`│ Docker (this) │ ${(applyTime / 60000).toFixed(2)} min │ 1.0x │`); + console.log("│ Your Mac (M4) │ ~4.5 min │ ~1.0x │"); + console.log("│ Spacelift │ ~5-6 min │ ~1.2x │"); + console.log("│ E2B │ ~14 min │ ~3.0x │"); + console.log("ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜"); + + console.log("\nšŸ’” TAKEAWAY:"); + if (applyTime < 360000) { // < 6 min + console.log("āœ… 
Docker runner is FAST - similar to bare metal!");
+    console.log("   Self-hosted Kubernetes runners would be ~3x faster than E2B.");
+    console.log("   Consider offering a 'bring your own runner' option.");
+  } else {
+    console.log("🟔 Docker runner is slower than expected.");
+    console.log("   Check Docker resource limits (CPU/memory).");
+  }
+
+  } finally {
+    // Cleanup
+    fs.rmSync(workDir, { recursive: true, force: true });
+    console.log("\nCleaned up temp directory.");
+  }
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});
+
diff --git a/sandbox-sidecar/scripts/docker-runner-modules-benchmark.ts b/sandbox-sidecar/scripts/docker-runner-modules-benchmark.ts
new file mode 100644
index 000000000..0fab25228
--- /dev/null
+++ b/sandbox-sidecar/scripts/docker-runner-modules-benchmark.ts
@@ -0,0 +1,366 @@
+// docker-runner-modules-benchmark.ts
+//
+// Run a module-based benchmark (50 modules x 20 resources = 1000 resources).
+// Times PLAN and APPLY separately.
+//
+// Usage:
+//   cd sandbox-sidecar/scripts
+//   npx tsx docker-runner-modules-benchmark.ts

+import { spawn } from "child_process";
+import * as path from "path";
+import * as fs from "fs";
+import * as os from "os";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+const IMAGE_NAME = "tf-runner:benchmark";
+const TF_VERSION = "1.5.7";
+
+// Main terraform config - calls 50 child modules
+const MAIN_TF = `
+# Benchmark: Root Module with Many Child Modules
+# Purpose: Test module loading and initialization performance
+# Total resources: 50 modules x 20 resources (10 null + 10 random_id) = 1000 resources
+
+terraform {
+  required_providers {
+    null = {
+      source  = "hashicorp/null"
+      version = "~> 3.0"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.0"
+    }
+  }
+}
+
+# Call the child module 50 times
+module "child" {
+  count          = 50
+  source         = "./modules/simple-module"
+  instance_name  = "child-\${count.index}"
+  resource_count = 10
+}
+
+output "module_outputs" {
+  description = "Outputs from all child modules"
+  value = {
+    for idx, mod in module.child : idx => mod.resource_ids
+  }
+}
+`;
+
+// Child module that creates multiple resources
+const MODULE_MAIN_TF = `
+# Simple child module
+# Creates 'resource_count' null resources (plus matching random_id resources)
+
+variable "instance_name" {
+  type        = string
+  description = "Name for this instance"
+}
+
+variable "resource_count" {
+  type        = number
+  description = "Number of resources to create"
+  default     = 10
+}
+
+resource "null_resource" "items" {
+  count = var.resource_count
+
+  triggers = {
+    name  = var.instance_name
+    index = count.index
+  }
+}
+
+resource "random_id" "suffix" {
+  count       = var.resource_count
+  byte_length = 4
+}
+
+output "resource_ids" {
+  description = "IDs of created resources"
+  value       = null_resource.items[*].id
+}
+
+output "random_ids" {
+  description = "Random IDs"
+  value       = random_id.suffix[*].hex
+}
+`;
+
+// Log buffer (same as production)
+class LogBuffer {
+  private chunks: string[] = [];
+  private totalBytes = 0;
+  private flushTimeout: NodeJS.Timeout | null = null;
+  private readonly FLUSH_INTERVAL_MS = 100;
+  private readonly FLUSH_SIZE_BYTES = 4096;
+
+  constructor(private onFlush: (batch: string) => void) {}
+
+  append(chunk: string) {
+    if (!chunk) return;
+    this.chunks.push(chunk);
+    this.totalBytes += chunk.length;
+
+    if (this.totalBytes >= this.FLUSH_SIZE_BYTES) {
+      this.flush();
+    } else if (!this.flushTimeout) {
+      this.flushTimeout = setTimeout(() => {
+        this.flushTimeout = null;
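+        // interval elapsed with a partial batch: emit whatever accumulated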
this.flush();
+      }, this.FLUSH_INTERVAL_MS);
+    }
+  }
+
+  flush() {
+    if (this.flushTimeout) {
+      clearTimeout(this.flushTimeout);
+      this.flushTimeout = null;
+    }
+    if (this.chunks.length > 0) {
+      const batch = this.chunks.join("");
+      this.chunks = [];
+      this.totalBytes = 0;
+      this.onFlush(batch);
+    }
+  }
+}
+
+function runCommand(
+  cmd: string,
+  args: string[],
+  options: {
+    cwd?: string;
+    onStdout?: (chunk: string) => void;
+    onStderr?: (chunk: string) => void;
+  } = {}
+): Promise<{ code: number; stdout: string; stderr: string }> {
+  return new Promise((resolve, reject) => {
+    const child = spawn(cmd, args, {
+      cwd: options.cwd,
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+
+    let stdout = "";
+    let stderr = "";
+
+    child.stdout.on("data", (data: Buffer) => {
+      const chunk = data.toString();
+      stdout += chunk;
+      options.onStdout?.(chunk);
+    });
+
+    child.stderr.on("data", (data: Buffer) => {
+      const chunk = data.toString();
+      stderr += chunk;
+      options.onStderr?.(chunk);
+    });
+
+    child.on("error", reject);
+    child.on("close", (code) => {
+      resolve({ code: code ?? 1, stdout, stderr });
+    });
+  });
+}
+
+async function dockerImageExists(image: string): Promise<boolean> {
+  const { code } = await runCommand("docker", ["image", "inspect", image]);
+  return code === 0;
+}
+
+async function buildDockerImage(): Promise<void> {
+  console.log(`Building Docker image: ${IMAGE_NAME}`);
+  const dockerfilePath = path.join(__dirname, "Dockerfile.runner");
+
+  if (!fs.existsSync(dockerfilePath)) {
+    throw new Error(`Dockerfile not found at ${dockerfilePath}`);
+  }
+
+  const { code, stderr } = await runCommand(
+    "docker",
+    ["build", "-t", IMAGE_NAME, "-f", dockerfilePath, "--build-arg", `TF_VERSION=${TF_VERSION}`, __dirname],
+    {
+      onStdout: (chunk) => process.stdout.write(chunk),
+      onStderr: (chunk) => process.stderr.write(chunk),
+    }
+  );
+
+  if (code !== 0) {
+    throw new Error(`Docker build failed: ${stderr}`);
+  }
+}
+
+async function main() {
+  console.log("=".repeat(60));
+  console.log("Docker Runner - MODULES Benchmark");
+  console.log("=".repeat(60));
+  console.log(`Image: ${IMAGE_NAME}`);
+  console.log(`Terraform: ${TF_VERSION}`);
+  console.log(`Structure: 50 modules x 20 resources (10 null + 10 random_id) = 1000 total resources`);
+  console.log(`Tests: PLAN and APPLY timed separately`);
+  console.log("=".repeat(60));
+
+  // Create temp directory
+  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "tf-modules-bench-"));
+  console.log(`\nWork directory: ${workDir}`);
+
+  try {
+    // Create directory structure
+    const modulesDir = path.join(workDir, "modules", "simple-module");
+    fs.mkdirSync(modulesDir, { recursive: true });
+
+    // Write main.tf
+    fs.writeFileSync(path.join(workDir, "main.tf"), MAIN_TF);
+    console.log("Created main.tf (50 module calls)");
+
+    // Write module
+    fs.writeFileSync(path.join(modulesDir, "main.tf"), MODULE_MAIN_TF);
+    console.log("Created modules/simple-module/main.tf");
+
+    // Check/build Docker image
+    console.log("\n[1/6] Checking Docker image...");
+    const imageExists = await dockerImageExists(IMAGE_NAME);
+    if (!imageExists) {
+      await buildDockerImage();
+    } else {
+      console.log(`Image ${IMAGE_NAME} already exists`);
+    }
+
+    const allLogs: string[] = [];
+    const logBuffer = new LogBuffer((batch) => allLogs.push(batch));
+
+    // INIT
+    console.log("\n[2/6] Running terraform init...");
+    const initStart = Date.now();
+    let result = await runCommand(
+      "docker",
+      [
+        "run", "--rm",
+        "-v", `${workDir}:/workspace`,
+        IMAGE_NAME,
+        "terraform", "init", "-input=false", "-no-color",
+      ],
+      {
+        onStdout:
(chunk) => logBuffer.append(chunk), + onStderr: (chunk) => logBuffer.append(chunk), + } + ); + logBuffer.flush(); + const initTime = Date.now() - initStart; + console.log(`Init time: ${initTime}ms (${(initTime / 1000).toFixed(1)}s)`); + + if (result.code !== 0) { + console.error("terraform init failed:", result.stderr); + process.exit(1); + } + + // PLAN (timed separately) + console.log("\n[3/6] Running terraform plan..."); + console.log("Started at:", new Date().toISOString()); + const planStart = Date.now(); + result = await runCommand( + "docker", + [ + "run", "--rm", + "-v", `${workDir}:/workspace`, + IMAGE_NAME, + "terraform", "plan", "-input=false", "-no-color", "-out=tfplan", + ], + { + onStdout: (chunk) => logBuffer.append(chunk), + onStderr: (chunk) => logBuffer.append(chunk), + } + ); + logBuffer.flush(); + const planTime = Date.now() - planStart; + console.log(`Plan time: ${planTime}ms (${(planTime / 1000).toFixed(1)}s)`); + + if (result.code !== 0) { + console.error("terraform plan failed:", result.stderr.slice(-500)); + process.exit(1); + } + + // APPLY (timed separately) + console.log("\n[4/6] Running terraform apply..."); + console.log("Started at:", new Date().toISOString()); + const applyStart = Date.now(); + result = await runCommand( + "docker", + [ + "run", "--rm", + "-v", `${workDir}:/workspace`, + IMAGE_NAME, + "terraform", "apply", "-auto-approve", "-input=false", "-no-color", "-parallelism=30", + ], + { + onStdout: (chunk) => logBuffer.append(chunk), + onStderr: (chunk) => logBuffer.append(chunk), + } + ); + logBuffer.flush(); + const applyTime = Date.now() - applyStart; + console.log(`Apply time: ${applyTime}ms (${(applyTime / 1000).toFixed(1)}s)`); + + if (result.code !== 0) { + console.error("terraform apply failed:", result.stderr.slice(-500)); + process.exit(1); + } + + // DESTROY + console.log("\n[5/6] Running terraform destroy..."); + const destroyStart = Date.now(); + result = await runCommand( + "docker", + [ + "run", "--rm", + "-v", `${workDir}:/workspace`, + IMAGE_NAME, + "terraform", "destroy", "-auto-approve", "-input=false", "-no-color", "-parallelism=30", + ], + { + onStdout: (chunk) => logBuffer.append(chunk), + onStderr: (chunk) => logBuffer.append(chunk), + } + ); + logBuffer.flush(); + const destroyTime = Date.now() - destroyStart; + console.log(`Destroy time: ${destroyTime}ms (${(destroyTime / 1000).toFixed(1)}s)`); + + // Summary + console.log("\n[6/6] Results"); + console.log("\n" + "=".repeat(60)); + console.log("SUMMARY - MODULES BENCHMARK (Docker)"); + console.log("=".repeat(60)); + console.log(`Resources: 50 modules Ɨ 20 resources each = 1000 total`); + console.log("-".repeat(60)); + console.log(`Terraform init: ${initTime}ms (${(initTime / 1000).toFixed(1)}s)`); + console.log(`Terraform plan: ${planTime}ms (${(planTime / 1000).toFixed(1)}s)`); + console.log(`Terraform apply: ${applyTime}ms (${(applyTime / 1000).toFixed(1)}s)`); + console.log(`Terraform destroy: ${destroyTime}ms (${(destroyTime / 1000).toFixed(1)}s)`); + console.log("-".repeat(60)); + console.log(`Plan + Apply: ${planTime + applyTime}ms (${((planTime + applyTime) / 1000).toFixed(1)}s)`); + console.log("=".repeat(60)); + + console.log("\nšŸ“Š Use these numbers to compare against E2B:"); + console.log(" Run the same benchmark in E2B and compare plan/apply times."); + console.log(" If Docker is 2-3x faster, K8s runners are the better choice."); + + } finally { + fs.rmSync(workDir, { recursive: true, force: true }); + console.log("\nCleaned up temp directory."); + } +} + 
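+// Entry point: any failure (missing Docker, terraform errors) surfaces as a non-zero exit.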
+main().catch((err) => { + console.error(err); + process.exit(1); +}); + diff --git a/sandbox-sidecar/scripts/k8s-runner-benchmark.ts b/sandbox-sidecar/scripts/k8s-runner-benchmark.ts new file mode 100644 index 000000000..f3fee05de --- /dev/null +++ b/sandbox-sidecar/scripts/k8s-runner-benchmark.ts @@ -0,0 +1,329 @@ +// k8s-runner-benchmark.ts +// +// Run the 10k null_resource benchmark as a Kubernetes Job. +// This tests real network latency to your cluster. +// +// Prerequisites: +// 1. kubectl configured to access your cluster +// 2. Run: kubectl apply -f k8s-setup.yaml +// +// Usage: +// cd sandbox-sidecar/scripts +// npx tsx k8s-runner-benchmark.ts + +import { spawn } from "child_process"; +import * as path from "path"; +import { fileURLToPath } from "url"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +const NAMESPACE = "otaco-runners"; +const TF_VERSION = "1.5.7"; +const JOB_NAME_PREFIX = "otaco-benchmark"; + +// 10k null resources benchmark +const MAIN_TF = ` +terraform { + required_providers { + null = { + source = "hashicorp/null" + version = "~> 3.0" + } + } +} + +resource "null_resource" "massive" { + count = 10000 + triggers = { + index = count.index + } +} +`; + +function kubectl(args: string[], options: { input?: string } = {}): Promise<{ stdout: string; stderr: string; code: number }> { + return new Promise((resolve, reject) => { + const child = spawn("kubectl", args, { + stdio: ["pipe", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + + child.stdout.on("data", (data) => { stdout += data.toString(); }); + child.stderr.on("data", (data) => { stderr += data.toString(); }); + + if (options.input) { + child.stdin.write(options.input); + child.stdin.end(); + } + + child.on("error", reject); + child.on("close", (code) => { + resolve({ stdout, stderr, code: code ?? 
1 });
+    });
+  });
+}
+
+async function checkPrerequisites(): Promise<boolean> {
+  // Check kubectl access
+  const { code, stderr } = await kubectl(["get", "namespace", NAMESPACE]);
+  if (code !== 0) {
+    console.error(`āŒ Namespace '${NAMESPACE}' not found.`);
+    console.error("Run: kubectl apply -f k8s-setup.yaml");
+    console.error(stderr);
+    return false;
+  }
+  console.log(`āœ… Namespace '${NAMESPACE}' exists`);
+  return true;
+}
+
+async function createConfigMap(name: string, mainTf: string): Promise<void> {
+  // Delete if exists
+  await kubectl(["delete", "configmap", name, "-n", NAMESPACE, "--ignore-not-found"]);
+
+  // Create ConfigMap with the Terraform config
+  const configMapYaml = `
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: ${name}
+  namespace: ${NAMESPACE}
+data:
+  main.tf: |
+${mainTf.split('\n').map(line => '    ' + line).join('\n')}
+`;
+
+  const { code, stderr } = await kubectl(["apply", "-f", "-"], { input: configMapYaml });
+  if (code !== 0) {
+    throw new Error(`Failed to create ConfigMap: ${stderr}`);
+  }
+}
+
+async function waitForJobCompletion(jobName: string, timeoutMs: number = 3600000): Promise<"complete" | "failed"> {
+  const startTime = Date.now();
+  const pollInterval = 5000; // 5 seconds
+
+  while (Date.now() - startTime < timeoutMs) {
+    // Get job status
+    const { stdout, code } = await kubectl([
+      "get", "job", jobName, "-n", NAMESPACE,
+      "-o", "jsonpath={.status.succeeded},{.status.failed},{.status.active}"
+    ]);
+
+    if (code !== 0) {
+      console.log("  Job not found yet, waiting...");
+      await new Promise(r => setTimeout(r, pollInterval));
+      continue;
+    }
+
+    const [succeeded, failed, active] = stdout.split(",");
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(0);
+
+    if (succeeded === "1") {
+      console.log(`  āœ… Job completed successfully (${elapsed}s)`);
+      return "complete";
+    }
+
+    if (failed === "1") {
+      console.log(`  āŒ Job failed (${elapsed}s)`);
+      return "failed";
+    }
+
+    // Still running - show progress
+    if (active === "1") {
+      // Try to get pod phase for more detail
+      const { stdout: podPhase } = await kubectl([
+        "get", "pods", "-n", NAMESPACE, "-l", `job-name=${jobName}`,
+        "-o", "jsonpath={.items[0].status.phase}"
+      ]);
+      console.log(`  ā³ Running... phase=${podPhase || "unknown"} elapsed=${elapsed}s`);
+    } else {
+      // Check if pod is pending due to resources
+      const { stdout: podInfo } = await kubectl([
+        "get", "pods", "-n", NAMESPACE, "-l", `job-name=${jobName}`,
+        "-o", "jsonpath={.items[0].status.phase},{.items[0].status.conditions[?(@.type=='PodScheduled')].reason}"
+      ]);
+      const [phase, reason] = (podInfo || ",").split(",");
+      if (reason === "Unschedulable") {
+        console.log(`  ā³ Pending... UNSCHEDULABLE - cluster can't fit the requested 4 CPU / 4Gi! elapsed=${elapsed}s`);
+      } else {
+        console.log(`  ā³ Pending... 
phase=${phase || "unknown"} reason=${reason || "starting"} elapsed=${elapsed}s`); + } + } + + await new Promise(r => setTimeout(r, pollInterval)); + } + + throw new Error(`Job timed out after ${timeoutMs}ms`); +} + +async function runJob(jobName: string, configMapName: string, command: string): Promise<{ duration: number; logs: string }> { + // Delete old job if exists + await kubectl(["delete", "job", jobName, "-n", NAMESPACE, "--ignore-not-found"]); + + const jobYaml = ` +apiVersion: batch/v1 +kind: Job +metadata: + name: ${jobName} + namespace: ${NAMESPACE} +spec: + ttlSecondsAfterFinished: 300 + backoffLimit: 0 + activeDeadlineSeconds: 3600 + template: + spec: + restartPolicy: Never + serviceAccountName: otaco-runner + # Use dedicated runner nodes only + nodeSelector: + dedicated: otaco-runner + tolerations: + - key: "dedicated" + operator: "Equal" + value: "otaco-runner" + effect: "NoSchedule" + containers: + - name: terraform + image: hashicorp/terraform:${TF_VERSION} + command: ["sh", "-c"] + args: + - | + set -e + cd /workspace + cp /config/main.tf . + echo "=== Starting Terraform ===" + terraform init -input=false -no-color + echo "=== Init Complete ===" + ${command} + echo "=== Command Complete ===" + env: + - name: TF_IN_AUTOMATION + value: "1" + resources: + requests: + cpu: "4" + memory: "4Gi" + limits: + cpu: "4" + memory: "4Gi" + volumeMounts: + - name: config + mountPath: /config + - name: workspace + mountPath: /workspace + volumes: + - name: config + configMap: + name: ${configMapName} + - name: workspace + emptyDir: {} +`; + + const startTime = Date.now(); + + // Create job + console.log(`Creating job ${jobName}...`); + const { code: createCode, stderr: createErr } = await kubectl(["apply", "-f", "-"], { input: jobYaml }); + if (createCode !== 0) { + throw new Error(`Failed to create job: ${createErr}`); + } + + // Wait for job to complete with polling (shows progress) + console.log("Waiting for job to complete..."); + const status = await waitForJobCompletion(jobName); + + const duration = Date.now() - startTime; + + // Get logs + const { stdout: logs } = await kubectl(["logs", `job/${jobName}`, "-n", NAMESPACE]); + + if (status === "failed") { + console.error("Job failed. 
Logs:"); + console.error(logs); + throw new Error("Job failed"); + } + + return { duration, logs }; +} + +async function main() { + console.log("=".repeat(60)); + console.log("Kubernetes Runner Benchmark"); + console.log("=".repeat(60)); + console.log(`Namespace: ${NAMESPACE}`); + console.log(`Terraform: ${TF_VERSION}`); + console.log(`Resources: 10,000 null_resource`); + console.log(`Tests: PLAN and APPLY with real network latency`); + console.log("=".repeat(60)); + + // Check prerequisites + console.log("\n[1/5] Checking prerequisites..."); + if (!await checkPrerequisites()) { + process.exit(1); + } + + // Create ConfigMap with Terraform config + const configMapName = `tf-config-${Date.now()}`; + console.log(`\n[2/5] Creating ConfigMap ${configMapName}...`); + await createConfigMap(configMapName, MAIN_TF); + console.log("ConfigMap created"); + + try { + // Run PLAN job + console.log("\n[3/5] Running Terraform PLAN..."); + const planJobName = `${JOB_NAME_PREFIX}-plan-${Date.now()}`; + const planResult = await runJob( + planJobName, + configMapName, + "terraform plan -input=false -no-color -out=tfplan -parallelism=30" + ); + console.log(`Plan completed in ${planResult.duration}ms (${(planResult.duration/1000).toFixed(1)}s)`); + + // Run APPLY job + console.log("\n[4/5] Running Terraform APPLY..."); + const applyJobName = `${JOB_NAME_PREFIX}-apply-${Date.now()}`; + const applyResult = await runJob( + applyJobName, + configMapName, + "terraform apply -auto-approve -input=false -no-color -parallelism=30" + ); + console.log(`Apply completed in ${applyResult.duration}ms (${(applyResult.duration/1000).toFixed(1)}s)`); + + // Show logs + console.log("\n[5/5] Results"); + console.log("\nLast 500 chars of apply logs:"); + console.log(applyResult.logs.slice(-500)); + + // Summary + console.log("\n" + "=".repeat(60)); + console.log("SUMMARY - KUBERNETES RUNNER (with network latency)"); + console.log("=".repeat(60)); + console.log(`Plan time: ${planResult.duration}ms (${(planResult.duration/1000).toFixed(1)}s) = ${(planResult.duration/60000).toFixed(2)} min`); + console.log(`Apply time: ${applyResult.duration}ms (${(applyResult.duration/1000).toFixed(1)}s) = ${(applyResult.duration/60000).toFixed(2)} min`); + console.log(`Total: ${planResult.duration + applyResult.duration}ms = ${((planResult.duration + applyResult.duration)/60000).toFixed(2)} min`); + console.log("=".repeat(60)); + + console.log("\nCOMPARISON:"); + console.log("ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”"); + console.log("│ Environment │ Apply Time │ vs K8s │"); + console.log("ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤"); + console.log(`│ K8s (this) │ ${(applyResult.duration/60000).toFixed(2)} min │ 1.0x │`); + console.log("│ Docker (local) │ ~4-5 min │ ~1.0x │"); + console.log("│ E2B │ ~14 min │ ~3.0x │"); + console.log("ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜"); + + console.log("\nšŸ’” This includes real network latency to your cluster!"); + + } finally { + // Cleanup ConfigMap + console.log("\nCleaning up..."); + await kubectl(["delete", "configmap", configMapName, "-n", NAMESPACE, "--ignore-not-found"]); + } +} + +main().catch((err) => { + console.error("Benchmark failed:", err); + 
process.exit(1);
+});
diff --git a/sandbox-sidecar/scripts/k8s-setup.yaml b/sandbox-sidecar/scripts/k8s-setup.yaml
new file mode 100644
index 000000000..d9efc6588
--- /dev/null
+++ b/sandbox-sidecar/scripts/k8s-setup.yaml
@@ -0,0 +1,82 @@
+# k8s-setup.yaml
+# Setup namespace and resources for testing OpenTaco runners on K8s
+#
+# Apply with:
+#   kubectl apply -f k8s-setup.yaml
+#
+# Delete with:
+#   kubectl delete -f k8s-setup.yaml
+
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: otaco-runners
+  labels:
+    app: otaco-runner
+    purpose: benchmark
+
+---
+# ServiceAccount for the runner jobs
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: otaco-runner
+  namespace: otaco-runners
+
+---
+# Role with minimal permissions (just needs to run containers)
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: otaco-runner-role
+  namespace: otaco-runners
+rules: [] # No special permissions needed for running terraform
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: otaco-runner-binding
+  namespace: otaco-runners
+subjects:
+  - kind: ServiceAccount
+    name: otaco-runner
+    namespace: otaco-runners
+roleRef:
+  kind: Role
+  name: otaco-runner-role
+  apiGroup: rbac.authorization.k8s.io
+
+---
+# Optional: ResourceQuota to limit resource usage (2 jobs max)
+apiVersion: v1
+kind: ResourceQuota
+metadata:
+  name: otaco-runner-quota
+  namespace: otaco-runners
+spec:
+  hard:
+    requests.cpu: "16"
+    requests.memory: "16Gi"
+    limits.cpu: "16"
+    limits.memory: "16Gi"
+    pods: "2"
+
+---
+# Optional: LimitRange for default resource limits
+apiVersion: v1
+kind: LimitRange
+metadata:
+  name: otaco-runner-limits
+  namespace: otaco-runners
+spec:
+  limits:
+    - default:
+        cpu: "4"
+        memory: "4Gi"
+      defaultRequest:
+        cpu: "4"
+        memory: "4Gi"
+      type: Container
+
diff --git a/sandbox-sidecar/scripts/test-docker-runner.ts b/sandbox-sidecar/scripts/test-docker-runner.ts
new file mode 100644
index 000000000..247fa72ea
--- /dev/null
+++ b/sandbox-sidecar/scripts/test-docker-runner.ts
@@ -0,0 +1,173 @@
+// test-docker-runner.ts
+//
+// Test the Docker runner locally with a simple 100-null benchmark.
+// This simulates what would happen when the sidecar runs with SANDBOX_RUNNER=docker
+//
+// Usage:
+//   cd sandbox-sidecar
+//   npx tsx scripts/test-docker-runner.ts
+
+import * as fs from "fs";
+import * as path from "path";
+import * as os from "os";
+import { execSync } from "child_process";
+import { DockerSandboxRunner } from "../src/runners/dockerRunner.js";
+import { SandboxRunRecord } from "../src/jobs/jobTypes.js";
+
+// Simple benchmark: 100 null resources (quick test)
+const MAIN_TF = `
+terraform {
+  required_providers {
+    null = {
+      source  = "hashicorp/null"
+      version = "~> 3.0"
+    }
+  }
+}
+
+resource "null_resource" "test" {
+  count = 100
+
+  triggers = {
+    index = count.index
+  }
+}
+
+output "resource_count" {
+  value = length(null_resource.test)
+}
+`;
+
+async function createConfigArchive(): Promise<string> {
+  // Create temp dir with terraform files
+  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "tf-test-"));
+  fs.writeFileSync(path.join(tempDir, "main.tf"), MAIN_TF);
+
+  // Create tar.gz archive
+  const archivePath = path.join(tempDir, "bundle.tar.gz");
+  execSync(`tar -czf bundle.tar.gz main.tf`, { cwd: tempDir });
+
+  // Read and base64 encode
+  const archiveBuffer = fs.readFileSync(archivePath);
+  const base64 = archiveBuffer.toString("base64");
+
+  // Cleanup temp dir
+  fs.rmSync(tempDir, { recursive: true, force: true });
+
+  return base64;
+}
+
+async function main() {
+  console.log("=".repeat(60));
+  console.log("Docker Runner Test");
+  console.log("=".repeat(60));
+  console.log("Resources: 100 null_resource (quick test)");
+  console.log("Operations: plan, then apply");
+  console.log("=".repeat(60));
+
+  // Create the Docker runner
+  const runner = new DockerSandboxRunner({
+    terraformVersion: "1.5.7",
+  });
+
+  console.log(`\nRunner: ${runner.name}`);
+
+  // Create config archive
+  console.log("\n[1/4] Creating config archive...");
+  const configArchive = await createConfigArchive();
+  console.log(`Archive size: ${configArchive.length} bytes (base64)`);
+
+  // Create a mock job for PLAN
+  const planJob: SandboxRunRecord = {
+    id: "test-plan-001",
+    status: "pending",
+    logs: "",
+    payload: {
+      operation: "plan",
+      runId: "test-run-001",
+      orgId: "test-org",
+      unitId: "test-unit",
+      configurationVersionId: "cv-001",
+      isDestroy: false,
+      terraformVersion: "1.5.7",
+      engine: "terraform",
+      configArchive,
+    },
+    createdAt: new Date(),
+    updatedAt: new Date(),
+  };
+
+  // Run PLAN
+  console.log("\n[2/4] Running terraform plan via Docker runner...");
+  const planStart = Date.now();
+
+  let logOutput = "";
+  const planResult = await runner.run(planJob, (chunk) => {
+    logOutput += chunk;
+    // Don't print every chunk, just collect
+  });
+
+  const planTime = Date.now() - planStart;
+  console.log(`Plan time: ${planTime}ms (${(planTime / 1000).toFixed(1)}s)`);
+  console.log(`Plan has changes: ${planResult.result?.hasChanges}`);
+  console.log(`Resources to add: ${planResult.result?.resourceAdditions}`);
+  console.log(`Log output size: ${planResult.logs.length} bytes`);
+
+  // Create a mock job for APPLY
+  const applyJob: SandboxRunRecord = {
+    id: "test-apply-001",
+    status: "pending",
+    logs: "",
+    payload: {
+      operation: "apply",
+      runId: "test-run-001",
+      orgId: "test-org",
+      unitId: "test-unit",
+      configurationVersionId: "cv-001",
+      isDestroy: false,
+      terraformVersion: "1.5.7",
+      engine: "terraform",
+      configArchive,
+    },
+    createdAt: new Date(),
+    updatedAt: new Date(),
+  };
+
+  // Run APPLY
+  console.log("\n[3/4] Running terraform apply via Docker runner...");
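+  // Note: each runner.run() builds a fresh workspace from configArchive; since this
+  // apply job carries no `state` field, the apply starts from empty state.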
const applyStart = Date.now(); + + logOutput = ""; + const applyResult = await runner.run(applyJob, (chunk) => { + logOutput += chunk; + }); + + const applyTime = Date.now() - applyStart; + console.log(`Apply time: ${applyTime}ms (${(applyTime / 1000).toFixed(1)}s)`); + console.log(`State size: ${applyResult.result?.state?.length || 0} bytes (base64)`); + console.log(`Log output size: ${applyResult.logs.length} bytes`); + + // Show last bit of logs + console.log("\n[4/4] Last 300 chars of apply logs:"); + console.log(applyResult.logs.slice(-300)); + + // Summary + console.log("\n" + "=".repeat(60)); + console.log("SUMMARY"); + console.log("=".repeat(60)); + console.log(`Plan time: ${planTime}ms (${(planTime / 1000).toFixed(1)}s)`); + console.log(`Apply time: ${applyTime}ms (${(applyTime / 1000).toFixed(1)}s)`); + console.log(`Total: ${planTime + applyTime}ms (${((planTime + applyTime) / 1000).toFixed(1)}s)`); + console.log("=".repeat(60)); + + console.log("\nāœ… Docker runner test completed successfully!"); + console.log("\nTo use in production, set:"); + console.log(" SANDBOX_RUNNER=docker"); + console.log(" DOCKER_TERRAFORM_VERSION=1.5.7 # optional, defaults to 1.5.7"); +} + +main().catch((err) => { + console.error("Test failed:", err); + process.exit(1); +}); + diff --git a/sandbox-sidecar/src/config.ts b/sandbox-sidecar/src/config.ts index d2d818f5d..74e5343c8 100644 --- a/sandbox-sidecar/src/config.ts +++ b/sandbox-sidecar/src/config.ts @@ -2,7 +2,7 @@ import dotenv from "dotenv"; dotenv.config(); -export type RunnerType = "e2b"; +export type RunnerType = "e2b" | "docker"; export interface AppConfig { port: number; @@ -11,6 +11,10 @@ export interface AppConfig { apiKey?: string; bareBonesTemplateId?: string; // Base template for custom versions }; + docker: { + image?: string; // Custom Docker image (default: hashicorp/terraform:) + terraformVersion?: string; // Default TF version if not specified in job + }; } const parsePort = (value: string | undefined, fallback: number) => { @@ -27,17 +31,21 @@ const parsePort = (value: string | undefined, fallback: number) => { export function loadConfig(): AppConfig { const runnerEnv = (process.env.SANDBOX_RUNNER || "e2b").toLowerCase(); - if (runnerEnv !== "e2b") { - throw new Error("Only E2B runner is supported. Set SANDBOX_RUNNER=e2b"); + if (runnerEnv !== "e2b" && runnerEnv !== "docker") { + throw new Error("Unsupported runner. Set SANDBOX_RUNNER=e2b or SANDBOX_RUNNER=docker"); } return { port: parsePort(process.env.PORT, 9100), - runner: "e2b", + runner: runnerEnv as RunnerType, e2b: { apiKey: process.env.E2B_API_KEY, bareBonesTemplateId: process.env.E2B_BAREBONES_TEMPLATE_ID, }, + docker: { + image: process.env.DOCKER_TERRAFORM_IMAGE, + terraformVersion: process.env.DOCKER_TERRAFORM_VERSION || "1.5.7", + }, }; } diff --git a/sandbox-sidecar/src/jobs/jobStore.ts b/sandbox-sidecar/src/jobs/jobStore.ts index 8384d013b..d0d62fba7 100644 --- a/sandbox-sidecar/src/jobs/jobStore.ts +++ b/sandbox-sidecar/src/jobs/jobStore.ts @@ -6,8 +6,36 @@ import { SandboxRunResult, } from "./jobTypes.js"; +/** + * Efficient log buffer that avoids O(n²) string concatenation. + * Uses array of chunks + lazy join for O(n) total complexity. 
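+ * toString() caches the joined result until the next append, so repeated reads stay cheap.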
+ */ +class LogBuffer { + private chunks: string[] = []; + private cachedResult: string | null = null; + + append(chunk: string) { + if (!chunk) return; + this.chunks.push(chunk); + this.cachedResult = null; // Invalidate cache + } + + toString(): string { + if (this.cachedResult === null) { + this.cachedResult = this.chunks.join(""); + } + return this.cachedResult; + } + + set(logs: string) { + this.chunks = [logs]; + this.cachedResult = logs; + } +} + export class JobStore { private jobs = new Map(); + private logBuffers = new Map(); create(payload: SandboxRunPayload): SandboxRunRecord { const id = `sbx_run_${nanoid(10)}`; @@ -21,11 +49,20 @@ export class JobStore { updatedAt: now, }; this.jobs.set(id, job); + this.logBuffers.set(id, new LogBuffer()); return job; } get(id: string): SandboxRunRecord | undefined { - return this.jobs.get(id); + const job = this.jobs.get(id); + if (job) { + // Lazily materialize logs from buffer + const buffer = this.logBuffers.get(id); + if (buffer) { + job.logs = buffer.toString(); + } + } + return job; } updateStatus(id: string, status: JobStatus, logs?: string, error?: string) { @@ -33,6 +70,10 @@ export class JobStore { if (!job) return; job.status = status; if (typeof logs === "string") { + const buffer = this.logBuffers.get(id); + if (buffer) { + buffer.set(logs); + } job.logs = logs; } job.error = error; @@ -40,10 +81,15 @@ export class JobStore { } appendLogs(id: string, chunk: string) { + const buffer = this.logBuffers.get(id); + if (!buffer || !chunk) return; + buffer.append(chunk); + // Note: We don't update job.logs here - it's materialized lazily in get() + // This avoids O(n²) string concatenation! const job = this.jobs.get(id); - if (!job || !chunk) return; - job.logs = `${job.logs}${chunk}`; - job.updatedAt = new Date(); + if (job) { + job.updatedAt = new Date(); + } } setResult(id: string, result: SandboxRunResult | undefined) { @@ -52,4 +98,14 @@ export class JobStore { job.result = result; job.updatedAt = new Date(); } + + // Clean up buffer when job is done (optional memory optimization) + finalize(id: string) { + const job = this.jobs.get(id); + const buffer = this.logBuffers.get(id); + if (job && buffer) { + job.logs = buffer.toString(); + } + // Keep buffer for now in case logs are requested again + } } diff --git a/sandbox-sidecar/src/routes/runRoutes.ts b/sandbox-sidecar/src/routes/runRoutes.ts index 25988baea..2d7e72156 100644 --- a/sandbox-sidecar/src/routes/runRoutes.ts +++ b/sandbox-sidecar/src/routes/runRoutes.ts @@ -17,6 +17,14 @@ export function createRunRouter( router.post("/api/v1/sandboxes/runs", (req, res, next) => { try { const parsed = runRequestSchema.parse(req.body); + + // Debug: log received metadata including AWS region + console.log("Received run request metadata:", { + hasMetadata: !!parsed.metadata, + awsRegion: parsed.metadata?.AWS_REGION || "(not set)", + awsKeyLength: parsed.metadata?.AWS_ACCESS_KEY_ID?.length || 0, + }); + const payload: SandboxRunPayload = { operation: parsed.operation, runId: parsed.run_id, diff --git a/sandbox-sidecar/src/runners/dockerRunner.ts b/sandbox-sidecar/src/runners/dockerRunner.ts new file mode 100644 index 000000000..4803a1eae --- /dev/null +++ b/sandbox-sidecar/src/runners/dockerRunner.ts @@ -0,0 +1,344 @@ +import { spawn } from "child_process"; +import * as fs from "fs"; +import * as path from "path"; +import * as os from "os"; +import { SandboxRunner, RunnerOutput } from "./types.js"; +import { SandboxRunRecord, SandboxRunResult } from "../jobs/jobTypes.js"; +import { logger 
} from "../logger.js"; + +export interface DockerRunnerOptions { + image?: string; // Docker image with Terraform installed + terraformVersion?: string; // Default TF version if not specified in job +} + +/** + * Docker-based runner that executes Terraform commands in a local Docker container. + * This is much faster than E2B (~3x) and can be used for local testing or K8s deployments. + */ +export class DockerSandboxRunner implements SandboxRunner { + readonly name = "docker"; + + constructor(private readonly options: DockerRunnerOptions = {}) {} + + async run(job: SandboxRunRecord, appendLog?: (chunk: string) => void): Promise { + if (job.payload.operation === "plan") { + return this.runPlan(job, appendLog); + } + return this.runApply(job, appendLog); + } + + private getImage(requestedVersion?: string): string { + if (this.options.image) { + return this.options.image; + } + const version = requestedVersion || this.options.terraformVersion || "1.5.7"; + return `hashicorp/terraform:${version}`; + } + + private async runPlan(job: SandboxRunRecord, appendLog?: (chunk: string) => void): Promise { + const workDir = await this.setupWorkspace(job); + const image = this.getImage(job.payload.terraformVersion); + + logger.info({ workDir, image, operation: "plan" }, "Starting Docker plan"); + + try { + const logs: string[] = []; + const streamLog = (chunk: string) => { + if (!chunk) return; + logs.push(chunk); + appendLog?.(chunk); + }; + + // Run terraform init + await this.runDockerCommand( + image, + workDir, + ["init", "-input=false", "-no-color"], + job.payload.metadata, + streamLog, + ); + + // Run terraform plan + const planArgs = ["plan", "-input=false", "-no-color", "-out=tfplan.binary", "-parallelism=30"]; + if (job.payload.isDestroy) { + planArgs.splice(1, 0, "-destroy"); + } + await this.runDockerCommand(image, workDir, planArgs, job.payload.metadata, streamLog); + + // Get plan JSON + const showResult = await this.runDockerCommand( + image, + workDir, + ["show", "-json", "tfplan.binary"], + job.payload.metadata, + ); + + const planJSON = showResult.stdout; + const summary = this.summarizePlan(planJSON); + const result: SandboxRunResult = { + hasChanges: summary.hasChanges, + resourceAdditions: summary.additions, + resourceChanges: summary.changes, + resourceDestructions: summary.destroys, + planJSON: Buffer.from(planJSON, "utf8").toString("base64"), + }; + + return { logs: logs.join(""), result }; + } finally { + await this.cleanup(workDir); + } + } + + private async runApply(job: SandboxRunRecord, appendLog?: (chunk: string) => void): Promise { + const startTime = Date.now(); + const workDir = await this.setupWorkspace(job); + const image = this.getImage(job.payload.terraformVersion); + + logger.info({ + workDir, + image, + operation: "apply", + isDestroy: job.payload.isDestroy, + }, "Starting Docker apply"); + + try { + const logs: string[] = []; + const streamLog = (chunk: string) => { + if (!chunk) return; + logs.push(chunk); + appendLog?.(chunk); + }; + + // Run terraform init + logger.info({ elapsed: Date.now() - startTime }, "Starting terraform init"); + await this.runDockerCommand( + image, + workDir, + ["init", "-input=false", "-no-color"], + job.payload.metadata, + streamLog, + ); + logger.info({ elapsed: Date.now() - startTime }, "Terraform init completed"); + + // Run terraform apply/destroy + const applyCommand = job.payload.isDestroy ? 
"destroy" : "apply"; + logger.info({ command: applyCommand, elapsed: Date.now() - startTime }, "Starting terraform apply/destroy"); + + await this.runDockerCommand( + image, + workDir, + [applyCommand, "-auto-approve", "-input=false", "-no-color", "-parallelism=30"], + job.payload.metadata, + streamLog, + ); + logger.info({ command: applyCommand, elapsed: Date.now() - startTime }, "Terraform apply/destroy completed"); + + // Read state file + let stateBase64 = ""; + const execDir = job.payload.workingDirectory + ? path.join(workDir, job.payload.workingDirectory) + : workDir; + const statePath = path.join(execDir, "terraform.tfstate"); + + if (fs.existsSync(statePath)) { + const stateContent = fs.readFileSync(statePath, "utf-8"); + stateBase64 = Buffer.from(stateContent, "utf8").toString("base64"); + logger.info({ stateSize: stateContent.length }, "Captured terraform.tfstate"); + } + + const result: SandboxRunResult = { + state: stateBase64, + }; + + logger.info({ elapsed: Date.now() - startTime }, "Apply operation completed"); + return { logs: logs.join(""), result }; + } finally { + await this.cleanup(workDir); + } + } + + private async setupWorkspace(job: SandboxRunRecord): Promise { + // Create temp directory + const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "tf-docker-")); + + // Write the config archive + const archivePath = path.join(workDir, "bundle.tar.gz"); + const archiveBuffer = Buffer.from(job.payload.configArchive, "base64"); + fs.writeFileSync(archivePath, archiveBuffer); + + // Extract the archive + await this.runLocalCommand("tar", ["-xzf", "bundle.tar.gz"], workDir); + + // Determine exec directory + const execDir = job.payload.workingDirectory + ? path.join(workDir, job.payload.workingDirectory) + : workDir; + + // Write state file if provided + if (job.payload.state) { + const statePath = path.join(execDir, "terraform.tfstate"); + const stateBuffer = Buffer.from(job.payload.state, "base64"); + fs.writeFileSync(statePath, stateBuffer); + logger.info({ stateSize: stateBuffer.length, statePath }, "Wrote state file"); + } + + return workDir; + } + + private async cleanup(workDir: string): Promise { + try { + fs.rmSync(workDir, { recursive: true, force: true }); + } catch (err) { + logger.warn({ err, workDir }, "Failed to cleanup work directory"); + } + } + + private buildEnvArgs(metadata?: Record): string[] { + const envArgs: string[] = [ + "-e", "TF_IN_AUTOMATION=1", + ]; + + // Add AWS credentials if provided + if (metadata?.AWS_ACCESS_KEY_ID) { + envArgs.push("-e", `AWS_ACCESS_KEY_ID=${metadata.AWS_ACCESS_KEY_ID}`); + envArgs.push("-e", `AWS_SECRET_ACCESS_KEY=${metadata.AWS_SECRET_ACCESS_KEY || ""}`); + envArgs.push("-e", `AWS_REGION=${metadata.AWS_REGION || "us-east-1"}`); + envArgs.push("-e", `AWS_DEFAULT_REGION=${metadata.AWS_REGION || "us-east-1"}`); + } + + return envArgs; + } + + private async runDockerCommand( + image: string, + workDir: string, + args: string[], + metadata?: Record, + onOutput?: (chunk: string) => void, + ): Promise<{ stdout: string; stderr: string }> { + const execDir = workDir; // Mount the work dir as /workspace + const envArgs = this.buildEnvArgs(metadata); + + const dockerArgs = [ + "run", + "--rm", + "-v", `${execDir}:/workspace`, + "-w", "/workspace", + ...envArgs, + image, + ...args, + ]; + + logger.info({ cmd: `docker ${dockerArgs.join(" ").slice(0, 100)}...` }, "Running Docker command"); + + return new Promise((resolve, reject) => { + const child = spawn("docker", dockerArgs); + + let stdout = ""; + let stderr = ""; + + // Batch 
+  private async cleanup(workDir: string): Promise<void> {
+    try {
+      fs.rmSync(workDir, { recursive: true, force: true });
+    } catch (err) {
+      logger.warn({ err, workDir }, "Failed to cleanup work directory");
+    }
+  }
+
+  private buildEnvArgs(metadata?: Record<string, string>): string[] {
+    const envArgs: string[] = [
+      "-e", "TF_IN_AUTOMATION=1",
+    ];
+
+    // Add AWS credentials if provided
+    if (metadata?.AWS_ACCESS_KEY_ID) {
+      envArgs.push("-e", `AWS_ACCESS_KEY_ID=${metadata.AWS_ACCESS_KEY_ID}`);
+      envArgs.push("-e", `AWS_SECRET_ACCESS_KEY=${metadata.AWS_SECRET_ACCESS_KEY || ""}`);
+      envArgs.push("-e", `AWS_REGION=${metadata.AWS_REGION || "us-east-1"}`);
+      envArgs.push("-e", `AWS_DEFAULT_REGION=${metadata.AWS_REGION || "us-east-1"}`);
+    }
+
+    return envArgs;
+  }
+
+  private async runDockerCommand(
+    image: string,
+    workDir: string,
+    args: string[],
+    metadata?: Record<string, string>,
+    onOutput?: (chunk: string) => void,
+  ): Promise<{ stdout: string; stderr: string }> {
+    const execDir = workDir; // Mount the work dir as /workspace
+    const envArgs = this.buildEnvArgs(metadata);
+
+    const dockerArgs = [
+      "run",
+      "--rm",
+      "-v", `${execDir}:/workspace`,
+      "-w", "/workspace",
+      ...envArgs,
+      image,
+      ...args,
+    ];
+
+    logger.info({ cmd: `docker ${dockerArgs.join(" ").slice(0, 100)}...` }, "Running Docker command");
+
+    return new Promise((resolve, reject) => {
+      const child = spawn("docker", dockerArgs);
+
+      let stdout = "";
+      let stderr = "";
+
+      // Batch log output (same as the production E2B runner)
+      let pendingChunks: string[] = [];
+      let flushTimeout: NodeJS.Timeout | null = null;
+      const FLUSH_INTERVAL_MS = 100;
+      const FLUSH_SIZE_BYTES = 4096;
+
+      const flushPending = () => {
+        if (pendingChunks.length === 0) return;
+        const batch = pendingChunks.join("");
+        pendingChunks = [];
+        onOutput?.(batch);
+      };
+
+      const bufferChunk = (chunk: string) => {
+        pendingChunks.push(chunk);
+        const totalSize = pendingChunks.reduce((sum, c) => sum + c.length, 0);
+
+        if (totalSize >= FLUSH_SIZE_BYTES) {
+          if (flushTimeout) {
+            clearTimeout(flushTimeout);
+            flushTimeout = null;
+          }
+          flushPending();
+        } else if (!flushTimeout) {
+          flushTimeout = setTimeout(() => {
+            flushTimeout = null;
+            flushPending();
+          }, FLUSH_INTERVAL_MS);
+        }
+      };
+
+      child.stdout.on("data", (data: Buffer) => {
+        const chunk = data.toString();
+        stdout += chunk;
+        bufferChunk(chunk);
+      });
+
+      child.stderr.on("data", (data: Buffer) => {
+        const chunk = data.toString();
+        stderr += chunk;
+        bufferChunk(chunk);
+      });
+
+      child.on("error", reject);
+      child.on("close", (code) => {
+        if (flushTimeout) {
+          clearTimeout(flushTimeout);
+        }
+        flushPending();
+
+        if (code !== 0) {
+          reject(new Error(`terraform ${args[0]} exited with code ${code}\n${stderr}`));
+        } else {
+          resolve({ stdout, stderr });
+        }
+      });
+    });
+  }
+
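+  // Flush policy sketch: chunks are coalesced and delivered once EITHER 4 KB
+  // accumulates OR 100 ms passes, whichever comes first (times illustrative):
+  //
+  //   t=0ms   "null_resource.massive[0]: Creating..." -> buffered, timer armed
+  //   t=40ms  more chunks arrive, total now >= 4 KB   -> size flush, timer cleared
+  //   t=900ms final 80-byte chunk                     -> timer flush at ~t=1000ms
+  //
+  // Quiet streams therefore see at most ~10 callbacks/second, and heavy ones
+  // flush in ~4 KB batches instead of once per output line - which is what
+  // matters for 10k-resource applies.
+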
+  private async runLocalCommand(cmd: string, args: string[], cwd: string): Promise<void> {
+    return new Promise((resolve, reject) => {
+      const child = spawn(cmd, args, { cwd });
+      child.on("error", reject);
+      child.on("close", (code) => {
+        if (code !== 0) {
+          reject(new Error(`${cmd} exited with code ${code}`));
+        } else {
+          resolve();
+        }
+      });
+    });
+  }
+
+  private summarizePlan(planJSON: string) {
+    try {
+      const parsed = JSON.parse(planJSON);
+      const changes = parsed?.resource_changes ?? [];
+      let additions = 0;
+      let updates = 0;
+      let destroys = 0;
+
+      for (const change of changes) {
+        const actions: string[] = change?.change?.actions ?? [];
+        if (actions.includes("create")) additions += 1;
+        if (actions.includes("update")) updates += 1;
+        if (actions.includes("delete") || actions.includes("destroy")) destroys += 1;
+        if (actions.includes("replace")) {
+          additions += 1;
+          destroys += 1;
+        }
+      }
+
+      return {
+        hasChanges: additions + updates + destroys > 0,
+        additions,
+        changes: updates,
+        destroys,
+      };
+    } catch (error) {
+      logger.warn({ error }, "Failed to parse terraform plan JSON");
+      return { hasChanges: false, additions: 0, changes: 0, destroys: 0 };
+    }
+  }
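+
+  // Action-to-count mapping applied above, for a few representative
+  // resource_changes entries. Replacements normally surface as the action
+  // pair ["delete","create"] (or ["create","delete"] for
+  // create-before-destroy), so both counters increment:
+  //
+  //   actions               additions  changes  destroys
+  //   ["create"]                +1        -        -
+  //   ["update"]                 -       +1        -
+  //   ["delete"]                 -        -       +1
+  //   ["delete","create"]       +1        -       +1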
+}
+
diff --git a/sandbox-sidecar/src/runners/e2bRunner.ts b/sandbox-sidecar/src/runners/e2bRunner.ts
index 84a6323a0..5ed8ad479 100644
--- a/sandbox-sidecar/src/runners/e2bRunner.ts
+++ b/sandbox-sidecar/src/runners/e2bRunner.ts
@@ -47,27 +47,35 @@ export class E2BSandboxRunner implements SandboxRunner {
       appendLog?.(chunk);
     };
 
-    // Run terraform init
+    // Run terraform init (with AWS creds if configured for benchmark)
+    // -plugin-dir points to pre-extracted providers from the template build (instant, no download)
+    const metadata = job.payload.metadata;
     await this.runTerraformCommand(
       sandbox,
       workDir,
-      ["init", "-input=false", "-no-color"],
+      ["init", "-input=false", "-no-color", "-plugin-dir=/usr/share/terraform/providers"],
      logs,
       streamLog,
+      metadata,
     );
 
     // Run terraform plan
-    const planArgs = ["plan", "-input=false", "-no-color", "-out=tfplan.binary"];
+    // Use higher parallelism for faster execution (the default is 10).
+    // 30 is a good balance: faster than the default, but unlikely to hit AWS rate limits.
+    const planArgs = ["plan", "-input=false", "-no-color", "-out=tfplan.binary", "-parallelism=30"];
     if (job.payload.isDestroy) {
       planArgs.splice(1, 0, "-destroy");
     }
-    await this.runTerraformCommand(sandbox, workDir, planArgs, logs, streamLog);
+    await this.runTerraformCommand(sandbox, workDir, planArgs, logs, streamLog, metadata);
 
     // Get plan JSON
     const showResult = await this.runTerraformCommand(
       sandbox,
       workDir,
       ["show", "-json", "tfplan.binary"],
+      undefined,
+      undefined,
+      metadata,
     );
 
     const planJSON = showResult.stdout;
@@ -89,7 +97,15 @@ export class E2BSandboxRunner implements SandboxRunner {
   private async runApply(job: SandboxRunRecord, appendLog?: (chunk: string) => void): Promise<RunnerOutput> {
     const requestedVersion = job.payload.terraformVersion || "1.5.7";
     const requestedEngine = job.payload.engine || "terraform";
+    const startTime = Date.now();
     const { sandbox, needsInstall } = await this.createSandbox(requestedVersion, requestedEngine);
+
+    logger.info({
+      sandboxId: sandbox.sandboxId,
+      workingDir: job.payload.workingDirectory,
+      isDestroy: job.payload.isDestroy,
+    }, "Starting apply operation");
+
     try {
       // Install IaC tool if using fallback template
       if (needsInstall) {
@@ -103,35 +119,103 @@
       appendLog?.(chunk);
     };
 
-    // Run terraform init
+    // Run terraform init (with AWS creds if configured for benchmark)
+    // -plugin-dir points to pre-extracted providers from the template build (instant, no download)
+    const metadata = job.payload.metadata;
+
+    logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Starting terraform init");
     await this.runTerraformCommand(
       sandbox,
       workDir,
-      ["init", "-input=false", "-no-color"],
+      ["init", "-input=false", "-no-color", "-plugin-dir=/usr/share/terraform/providers"],
       logs,
       streamLog,
+      metadata,
     );
+    logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Terraform init completed");
 
     // Run terraform apply/destroy
+    // Use higher parallelism for faster execution (the default is 10).
+    // 30 is a good balance: faster than the default, but unlikely to hit AWS rate limits.
      const applyCommand = job.payload.isDestroy ? "destroy" : "apply";
-      await this.runTerraformCommand(
+      logger.info({ sandboxId: sandbox.sandboxId, command: applyCommand, elapsed: Date.now() - startTime }, "Starting terraform apply/destroy");
+      const applyResult = await this.runTerraformCommand(
        sandbox,
        workDir,
-        [applyCommand, "-auto-approve", "-input=false", "-no-color"],
+        [applyCommand, "-auto-approve", "-input=false", "-no-color", "-parallelism=30"],
        logs,
        streamLog,
+        metadata,
      );
+      logger.info({ sandboxId: sandbox.sandboxId, command: applyCommand, elapsed: Date.now() - startTime }, "Terraform apply/destroy completed");
+
+      // Log the apply output for debugging
+      logger.info({
+        stdout: applyResult.stdout.slice(-500),
+        stderr: applyResult.stderr.slice(-500),
+      }, "terraform apply output (last 500 chars)");
+
+      // Read the actual terraform.tfstate file (not `terraform show -json`, which emits a different format).
+      // Check both the standard location and the workspace location.
+      let stateBase64 = "";
+
+      try {
+        // Try the standard location first
+        const statePath = `${workDir}/terraform.tfstate`;
+        let stateContent: string | null = null;
+
+        try {
+          stateContent = await sandbox.files.read(statePath);
+          logger.info({ path: statePath }, "found state file at standard location");
+        } catch {
+          // Try the workspace location - find the workspace state directory
+          const lsResult = await sandbox.commands.run(`find ${workDir} -name "terraform.tfstate" -type f 2>/dev/null | head -1`);
+          const foundPath = lsResult.stdout.trim();
+          if (foundPath) {
+            stateContent = await sandbox.files.read(foundPath);
+            logger.info({ path: foundPath }, "found state file at workspace location");
+          }
+        }
+
+        if (stateContent && stateContent.trim()) {
+          stateBase64 = Buffer.from(stateContent, "utf8").toString("base64");
+          logger.info({ stateSize: stateContent.length }, "captured terraform.tfstate file");
+        } else {
+          logger.info("no terraform.tfstate file found");
+        }
+      } catch (err) {
+        // State doesn't exist - this is OK for empty applies or destroys
+        logger.warn({ error: err }, "no state found after apply (may be empty apply)");
+      }
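+
+      // State lookup order above, for orientation (paths illustrative):
+      //   1. ${workDir}/terraform.tfstate            - plain configs
+      //   2. find ${workDir} -name terraform.tfstate - runs with a nested workingDirectory
+      // Falling through both leaves stateBase64 = "", which downstream treats
+      // as "no state produced" (valid for empty applies and destroys).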
 
-      // Read the state file
-      const statePath = `${workDir}/terraform.tfstate`;
-      const stateContent = await sandbox.files.read(statePath);
       const result: SandboxRunResult = {
-        state: Buffer.from(stateContent, "utf8").toString("base64"),
+        state: stateBase64,
       };
 
+      logger.info({ sandboxId: sandbox.sandboxId, elapsed: Date.now() - startTime }, "Apply operation completed successfully");
       return { logs: logs.join(""), result };
+    } catch (err) {
+      const elapsed = Date.now() - startTime;
+      const errorMessage = err instanceof Error ? err.message : String(err);
+
+      // Log detailed error info for debugging sandbox termination issues
+      logger.error({
+        sandboxId: sandbox.sandboxId,
+        elapsed,
+        elapsedSeconds: Math.round(elapsed / 1000),
+        errorMessage,
+        errorType: err instanceof Error ? err.constructor.name : typeof err,
+        workingDir: job.payload.workingDirectory,
+      }, "Apply operation failed - sandbox may have been terminated");
+
+      throw err;
     } finally {
-      await sandbox.kill();
+      try {
+        await sandbox.kill();
+      } catch (killErr) {
+        // Sandbox may already be dead; that's fine
+        logger.debug({ killErr }, "Failed to kill sandbox (may already be terminated)");
+      }
     }
   }
 
@@ -157,11 +241,21 @@
       logger.warn({ templateId, engine, version }, "no pre-built template found, will install at runtime");
     }
-    logger.info({ templateId }, "creating E2B sandbox");
+
+    const sandboxTimeoutSeconds = 60 * 60; // 1 hour
+
+    logger.info({
+      templateId,
+      timeoutSeconds: sandboxTimeoutSeconds,
+    }, "creating E2B sandbox");
+
     const sandbox = await Sandbox.create(templateId, {
       apiKey: this.options.apiKey,
+      timeoutMs: sandboxTimeoutSeconds * 1000,
     });
-    logger.info({ sandboxId: sandbox.sandboxId }, "E2B sandbox created");
+    logger.info({ sandboxId: sandbox.sandboxId }, "E2B sandbox created with extended timeout");
 
     // Store engine metadata for command execution
     (sandbox as any)._requestedEngine = engine;
@@ -169,6 +263,33 @@
     return { sandbox, needsInstall };
   }
 
+  /**
+   * Build environment variables for Terraform execution.
+   * Includes AWS credentials if provided in metadata for benchmark runs.
+   */
+  private buildTerraformEnvs(metadata?: Record<string, string>): Record<string, string> {
+    const envs: Record<string, string> = {
+      TF_IN_AUTOMATION: "1",
+    };
+
+    // Inject AWS credentials if provided (for benchmark runs with real resources)
+    if (metadata?.AWS_ACCESS_KEY_ID) {
+      envs.AWS_ACCESS_KEY_ID = metadata.AWS_ACCESS_KEY_ID;
+      envs.AWS_SECRET_ACCESS_KEY = metadata.AWS_SECRET_ACCESS_KEY || "";
+      envs.AWS_REGION = metadata.AWS_REGION || "us-east-1";
+      // Also set the default region for the AWS SDK
+      envs.AWS_DEFAULT_REGION = envs.AWS_REGION;
+      logger.info({
+        region: envs.AWS_REGION,
+        keyLength: envs.AWS_ACCESS_KEY_ID.length,
+      }, "AWS credentials injected into terraform environment");
+    } else {
+      logger.warn("No AWS credentials in metadata - AWS resources will fail");
+    }
+
+    return envs;
+  }
+
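+  // Example mapping (values illustrative): metadata of
+  //   { AWS_ACCESS_KEY_ID: "AKIA...", AWS_REGION: "eu-west-1" }
+  // yields envs of
+  //   { TF_IN_AUTOMATION: "1", AWS_ACCESS_KEY_ID: "AKIA...",
+  //     AWS_SECRET_ACCESS_KEY: "", AWS_REGION: "eu-west-1",
+  //     AWS_DEFAULT_REGION: "eu-west-1" }
+  // Defaulting the secret to "" (rather than omitting it) should surface a
+  // missing secret as an auth error inside the sandbox, instead of letting
+  // the provider silently fall back to some other credential source.
+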
"tofu" : "terraform"; @@ -280,24 +402,74 @@ export class E2BSandboxRunner implements SandboxRunner { logger.info({ cmd: cmdStr, cwd, engine }, "running IaC command in E2B sandbox"); let sawStream = false; + + // Batch log chunks to reduce callback frequency + // This significantly improves performance for large outputs (10k+ resources) + let pendingChunks: string[] = []; + let flushTimeout: ReturnType | null = null; + const FLUSH_INTERVAL_MS = 100; // Flush every 100ms + const FLUSH_SIZE_BYTES = 4096; // Or when buffer reaches 4KB + + const flushPending = () => { + if (pendingChunks.length === 0) return; + const batch = pendingChunks.join(""); + pendingChunks = []; + if (logBuffer) { + logBuffer.push(batch); + } + appendLog?.(batch); + }; + const pipeChunk = (chunk: string | undefined) => { if (!chunk) return; sawStream = true; - if (logBuffer) { - logBuffer.push(chunk); + pendingChunks.push(chunk); + + // Flush if buffer is large enough + const totalSize = pendingChunks.reduce((sum, c) => sum + c.length, 0); + if (totalSize >= FLUSH_SIZE_BYTES) { + if (flushTimeout) { + clearTimeout(flushTimeout); + flushTimeout = null; + } + flushPending(); + } else if (!flushTimeout) { + // Schedule a flush if not already pending + flushTimeout = setTimeout(() => { + flushTimeout = null; + flushPending(); + }, FLUSH_INTERVAL_MS); } - appendLog?.(chunk); }; + // Use long timeout for benchmarks (1 hour) - EKS and large operations need this + // Pro tier supports up to 24 hours, Hobby up to 1 hour + const timeoutMs = 60 * 60 * 1000; // 1 hour + + // Explicitly extend sandbox lifetime before running long commands + // This ensures the sandbox won't be killed mid-operation + try { + await Sandbox.setTimeout(sandbox.sandboxId, timeoutMs, { apiKey: this.options.apiKey }); + logger.info({ sandboxId: sandbox.sandboxId, timeoutMs }, "Extended sandbox timeout before command"); + } catch (err) { + logger.warn({ err, sandboxId: sandbox.sandboxId }, "Failed to extend sandbox timeout (continuing anyway)"); + } + const result = await sandbox.commands.run(cmdStr, { cwd, - envs: { - TF_IN_AUTOMATION: "1", - }, + envs: this.buildTerraformEnvs(metadata), onStdout: pipeChunk, onStderr: pipeChunk, + timeoutMs, }); + // Clear any pending flush timeout and flush remaining chunks + if (flushTimeout) { + clearTimeout(flushTimeout); + flushTimeout = null; + } + flushPending(); + const stdout = result.stdout; const stderr = result.stderr; const exitCode = result.exitCode; @@ -305,7 +477,10 @@ export class E2BSandboxRunner implements SandboxRunner { // Push any remaining buffered output for completeness in final log const mergedLogs = `${stdout}\n${stderr}`.trim(); if (!sawStream && mergedLogs.length > 0) { - pipeChunk(mergedLogs + "\n"); + if (logBuffer) { + logBuffer.push(mergedLogs + "\n"); + } + appendLog?.(mergedLogs + "\n"); } if (exitCode !== 0) { throw new Error( diff --git a/sandbox-sidecar/src/runners/index.ts b/sandbox-sidecar/src/runners/index.ts index 33e286e1f..e5c263fd9 100644 --- a/sandbox-sidecar/src/runners/index.ts +++ b/sandbox-sidecar/src/runners/index.ts @@ -1,11 +1,16 @@ import { AppConfig } from "../config.js"; import { SandboxRunner } from "./types.js"; import { E2BSandboxRunner } from "./e2bRunner.js"; +import { DockerSandboxRunner } from "./dockerRunner.js"; export function createRunner(config: AppConfig): SandboxRunner { - if (config.runner !== "e2b") { - throw new Error("Only E2B runner is supported. 
Set SANDBOX_RUNNER=e2b"); + switch (config.runner) { + case "docker": + return new DockerSandboxRunner(config.docker); + case "e2b": + return new E2BSandboxRunner(config.e2b); + default: + throw new Error(`Unsupported runner: ${config.runner}. Use SANDBOX_RUNNER=e2b or SANDBOX_RUNNER=docker`); } - return new E2BSandboxRunner(config.e2b); } diff --git a/sandbox-sidecar/src/templateRegistry.ts b/sandbox-sidecar/src/templateRegistry.ts index ae0760aea..eb5b0d2cb 100644 --- a/sandbox-sidecar/src/templateRegistry.ts +++ b/sandbox-sidecar/src/templateRegistry.ts @@ -8,7 +8,8 @@ export interface TemplateInfo { } // Template version - bump this when the build recipe changes -const TEMPLATE_VERSION = "0.1.2"; +// 0.2.2: Pre-extracted providers with -plugin-dir (no extraction at runtime) +const TEMPLATE_VERSION = "0.2.2"; // Generate alias matching the build system function aliasFor(engine: string, version: string, tplVersion: string): string { diff --git a/sandbox-sidecar/templates/build-all.ts b/sandbox-sidecar/templates/build-all.ts index 99c88c747..e8551f819 100644 --- a/sandbox-sidecar/templates/build-all.ts +++ b/sandbox-sidecar/templates/build-all.ts @@ -28,8 +28,8 @@ async function main() { await Template.build(buildTemplateObject(spec), { alias, - cpuCount: 2, - memoryMB: 4096, + cpuCount: 8, // Max for Pro tier (was 2) + memoryMB: 8192, // 8GB - Max for Pro tier (was 4GB) onBuildLogs: defaultBuildLogger(), }); diff --git a/sandbox-sidecar/templates/build.ts b/sandbox-sidecar/templates/build.ts index 0f2d0f79f..18f5bfe68 100644 --- a/sandbox-sidecar/templates/build.ts +++ b/sandbox-sidecar/templates/build.ts @@ -5,8 +5,8 @@ import { template } from "./test-template.ts"; async function main() { const buildInfo = await Template.build(template, { alias: "terraform-prebuilt-new", // template name / alias - cpuCount: 4, - memoryMB: 2048, + cpuCount: 8, // Max for Pro tier + memoryMB: 8192, // 8GB - Max for Pro tier onBuildLogs: defaultBuildLogger(), }); diff --git a/sandbox-sidecar/templates/manifest.ts b/sandbox-sidecar/templates/manifest.ts index 3748542e2..8129a3a23 100644 --- a/sandbox-sidecar/templates/manifest.ts +++ b/sandbox-sidecar/templates/manifest.ts @@ -7,7 +7,7 @@ export interface TemplateSpec { tplVersion: string; } -export const TEMPLATE_VERSION = "0.1.2"; // bump this when recipe changes +export const TEMPLATE_VERSION = "0.2.2"; // bump this when recipe changes (0.2.2: pre-extracted providers with -plugin-dir) export const TEMPLATES: TemplateSpec[] = [ { engine: "terraform", engineVersion: "1.0.11", tplVersion: TEMPLATE_VERSION }, diff --git a/sandbox-sidecar/templates/terraform-template.ts b/sandbox-sidecar/templates/terraform-template.ts index 02f7a9b83..fa4a88962 100644 --- a/sandbox-sidecar/templates/terraform-template.ts +++ b/sandbox-sidecar/templates/terraform-template.ts @@ -1,6 +1,25 @@ // templates/terraform-template.ts import { Template } from "e2b"; +// Common providers to pre-cache in templates for faster init +// These are the most commonly used providers across Terraform projects +const CACHED_PROVIDERS = ` +terraform { + required_providers { + aws = { source = "hashicorp/aws", version = "~> 5.0" } + google = { source = "hashicorp/google", version = "~> 5.0" } + azurerm = { source = "hashicorp/azurerm", version = "~> 3.0" } + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.0" } + helm = { source = "hashicorp/helm", version = "~> 2.0" } + random = { source = "hashicorp/random", version = "~> 3.0" } + null = { source = "hashicorp/null", version = "~> 
3.0" } + local = { source = "hashicorp/local", version = "~> 2.0" } + tls = { source = "hashicorp/tls", version = "~> 4.0" } + time = { source = "hashicorp/time", version = "~> 0.9" } + } +} +`; + export function terraformTemplate(version: string) { // version like "1.5.7" return Template() @@ -19,6 +38,24 @@ export function terraformTemplate(version: string) { chmod +x /usr/local/bin/terraform rm terraform.zip `) + // Pre-extract common providers during template build for instant init at runtime + // Using terraform init downloads and extracts providers, then we move them to a shared location + .runCmd(` + set -e + echo "Pre-extracting Terraform providers..." + mkdir -p /usr/share/terraform/providers + cd /tmp + cat > providers.tf << 'TFEOF' +${CACHED_PROVIDERS} +TFEOF + # Initialize to download and extract providers + terraform init -input=false + # Move extracted providers to shared location (already in correct layout for -plugin-dir) + mv .terraform/providers/* /usr/share/terraform/providers/ + rm -rf /tmp/providers.tf /tmp/.terraform /tmp/.terraform.lock.hcl + echo "Provider extraction complete. Pre-extracted providers:" + find /usr/share/terraform/providers -type f -name "terraform-provider-*" | head -20 + `) // back to normal user for sandbox runtime .setUser("user"); diff --git a/sandbox-sidecar/templates/tofu-template.ts b/sandbox-sidecar/templates/tofu-template.ts index bc5749c18..73237624e 100644 --- a/sandbox-sidecar/templates/tofu-template.ts +++ b/sandbox-sidecar/templates/tofu-template.ts @@ -1,6 +1,25 @@ // templates/tofu-template.ts import { Template } from "e2b"; +// Common providers to pre-cache in templates for faster init +// These are the most commonly used providers across Terraform/OpenTofu projects +const CACHED_PROVIDERS = ` +terraform { + required_providers { + aws = { source = "hashicorp/aws", version = "~> 5.0" } + google = { source = "hashicorp/google", version = "~> 5.0" } + azurerm = { source = "hashicorp/azurerm", version = "~> 3.0" } + kubernetes = { source = "hashicorp/kubernetes", version = "~> 2.0" } + helm = { source = "hashicorp/helm", version = "~> 2.0" } + random = { source = "hashicorp/random", version = "~> 3.0" } + null = { source = "hashicorp/null", version = "~> 3.0" } + local = { source = "hashicorp/local", version = "~> 2.0" } + tls = { source = "hashicorp/tls", version = "~> 4.0" } + time = { source = "hashicorp/time", version = "~> 0.9" } + } +} +`; + export function tofuTemplate(version: string) { return Template() .fromUbuntuImage("22.04") @@ -19,5 +38,23 @@ export function tofuTemplate(version: string) { # Verify installation /usr/local/bin/tofu version `) + // Pre-extract common providers during template build for instant init at runtime + // Using tofu init downloads and extracts providers, then we move them to a shared location + .runCmd(` + set -e + echo "Pre-extracting OpenTofu providers..." + mkdir -p /usr/share/terraform/providers + cd /tmp + cat > providers.tf << 'TFEOF' +${CACHED_PROVIDERS} +TFEOF + # Initialize to download and extract providers + tofu init -input=false + # Move extracted providers to shared location (already in correct layout for -plugin-dir) + mv .terraform/providers/* /usr/share/terraform/providers/ + rm -rf /tmp/providers.tf /tmp/.terraform /tmp/.terraform.lock.hcl + echo "Provider extraction complete. 
Pre-extracted providers:" + find /usr/share/terraform/providers -type f -name "terraform-provider-*" | head -20 + `) .setUser("user"); } diff --git a/taco/cmd/statesman/main.go b/taco/cmd/statesman/main.go index 360ddc7a7..0ec098c6b 100644 --- a/taco/cmd/statesman/main.go +++ b/taco/cmd/statesman/main.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "os/signal" + "strings" "time" "github.com/diggerhq/digger/opentaco/internal/analytics" @@ -21,6 +22,7 @@ import ( "github.com/diggerhq/digger/opentaco/internal/repositories" "github.com/diggerhq/digger/opentaco/internal/sandbox" "github.com/diggerhq/digger/opentaco/internal/storage" + "github.com/google/uuid" "github.com/kelseyhightower/envconfig" "github.com/labstack/echo/v4" echomiddleware "github.com/labstack/echo/v4/middleware" @@ -101,7 +103,16 @@ func main() { if err != nil { slog.Warn("Failed to list units from storage", "error", err) } else { + syncedCount := 0 + skippedCount := 0 for _, unit := range units { + // Skip non-unit paths (config-versions, plans, runs, etc.) + // Valid unit paths are: {org-uuid}/{unit-uuid} + if !isValidUnitPath(unit.ID) { + skippedCount++ + continue + } + if err := queryStore.SyncEnsureUnit(context.Background(), unit.ID); err != nil { slog.Warn("Failed to sync unit", "unit_id", unit.ID, "error", err) continue @@ -110,8 +121,9 @@ func main() { if err := queryStore.SyncUnitMetadata(context.Background(), unit.ID, unit.Size, unit.Updated); err != nil { slog.Warn("Failed to sync metadata for unit", "unit_id", unit.ID, "error", err) } + syncedCount++ } - slog.Info("Synced units from storage to database", "count", len(units)) + slog.Info("Synced units from storage to database", "synced", syncedCount, "skipped_non_units", skippedCount) } } else { slog.Info("Query backend already has units, skipping sync", "count", len(existingUnits)) @@ -275,3 +287,22 @@ func main() { analytics.SendEssential("server_shutdown_complete") slog.Info("Server shutdown complete") } + +// isValidUnitPath checks if a storage path matches the expected unit format: {org-uuid}/{unit-uuid} +// This filters out TFE-related paths like config-versions/, plans/, runs/, etc. +func isValidUnitPath(path string) bool { + parts := strings.SplitN(strings.Trim(path, "/"), "/", 2) + if len(parts) != 2 { + return false + } + + // Both parts must be valid UUIDs + if _, err := uuid.Parse(parts[0]); err != nil { + return false + } + if _, err := uuid.Parse(parts[1]); err != nil { + return false + } + + return true +} diff --git a/taco/internal/api/routes.go b/taco/internal/api/routes.go index a2245194c..fe64d14e2 100644 --- a/taco/internal/api/routes.go +++ b/taco/internal/api/routes.go @@ -5,9 +5,11 @@ import ( "fmt" "log" "net/http" + "os" "time" "github.com/diggerhq/digger/opentaco/internal/analytics" + "github.com/diggerhq/digger/opentaco/internal/github" "github.com/diggerhq/digger/opentaco/internal/tfe" authpkg "github.com/diggerhq/digger/opentaco/internal/auth" @@ -362,6 +364,48 @@ func RegisterRoutes(e *echo.Echo, deps Dependencies) { }) }) + // Register GitHub webhook for benchmarks (if OPENTACO_GITHUB_TOKEN is set) + RegisterGitHubWebhook(e, deps) + // Register webhook-authenticated internal routes (if OPENTACO_ENABLE_INTERNAL_ENDPOINTS is set) RegisterInternalRoutes(e, deps) } + +// RegisterGitHubWebhook registers the GitHub webhook endpoint for benchmark operations. +// This enables /opentaco plan, /opentaco apply, /opentaco destroy commands via PR comments. 
+// Required env vars (BOTH must be set to enable): +// - OPENTACO_GITHUB_TOKEN: GitHub personal access token or app token +// - OPENTACO_GITHUB_WEBHOOK_SECRET: Secret for validating webhook signatures (required for security) +func RegisterGitHubWebhook(e *echo.Echo, deps Dependencies) { + githubToken := os.Getenv("OPENTACO_GITHUB_TOKEN") + webhookSecret := os.Getenv("OPENTACO_GITHUB_WEBHOOK_SECRET") + + // Require BOTH token and secret to enable - security by default + if githubToken == "" || webhookSecret == "" { + if githubToken != "" && webhookSecret == "" { + log.Println("WARNING: OPENTACO_GITHUB_TOKEN set but OPENTACO_GITHUB_WEBHOOK_SECRET missing - webhook disabled for security") + } + return + } + + log.Println("Registering GitHub webhook endpoint at /webhooks/github") + + // Create GitHub client + ghClient := github.NewClient(githubToken) + + // Create command executor with sandbox and storage + executor := github.NewCommandExecutor( + ghClient, + deps.Sandbox, + deps.Repository, + deps.BlobStore, + ) + + // Create webhook handler + handler := github.NewWebhookHandler(ghClient, executor) + + // Register the webhook endpoint (no auth required - uses webhook signature validation) + e.POST("/webhooks/github", handler.HandleWebhook) + + log.Println("GitHub webhook registered successfully") +} diff --git a/taco/internal/github/client.go b/taco/internal/github/client.go new file mode 100644 index 000000000..c9d49feac --- /dev/null +++ b/taco/internal/github/client.go @@ -0,0 +1,372 @@ +package github + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "path/filepath" + "strings" + "time" +) + +// Client provides GitHub API operations for the webhook handler +type Client struct { + token string + httpClient *http.Client + baseURL string +} + +// NewClient creates a new GitHub API client +func NewClient(token string) *Client { + return &Client{ + token: token, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + baseURL: "https://api.github.com", + } +} + +// NewClientFromEnv creates a client from environment variables +func NewClientFromEnv() (*Client, error) { + token := os.Getenv("OPENTACO_GITHUB_TOKEN") + if token == "" { + return nil, fmt.Errorf("OPENTACO_GITHUB_TOKEN is required") + } + return NewClient(token), nil +} + +// PostComment posts a comment on an issue or PR +func (c *Client) PostComment(ctx context.Context, owner, repo string, issueNumber int, body string) error { + url := fmt.Sprintf("%s/repos/%s/%s/issues/%d/comments", c.baseURL, owner, repo, issueNumber) + + payload := map[string]string{"body": body} + jsonBody, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("failed to marshal comment: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(jsonBody)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to post comment: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + slog.Info("Posted comment to GitHub", + slog.String("repo", fmt.Sprintf("%s/%s", owner, repo)), + slog.Int("issue", issueNumber)) + + return nil +} + +// UpdateComment updates an existing comment +func (c *Client) UpdateComment(ctx context.Context, owner, repo string, 
commentID int64, body string) error { + url := fmt.Sprintf("%s/repos/%s/%s/issues/comments/%d", c.baseURL, owner, repo, commentID) + + payload := map[string]string{"body": body} + jsonBody, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("failed to marshal comment: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPatch, url, bytes.NewReader(jsonBody)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to update comment: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + return nil +} + +// GetPullRequest fetches PR details +func (c *Client) GetPullRequest(ctx context.Context, owner, repo string, number int) (*PullRequest, error) { + url := fmt.Sprintf("%s/repos/%s/%s/pulls/%d", c.baseURL, owner, repo, number) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to get PR: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + var pr PullRequest + if err := json.NewDecoder(resp.Body).Decode(&pr); err != nil { + return nil, fmt.Errorf("failed to decode PR: %w", err) + } + + return &pr, nil +} + +// DownloadRepoArchive downloads the repository at a specific ref as a tar.gz archive +func (c *Client) DownloadRepoArchive(ctx context.Context, owner, repo, ref string) ([]byte, error) { + // GitHub provides tarball downloads at /repos/{owner}/{repo}/tarball/{ref} + url := fmt.Sprintf("%s/repos/%s/%s/tarball/%s", c.baseURL, owner, repo, ref) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + c.setHeaders(req) + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to download tarball: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= 300 { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("GitHub API returned %d: %s", resp.StatusCode, string(body)) + } + + // Read the tarball + tarball, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read tarball: %w", err) + } + + slog.Info("Downloaded repo archive", + slog.String("repo", fmt.Sprintf("%s/%s", owner, repo)), + slog.String("ref", ref), + slog.Int("size", len(tarball))) + + return tarball, nil +} + +// DownloadAndRepackage downloads the repo and repackages it without the GitHub wrapper directory +// GitHub tarballs have a root directory like "owner-repo-sha/", this removes it +func (c *Client) DownloadAndRepackage(ctx context.Context, owner, repo, ref string) ([]byte, error) { + tarball, err := c.DownloadRepoArchive(ctx, owner, repo, ref) + if err != nil { + return nil, err + } + + // Repackage to remove the GitHub wrapper directory + return repackageTarball(tarball) +} + +// repackageTarball removes the root directory wrapper from a GitHub tarball +func repackageTarball(input []byte) ([]byte, error) { + // Open the gzipped input + gzr, err := 
gzip.NewReader(bytes.NewReader(input))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create gzip reader: %w", err)
+	}
+	defer gzr.Close()
+
+	tr := tar.NewReader(gzr)
+
+	// Create output buffer
+	var buf bytes.Buffer
+	gzw := gzip.NewWriter(&buf)
+	tw := tar.NewWriter(gzw)
+
+	// Find the prefix (first directory) to strip
+	var prefix string
+	var fileCount int
+	var tfFileCount int
+
+	slog.Info("Starting tarball repackage", slog.Int("input_size", len(input)))
+
+	for {
+		hdr, err := tr.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, fmt.Errorf("failed to read tar: %w", err)
+		}
+
+		// Skip PAX extended headers (GitHub tarballs use PAX format).
+		// These have Typeflag 'x' (PAX header) or 'g' (global PAX header),
+		// or names like "pax_global_header" or "PaxHeader/".
+		if hdr.Typeflag == tar.TypeXHeader || hdr.Typeflag == tar.TypeXGlobalHeader {
+			continue
+		}
+		if strings.HasPrefix(hdr.Name, "pax_global_header") || strings.HasPrefix(hdr.Name, "PaxHeader") {
+			continue
+		}
+
+		// Detect and strip the prefix directory (the GitHub wrapper like "owner-repo-sha/")
+		if prefix == "" {
+			parts := strings.SplitN(hdr.Name, "/", 2)
+			if len(parts) > 0 && parts[0] != "" {
+				prefix = parts[0] + "/"
+				slog.Info("Detected GitHub tarball prefix to strip", slog.String("prefix", prefix))
+			}
+		}
+
+		// Skip the root directory entry itself
+		if hdr.Name == prefix || hdr.Name == strings.TrimSuffix(prefix, "/") {
+			continue
+		}
+
+		// Strip the prefix from the path
+		newName := strings.TrimPrefix(hdr.Name, prefix)
+		if newName == "" {
+			continue
+		}
+
+		fileCount++
+		if strings.HasSuffix(newName, ".tf") {
+			tfFileCount++
+		}
+
+		// Create a new header with the stripped path. Linkname must be carried
+		// over too, or symlink entries would be rewritten with an empty target
+		// and break on extraction.
+		newHdr := &tar.Header{
+			Name:     newName,
+			Mode:     hdr.Mode,
+			Size:     hdr.Size,
+			ModTime:  hdr.ModTime,
+			Typeflag: hdr.Typeflag,
+			Linkname: hdr.Linkname,
+		}
+
+		if err := tw.WriteHeader(newHdr); err != nil {
+			return nil, fmt.Errorf("failed to write header: %w", err)
+		}
+
+		if hdr.Size > 0 {
+			if _, err := io.Copy(tw, tr); err != nil {
+				return nil, fmt.Errorf("failed to copy file: %w", err)
+			}
+		}
+	}
+
+	slog.Info("Tarball repackage completed",
+		slog.String("stripped_prefix", prefix),
+		slog.Int("total_files", fileCount),
+		slog.Int("tf_files", tfFileCount))
+
+	if tfFileCount == 0 {
+		slog.Warn("No .tf files found in archive after repackaging!")
+	}
+
+	if err := tw.Close(); err != nil {
+		return nil, fmt.Errorf("failed to close tar writer: %w", err)
+	}
+	if err := gzw.Close(); err != nil {
+		return nil, fmt.Errorf("failed to close gzip writer: %w", err)
+	}
+
+	return buf.Bytes(), nil
+}
+
+// CreateRepoArchiveFromDir creates a tar.gz archive from a local directory.
+// This is useful for testing or when the repo is already cloned.
+func CreateRepoArchiveFromDir(dir string) ([]byte, error) {
+	var buf bytes.Buffer
+	gzw := gzip.NewWriter(&buf)
+	tw := tar.NewWriter(gzw)
+
+	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Get relative path
+		relPath, err := filepath.Rel(dir, path)
+		if err != nil {
+			return err
+		}
+
+		// Skip the root
+		if relPath == "."
{ + return nil + } + + // Skip .git directory + if strings.HasPrefix(relPath, ".git") { + if info.IsDir() { + return filepath.SkipDir + } + return nil + } + + hdr, err := tar.FileInfoHeader(info, "") + if err != nil { + return err + } + hdr.Name = relPath + + if err := tw.WriteHeader(hdr); err != nil { + return err + } + + if !info.IsDir() { + f, err := os.Open(path) + if err != nil { + return err + } + defer f.Close() + if _, err := io.Copy(tw, f); err != nil { + return err + } + } + + return nil + }) + + if err != nil { + return nil, fmt.Errorf("failed to create archive: %w", err) + } + + if err := tw.Close(); err != nil { + return nil, fmt.Errorf("failed to close tar: %w", err) + } + if err := gzw.Close(); err != nil { + return nil, fmt.Errorf("failed to close gzip: %w", err) + } + + return buf.Bytes(), nil +} + +func (c *Client) setHeaders(req *http.Request) { + req.Header.Set("Authorization", "Bearer "+c.token) + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + req.Header.Set("Content-Type", "application/json") +} + diff --git a/taco/internal/github/commands.go b/taco/internal/github/commands.go new file mode 100644 index 000000000..f2b240f11 --- /dev/null +++ b/taco/internal/github/commands.go @@ -0,0 +1,602 @@ +package github + +import ( + "context" + "fmt" + "log/slog" + "os" + "regexp" + "strings" + "time" + + "github.com/diggerhq/digger/opentaco/internal/domain" + "github.com/diggerhq/digger/opentaco/internal/sandbox" + "github.com/diggerhq/digger/opentaco/internal/storage" + "github.com/google/uuid" +) + +// CommandExecutor executes terraform commands via the sandbox +type CommandExecutor struct { + client *Client + sandbox sandbox.Sandbox + unitRepo domain.UnitRepository + store storage.UnitStore +} + +// OrgID used for GitHub benchmark operations +const benchmarkOrgID = "github-benchmark" + +// ExecuteRequest contains everything needed to execute a command +type ExecuteRequest struct { + Command Command + Owner string + Repo string + PRNumber int + Branch string + CommitSHA string +} + +// NewCommandExecutor creates a new command executor +func NewCommandExecutor( + client *Client, + sandbox sandbox.Sandbox, + unitRepo domain.UnitRepository, + store storage.UnitStore, +) *CommandExecutor { + return &CommandExecutor{ + client: client, + sandbox: sandbox, + unitRepo: unitRepo, + store: store, + } +} + +// Execute runs the terraform command and returns the result +func (e *CommandExecutor) Execute(ctx context.Context, req *ExecuteRequest) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + totalStart := time.Now() + + logger := slog.Default().With( + slog.String("action", req.Command.Action), + slog.String("repo", fmt.Sprintf("%s/%s", req.Owner, req.Repo)), + slog.Int("pr", req.PRNumber), + ) + + // 1. Download repository + logger.Info("Downloading repository") + cloneStart := time.Now() + archive, err := e.client.DownloadAndRepackage(ctx, req.Owner, req.Repo, req.CommitSHA) + if err != nil { + result.Error = fmt.Sprintf("Failed to download repository: %v", err) + logger.Error("Download failed", slog.String("error", err.Error())) + result.Timing.Total = time.Since(totalStart) + return result + } + result.Timing.Clone = time.Since(cloneStart) + logger.Info("Repository downloaded", slog.Duration("duration", result.Timing.Clone)) + + // 2. 
Generate unit ID for state storage + // Format: github/{owner}/{repo}/pr-{number} + unitID := fmt.Sprintf("github/%s/%s/pr-%d", req.Owner, req.Repo, req.PRNumber) + + // 3. Load existing state if any + var state []byte + logger.Info("Looking for existing state", slog.String("unit_id", unitID)) + if meta, err := e.unitRepo.Get(ctx, unitID); err == nil && meta != nil { + logger.Info("Unit found, downloading state...") + if stateData, err := e.store.Download(ctx, unitID); err == nil { + state = stateData + logger.Info("Loaded existing state", slog.Int("size", len(state))) + } else { + logger.Warn("Failed to download state", slog.String("error", err.Error())) + } + } else { + logger.Info("No existing unit/state found", slog.String("error", fmt.Sprintf("%v", err))) + } + + // 4. Get terraform version from options or use default + tfVersion := req.Command.Options["version"] + if tfVersion == "" { + tfVersion = os.Getenv("OPENTACO_DEFAULT_TF_VERSION") + if tfVersion == "" { + tfVersion = "1.5.7" // Default version + } + } + + // 5. Get engine (terraform or tofu) + engine := req.Command.Options["engine"] + if engine == "" { + engine = os.Getenv("OPENTACO_DEFAULT_ENGINE") + if engine == "" { + engine = "terraform" + } + } + + // 6. Get working directory if specified (empty means root of repo) + workingDir := req.Command.Options["dir"] + // Don't default to "." - the sandbox handles empty string as the root workspace + + // 7. Generate run ID + runID := uuid.New().String() + + // Create sandbox request metadata + metadata := map[string]string{ + "github_owner": req.Owner, + "github_repo": req.Repo, + "github_pr": fmt.Sprintf("%d", req.PRNumber), + "github_branch": req.Branch, + "github_sha": req.CommitSHA, + "command_action": req.Command.Action, + "benchmark": "true", // Flag for benchmark mode + } + + // Pass AWS credentials to sandbox if configured + // Note: Only passed in metadata, never logged for security + if awsKey := os.Getenv("AWS_ACCESS_KEY_ID"); awsKey != "" { + metadata["AWS_ACCESS_KEY_ID"] = awsKey + metadata["AWS_SECRET_ACCESS_KEY"] = os.Getenv("AWS_SECRET_ACCESS_KEY") + metadata["AWS_REGION"] = os.Getenv("AWS_REGION") + if metadata["AWS_REGION"] == "" { + metadata["AWS_REGION"] = os.Getenv("AWS_DEFAULT_REGION") + } + if metadata["AWS_REGION"] == "" { + metadata["AWS_REGION"] = "us-east-1" + } + // Log that credentials are configured (not the values) + logger.Info("AWS credentials configured for sandbox", + slog.String("region", metadata["AWS_REGION"]), + slog.Int("key_length", len(awsKey))) + } else { + logger.Warn("AWS_ACCESS_KEY_ID not set - AWS resources will fail") + } + + // Save clone timing before the switch (since result gets reassigned) + cloneTime := result.Timing.Clone + + // 8. 
Execute based on action + switch req.Command.Action { + case "plan": + result = e.executePlan(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart) + case "apply": + result = e.executeApply(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart, false) + case "destroy": + result = e.executeApply(ctx, logger, req, runID, unitID, archive, state, tfVersion, engine, workingDir, metadata, totalStart, true) + case "benchmark": + result = e.executeBenchmark(ctx, logger, req, runID, unitID, archive, tfVersion, engine, workingDir, metadata, totalStart) + // Restore the clone time we measured before the switch + result.Timing.Clone = cloneTime + default: + result.Error = fmt.Sprintf("Unknown action: %s", req.Command.Action) + } + + // For non-benchmark actions, recalculate Clone as total minus init/execute + if req.Command.Action != "benchmark" { + result.Timing.Clone = time.Since(cloneStart) - result.Timing.Init - result.Timing.Execute + if result.Timing.Clone < 0 { + result.Timing.Clone = 0 + } + } + + return result +} + +func (e *CommandExecutor) executePlan( + ctx context.Context, + logger *slog.Logger, + req *ExecuteRequest, + runID, unitID string, + archive, state []byte, + tfVersion, engine, workingDir string, + metadata map[string]string, + totalStart time.Time, +) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + if e.sandbox == nil { + result.Error = "Sandbox provider not configured" + result.Timing.Total = time.Since(totalStart) + return result + } + + // Generate a config version ID for the sandbox + configVersionID := fmt.Sprintf("cv-%s", uuid.New().String()[:8]) + + planReq := &sandbox.PlanRequest{ + RunID: runID, + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + ConfigArchive: archive, + State: state, + Metadata: metadata, + } + + logger.Info("Executing plan in sandbox", + slog.String("run_id", runID), + slog.String("engine", engine), + slog.String("version", tfVersion)) + + executeStart := time.Now() + planResult, err := e.sandbox.ExecutePlan(ctx, planReq) + result.Timing.Execute = time.Since(executeStart) + + if err != nil { + result.Error = fmt.Sprintf("Plan execution failed: %v", err) + result.Timing.Total = time.Since(totalStart) + logger.Error("Plan failed", slog.String("error", err.Error())) + return result + } + + result.Success = true + result.Output = planResult.Logs + result.Summary = formatPlanSummary(planResult) + + // Parse init time from logs if available + result.Timing.Init = parseInitTime(planResult.Logs) + // Adjust execute time to be plan-only (subtract init) + if result.Timing.Init > 0 && result.Timing.Execute > result.Timing.Init { + result.Timing.Execute = result.Timing.Execute - result.Timing.Init + } + + result.Timing.Total = time.Since(totalStart) + + logger.Info("Plan completed", + slog.Bool("has_changes", planResult.HasChanges), + slog.Int("additions", planResult.ResourceAdditions), + slog.Int("changes", planResult.ResourceChanges), + slog.Int("destructions", planResult.ResourceDestructions), + slog.Duration("total", result.Timing.Total)) + + return result +} + +func (e *CommandExecutor) executeApply( + ctx context.Context, + logger *slog.Logger, + req *ExecuteRequest, + runID, unitID string, + archive, state []byte, + tfVersion, engine, workingDir string, + metadata 
map[string]string, + totalStart time.Time, + isDestroy bool, +) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + if e.sandbox == nil { + result.Error = "Sandbox provider not configured" + result.Timing.Total = time.Since(totalStart) + return result + } + + // Generate a config version ID for the sandbox + configVersionID := fmt.Sprintf("cv-%s", uuid.New().String()[:8]) + + applyReq := &sandbox.ApplyRequest{ + RunID: runID, + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + IsDestroy: isDestroy, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + ConfigArchive: archive, + State: state, + Metadata: metadata, + } + + action := "apply" + if isDestroy { + action = "destroy" + } + + logger.Info(fmt.Sprintf("Executing %s in sandbox", action), + slog.String("run_id", runID), + slog.String("engine", engine), + slog.String("version", tfVersion), + slog.Bool("is_destroy", isDestroy)) + + executeStart := time.Now() + applyResult, err := e.sandbox.ExecuteApply(ctx, applyReq) + result.Timing.Execute = time.Since(executeStart) + + if err != nil { + result.Error = fmt.Sprintf("%s execution failed: %v", strings.Title(action), err) + result.Timing.Total = time.Since(totalStart) + logger.Error(fmt.Sprintf("%s failed", action), slog.String("error", err.Error())) + return result + } + + // Save the new state + logger.Info("Apply result received", + slog.Int("state_size", len(applyResult.State)), + slog.Int("logs_size", len(applyResult.Logs)), + slog.Bool("is_destroy", isDestroy)) + + if len(applyResult.State) > 0 && !isDestroy { + if err := e.saveState(ctx, unitID, applyResult.State); err != nil { + logger.Warn("Failed to save state", slog.String("error", err.Error())) + } else { + logger.Info("State saved successfully", + slog.String("unit_id", unitID), + slog.Int("size", len(applyResult.State))) + } + } else if !isDestroy { + logger.Warn("No state returned from apply - state will not persist!") + } + + // For destroy, clean up the state + if isDestroy { + if err := e.cleanupState(ctx, unitID); err != nil { + logger.Warn("Failed to cleanup state", slog.String("error", err.Error())) + } else { + logger.Info("State cleaned up after destroy") + } + } + + result.Success = true + result.Output = applyResult.Logs + result.Summary = formatApplySummary(applyResult.Logs, isDestroy) + + // Parse init time from logs if available + result.Timing.Init = parseInitTime(applyResult.Logs) + // Adjust execute time + if result.Timing.Init > 0 && result.Timing.Execute > result.Timing.Init { + result.Timing.Execute = result.Timing.Execute - result.Timing.Init + } + + result.Timing.Total = time.Since(totalStart) + + logger.Info(fmt.Sprintf("%s completed", action), + slog.Duration("total", result.Timing.Total)) + + return result +} + +// executeBenchmark runs apply followed by destroy in a single flow +// This keeps state in the sandbox and ensures resources are cleaned up +func (e *CommandExecutor) executeBenchmark( + ctx context.Context, + logger *slog.Logger, + req *ExecuteRequest, + runID, unitID string, + archive []byte, + tfVersion, engine, workingDir string, + metadata map[string]string, + totalStart time.Time, +) *CommandResult { + result := &CommandResult{ + Command: req.Command, + Success: false, + } + + if e.sandbox == nil { + result.Error = "Sandbox provider not configured" + result.Timing.Total = time.Since(totalStart) + return result + } + + // Generate a config version 
ID for the sandbox + configVersionID := fmt.Sprintf("cv-%s", uuid.New().String()[:8]) + + logger.Info("Starting benchmark: apply + destroy cycle", + slog.String("run_id", runID), + slog.String("engine", engine), + slog.String("version", tfVersion)) + + var allLogs strings.Builder + + // Phase 1: Apply + applyStart := time.Now() + applyReq := &sandbox.ApplyRequest{ + RunID: runID, + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + IsDestroy: false, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + ConfigArchive: archive, + State: nil, // Fresh apply + Metadata: metadata, + } + + applyResult, err := e.sandbox.ExecuteApply(ctx, applyReq) + result.Timing.Apply = time.Since(applyStart) + + if err != nil { + result.Error = fmt.Sprintf("Apply phase failed: %v", err) + result.Timing.Total = time.Since(totalStart) + logger.Error("Benchmark apply failed", slog.String("error", err.Error())) + return result + } + + allLogs.WriteString("=== APPLY PHASE ===\n") + allLogs.WriteString(applyResult.Logs) + allLogs.WriteString("\n\n") + + logger.Info("Benchmark apply completed", + slog.Duration("duration", result.Timing.Apply), + slog.Int("state_size", len(applyResult.State))) + + // Phase 2: Destroy (using state from apply) + destroyStart := time.Now() + destroyReq := &sandbox.ApplyRequest{ + RunID: runID + "-destroy", + PlanID: uuid.New().String(), + OrgID: "github-benchmark", + UnitID: unitID, + ConfigurationVersionID: configVersionID, + IsDestroy: true, + TerraformVersion: tfVersion, + Engine: engine, + WorkingDirectory: workingDir, + ConfigArchive: archive, + State: applyResult.State, // Use state from apply + Metadata: metadata, + } + + destroyResult, err := e.sandbox.ExecuteApply(ctx, destroyReq) + result.Timing.Destroy = time.Since(destroyStart) + + if err != nil { + result.Error = fmt.Sprintf("Destroy phase failed (resources may be orphaned!): %v", err) + result.Timing.Total = time.Since(totalStart) + logger.Error("Benchmark destroy failed", slog.String("error", err.Error())) + return result + } + + allLogs.WriteString("=== DESTROY PHASE ===\n") + allLogs.WriteString(destroyResult.Logs) + + logger.Info("Benchmark destroy completed", + slog.Duration("duration", result.Timing.Destroy)) + + // Success! 
+	result.Success = true
+	result.Output = allLogs.String()
+	result.Summary = fmt.Sprintf("Apply: %.2fs | Destroy: %.2fs | Total: %.2fs",
+		result.Timing.Apply.Seconds(),
+		result.Timing.Destroy.Seconds(),
+		time.Since(totalStart).Seconds())
+
+	result.Timing.Total = time.Since(totalStart)
+
+	logger.Info("Benchmark completed successfully",
+		slog.Duration("apply", result.Timing.Apply),
+		slog.Duration("destroy", result.Timing.Destroy),
+		slog.Duration("total", result.Timing.Total))
+
+	return result
+}
+
+func (e *CommandExecutor) saveState(ctx context.Context, unitID string, state []byte) error {
+	// Check if the unit exists; create it if not
+	if _, err := e.unitRepo.Get(ctx, unitID); err != nil {
+		// Create the unit with the benchmark org ID
+		_, err = e.unitRepo.Create(ctx, benchmarkOrgID, unitID)
+		if err != nil {
+			return fmt.Errorf("failed to create unit: %w", err)
+		}
+	}
+
+	// Save state (empty lock ID since we're not holding a lock)
+	if err := e.store.Upload(ctx, unitID, state, ""); err != nil {
+		return fmt.Errorf("failed to save state: %w", err)
+	}
+
+	return nil
+}
+
+func (e *CommandExecutor) cleanupState(ctx context.Context, unitID string) error {
+	// Delete the unit and state
+	if err := e.unitRepo.Delete(ctx, unitID); err != nil {
+		return fmt.Errorf("failed to delete unit: %w", err)
+	}
+	return nil
+}
+
+// formatPlanSummary creates a summary from plan results
+func formatPlanSummary(result *sandbox.PlanResult) string {
+	if !result.HasChanges {
+		return "No changes. Infrastructure is up-to-date."
+	}
+	return fmt.Sprintf("%d to add, %d to change, %d to destroy",
+		result.ResourceAdditions,
+		result.ResourceChanges,
+		result.ResourceDestructions)
+}
+
+// formatApplySummary creates a summary from apply output
+func formatApplySummary(logs string, isDestroy bool) string {
+	// Try to extract the summary line from terraform output.
+	// Example: "Apply complete! Resources: 10 added, 0 changed, 0 destroyed."
+	// Example: "Destroy complete! Resources: 10 destroyed."
+
+	lines := strings.Split(logs, "\n")
+	for _, line := range lines {
+		if strings.Contains(line, "Apply complete!") || strings.Contains(line, "Destroy complete!") {
+			return strings.TrimSpace(line)
+		}
+	}
+
+	if isDestroy {
+		return "Destroy completed"
+	}
+	return "Apply completed"
+}
+
+// parseInitTime attempts to extract the init duration from terraform logs.
+// Terraform does not emit timestamps in its output, so there is nothing
+// reliable to parse: this is a placeholder that always returns 0, and callers
+// fall back to attributing the whole run to execute time. A real
+// implementation would either have the sandbox report separate init/execute
+// timings or parse timestamped log lines.
+func parseInitTime(logs string) time.Duration {
+	return 0
+}
+
+// extractResourceCounts extracts add/change/destroy counts from terraform output
+func extractResourceCounts(logs string) (add, change, destroy int) {
+	// Pattern: "Plan: X to add, Y to change, Z to destroy"
+	// Pattern: "Apply complete!
+
+// extractResourceCounts extracts add/change/destroy counts from terraform output
+func extractResourceCounts(logs string) (add, change, destroy int) {
+	// Pattern: "Plan: X to add, Y to change, Z to destroy"
+	// Pattern: "Apply complete! Resources: X added, Y changed, Z destroyed"
+	patterns := []*regexp.Regexp{
+		regexp.MustCompile(`Plan: (\d+) to add, (\d+) to change, (\d+) to destroy`),
+		regexp.MustCompile(`Resources: (\d+) added, (\d+) changed, (\d+) destroyed`),
+	}
+
+	for _, re := range patterns {
+		if m := re.FindStringSubmatch(logs); len(m) == 4 {
+			fmt.Sscanf(m[1], "%d", &add)
+			fmt.Sscanf(m[2], "%d", &change)
+			fmt.Sscanf(m[3], "%d", &destroy)
+			return
+		}
+	}
+
+	return 0, 0, 0
+}
+
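And the counterpart for the count extraction, exercised against both patterns it knows about:

package github

import "testing"

func TestExtractResourceCounts(t *testing.T) {
	add, change, destroy := extractResourceCounts(
		"Apply complete! Resources: 10000 added, 0 changed, 0 destroyed.")
	if add != 10000 || change != 0 || destroy != 0 {
		t.Fatalf("apply counts: got %d/%d/%d", add, change, destroy)
	}

	add, change, destroy = extractResourceCounts(
		"Plan: 3 to add, 1 to change, 2 to destroy")
	if add != 3 || change != 1 || destroy != 2 {
		t.Fatalf("plan counts: got %d/%d/%d", add, change, destroy)
	}
}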
`json:"default_branch"` +} + +// User represents a GitHub user +type User struct { + ID int64 `json:"id"` + Login string `json:"login"` + Type string `json:"type"` // User, Bot, Organization + Email string `json:"email,omitempty"` +} + +// Label represents a GitHub issue/PR label +type Label struct { + ID int64 `json:"id"` + Name string `json:"name"` + Color string `json:"color"` +} + +// Command represents a parsed /opentaco command +type Command struct { + Action string // plan, apply, destroy + Options map[string]string // additional flags + Raw string // original comment text +} + +// CommandResult holds the result of executing a command +type CommandResult struct { + Command Command + Success bool + Error string + Timing TimingBreakdown + Output string + Summary string +} + +// TimingBreakdown holds timing for each phase +type TimingBreakdown struct { + Clone time.Duration `json:"clone"` + Init time.Duration `json:"init"` + Execute time.Duration `json:"execute"` // plan, apply, or destroy time + Apply time.Duration `json:"apply,omitempty"` // for benchmark: apply phase + Destroy time.Duration `json:"destroy,omitempty"` // for benchmark: destroy phase + Total time.Duration `json:"total"` +} + +// WebhookConfig holds configuration for the GitHub webhook handler +type WebhookConfig struct { + WebhookSecret string // Secret for validating webhook signatures + AppID string // GitHub App ID (optional, for App auth) + PrivateKeyPath string // Path to GitHub App private key (optional) + Token string // Personal access token (alternative to App auth) +} + diff --git a/taco/internal/github/webhook.go b/taco/internal/github/webhook.go new file mode 100644 index 000000000..1b213ba16 --- /dev/null +++ b/taco/internal/github/webhook.go @@ -0,0 +1,333 @@ +package github + +import ( + "context" + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "os" + "strings" + + "github.com/labstack/echo/v4" +) + +// WebhookHandler handles GitHub webhook events +type WebhookHandler struct { + client *Client + executor *CommandExecutor + secret string +} + +// NewWebhookHandler creates a new webhook handler +// Note: The webhook secret is required at registration time (see RegisterGitHubWebhook) +func NewWebhookHandler(client *Client, executor *CommandExecutor) *WebhookHandler { + return &WebhookHandler{ + client: client, + executor: executor, + secret: os.Getenv("OPENTACO_GITHUB_WEBHOOK_SECRET"), + } +} + +// HandleWebhook is the main webhook endpoint handler +func (h *WebhookHandler) HandleWebhook(c echo.Context) error { + // Read the body + body, err := io.ReadAll(c.Request().Body) + if err != nil { + slog.Error("Failed to read webhook body", slog.String("error", err.Error())) + return c.JSON(http.StatusBadRequest, map[string]string{"error": "failed to read body"}) + } + + // Validate webhook signature (required - enforced at registration) + signature := c.Request().Header.Get("X-Hub-Signature-256") + if !h.validateSignature(body, signature) { + slog.Warn("Invalid webhook signature received") + return c.JSON(http.StatusUnauthorized, map[string]string{"error": "invalid signature"}) + } + + // Get event type + eventType := c.Request().Header.Get("X-GitHub-Event") + deliveryID := c.Request().Header.Get("X-GitHub-Delivery") + + slog.Info("Received GitHub webhook", + slog.String("event", eventType), + slog.String("delivery_id", deliveryID)) + + switch eventType { + case "issue_comment": + return h.handleIssueComment(c, body) + case "pull_request": + // 
diff --git a/taco/internal/github/webhook.go b/taco/internal/github/webhook.go
new file mode 100644
index 000000000..1b213ba16
--- /dev/null
+++ b/taco/internal/github/webhook.go
@@ -0,0 +1,333 @@
+package github
+
+import (
+	"context"
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"os"
+	"strings"
+
+	"github.com/labstack/echo/v4"
+)
+
+// WebhookHandler handles GitHub webhook events
+type WebhookHandler struct {
+	client   *Client
+	executor *CommandExecutor
+	secret   string
+}
+
+// NewWebhookHandler creates a new webhook handler
+// Note: The webhook secret is required at registration time (see RegisterGitHubWebhook)
+func NewWebhookHandler(client *Client, executor *CommandExecutor) *WebhookHandler {
+	return &WebhookHandler{
+		client:   client,
+		executor: executor,
+		secret:   os.Getenv("OPENTACO_GITHUB_WEBHOOK_SECRET"),
+	}
+}
+
+// HandleWebhook is the main webhook endpoint handler
+func (h *WebhookHandler) HandleWebhook(c echo.Context) error {
+	// Read the body
+	body, err := io.ReadAll(c.Request().Body)
+	if err != nil {
+		slog.Error("Failed to read webhook body", slog.String("error", err.Error()))
+		return c.JSON(http.StatusBadRequest, map[string]string{"error": "failed to read body"})
+	}
+
+	// Validate webhook signature (required - enforced at registration)
+	signature := c.Request().Header.Get("X-Hub-Signature-256")
+	if !h.validateSignature(body, signature) {
+		slog.Warn("Invalid webhook signature received")
+		return c.JSON(http.StatusUnauthorized, map[string]string{"error": "invalid signature"})
+	}
+
+	// Get event type
+	eventType := c.Request().Header.Get("X-GitHub-Event")
+	deliveryID := c.Request().Header.Get("X-GitHub-Delivery")
+
+	slog.Info("Received GitHub webhook",
+		slog.String("event", eventType),
+		slog.String("delivery_id", deliveryID))
+
+	switch eventType {
+	case "issue_comment":
+		return h.handleIssueComment(c, body)
+	case "pull_request":
+		// Optional: auto-plan on PR open
+		return h.handlePullRequest(c, body)
+	case "ping":
+		return c.JSON(http.StatusOK, map[string]string{"message": "pong"})
+	default:
+		slog.Debug("Ignoring unhandled event type", slog.String("event", eventType))
+		return c.JSON(http.StatusOK, map[string]string{"message": "event ignored"})
+	}
+}
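To make the dispatch concrete, here is a trimmed issue_comment payload containing only the fields the handler below actually reads; it is a representative shape, not a verbatim GitHub delivery:

package github

import (
	"encoding/json"
	"testing"
)

func TestIssueCommentEventDecoding(t *testing.T) {
	payload := `{
	  "action": "created",
	  "issue": {
	    "number": 42,
	    "pull_request": {"html_url": "https://github.com/acme/infra/pull/42"}
	  },
	  "comment": {"body": "/opentaco plan"},
	  "repository": {"full_name": "acme/infra"},
	  "sender": {"login": "alice", "type": "User"}
	}`

	var event IssueCommentEvent
	if err := json.Unmarshal([]byte(payload), &event); err != nil {
		t.Fatal(err)
	}
	// These are exactly the gates handleIssueComment checks before executing.
	if event.Action != "created" || event.Issue.PullRequest == nil || event.Sender.Type == "Bot" {
		t.Fatal("payload should pass the handler's preconditions")
	}
	if cmd := ParseCommand(event.Comment.Body); cmd == nil || cmd.Action != "plan" {
		t.Fatal("expected an /opentaco plan command")
	}
}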
+
+// handleIssueComment processes issue/PR comment events
+func (h *WebhookHandler) handleIssueComment(c echo.Context, body []byte) error {
+	ctx := c.Request().Context()
+
+	var event IssueCommentEvent
+	if err := json.Unmarshal(body, &event); err != nil {
+		slog.Error("Failed to parse issue comment event", slog.String("error", err.Error()))
+		return c.JSON(http.StatusBadRequest, map[string]string{"error": "failed to parse event"})
+	}
+
+	// Only process new comments
+	if event.Action != "created" {
+		return c.JSON(http.StatusOK, map[string]string{"message": "ignoring non-created action"})
+	}
+
+	// Only process comments on PRs (issues with a pull_request field)
+	if event.Issue.PullRequest == nil {
+		return c.JSON(http.StatusOK, map[string]string{"message": "ignoring non-PR comment"})
+	}
+
+	// Ignore bot comments to prevent loops
+	if event.Sender.Type == "Bot" {
+		return c.JSON(http.StatusOK, map[string]string{"message": "ignoring bot comment"})
+	}
+
+	// Parse command from comment
+	cmd := ParseCommand(event.Comment.Body)
+	if cmd == nil {
+		return c.JSON(http.StatusOK, map[string]string{"message": "no command found"})
+	}
+
+	slog.Info("Processing command",
+		slog.String("action", cmd.Action),
+		slog.String("repo", event.Repo.FullName),
+		slog.Int("pr", event.Issue.Number),
+		slog.String("user", event.Sender.Login))
+
+	// Parse owner/repo
+	parts := strings.Split(event.Repo.FullName, "/")
+	if len(parts) != 2 {
+		return c.JSON(http.StatusBadRequest, map[string]string{"error": "invalid repo name"})
+	}
+	owner, repo := parts[0], parts[1]
+
+	// Get PR details to find the branch
+	pr, err := h.client.GetPullRequest(ctx, owner, repo, event.Issue.Number)
+	if err != nil {
+		slog.Error("Failed to get PR details", slog.String("error", err.Error()))
+		// Post error comment (best-effort)
+		h.client.PostComment(ctx, owner, repo, event.Issue.Number,
+			fmt.Sprintf("āŒ **OpenTaco Error**\n\nFailed to get PR details: %s", err.Error()))
+		return c.JSON(http.StatusOK, map[string]string{"message": "failed to get PR"})
+	}
+
+	// Post acknowledgment comment (best-effort; a failure here shouldn't block execution)
+	ackMsg := fmt.Sprintf("šŸš€ **OpenTaco** starting `%s`...\n\n_Downloading repository and preparing sandbox..._", cmd.Action)
+	h.client.PostComment(ctx, owner, repo, event.Issue.Number, ackMsg)
+
+	// Execute the command asynchronously. The HTTP request context is canceled
+	// as soon as the response is sent, so the goroutine uses a background context.
+	go func() {
+		bgCtx := context.Background()
+
+		result := h.executor.Execute(bgCtx, &ExecuteRequest{
+			Command:   *cmd,
+			Owner:     owner,
+			Repo:      repo,
+			PRNumber:  event.Issue.Number,
+			Branch:    pr.Head.Ref,
+			CommitSHA: pr.Head.SHA,
+		})
+
+		// Post result comment
+		resultComment := FormatResult(result)
+		if err := h.client.PostComment(bgCtx, owner, repo, event.Issue.Number, resultComment); err != nil {
+			slog.Error("Failed to post result comment", slog.String("error", err.Error()))
+		}
+	}()
+
+	return c.JSON(http.StatusOK, map[string]string{"message": "command accepted"})
+}
+
+// handlePullRequest processes PR events (optional auto-plan)
+func (h *WebhookHandler) handlePullRequest(c echo.Context, body []byte) error {
+	var event PullRequestEvent
+	if err := json.Unmarshal(body, &event); err != nil {
+		slog.Error("Failed to parse PR event", slog.String("error", err.Error()))
+		return c.JSON(http.StatusBadRequest, map[string]string{"error": "failed to parse event"})
+	}
+
+	// Only process opened/synchronize if auto-plan is enabled
+	autoPlan := os.Getenv("OPENTACO_GITHUB_AUTO_PLAN") == "true"
+	if !autoPlan {
+		return c.JSON(http.StatusOK, map[string]string{"message": "auto-plan disabled"})
+	}
+
+	if event.Action != "opened" && event.Action != "synchronize" {
+		return c.JSON(http.StatusOK, map[string]string{"message": "ignoring action"})
+	}
+
+	// TODO: Implement auto-plan on PR open/sync
+	slog.Info("Would auto-plan for PR",
+		slog.String("repo", event.Repo.FullName),
+		slog.Int("pr", event.Number),
+		slog.String("action", event.Action))
+
+	return c.JSON(http.StatusOK, map[string]string{"message": "auto-plan not yet implemented"})
+}
+
+// validateSignature validates the webhook signature using HMAC-SHA256
+func (h *WebhookHandler) validateSignature(body []byte, signature string) bool {
+	// Refuse to validate with an empty secret; an empty key would make
+	// signatures forgeable by anyone who knows the scheme.
+	if h.secret == "" {
+		return false
+	}
+
+	if !strings.HasPrefix(signature, "sha256=") {
+		return false
+	}
+
+	expected := signature[7:] // Remove "sha256=" prefix
+
+	mac := hmac.New(sha256.New, []byte(h.secret))
+	mac.Write(body)
+	computed := hex.EncodeToString(mac.Sum(nil))
+
+	// Constant-time comparison to avoid timing side channels
+	return hmac.Equal([]byte(expected), []byte(computed))
+}
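For local testing it helps to be able to produce a header that validateSignature above will accept. GitHub's documented scheme is HMAC-SHA256 over the raw request body, hex-encoded and prefixed with "sha256="; the helper name here is ours, not part of this patch:

package github

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
)

// signBody computes the X-Hub-Signature-256 value GitHub would send for a
// given body and secret (hypothetical test helper).
func signBody(secret string, body []byte) string {
	mac := hmac.New(sha256.New, []byte(secret))
	mac.Write(body)
	return "sha256=" + hex.EncodeToString(mac.Sum(nil))
}

Pairing this with the sample payload earlier gives a full end-to-end local test without GitHub in the loop.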
+
+// ParseCommand parses an /opentaco command from comment text
+func ParseCommand(text string) *Command {
+	lines := strings.Split(text, "\n")
+
+	for _, line := range lines {
+		line = strings.TrimSpace(line)
+
+		// Check for /opentaco command
+		if !strings.HasPrefix(line, "/opentaco ") && line != "/opentaco" {
+			continue
+		}
+
+		// Parse the command
+		parts := strings.Fields(line)
+		if len(parts) < 2 {
+			continue
+		}
+
+		action := strings.ToLower(parts[1])
+
+		// Validate action
+		switch action {
+		case "plan", "apply", "destroy", "benchmark":
+			cmd := &Command{
+				Action:  action,
+				Options: make(map[string]string),
+				Raw:     line,
+			}
+
+			// Parse additional options
+			for i := 2; i < len(parts); i++ {
+				opt := parts[i]
+				if strings.HasPrefix(opt, "--") {
+					// Handle --key=value or --flag
+					opt = strings.TrimPrefix(opt, "--")
+					if idx := strings.Index(opt, "="); idx > 0 {
+						cmd.Options[opt[:idx]] = opt[idx+1:]
+					} else {
+						cmd.Options[opt] = "true"
+					}
+				}
+			}
+
+			return cmd
+		default:
+			// Unknown action, skip
+			continue
+		}
+	}
+
+	return nil
+}
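A few concrete comment bodies and what ParseCommand does with them, written as a table-driven sketch:

package github

import "testing"

func TestParseCommand(t *testing.T) {
	cases := []struct {
		comment string
		action  string // "" means no command expected
	}{
		{"/opentaco plan", "plan"},
		{"/opentaco apply --target=module.network", "apply"},
		{"/opentaco benchmark --resources=10000", "benchmark"},
		{"LGTM!\n/opentaco apply", "apply"}, // command may appear on any line
		{"run /opentaco plan please", ""},   // but must start its line
		{"/opentaco dance", ""},             // unknown action
	}
	for _, c := range cases {
		cmd := ParseCommand(c.comment)
		switch {
		case c.action == "" && cmd != nil:
			t.Errorf("%q: expected no command, got %q", c.comment, cmd.Action)
		case c.action != "" && (cmd == nil || cmd.Action != c.action):
			t.Errorf("%q: expected action %q", c.comment, c.action)
		}
	}
}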
+
+// FormatResult formats a command result as a markdown comment
+func FormatResult(result *CommandResult) string {
+	var sb strings.Builder
+
+	// Header based on action
+	switch result.Command.Action {
+	case "plan":
+		if result.Success {
+			sb.WriteString("## āœ… OpenTaco Plan\n\n")
+		} else {
+			sb.WriteString("## āŒ OpenTaco Plan Failed\n\n")
+		}
+	case "apply":
+		if result.Success {
+			sb.WriteString("## āœ… OpenTaco Apply\n\n")
+		} else {
+			sb.WriteString("## āŒ OpenTaco Apply Failed\n\n")
+		}
+	case "destroy":
+		if result.Success {
+			sb.WriteString("## āœ… OpenTaco Destroy\n\n")
+		} else {
+			sb.WriteString("## āŒ OpenTaco Destroy Failed\n\n")
+		}
+	case "benchmark":
+		if result.Success {
+			sb.WriteString("## āœ… OpenTaco Benchmark Complete\n\n")
+		} else {
+			sb.WriteString("## āŒ OpenTaco Benchmark Failed\n\n")
+		}
+	}
+
+	// Timing breakdown
+	sb.WriteString(fmt.Sprintf("**Total Duration:** %.2fs\n\n", result.Timing.Total.Seconds()))
+	sb.WriteString("| Phase | Duration |\n")
+	sb.WriteString("|-------|----------|\n")
+	if result.Timing.Clone > 0 {
+		sb.WriteString(fmt.Sprintf("| Clone | %.2fs |\n", result.Timing.Clone.Seconds()))
+	}
+	if result.Timing.Init > 0 {
+		sb.WriteString(fmt.Sprintf("| Init | %.2fs |\n", result.Timing.Init.Seconds()))
+	}
+
+	switch result.Command.Action {
+	case "plan":
+		sb.WriteString(fmt.Sprintf("| Plan | %.2fs |\n", result.Timing.Execute.Seconds()))
+	case "apply":
+		sb.WriteString(fmt.Sprintf("| Apply | %.2fs |\n", result.Timing.Execute.Seconds()))
+	case "destroy":
+		sb.WriteString(fmt.Sprintf("| Destroy | %.2fs |\n", result.Timing.Execute.Seconds()))
+	case "benchmark":
+		sb.WriteString(fmt.Sprintf("| Apply | %.2fs |\n", result.Timing.Apply.Seconds()))
+		sb.WriteString(fmt.Sprintf("| Destroy | %.2fs |\n", result.Timing.Destroy.Seconds()))
+	}
+
+	sb.WriteString("\n")
+
+	// Summary
+	if result.Summary != "" {
+		sb.WriteString(fmt.Sprintf("**Summary:** %s\n\n", result.Summary))
+	}
+
+	// Error message if failed
+	if !result.Success && result.Error != "" {
+		sb.WriteString(fmt.Sprintf("**Error:** %s\n\n", result.Error))
+	}
+
+	// Full output in a collapsible <details> section
+	if result.Output != "" {
+		sb.WriteString("<details>\n<summary>Full Output</summary>\n\n```\n")
+		// Truncate if too long (GitHub caps comment bodies at ~65k characters)
+		output := result.Output
+		if len(output) > 50000 {
+			output = output[:50000] + "\n... (truncated)"
+		}
+		sb.WriteString(output)
+		sb.WriteString("\n```\n</details>\n")
+	}
+
+	return sb.String()
+}
+
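Rendered, a successful benchmark comment comes out roughly like this (numbers illustrative; the Init row only appears when init timing is nonzero, and the collapsible Full Output section follows when logs are attached):

## āœ… OpenTaco Benchmark Complete

**Total Duration:** 312.45s

| Phase | Duration |
|-------|----------|
| Clone | 4.10s |
| Apply | 201.22s |
| Destroy | 107.13s |

**Summary:** Apply: 201.22s | Destroy: 107.13s | Total: 312.45s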
\n") + } + + return sb.String() +} + diff --git a/ui/src/routes/statesman/github/webhook.tsx b/ui/src/routes/statesman/github/webhook.tsx new file mode 100644 index 000000000..2e166f655 --- /dev/null +++ b/ui/src/routes/statesman/github/webhook.tsx @@ -0,0 +1,59 @@ +import { createFileRoute } from '@tanstack/react-router'; + +// GitHub webhook passthrough to Statesman +// This route receives GitHub webhooks and forwards them to the internal Statesman service +// Enable by setting STATESMAN_GITHUB_WEBHOOK_ENABLED=true +export const Route = createFileRoute('/statesman/github/webhook')({ + server: { + handlers: { + POST: async ({ request }) => { + // Check if GitHub webhook is enabled + if (process.env.STATESMAN_GITHUB_WEBHOOK_ENABLED !== 'true') { + console.log('GitHub webhook disabled (STATESMAN_GITHUB_WEBHOOK_ENABLED not set)'); + return new Response(JSON.stringify({ error: 'GitHub webhook not enabled' }), { + status: 404, + headers: { 'Content-Type': 'application/json' } + }); + } + + const statesmanUrl = process.env.STATESMAN_BACKEND_URL; + if (!statesmanUrl) { + console.error('STATESMAN_BACKEND_URL not configured'); + return new Response(JSON.stringify({ error: 'Backend not configured' }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + + try { + console.log('Forwarding GitHub webhook to Statesman'); + + // Forward all headers (including GitHub signature headers) + const response = await fetch(`${statesmanUrl}/webhooks/github`, { + method: 'POST', + headers: request.headers, + body: request.body, + // @ts-expect-error: 'duplex' is required by Node/undici for streaming bodies + duplex: 'half', + }); + + // Return the response from Statesman + const responseBody = await response.text(); + return new Response(responseBody, { + status: response.status, + headers: { + 'Content-Type': response.headers.get('Content-Type') || 'application/json', + }, + }); + } catch (error) { + console.error('Error forwarding GitHub webhook to Statesman:', error); + return new Response(JSON.stringify({ error: 'Internal server error' }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } + }, + }, + }, +}); +