From 2eef2fe6c7006114019d8e04b35dbf1d7b85c399 Mon Sep 17 00:00:00 2001 From: vishal veerareddy Date: Fri, 23 Jan 2026 19:38:47 -0800 Subject: [PATCH 1/4] Added headroom support --- .claude/settings.local.json | 20 +- .env.example | 106 ++++++ docker-compose.yml | 85 +++++ documentation/README.md | 3 +- documentation/headroom.md | 519 ++++++++++++++++++++++++++++++ headroom-sidecar/Dockerfile | 47 +++ headroom-sidecar/config.py | 93 ++++++ headroom-sidecar/requirements.txt | 14 + headroom-sidecar/server.py | 451 ++++++++++++++++++++++++++ package-lock.json | 136 +++++++- package.json | 5 +- src/api/health.js | 27 ++ src/api/router.js | 61 ++++ src/cache/prompt.js | 30 +- src/clients/databricks.js | 96 +++++- src/config/index.js | 72 +++++ src/headroom/client.js | 435 +++++++++++++++++++++++++ src/headroom/health.js | 163 ++++++++++ src/headroom/index.js | 240 ++++++++++++++ src/headroom/launcher.js | 517 +++++++++++++++++++++++++++++ src/orchestrator/index.js | 51 +++ src/server.js | 26 +- 22 files changed, 3177 insertions(+), 20 deletions(-) create mode 100644 documentation/headroom.md create mode 100644 headroom-sidecar/Dockerfile create mode 100644 headroom-sidecar/config.py create mode 100644 headroom-sidecar/requirements.txt create mode 100644 headroom-sidecar/server.py create mode 100644 src/headroom/client.js create mode 100644 src/headroom/health.js create mode 100644 src/headroom/index.js create mode 100644 src/headroom/launcher.js diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 93ba699..1c3387c 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -27,7 +27,25 @@ "Bash(jobs:*)", "Bash(gh pr view:*)", "Bash(gh run list:*)", - "Bash(gh run view:*)" + "Bash(gh run view:*)", + "Bash(npm install:*)", + "Bash(npm run lint:*)", + "Bash(docker ps:*)", + "Bash(docker logs:*)", + "Bash(docker exec:*)", + "Bash(docker compose build:*)", + "Bash(docker compose restart:*)", + "Bash(docker compose:*)", + "Bash(docker run:*)", + "Bash(python3:*)", + "Bash(iconv:*)", + "Bash(ls:*)", + "Bash(pgrep:*)", + "Bash(docker-compose build:*)", + "Bash(xargs:*)", + "Bash(docker info:*)", + "Bash(docker container ls:*)", + "Bash(node --check:*)" ], "deny": [], "ask": [] diff --git a/.env.example b/.env.example index ddb3723..a462e81 100644 --- a/.env.example +++ b/.env.example @@ -340,3 +340,109 @@ HOT_RELOAD_DEBOUNCE_MS=1000 # VERTEX_API_KEY=your-google-api-key # VERTEX_MODEL=gemini-2.0-flash # npm start + +# ============================================================================== +# Headroom Context Compression (Sidecar) +# ============================================================================== +# Headroom provides 47-92% token reduction through intelligent context compression. +# It runs as a Python sidecar container managed automatically by Lynkr via Docker. 
+# +# Features: +# - Smart Crusher: Statistical JSON compression for tool outputs +# - Cache Aligner: Stabilizes dynamic content for provider cache hits +# - CCR (Compress-Cache-Retrieve): Reversible compression with on-demand retrieval +# - Rolling Window: Token budget enforcement with turn-based windowing +# - LLMLingua (optional): ML-based 20x compression with GPU acceleration + +# Enable/disable Headroom compression (default: false) +HEADROOM_ENABLED=false + +# Sidecar endpoint (auto-configured when Docker is enabled) +HEADROOM_ENDPOINT=http://localhost:8787 + +# Request timeout in milliseconds +HEADROOM_TIMEOUT_MS=5000 + +# Minimum tokens to trigger compression (skip small requests) +HEADROOM_MIN_TOKENS=500 + +# Operating mode: "audit" (observe only) or "optimize" (apply transforms) +HEADROOM_MODE=optimize + +# Provider for cache optimization hints: anthropic, openai, google +HEADROOM_PROVIDER=anthropic + +# Log level: debug, info, warning, error +HEADROOM_LOG_LEVEL=info + +# ============================================================================== +# Headroom Docker Configuration +# ============================================================================== +# When enabled, Lynkr automatically manages the Headroom container lifecycle + +# Enable Docker container management (default: true when HEADROOM_ENABLED=true) +HEADROOM_DOCKER_ENABLED=true + +# Docker image to use +HEADROOM_DOCKER_IMAGE=lynkr/headroom-sidecar:latest + +# Container name +HEADROOM_DOCKER_CONTAINER_NAME=lynkr-headroom + +# Port mapping +HEADROOM_DOCKER_PORT=8787 + +# Resource limits +HEADROOM_DOCKER_MEMORY_LIMIT=512m +HEADROOM_DOCKER_CPU_LIMIT=1.0 + +# Restart policy: no, always, unless-stopped, on-failure +HEADROOM_DOCKER_RESTART_POLICY=unless-stopped + +# Docker network (optional, leave empty for default bridge) +# HEADROOM_DOCKER_NETWORK=lynkr-network + +# Build from local source instead of pulling image +# HEADROOM_DOCKER_AUTO_BUILD=true +# HEADROOM_DOCKER_BUILD_CONTEXT=./headroom-sidecar + +# ============================================================================== +# Headroom Transform Settings +# ============================================================================== + +# Smart Crusher (statistical JSON compression) +HEADROOM_SMART_CRUSHER=true +HEADROOM_SMART_CRUSHER_MIN_TOKENS=200 +HEADROOM_SMART_CRUSHER_MAX_ITEMS=15 + +# Tool Crusher (fixed-rules compression for tool outputs) +HEADROOM_TOOL_CRUSHER=true + +# Cache Aligner (stabilize dynamic content like UUIDs, timestamps) +HEADROOM_CACHE_ALIGNER=true + +# Rolling Window (context overflow management) +HEADROOM_ROLLING_WINDOW=true +HEADROOM_KEEP_TURNS=3 + +# ============================================================================== +# Headroom CCR (Compress-Cache-Retrieve) +# ============================================================================== + +# Enable CCR for reversible compression with on-demand retrieval +HEADROOM_CCR=true + +# TTL for cached content in seconds (default: 5 minutes) +HEADROOM_CCR_TTL=300 + +# ============================================================================== +# Headroom LLMLingua (Optional ML Compression) +# ============================================================================== +# LLMLingua-2 provides ML-based 20x compression using BERT token classification. +# Requires GPU for reasonable performance, or use CPU with longer timeouts. 
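+#
+# Example CPU-only setup (illustrative values; raise the Lynkr-side
+# HEADROOM_TIMEOUT_MS to absorb the extra compression latency):
+#   HEADROOM_LLMLINGUA=true
+#   HEADROOM_LLMLINGUA_DEVICE=cpu
+#   HEADROOM_TIMEOUT_MS=15000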
+
+# Enable LLMLingua (default: false; GPU strongly recommended)
+HEADROOM_LLMLINGUA=false
+
+# Device: cuda, cpu, auto
+HEADROOM_LLMLINGUA_DEVICE=auto
diff --git a/docker-compose.yml b/docker-compose.yml
index 8b40158..4f99144 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -142,6 +142,27 @@ services:
       CIRCUIT_BREAKER_TIMEOUT: ${CIRCUIT_BREAKER_TIMEOUT:-60000}
       LOAD_SHEDDING_MEMORY_THRESHOLD: ${LOAD_SHEDDING_MEMORY_THRESHOLD:-0.85}
 
+      # ============================================================
+      # HEADROOM CONTEXT COMPRESSION (OPTIONAL)
+      # ============================================================
+      # Provides 47-92% token reduction through intelligent compression
+      HEADROOM_ENABLED: ${HEADROOM_ENABLED:-false}
+      HEADROOM_ENDPOINT: ${HEADROOM_ENDPOINT:-http://headroom:8787}
+      HEADROOM_TIMEOUT_MS: ${HEADROOM_TIMEOUT_MS:-5000}
+      HEADROOM_MIN_TOKENS: ${HEADROOM_MIN_TOKENS:-500}
+      HEADROOM_MODE: ${HEADROOM_MODE:-optimize}
+      # Disable Docker management - we use docker-compose instead
+      HEADROOM_DOCKER_ENABLED: "false"
+      # Transform settings
+      HEADROOM_SMART_CRUSHER: ${HEADROOM_SMART_CRUSHER:-true}
+      HEADROOM_TOOL_CRUSHER: ${HEADROOM_TOOL_CRUSHER:-true}
+      HEADROOM_CACHE_ALIGNER: ${HEADROOM_CACHE_ALIGNER:-true}
+      HEADROOM_ROLLING_WINDOW: ${HEADROOM_ROLLING_WINDOW:-true}
+      HEADROOM_KEEP_TURNS: ${HEADROOM_KEEP_TURNS:-3}
+      HEADROOM_CCR: ${HEADROOM_CCR:-true}
+      HEADROOM_CCR_TTL: ${HEADROOM_CCR_TTL:-300}
+      HEADROOM_LLMLINGUA: ${HEADROOM_LLMLINGUA:-false}
+
     volumes:
       - ./data:/app/data # Persist SQLite databases
       - .:/workspace # Mount workspace
@@ -244,6 +265,68 @@ services:
       retries: 3
       start_period: 20s
 
+  # Headroom context compression sidecar (47-92% token reduction)
+  headroom:
+    image: lynkr/headroom-sidecar:latest
+    container_name: lynkr-headroom
+    profiles:
+      - headroom
+    build:
+      context: ./headroom-sidecar
+      dockerfile: Dockerfile
+    ports:
+      - "8787:8787"
+    environment:
+      HEADROOM_HOST: "0.0.0.0"
+      HEADROOM_PORT: "8787"
+      HEADROOM_LOG_LEVEL: ${HEADROOM_LOG_LEVEL:-info}
+      HEADROOM_MODE: ${HEADROOM_MODE:-optimize}
+      HEADROOM_PROVIDER: ${HEADROOM_PROVIDER:-anthropic}
+      # Transforms
+      HEADROOM_SMART_CRUSHER: ${HEADROOM_SMART_CRUSHER:-true}
+      HEADROOM_SMART_CRUSHER_MIN_TOKENS: ${HEADROOM_SMART_CRUSHER_MIN_TOKENS:-200}
+      HEADROOM_SMART_CRUSHER_MAX_ITEMS: ${HEADROOM_SMART_CRUSHER_MAX_ITEMS:-15}
+      HEADROOM_TOOL_CRUSHER: ${HEADROOM_TOOL_CRUSHER:-true}
+      HEADROOM_CACHE_ALIGNER: ${HEADROOM_CACHE_ALIGNER:-true}
+      HEADROOM_ROLLING_WINDOW: ${HEADROOM_ROLLING_WINDOW:-true}
+      HEADROOM_KEEP_TURNS: ${HEADROOM_KEEP_TURNS:-3}
+      # CCR
+      HEADROOM_CCR: ${HEADROOM_CCR:-true}
+      HEADROOM_CCR_TTL: ${HEADROOM_CCR_TTL:-300}
+      # LLMLingua (optional, requires GPU)
+      HEADROOM_LLMLINGUA: ${HEADROOM_LLMLINGUA:-false}
+      HEADROOM_LLMLINGUA_DEVICE: ${HEADROOM_LLMLINGUA_DEVICE:-auto}
+    volumes:
+      - headroom-data:/app/data
+    restart: unless-stopped
+    networks:
+      - lynkr-network
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8787/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+    labels:
+      - "com.lynkr.service=headroom"
+      - "com.lynkr.description=Context compression sidecar"
+    deploy:
+      resources:
+        limits:
+          cpus: '1'
+          memory: 512M
+        reservations:
+          cpus: '0.25'
+          memory: 256M
+    # For GPU support (LLMLingua), replace the deploy block above with:
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: 1
+    #           capabilities: [gpu]
+
 volumes:
   ollama-data:
     driver: local
@@ -251,6 +334,8 @@ volumes:
 #    driver: local
   searxng-data:
     driver: local
+  headroom-data:
+    driver: local
networks: lynkr-network: diff --git a/documentation/README.md b/documentation/README.md index 01aec91..6569a62 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -31,6 +31,7 @@ Understand Lynkr's capabilities: - **[Architecture & Features](features.md)** - System architecture, request flow, format conversion, and core capabilities - **[Memory System](memory-system.md)** - Titans-inspired long-term memory with surprise-based filtering and decay - **[Token Optimization](token-optimization.md)** - Achieve 60-80% cost reduction through smart tool selection, prompt caching, and memory deduplication +- **[Headroom Compression](headroom.md)** - 47-92% token reduction through intelligent context compression (Smart Crusher, CCR, LLMLingua) - **[Tools & Execution Modes](tools.md)** - Tool calling, server vs client execution, custom tool integration, MCP support --- @@ -71,7 +72,7 @@ Get help and contribute: - [Installation](installation.md) | [Providers](providers.md) | [Claude Code](claude-code-cli.md) | [Cursor](cursor-integration.md) | [Embeddings](embeddings.md) ### Features & Optimization -- [Features](features.md) | [Memory System](memory-system.md) | [Token Optimization](token-optimization.md) | [Tools](tools.md) +- [Features](features.md) | [Memory System](memory-system.md) | [Token Optimization](token-optimization.md) | [Headroom](headroom.md) | [Tools](tools.md) ### Deployment & Production - [Docker](docker.md) | [Production](production.md) | [API Reference](api.md) diff --git a/documentation/headroom.md b/documentation/headroom.md new file mode 100644 index 0000000..5fe3ff2 --- /dev/null +++ b/documentation/headroom.md @@ -0,0 +1,519 @@ +# Headroom Context Compression + +Headroom is an intelligent context compression system that reduces LLM token usage by 47-92% while preserving semantic meaning. It runs as a Python sidecar container that Lynkr manages automatically via Docker. + +--- + +## Overview + +### What is Headroom? + +Headroom is a context optimization SDK that compresses LLM prompts and tool outputs using: + +1. **Smart Crusher** - Statistical JSON compression based on field analysis +2. **Cache Aligner** - Stabilizes dynamic content (UUIDs, timestamps) for provider cache hits +3. **CCR (Compress-Cache-Retrieve)** - Reversible compression with on-demand retrieval +4. **Rolling Window** - Token budget enforcement with turn-based windowing +5. **LLMLingua** (optional) - ML-based 20x compression using BERT + +### Benefits + +| Metric | Without Headroom | With Headroom | +|--------|-----------------|---------------| +| Token usage | 100% | 8-53% (47-92% reduction) | +| Cache hit rate | ~20% | ~60-80% | +| Cost per request | $0.01-0.05 | $0.002-0.02 | +| Context overflow | Common | Rare | + +--- + +## Quick Start + +### 1. Enable Headroom + +Add to your `.env`: + +```bash +# Enable Headroom compression +HEADROOM_ENABLED=true +``` + +### 2. Start Lynkr + +```bash +npm start +``` + +Lynkr will automatically: +1. Pull the `lynkr/headroom-sidecar:latest` Docker image +2. Start the container with configured settings +3. Wait for health checks to pass +4. Begin compressing requests + +### 3. 
Verify It's Working + +Check the health endpoint: + +```bash +curl http://localhost:8081/health/headroom +``` + +Expected response: +```json +{ + "enabled": true, + "healthy": true, + "service": { + "available": true, + "ccrEnabled": true, + "llmlinguaEnabled": false + }, + "docker": { + "running": true, + "status": "running", + "health": "healthy" + } +} +``` + +--- + +## How It Works + +### Transform Pipeline + +When a request arrives, Headroom processes it through a three-stage pipeline: + +``` +Request → Cache Aligner → Smart Crusher → Context Manager → Compressed Request + ↓ ↓ ↓ + Stabilize IDs Compress JSON Enforce budget +``` + +### 1. Cache Aligner + +**Problem**: Dynamic content like UUIDs and timestamps change every request, preventing provider cache hits. + +**Solution**: Replace dynamic values with stable placeholders: + +```json +// Before +{"id": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "created": "2024-01-15T10:30:00Z"} + +// After +{"id": "[ID:1]", "created": "[TS:1]"} +``` + +**Result**: 60-80% cache hit rate instead of ~20%. + +### 2. Smart Crusher + +**Problem**: Tool outputs often contain repetitive JSON with many similar items. + +**Solution**: Statistical analysis to identify and compress redundant fields: + +```json +// Before (100 search results, ~50KB) +[ + {"title": "Result 1", "url": "...", "snippet": "...", "score": 0.95, ...}, + {"title": "Result 2", "url": "...", "snippet": "...", "score": 0.93, ...}, + // ... 98 more items +] + +// After (~5KB) +{ + "_meta": {"compressed": true, "original_count": 100, "kept": 12}, + "items": [ + // Top 12 most relevant items with essential fields only + ] +} +``` + +**Compression strategies**: +- **High-variance fields**: Keep (they're informative) +- **Low-variance fields**: Remove (they're redundant) +- **Unique fields**: Keep first occurrence only +- **Repetitive arrays**: Sample representative items + +### 3. CCR (Compress-Cache-Retrieve) + +**Problem**: Sometimes you need to retrieve compressed content later. + +**Solution**: Hash-based reversible compression: + +```json +// Compressed message +{ + "content": "[CCR:abc123] 100 files found. Use ccr_retrieve to explore.", + "ccr_available": true +} + +// Tool definition injected +{ + "name": "ccr_retrieve", + "description": "Retrieve compressed content by hash", + "input_schema": { + "hash": "string", + "query": "string (optional search within results)" + } +} +``` + +When the LLM calls `ccr_retrieve`, Headroom returns the full original content. 
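+
+You can exercise the retrieval path against the sidecar directly. A minimal
+sketch (the hash below is illustrative; real hashes come from `[CCR:...]`
+placeholders in compressed messages):
+
+```bash
+# Look up stored content by hash, optionally filtering items by a query string
+curl -s http://localhost:8787/ccr/retrieve \
+  -H "Content-Type: application/json" \
+  -d '{"hash": "abc123", "query": "error", "max_results": 5}'
+```
+
+A successful response has the shape `{"success": true, "content": [...],
+"items_retrieved": 5, "was_search": true}`. Entries expire after
+`HEADROOM_CCR_TTL` seconds, after which retrieval returns an error.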
+ +--- + +## Configuration + +### Basic Settings + +```bash +# Enable/disable Headroom +HEADROOM_ENABLED=true + +# Sidecar endpoint +HEADROOM_ENDPOINT=http://localhost:8787 + +# Request timeout (ms) +HEADROOM_TIMEOUT_MS=5000 + +# Skip compression for small requests (tokens) +HEADROOM_MIN_TOKENS=500 + +# Mode: "audit" (observe) or "optimize" (apply) +HEADROOM_MODE=optimize +``` + +### Docker Settings + +```bash +# Enable automatic container management +HEADROOM_DOCKER_ENABLED=true + +# Docker image +HEADROOM_DOCKER_IMAGE=lynkr/headroom-sidecar:latest + +# Container name +HEADROOM_DOCKER_CONTAINER_NAME=lynkr-headroom + +# Port mapping +HEADROOM_DOCKER_PORT=8787 + +# Resource limits +HEADROOM_DOCKER_MEMORY_LIMIT=512m +HEADROOM_DOCKER_CPU_LIMIT=1.0 + +# Restart policy +HEADROOM_DOCKER_RESTART_POLICY=unless-stopped +``` + +### Transform Settings + +```bash +# Smart Crusher (statistical JSON compression) +HEADROOM_SMART_CRUSHER=true +HEADROOM_SMART_CRUSHER_MIN_TOKENS=200 +HEADROOM_SMART_CRUSHER_MAX_ITEMS=15 + +# Tool Crusher (fixed-rules compression) +HEADROOM_TOOL_CRUSHER=true + +# Cache Aligner (stabilize dynamic content) +HEADROOM_CACHE_ALIGNER=true + +# Rolling Window (context overflow management) +HEADROOM_ROLLING_WINDOW=true +HEADROOM_KEEP_TURNS=3 +``` + +### CCR Settings + +```bash +# Enable CCR for reversible compression +HEADROOM_CCR=true + +# Cache TTL in seconds +HEADROOM_CCR_TTL=300 +``` + +### LLMLingua Settings (Optional) + +LLMLingua provides ML-based compression using BERT token classification. Requires GPU for reasonable performance. + +```bash +# Enable LLMLingua (default: false) +HEADROOM_LLMLINGUA=true + +# Device: cuda, cpu, auto +HEADROOM_LLMLINGUA_DEVICE=cuda +``` + +**Note**: LLMLingua adds 100-500ms latency per request. Only enable if you have a GPU and need maximum compression. + +--- + +## API Endpoints + +### Health Check + +```bash +GET /health/headroom +``` + +Returns Headroom health status including container and service state. + +### Compression Metrics + +```bash +GET /metrics/compression +``` + +Returns compression statistics: + +```json +{ + "enabled": true, + "endpoint": "http://localhost:8787", + "client": { + "totalCalls": 150, + "successfulCompressions": 120, + "skippedCompressions": 25, + "failures": 5, + "totalTokensSaved": 450000, + "averageLatencyMs": 45, + "compressionRate": 80, + "failureRate": 3 + }, + "server": { + "requests_total": 150, + "compressions_applied": 120, + "average_compression_ratio": 0.35, + "ccr_retrievals": 45 + } +} +``` + +### Detailed Status + +```bash +GET /headroom/status +``` + +Returns full status including configuration, metrics, and recent logs. + +### Container Restart + +```bash +POST /headroom/restart +``` + +Restarts the Headroom container (useful for applying config changes). + +### Container Logs + +```bash +GET /headroom/logs?tail=100 +``` + +Returns recent container logs for debugging. + +--- + +## Monitoring + +### Health Check Integration + +Headroom status is included in the `/health/ready` endpoint: + +```json +{ + "status": "ready", + "checks": { + "database": { "healthy": true }, + "memory": { "healthy": true }, + "headroom": { + "healthy": true, + "enabled": true, + "service": "available", + "docker": "running" + } + } +} +``` + +**Note**: Headroom is non-critical. If it fails, Lynkr continues without compression. 
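+
+For orchestration probes, this means readiness logic should treat the
+`headroom` entry as advisory. A minimal probe sketch (the `jq` filter assumes
+the payload shape shown above):
+
+```bash
+# Succeed unless a critical (non-Headroom) check reports unhealthy
+curl -s http://localhost:8081/health/ready \
+  | jq -e '.checks | del(.headroom) | all(.[]; .healthy)'
+```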
+ +### Logging + +Headroom logs compression events: + +``` +INFO: Headroom compression applied + tokensBefore: 15000 + tokensAfter: 5200 + savingsPercent: 65.3 + latencyMs: 42 + transforms: ["cache_aligner", "smart_crusher"] +``` + +--- + +## Troubleshooting + +### Container Won't Start + +**Check Docker is running:** +```bash +docker ps +``` + +**Check for port conflicts:** +```bash +lsof -i :8787 +``` + +**View container logs:** +```bash +curl http://localhost:8081/headroom/logs +# or +docker logs lynkr-headroom +``` + +### High Latency + +1. **Reduce transforms**: Disable LLMLingua if not needed +2. **Increase resources**: Raise `HEADROOM_DOCKER_MEMORY_LIMIT` +3. **Skip small requests**: Increase `HEADROOM_MIN_TOKENS` + +### Compression Not Applied + +Check: +1. `HEADROOM_ENABLED=true` in `.env` +2. Request has more than `HEADROOM_MIN_TOKENS` tokens +3. Health endpoint shows `healthy: true` + +### CCR Retrieval Fails + +1. Check `HEADROOM_CCR=true` +2. Verify TTL hasn't expired (`HEADROOM_CCR_TTL`) +3. Ensure same session is used (CCR is session-scoped) + +--- + +## Architecture + +### System Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Lynkr (Node.js) │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Request Handler │ │ +│ │ ↓ │ │ +│ │ src/headroom/client.js ──HTTP──→ Headroom Sidecar │ │ +│ │ ↓ (Python Container) │ │ +│ │ Compressed Request │ │ │ +│ │ ↓ ↓ │ │ +│ │ LLM Provider ┌─────────────┐ │ │ +│ │ │ Transforms │ │ │ +│ └──────────────────────────────────│ - Aligner │─────────┘ │ +│ │ - Crusher │ │ +│ │ - CCR Store │ │ +│ │ - LLMLingua │ │ +│ └─────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Request Flow + +1. **Request arrives** at Lynkr +2. **Token estimation** - Skip if below `HEADROOM_MIN_TOKENS` +3. **Send to sidecar** - HTTP POST to `/compress` +4. **Transform pipeline** executes: + - Cache Aligner stabilizes dynamic content + - Smart Crusher compresses JSON structures + - Context Manager enforces token budget +5. **Return compressed** messages and tools +6. **Forward to LLM** provider +7. **On CCR tool call** - Retrieve original content + +### File Structure + +``` +src/headroom/ +├── index.js # HeadroomManager singleton, exports +├── launcher.js # Docker container lifecycle (dockerode) +├── client.js # HTTP client for sidecar API +└── health.js # Health check functionality +``` + +--- + +## Best Practices + +### 1. Start with Defaults + +The default configuration is optimized for most use cases: +- Smart Crusher: Enabled +- Cache Aligner: Enabled +- CCR: Enabled +- LLMLingua: Disabled (enable only with GPU) + +### 2. Monitor Compression Rates + +Check `/metrics/compression` regularly: +- **Good**: 60-80% compression rate +- **Warning**: Below 40% (check transform settings) +- **Issue**: High failure rate (check container health) + +### 3. Tune for Your Workload + +| Workload | Recommended Settings | +|----------|---------------------| +| Code assistance | `SMART_CRUSHER_MAX_ITEMS=20` | +| Search-heavy | `SMART_CRUSHER_MAX_ITEMS=10`, CCR enabled | +| Long conversations | `ROLLING_WINDOW=true`, `KEEP_TURNS=5` | +| Cost-sensitive | Enable LLMLingua with GPU | + +### 4. Use Audit Mode First + +Test compression without applying it: + +```bash +HEADROOM_MODE=audit +``` + +This logs what would be compressed without modifying requests. + +--- + +## FAQ + +### Does Headroom affect response quality? + +Minimal impact. 
Smart Crusher preserves high-variance (informative) fields and CCR allows full retrieval when needed. LLMLingua may have ~1.5% quality reduction. + +### Can I use Headroom without Docker? + +Yes. Disable Docker management and run the sidecar manually: + +```bash +HEADROOM_DOCKER_ENABLED=false +HEADROOM_ENDPOINT=http://your-headroom-server:8787 +``` + +### Is Headroom required? + +No. If Headroom fails or is disabled, Lynkr works normally without compression. + +### What providers benefit most? + +All providers benefit from compression. Anthropic and OpenAI see additional benefits from Cache Aligner improving cache hit rates. + +--- + +## References + +- [Headroom GitHub Repository](https://github.com/chopratejas/headroom) +- [LLMLingua Paper](https://arxiv.org/abs/2310.05736) +- [Anthropic Prompt Caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) diff --git a/headroom-sidecar/Dockerfile b/headroom-sidecar/Dockerfile new file mode 100644 index 0000000..61da055 --- /dev/null +++ b/headroom-sidecar/Dockerfile @@ -0,0 +1,47 @@ +# Headroom Sidecar - Context Compression Service +# Provides 47-92% token reduction for LLM requests + +FROM python:3.11-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY server.py . +COPY config.py . + +# Create data directory +RUN mkdir -p /app/data + +# Environment variables with defaults +ENV HEADROOM_HOST="0.0.0.0" \ + HEADROOM_PORT="8787" \ + HEADROOM_LOG_LEVEL="info" \ + HEADROOM_MODE="optimize" \ + HEADROOM_PROVIDER="anthropic" \ + HEADROOM_SMART_CRUSHER="true" \ + HEADROOM_SMART_CRUSHER_MIN_TOKENS="200" \ + HEADROOM_SMART_CRUSHER_MAX_ITEMS="15" \ + HEADROOM_TOOL_CRUSHER="true" \ + HEADROOM_CACHE_ALIGNER="true" \ + HEADROOM_ROLLING_WINDOW="true" \ + HEADROOM_KEEP_TURNS="3" \ + HEADROOM_CCR="true" \ + HEADROOM_CCR_TTL="300" \ + HEADROOM_LLMLINGUA="false" \ + HEADROOM_LLMLINGUA_DEVICE="auto" + +EXPOSE 8787 + +HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ + CMD curl -f http://localhost:8787/health || exit 1 + +CMD ["python", "server.py"] diff --git a/headroom-sidecar/config.py b/headroom-sidecar/config.py new file mode 100644 index 0000000..8bf53ed --- /dev/null +++ b/headroom-sidecar/config.py @@ -0,0 +1,93 @@ +""" +Headroom Sidecar Configuration +Loads settings from environment variables +""" + +import os +from typing import Optional + + +def str_to_bool(value: str) -> bool: + """Convert string to boolean""" + return value.lower() in ("true", "1", "yes", "on") + + +class HeadroomConfig: + """Configuration for Headroom sidecar""" + + def __init__(self): + # Server settings + self.host = os.environ.get("HEADROOM_HOST", "0.0.0.0") + self.port = int(os.environ.get("HEADROOM_PORT", "8787")) + self.log_level = os.environ.get("HEADROOM_LOG_LEVEL", "info") + + # Operating mode + self.mode = os.environ.get("HEADROOM_MODE", "optimize") + self.provider = os.environ.get("HEADROOM_PROVIDER", "anthropic") + + # Smart Crusher settings + self.smart_crusher_enabled = str_to_bool( + os.environ.get("HEADROOM_SMART_CRUSHER", "true") + ) + self.smart_crusher_min_tokens = int( + os.environ.get("HEADROOM_SMART_CRUSHER_MIN_TOKENS", "200") + ) + self.smart_crusher_max_items = int( + os.environ.get("HEADROOM_SMART_CRUSHER_MAX_ITEMS", "15") + ) + + # Tool Crusher settings + 
self.tool_crusher_enabled = str_to_bool( + os.environ.get("HEADROOM_TOOL_CRUSHER", "true") + ) + + # Cache Aligner settings + self.cache_aligner_enabled = str_to_bool( + os.environ.get("HEADROOM_CACHE_ALIGNER", "true") + ) + + # Rolling Window settings + self.rolling_window_enabled = str_to_bool( + os.environ.get("HEADROOM_ROLLING_WINDOW", "true") + ) + self.keep_turns = int(os.environ.get("HEADROOM_KEEP_TURNS", "3")) + + # CCR settings + self.ccr_enabled = str_to_bool(os.environ.get("HEADROOM_CCR", "true")) + self.ccr_ttl = int(os.environ.get("HEADROOM_CCR_TTL", "300")) + + # LLMLingua settings + self.llmlingua_enabled = str_to_bool( + os.environ.get("HEADROOM_LLMLINGUA", "false") + ) + self.llmlingua_device = os.environ.get("HEADROOM_LLMLINGUA_DEVICE", "auto") + + def to_dict(self) -> dict: + """Return configuration as dictionary""" + return { + "host": self.host, + "port": self.port, + "log_level": self.log_level, + "mode": self.mode, + "provider": self.provider, + "smart_crusher": { + "enabled": self.smart_crusher_enabled, + "min_tokens": self.smart_crusher_min_tokens, + "max_items": self.smart_crusher_max_items, + }, + "tool_crusher": {"enabled": self.tool_crusher_enabled}, + "cache_aligner": {"enabled": self.cache_aligner_enabled}, + "rolling_window": { + "enabled": self.rolling_window_enabled, + "keep_turns": self.keep_turns, + }, + "ccr": {"enabled": self.ccr_enabled, "ttl": self.ccr_ttl}, + "llmlingua": { + "enabled": self.llmlingua_enabled, + "device": self.llmlingua_device, + }, + } + + +# Global config instance +config = HeadroomConfig() diff --git a/headroom-sidecar/requirements.txt b/headroom-sidecar/requirements.txt new file mode 100644 index 0000000..29f1182 --- /dev/null +++ b/headroom-sidecar/requirements.txt @@ -0,0 +1,14 @@ +# Headroom Sidecar Dependencies + +# Core framework +fastapi>=0.109.0 +uvicorn[standard]>=0.27.0 +pydantic>=2.5.0 + +# Headroom SDK +headroom-ai>=0.1.0 + +# Optional: LLMLingua support (uncomment for ML compression) +# llmlingua>=0.2.0 +# torch>=2.0.0 +# transformers>=4.36.0 diff --git a/headroom-sidecar/server.py b/headroom-sidecar/server.py new file mode 100644 index 0000000..ea3a1df --- /dev/null +++ b/headroom-sidecar/server.py @@ -0,0 +1,451 @@ +""" +Headroom Sidecar Server +FastAPI application providing context compression via HTTP API +""" + +import logging +import time +import hashlib +import json +from typing import Any, Dict, List, Optional +from datetime import datetime + +from fastapi import FastAPI, HTTPException +from fastapi.responses import JSONResponse +from pydantic import BaseModel +import uvicorn + +from config import config + +# Setup logging +logging.basicConfig( + level=getattr(logging, config.log_level.upper()), + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger("headroom-sidecar") + +# Initialize FastAPI app +app = FastAPI( + title="Headroom Sidecar", + description="Context compression service for LLM requests", + version="1.0.0", +) + +# Try to import headroom, fallback to basic compression if not available +try: + from headroom import ( + TransformPipeline, + SmartCrusher, + SmartCrusherConfig, + ToolCrusher, + ToolCrusherConfig, + RollingWindow, + RollingWindowConfig, + AnthropicProvider, + OpenAIProvider, + ) + import warnings + warnings.filterwarnings("ignore", message=".*tiktoken approximation.*") + + # Create transforms based on config + transforms = [] + + if config.smart_crusher_enabled: + transforms.append(SmartCrusher(SmartCrusherConfig( + enabled=True, + 
min_tokens_to_crush=config.smart_crusher_min_tokens, + max_items_after_crush=config.smart_crusher_max_items, + ))) + logger.info("SmartCrusher enabled") + + if config.tool_crusher_enabled: + transforms.append(ToolCrusher(ToolCrusherConfig( + enabled=True, + ))) + logger.info("ToolCrusher enabled") + + if config.rolling_window_enabled: + transforms.append(RollingWindow(RollingWindowConfig( + enabled=True, + keep_last_turns=config.keep_turns, + ))) + logger.info("RollingWindow enabled") + + # Create provider based on config + if config.provider == "openai": + headroom_provider = OpenAIProvider() + else: + headroom_provider = AnthropicProvider() + + headroom_pipeline = TransformPipeline(transforms=transforms, provider=headroom_provider) if transforms else None + HEADROOM_AVAILABLE = headroom_pipeline is not None + logger.info(f"Headroom SDK loaded successfully with {len(transforms)} transforms (provider: {config.provider})") +except ImportError as e: + logger.warning(f"Headroom SDK not available: {e}. Using basic compression.") + headroom_pipeline = None + HEADROOM_AVAILABLE = False + +# CCR Store (in-memory with TTL) +ccr_store: Dict[str, Dict[str, Any]] = {} + +# Metrics +metrics = { + "requests_total": 0, + "compressions_applied": 0, + "compressions_skipped": 0, + "errors": 0, + "ccr_stores": 0, + "ccr_retrievals": 0, + "total_tokens_before": 0, + "total_tokens_after": 0, + "start_time": datetime.utcnow().isoformat(), +} + + +# Request/Response models +class CompressRequest(BaseModel): + messages: List[Dict[str, Any]] + tools: Optional[List[Dict[str, Any]]] = None + model: Optional[str] = "claude-3-5-sonnet-20241022" + model_limit: Optional[int] = 200000 + mode: Optional[str] = None + token_budget: Optional[int] = None + query_context: Optional[str] = None + preserve_recent_turns: Optional[int] = None + target_ratio: Optional[float] = None + + +class CompressResponse(BaseModel): + messages: List[Dict[str, Any]] + tools: Optional[List[Dict[str, Any]]] = None + compressed: bool + stats: Dict[str, Any] + + +class CCRRetrieveRequest(BaseModel): + hash: str + query: Optional[str] = None + max_results: Optional[int] = 20 + + +class CCRRetrieveResponse(BaseModel): + success: bool + content: Optional[Any] = None + items_retrieved: int = 0 + was_search: bool = False + error: Optional[str] = None + + +def estimate_tokens(data: Any) -> int: + """Estimate token count (rough approximation: ~4 chars per token)""" + text = json.dumps(data) if not isinstance(data, str) else data + return len(text) // 4 + + +def generate_hash(content: Any) -> str: + """Generate hash for CCR storage""" + text = json.dumps(content, sort_keys=True) + return hashlib.sha256(text.encode()).hexdigest()[:12] + + +def cleanup_expired_ccr(): + """Remove expired CCR entries""" + now = time.time() + expired = [k for k, v in ccr_store.items() if now - v["timestamp"] > config.ccr_ttl] + for key in expired: + del ccr_store[key] + + +def basic_compress(messages: List[Dict], tools: Optional[List] = None) -> Dict: + """Basic compression when Headroom SDK is not available""" + tokens_before = estimate_tokens(messages) + compressed_messages = [] + + for msg in messages: + compressed_msg = msg.copy() + + # Compress large tool results + if msg.get("role") == "user" and isinstance(msg.get("content"), list): + new_content = [] + for block in msg["content"]: + if block.get("type") == "tool_result": + content = block.get("content", "") + if isinstance(content, str) and len(content) > 2000: + # Store in CCR and replace with reference + hash_key = 
generate_hash(content) + ccr_store[hash_key] = { + "content": content, + "timestamp": time.time(), + "tool_name": block.get("tool_use_id", "unknown"), + } + metrics["ccr_stores"] += 1 + block = block.copy() + block["content"] = ( + f"[CCR:{hash_key}] Content compressed ({len(content)} chars). " + f"Use ccr_retrieve to access full content." + ) + new_content.append(block) + compressed_msg["content"] = new_content + compressed_messages.append(compressed_msg) + + tokens_after = estimate_tokens(compressed_messages) + + return { + "messages": compressed_messages, + "tools": tools, + "compressed": tokens_after < tokens_before, + "stats": { + "tokens_before": tokens_before, + "tokens_after": tokens_after, + "tokens_saved": tokens_before - tokens_after, + "savings_percent": round( + (1 - tokens_after / tokens_before) * 100, 1 + ) if tokens_before > 0 else 0, + "transforms_applied": ["basic_ccr"] if tokens_after < tokens_before else [], + "latency_ms": 0, + }, + } + + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + cleanup_expired_ccr() + return { + "status": "healthy", + "headroom_loaded": HEADROOM_AVAILABLE, + "ccr_enabled": config.ccr_enabled, + "llmlingua_enabled": config.llmlingua_enabled, + "entries_cached": len(ccr_store), + "config": config.to_dict(), + } + + +@app.get("/metrics") +async def get_metrics(): + """Get compression metrics""" + return { + **metrics, + "average_compression_ratio": ( + round(metrics["total_tokens_after"] / metrics["total_tokens_before"], 3) + if metrics["total_tokens_before"] > 0 + else 1.0 + ), + "ccr_entries": len(ccr_store), + "uptime_seconds": ( + datetime.utcnow() - datetime.fromisoformat(metrics["start_time"]) + ).total_seconds(), + } + + +@app.post("/compress", response_model=CompressResponse) +async def compress_messages(request: CompressRequest): + """Compress messages and tools""" + start_time = time.time() + metrics["requests_total"] += 1 + + try: + tokens_before = estimate_tokens(request.messages) + metrics["total_tokens_before"] += tokens_before + + # Skip if below minimum tokens + if tokens_before < config.smart_crusher_min_tokens: + metrics["compressions_skipped"] += 1 + return CompressResponse( + messages=request.messages, + tools=request.tools, + compressed=False, + stats={ + "skipped": True, + "reason": f"Below threshold ({tokens_before} < {config.smart_crusher_min_tokens})", + }, + ) + + # Use Headroom SDK if available + if HEADROOM_AVAILABLE and headroom_pipeline: + try: + result = headroom_pipeline.apply( + request.messages, + model=request.model, + model_limit=request.model_limit, + ) + + # Extract messages from TransformResult + if hasattr(result, 'messages'): + compressed_messages = result.messages + # transforms_applied may be strings or objects with .name + if hasattr(result, 'transforms_applied'): + transforms_applied = [t if isinstance(t, str) else getattr(t, 'name', str(t)) for t in result.transforms_applied] + else: + transforms_applied = [] + elif isinstance(result, dict): + compressed_messages = result.get("messages", request.messages) + transforms_applied = result.get("transforms", []) + else: + compressed_messages = result if isinstance(result, list) else request.messages + transforms_applied = [] + + tokens_after = estimate_tokens(compressed_messages) + metrics["total_tokens_after"] += tokens_after + metrics["compressions_applied"] += 1 + + return CompressResponse( + messages=compressed_messages, + tools=request.tools, # Tools not modified by current transforms + compressed=tokens_after < 
tokens_before, + stats={ + "tokens_before": tokens_before, + "tokens_after": tokens_after, + "tokens_saved": tokens_before - tokens_after, + "savings_percent": round( + (1 - tokens_after / tokens_before) * 100, 1 + ) if tokens_before > 0 else 0, + "transforms_applied": transforms_applied, + "latency_ms": round((time.time() - start_time) * 1000, 1), + }, + ) + except Exception as e: + logger.warning(f"Headroom SDK error, falling back to basic: {e}") + + # Fallback to basic compression + result = basic_compress(request.messages, request.tools) + metrics["total_tokens_after"] += result["stats"]["tokens_after"] + if result["compressed"]: + metrics["compressions_applied"] += 1 + else: + metrics["compressions_skipped"] += 1 + + result["stats"]["latency_ms"] = round((time.time() - start_time) * 1000, 1) + return CompressResponse(**result) + + except Exception as e: + metrics["errors"] += 1 + logger.error(f"Compression error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/ccr/retrieve", response_model=CCRRetrieveResponse) +async def ccr_retrieve(request: CCRRetrieveRequest): + """Retrieve content from CCR store""" + cleanup_expired_ccr() + + if request.hash not in ccr_store: + return CCRRetrieveResponse( + success=False, + error=f"Hash {request.hash} not found or expired", + ) + + entry = ccr_store[request.hash] + content = entry["content"] + metrics["ccr_retrievals"] += 1 + + # If query provided, search within content + if request.query: + if isinstance(content, list): + # Filter list items by query + filtered = [ + item + for item in content + if request.query.lower() in json.dumps(item).lower() + ][: request.max_results] + return CCRRetrieveResponse( + success=True, + content=filtered, + items_retrieved=len(filtered), + was_search=True, + ) + elif isinstance(content, str): + # Return content if query matches + if request.query.lower() in content.lower(): + return CCRRetrieveResponse( + success=True, + content=content, + items_retrieved=1, + was_search=True, + ) + return CCRRetrieveResponse( + success=False, + error="Query not found in content", + ) + + # Return full content + return CCRRetrieveResponse( + success=True, + content=content, + items_retrieved=1 if not isinstance(content, list) else len(content), + was_search=False, + ) + + +@app.post("/ccr/track") +async def ccr_track( + hash_key: str, + turn_number: int, + tool_name: str, + sample: str, +): + """Track compression for proactive expansion""" + return {"tracked": True, "hash_key": hash_key} + + +@app.post("/ccr/analyze") +async def ccr_analyze(query: str, turn_number: int): + """Analyze query for proactive CCR expansion""" + # Simple keyword matching for expansion suggestions + expansions = [] + for hash_key, entry in ccr_store.items(): + if query.lower() in json.dumps(entry["content"]).lower(): + expansions.append( + { + "hash": hash_key, + "tool_name": entry.get("tool_name", "unknown"), + "relevance": 0.8, + } + ) + return {"expansions": expansions[:5]} + + +@app.post("/compress/llmlingua") +async def llmlingua_compress( + text: str, + target_ratio: float = 0.5, + force_tokens: Optional[str] = None, +): + """Compress text using LLMLingua (if available)""" + if not config.llmlingua_enabled: + raise HTTPException(status_code=400, detail="LLMLingua is not enabled") + + try: + # Try to import and use llmlingua + from llmlingua import PromptCompressor + + compressor = PromptCompressor(device_map=config.llmlingua_device) + result = compressor.compress_prompt( + text, + rate=target_ratio, + 
force_tokens=json.loads(force_tokens) if force_tokens else None, + ) + return { + "compressed": result["compressed_prompt"], + "original_tokens": result.get("origin_tokens", len(text) // 4), + "compressed_tokens": result.get("compressed_tokens", len(result["compressed_prompt"]) // 4), + "ratio": result.get("rate", target_ratio), + } + except ImportError: + raise HTTPException( + status_code=501, + detail="LLMLingua not installed. Add llmlingua to requirements.txt", + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +if __name__ == "__main__": + logger.info(f"Starting Headroom sidecar on {config.host}:{config.port}") + logger.info(f"Configuration: {json.dumps(config.to_dict(), indent=2)}") + uvicorn.run( + app, + host=config.host, + port=config.port, + log_level=config.log_level, + ) diff --git a/package-lock.json b/package-lock.json index 4fe532f..e157950 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,18 +1,19 @@ { "name": "lynkr", - "version": "4.2.0", + "version": "5.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lynkr", - "version": "4.2.0", + "version": "5.0.0", "license": "Apache-2.0", "dependencies": { "@azure/openai": "^2.0.0", "better-sqlite3": "^9.4.0", "compression": "^1.7.4", "diff": "^5.2.0", + "dockerode": "^4.0.2", "dotenv": "^16.4.5", "express": "^5.1.0", "express-rate-limit": "^8.2.1", @@ -153,6 +154,12 @@ "node": ">=18.0.0" } }, + "node_modules/@balena/dockerignore": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@balena/dockerignore/-/dockerignore-1.0.2.tgz", + "integrity": "sha512-wMue2Sy4GAVTk6Ic4tJVcnfdau+gx2EnG7S+uAEe+TWJFqE4YoWN4/H8MSLj4eYJKxGg26lZwboEniNiNwZQ6Q==", + "license": "Apache-2.0" + }, "node_modules/@eslint-community/eslint-utils": { "version": "4.9.0", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.9.0.tgz", @@ -430,6 +437,15 @@ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", "license": "Python-2.0" }, + "node_modules/asn1": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz", + "integrity": "sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==", + "license": "MIT", + "dependencies": { + "safer-buffer": "~2.1.0" + } + }, "node_modules/atomic-sleep": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz", @@ -466,6 +482,15 @@ ], "license": "MIT" }, + "node_modules/bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==", + "license": "BSD-3-Clause", + "dependencies": { + "tweetnacl": "^0.14.3" + } + }, "node_modules/better-sqlite3": { "version": "9.6.0", "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-9.6.0.tgz", @@ -619,6 +644,15 @@ "ieee754": "^1.2.1" } }, + "node_modules/buildcheck": { + "version": "0.0.7", + "resolved": "https://registry.npmjs.org/buildcheck/-/buildcheck-0.0.7.tgz", + "integrity": "sha512-lHblz4ahamxpTmnsk+MNTRWsjYKv965MwOrSJyeD588rR3Jcu7swE+0wN5F+PbL5cjgu/9ObkhfzEPuofEMwLA==", + "optional": true, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -856,6 +890,20 @@ "node": ">=6.6.0" } }, + 
"node_modules/cpu-features": { + "version": "0.0.10", + "resolved": "https://registry.npmjs.org/cpu-features/-/cpu-features-0.0.10.tgz", + "integrity": "sha512-9IkYqtX3YHPCzoVg1Py+o9057a3i0fp7S530UWokCSaFVTc7CwXPRiOjRjBQQ18ZCNafx78YfnG+HALxtVmOGA==", + "hasInstallScript": true, + "optional": true, + "dependencies": { + "buildcheck": "~0.0.6", + "nan": "^2.19.0" + }, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -956,6 +1004,61 @@ "node": ">=0.3.1" } }, + "node_modules/docker-modem": { + "version": "5.0.6", + "resolved": "https://registry.npmjs.org/docker-modem/-/docker-modem-5.0.6.tgz", + "integrity": "sha512-ens7BiayssQz/uAxGzH8zGXCtiV24rRWXdjNha5V4zSOcxmAZsfGVm/PPFbwQdqEkDnhG+SyR9E3zSHUbOKXBQ==", + "license": "Apache-2.0", + "dependencies": { + "debug": "^4.1.1", + "readable-stream": "^3.5.0", + "split-ca": "^1.0.1", + "ssh2": "^1.15.0" + }, + "engines": { + "node": ">= 8.0" + } + }, + "node_modules/docker-modem/node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/dockerode": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/dockerode/-/dockerode-4.0.2.tgz", + "integrity": "sha512-9wM1BVpVMFr2Pw3eJNXrYYt6DT9k0xMcsSCjtPvyQ+xa1iPg/Mo3T/gUcwI0B2cczqCeCYRPF8yFYDwtFXT0+w==", + "license": "Apache-2.0", + "dependencies": { + "@balena/dockerignore": "^1.0.2", + "docker-modem": "^5.0.3", + "tar-fs": "~2.0.1" + }, + "engines": { + "node": ">= 8.0" + } + }, + "node_modules/dockerode/node_modules/tar-fs": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.0.1.tgz", + "integrity": "sha512-6tzWDMeroL87uF/+lin46k+Q+46rAJ0SyPGz7OW7wTgblI273hsBqk2C1j0/xNadNLKDTUL9BukSjB7cwgmlPA==", + "license": "MIT", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.0.0" + } + }, "node_modules/doctrine": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", @@ -3080,6 +3183,12 @@ "atomic-sleep": "^1.0.0" } }, + "node_modules/split-ca": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/split-ca/-/split-ca-1.0.1.tgz", + "integrity": "sha512-Q5thBSxp5t8WPTTJQS59LrGqOZqOsrhDGDVm8azCqIBjSBd7nd9o2PM+mDulQQkh8h//4U6hFZnc/mul8t5pWQ==", + "license": "ISC" + }, "node_modules/split2": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", @@ -3089,6 +3198,23 @@ "node": ">= 10.x" } }, + "node_modules/ssh2": { + "version": "1.17.0", + "resolved": "https://registry.npmjs.org/ssh2/-/ssh2-1.17.0.tgz", + "integrity": "sha512-wPldCk3asibAjQ/kziWQQt1Wh3PgDFpC0XpwclzKcdT1vql6KeYxf5LIt4nlFkUeR8WuphYMKqUA56X4rjbfgQ==", + "hasInstallScript": true, + "dependencies": { + "asn1": "^0.2.6", + "bcrypt-pbkdf": "^1.0.2" + }, + "engines": { + "node": ">=10.16.0" + }, + "optionalDependencies": { + "cpu-features": "~0.0.10", + "nan": "^2.23.0" + } + }, "node_modules/statuses": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", @@ -3295,6 +3421,12 @@ "node": "*" } }, + "node_modules/tweetnacl": { + 
"version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==", + "license": "Unlicense" + }, "node_modules/type-check": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", diff --git a/package.json b/package.json index 5a9d6d2..a6798ac 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,9 @@ "lynkr-setup": "./scripts/setup.js" }, "scripts": { - "start": "node index.js", + "prestart": "docker compose --profile headroom up -d headroom 2>/dev/null || echo 'Headroom container not started (Docker may not be running)'", + "start": "node index.js 2>&1 | npx pino-pretty --sync", + "stop": "docker compose --profile headroom down", "dev": "nodemon index.js", "lint": "eslint src index.js", "test": "npm run test:unit && npm run test:performance", @@ -46,6 +48,7 @@ "better-sqlite3": "^9.4.0", "compression": "^1.7.4", "diff": "^5.2.0", + "dockerode": "^4.0.2", "dotenv": "^16.4.5", "express": "^5.1.0", "express-rate-limit": "^8.2.1", diff --git a/src/api/health.js b/src/api/health.js index fa60395..aea436a 100644 --- a/src/api/health.js +++ b/src/api/health.js @@ -2,6 +2,7 @@ const Database = require("better-sqlite3"); const { invokeModel } = require("../clients/databricks"); const logger = require("../logger"); const config = require("../config"); +const { getHeadroomManager } = require("../headroom"); /** * Health check endpoints @@ -65,6 +66,32 @@ async function readinessCheck(req, res) { allHealthy = false; } + // Check Headroom (if enabled) + if (config.headroom?.enabled) { + try { + const headroomManager = getHeadroomManager(); + const headroomHealth = await headroomManager.getHealth(); + checks.headroom = { + healthy: headroomHealth.healthy, + enabled: headroomHealth.enabled, + service: headroomHealth.service?.available ? "available" : "unavailable", + docker: headroomHealth.docker?.running ? "running" : "stopped", + error: headroomHealth.error, + }; + // Don't fail overall health if Headroom is unavailable + // It's a non-critical service - compression will be skipped + if (!headroomHealth.healthy) { + checks.headroom.note = "Compression will be skipped"; + } + } catch (err) { + checks.headroom = { + healthy: false, + error: err.message, + note: "Compression will be skipped", + }; + } + } + // Optional: Check provider (can be slow) if (req.query.deep === "true") { const provider = config.modelProvider?.type || "databricks"; diff --git a/src/api/router.js b/src/api/router.js index 8c15c21..894a99d 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -595,4 +595,65 @@ router.use("/v1", openaiRouter); // These provide /v1/models and /v1/providers for Claude Code CLI compatibility router.use("/v1", providersRouter); +// Headroom compression endpoints +router.get("/metrics/compression", async (req, res) => { + try { + const { getCombinedMetrics } = require("../headroom"); + const metrics = await getCombinedMetrics(); + res.json(metrics); + } catch (error) { + res.status(500).json({ error: error.message }); + } +}); + +router.get("/health/headroom", async (req, res) => { + try { + const { getHeadroomManager } = require("../headroom"); + const manager = getHeadroomManager(); + const health = await manager.getHealth(); + res.status(health.healthy ? 
200 : 503).json(health); + } catch (error) { + res.status(500).json({ error: error.message }); + } +}); + +router.get("/headroom/status", async (req, res) => { + try { + const { getHeadroomManager } = require("../headroom"); + const manager = getHeadroomManager(); + const status = await manager.getDetailedStatus(); + res.json(status); + } catch (error) { + res.status(500).json({ error: error.message }); + } +}); + +router.post("/headroom/restart", async (req, res) => { + try { + const { getHeadroomManager } = require("../headroom"); + const manager = getHeadroomManager(); + const result = await manager.restart(); + res.json({ success: true, ...result }); + } catch (error) { + res.status(500).json({ error: error.message }); + } +}); + +router.get("/headroom/logs", async (req, res) => { + try { + const { getHeadroomManager } = require("../headroom"); + const manager = getHeadroomManager(); + const tail = parseInt(req.query.tail || "100", 10); + const logs = await manager.getLogs(tail); + + if (logs === null) { + return res.status(400).json({ error: "Docker management is disabled" }); + } + + res.type("text/plain").send(logs); + } catch (error) { + res.status(500).json({ error: error.message }); + } +}); + module.exports = router; diff --git a/src/cache/prompt.js b/src/cache/prompt.js index 4fb8e72..c8d4592 100644 --- a/src/cache/prompt.js +++ b/src/cache/prompt.js @@ -46,6 +46,7 @@ class PromptCache { ? options.pruneIntervalMs : 300000; this.pruneTimer = null; + this.isClosed = false; // Track if database has been closed // Initialize persistent cache database if (this.enabled) { @@ -169,16 +170,21 @@ class PromptCache { } pruneExpired() { - if (!this.enabled || !this.db) return; + if (!this.enabled || !this.db || this.isClosed) return; if (this.ttlMs <= 0) return; try { + // Check if database is still open + if (!this.db.open) return; const now = Date.now(); const result = this.deleteExpiredStmt.run(now); if (result.changes > 0) { logger.debug({ deleted: result.changes }, "Pruned expired cache entries"); } } catch (error) { - logger.warn({ err: error }, "Failed to prune expired cache entries"); + // Only log if not a "connection closed" error during shutdown + if (!this.isClosed) { + logger.warn({ err: error }, "Failed to prune expired cache entries"); + } } } @@ -361,15 +367,27 @@ class PromptCache { // Cleanup method close() { - this.stopPruning(); // Stop pruning first - if (this.db) { + if (this.isClosed) return; // Already closed + this.stopPruning(); // Stop pruning timer first + + if (this.db && this.db.open) { try { - this.pruneExpired(); // Final cleanup + // Final prune before closing (direct call, not through pruneExpired) + if (this.ttlMs > 0 && this.deleteExpiredStmt) { + const result = this.deleteExpiredStmt.run(Date.now()); + if (result.changes > 0) { + logger.debug({ deleted: result.changes }, "Final cache prune on close"); + } + } this.db.close(); + logger.debug("Prompt cache database closed"); } catch (error) { - logger.warn({ err: error }, "Failed to close cache database"); + // Ignore errors during shutdown - database may already be closed + logger.debug({ err: error }, "Cache database close (may already be closed)"); } } + + this.isClosed = true; // Mark as closed after cleanup } } diff --git a/src/clients/databricks.js b/src/clients/databricks.js index d8b8e24..93128af 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -500,11 +500,91 @@ async function invokeAzureOpenAI(body) { return performJsonRequest(endpoint, { headers, body: azureBody }, 
"Azure OpenAI"); } else if (format === "responses") { - azureBody.max_completion_tokens = azureBody.max_tokens; - delete azureBody.max_tokens; - delete azureBody.temperature; - delete azureBody.top_p; - return performJsonRequest(endpoint, { headers, body: azureBody }, "Azure OpenAI"); + // Responses API uses 'input' instead of 'messages' and flat tool format + // Convert tools from Chat Completions format to Responses API format + const responsesTools = azureBody.tools?.map(tool => { + if (tool.type === "function" && tool.function) { + // Flatten: {type:"function", function:{name,description,parameters}} -> {type:"function", name, description, parameters} + return { + type: "function", + name: tool.function.name, + description: tool.function.description, + parameters: tool.function.parameters + }; + } + return tool; + }); + + const responsesBody = { + input: azureBody.messages, + model: azureBody.model, + max_output_tokens: azureBody.max_tokens, + tools: responsesTools, + tool_choice: azureBody.tool_choice, + stream: false + }; + logger.info({ + format: "responses", + inputCount: responsesBody.input?.length, + model: responsesBody.model, + hasTools: !!responsesBody.tools + }, "Using Responses API format"); + + const result = await performJsonRequest(endpoint, { headers, body: responsesBody }, "Azure OpenAI Responses"); + + // Convert Responses API response to Chat Completions format + if (result.ok && result.json?.output) { + const outputArray = result.json.output || []; + + // Find message output (contains text content) + const messageOutput = outputArray.find(o => o.type === "message"); + const textContent = messageOutput?.content?.find(c => c.type === "output_text")?.text || ""; + + // Find function_call outputs (tool calls are separate items in output array) + const toolCalls = outputArray + .filter(o => o.type === "function_call") + .map(tc => ({ + id: tc.call_id || tc.id || `call_${Date.now()}`, + type: "function", + function: { + name: tc.name, + arguments: typeof tc.arguments === 'string' ? tc.arguments : JSON.stringify(tc.arguments || {}) + } + })); + + logger.info({ + outputTypes: outputArray.map(o => o.type), + hasMessage: !!messageOutput, + toolCallCount: toolCalls.length, + toolCallNames: toolCalls.map(tc => tc.function.name) + }, "Parsing Responses API output"); + + // Convert to Chat Completions format + result.json = { + id: result.json.id, + object: "chat.completion", + created: result.json.created_at, + model: result.json.model, + choices: [{ + index: 0, + message: { + role: "assistant", + content: textContent, + tool_calls: toolCalls.length > 0 ? toolCalls : undefined + }, + finish_reason: toolCalls.length > 0 ? 
"tool_calls" : "stop" + }], + usage: result.json.usage + }; + + logger.info({ + convertedContent: textContent?.substring(0, 100), + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length + }, "Converted Responses API to Chat Completions format"); + } + + return result; } else { throw new Error(`Unsupported Azure OpenAI endpoint format: ${format}`); @@ -1167,9 +1247,9 @@ function convertOpenAIToAnthropic(response) { const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0; if (message.content) { content.push({ type: "text", text: message.content }); - } else if (message.reasoning_content && !message.content && !hasToolCalls) { - // Z.AI returns reasoning in reasoning_content - if no final content AND no tool calls, use a placeholder - content.push({ type: "text", text: "[Model is still thinking - increase max_tokens for complete response]" }); + } else if (message.reasoning_content && !message.content) { + // Thinking models (Kimi-K2, o1, etc.) return response in reasoning_content + content.push({ type: "text", text: message.reasoning_content }); } // Convert tool calls diff --git a/src/config/index.js b/src/config/index.js index 63efb76..e823fdb 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -185,6 +185,40 @@ const smartToolSelectionTokenBudget = Number.parseInt( 10 ); +// Headroom sidecar configuration +const headroomEnabled = process.env.HEADROOM_ENABLED === "true"; +const headroomEndpoint = process.env.HEADROOM_ENDPOINT?.trim() || "http://localhost:8787"; +const headroomTimeoutMs = Number.parseInt(process.env.HEADROOM_TIMEOUT_MS ?? "5000", 10); +const headroomMinTokens = Number.parseInt(process.env.HEADROOM_MIN_TOKENS ?? "500", 10); +const headroomMode = (process.env.HEADROOM_MODE ?? "optimize").toLowerCase(); + +// Headroom Docker container configuration +const headroomDockerEnabled = process.env.HEADROOM_DOCKER_ENABLED !== "false"; // default true when headroom enabled +const headroomDockerImage = process.env.HEADROOM_DOCKER_IMAGE ?? "lynkr/headroom-sidecar:latest"; +const headroomDockerContainerName = process.env.HEADROOM_DOCKER_CONTAINER_NAME ?? "lynkr-headroom"; +const headroomDockerPort = Number.parseInt(process.env.HEADROOM_DOCKER_PORT ?? "8787", 10); +const headroomDockerMemoryLimit = process.env.HEADROOM_DOCKER_MEMORY_LIMIT ?? "512m"; +const headroomDockerCpuLimit = process.env.HEADROOM_DOCKER_CPU_LIMIT ?? "1.0"; +const headroomDockerRestartPolicy = process.env.HEADROOM_DOCKER_RESTART_POLICY ?? "unless-stopped"; +const headroomDockerNetwork = process.env.HEADROOM_DOCKER_NETWORK ?? null; +const headroomDockerBuildContext = process.env.HEADROOM_DOCKER_BUILD_CONTEXT ?? "./headroom-sidecar"; +const headroomDockerAutoBuild = process.env.HEADROOM_DOCKER_AUTO_BUILD === "true"; + +// Headroom transform configuration (passed to sidecar) +const headroomSmartCrusher = process.env.HEADROOM_SMART_CRUSHER !== "false"; +const headroomSmartCrusherMinTokens = Number.parseInt(process.env.HEADROOM_SMART_CRUSHER_MIN_TOKENS ?? "200", 10); +const headroomSmartCrusherMaxItems = Number.parseInt(process.env.HEADROOM_SMART_CRUSHER_MAX_ITEMS ?? "15", 10); +const headroomToolCrusher = process.env.HEADROOM_TOOL_CRUSHER !== "false"; +const headroomCacheAligner = process.env.HEADROOM_CACHE_ALIGNER !== "false"; +const headroomRollingWindow = process.env.HEADROOM_ROLLING_WINDOW !== "false"; +const headroomKeepTurns = Number.parseInt(process.env.HEADROOM_KEEP_TURNS ?? 
"3", 10); +const headroomCcrEnabled = process.env.HEADROOM_CCR !== "false"; +const headroomCcrTtl = Number.parseInt(process.env.HEADROOM_CCR_TTL ?? "300", 10); +const headroomLlmlingua = process.env.HEADROOM_LLMLINGUA === "true"; +const headroomLlmlinguaDevice = process.env.HEADROOM_LLMLINGUA_DEVICE ?? "auto"; +const headroomProvider = process.env.HEADROOM_PROVIDER ?? "anthropic"; +const headroomLogLevel = process.env.HEADROOM_LOG_LEVEL ?? "info"; + // Only require Databricks credentials if it's the primary provider or used as fallback if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) { throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy."); @@ -668,6 +702,44 @@ const config = { tokenBudget: smartToolSelectionTokenBudget, minimalMode: false, // HARDCODED - disabled }, + headroom: { + enabled: headroomEnabled, + endpoint: headroomEndpoint, + timeoutMs: Number.isNaN(headroomTimeoutMs) ? 5000 : headroomTimeoutMs, + minTokens: Number.isNaN(headroomMinTokens) ? 500 : headroomMinTokens, + mode: headroomMode, + docker: { + enabled: headroomDockerEnabled, + image: headroomDockerImage, + containerName: headroomDockerContainerName, + port: Number.isNaN(headroomDockerPort) ? 8787 : headroomDockerPort, + memoryLimit: headroomDockerMemoryLimit, + cpuLimit: headroomDockerCpuLimit, + restartPolicy: headroomDockerRestartPolicy, + network: headroomDockerNetwork, + buildContext: headroomDockerBuildContext, + autoBuild: headroomDockerAutoBuild, + }, + transforms: { + smartCrusher: headroomSmartCrusher, + smartCrusherMinTokens: Number.isNaN(headroomSmartCrusherMinTokens) ? 200 : headroomSmartCrusherMinTokens, + smartCrusherMaxItems: Number.isNaN(headroomSmartCrusherMaxItems) ? 15 : headroomSmartCrusherMaxItems, + toolCrusher: headroomToolCrusher, + cacheAligner: headroomCacheAligner, + rollingWindow: headroomRollingWindow, + keepTurns: Number.isNaN(headroomKeepTurns) ? 3 : headroomKeepTurns, + }, + ccr: { + enabled: headroomCcrEnabled, + ttlSeconds: Number.isNaN(headroomCcrTtl) ? 300 : headroomCcrTtl, + }, + llmlingua: { + enabled: headroomLlmlingua, + device: headroomLlmlinguaDevice, + }, + provider: headroomProvider, + logLevel: headroomLogLevel, + }, }; /** diff --git a/src/headroom/client.js b/src/headroom/client.js new file mode 100644 index 0000000..3e5bd37 --- /dev/null +++ b/src/headroom/client.js @@ -0,0 +1,435 @@ +/** + * Headroom Sidecar HTTP Client + * + * HTTP client for communicating with the Headroom compression sidecar. + * Provides message compression, CCR retrieval, and metrics collection. 
+ */ + +const logger = require("../logger"); +const config = require("../config"); + +// Metrics tracking +const metrics = { + totalCalls: 0, + successfulCompressions: 0, + skippedCompressions: 0, + failures: 0, + totalTokensSaved: 0, + totalLatencyMs: 0, + ccrRetrievals: 0, + ccrSearches: 0, +}; + +/** + * Get Headroom configuration + */ +function getConfig() { + return config.headroom; +} + +/** + * Check if Headroom is enabled + */ +function isEnabled() { + return config.headroom?.enabled === true; +} + +/** + * Check if Headroom sidecar is healthy + */ +async function checkHealth() { + const headroomConfig = getConfig(); + + if (!isEnabled()) { + return { available: false, reason: "disabled" }; + } + + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 2000); + + const response = await fetch(`${headroomConfig.endpoint}/health`, { + signal: controller.signal, + }); + clearTimeout(timeout); + + if (response.ok) { + const data = await response.json(); + return { + available: data.headroom_loaded === true, + status: data.status, + ccrEnabled: data.ccr_enabled, + llmlinguaEnabled: data.llmlingua_enabled, + entriesCached: data.entries_cached, + }; + } + return { available: false, reason: "unhealthy", status: response.status }; + } catch (err) { + return { available: false, reason: err.message }; + } +} + +/** + * Estimate tokens in messages (rough approximation: ~4 chars per token) + */ +function estimateTokens(messages) { + const text = JSON.stringify(messages); + return Math.ceil(text.length / 4); +} + +/** + * Compress messages using Headroom sidecar + * + * @param {Array} messages - Chat messages in Anthropic format + * @param {Array} tools - Tool definitions + * @param {Object} options - Compression options + * @returns {Object} { messages, tools, compressed, stats } + */ +async function compressMessages(messages, tools = [], options = {}) { + const headroomConfig = getConfig(); + metrics.totalCalls++; + + if (!isEnabled()) { + return { + messages, + tools, + compressed: false, + stats: { skipped: true, reason: "disabled" }, + }; + } + + // Estimate tokens - skip if below threshold + const estimatedTokens = estimateTokens(messages); + if (estimatedTokens < headroomConfig.minTokens) { + metrics.skippedCompressions++; + return { + messages, + tools, + compressed: false, + stats: { + skipped: true, + reason: `Below threshold (${estimatedTokens} < ${headroomConfig.minTokens})`, + }, + }; + } + + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), headroomConfig.timeoutMs); + + const response = await fetch(`${headroomConfig.endpoint}/compress`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + messages, + tools, + model: options.model || "claude-3-5-sonnet-20241022", + model_limit: options.modelLimit || 200000, + mode: options.mode || headroomConfig.mode, + token_budget: options.tokenBudget, + query_context: options.queryContext, + preserve_recent_turns: options.preserveRecentTurns, + target_ratio: options.targetRatio, + }), + signal: controller.signal, + }); + + clearTimeout(timeout); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Headroom returned ${response.status}: ${errorText}`); + } + + const result = await response.json(); + + // Update metrics + if (result.compressed) { + metrics.successfulCompressions++; + metrics.totalTokensSaved += result.stats?.tokens_saved || 0; + metrics.totalLatencyMs 
+= result.stats?.latency_ms || 0; + + logger.info( + { + tokensBefore: result.stats?.tokens_before, + tokensAfter: result.stats?.tokens_after, + savingsPercent: result.stats?.savings_percent, + latencyMs: result.stats?.latency_ms, + transforms: result.stats?.transforms_applied, + }, + "Headroom compression applied" + ); + } else { + metrics.skippedCompressions++; + logger.debug({ reason: result.stats?.reason }, "Headroom compression skipped"); + } + + return { + messages: result.messages, + tools: result.tools, + compressed: result.compressed, + stats: result.stats, + }; + } catch (err) { + metrics.failures++; + + if (err.name === "AbortError") { + logger.warn({ timeoutMs: headroomConfig.timeoutMs }, "Headroom compression timed out"); + } else { + logger.warn({ error: err.message }, "Headroom compression failed, using original"); + } + + return { + messages, + tools, + compressed: false, + stats: { skipped: true, reason: err.message }, + }; + } +} + +/** + * Retrieve original content from CCR store + * + * @param {string} hash - Hash key from compression marker + * @param {string} query - Optional search query to filter results + * @param {number} maxResults - Maximum results for search (default 20) + * @returns {Object} { success, content, itemsRetrieved, wasSearch, error } + */ +async function ccrRetrieve(hash, query = null, maxResults = 20) { + const headroomConfig = getConfig(); + + if (!isEnabled()) { + return { success: false, error: "Headroom disabled" }; + } + + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), headroomConfig.timeoutMs); + + const response = await fetch(`${headroomConfig.endpoint}/ccr/retrieve`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ hash, query, max_results: maxResults }), + signal: controller.signal, + }); + + clearTimeout(timeout); + + if (!response.ok) { + throw new Error(`CCR retrieve returned ${response.status}`); + } + + const result = await response.json(); + + if (result.success) { + if (result.was_search) { + metrics.ccrSearches++; + logger.debug({ hash, query, items: result.items_retrieved }, "CCR search completed"); + } else { + metrics.ccrRetrievals++; + logger.debug({ hash, items: result.items_retrieved }, "CCR retrieval completed"); + } + } + + return { + success: result.success, + content: result.content, + itemsRetrieved: result.items_retrieved || 0, + wasSearch: result.was_search || false, + error: result.error, + }; + } catch (err) { + logger.error({ error: err.message, hash }, "CCR retrieval failed"); + return { success: false, error: err.message }; + } +} + +/** + * Track compression for proactive CCR expansion + */ +async function ccrTrack(hashKey, turnNumber, toolName, sample) { + const headroomConfig = getConfig(); + + if (!isEnabled()) { + return { tracked: false }; + } + + try { + const params = new URLSearchParams({ + hash_key: hashKey, + turn_number: String(turnNumber), + tool_name: toolName, + sample: sample.substring(0, 500), + }); + + const response = await fetch(`${headroomConfig.endpoint}/ccr/track?${params}`, { + method: "POST", + signal: AbortSignal.timeout(2000), + }); + + if (response.ok) { + return await response.json(); + } + return { tracked: false }; + } catch (err) { + logger.debug({ error: err.message }, "CCR tracking failed"); + return { tracked: false }; + } +} + +/** + * Analyze query for proactive CCR expansion + */ +async function ccrAnalyze(query, turnNumber) { + const headroomConfig = getConfig(); + + if 
(!isEnabled()) { + return { expansions: [] }; + } + + try { + const response = await fetch(`${headroomConfig.endpoint}/ccr/analyze`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ query, turn_number: turnNumber }), + signal: AbortSignal.timeout(2000), + }); + + if (response.ok) { + return await response.json(); + } + return { expansions: [] }; + } catch (err) { + logger.debug({ error: err.message }, "CCR analysis failed"); + return { expansions: [] }; + } +} + +/** + * Compress text using LLMLingua-2 ML compression + * (Optional - requires LLMLingua enabled in sidecar) + */ +async function llmlinguaCompress(text, targetRatio = 0.5, forceTokens = null) { + const headroomConfig = getConfig(); + + if (!isEnabled()) { + return { success: false, error: "Headroom disabled" }; + } + + try { + const params = new URLSearchParams({ + text, + target_ratio: String(targetRatio), + }); + + if (forceTokens && Array.isArray(forceTokens)) { + params.append("force_tokens", JSON.stringify(forceTokens)); + } + + const response = await fetch(`${headroomConfig.endpoint}/compress/llmlingua?${params}`, { + method: "POST", + signal: AbortSignal.timeout(30000), // LLMLingua can be slow + }); + + if (!response.ok) { + const error = await response.text(); + return { success: false, error }; + } + + const result = await response.json(); + return { + success: true, + compressed: result.compressed, + originalTokens: result.original_tokens, + compressedTokens: result.compressed_tokens, + ratio: result.ratio, + }; + } catch (err) { + logger.error({ error: err.message }, "LLMLingua compression failed"); + return { success: false, error: err.message }; + } +} + +/** + * Get client-side metrics + */ +function getMetrics() { + return { + ...metrics, + averageLatencyMs: + metrics.successfulCompressions > 0 + ? Math.round(metrics.totalLatencyMs / metrics.successfulCompressions) + : 0, + compressionRate: + metrics.totalCalls > 0 + ? Math.round((metrics.successfulCompressions / metrics.totalCalls) * 100) + : 0, + failureRate: + metrics.totalCalls > 0 ? 
Math.round((metrics.failures / metrics.totalCalls) * 100) : 0, + }; +} + +/** + * Get server-side metrics from sidecar + */ +async function getServerMetrics() { + const headroomConfig = getConfig(); + + if (!isEnabled()) { + return null; + } + + try { + const response = await fetch(`${headroomConfig.endpoint}/metrics`, { + signal: AbortSignal.timeout(2000), + }); + + if (response.ok) { + return await response.json(); + } + return null; + } catch (err) { + logger.debug({ error: err.message }, "Failed to fetch server metrics"); + return null; + } +} + +/** + * Get combined metrics (client + server) + */ +async function getCombinedMetrics() { + const clientMetrics = getMetrics(); + const serverMetrics = await getServerMetrics(); + + return { + enabled: isEnabled(), + endpoint: getConfig().endpoint, + client: clientMetrics, + server: serverMetrics, + }; +} + +/** + * Reset client-side metrics + */ +function resetMetrics() { + Object.keys(metrics).forEach((key) => { + metrics[key] = 0; + }); +} + +module.exports = { + isEnabled, + checkHealth, + compressMessages, + ccrRetrieve, + ccrTrack, + ccrAnalyze, + llmlinguaCompress, + getMetrics, + getServerMetrics, + getCombinedMetrics, + resetMetrics, + estimateTokens, +}; diff --git a/src/headroom/health.js b/src/headroom/health.js new file mode 100644 index 0000000..759bcea --- /dev/null +++ b/src/headroom/health.js @@ -0,0 +1,163 @@ +/** + * Headroom Health Check Module + * + * Provides health check functionality for the Headroom sidecar, + * including container status and service availability checks. + */ + +const logger = require("../logger"); +const config = require("../config"); +const launcher = require("./launcher"); +const client = require("./client"); + +// Cached health status +let lastHealthCheck = null; +let lastCheckTime = 0; +const CACHE_TTL_MS = 5000; // Cache health status for 5 seconds + +/** + * Perform a comprehensive health check on the Headroom system + */ +async function checkHeadroomHealth() { + const headroomConfig = config.headroom; + const now = Date.now(); + + // Return cached result if still valid + if (lastHealthCheck && now - lastCheckTime < CACHE_TTL_MS) { + return lastHealthCheck; + } + + const result = { + enabled: headroomConfig?.enabled === true, + healthy: false, + timestamp: new Date().toISOString(), + docker: null, + service: null, + error: null, + }; + + if (!result.enabled) { + result.healthy = true; // Disabled is considered "healthy" for the overall system + result.note = "Headroom is disabled"; + lastHealthCheck = result; + lastCheckTime = now; + return result; + } + + try { + // Check Docker container status (if Docker management is enabled) + if (headroomConfig.docker?.enabled) { + const containerStatus = await launcher.getStatus(); + result.docker = { + exists: containerStatus.exists, + running: containerStatus.running, + status: containerStatus.status, + health: containerStatus.health, + id: containerStatus.id, + image: containerStatus.image, + }; + } + + // Check HTTP service health + const serviceHealth = await client.checkHealth(); + result.service = serviceHealth; + + // Determine overall health + if (serviceHealth.available) { + result.healthy = true; + } else if (headroomConfig.docker?.enabled && result.docker?.running) { + // Container is running but service not responding - might be starting up + result.healthy = false; + result.error = "Container running but service not responding"; + } else { + result.healthy = false; + result.error = serviceHealth.reason || "Service unavailable"; + } + } 
catch (err) { + result.healthy = false; + result.error = err.message; + logger.error({ err }, "Headroom health check failed"); + } + + lastHealthCheck = result; + lastCheckTime = now; + + return result; +} + +/** + * Simple availability check (faster than full health check) + */ +async function isAvailable() { + if (!config.headroom?.enabled) { + return false; + } + + const health = await client.checkHealth(); + return health.available === true; +} + +/** + * Wait for Headroom to become available + */ +async function waitForAvailable(maxWaitMs = 30000, intervalMs = 1000) { + const startTime = Date.now(); + + while (Date.now() - startTime < maxWaitMs) { + if (await isAvailable()) { + return true; + } + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } + + return false; +} + +/** + * Get detailed status for debugging/monitoring + */ +async function getDetailedStatus() { + const health = await checkHeadroomHealth(); + const metrics = await client.getCombinedMetrics(); + + let containerLogs = null; + if (config.headroom?.docker?.enabled) { + containerLogs = await launcher.getLogs(50); + } + + return { + health, + metrics, + config: { + enabled: config.headroom?.enabled, + endpoint: config.headroom?.endpoint, + mode: config.headroom?.mode, + minTokens: config.headroom?.minTokens, + docker: config.headroom?.docker?.enabled + ? { + enabled: true, + image: config.headroom.docker.image, + containerName: config.headroom.docker.containerName, + port: config.headroom.docker.port, + } + : { enabled: false }, + }, + recentLogs: containerLogs ? containerLogs.split("\n").slice(-20) : null, + }; +} + +/** + * Clear cached health check result + */ +function clearCache() { + lastHealthCheck = null; + lastCheckTime = 0; +} + +module.exports = { + checkHeadroomHealth, + isAvailable, + waitForAvailable, + getDetailedStatus, + clearCache, +}; diff --git a/src/headroom/index.js b/src/headroom/index.js new file mode 100644 index 0000000..aba43ed --- /dev/null +++ b/src/headroom/index.js @@ -0,0 +1,240 @@ +/** + * Headroom Sidecar Integration + * + * Main entry point for Headroom functionality in Lynkr. + * Provides singleton manager for container lifecycle and compression operations. 
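+ *
+ * Illustrative lifecycle from a consumer module (a sketch; these names match
+ * the exports at the bottom of this file):
+ *
+ *   const { initializeHeadroom, shutdownHeadroom } = require("./headroom");
+ *   await initializeHeadroom();    // best-effort: logs and continues on failure
+ *   // ... serve traffic ...
+ *   await shutdownHeadroom(false); // stop the container but keep it for fast restarts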
+ */ + +const logger = require("../logger"); +const config = require("../config"); +const launcher = require("./launcher"); +const client = require("./client"); +const health = require("./health"); + +/** + * HeadroomManager - Singleton for managing Headroom sidecar lifecycle + */ +class HeadroomManager { + constructor() { + this.initialized = false; + this.startupError = null; + } + + /** + * Initialize the Headroom system + * Starts the Docker container if enabled and waits for it to be healthy + */ + async initialize() { + if (this.initialized) { + return { success: true, alreadyInitialized: true }; + } + + const headroomConfig = config.headroom; + + if (!headroomConfig?.enabled) { + logger.info("Headroom compression is disabled"); + this.initialized = true; + return { success: true, enabled: false }; + } + + logger.info( + { + endpoint: headroomConfig.endpoint, + mode: headroomConfig.mode, + dockerEnabled: headroomConfig.docker?.enabled, + }, + "Initializing Headroom sidecar" + ); + + try { + // Start Docker container if enabled + if (headroomConfig.docker?.enabled) { + const result = await launcher.ensureRunning(); + logger.info({ action: result.action }, "Headroom container ready"); + } else { + // Docker not enabled, just wait for external sidecar + logger.info("Docker management disabled, waiting for external Headroom sidecar"); + const available = await health.waitForAvailable(10000, 500); + + if (!available) { + throw new Error(`Headroom sidecar not available at ${headroomConfig.endpoint}`); + } + } + + // Verify service is healthy + const healthCheck = await health.checkHeadroomHealth(); + + if (!healthCheck.healthy) { + throw new Error(`Headroom not healthy: ${healthCheck.error}`); + } + + logger.info( + { + ccrEnabled: healthCheck.service?.ccrEnabled, + llmlinguaEnabled: healthCheck.service?.llmlinguaEnabled, + }, + "Headroom sidecar initialized successfully" + ); + + this.initialized = true; + return { success: true, health: healthCheck }; + } catch (err) { + this.startupError = err; + logger.error({ err }, "Failed to initialize Headroom sidecar"); + + // Don't throw - allow Lynkr to start without Headroom + // Compression will be skipped if Headroom is unavailable + this.initialized = true; + return { success: false, error: err.message }; + } + } + + /** + * Shutdown the Headroom system + * Stops the Docker container if we started it + */ + async shutdown(removeContainer = false) { + if (!config.headroom?.enabled) { + return; + } + + logger.info("Shutting down Headroom sidecar"); + + try { + if (config.headroom.docker?.enabled) { + await launcher.stop(removeContainer); + } + logger.info("Headroom sidecar shutdown complete"); + } catch (err) { + logger.error({ err }, "Error during Headroom shutdown"); + } + } + + /** + * Compress messages if Headroom is available + * Falls back to original messages if compression fails + */ + async compress(messages, tools = [], options = {}) { + return client.compressMessages(messages, tools, options); + } + + /** + * Retrieve content from CCR store + */ + async ccrRetrieve(hash, query = null, maxResults = 20) { + return client.ccrRetrieve(hash, query, maxResults); + } + + /** + * Track compression for proactive expansion + */ + async ccrTrack(hashKey, turnNumber, toolName, sample) { + return client.ccrTrack(hashKey, turnNumber, toolName, sample); + } + + /** + * Analyze query for proactive CCR expansion + */ + async ccrAnalyze(query, turnNumber) { + return client.ccrAnalyze(query, turnNumber); + } + + /** + * Check if Headroom is enabled + 
*/ + isEnabled() { + return client.isEnabled(); + } + + /** + * Check if Headroom is available and healthy + */ + async isAvailable() { + return health.isAvailable(); + } + + /** + * Get health status + */ + async getHealth() { + return health.checkHeadroomHealth(); + } + + /** + * Get metrics + */ + async getMetrics() { + return client.getCombinedMetrics(); + } + + /** + * Get detailed status for debugging + */ + async getDetailedStatus() { + return health.getDetailedStatus(); + } + + /** + * Restart the Headroom container + */ + async restart() { + if (!config.headroom?.docker?.enabled) { + throw new Error("Docker management is disabled"); + } + return launcher.restart(); + } + + /** + * Get container logs + */ + async getLogs(tail = 100) { + if (!config.headroom?.docker?.enabled) { + return null; + } + return launcher.getLogs(tail); + } +} + +// Singleton instance +let instance = null; + +/** + * Get the HeadroomManager singleton instance + */ +function getHeadroomManager() { + if (!instance) { + instance = new HeadroomManager(); + } + return instance; +} + +/** + * Initialize Headroom (convenience function) + */ +async function initializeHeadroom() { + const manager = getHeadroomManager(); + return manager.initialize(); +} + +/** + * Shutdown Headroom (convenience function) + */ +async function shutdownHeadroom(removeContainer = false) { + if (instance) { + return instance.shutdown(removeContainer); + } +} + +module.exports = { + HeadroomManager, + getHeadroomManager, + initializeHeadroom, + shutdownHeadroom, + // Re-export commonly used functions + isEnabled: client.isEnabled, + compressMessages: client.compressMessages, + ccrRetrieve: client.ccrRetrieve, + checkHealth: client.checkHealth, + getMetrics: client.getMetrics, + getCombinedMetrics: client.getCombinedMetrics, +}; diff --git a/src/headroom/launcher.js b/src/headroom/launcher.js new file mode 100644 index 0000000..f35231c --- /dev/null +++ b/src/headroom/launcher.js @@ -0,0 +1,517 @@ +/** + * Headroom Sidecar Container Launcher + * + * Uses dockerode to programmatically manage the Headroom sidecar container lifecycle. + * Provides automatic container creation, health checking, and graceful shutdown. 
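+ *
+ * Resource limits accept Docker-style strings; worked examples for the
+ * parsers defined below:
+ *
+ *   parseMemoryLimit("512m") // => 536870912 bytes
+ *   parseCpuLimit("0.5")     // => 500000000 NanoCPUs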
+ */ + +const Docker = require("dockerode"); +const logger = require("../logger"); +const config = require("../config"); + +// Initialize Docker client +const docker = new Docker(); + +// Launcher state +let containerInstance = null; +let isStarting = false; +let isShuttingDown = false; + +/** + * Get container environment variables for Headroom sidecar + */ +function getContainerEnv() { + const headroomConfig = config.headroom; + return [ + `HEADROOM_HOST=0.0.0.0`, + `HEADROOM_PORT=${headroomConfig.docker.port}`, + `HEADROOM_LOG_LEVEL=${headroomConfig.logLevel}`, + `HEADROOM_MODE=${headroomConfig.mode}`, + `HEADROOM_PROVIDER=${headroomConfig.provider}`, + // Transforms + `HEADROOM_SMART_CRUSHER=${headroomConfig.transforms.smartCrusher}`, + `HEADROOM_SMART_CRUSHER_MIN_TOKENS=${headroomConfig.transforms.smartCrusherMinTokens}`, + `HEADROOM_SMART_CRUSHER_MAX_ITEMS=${headroomConfig.transforms.smartCrusherMaxItems}`, + `HEADROOM_TOOL_CRUSHER=${headroomConfig.transforms.toolCrusher}`, + `HEADROOM_CACHE_ALIGNER=${headroomConfig.transforms.cacheAligner}`, + `HEADROOM_ROLLING_WINDOW=${headroomConfig.transforms.rollingWindow}`, + `HEADROOM_KEEP_TURNS=${headroomConfig.transforms.keepTurns}`, + // CCR + `HEADROOM_CCR=${headroomConfig.ccr.enabled}`, + `HEADROOM_CCR_TTL=${headroomConfig.ccr.ttlSeconds}`, + // LLMLingua + `HEADROOM_LLMLINGUA=${headroomConfig.llmlingua.enabled}`, + `HEADROOM_LLMLINGUA_DEVICE=${headroomConfig.llmlingua.device}`, + ]; +} + +/** + * Parse memory limit string to bytes for Docker API + * Supports formats like "512m", "1g", "256mb", "1gb" + */ +function parseMemoryLimit(limit) { + if (typeof limit !== "string") return 536870912; // Default 512MB + + const match = limit.toLowerCase().match(/^(\d+(?:\.\d+)?)\s*(b|k|kb|m|mb|g|gb)?$/); + if (!match) return 536870912; + + const value = parseFloat(match[1]); + const unit = match[2] || "b"; + + const multipliers = { + b: 1, + k: 1024, + kb: 1024, + m: 1024 * 1024, + mb: 1024 * 1024, + g: 1024 * 1024 * 1024, + gb: 1024 * 1024 * 1024, + }; + + return Math.floor(value * (multipliers[unit] || 1)); +} + +/** + * Parse CPU limit to NanoCPUs for Docker API + * Supports formats like "1.0", "0.5", "2" + */ +function parseCpuLimit(limit) { + if (typeof limit !== "string") return 1e9; // Default 1 CPU + + const value = parseFloat(limit); + if (Number.isNaN(value)) return 1e9; + + return Math.floor(value * 1e9); // Convert to NanoCPUs +} + +/** + * Check if the container already exists + */ +async function getExistingContainer() { + const containerName = config.headroom.docker.containerName; + + try { + const containers = await docker.listContainers({ + all: true, + filters: { name: [containerName] }, + }); + + // Find exact match (Docker returns partial matches) + const match = containers.find( + (c) => c.Names.includes(`/${containerName}`) || c.Names.includes(containerName) + ); + + if (match) { + return docker.getContainer(match.Id); + } + return null; + } catch (err) { + logger.error({ err }, "Failed to check for existing container"); + return null; + } +} + +/** + * Check if the Docker image exists locally + */ +async function imageExists(imageName) { + try { + const image = docker.getImage(imageName); + await image.inspect(); + return true; + } catch (err) { + if (err.statusCode === 404) { + return false; + } + throw err; + } +} + +/** + * Pull the Docker image + */ +async function pullImage(imageName) { + logger.info({ image: imageName }, "Pulling Headroom sidecar image"); + + return new Promise((resolve, reject) => { + 
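    // dockerode's pull() is callback-based; followProgress() below drains the
+    // layer-download stream and fires its completion callback once the pull finishes.
+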
docker.pull(imageName, (err, stream) => { + if (err) { + return reject(err); + } + + docker.modem.followProgress( + stream, + (err, output) => { + if (err) { + reject(err); + } else { + logger.info({ image: imageName }, "Image pull complete"); + resolve(output); + } + }, + (event) => { + if (event.status === "Downloading" || event.status === "Extracting") { + logger.debug({ status: event.status, progress: event.progress }, "Image pull progress"); + } + } + ); + }); + }); +} + +/** + * Build the Docker image from local context + */ +async function buildImage(imageName, buildContext) { + logger.info({ image: imageName, context: buildContext }, "Building Headroom sidecar image"); + + const path = require("path"); + const fs = require("fs"); + const { execSync } = require("child_process"); + + // Resolve build context path + const contextPath = path.resolve(process.cwd(), buildContext); + + if (!fs.existsSync(contextPath)) { + throw new Error(`Build context not found: ${contextPath}`); + } + + if (!fs.existsSync(path.join(contextPath, "Dockerfile"))) { + throw new Error(`Dockerfile not found in: ${contextPath}`); + } + + // Use docker build command for simplicity (dockerode build is complex with tar) + try { + execSync(`docker build -t ${imageName} ${contextPath}`, { + stdio: "inherit", + encoding: "utf8", + }); + logger.info({ image: imageName }, "Image build complete"); + } catch (err) { + throw new Error(`Failed to build image: ${err.message}`); + } +} + +/** + * Create and start the Headroom container + */ +async function createContainer() { + const headroomConfig = config.headroom; + const dockerConfig = headroomConfig.docker; + + const containerConfig = { + Image: dockerConfig.image, + name: dockerConfig.containerName, + Env: getContainerEnv(), + ExposedPorts: { + [`${dockerConfig.port}/tcp`]: {}, + }, + HostConfig: { + PortBindings: { + [`${dockerConfig.port}/tcp`]: [{ HostPort: String(dockerConfig.port) }], + }, + Memory: parseMemoryLimit(dockerConfig.memoryLimit), + NanoCpus: parseCpuLimit(dockerConfig.cpuLimit), + RestartPolicy: { + Name: dockerConfig.restartPolicy, + }, + }, + Healthcheck: { + Test: ["CMD", "curl", "-f", `http://localhost:${dockerConfig.port}/health`], + Interval: 30 * 1e9, // 30s in nanoseconds + Timeout: 10 * 1e9, // 10s + StartPeriod: 30 * 1e9, // 30s + Retries: 3, + }, + }; + + // Add network if specified + if (dockerConfig.network) { + containerConfig.HostConfig.NetworkMode = dockerConfig.network; + } + + logger.info( + { + name: dockerConfig.containerName, + image: dockerConfig.image, + port: dockerConfig.port, + memory: dockerConfig.memoryLimit, + }, + "Creating Headroom container" + ); + + const container = await docker.createContainer(containerConfig); + await container.start(); + + logger.info({ name: dockerConfig.containerName }, "Headroom container started"); + + return container; +} + +/** + * Wait for the container to be healthy + */ +async function waitForHealthy(container, maxRetries = 30, intervalMs = 1000) { + const headroomConfig = config.headroom; + + for (let i = 0; i < maxRetries; i++) { + try { + // Check container state + const info = await container.inspect(); + + if (info.State.Health?.Status === "healthy") { + logger.info("Headroom container is healthy"); + return true; + } + + if (info.State.Status === "exited" || info.State.Status === "dead") { + throw new Error(`Container exited unexpectedly: ${info.State.Status}`); + } + + // Also try direct HTTP health check + try { + const response = await fetch(`${headroomConfig.endpoint}/health`, { 
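+          // short 2s probe timeout so a hung request never stalls the wait loop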
+ signal: AbortSignal.timeout(2000), + }); + + if (response.ok) { + const data = await response.json(); + if (data.headroom_loaded) { + logger.info("Headroom sidecar is ready (HTTP health check passed)"); + return true; + } + } + } catch { + // HTTP check failed, continue waiting + } + + logger.debug({ attempt: i + 1, maxRetries }, "Waiting for Headroom container to be healthy"); + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } catch (err) { + if (err.message?.includes("exited unexpectedly")) { + throw err; + } + logger.debug({ err: err.message }, "Health check attempt failed"); + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } + } + + throw new Error(`Headroom container failed to become healthy after ${maxRetries} attempts`); +} + +/** + * Ensure the Headroom container is running + * Creates it if it doesn't exist, starts it if stopped + */ +async function ensureRunning() { + const headroomConfig = config.headroom; + + if (!headroomConfig.enabled) { + logger.debug("Headroom is disabled, skipping container launch"); + return { started: false, reason: "disabled" }; + } + + if (!headroomConfig.docker.enabled) { + logger.debug("Headroom Docker management is disabled"); + return { started: false, reason: "docker_disabled" }; + } + + if (isStarting) { + logger.debug("Headroom container is already starting"); + return { started: false, reason: "already_starting" }; + } + + if (isShuttingDown) { + logger.debug("Headroom is shutting down, skipping start"); + return { started: false, reason: "shutting_down" }; + } + + isStarting = true; + + try { + // Check for existing container + let container = await getExistingContainer(); + + if (container) { + const info = await container.inspect(); + const state = info.State; + + logger.info( + { name: headroomConfig.docker.containerName, state: state.Status }, + "Found existing Headroom container" + ); + + if (state.Running) { + // Container is already running + containerInstance = container; + await waitForHealthy(container); + return { started: true, action: "existing_running" }; + } + + // Container exists but is stopped, start it + logger.info("Starting existing Headroom container"); + await container.start(); + containerInstance = container; + await waitForHealthy(container); + return { started: true, action: "started_existing" }; + } + + // No container exists, need to create one + // First ensure the image exists + const exists = await imageExists(headroomConfig.docker.image); + + if (!exists) { + if (headroomConfig.docker.autoBuild) { + await buildImage(headroomConfig.docker.image, headroomConfig.docker.buildContext); + } else { + await pullImage(headroomConfig.docker.image); + } + } + + // Create and start the container + container = await createContainer(); + containerInstance = container; + await waitForHealthy(container); + + return { started: true, action: "created_new" }; + } catch (err) { + logger.error({ err }, "Failed to ensure Headroom container is running"); + throw err; + } finally { + isStarting = false; + } +} + +/** + * Stop and optionally remove the Headroom container + */ +async function stop(removeContainer = false) { + if (isShuttingDown) { + return; + } + + isShuttingDown = true; + + try { + const container = containerInstance || (await getExistingContainer()); + + if (!container) { + logger.debug("No Headroom container to stop"); + return; + } + + const info = await container.inspect(); + + if (info.State.Running) { + logger.info({ name: config.headroom.docker.containerName }, "Stopping 
Headroom container"); + await container.stop({ t: 10 }); // 10 second timeout + logger.info("Headroom container stopped"); + } + + if (removeContainer) { + logger.info({ name: config.headroom.docker.containerName }, "Removing Headroom container"); + await container.remove(); + logger.info("Headroom container removed"); + } + + containerInstance = null; + } catch (err) { + if (err.statusCode === 304) { + // Container already stopped + logger.debug("Headroom container was already stopped"); + } else if (err.statusCode === 404) { + // Container doesn't exist + logger.debug("Headroom container does not exist"); + } else { + logger.error({ err }, "Failed to stop Headroom container"); + } + } finally { + isShuttingDown = false; + } +} + +/** + * Get container status + */ +async function getStatus() { + try { + const container = containerInstance || (await getExistingContainer()); + + if (!container) { + return { exists: false, running: false }; + } + + const info = await container.inspect(); + + return { + exists: true, + running: info.State.Running, + status: info.State.Status, + health: info.State.Health?.Status || "unknown", + startedAt: info.State.StartedAt, + id: info.Id.substring(0, 12), + name: info.Name, + image: info.Config.Image, + }; + } catch (err) { + logger.error({ err }, "Failed to get Headroom container status"); + return { exists: false, running: false, error: err.message }; + } +} + +/** + * Get container logs + */ +async function getLogs(tail = 100) { + try { + const container = containerInstance || (await getExistingContainer()); + + if (!container) { + return null; + } + + const logs = await container.logs({ + stdout: true, + stderr: true, + tail, + timestamps: true, + }); + + return logs.toString("utf8"); + } catch (err) { + logger.error({ err }, "Failed to get Headroom container logs"); + return null; + } +} + +/** + * Restart the container + */ +async function restart() { + try { + const container = containerInstance || (await getExistingContainer()); + + if (!container) { + // No container exists, create one + return ensureRunning(); + } + + logger.info({ name: config.headroom.docker.containerName }, "Restarting Headroom container"); + await container.restart({ t: 10 }); + await waitForHealthy(container); + + return { restarted: true }; + } catch (err) { + logger.error({ err }, "Failed to restart Headroom container"); + throw err; + } +} + +module.exports = { + ensureRunning, + stop, + getStatus, + getLogs, + restart, + waitForHealthy, +}; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 80de28b..6191d5d 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -11,6 +11,7 @@ const systemPrompt = require("../prompts/system"); const historyCompression = require("../context/compression"); const tokenBudget = require("../context/budget"); const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection"); +const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom"); const DROP_KEYS = new Set([ "provider", @@ -1163,6 +1164,8 @@ async function runAgentLoop({ cacheKey, providerType, }) { + console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length); + logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED'); const settings = resolveLoopOptions(options); const start = Date.now(); let steps = 0; @@ -1176,6 +1179,7 @@ async function runAgentLoop({ } steps += 1; + console.log('[LOOP 
DEBUG] Entered while loop - step:', steps); logger.debug( { sessionId: session?.id ?? null, @@ -1362,6 +1366,7 @@ async function runAgentLoop({ } // Track estimated token usage before model call + console.log('[TOKEN DEBUG] About to track token usage - step:', steps); const estimatedTokens = config.tokenTracking?.enabled !== false ? tokens.countPayloadTokens(cleanPayload) : null; @@ -1374,6 +1379,52 @@ async function runAgentLoop({ }, 'Estimated token usage before model call'); } + // Apply Headroom compression if enabled + console.log('[HEADROOM DEBUG] About to check compression - step:', steps, 'messages:', cleanPayload.messages?.length); + logger.info({ + headroomEnabled: isHeadroomEnabled(), + hasMessages: Boolean(cleanPayload.messages), + messageCount: cleanPayload.messages?.length ?? 0, + }, 'Headroom compression check'); + + if (isHeadroomEnabled() && cleanPayload.messages && cleanPayload.messages.length > 0) { + console.log('[HEADROOM DEBUG] Entering compression block'); + try { + console.log('[HEADROOM DEBUG] About to call headroomCompress'); + const compressionResult = await headroomCompress( + cleanPayload.messages, + cleanPayload.tools || [], + { + mode: config.headroom?.mode, + queryContext: cleanPayload.messages[cleanPayload.messages.length - 1]?.content, + } + ); + console.log('[HEADROOM DEBUG] headroomCompress returned - compressed:', compressionResult.compressed, 'stats:', JSON.stringify(compressionResult.stats)); + + if (compressionResult.compressed) { + cleanPayload.messages = compressionResult.messages; + if (compressionResult.tools) { + cleanPayload.tools = compressionResult.tools; + } + logger.info({ + sessionId: session?.id ?? null, + tokensBefore: compressionResult.stats?.tokens_before, + tokensAfter: compressionResult.stats?.tokens_after, + saved: compressionResult.stats?.tokens_saved, + savingsPercent: compressionResult.stats?.savings_percent, + transforms: compressionResult.stats?.transforms_applied, + }, 'Headroom compression applied to request'); + } else { + logger.debug({ + sessionId: session?.id ?? null, + reason: compressionResult.stats?.reason, + }, 'Headroom compression skipped'); + } + } catch (headroomErr) { + logger.warn({ err: headroomErr, sessionId: session?.id ?? 
null }, 'Headroom compression failed, using original messages'); + } + } + const databricksResponse = await invokeModel(cleanPayload); // Extract and log actual token usage diff --git a/src/server.js b/src/server.js index 25c6938..37739f0 100644 --- a/src/server.js +++ b/src/server.js @@ -28,6 +28,7 @@ const { registerTestTools } = require("./tools/tests"); const { registerMcpTools } = require("./tools/mcp"); const { registerAgentTaskTool } = require("./tools/agent-task"); const { initConfigWatcher, getConfigWatcher } = require("./config/watcher"); +const { initializeHeadroom, shutdownHeadroom, getHeadroomManager } = require("./headroom"); initialiseMcp(); registerStubTools(); @@ -121,7 +122,22 @@ function createApp() { return app; } -function start() { +async function start() { + // Initialize Headroom sidecar (if enabled) + // This must happen before the server starts accepting requests + if (config.headroom?.enabled) { + try { + const result = await initializeHeadroom(); + if (result.success) { + logger.info("Headroom sidecar initialized"); + } else { + logger.warn({ error: result.error }, "Headroom initialization failed, continuing without compression"); + } + } catch (err) { + logger.error({ err }, "Headroom initialization error, continuing without compression"); + } + } + const app = createApp(); const server = app.listen(config.port, () => { console.log(`Claude→Databricks proxy listening on http://localhost:${config.port}`); @@ -137,6 +153,14 @@ function start() { shutdownManager.registerServer(server); shutdownManager.setupSignalHandlers(); + // Register Headroom shutdown callback + if (config.headroom?.enabled) { + shutdownManager.onShutdown(async () => { + logger.info("Stopping Headroom sidecar on shutdown"); + await shutdownHeadroom(false); // Don't remove container on shutdown + }); + } + // Initialize hot reload config watcher if (config.hotReload?.enabled !== false) { const watcher = initConfigWatcher({ From 55b04897227d9be65d6fb5fd7bd5245866342f3c Mon Sep 17 00:00:00 2001 From: vishal veerareddy Date: Sun, 25 Jan 2026 19:05:25 -0800 Subject: [PATCH 2/4] Add --help flag to CLI Co-Authored-By: Claude Opus 4.5 --- bin/cli.js | 25 ++++++++++++- src/clients/databricks.js | 62 +++++++++++++++++++++++++++++++ src/clients/standard-tools.js | 22 ++++++++--- src/orchestrator/index.js | 19 ++++++++++ src/prompts/system.js | 69 +++++++++++++++++++++++++++++++++++ 5 files changed, 191 insertions(+), 6 deletions(-) diff --git a/bin/cli.js b/bin/cli.js index ba80dd5..0d47059 100755 --- a/bin/cli.js +++ b/bin/cli.js @@ -1,9 +1,32 @@ #!/usr/bin/env node +const pkg = require('../package.json'); + if (process.argv.includes('--version') || process.argv.includes('-v')) { - const pkg = require('../package.json'); console.log(pkg.version); process.exit(0); } +if (process.argv.includes('--help') || process.argv.includes('-h')) { + console.log(` +${pkg.name} v${pkg.version} + +${pkg.description} + +Usage: + lynkr [options] + +Options: + -h, --help Show this help message + -v, --version Show version number + +Environment Variables: + See .env.example for configuration options + +Documentation: + ${pkg.homepage} +`); + process.exit(0); +} + require("../index.js"); diff --git a/src/clients/databricks.js b/src/clients/databricks.js index 93128af..1b5387e 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -591,6 +591,68 @@ async function invokeAzureOpenAI(body) { } } +/** + * Convert Azure Responses API response to Anthropic format + */ +function 
convertResponsesAPIToAnthropic(response, model) { + const content = []; + const outputArray = response.output || []; + + // Extract text content from message output + const messageOutput = outputArray.find(o => o.type === "message"); + if (messageOutput?.content) { + for (const item of messageOutput.content) { + if (item.type === "output_text" && item.text) { + content.push({ type: "text", text: item.text }); + } + } + } + + // Extract tool calls from function_call outputs + const toolCalls = outputArray + .filter(o => o.type === "function_call") + .map(tc => ({ + type: "tool_use", + id: tc.call_id || tc.id || `call_${Date.now()}`, + name: tc.name, + input: typeof tc.arguments === 'string' ? JSON.parse(tc.arguments || "{}") : (tc.arguments || {}) + })); + + content.push(...toolCalls); + + // Handle reasoning_content for thinking models + if (content.length === 0 && response.reasoning_content) { + content.push({ type: "text", text: response.reasoning_content }); + } + + // Ensure at least empty text if no content + if (content.length === 0) { + content.push({ type: "text", text: "" }); + } + + // Determine stop reason + let stopReason = "end_turn"; + if (toolCalls.length > 0) { + stopReason = "tool_use"; + } else if (response.status === "incomplete" && response.incomplete_details?.reason === "max_output_tokens") { + stopReason = "max_tokens"; + } + + return { + id: response.id || `msg_${Date.now()}`, + type: "message", + role: "assistant", + content, + model: model || response.model, + stop_reason: stopReason, + stop_sequence: null, + usage: { + input_tokens: response.usage?.input_tokens || 0, + output_tokens: response.usage?.output_tokens || 0, + } + }; +} + async function invokeOpenAI(body) { if (!config.openai?.apiKey) { throw new Error("OpenAI API key is not configured."); diff --git a/src/clients/standard-tools.js b/src/clients/standard-tools.js index d5e5964..51e4163 100644 --- a/src/clients/standard-tools.js +++ b/src/clients/standard-tools.js @@ -176,27 +176,39 @@ const STANDARD_TOOLS = [ }, { name: "Task", - description: "Launch specialized agents for complex multi-step tasks. Available agents: general-purpose (complex tasks), Explore (codebase exploration), Plan (implementation planning), claude-code-guide (Claude Code documentation).", + description: `Launch a specialized agent to handle complex, multi-step tasks autonomously. + +YOU MUST USE THIS TOOL WHEN the user asks to: +- "explore", "dig into", "understand", or "analyze" a codebase → use subagent_type="Explore" +- "plan", "design", or "architect" something → use subagent_type="Plan" +- perform complex multi-file research or investigation → use subagent_type="general-purpose" + +AVAILABLE AGENTS: +- Explore: Fast codebase exploration using Glob, Grep, Read. Use for searching, finding files, understanding project structure. +- Plan: Implementation planning and architecture design. Use for planning features or refactoring. +- general-purpose: Complex multi-step tasks with all tools available. 
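+- claude-code-guide: Claude Code documentation and usage questions.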
+ +EXAMPLE: User says "explore this project" → Call Task with subagent_type="Explore", prompt="Explore the codebase structure, find main entry points, read key files, and summarize what this project does"`, input_schema: { type: "object", properties: { description: { type: "string", - description: "A short (3-5 word) description of the task" + description: "A short (3-5 word) description of the task, e.g., 'Explore project structure'" }, prompt: { type: "string", - description: "The detailed task for the agent to perform" + description: "Detailed instructions for the agent. Be specific about what to find, read, or analyze." }, subagent_type: { type: "string", enum: ["general-purpose", "Explore", "Plan", "claude-code-guide"], - description: "The type of specialized agent to use" + description: "Agent type: Explore (search/read codebase), Plan (design/architecture), general-purpose (complex research)" }, model: { type: "string", enum: ["sonnet", "opus", "haiku"], - description: "Optional model to use (haiku for quick tasks, sonnet for balanced, opus for complex)" + description: "Optional model override. Default is appropriate for each agent type." } }, required: ["description", "prompt", "subagent_type"] diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 6191d5d..b34fbbc 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -1325,6 +1325,25 @@ async function runAgentLoop({ } } + // Inject agent delegation instructions when Task tool is available (for all models) + if (steps === 1 && config.agents?.enabled !== false) { + try { + const injectedSystem = systemPrompt.injectAgentInstructions( + cleanPayload.system || '', + cleanPayload.tools + ); + if (injectedSystem !== cleanPayload.system) { + cleanPayload.system = injectedSystem; + logger.debug({ + sessionId: session?.id ?? null, + hasTaskTool: true + }, 'Agent delegation instructions injected into system prompt'); + } + } catch (err) { + logger.warn({ err, sessionId: session?.id }, 'Agent instructions injection failed, continuing without'); + } + } + if (steps === 1 && config.tokenBudget?.enforcement !== false) { try { const budgetCheck = tokenBudget.checkBudget(cleanPayload); diff --git a/src/prompts/system.js b/src/prompts/system.js index a0d7d0a..823f1cc 100644 --- a/src/prompts/system.js +++ b/src/prompts/system.js @@ -8,6 +8,47 @@ const logger = require('../logger'); const config = require('../config'); +/** + * Agent Delegation Instructions + * + * These instructions tell all models how to use the Task tool for spawning subagents. + * Added to system prompt when Task tool is available. + */ +const AGENT_DELEGATION_INSTRUCTIONS = ` +## Task Delegation (Subagents) + +You have access to the **Task** tool which spawns specialized agents to handle complex work autonomously. + +### WHEN TO USE the Task Tool: + +| User Request Keywords | Action | +|----------------------|--------| +| "explore", "dig into", "understand", "analyze" the codebase | \`Task(subagent_type="Explore")\` | +| "plan", "design", "architect" an implementation | \`Task(subagent_type="Plan")\` | +| Complex multi-file research or investigation | \`Task(subagent_type="general-purpose")\` | + +### HOW TO CALL the Task Tool: + +\`\`\` +Task( + subagent_type: "Explore", + description: "Explore project structure", + prompt: "Find main entry points, understand the architecture, read key configuration files, and provide a comprehensive summary of what this project does and how it's organized." 
+) +\`\`\` + +### AGENT TYPES: + +- **Explore**: Fast codebase exploration using Glob, Grep, Read tools. Use for searching files, understanding project structure, finding code patterns. +- **Plan**: Implementation planning and architecture design. Use for designing features, planning refactoring, or architectural decisions. +- **general-purpose**: Complex multi-step tasks with access to all tools. + +### IMPORTANT: +- Subagents run independently and return a summary of their findings +- Use Explore agent for ANY codebase navigation or search tasks instead of doing it yourself +- The subagent will handle all the file reading and searching, then return results to you +`; + /** * Compress tool descriptions to minimal format * @@ -310,6 +351,32 @@ function calculateSavings(original, optimized) { }; } +/** + * Inject agent delegation instructions into system prompt + * @param {string} systemPrompt - Existing system prompt + * @param {Array} tools - Available tools + * @returns {string} System prompt with agent instructions added + */ +function injectAgentInstructions(systemPrompt, tools = []) { + // Check if Task tool is available + const hasTaskTool = tools?.some(t => + t.name === 'Task' || t.function?.name === 'Task' + ); + + if (!hasTaskTool) { + return systemPrompt; + } + + // Don't add if already present + if (systemPrompt && systemPrompt.includes('Task Delegation')) { + return systemPrompt; + } + + // Append agent instructions + const basePrompt = systemPrompt || ''; + return basePrompt + '\n\n' + AGENT_DELEGATION_INSTRUCTIONS; +} + module.exports = { compressToolDescriptions, optimizeSystemPrompt, @@ -317,4 +384,6 @@ module.exports = { calculateSavings, compressText, flattenBlocks, + injectAgentInstructions, + AGENT_DELEGATION_INSTRUCTIONS, }; From 190e188c69587565ee0446b6659eda21865f04e0 Mon Sep 17 00:00:00 2001 From: vishal veerareddy Date: Sun, 25 Jan 2026 19:07:43 -0800 Subject: [PATCH 3/4] 5.0.1 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index e157950..613d38a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "lynkr", - "version": "5.0.0", + "version": "5.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lynkr", - "version": "5.0.0", + "version": "5.0.1", "license": "Apache-2.0", "dependencies": { "@azure/openai": "^2.0.0", diff --git a/package.json b/package.json index a6798ac..f8f75b4 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "lynkr", - "version": "5.0.0", + "version": "5.0.1", "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.", "main": "index.js", "bin": { From bf6ae816a83567e77eb77ff9735da982705cc579 Mon Sep 17 00:00:00 2001 From: vishal veerareddy Date: Sun, 25 Jan 2026 19:31:20 -0800 Subject: [PATCH 4/4] rectified subagent --- .claude/settings.local.json | 4 +- src/agents/definitions/loader.js | 2 +- src/agents/executor.js | 19 ++++- src/api/router.js | 6 ++ src/clients/databricks.js | 116 ++++++++++++++++++++++++++++++- src/clients/openrouter-utils.js | 10 ++- src/orchestrator/index.js | 14 +++- src/tools/agent-task.js | 4 +- src/tools/execution.js | 16 +++-- src/tools/process.js | 18 ++++- src/workspace/index.js | 23 ++++++ 11 files changed, 210 insertions(+), 22 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 
1c3387c..75978e6 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -45,7 +45,9 @@ "Bash(xargs:*)", "Bash(docker info:*)", "Bash(docker container ls:*)", - "Bash(node --check:*)" + "Bash(node --check:*)", + "Bash(node index.js:*)", + "Bash(tee:*)" ], "deny": [], "ask": [] diff --git a/src/agents/definitions/loader.js b/src/agents/definitions/loader.js index 598ad7a..ddbd452 100644 --- a/src/agents/definitions/loader.js +++ b/src/agents/definitions/loader.js @@ -121,7 +121,7 @@ Work autonomously. Do not ask questions.`, "Read" ], model: "haiku", // Fast, cheap - maxSteps: 10, + maxSteps: 25, // Increased for thorough exploration builtIn: true }); diff --git a/src/agents/executor.js b/src/agents/executor.js index ceee018..d52964f 100644 --- a/src/agents/executor.js +++ b/src/agents/executor.js @@ -24,6 +24,9 @@ class SubagentExecutor { options.mainContext ); + // Store client CWD in context for tool execution + context.cwd = options.cwd; + try { // Set timeout const timeout = options.timeout || 120000; // 2 minutes @@ -159,16 +162,27 @@ class SubagentExecutor { payload.tools = filteredTools; } + // Determine provider based on model - subagents should use the specified model + let forceProvider = null; + if (payload.model?.includes('claude') || payload.model?.includes('sonnet') || payload.model?.includes('haiku') || payload.model?.includes('opus')) { + // Route Claude models to the configured Claude provider (azure-openai, databricks, etc.) + const config = require('../config'); + forceProvider = config.modelProvider?.provider || 'azure-openai'; + } else if (payload.model?.includes('gpt')) { + forceProvider = 'azure-openai'; + } + logger.debug({ agentId: context.agentId, model: payload.model, + forceProvider, messageCount: context.messages.length, toolCount: filteredTools.length, toolNames: filteredTools.map(t => t.name) }, "Calling model for subagent"); - // Use invokeModel to leverage provider routing - const response = await invokeModel(payload); + // Use invokeModel with forceProvider to ensure correct model routing + const response = await invokeModel(payload, { forceProvider }); if (!response.json) { throw new Error("Invalid model response"); @@ -223,6 +237,7 @@ class SubagentExecutor { }, { sessionId: sessionId, agentId: context.agentId, + cwd: context.cwd, isSubagent: true }); diff --git a/src/api/router.js b/src/api/router.js index 894a99d..b3ed198 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -6,6 +6,7 @@ const { createRateLimiter } = require("./middleware/rate-limiter"); const openaiRouter = require("./openai-router"); const providersRouter = require("./providers-handler"); const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing"); +const { validateCwd } = require("../workspace"); const router = express.Router(); @@ -130,6 +131,9 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { reason: complexity.breakdown?.taskType?.reason || complexity.recommendation, }); + // Extract client CWD from request body or header + const clientCwd = validateCwd(req.body?.cwd || req.headers['x-workspace-cwd']); + // For true streaming: only support non-tool requests for MVP // Tool requests require buffering for agent loop if (wantsStream && !hasTools) { @@ -149,6 +153,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { payload: req.body, headers: req.headers, session: req.session, + cwd: clientCwd, options: { maxSteps: req.body?.max_steps, maxDurationMs: req.body?.max_duration_ms, @@ -324,6 
diff --git a/src/clients/databricks.js b/src/clients/databricks.js
index 1b5387e..03cf5d4 100644
--- a/src/clients/databricks.js
+++ b/src/clients/databricks.js
@@ -515,8 +515,106 @@ async function invokeAzureOpenAI(body) {
     return tool;
   });
 
+  // Convert messages to Responses API input format
+  // Responses API uses different structure for tool calls and results
+  const responsesInput = [];
+  // Track function call IDs for matching with outputs
+  const pendingCallIds = [];
+
+  for (const msg of azureBody.messages) {
+    if (msg.role === "system") {
+      // System messages become developer messages
+      responsesInput.push({
+        type: "message",
+        role: "developer",
+        content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
+      });
+    } else if (msg.role === "user") {
+      // Check if content contains tool_result blocks (Anthropic format)
+      if (Array.isArray(msg.content)) {
+        for (const block of msg.content) {
+          if (block.type === "tool_result") {
+            // Convert tool_result to function_call_output
+            // Use tool_use_id if available, otherwise pop from pending call IDs
+            const callId = block.tool_use_id || pendingCallIds.shift() || `call_${Date.now()}`;
+            responsesInput.push({
+              type: "function_call_output",
+              call_id: callId,
+              output: typeof block.content === 'string' ? block.content : JSON.stringify(block.content || "")
+            });
+          } else if (block.type === "text") {
+            responsesInput.push({
+              type: "message",
+              role: "user",
+              content: block.text || ""
+            });
+          }
+        }
+      } else {
+        responsesInput.push({
+          type: "message",
+          role: "user",
+          content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
+        });
+      }
+    } else if (msg.role === "assistant") {
+      // Assistant messages - handle tool_calls (OpenAI format) and tool_use blocks (Anthropic format)
+      if (msg.tool_calls && msg.tool_calls.length > 0) {
+        // OpenAI format: tool_calls array
+        for (const tc of msg.tool_calls) {
+          const callId = tc.id || `call_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+          pendingCallIds.push(callId);
+          responsesInput.push({
+            type: "function_call",
+            call_id: callId,
+            name: tc.function?.name || tc.name,
+            arguments: typeof tc.function?.arguments === 'string' ? tc.function.arguments : JSON.stringify(tc.function?.arguments || {})
+          });
+        }
+      }
+      // Handle content - could be string, array with tool_use blocks, or array with text blocks
+      if (Array.isArray(msg.content)) {
+        // Anthropic format: content is array of blocks
+        for (const block of msg.content) {
+          if (block.type === "tool_use") {
+            const callId = block.id || `call_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
+            pendingCallIds.push(callId);
+            responsesInput.push({
+              type: "function_call",
+              call_id: callId,
+              name: block.name,
+              arguments: typeof block.input === 'string' ? block.input : JSON.stringify(block.input || {})
+            });
+          } else if (block.type === "text" && block.text) {
+            responsesInput.push({
+              type: "message",
+              role: "assistant",
+              content: block.text
+            });
+          }
+        }
+      } else if (msg.content) {
+        // String content
+        responsesInput.push({
+          type: "message",
+          role: "assistant",
+          content: msg.content
+        });
+      }
+    } else if (msg.role === "tool") {
+      // Tool results become function_call_output
+      // Use tool_call_id if available, otherwise pop from pending call IDs
+      const callId = msg.tool_call_id || pendingCallIds.shift() || `call_${Date.now()}`;
+      responsesInput.push({
+        type: "function_call_output",
+        call_id: callId,
+        output: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
+      });
+    }
+  }
+
   const responsesBody = {
-    input: azureBody.messages,
+    input: responsesInput,
     model: azureBody.model,
     max_output_tokens: azureBody.max_tokens,
     tools: responsesTools,
@@ -582,6 +680,22 @@ async function invokeAzureOpenAI(body) {
       hasToolCalls: toolCalls.length > 0,
       toolCallCount: toolCalls.length
     }, "Converted Responses API to Chat Completions format");
+
+    // Now convert from Chat Completions format to Anthropic format
+    const anthropicJson = convertOpenAIToAnthropic(result.json);
+    logger.info({
+      anthropicContentTypes: anthropicJson.content?.map(c => c.type),
+      stopReason: anthropicJson.stop_reason
+    }, "Converted to Anthropic format");
+
+    return {
+      ok: result.ok,
+      status: result.status,
+      json: anthropicJson,
+      text: JSON.stringify(anthropicJson),
+      contentType: "application/json",
+      headers: result.headers,
+    };
   }
 
   return result;
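To make the conversion loop above concrete, here is one hand-traced example; the shapes follow the code in this hunk rather than a verified Azure Responses API exchange, so treat it as a sketch:

```js
// Input: azureBody.messages containing Anthropic-style content blocks.
const messages = [
  { role: "system", content: "You are terse." },
  {
    role: "assistant",
    content: [
      { type: "text", text: "Checking the directory." },
      { type: "tool_use", id: "call_abc", name: "shell", input: { command: "ls" } },
    ],
  },
  {
    role: "user",
    content: [{ type: "tool_result", tool_use_id: "call_abc", content: "README.md" }],
  },
];

// Output: what the loop pushes into responsesInput, in order.
const expected = [
  { type: "message", role: "developer", content: "You are terse." },
  { type: "message", role: "assistant", content: "Checking the directory." },
  { type: "function_call", call_id: "call_abc", name: "shell", arguments: '{"command":"ls"}' },
  { type: "function_call_output", call_id: "call_abc", output: "README.md" },
];
```

The pendingCallIds queue only matters when a tool result arrives without an ID; here tool_use_id is present, so it is used directly.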
diff --git a/src/clients/openrouter-utils.js b/src/clients/openrouter-utils.js
index 98a6e54..500f683 100644
--- a/src/clients/openrouter-utils.js
+++ b/src/clients/openrouter-utils.js
@@ -166,7 +166,8 @@ function convertAnthropicMessagesToOpenRouter(anthropicMessages) {
       }
 
       if (!foundMatchingToolCall) {
-        logger.warn({
+        // Log but DON'T remove - the tool result may be valid but IDs mismatched due to format conversion
+        logger.debug({
           messageIndex: i,
           toolCallId: msg.tool_call_id,
           precedingMessages: converted.slice(Math.max(0, i - 3), i).map(m => ({
@@ -174,11 +175,8 @@ function convertAnthropicMessagesToOpenRouter(anthropicMessages) {
            hasToolCalls: !!m.tool_calls,
            toolCallIds: m.tool_calls?.map(tc => tc.id)
          }))
-        }, "Orphaned tool message detected - removing from sequence");
-
-        // Remove the orphaned tool message
-        converted.splice(i, 1);
-        i--; // Adjust index after removal
+        }, "Tool message without matching tool_call - keeping for API to validate");
+        // Don't remove - let the API handle validation
       }
     }
   }
diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js
index b34fbbc..33762b1 100644
--- a/src/orchestrator/index.js
+++ b/src/orchestrator/index.js
@@ -1160,6 +1160,7 @@ async function runAgentLoop({
   requestedModel,
   wantsThinking,
   session,
+  cwd,
   options,
   cacheKey,
   providerType,
@@ -1531,7 +1532,12 @@ async function runAgentLoop({
     let message = {};
     let toolCalls = [];
 
-    if (providerType === "azure-anthropic") {
+    // Detect Anthropic format: has 'content' array and 'stop_reason' at top level (no 'choices')
+    // This handles azure-anthropic provider AND azure-openai Responses API (which we convert to Anthropic format)
+    const isAnthropicFormat = providerType === "azure-anthropic" ||
+      (Array.isArray(databricksResponse.json?.content) && databricksResponse.json?.stop_reason !== undefined && !databricksResponse.json?.choices);
+
+    if (isAnthropicFormat) {
       // Anthropic format: { content: [{ type: "tool_use", ... }], stop_reason: "tool_use" }
       message = {
         content: databricksResponse.json?.content ?? [],
@@ -1820,6 +1826,7 @@ async function runAgentLoop({
       const taskExecutions = await Promise.all(
         taskCalls.map(({ call }) => executeToolCall(call, {
           session,
+          cwd,
           requestMessages: cleanPayload.messages,
         }))
       );
@@ -1994,6 +2001,7 @@ async function runAgentLoop({
       const execution = await executeToolCall(call, {
         session,
+        cwd,
         requestMessages: cleanPayload.messages,
       });
@@ -2523,6 +2531,7 @@ async function runAgentLoop({
       const execution = await executeToolCall(attemptCall, {
         session,
+        cwd,
         requestMessages: cleanPayload.messages,
       });
@@ -2686,7 +2695,7 @@ async function runAgentLoop({
   };
 }
 
-async function processMessage({ payload, headers, session, options = {} }) {
+async function processMessage({ payload, headers, session, cwd, options = {} }) {
   const requestedModel =
     payload?.model ??
     config.modelProvider?.defaultModel ??
@@ -2763,6 +2772,7 @@ async function processMessage({ payload, headers, session, options = {} }) {
     requestedModel,
     wantsThinking,
     session,
+    cwd,
     options,
     cacheKey,
     providerType: config.modelProvider?.type ?? "databricks",
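The detection predicate in the orchestrator hunk above can be restated standalone; this is an illustration of its behavior, not code from the patch:

```js
// Mirrors isAnthropicFormat from src/orchestrator/index.js.
function looksAnthropic(json, providerType) {
  return providerType === "azure-anthropic" ||
    (Array.isArray(json?.content) && json?.stop_reason !== undefined && !json?.choices);
}

// Anthropic-shaped body, as the converted Responses API path now returns:
console.log(looksAnthropic(
  { content: [{ type: "text", text: "hi" }], stop_reason: "end_turn" },
  "azure-openai"
)); // true

// Plain Chat Completions body keeps the existing choices-based branch:
console.log(looksAnthropic(
  { choices: [{ message: { content: "hi" } }] },
  "azure-openai"
)); // false
```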
"tool_use", ... }], stop_reason: "tool_use" } message = { content: databricksResponse.json?.content ?? [], @@ -1820,6 +1826,7 @@ async function runAgentLoop({ const taskExecutions = await Promise.all( taskCalls.map(({ call }) => executeToolCall(call, { session, + cwd, requestMessages: cleanPayload.messages, })) ); @@ -1994,6 +2001,7 @@ async function runAgentLoop({ const execution = await executeToolCall(call, { session, + cwd, requestMessages: cleanPayload.messages, }); @@ -2523,6 +2531,7 @@ async function runAgentLoop({ const execution = await executeToolCall(attemptCall, { session, + cwd, requestMessages: cleanPayload.messages, }); @@ -2686,7 +2695,7 @@ async function runAgentLoop({ }; } -async function processMessage({ payload, headers, session, options = {} }) { +async function processMessage({ payload, headers, session, cwd, options = {} }) { const requestedModel = payload?.model ?? config.modelProvider?.defaultModel ?? @@ -2763,6 +2772,7 @@ async function processMessage({ payload, headers, session, options = {} }) { requestedModel, wantsThinking, session, + cwd, options, cacheKey, providerType: config.modelProvider?.type ?? "databricks", diff --git a/src/tools/agent-task.js b/src/tools/agent-task.js index 2a0cdd3..4e69e22 100644 --- a/src/tools/agent-task.js +++ b/src/tools/agent-task.js @@ -37,12 +37,14 @@ function registerAgentTaskTool() { logger.info({ subagentType, prompt: prompt.slice(0, 100), - sessionId: context.sessionId + sessionId: context.sessionId, + cwd: context.cwd }, "Task tool: spawning subagent"); try { const result = await spawnAgent(subagentType, prompt, { sessionId: context.sessionId, + cwd: context.cwd, // Pass client CWD to subagent mainContext: context.mainContext // Pass minimal context }); diff --git a/src/tools/execution.js b/src/tools/execution.js index cd315e4..3ab9fcd 100644 --- a/src/tools/execution.js +++ b/src/tools/execution.js @@ -10,9 +10,11 @@ function parseTimeout(value) { return Math.min(parsed, MAX_TIMEOUT_MS); } -function normaliseCwd(cwd) { - if (!cwd) return workspaceRoot; - return resolveWorkspacePath(cwd); +function normaliseCwd(cwd, contextCwd) { + // Priority: explicit cwd arg > context.cwd > workspaceRoot + if (cwd) return resolveWorkspacePath(cwd); + if (contextCwd) return contextCwd; // Already validated absolute path + return workspaceRoot; } function parseSandboxMode(value) { @@ -44,10 +46,10 @@ function formatProcessResult(result) { function registerShellTool() { registerTool( "shell", - async ({ args = {} }) => { + async ({ args = {} }, context = {}) => { const command = args.command ?? args.cmd ?? args.run ?? args.input; const commandArgs = Array.isArray(args.args) ? args.args.map(String) : []; - const cwd = normaliseCwd(args.cwd); + const cwd = normaliseCwd(args.cwd, context.cwd); const timeoutMs = parseTimeout(args.timeout_ms ?? args.timeout); let spawnCommand; @@ -106,7 +108,7 @@ function registerShellTool() { function registerPythonTool() { registerTool( "python_exec", - async ({ args = {} }) => { + async ({ args = {} }, context = {}) => { const code = typeof args.code === "string" ? args.code @@ -121,7 +123,7 @@ function registerPythonTool() { } const executable = args.executable ?? args.python ?? "python3"; - const cwd = normaliseCwd(args.cwd); + const cwd = normaliseCwd(args.cwd, context.cwd); const timeoutMs = parseTimeout(args.timeout_ms ?? args.timeout); const requirements = Array.isArray(args.requirements) ? 
diff --git a/src/tools/process.js b/src/tools/process.js
index d6a27b9..7aad7d7 100644
--- a/src/tools/process.js
+++ b/src/tools/process.js
@@ -1,4 +1,5 @@
 const { spawn } = require("child_process");
+const path = require("path");
 const { workspaceRoot, resolveWorkspacePath } = require("../workspace");
 const { isSandboxEnabled, runSandboxProcess } = require("../mcp/sandbox");
 
@@ -48,7 +49,22 @@ async function runProcess({
   if (!command || typeof command !== "string") {
     throw new Error("Command must be a non-empty string.");
   }
-  const resolvedCwd = cwd ? resolveWorkspacePath(cwd) : workspaceRoot;
+  // cwd can be:
+  // 1. An already-resolved absolute path (from normaliseCwd in execution.js)
+  // 2. A relative path that needs resolving against workspaceRoot
+  // 3. null/undefined (use workspaceRoot)
+  let resolvedCwd;
+  if (cwd) {
+    // If it's already an absolute path, use it directly
+    // Otherwise resolve against workspaceRoot
+    if (path.isAbsolute(cwd)) {
+      resolvedCwd = cwd;
+    } else {
+      resolvedCwd = resolveWorkspacePath(cwd);
+    }
+  } else {
+    resolvedCwd = workspaceRoot;
+  }
   const mergedEnv = { ...process.env, ...sanitiseEnv(env) };
   const timeout = normaliseTimeout(timeoutMs ?? DEFAULT_TIMEOUT_MS);
   const sandboxPreference = normaliseSandboxPreference(sandbox);
diff --git a/src/workspace/index.js b/src/workspace/index.js
index dedd2f9..da1a7e0 100644
--- a/src/workspace/index.js
+++ b/src/workspace/index.js
@@ -85,6 +85,28 @@ async function applyFilePatch(targetPath, patchText, options = {}) {
   };
 }
 
+/**
+ * Validate a client-provided CWD path
+ * @param {string} cwd - The path to validate
+ * @returns {string|null} - Resolved absolute path if valid, null otherwise
+ */
+function validateCwd(cwd) {
+  if (!cwd || typeof cwd !== "string") {
+    return null;
+  }
+
+  try {
+    const resolved = path.resolve(cwd);
+    const stats = fs.statSync(resolved);
+    if (!stats.isDirectory()) {
+      return null;
+    }
+    return resolved;
+  } catch {
+    return null;
+  }
+}
+
 module.exports = {
   workspaceRoot,
   resolveWorkspacePath,
@@ -92,4 +114,5 @@
   writeFile,
   fileExists,
   applyFilePatch,
+  validateCwd,
 };
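Since validateCwd is now exported, it can be exercised directly; the expected results below assume a typical Linux filesystem and are illustrative:

```js
// Run from the repo root.
const { validateCwd } = require("./src/workspace");

console.log(validateCwd("/tmp"));          // "/tmp" - existing directory passes
console.log(validateCwd("/no/such/dir"));  // null - statSync throws and is caught
console.log(validateCwd("/etc/hostname")); // null - exists but is a file
console.log(validateCwd(42));              // null - non-string input
```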