diff --git a/README.md b/README.md
index 0d36c13c..dee26a99 100644
--- a/README.md
+++ b/README.md
@@ -177,6 +177,40 @@ The following command line options are available for the `start` command:
 | ------ | ------------------------- | ------- | ----- |
 | --json | Output debug info as JSON | false | none |
 
+## Configuration (config.json)
+
+- **Location:** `~/.local/share/copilot-api/config.json` (Linux/macOS) or `%USERPROFILE%\.local\share\copilot-api\config.json` (Windows).
+- **Default shape:**
+  ```json
+  {
+    "extraPrompts": {
+      "gpt-5-mini": "",
+      "gpt-5.1-codex-max": ""
+    },
+    "smallModel": "gpt-5-mini",
+    "modelReasoningEfforts": {
+      "gpt-5-mini": "low"
+    }
+  }
+  ```
+- **extraPrompts:** Map of `model -> prompt` appended to the first system prompt when translating Anthropic-style requests to Copilot. Use this to inject guardrails or guidance per model. Missing default entries are auto-added without overwriting your custom prompts.
+- **smallModel:** Fallback model used for tool-less warmup messages (e.g., Claude Code probe requests) to avoid spending premium requests; defaults to `gpt-5-mini`.
+- **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn't listed, `high` is used by default.
+
+Edit this file to customize prompts or swap in your own fast model. Restart the server (or rerun the command) after changes so the cached config is refreshed.
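+
+For example, to route warmup traffic through a different fast model and raise one model's reasoning effort (the model IDs below are illustrative — use whatever `GET /v1/models` reports for your account):
+
+```json
+{
+  "extraPrompts": {},
+  "smallModel": "gemini-2.0-flash-001",
+  "modelReasoningEfforts": {
+    "gpt-5.1-codex-max": "xhigh"
+  }
+}
+```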
+
 
 ## API Endpoints
 
 The server exposes several endpoints to interact with the Copilot API. It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services.
@@ -185,11 +219,20 @@ The server exposes several endpoints to interact with the Copilot API. It provid
 ### OpenAI Compatible Endpoints
 
 These endpoints mimic the OpenAI API structure.
 
-| Endpoint                    | Method | Description                                                |
-| --------------------------- | ------ | ---------------------------------------------------------- |
-| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation.  |
-| `GET /v1/models`            | `GET`  | Lists the currently available models.                      |
-| `POST /v1/embeddings`       | `POST` | Creates an embedding vector representing the input text.   |
+| Endpoint                    | Method | Description                                                       |
+| --------------------------- | ------ | ----------------------------------------------------------------- |
+| `POST /v1/responses`        | `POST` | OpenAI's most advanced interface for generating model responses.  |
+| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation.         |
+| `GET /v1/models`            | `GET`  | Lists the currently available models.                             |
+| `POST /v1/embeddings`       | `POST` | Creates an embedding vector representing the input text.          |
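+
+As a quick smoke test, a request like the following should work (this assumes the server is running on its default `http://localhost:4141` and that the chosen model supports `/responses`):
+
+```sh
+curl http://localhost:4141/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{"model": "gpt-5-mini", "input": "Say hello", "stream": false}'
+```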
"high" +} diff --git a/src/lib/logger.ts b/src/lib/logger.ts new file mode 100644 index 00000000..93a3b01f --- /dev/null +++ b/src/lib/logger.ts @@ -0,0 +1,182 @@ +import consola, { type ConsolaInstance } from "consola" +import fs from "node:fs" +import path from "node:path" +import util from "node:util" + +import { PATHS } from "./paths" +import { state } from "./state" + +const LOG_RETENTION_DAYS = 7 +const LOG_RETENTION_MS = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000 +const CLEANUP_INTERVAL_MS = 24 * 60 * 60 * 1000 +const LOG_DIR = path.join(PATHS.APP_DIR, "logs") +const FLUSH_INTERVAL_MS = 1000 +const MAX_BUFFER_SIZE = 100 + +const logStreams = new Map() +const logBuffers = new Map>() + +const ensureLogDirectory = () => { + if (!fs.existsSync(LOG_DIR)) { + fs.mkdirSync(LOG_DIR, { recursive: true }) + } +} + +const cleanupOldLogs = () => { + if (!fs.existsSync(LOG_DIR)) { + return + } + + const now = Date.now() + + for (const entry of fs.readdirSync(LOG_DIR)) { + const filePath = path.join(LOG_DIR, entry) + + let stats: fs.Stats + try { + stats = fs.statSync(filePath) + } catch { + continue + } + + if (!stats.isFile()) { + continue + } + + if (now - stats.mtimeMs > LOG_RETENTION_MS) { + try { + fs.rmSync(filePath) + } catch { + continue + } + } + } +} + +const formatArgs = (args: Array) => + args + .map((arg) => + typeof arg === "string" ? arg : ( + util.inspect(arg, { depth: null, colors: false }) + ), + ) + .join(" ") + +const sanitizeName = (name: string) => { + const normalized = name + .toLowerCase() + .replaceAll(/[^a-z0-9]+/g, "-") + .replaceAll(/^-+|-+$/g, "") + + return normalized === "" ? "handler" : normalized +} + +const getLogStream = (filePath: string): fs.WriteStream => { + let stream = logStreams.get(filePath) + if (!stream || stream.destroyed) { + stream = fs.createWriteStream(filePath, { flags: "a" }) + logStreams.set(filePath, stream) + + stream.on("error", (error: unknown) => { + console.warn("Log stream error", error) + logStreams.delete(filePath) + }) + } + return stream +} + +const flushBuffer = (filePath: string) => { + const buffer = logBuffers.get(filePath) + if (!buffer || buffer.length === 0) { + return + } + + const stream = getLogStream(filePath) + const content = buffer.join("\n") + "\n" + stream.write(content, (error) => { + if (error) { + console.warn("Failed to write handler log", error) + } + }) + + logBuffers.set(filePath, []) +} + +const flushAllBuffers = () => { + for (const filePath of logBuffers.keys()) { + flushBuffer(filePath) + } +} + +const appendLine = (filePath: string, line: string) => { + let buffer = logBuffers.get(filePath) + if (!buffer) { + buffer = [] + logBuffers.set(filePath, buffer) + } + + buffer.push(line) + + if (buffer.length >= MAX_BUFFER_SIZE) { + flushBuffer(filePath) + } +} + +setInterval(flushAllBuffers, FLUSH_INTERVAL_MS) + +const cleanup = () => { + flushAllBuffers() + for (const stream of logStreams.values()) { + stream.end() + } + logStreams.clear() + logBuffers.clear() +} + +process.on("exit", cleanup) +process.on("SIGINT", () => { + cleanup() + process.exit(0) +}) +process.on("SIGTERM", () => { + cleanup() + process.exit(0) +}) + +let lastCleanup = 0 + +export const createHandlerLogger = (name: string): ConsolaInstance => { + ensureLogDirectory() + + const sanitizedName = sanitizeName(name) + const instance = consola.withTag(name) + + if (state.verbose) { + instance.level = 5 + } + instance.setReporters([]) + + instance.addReporter({ + log(logObj) { + ensureLogDirectory() + + if (Date.now() - lastCleanup > 
+        cleanupOldLogs()
+        lastCleanup = Date.now()
+      }
+
+      const date = logObj.date
+      const dateKey = date.toLocaleDateString("sv-SE")
+      const timestamp = date.toLocaleString("sv-SE", { hour12: false })
+      const filePath = path.join(LOG_DIR, `${sanitizedName}-${dateKey}.log`)
+      const message = formatArgs(logObj.args as Array<unknown>)
+      const line = `[${timestamp}] [${logObj.type}] [${logObj.tag || name}]${
+        message ? ` ${message}` : ""
+      }`
+
+      appendLine(filePath, line)
+    },
+  })
+
+  return instance
+}
diff --git a/src/lib/paths.ts b/src/lib/paths.ts
index 8d0a9f02..e85c21d8 100644
--- a/src/lib/paths.ts
+++ b/src/lib/paths.ts
@@ -5,15 +5,18 @@ import path from "node:path"
 const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
 
 const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token")
+const CONFIG_PATH = path.join(APP_DIR, "config.json")
 
 export const PATHS = {
   APP_DIR,
   GITHUB_TOKEN_PATH,
+  CONFIG_PATH,
 }
 
 export async function ensurePaths(): Promise<void> {
   await fs.mkdir(PATHS.APP_DIR, { recursive: true })
   await ensureFile(PATHS.GITHUB_TOKEN_PATH)
+  await ensureFile(PATHS.CONFIG_PATH)
 }
 
 async function ensureFile(filePath: string): Promise<void> {
diff --git a/src/lib/state.ts b/src/lib/state.ts
index 5ba4dc1d..5d5bc2bb 100644
--- a/src/lib/state.ts
+++ b/src/lib/state.ts
@@ -15,6 +15,7 @@ export interface State {
   // Rate limiting configuration
   rateLimitSeconds?: number
   lastRequestTimestamp?: number
+  verbose: boolean
 }
 
 export const state: State = {
@@ -22,4 +23,5 @@ export const state: State = {
   manualApprove: false,
   rateLimitWait: false,
   showToken: false,
+  verbose: false,
 }
diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts
index 8c3eda73..e9b83ac5 100644
--- a/src/lib/tokenizer.ts
+++ b/src/lib/tokenizer.ts
@@ -37,7 +37,9 @@ const calculateToolCallsTokens = (
   let tokens = 0
   for (const toolCall of toolCalls) {
     tokens += constants.funcInit
-    tokens += encoder.encode(JSON.stringify(toolCall)).length
+    tokens += encoder.encode(toolCall.id).length
+    tokens += encoder.encode(toolCall.function.name).length
+    tokens += encoder.encode(toolCall.function.arguments).length
   }
   tokens += constants.funcEnd
   return tokens
@@ -158,6 +160,7 @@ const getModelConstants = (model: Model) => {
       enumInit: -3,
       enumItem: 3,
       funcEnd: 12,
+      isGpt: true,
     }
   : {
       funcInit: 7,
@@ -166,6 +169,7 @@ const getModelConstants = (model: Model) => {
       enumInit: -3,
       enumItem: 3,
       funcEnd: 12,
+      isGpt: model.id.startsWith("gpt-"),
     }
 }
@@ -218,8 +222,12 @@ const calculateParameterTokens = (
   const line = `${paramName}:${paramType}:${paramDesc}`
   tokens += encoder.encode(line).length
 
+  if (param.type === "array" && param["items"]) {
+    tokens += calculateParametersTokens(param["items"], encoder, constants)
+  }
+
   // Handle additional properties (excluding standard ones)
-  const excludedKeys = new Set(["type", "description", "enum"])
+  const excludedKeys = new Set(["type", "description", "enum", "items"])
   for (const propertyName of Object.keys(param)) {
     if (!excludedKeys.has(propertyName)) {
       const propertyValue = param[propertyName]
@@ -234,6 +242,27 @@ const calculateParameterTokens = (
   return tokens
 }
 
+/**
+ * Calculate tokens for properties object
+ */
+const calculatePropertiesTokens = (
+  properties: Record<string, unknown>,
+  encoder: Encoder,
+  constants: ReturnType<typeof getModelConstants>,
+): number => {
+  let tokens = 0
+  if (Object.keys(properties).length > 0) {
+    tokens += constants.propInit
+    for (const propKey of Object.keys(properties)) {
+      tokens += calculateParameterTokens(propKey, properties[propKey], {
+        encoder,
+        constants,
+      })
+    }
+  }
+  return tokens
+}
+
 /**
  * Calculate tokens for function parameters
  */
@@ -249,18 +278,17 @@ const calculateParametersTokens = (
   const params = parameters as Record<string, unknown>
 
   let tokens = 0
+  const excludedKeys = new Set(["$schema", "additionalProperties"])
   for (const [key, value] of Object.entries(params)) {
+    if (excludedKeys.has(key)) {
+      continue
+    }
     if (key === "properties") {
-      const properties = value as Record<string, unknown>
-      if (Object.keys(properties).length > 0) {
-        tokens += constants.propInit
-        for (const propKey of Object.keys(properties)) {
-          tokens += calculateParameterTokens(propKey, properties[propKey], {
-            encoder,
-            constants,
-          })
-        }
-      }
+      tokens += calculatePropertiesTokens(
+        value as Record<string, unknown>,
+        encoder,
+        constants,
+      )
     } else {
       const paramText =
         typeof value === "string" ? value : JSON.stringify(value)
@@ -306,10 +334,16 @@ export const numTokensForTools = (
   constants: ReturnType<typeof getModelConstants>,
 ): number => {
   let funcTokenCount = 0
-  for (const tool of tools) {
-    funcTokenCount += calculateToolTokens(tool, encoder, constants)
+  if (constants.isGpt) {
+    for (const tool of tools) {
+      funcTokenCount += calculateToolTokens(tool, encoder, constants)
+    }
+    funcTokenCount += constants.funcEnd
+  } else {
+    for (const tool of tools) {
+      funcTokenCount += encoder.encode(JSON.stringify(tool)).length
+    }
   }
-  funcTokenCount += constants.funcEnd
 
   return funcTokenCount
 }
@@ -335,6 +369,7 @@
   )
 
   const constants = getModelConstants(model)
+  // GPT token counting reference: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
   let inputTokens = calculateTokens(inputMessages, encoder, constants)
   if (payload.tools && payload.tools.length > 0) {
     inputTokens += numTokensForTools(payload.tools, encoder, constants)
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
index 04a5ae9e..3a037a52 100644
--- a/src/routes/chat-completions/handler.ts
+++ b/src/routes/chat-completions/handler.ts
@@ -1,9 +1,9 @@
 import type { Context } from "hono"
 
-import consola from "consola"
 import { streamSSE, type SSEMessage } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
+import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import { getTokenCount } from "~/lib/tokenizer"
@@ -14,11 +14,13 @@ import {
   type ChatCompletionsPayload,
 } from "~/services/copilot/create-chat-completions"
 
+const logger = createHandlerLogger("chat-completions-handler")
+
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
 
   let payload = await c.req.json<ChatCompletionsPayload>()
-  consola.debug("Request payload:", JSON.stringify(payload).slice(-400))
+  logger.debug("Request payload:", JSON.stringify(payload).slice(-400))
 
   // Find the selected model
   const selectedModel = state.models?.data.find(
@@ -29,12 +31,12 @@
   try {
     if (selectedModel) {
       const tokenCount = await getTokenCount(payload, selectedModel)
-      consola.info("Current token count:", tokenCount)
+      logger.info("Current token count:", tokenCount)
     } else {
-      consola.warn("No model selected, skipping token count calculation")
+      logger.warn("No model selected, skipping token count calculation")
     }
   } catch (error) {
-    consola.warn("Failed to calculate token count:", error)
+    logger.warn("Failed to calculate token count:", error)
   }
 
   if (state.manualApprove) await awaitApproval()
@@ -44,20 +46,20 @@
...payload, max_tokens: selectedModel?.capabilities.limits.max_output_tokens, } - consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) + logger.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens)) } const response = await createChatCompletions(payload) if (isNonStreaming(response)) { - consola.debug("Non-streaming response:", JSON.stringify(response)) + logger.debug("Non-streaming response:", JSON.stringify(response)) return c.json(response) } - consola.debug("Streaming response") + logger.debug("Streaming response") return streamSSE(c, async (stream) => { for await (const chunk of response) { - consola.debug("Streaming chunk:", JSON.stringify(chunk)) + logger.debug("Streaming chunk:", JSON.stringify(chunk)) await stream.writeSSE(chunk as SSEMessage) } }) diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc..f07485bf 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + signature: string } export type AnthropicUserContentBlock = diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts index 2ec849cb..f280e094 100644 --- a/src/routes/messages/count-tokens-handler.ts +++ b/src/routes/messages/count-tokens-handler.ts @@ -33,18 +33,21 @@ export async function handleCountTokens(c: Context) { const tokenCount = await getTokenCount(openAIPayload, selectedModel) if (anthropicPayload.tools && anthropicPayload.tools.length > 0) { - let mcpToolExist = false - if (anthropicBeta?.startsWith("claude-code")) { - mcpToolExist = anthropicPayload.tools.some((tool) => - tool.name.startsWith("mcp__"), + let addToolSystemPromptCount = false + if (anthropicBeta) { + const toolsLength = anthropicPayload.tools.length + addToolSystemPromptCount = !anthropicPayload.tools.some( + (tool) => + tool.name.startsWith("mcp__") + || (tool.name === "Skill" && toolsLength === 1), ) } - if (!mcpToolExist) { + if (addToolSystemPromptCount) { if (anthropicPayload.model.startsWith("claude")) { // https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview#pricing tokenCount.input = tokenCount.input + 346 } else if (anthropicPayload.model.startsWith("grok")) { - tokenCount.input = tokenCount.input + 480 + tokenCount.input = tokenCount.input + 120 } } } @@ -52,8 +55,6 @@ export async function handleCountTokens(c: Context) { let finalTokenCount = tokenCount.input + tokenCount.output if (anthropicPayload.model.startsWith("claude")) { finalTokenCount = Math.round(finalTokenCount * 1.15) - } else if (anthropicPayload.model.startsWith("grok")) { - finalTokenCount = Math.round(finalTokenCount * 1.03) } consola.info("Token count:", finalTokenCount) diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf624..1de09459 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -1,16 +1,32 @@ import type { Context } from "hono" -import consola from "consola" import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { getSmallModel } from "~/lib/config" +import { createHandlerLogger } from "~/lib/logger" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" +import { + buildErrorEvent, + createResponsesStreamState, + translateResponsesStreamEvent, +} from 
"~/routes/messages/responses-stream-translation" +import { + translateAnthropicMessagesToResponsesPayload, + translateResponsesResultToAnthropic, +} from "~/routes/messages/responses-translation" +import { getResponsesRequestOptions } from "~/routes/responses/utils" import { createChatCompletions, type ChatCompletionChunk, type ChatCompletionResponse, } from "~/services/copilot/create-chat-completions" +import { + createResponses, + type ResponsesResult, + type ResponseStreamEvent, +} from "~/services/copilot/create-responses" import { type AnthropicMessagesPayload, @@ -22,38 +38,63 @@ import { } from "./non-stream-translation" import { translateChunkToAnthropicEvents } from "./stream-translation" +const logger = createHandlerLogger("messages-handler") + export async function handleCompletion(c: Context) { await checkRateLimit(state) const anthropicPayload = await c.req.json() - consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) + logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload)) - const openAIPayload = translateToOpenAI(anthropicPayload) - consola.debug( - "Translated OpenAI request payload:", - JSON.stringify(openAIPayload), - ) + // fix claude code 2.0.28+ warmup request consume premium request, forcing small model if no tools are used + // set "CLAUDE_CODE_SUBAGENT_MODEL": "you small model" also can avoid this + const anthropicBeta = c.req.header("anthropic-beta") + const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0 + if (anthropicBeta && noTools) { + anthropicPayload.model = getSmallModel() + } + + const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model) if (state.manualApprove) { await awaitApproval() } + if (useResponsesApi) { + return await handleWithResponsesApi(c, anthropicPayload) + } + + return await handleWithChatCompletions(c, anthropicPayload) +} + +const RESPONSES_ENDPOINT = "/responses" + +const handleWithChatCompletions = async ( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +) => { + const openAIPayload = translateToOpenAI(anthropicPayload) + logger.debug( + "Translated OpenAI request payload:", + JSON.stringify(openAIPayload), + ) + const response = await createChatCompletions(openAIPayload) if (isNonStreaming(response)) { - consola.debug( + logger.debug( "Non-streaming response from Copilot:", JSON.stringify(response).slice(-400), ) const anthropicResponse = translateToAnthropic(response) - consola.debug( + logger.debug( "Translated Anthropic response:", JSON.stringify(anthropicResponse), ) return c.json(anthropicResponse) } - consola.debug("Streaming response from Copilot") + logger.debug("Streaming response from Copilot") return streamSSE(c, async (stream) => { const streamState: AnthropicStreamState = { messageStartSent: false, @@ -63,7 +104,7 @@ export async function handleCompletion(c: Context) { } for await (const rawEvent of response) { - consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent)) + logger.debug("Copilot raw stream event:", JSON.stringify(rawEvent)) if (rawEvent.data === "[DONE]") { break } @@ -76,7 +117,7 @@ export async function handleCompletion(c: Context) { const events = translateChunkToAnthropicEvents(chunk, streamState) for (const event of events) { - consola.debug("Translated Anthropic event:", JSON.stringify(event)) + logger.debug("Translated Anthropic event:", JSON.stringify(event)) await stream.writeSSE({ event: event.type, data: JSON.stringify(event), @@ -86,6 +127,101 @@ export async function handleCompletion(c: 
Context) { }) } +const handleWithResponsesApi = async ( + c: Context, + anthropicPayload: AnthropicMessagesPayload, +) => { + const responsesPayload = + translateAnthropicMessagesToResponsesPayload(anthropicPayload) + logger.debug( + "Translated Responses payload:", + JSON.stringify(responsesPayload), + ) + + const { vision, initiator } = getResponsesRequestOptions(responsesPayload) + const response = await createResponses(responsesPayload, { + vision, + initiator, + }) + + if (responsesPayload.stream && isAsyncIterable(response)) { + logger.debug("Streaming response from Copilot (Responses API)") + return streamSSE(c, async (stream) => { + const streamState = createResponsesStreamState() + + for await (const chunk of response) { + const eventName = chunk.event + if (eventName === "ping") { + await stream.writeSSE({ event: "ping", data: "" }) + continue + } + + const data = chunk.data + if (!data) { + continue + } + + logger.debug("Responses raw stream event:", data) + + const events = translateResponsesStreamEvent( + JSON.parse(data) as ResponseStreamEvent, + streamState, + ) + for (const event of events) { + const eventData = JSON.stringify(event) + logger.debug("Translated Anthropic event:", eventData) + await stream.writeSSE({ + event: event.type, + data: eventData, + }) + } + + if (streamState.messageCompleted) { + logger.debug("Message completed, ending stream") + break + } + } + + if (!streamState.messageCompleted) { + logger.warn( + "Responses stream ended without completion; sending error event", + ) + const errorEvent = buildErrorEvent( + "Responses stream ended without completion", + ) + await stream.writeSSE({ + event: errorEvent.type, + data: JSON.stringify(errorEvent), + }) + } + }) + } + + logger.debug( + "Non-streaming Responses result:", + JSON.stringify(response).slice(-400), + ) + const anthropicResponse = translateResponsesResultToAnthropic( + response as ResponsesResult, + ) + logger.debug( + "Translated Anthropic response:", + JSON.stringify(anthropicResponse), + ) + return c.json(anthropicResponse) +} + +const shouldUseResponsesApi = (modelId: string): boolean => { + const selectedModel = state.models?.data.find((model) => model.id === modelId) + return ( + selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? 
false
+  )
+}
+
 const isNonStreaming = (
   response: Awaited<ReturnType<typeof createChatCompletions>>,
 ): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
+
+const isAsyncIterable = (value: unknown): value is AsyncIterable<unknown> =>
+  Boolean(value)
+  && typeof (value as AsyncIterable<unknown>)[Symbol.asyncIterator] === "function"
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
new file mode 100644
index 00000000..5fa043c9
--- /dev/null
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -0,0 +1,693 @@
+import {
+  type ResponseCompletedEvent,
+  type ResponseCreatedEvent,
+  type ResponseErrorEvent,
+  type ResponseFailedEvent,
+  type ResponseFunctionCallArgumentsDeltaEvent,
+  type ResponseFunctionCallArgumentsDoneEvent,
+  type ResponseIncompleteEvent,
+  type ResponseOutputItemAddedEvent,
+  type ResponseOutputItemDoneEvent,
+  type ResponseReasoningSummaryTextDeltaEvent,
+  type ResponseReasoningSummaryTextDoneEvent,
+  type ResponsesResult,
+  type ResponseStreamEvent,
+  type ResponseTextDeltaEvent,
+  type ResponseTextDoneEvent,
+} from "~/services/copilot/create-responses"
+
+import { type AnthropicStreamEventData } from "./anthropic-types"
+import { translateResponsesResultToAnthropic } from "./responses-translation"
+
+const MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE = 20
+
+class FunctionCallArgumentsValidationError extends Error {
+  constructor(message: string) {
+    super(message)
+    this.name = "FunctionCallArgumentsValidationError"
+  }
+}
+
+const updateWhitespaceRunState = (
+  previousCount: number,
+  chunk: string,
+): {
+  nextCount: number
+  exceeded: boolean
+} => {
+  let count = previousCount
+
+  for (const char of chunk) {
+    if (char === "\r" || char === "\n" || char === "\t") {
+      count += 1
+      if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) {
+        return { nextCount: count, exceeded: true }
+      }
+      continue
+    }
+
+    if (char !== " ") {
+      count = 0
+    }
+  }
+
+  return { nextCount: count, exceeded: false }
+}
+
+export interface ResponsesStreamState {
+  messageStartSent: boolean
+  messageCompleted: boolean
+  nextContentBlockIndex: number
+  blockIndexByKey: Map<string, number>
+  openBlocks: Set<number>
+  blockHasDelta: Set<number>
+  functionCallStateByOutputIndex: Map<number, FunctionCallStreamState>
+}
+
+type FunctionCallStreamState = {
+  blockIndex: number
+  toolCallId: string
+  name: string
+  consecutiveWhitespaceCount: number
+}
+
+export const createResponsesStreamState = (): ResponsesStreamState => ({
+  messageStartSent: false,
+  messageCompleted: false,
+  nextContentBlockIndex: 0,
+  blockIndexByKey: new Map(),
+  openBlocks: new Set(),
+  blockHasDelta: new Set(),
+  functionCallStateByOutputIndex: new Map(),
+})
+
+export const translateResponsesStreamEvent = (
+  rawEvent: ResponseStreamEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const eventType = rawEvent.type
+  switch (eventType) {
+    case "response.created": {
+      return handleResponseCreated(rawEvent, state)
+    }
+
+    case "response.output_item.added": {
+      return handleOutputItemAdded(rawEvent, state)
+    }
+
+    case "response.reasoning_summary_text.delta": {
+      return handleReasoningSummaryTextDelta(rawEvent, state)
+    }
+
+    case "response.output_text.delta": {
+      return handleOutputTextDelta(rawEvent, state)
+    }
+
+    case "response.reasoning_summary_text.done": {
+      return handleReasoningSummaryTextDone(rawEvent, state)
+    }
+
+    case "response.output_text.done": {
+      return handleOutputTextDone(rawEvent, state)
+    }
+    case "response.output_item.done": {
+      return handleOutputItemDone(rawEvent, state)
+    }
+
+    case "response.function_call_arguments.delta": 
{
+      return handleFunctionCallArgumentsDelta(rawEvent, state)
+    }
+
+    case "response.function_call_arguments.done": {
+      return handleFunctionCallArgumentsDone(rawEvent, state)
+    }
+
+    case "response.completed":
+    case "response.incomplete": {
+      return handleResponseCompleted(rawEvent, state)
+    }
+
+    case "response.failed": {
+      return handleResponseFailed(rawEvent, state)
+    }
+
+    case "error": {
+      return handleErrorEvent(rawEvent, state)
+    }
+
+    default: {
+      return []
+    }
+  }
+}
+
+// Helper handlers to keep translateResponsesStreamEvent concise
+const handleResponseCreated = (
+  rawEvent: ResponseCreatedEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  return messageStart(state, rawEvent.response)
+}
+
+const handleOutputItemAdded = (
+  rawEvent: ResponseOutputItemAddedEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = new Array<AnthropicStreamEventData>()
+  const functionCallDetails = extractFunctionCallDetails(rawEvent)
+  if (!functionCallDetails) {
+    return events
+  }
+
+  const { outputIndex, toolCallId, name, initialArguments } =
+    functionCallDetails
+  const blockIndex = openFunctionCallBlock(state, {
+    outputIndex,
+    toolCallId,
+    name,
+    events,
+  })
+
+  if (initialArguments !== undefined && initialArguments.length > 0) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "input_json_delta",
+        partial_json: initialArguments,
+      },
+    })
+    state.blockHasDelta.add(blockIndex)
+  }
+
+  return events
+}
+
+const handleOutputItemDone = (
+  rawEvent: ResponseOutputItemDoneEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = new Array<AnthropicStreamEventData>()
+  const item = rawEvent.item
+  const itemType = item.type
+  if (itemType !== "reasoning") {
+    return events
+  }
+
+  const outputIndex = rawEvent.output_index
+  const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+  const signature = (item.encrypted_content ?? "") + "@" + item.id
+  if (signature) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "signature_delta",
+        signature,
+      },
+    })
+    state.blockHasDelta.add(blockIndex)
+  }
+
+  return events
+}
+
+const handleFunctionCallArgumentsDelta = (
+  rawEvent: ResponseFunctionCallArgumentsDeltaEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = new Array<AnthropicStreamEventData>()
+  const outputIndex = rawEvent.output_index
+  const deltaText = rawEvent.delta
+
+  if (!deltaText) {
+    return events
+  }
+
+  const blockIndex = openFunctionCallBlock(state, {
+    outputIndex,
+    events,
+  })
+
+  const functionCallState =
+    state.functionCallStateByOutputIndex.get(outputIndex)
+  if (!functionCallState) {
+    return handleFunctionCallArgumentsValidationError(
+      new FunctionCallArgumentsValidationError(
+        "Received function call arguments delta without an open tool call block.",
+      ),
+      state,
+      events,
+    )
+  }
+
+  // Fix: Copilot function calls sometimes return endless line breaks until the max_tokens limit, e.g.:
+  // "arguments": "{\"path\":\"xxx\",\"pattern\":\"**/*.ts\",\"} }? Wait extra braces. Need correct. I should run? Wait overcame. Need proper JSON with pattern \"\n\n\n\n\n\n\n\n...
+  const { nextCount, exceeded } = updateWhitespaceRunState(
+    functionCallState.consecutiveWhitespaceCount,
+    deltaText,
+  )
+  if (exceeded) {
+    return handleFunctionCallArgumentsValidationError(
+      new FunctionCallArgumentsValidationError(
+        "Received function call arguments delta containing more than 20 consecutive whitespace characters.",
+      ),
+      state,
+      events,
+    )
+  }
+  functionCallState.consecutiveWhitespaceCount = nextCount
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "input_json_delta",
+      partial_json: deltaText,
+    },
+  })
+  state.blockHasDelta.add(blockIndex)
+
+  return events
+}
+
+const handleFunctionCallArgumentsDone = (
+  rawEvent: ResponseFunctionCallArgumentsDoneEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = new Array<AnthropicStreamEventData>()
+  const outputIndex = rawEvent.output_index
+  const blockIndex = openFunctionCallBlock(state, {
+    outputIndex,
+    events,
+  })
+
+  const finalArguments =
+    typeof rawEvent.arguments === "string" ? rawEvent.arguments : undefined
+
+  if (!state.blockHasDelta.has(blockIndex) && finalArguments) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "input_json_delta",
+        partial_json: finalArguments,
+      },
+    })
+    state.blockHasDelta.add(blockIndex)
+  }
+
+  state.functionCallStateByOutputIndex.delete(outputIndex)
+  return events
+}
+
+const handleOutputTextDelta = (
+  rawEvent: ResponseTextDeltaEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = new Array<AnthropicStreamEventData>()
+  const outputIndex = rawEvent.output_index
+  const contentIndex = rawEvent.content_index
+  const deltaText = rawEvent.delta
+
+  if (!deltaText) {
+    return events
+  }
+
+  const blockIndex = openTextBlockIfNeeded(state, {
+    outputIndex,
+    contentIndex,
+    events,
+  })
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "text_delta",
+      text: deltaText,
+    },
+  })
+  state.blockHasDelta.add(blockIndex)
+
+  return events
+}
+
+const handleReasoningSummaryTextDelta = (
+  rawEvent: ResponseReasoningSummaryTextDeltaEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const outputIndex = rawEvent.output_index
+  const deltaText = rawEvent.delta
+  const events = new Array<AnthropicStreamEventData>()
+  const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "thinking_delta",
+      thinking: deltaText,
+    },
+  })
+  state.blockHasDelta.add(blockIndex)
+
+  return events
+}
+
+const handleReasoningSummaryTextDone = (
+  rawEvent: ResponseReasoningSummaryTextDoneEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const outputIndex = rawEvent.output_index
+  const text = rawEvent.text
+  const events = new Array<AnthropicStreamEventData>()
+  const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+
+  if (text && !state.blockHasDelta.has(blockIndex)) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "thinking_delta",
+        thinking: text,
+      },
+    })
+  }
+
+  return events
+}
+
+const handleOutputTextDone = (
+  rawEvent: ResponseTextDoneEvent,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = new Array<AnthropicStreamEventData>()
+  const outputIndex = rawEvent.output_index
+  const contentIndex = rawEvent.content_index
+  const text = rawEvent.text
+
+  const blockIndex = openTextBlockIfNeeded(state, {
+    outputIndex,
+    contentIndex,
+    events,
+  })
+
+  if (text && !state.blockHasDelta.has(blockIndex)) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "text_delta",
"text_delta", + text, + }, + }) + } + + return events +} + +const handleResponseCompleted = ( + rawEvent: ResponseCompletedEvent | ResponseIncompleteEvent, + state: ResponsesStreamState, +): Array => { + const response = rawEvent.response + const events = new Array() + + closeAllOpenBlocks(state, events) + const anthropic = translateResponsesResultToAnthropic(response) + events.push( + { + type: "message_delta", + delta: { + stop_reason: anthropic.stop_reason, + stop_sequence: anthropic.stop_sequence, + }, + usage: anthropic.usage, + }, + { type: "message_stop" }, + ) + state.messageCompleted = true + return events +} + +const handleResponseFailed = ( + rawEvent: ResponseFailedEvent, + state: ResponsesStreamState, +): Array => { + const response = rawEvent.response + const events = new Array() + closeAllOpenBlocks(state, events) + + const message = + response.error?.message ?? "The response failed due to an unknown error." + + events.push(buildErrorEvent(message)) + state.messageCompleted = true + + return events +} + +const handleErrorEvent = ( + rawEvent: ResponseErrorEvent, + state: ResponsesStreamState, +): Array => { + const message = + typeof rawEvent.message === "string" ? + rawEvent.message + : "An unexpected error occurred during streaming." + + state.messageCompleted = true + return [buildErrorEvent(message)] +} + +const handleFunctionCallArgumentsValidationError = ( + error: FunctionCallArgumentsValidationError, + state: ResponsesStreamState, + events: Array = [], +): Array => { + const reason = error.message + + closeAllOpenBlocks(state, events) + state.messageCompleted = true + + events.push(buildErrorEvent(reason)) + + return events +} + +const messageStart = ( + state: ResponsesStreamState, + response: ResponsesResult, +): Array => { + state.messageStartSent = true + const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens + const inputTokens = + (response.usage?.input_tokens ?? 0) - (inputCachedTokens ?? 0) + return [ + { + type: "message_start", + message: { + id: response.id, + type: "message", + role: "assistant", + content: [], + model: response.model, + stop_reason: null, + stop_sequence: null, + usage: { + input_tokens: inputTokens, + output_tokens: 0, + cache_read_input_tokens: inputCachedTokens ?? 
+        },
+      },
+    },
+  ]
+}
+
+const openTextBlockIfNeeded = (
+  state: ResponsesStreamState,
+  params: {
+    outputIndex: number
+    contentIndex: number
+    events: Array<AnthropicStreamEventData>
+  },
+): number => {
+  const { outputIndex, contentIndex, events } = params
+  const key = getBlockKey(outputIndex, contentIndex)
+  let blockIndex = state.blockIndexByKey.get(key)
+
+  if (blockIndex === undefined) {
+    blockIndex = state.nextContentBlockIndex
+    state.nextContentBlockIndex += 1
+    state.blockIndexByKey.set(key, blockIndex)
+  }
+
+  if (!state.openBlocks.has(blockIndex)) {
+    closeOpenBlocks(state, events)
+    events.push({
+      type: "content_block_start",
+      index: blockIndex,
+      content_block: {
+        type: "text",
+        text: "",
+      },
+    })
+    state.openBlocks.add(blockIndex)
+  }
+
+  return blockIndex
+}
+
+const openThinkingBlockIfNeeded = (
+  state: ResponsesStreamState,
+  outputIndex: number,
+  events: Array<AnthropicStreamEventData>,
+): number => {
+  // Thinking blocks can span multiple summary_index values; combine them into one block.
+  const summaryIndex = 0
+  const key = getBlockKey(outputIndex, summaryIndex)
+  let blockIndex = state.blockIndexByKey.get(key)
+
+  if (blockIndex === undefined) {
+    blockIndex = state.nextContentBlockIndex
+    state.nextContentBlockIndex += 1
+    state.blockIndexByKey.set(key, blockIndex)
+  }
+
+  if (!state.openBlocks.has(blockIndex)) {
+    closeOpenBlocks(state, events)
+    events.push({
+      type: "content_block_start",
+      index: blockIndex,
+      content_block: {
+        type: "thinking",
+        thinking: "",
+      },
+    })
+    state.openBlocks.add(blockIndex)
+  }
+
+  return blockIndex
+}
+
+const closeBlockIfOpen = (
+  state: ResponsesStreamState,
+  blockIndex: number,
+  events: Array<AnthropicStreamEventData>,
+) => {
+  if (!state.openBlocks.has(blockIndex)) {
+    return
+  }
+
+  events.push({ type: "content_block_stop", index: blockIndex })
+  state.openBlocks.delete(blockIndex)
+  state.blockHasDelta.delete(blockIndex)
+}
+
+const closeOpenBlocks = (
+  state: ResponsesStreamState,
+  events: Array<AnthropicStreamEventData>,
+) => {
+  for (const blockIndex of state.openBlocks) {
+    closeBlockIfOpen(state, blockIndex, events)
+  }
+}
+
+const closeAllOpenBlocks = (
+  state: ResponsesStreamState,
+  events: Array<AnthropicStreamEventData>,
+) => {
+  closeOpenBlocks(state, events)
+
+  state.functionCallStateByOutputIndex.clear()
+}
+
+export const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
+  type: "error",
+  error: {
+    type: "api_error",
+    message,
+  },
+})
+
+const getBlockKey = (outputIndex: number, contentIndex: number): string =>
+  `${outputIndex}:${contentIndex}`
+
+const openFunctionCallBlock = (
+  state: ResponsesStreamState,
+  params: {
+    outputIndex: number
+    toolCallId?: string
+    name?: string
+    events: Array<AnthropicStreamEventData>
+  },
+): number => {
+  const { outputIndex, toolCallId, name, events } = params
+
+  let functionCallState = state.functionCallStateByOutputIndex.get(outputIndex)
+
+  if (!functionCallState) {
+    const blockIndex = state.nextContentBlockIndex
+    state.nextContentBlockIndex += 1
+
+    const resolvedToolCallId = toolCallId ?? `tool_call_${blockIndex}`
+    const resolvedName = name ?? "function"
"function" + + functionCallState = { + blockIndex, + toolCallId: resolvedToolCallId, + name: resolvedName, + consecutiveWhitespaceCount: 0, + } + + state.functionCallStateByOutputIndex.set(outputIndex, functionCallState) + } + + const { blockIndex } = functionCallState + + if (!state.openBlocks.has(blockIndex)) { + closeOpenBlocks(state, events) + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "tool_use", + id: functionCallState.toolCallId, + name: functionCallState.name, + input: {}, + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + +type FunctionCallDetails = { + outputIndex: number + toolCallId: string + name: string + initialArguments?: string +} + +const extractFunctionCallDetails = ( + rawEvent: ResponseOutputItemAddedEvent, +): FunctionCallDetails | undefined => { + const item = rawEvent.item + const itemType = item.type + if (itemType !== "function_call") { + return undefined + } + + const outputIndex = rawEvent.output_index + const toolCallId = item.call_id + const name = item.name + const initialArguments = item.arguments + return { + outputIndex, + toolCallId, + name, + initialArguments, + } +} diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts new file mode 100644 index 00000000..c8e9460b --- /dev/null +++ b/src/routes/messages/responses-translation.ts @@ -0,0 +1,642 @@ +import consola from "consola" + +import { + getExtraPromptForModel, + getReasoningEffortForModel, +} from "~/lib/config" +import { + type ResponsesPayload, + type ResponseInputContent, + type ResponseInputImage, + type ResponseInputItem, + type ResponseInputMessage, + type ResponseInputReasoning, + type ResponseInputText, + type ResponsesResult, + type ResponseOutputContentBlock, + type ResponseOutputFunctionCall, + type ResponseOutputItem, + type ResponseOutputReasoning, + type ResponseReasoningBlock, + type ResponseOutputRefusal, + type ResponseOutputText, + type ResponseFunctionToolCallItem, + type ResponseFunctionCallOutputItem, + type Tool, + type ToolChoiceFunction, + type ToolChoiceOptions, +} from "~/services/copilot/create-responses" + +import { + type AnthropicAssistantContentBlock, + type AnthropicAssistantMessage, + type AnthropicResponse, + type AnthropicImageBlock, + type AnthropicMessage, + type AnthropicMessagesPayload, + type AnthropicTextBlock, + type AnthropicThinkingBlock, + type AnthropicTool, + type AnthropicToolResultBlock, + type AnthropicToolUseBlock, + type AnthropicUserContentBlock, + type AnthropicUserMessage, +} from "./anthropic-types" + +const MESSAGE_TYPE = "message" + +export const translateAnthropicMessagesToResponsesPayload = ( + payload: AnthropicMessagesPayload, +): ResponsesPayload => { + const input: Array = [] + + for (const message of payload.messages) { + input.push(...translateMessage(message)) + } + + const translatedTools = convertAnthropicTools(payload.tools) + const toolChoice = convertAnthropicToolChoice(payload.tool_choice) + + const { safetyIdentifier, promptCacheKey } = parseUserId( + payload.metadata?.user_id, + ) + + const responsesPayload: ResponsesPayload = { + model: payload.model, + input, + instructions: translateSystemPrompt(payload.system, payload.model), + temperature: 1, // reasoning high temperature fixed to 1 + top_p: payload.top_p ?? null, + max_output_tokens: Math.max(payload.max_tokens, 12800), + tools: translatedTools, + tool_choice: toolChoice, + metadata: payload.metadata ? 
+    safety_identifier: safetyIdentifier,
+    prompt_cache_key: promptCacheKey,
+    stream: payload.stream ?? null,
+    store: false,
+    parallel_tool_calls: true,
+    reasoning: {
+      effort: getReasoningEffortForModel(payload.model),
+      summary: "detailed",
+    },
+    include: ["reasoning.encrypted_content"],
+  }
+
+  return responsesPayload
+}
+
+const translateMessage = (
+  message: AnthropicMessage,
+): Array<ResponseInputItem> => {
+  if (message.role === "user") {
+    return translateUserMessage(message)
+  }
+
+  return translateAssistantMessage(message)
+}
+
+const translateUserMessage = (
+  message: AnthropicUserMessage,
+): Array<ResponseInputItem> => {
+  if (typeof message.content === "string") {
+    return [createMessage("user", message.content)]
+  }
+
+  if (!Array.isArray(message.content)) {
+    return []
+  }
+
+  const items: Array<ResponseInputItem> = []
+  const pendingContent: Array<ResponseInputContent> = []
+
+  for (const block of message.content) {
+    if (block.type === "tool_result") {
+      flushPendingContent("user", pendingContent, items)
+      items.push(createFunctionCallOutput(block))
+      continue
+    }
+
+    const converted = translateUserContentBlock(block)
+    if (converted) {
+      pendingContent.push(converted)
+    }
+  }
+
+  flushPendingContent("user", pendingContent, items)
+
+  return items
+}
+
+const translateAssistantMessage = (
+  message: AnthropicAssistantMessage,
+): Array<ResponseInputItem> => {
+  if (typeof message.content === "string") {
+    return [createMessage("assistant", message.content)]
+  }
+
+  if (!Array.isArray(message.content)) {
+    return []
+  }
+
+  const items: Array<ResponseInputItem> = []
+  const pendingContent: Array<ResponseInputContent> = []
+
+  for (const block of message.content) {
+    if (block.type === "tool_use") {
+      flushPendingContent("assistant", pendingContent, items)
+      items.push(createFunctionToolCall(block))
+      continue
+    }
+
+    if (
+      block.type === "thinking"
+      && block.signature
+      && block.signature.includes("@")
+    ) {
+      flushPendingContent("assistant", pendingContent, items)
+      items.push(createReasoningContent(block))
+      continue
+    }
+
+    const converted = translateAssistantContentBlock(block)
+    if (converted) {
+      pendingContent.push(converted)
+    }
+  }
+
+  flushPendingContent("assistant", pendingContent, items)
+
+  return items
+}
+
+const translateUserContentBlock = (
+  block: AnthropicUserContentBlock,
+): ResponseInputContent | undefined => {
+  switch (block.type) {
+    case "text": {
+      return createTextContent(block.text)
+    }
+    case "image": {
+      return createImageContent(block)
+    }
+    default: {
+      return undefined
+    }
+  }
+}
+
+const translateAssistantContentBlock = (
+  block: AnthropicAssistantContentBlock,
+): ResponseInputContent | undefined => {
+  switch (block.type) {
+    case "text": {
+      return createOutputTextContent(block.text)
+    }
+    default: {
+      return undefined
+    }
+  }
+}
+
+const flushPendingContent = (
+  role: ResponseInputMessage["role"],
+  pendingContent: Array<ResponseInputContent>,
+  target: Array<ResponseInputItem>,
+) => {
+  if (pendingContent.length === 0) {
+    return
+  }
+
+  const messageContent = [...pendingContent]
+
+  target.push(createMessage(role, messageContent))
+  pendingContent.length = 0
+}
+
+const createMessage = (
+  role: ResponseInputMessage["role"],
+  content: string | Array<ResponseInputContent>,
+): ResponseInputMessage => ({
+  type: MESSAGE_TYPE,
+  role,
+  content,
+})
+
+const createTextContent = (text: string): ResponseInputText => ({
+  type: "input_text",
+  text,
+})
+
+const createOutputTextContent = (text: string): ResponseInputText => ({
+  type: "output_text",
+  text,
+})
+
+const createImageContent = (
+  block: AnthropicImageBlock,
+): ResponseInputImage => ({
+  type: "input_image",
+  image_url: `data:${block.source.media_type};base64,${block.source.data}`,
+  detail: "auto",
+})
+
+const createReasoningContent = (
+  block: AnthropicThinkingBlock,
+): ResponseInputReasoning => {
+  // Align with vscode-copilot-chat's extractThinkingData: include the reasoning id, otherwise the prompt cache occasionally misses (usage reports 0 cached input tokens).
+  // https://github.com/microsoft/vscode-copilot-chat/blob/main/src/platform/endpoint/node/responsesApi.ts#L162
+  // When used from the Codex CLI the reasoning id is empty, which also causes occasional cache misses.
+  const array = block.signature.split("@")
+  const signature = array[0]
+  const id = array[1]
+  return {
+    id,
+    type: "reasoning",
+    summary: [
+      {
+        type: "summary_text",
+        text: block.thinking,
+      },
+    ],
+    encrypted_content: signature,
+  }
+}
+
+const createFunctionToolCall = (
+  block: AnthropicToolUseBlock,
+): ResponseFunctionToolCallItem => ({
+  type: "function_call",
+  call_id: block.id,
+  name: block.name,
+  arguments: JSON.stringify(block.input),
+  status: "completed",
+})
+
+const createFunctionCallOutput = (
+  block: AnthropicToolResultBlock,
+): ResponseFunctionCallOutputItem => ({
+  type: "function_call_output",
+  call_id: block.tool_use_id,
+  output: convertToolResultContent(block.content),
+  status: block.is_error ? "incomplete" : "completed",
+})
+
+const translateSystemPrompt = (
+  system: string | Array<AnthropicTextBlock> | undefined,
+  model: string,
+): string | null => {
+  if (!system) {
+    return null
+  }
+
+  const extraPrompt = getExtraPromptForModel(model)
+
+  if (typeof system === "string") {
+    return system + extraPrompt
+  }
+
+  const text = system
+    .map((block, index) => {
+      if (index === 0) {
+        return block.text + extraPrompt
+      }
+      return block.text
+    })
+    .join(" ")
+  return text.length > 0 ? text : null
+}
+
+const convertAnthropicTools = (
+  tools: Array<AnthropicTool> | undefined,
+): Array<Tool> | null => {
+  if (!tools || tools.length === 0) {
+    return null
+  }
+
+  return tools.map((tool) => ({
+    type: "function",
+    name: tool.name,
+    parameters: tool.input_schema,
+    strict: false,
+    ...(tool.description ? { description: tool.description } : {}),
+  }))
+}
+
+const convertAnthropicToolChoice = (
+  choice: AnthropicMessagesPayload["tool_choice"],
+): ToolChoiceOptions | ToolChoiceFunction => {
+  if (!choice) {
+    return "auto"
+  }
+
+  switch (choice.type) {
+    case "auto": {
+      return "auto"
+    }
+    case "any": {
+      return "required"
+    }
+    case "tool": {
+      return choice.name ? { type: "function", name: choice.name } : "auto"
{ type: "function", name: choice.name } : "auto" + } + case "none": { + return "none" + } + default: { + return "auto" + } + } +} + +export const translateResponsesResultToAnthropic = ( + response: ResponsesResult, +): AnthropicResponse => { + const contentBlocks = mapOutputToAnthropicContent(response.output) + const usage = mapResponsesUsage(response) + let anthropicContent = fallbackContentBlocks(response.output_text) + if (contentBlocks.length > 0) { + anthropicContent = contentBlocks + } + + const stopReason = mapResponsesStopReason(response) + + return { + id: response.id, + type: "message", + role: "assistant", + content: anthropicContent, + model: response.model, + stop_reason: stopReason, + stop_sequence: null, + usage, + } +} + +const mapOutputToAnthropicContent = ( + output: Array, +): Array => { + const contentBlocks: Array = [] + + for (const item of output) { + switch (item.type) { + case "reasoning": { + const thinkingText = extractReasoningText(item) + if (thinkingText.length > 0) { + contentBlocks.push({ + type: "thinking", + thinking: thinkingText, + signature: (item.encrypted_content ?? "") + "@" + item.id, + }) + } + break + } + case "function_call": { + const toolUseBlock = createToolUseContentBlock(item) + if (toolUseBlock) { + contentBlocks.push(toolUseBlock) + } + break + } + case "message": { + const combinedText = combineMessageTextContent(item.content) + if (combinedText.length > 0) { + contentBlocks.push({ type: "text", text: combinedText }) + } + break + } + default: { + // Future compatibility for unrecognized output item types. + const combinedText = combineMessageTextContent( + (item as { content?: Array }).content, + ) + if (combinedText.length > 0) { + contentBlocks.push({ type: "text", text: combinedText }) + } + } + } + } + + return contentBlocks +} + +const combineMessageTextContent = ( + content: Array | undefined, +): string => { + if (!Array.isArray(content)) { + return "" + } + + let aggregated = "" + + for (const block of content) { + if (isResponseOutputText(block)) { + aggregated += block.text + continue + } + + if (isResponseOutputRefusal(block)) { + aggregated += block.refusal + continue + } + + if (typeof (block as { text?: unknown }).text === "string") { + aggregated += (block as { text: string }).text + continue + } + + if (typeof (block as { reasoning?: unknown }).reasoning === "string") { + aggregated += (block as { reasoning: string }).reasoning + continue + } + } + + return aggregated +} + +const extractReasoningText = (item: ResponseOutputReasoning): string => { + const segments: Array = [] + + const collectFromBlocks = (blocks?: Array) => { + if (!Array.isArray(blocks)) { + return + } + + for (const block of blocks) { + if (typeof block.text === "string") { + segments.push(block.text) + continue + } + } + } + + collectFromBlocks(item.summary) + + return segments.join("").trim() +} + +const createToolUseContentBlock = ( + call: ResponseOutputFunctionCall, +): AnthropicToolUseBlock | null => { + const toolId = call.call_id + if (!call.name || !toolId) { + return null + } + + const input = parseFunctionCallArguments(call.arguments) + + return { + type: "tool_use", + id: toolId, + name: call.name, + input, + } +} + +const parseFunctionCallArguments = ( + rawArguments: string, +): Record => { + if (typeof rawArguments !== "string" || rawArguments.trim().length === 0) { + return {} + } + + try { + const parsed: unknown = JSON.parse(rawArguments) + + if (Array.isArray(parsed)) { + return { arguments: parsed } + } + + if (parsed && typeof 
parsed === "object") { + return parsed as Record + } + } catch (error) { + consola.warn("Failed to parse function call arguments", { + error, + rawArguments, + }) + } + + return { raw_arguments: rawArguments } +} + +const fallbackContentBlocks = ( + outputText: string, +): Array => { + if (!outputText) { + return [] + } + + return [ + { + type: "text", + text: outputText, + }, + ] +} + +const mapResponsesStopReason = ( + response: ResponsesResult, +): AnthropicResponse["stop_reason"] => { + const { status, incomplete_details: incompleteDetails } = response + + if (status === "completed") { + if (response.output.some((item) => item.type === "function_call")) { + return "tool_use" + } + return "end_turn" + } + + if (status === "incomplete") { + if (incompleteDetails?.reason === "max_output_tokens") { + return "max_tokens" + } + if (incompleteDetails?.reason === "content_filter") { + return "end_turn" + } + } + + return null +} + +const mapResponsesUsage = ( + response: ResponsesResult, +): AnthropicResponse["usage"] => { + const inputTokens = response.usage?.input_tokens ?? 0 + const outputTokens = response.usage?.output_tokens ?? 0 + const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens + + return { + input_tokens: inputTokens - (inputCachedTokens ?? 0), + output_tokens: outputTokens, + ...(response.usage?.input_tokens_details?.cached_tokens !== undefined && { + cache_read_input_tokens: + response.usage.input_tokens_details.cached_tokens, + }), + } +} + +const isRecord = (value: unknown): value is Record => + typeof value === "object" && value !== null + +const isResponseOutputText = ( + block: ResponseOutputContentBlock, +): block is ResponseOutputText => + isRecord(block) + && "type" in block + && (block as { type?: unknown }).type === "output_text" + +const isResponseOutputRefusal = ( + block: ResponseOutputContentBlock, +): block is ResponseOutputRefusal => + isRecord(block) + && "type" in block + && (block as { type?: unknown }).type === "refusal" + +const parseUserId = ( + userId: string | undefined, +): { safetyIdentifier: string | null; promptCacheKey: string | null } => { + if (!userId || typeof userId !== "string") { + return { safetyIdentifier: null, promptCacheKey: null } + } + + // Parse safety_identifier: content between "user_" and "_account" + const userMatch = userId.match(/user_([^_]+)_account/) + const safetyIdentifier = userMatch ? userMatch[1] : null + + // Parse prompt_cache_key: content after "_session_" + const sessionMatch = userId.match(/_session_(.+)$/) + const promptCacheKey = sessionMatch ? 
+const parseUserId = (
+  userId: string | undefined,
+): { safetyIdentifier: string | null; promptCacheKey: string | null } => {
+  if (!userId || typeof userId !== "string") {
+    return { safetyIdentifier: null, promptCacheKey: null }
+  }
+
+  // Parse safety_identifier: content between "user_" and "_account"
+  const userMatch = userId.match(/user_([^_]+)_account/)
+  const safetyIdentifier = userMatch ? userMatch[1] : null
+
+  // Parse prompt_cache_key: content after "_session_"
+  const sessionMatch = userId.match(/_session_(.+)$/)
+  const promptCacheKey = sessionMatch ? sessionMatch[1] : null
+
+  return { safetyIdentifier, promptCacheKey }
+}
+
+const convertToolResultContent = (
+  content: string | Array<AnthropicTextBlock> | Array<AnthropicImageBlock>,
+): string | Array<ResponseInputContent> => {
+  if (typeof content === "string") {
+    return content
+  }
+
+  if (Array.isArray(content)) {
+    const result: Array<ResponseInputContent> = []
+    for (const block of content) {
+      switch (block.type) {
+        case "text": {
+          result.push(createTextContent(block.text))
+          break
+        }
+        case "image": {
+          result.push(createImageContent(block))
+          break
+        }
+        default: {
+          break
+        }
+      }
+    }
+    return result
+  }

+  return ""
+}
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
new file mode 100644
index 00000000..574d61fc
--- /dev/null
+++ b/src/routes/responses/handler.ts
@@ -0,0 +1,80 @@
+import type { Context } from "hono"
+
+import { streamSSE } from "hono/streaming"
+
+import { awaitApproval } from "~/lib/approval"
+import { createHandlerLogger } from "~/lib/logger"
+import { checkRateLimit } from "~/lib/rate-limit"
+import { state } from "~/lib/state"
+import {
+  createResponses,
+  type ResponsesPayload,
+  type ResponsesResult,
+} from "~/services/copilot/create-responses"
+
+import { getResponsesRequestOptions } from "./utils"
+
+const logger = createHandlerLogger("responses-handler")
+
+const RESPONSES_ENDPOINT = "/responses"
+
+export const handleResponses = async (c: Context) => {
+  await checkRateLimit(state)
+
+  const payload = await c.req.json()
+  logger.debug("Responses request payload:", JSON.stringify(payload))
+
+  const selectedModel = state.models?.data.find(
+    (model) => model.id === payload.model,
+  )
+  const supportsResponses =
+    selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false
+
+  if (!supportsResponses) {
+    return c.json(
+      {
+        error: {
+          message:
+            "This model does not support the responses endpoint. Please choose a different model.",
+          type: "invalid_request_error",
+        },
+      },
+      400,
+    )
+  }
+
+  const { vision, initiator } = getResponsesRequestOptions(payload)
+
+  if (state.manualApprove) {
+    await awaitApproval()
+  }
+
+  const response = await createResponses(payload, { vision, initiator })
+
+  if (isStreamingRequested(payload) && isAsyncIterable(response)) {
+    logger.debug("Forwarding native Responses stream")
+    return streamSSE(c, async (stream) => {
+      for await (const chunk of response) {
+        logger.debug("Responses stream chunk:", JSON.stringify(chunk))
+        await stream.writeSSE({
+          id: (chunk as { id?: string }).id,
+          event: (chunk as { event?: string }).event,
+          data: (chunk as { data?: string }).data ?? "",
+        })
+      }
+    })
+  }
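+
+  // Non-streaming path below. Illustrative request (assumed local defaults):
+  //   curl -X POST http://localhost:4141/v1/responses \
+  //     -H "Content-Type: application/json" \
+  //     -d '{"model":"gpt-5-mini","input":"hi"}'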
"", + }) + } + }) + } + + logger.debug( + "Forwarding native Responses result:", + JSON.stringify(response).slice(-400), + ) + return c.json(response as ResponsesResult) +} + +const isAsyncIterable = (value: unknown): value is AsyncIterable => + Boolean(value) + && typeof (value as AsyncIterable)[Symbol.asyncIterator] === "function" + +const isStreamingRequested = (payload: ResponsesPayload): boolean => + Boolean(payload.stream) diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts new file mode 100644 index 00000000..af242342 --- /dev/null +++ b/src/routes/responses/route.ts @@ -0,0 +1,15 @@ +import { Hono } from "hono" + +import { forwardError } from "~/lib/error" + +import { handleResponses } from "./handler" + +export const responsesRoutes = new Hono() + +responsesRoutes.post("/", async (c) => { + try { + return await handleResponses(c) + } catch (error) { + return await forwardError(c, error) + } +}) diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts new file mode 100644 index 00000000..734319cd --- /dev/null +++ b/src/routes/responses/utils.ts @@ -0,0 +1,67 @@ +import type { + ResponseInputItem, + ResponsesPayload, +} from "~/services/copilot/create-responses" + +export const getResponsesRequestOptions = ( + payload: ResponsesPayload, +): { vision: boolean; initiator: "agent" | "user" } => { + const vision = hasVisionInput(payload) + const initiator = hasAgentInitiator(payload) ? "agent" : "user" + + return { vision, initiator } +} + +export const hasAgentInitiator = (payload: ResponsesPayload): boolean => + getPayloadItems(payload).some((item) => { + if (!("role" in item) || !item.role) { + return true + } + const role = typeof item.role === "string" ? item.role.toLowerCase() : "" + return role === "assistant" + }) + +export const hasVisionInput = (payload: ResponsesPayload): boolean => { + const values = getPayloadItems(payload) + return values.some((item) => containsVisionContent(item)) +} + +const getPayloadItems = ( + payload: ResponsesPayload, +): Array => { + const result: Array = [] + + const { input } = payload + + if (Array.isArray(input)) { + result.push(...input) + } + + return result +} + +const containsVisionContent = (value: unknown): boolean => { + if (!value) return false + + if (Array.isArray(value)) { + return value.some((entry) => containsVisionContent(entry)) + } + + if (typeof value !== "object") { + return false + } + + const record = value as Record + const type = + typeof record.type === "string" ? 
+const containsVisionContent = (value: unknown): boolean => {
+  if (!value) return false
+
+  if (Array.isArray(value)) {
+    return value.some((entry) => containsVisionContent(entry))
+  }
+
+  if (typeof value !== "object") {
+    return false
+  }
+
+  const record = value as Record<string, unknown>
+  const type =
+    typeof record.type === "string" ? record.type.toLowerCase() : undefined
+
+  if (type === "input_image") {
+    return true
+  }
+
+  if (Array.isArray(record.content)) {
+    return record.content.some((entry) => containsVisionContent(entry))
+  }
+
+  return false
+}
diff --git a/src/server.ts b/src/server.ts
index 462a278f..7b9387e6 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
 import { modelRoutes } from "./routes/models/route"
+import { responsesRoutes } from "./routes/responses/route"
 import { tokenRoute } from "./routes/token/route"
 import { usageRoute } from "./routes/usage/route"
@@ -21,11 +22,13 @@ server.route("/models", modelRoutes)
 server.route("/embeddings", embeddingRoutes)
 server.route("/usage", usageRoute)
 server.route("/token", tokenRoute)
+server.route("/responses", responsesRoutes)

 // Compatibility with tools that expect v1/ prefix
 server.route("/v1/chat/completions", completionRoutes)
 server.route("/v1/models", modelRoutes)
 server.route("/v1/embeddings", embeddingRoutes)
+server.route("/v1/responses", responsesRoutes)

 // Anthropic compatible endpoints
 server.route("/v1/messages", messageRoutes)
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
new file mode 100644
index 00000000..bc24ce54
--- /dev/null
+++ b/src/services/copilot/create-responses.ts
@@ -0,0 +1,359 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+export interface ResponsesPayload {
+  model: string
+  instructions?: string | null
+  input?: string | Array<ResponseInputItem>
+  tools?: Array<Tool> | null
+  tool_choice?: ToolChoiceOptions | ToolChoiceFunction
+  temperature?: number | null
+  top_p?: number | null
+  max_output_tokens?: number | null
+  metadata?: Metadata | null
+  stream?: boolean | null
+  safety_identifier?: string | null
+  prompt_cache_key?: string | null
+  parallel_tool_calls?: boolean | null
+  store?: boolean | null
+  reasoning?: Reasoning | null
+  include?: Array<ResponseIncludable>
+  service_tier?: string | null // NOTE: Unsupported by GitHub Copilot
+  [key: string]: unknown
+}
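+
+// Illustrative payload (values assumed, not prescribed):
+//   { model: "gpt-5-mini", input: "hi", stream: true,
+//     reasoning: { effort: "low" }, store: false }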
+
+export type ToolChoiceOptions = "none" | "auto" | "required"
+
+export interface ToolChoiceFunction {
+  name: string
+  type: "function"
+}
+
+export type Tool = FunctionTool
+
+export interface FunctionTool {
+  name: string
+  parameters: { [key: string]: unknown } | null
+  strict: boolean | null
+  type: "function"
+  description?: string | null
+}
+
+export type ResponseIncludable =
+  | "file_search_call.results"
+  | "message.input_image.image_url"
+  | "computer_call_output.output.image_url"
+  | "reasoning.encrypted_content"
+  | "code_interpreter_call.outputs"
+
+export interface Reasoning {
+  effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | null
+  summary?: "auto" | "concise" | "detailed" | null
+}
+
+export interface ResponseInputMessage {
+  type?: "message"
+  role: "user" | "assistant" | "system" | "developer"
+  content?: string | Array<ResponseInputContent>
+  status?: string
+}
+
+export interface ResponseFunctionToolCallItem {
+  type: "function_call"
+  call_id: string
+  name: string
+  arguments: string
+  status?: "in_progress" | "completed" | "incomplete"
+}
+
+export interface ResponseFunctionCallOutputItem {
+  type: "function_call_output"
+  call_id: string
+  output: string | Array<ResponseInputContent>
+  status?: "in_progress" | "completed" | "incomplete"
+}
+
+export interface ResponseInputReasoning {
+  id?: string
+  type: "reasoning"
+  summary: Array<{
+    type: "summary_text"
+    text: string
+  }>
+  encrypted_content: string
+}
+
+export type ResponseInputItem =
+  | ResponseInputMessage
+  | ResponseFunctionToolCallItem
+  | ResponseFunctionCallOutputItem
+  | ResponseInputReasoning
+  | Record<string, unknown>
+
+export type ResponseInputContent =
+  | ResponseInputText
+  | ResponseInputImage
+  | Record<string, unknown>
+
+export interface ResponseInputText {
+  type: "input_text" | "output_text"
+  text: string
+}
+
+export interface ResponseInputImage {
+  type: "input_image"
+  image_url?: string | null
+  file_id?: string | null
+  detail: "low" | "high" | "auto"
+}
+
+export interface ResponsesResult {
+  id: string
+  object: "response"
+  created_at: number
+  model: string
+  output: Array<ResponseOutputItem>
+  output_text: string
+  status: string
+  usage?: ResponseUsage | null
+  error: ResponseError | null
+  incomplete_details: IncompleteDetails | null
+  instructions: string | null
+  metadata: Metadata | null
+  parallel_tool_calls: boolean
+  temperature: number | null
+  tool_choice: unknown
+  tools: Array<Tool>
+  top_p: number | null
+}
+
+export type Metadata = { [key: string]: string }
+
+export interface IncompleteDetails {
+  reason?: "max_output_tokens" | "content_filter"
+}
+
+export interface ResponseError {
+  message: string
+}
+
+export type ResponseOutputItem =
+  | ResponseOutputMessage
+  | ResponseOutputReasoning
+  | ResponseOutputFunctionCall
+
+export interface ResponseOutputMessage {
+  id: string
+  type: "message"
+  role: "assistant"
+  status: "completed" | "in_progress" | "incomplete"
+  content?: Array<ResponseOutputContentBlock>
+}
+
+export interface ResponseOutputReasoning {
+  id: string
+  type: "reasoning"
+  summary?: Array<ResponseReasoningBlock>
+  encrypted_content?: string
+  status?: "completed" | "in_progress" | "incomplete"
+}
+
+export interface ResponseReasoningBlock {
+  type: string
+  text?: string
+}
+
+export interface ResponseOutputFunctionCall {
+  id?: string
+  type: "function_call"
+  call_id: string
+  name: string
+  arguments: string
+  status?: "in_progress" | "completed" | "incomplete"
+}
+
+export type ResponseOutputContentBlock =
+  | ResponseOutputText
+  | ResponseOutputRefusal
+  | Record<string, unknown>
+
+export interface ResponseOutputText {
+  type: "output_text"
+  text: string
+  annotations: Array<unknown>
+}
+
+export interface ResponseOutputRefusal {
+  type: "refusal"
+  refusal: string
+}
+
+export interface ResponseUsage {
+  input_tokens: number
+  output_tokens?: number
+  total_tokens: number
+  input_tokens_details?: {
+    cached_tokens: number
+  }
+  output_tokens_details?: {
+    reasoning_tokens: number
+  }
+}
+
+export type ResponseStreamEvent =
+  | ResponseCompletedEvent
+  | ResponseIncompleteEvent
+  | ResponseCreatedEvent
+  | ResponseErrorEvent
+  | ResponseFunctionCallArgumentsDeltaEvent
+  | ResponseFunctionCallArgumentsDoneEvent
+  | ResponseFailedEvent
+  | ResponseOutputItemAddedEvent
+  | ResponseOutputItemDoneEvent
+  | ResponseReasoningSummaryTextDeltaEvent
+  | ResponseReasoningSummaryTextDoneEvent
+  | ResponseTextDeltaEvent
+  | ResponseTextDoneEvent
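+
+// Typical event order for a text-only turn (illustrative, events from the
+// union above only):
+//   response.created -> response.output_item.added ->
+//   response.output_text.delta (repeated) -> response.output_text.done ->
+//   response.output_item.done -> response.completed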
+
+export interface ResponseCompletedEvent {
+  response: ResponsesResult
+  sequence_number: number
+  type: "response.completed"
+}
+
+export interface ResponseIncompleteEvent {
+  response: ResponsesResult
+  sequence_number: number
+  type: "response.incomplete"
+}
+
+export interface ResponseCreatedEvent {
+  response: ResponsesResult
+  sequence_number: number
+  type: "response.created"
+}
+
+export interface ResponseErrorEvent {
+  code: string | null
+  message: string
+  param: string | null
+  sequence_number: number
+  type: "error"
+}
+
+export interface ResponseFunctionCallArgumentsDeltaEvent {
+  delta: string
+  item_id: string
+  output_index: number
+  sequence_number: number
+  type: "response.function_call_arguments.delta"
+}
+
+export interface ResponseFunctionCallArgumentsDoneEvent {
+  arguments: string
+  item_id: string
+  name: string
+  output_index: number
+  sequence_number: number
+  type: "response.function_call_arguments.done"
+}
+
+export interface ResponseFailedEvent {
+  response: ResponsesResult
+  sequence_number: number
+  type: "response.failed"
+}
+
+export interface ResponseOutputItemAddedEvent {
+  item: ResponseOutputItem
+  output_index: number
+  sequence_number: number
+  type: "response.output_item.added"
+}
+
+export interface ResponseOutputItemDoneEvent {
+  item: ResponseOutputItem
+  output_index: number
+  sequence_number: number
+  type: "response.output_item.done"
+}
+
+export interface ResponseReasoningSummaryTextDeltaEvent {
+  delta: string
+  item_id: string
+  output_index: number
+  sequence_number: number
+  summary_index: number
+  type: "response.reasoning_summary_text.delta"
+}
+
+export interface ResponseReasoningSummaryTextDoneEvent {
+  item_id: string
+  output_index: number
+  sequence_number: number
+  summary_index: number
+  text: string
+  type: "response.reasoning_summary_text.done"
+}
+
+export interface ResponseTextDeltaEvent {
+  content_index: number
+  delta: string
+  item_id: string
+  output_index: number
+  sequence_number: number
+  type: "response.output_text.delta"
+}
+
+export interface ResponseTextDoneEvent {
+  content_index: number
+  item_id: string
+  output_index: number
+  sequence_number: number
+  text: string
+  type: "response.output_text.done"
+}
+
+export type ResponsesStream = ReturnType<typeof events>
+export type CreateResponsesReturn = ResponsesResult | ResponsesStream
+
+interface ResponsesRequestOptions {
+  vision: boolean
+  initiator: "agent" | "user"
+}
+
+export const createResponses = async (
+  payload: ResponsesPayload,
+  { vision, initiator }: ResponsesRequestOptions,
+): Promise<CreateResponsesReturn> => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state, vision),
+    "X-Initiator": initiator,
+  }
+
+  // service_tier is not supported by GitHub Copilot, so strip it.
+  payload.service_tier = null
+
+  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  })
+
+  if (!response.ok) {
+    consola.error("Failed to create responses", response)
+    throw new HTTPError("Failed to create responses", response)
+  }
+
+  if (payload.stream) {
+    return events(response)
+  }
+
+  return (await response.json()) as ResponsesResult
+}
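+
+// Illustrative usage (assumed values):
+//   const result = await createResponses(
+//     { model: "gpt-5-mini", input: "hi" },
+//     { vision: false, initiator: "user" },
+//   )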
diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts
index 3cfa30af..3690ad3f 100644
--- a/src/services/copilot/get-models.ts
+++ b/src/services/copilot/get-models.ts
@@ -28,6 +28,9 @@ interface ModelSupports {
   tool_calls?: boolean
   parallel_tool_calls?: boolean
   dimensions?: boolean
+  streaming?: boolean
+  structured_outputs?: boolean
+  vision?: boolean
 }

 interface ModelCapabilities {
@@ -52,4 +55,5 @@ export interface Model {
     state: string
     terms: string
   }
+  supported_endpoints?: Array<string>
 }
diff --git a/src/start.ts b/src/start.ts
index 14abbbdf..85bfe4c4 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -6,13 +6,13 @@ import consola from "consola"
 import { serve, type ServerHandler } from "srvx"
 import invariant from "tiny-invariant"

+import { mergeConfigWithDefaults } from "./lib/config"
 import { ensurePaths } from "./lib/paths"
 import { initProxyFromEnv } from "./lib/proxy"
 import { generateEnvScript } from "./lib/shell"
 import { state } from "./lib/state"
 import { setupCopilotToken, setupGitHubToken } from "./lib/token"
 import { cacheModels, cacheVSCodeVersion } from "./lib/utils"
-import { server } from "./server"

 interface RunServerOptions {
   port: number
@@ -28,10 +28,14 @@
 }

 export async function runServer(options: RunServerOptions): Promise<void> {
+  // Ensure config is merged with defaults at startup
+  mergeConfigWithDefaults()
+
   if (options.proxyEnv) {
     initProxyFromEnv()
   }
+
+  state.verbose = options.verbose
   if (options.verbose) {
     consola.level = 5
     consola.info("Verbose logging enabled")
@@ -114,9 +118,14 @@ export async function runServer(options: RunServerOptions): Promise<void> {
     `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`,
   )

+  const { server } = await import("./server")
+
   serve({
     fetch: server.fetch as ServerHandler,
     port: options.port,
+    bun: {
+      idleTimeout: 0,
+    },
   })
 }
diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts
index 06c66377..89f240f3 100644
--- a/tests/anthropic-request.test.ts
+++ b/tests/anthropic-request.test.ts
@@ -136,6 +136,7 @@ describe("Anthropic to OpenAI translation logic", () => {
         {
           type: "thinking",
           thinking: "Let me think about this simple math problem...",
+          signature: "abc123",
         },
         { type: "text", text: "2+2 equals 4." },
       ],
@@ -168,6 +169,7 @@ describe("Anthropic to OpenAI translation logic", () => {
           {
             type: "thinking",
             thinking:
               "I need to call the weather API to get current weather information.",
+            signature: "def456",
           },
           { type: "text", text: "I'll check the weather for you." },
           {
diff --git a/tests/responses-stream-translation.test.ts b/tests/responses-stream-translation.test.ts
new file mode 100644
index 00000000..885ac911
--- /dev/null
+++ b/tests/responses-stream-translation.test.ts
@@ -0,0 +1,140 @@
+import { describe, expect, test } from "bun:test"
+
+import type { AnthropicStreamEventData } from "~/routes/messages/anthropic-types"
+import type {
+  ResponseOutputItemAddedEvent,
+  ResponseFunctionCallArgumentsDeltaEvent,
+  ResponseFunctionCallArgumentsDoneEvent,
+} from "~/services/copilot/create-responses"
+
+import {
+  createResponsesStreamState,
+  translateResponsesStreamEvent,
+} from "~/routes/messages/responses-stream-translation"
+
+const createFunctionCallAddedEvent = (): ResponseOutputItemAddedEvent => ({
+  type: "response.output_item.added",
+  sequence_number: 1,
+  output_index: 1,
+  item: {
+    id: "item-1",
+    type: "function_call",
+    call_id: "call-1",
+    name: "TodoWrite",
+    arguments: "",
+    status: "in_progress",
+  },
+})
+
+describe("translateResponsesStreamEvent tool calls", () => {
+  test("streams function call arguments across deltas", () => {
+    const state = createResponsesStreamState()
+
+    const events = [
+      translateResponsesStreamEvent(createFunctionCallAddedEvent(), state),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.delta",
+          item_id: "item-1",
+          output_index: 1,
+          sequence_number: 2,
+          delta: '{"todos":',
+        } as ResponseFunctionCallArgumentsDeltaEvent,
+        state,
+      ),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.delta",
+          item_id: "item-1",
+          output_index: 1,
+          sequence_number: 3,
+          delta: "[]}",
+        } as ResponseFunctionCallArgumentsDeltaEvent,
+        state,
+      ),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.done",
+          item_id: "item-1",
+          name: "TodoWrite",
+          output_index: 1,
+          sequence_number: 4,
+          arguments: '{"todos":[]}',
+        } as ResponseFunctionCallArgumentsDoneEvent,
+        state,
+      ),
+    ].flat()
+
+    const blockStart = events.find(
+      (event) => event.type === "content_block_start",
+    )
+    expect(blockStart).toBeDefined()
+    if (blockStart?.type === "content_block_start") {
+      expect(blockStart.content_block).toEqual({
+        type: "tool_use",
+        id: "call-1",
+        name: "TodoWrite",
+        input: {},
+      })
+    }
+
+    const deltas = events.filter(
+      (
+        event,
+      ): event is Extract<
+        AnthropicStreamEventData,
+        { type: "content_block_delta" }
+      > => event.type === "content_block_delta",
+    )
+    expect(deltas).toHaveLength(2)
+    expect(deltas[0].delta).toEqual({
+      type: "input_json_delta",
+      partial_json: '{"todos":',
+    })
+    expect(deltas[1].delta).toEqual({
+      type: "input_json_delta",
+      partial_json: "[]}",
+    })
+
+    expect(state.openBlocks.size).toBe(1)
+    expect(state.functionCallStateByOutputIndex.size).toBe(0)
+  })
+
+  test("emits full arguments when only done payload is present", () => {
+    const state = createResponsesStreamState()
+
+    const events = [
+      translateResponsesStreamEvent(createFunctionCallAddedEvent(), state),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.done",
+          item_id: "item-1",
+          name: "TodoWrite",
+          output_index: 1,
+          sequence_number: 2,
+          arguments:
+            '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}',
+        } as ResponseFunctionCallArgumentsDoneEvent,
+        state,
+      ),
+    ].flat()
+
+    const deltas = events.filter(
+      (
+        event,
+      ): event is Extract<
+        AnthropicStreamEventData,
+        { type: "content_block_delta" }
+      > => event.type === "content_block_delta",
+    )
+    expect(deltas).toHaveLength(1)
+    expect(deltas[0].delta).toEqual({
+      type: "input_json_delta",
+      partial_json:
+        '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}',
+    })
+
+    expect(state.openBlocks.size).toBe(1)
+    expect(state.functionCallStateByOutputIndex.size).toBe(0)
+  })
+})
diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts
new file mode 100644
index 00000000..3ce5f708
--- /dev/null
+++ b/tests/responses-translation.test.ts
@@ -0,0 +1,160 @@
+import { describe, expect, it } from "bun:test"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+import type {
+  ResponseInputMessage,
+  ResponsesResult,
+} from "~/services/copilot/create-responses"
+
+import {
+  translateAnthropicMessagesToResponsesPayload,
+  translateResponsesResultToAnthropic,
+} from "~/routes/messages/responses-translation"
+
+const samplePayload = {
+  model: "claude-3-5-sonnet",
+  max_tokens: 1024,
+  messages: [
+    {
+      role: "user",
+      content: [
+        {
+          type: "text",
+          text: "\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n",
+        },
+        {
+          type: "text",
+          text: "\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n \n IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n",
+        },
+        {
+          type: "text",
+          text: "hi",
+        },
+        {
+          type: "text",
+          text: "\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. This may or may not be related to the current task.\n",
+        },
+        {
+          type: "text",
+          text: "hi",
+          cache_control: {
+            type: "ephemeral",
+          },
+        },
+      ],
+    },
+  ],
+} as unknown as AnthropicMessagesPayload
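+
+// The payload above mirrors a Claude Code request: several injected reminder
+// text blocks plus the actual user message ("hi"), with a cache_control
+// marker on the final block.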
+
+describe("translateAnthropicMessagesToResponsesPayload", () => {
+  it("converts anthropic text blocks into response input messages", () => {
+    const result = translateAnthropicMessagesToResponsesPayload(samplePayload)
+
+    expect(Array.isArray(result.input)).toBe(true)
+    const input = result.input as Array<ResponseInputMessage>
+    expect(input).toHaveLength(1)
+
+    const message = input[0]
+    expect(message.role).toBe("user")
+    expect(Array.isArray(message.content)).toBe(true)
+
+    const content = message.content as Array<{ text: string }>
+    expect(content.map((item) => item.text)).toEqual([
+      "\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n",
+      "\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n \n IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n",
+      "hi",
+      "\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. This may or may not be related to the current task.\n",
+      "hi",
+    ])
+  })
+})
+
+describe("translateResponsesResultToAnthropic", () => {
+  it("handles reasoning and function call items", () => {
+    const responsesResult: ResponsesResult = {
+      id: "resp_123",
+      object: "response",
+      created_at: 0,
+      model: "gpt-4.1",
+      output: [
+        {
+          id: "reason_1",
+          type: "reasoning",
+          summary: [{ type: "summary_text", text: "Thinking about the task." }],
+          status: "completed",
+          encrypted_content: "encrypted_reasoning_content",
+        },
+        {
+          id: "call_1",
+          type: "function_call",
+          call_id: "call_1",
+          name: "TodoWrite",
+          arguments:
+            '{"todos":[{"content":"Read src/routes/responses/translation.ts","status":"in_progress"}]}',
+          status: "completed",
+        },
+        {
+          id: "message_1",
+          type: "message",
+          role: "assistant",
+          status: "completed",
+          content: [
+            {
+              type: "output_text",
+              text: "Added the task to your todo list.",
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      output_text: "Added the task to your todo list.",
+      status: "incomplete",
+      usage: {
+        input_tokens: 120,
+        output_tokens: 36,
+        total_tokens: 156,
+      },
+      error: null,
+      incomplete_details: { reason: "content_filter" },
+      instructions: null,
+      metadata: null,
+      parallel_tool_calls: false,
+      temperature: null,
+      tool_choice: null,
+      tools: [],
+      top_p: null,
+    }
+
+    const anthropicResponse =
+      translateResponsesResultToAnthropic(responsesResult)
+
+    expect(anthropicResponse.stop_reason).toBe("end_turn")
+    expect(anthropicResponse.content).toHaveLength(3)
+
+    const [thinkingBlock, toolUseBlock, textBlock] = anthropicResponse.content
+
+    expect(thinkingBlock.type).toBe("thinking")
+    if (thinkingBlock.type === "thinking") {
+      expect(thinkingBlock.thinking).toContain("Thinking about the task")
+    }
+
+    expect(toolUseBlock.type).toBe("tool_use")
+    if (toolUseBlock.type === "tool_use") {
+      expect(toolUseBlock.id).toBe("call_1")
+      expect(toolUseBlock.name).toBe("TodoWrite")
+      expect(toolUseBlock.input).toEqual({
+        todos: [
+          {
+            content: "Read src/routes/responses/translation.ts",
+            status: "in_progress",
+          },
+        ],
+      })
+    }
+
+    expect(textBlock.type).toBe("text")
+    if (textBlock.type === "text") {
+      expect(textBlock.text).toBe("Added the task to your todo list.")
+    }
+  })
+})