diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index 83bce92a..5b2c611f 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -7,16 +7,15 @@ export const standardHeaders = () => ({
   accept: "application/json",
 })
 
-const COPILOT_VERSION = "0.26.7"
+const COPILOT_VERSION = "0.35.0"
 const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
 const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
-const API_VERSION = "2025-04-01"
+const API_VERSION = "2025-10-01"
 
 export const copilotBaseUrl = (state: State) =>
-  state.accountType === "individual" ?
-    "https://api.githubcopilot.com"
-  : `https://api.${state.accountType}.githubcopilot.com`
+  `https://api.${state.accountType}.githubcopilot.com`
+
 export const copilotHeaders = (state: State, vision: boolean = false) => {
   const headers: Record<string, string> = {
     Authorization: `Bearer ${state.copilotToken}`,
@@ -25,7 +24,7 @@ export const copilotHeaders = (state: State, vision: boolean = false) => {
     "editor-version": `vscode/${state.vsCodeVersion}`,
     "editor-plugin-version": EDITOR_PLUGIN_VERSION,
     "user-agent": USER_AGENT,
-    "openai-intent": "conversation-panel",
+    "openai-intent": "conversation-agent",
     "x-github-api-version": API_VERSION,
     "x-request-id": randomUUID(),
     "x-vscode-user-agent-library-version": "electron-fetch",
diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts
index 8c3eda73..b9ebafad 100644
--- a/src/lib/tokenizer.ts
+++ b/src/lib/tokenizer.ts
@@ -73,6 +73,9 @@ const calculateMessageTokens = (
   const tokensPerName = 1
   let tokens = tokensPerMessage
   for (const [key, value] of Object.entries(message)) {
+    if (key === "reasoning_opaque") {
+      continue
+    }
    if (typeof value === "string") {
      tokens += encoder.encode(value).length
    }
diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts
index 881fffcc..2fb7849f 100644
--- a/src/routes/messages/anthropic-types.ts
+++ b/src/routes/messages/anthropic-types.ts
@@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock {
 export interface AnthropicThinkingBlock {
   type: "thinking"
   thinking: string
+  signature: string
 }
 
 export type AnthropicUserContentBlock =
@@ -196,6 +197,7 @@ export interface AnthropicStreamState {
   messageStartSent: boolean
   contentBlockIndex: number
   contentBlockOpen: boolean
+  thinkingBlockOpen: boolean
   toolCalls: {
     [openAIToolIndex: number]: {
       id: string
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 85dbf624..a40d3f1d 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -60,6 +60,7 @@ export async function handleCompletion(c: Context) {
       contentBlockIndex: 0,
       contentBlockOpen: false,
       toolCalls: {},
+      thinkingBlockOpen: false,
     }
 
     for await (const rawEvent of response) {
diff --git a/src/routes/messages/non-stream-translation.ts b/src/routes/messages/non-stream-translation.ts
index dc41e638..e5a59a10 100644
--- a/src/routes/messages/non-stream-translation.ts
+++ b/src/routes/messages/non-stream-translation.ts
@@ -1,3 +1,6 @@
+import type { Model } from "~/services/copilot/get-models"
+
+import { state } from "~/lib/state"
 import {
   type ChatCompletionResponse,
   type ChatCompletionsPayload,
@@ -29,11 +32,15 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"
 export function translateToOpenAI(
   payload: AnthropicMessagesPayload,
 ): ChatCompletionsPayload {
+  const modelId = translateModelName(payload.model)
+  const model = state.models?.data.find((m) => m.id === modelId)
+  const thinkingBudget = getThinkingBudget(payload, model)
   return {
-    model: translateModelName(payload.model),
+    model: modelId,
     messages: translateAnthropicMessagesToOpenAI(
       payload.messages,
       payload.system,
+      modelId,
     ),
     max_tokens: payload.max_tokens,
     stop: payload.stop_sequences,
@@ -43,14 +50,36 @@
     user: payload.metadata?.user_id,
     tools: translateAnthropicToolsToOpenAI(payload.tools),
     tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
+    thinking_budget: thinkingBudget,
+  }
+}
+
+function getThinkingBudget(
+  payload: AnthropicMessagesPayload,
+  model: Model | undefined,
+): number | undefined {
+  const thinking = payload.thinking
+  if (model && thinking) {
+    const maxThinkingBudget = Math.min(
+      model.capabilities.supports.max_thinking_budget ?? 0,
+      (model.capabilities.limits.max_output_tokens ?? 0) - 1,
+    )
+    if (maxThinkingBudget > 0 && thinking.budget_tokens !== undefined) {
+      const budgetTokens = Math.min(thinking.budget_tokens, maxThinkingBudget)
+      return Math.max(
+        budgetTokens,
+        model.capabilities.supports.min_thinking_budget ?? 1024,
+      )
+    }
   }
+  return undefined
 }
 
 function translateModelName(model: string): string {
   // Subagent requests use a specific model number which Copilot doesn't support
   if (model.startsWith("claude-sonnet-4-")) {
     return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
-  } else if (model.startsWith("claude-opus-")) {
+  } else if (model.startsWith("claude-opus-4-")) {
     return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
   }
   return model
@@ -59,13 +88,14 @@ function translateModelName(model: string): string {
 function translateAnthropicMessagesToOpenAI(
   anthropicMessages: Array<AnthropicMessage>,
   system: string | Array<AnthropicTextBlock> | undefined,
+  modelId: string,
 ): Array<Message> {
   const systemMessages = handleSystemPrompt(system)
 
   const otherMessages = anthropicMessages.flatMap((message) =>
     message.role === "user" ?
       handleUserMessage(message)
-      : handleAssistantMessage(message),
+      : handleAssistantMessage(message, modelId),
   )
 
   return [...systemMessages, ...otherMessages]
@@ -125,6 +155,7 @@ function handleUserMessage(message: AnthropicUserMessage): Array<Message> {
 
 function handleAssistantMessage(
   message: AnthropicAssistantMessage,
+  modelId: string,
 ): Array<Message> {
   if (!Array.isArray(message.content)) {
     return [
@@ -139,25 +170,40 @@ function handleAssistantMessage(
     (block): block is AnthropicToolUseBlock => block.type === "tool_use",
   )
 
-  const textBlocks = message.content.filter(
-    (block): block is AnthropicTextBlock => block.type === "text",
-  )
-
-  const thinkingBlocks = message.content.filter(
+  let thinkingBlocks = message.content.filter(
     (block): block is AnthropicThinkingBlock => block.type === "thinking",
   )
 
-  // Combine text and thinking blocks, as OpenAI doesn't have separate thinking blocks
-  const allTextContent = [
-    ...textBlocks.map((b) => b.text),
-    ...thinkingBlocks.map((b) => b.thinking),
-  ].join("\n\n")
+  if (modelId.startsWith("claude")) {
+    thinkingBlocks = thinkingBlocks.filter(
+      (b) =>
+        b.thinking
+        && b.thinking.length > 0
+        && b.signature
+        && b.signature.length > 0
+        // gpt signature has @ in it, so filter those out for claude models
+        && !b.signature.includes("@"),
+    )
+  }
+
+  const thinkingContents = thinkingBlocks
+    .filter((b) => b.thinking && b.thinking.length > 0)
+    .map((b) => b.thinking)
+
+  const allThinkingContent =
+    thinkingContents.length > 0 ? thinkingContents.join("\n\n") : undefined
+
+  const signature = thinkingBlocks.find(
+    (b) => b.signature && b.signature.length > 0,
+  )?.signature
 
   return toolUseBlocks.length > 0 ?
       [
         {
           role: "assistant",
-          content: allTextContent || null,
+          content: mapContent(message.content),
+          reasoning_text: allThinkingContent,
+          reasoning_opaque: signature,
           tool_calls: toolUseBlocks.map((toolUse) => ({
             id: toolUse.id,
             type: "function",
@@ -172,6 +218,8 @@ function handleAssistantMessage(
       {
         role: "assistant",
         content: mapContent(message.content),
+        reasoning_text: allThinkingContent,
+        reasoning_opaque: signature,
       },
     ]
 }
@@ -191,11 +239,8 @@ function mapContent(
   const hasImage = content.some((block) => block.type === "image")
   if (!hasImage) {
     return content
-      .filter(
-        (block): block is AnthropicTextBlock | AnthropicThinkingBlock =>
-          block.type === "text" || block.type === "thinking",
-      )
-      .map((block) => (block.type === "text" ? block.text : block.thinking))
+      .filter((block): block is AnthropicTextBlock => block.type === "text")
+      .map((block) => block.text)
       .join("\n\n")
   }
 
@@ -204,12 +249,6 @@ function mapContent(
       switch (block.type) {
         case "text": {
           contentParts.push({ type: "text", text: block.text })
-
-          break
-        }
-        case "thinking": {
-          contentParts.push({ type: "text", text: block.thinking })
-
           break
         }
         case "image": {
           contentParts.push({
             type: "image_url",
             image_url: {
               url: `data:${block.source.media_type};base64,${block.source.data}`,
             },
           })
-
           break
         }
         // No default
@@ -282,19 +320,19 @@ export function translateToAnthropic(
   response: ChatCompletionResponse,
 ): AnthropicResponse {
   // Merge content from all choices
-  const allTextBlocks: Array<AnthropicTextBlock> = []
-  const allToolUseBlocks: Array<AnthropicToolUseBlock> = []
-  let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null =
-    null // default
-  stopReason = response.choices[0]?.finish_reason ?? stopReason
+  const assistantContentBlocks: Array<
+    AnthropicThinkingBlock | AnthropicTextBlock | AnthropicToolUseBlock
+  > = []
+  let stopReason = response.choices[0]?.finish_reason ?? null
 
   // Process all choices to extract text and tool use blocks
   for (const choice of response.choices) {
     const textBlocks = getAnthropicTextBlocks(choice.message.content)
+    const thinkBlocks = getAnthropicThinkBlocks(
+      choice.message.reasoning_text,
+      choice.message.reasoning_opaque,
+    )
     const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls)
 
-    allTextBlocks.push(...textBlocks)
-    allToolUseBlocks.push(...toolUseBlocks)
+    assistantContentBlocks.push(...thinkBlocks, ...textBlocks, ...toolUseBlocks)
 
     // Use the finish_reason from the first choice, or prioritize tool_calls
     if (choice.finish_reason === "tool_calls" || stopReason === "stop") {
@@ -302,14 +340,12 @@ export function translateToAnthropic(
     }
   }
 
-  // Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses
-
   return {
     id: response.id,
     type: "message",
     role: "assistant",
     model: response.model,
-    content: [...allTextBlocks, ...allToolUseBlocks],
+    content: assistantContentBlocks,
     stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
     stop_sequence: null,
     usage: {
@@ -329,7 +365,7 @@
 function getAnthropicTextBlocks(
   messageContent: Message["content"],
 ): Array<AnthropicTextBlock> {
-  if (typeof messageContent === "string") {
+  if (typeof messageContent === "string" && messageContent.length > 0) {
     return [{ type: "text", text: messageContent }]
   }
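Reviewer note on `getThinkingBudget` above: the requested `thinking.budget_tokens` is capped at the smaller of the model's `max_thinking_budget` and `max_output_tokens - 1` (so at least one non-thinking output token remains), then raised to `min_thinking_budget` (falling back to 1024). A minimal standalone sketch of that clamp, with hypothetical numbers:

```ts
// Standalone sketch of the clamp implemented by getThinkingBudget above.
// All numbers below are hypothetical, for illustration only.
function clampThinkingBudget(
  requested: number,
  maxThinkingBudget?: number,
  maxOutputTokens?: number,
  minThinkingBudget?: number,
): number | undefined {
  // Ceiling: the model's max thinking budget, kept strictly below max_output_tokens.
  const ceiling = Math.min(maxThinkingBudget ?? 0, (maxOutputTokens ?? 0) - 1)
  if (ceiling <= 0) return undefined
  // Cap the request at the ceiling, then raise it to the model's floor.
  return Math.max(Math.min(requested, ceiling), minThinkingBudget ?? 1024)
}

console.log(clampThinkingBudget(32768, 24576, 16384)) // 16383 (capped at max_output_tokens - 1)
console.log(clampThinkingBudget(100, 24576, 16384)) // 1024 (raised to the default floor)
```

Note that the floor is applied after the cap, mirroring the diff's `Math.max` ordering: a model whose `min_thinking_budget` exceeded the ceiling would still get the floor.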
@@ -342,6 +378,31 @@
   return []
 }
 
+function getAnthropicThinkBlocks(
+  reasoningText: string | null | undefined,
+  reasoningOpaque: string | null | undefined,
+): Array<AnthropicThinkingBlock> {
+  if (reasoningText && reasoningText.length > 0) {
+    return [
+      {
+        type: "thinking",
+        thinking: reasoningText,
+        signature: reasoningOpaque || "",
+      },
+    ]
+  }
+  if (reasoningOpaque && reasoningOpaque.length > 0) {
+    return [
+      {
+        type: "thinking",
+        thinking: "",
+        signature: reasoningOpaque,
+      },
+    ]
+  }
+  return []
+}
+
 function getAnthropicToolUseBlocks(
   toolCalls: Array<ToolCall> | undefined,
 ): Array<AnthropicToolUseBlock> {
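For orientation, here is the event sequence the refactored handlers are intended to emit for a stream that carries `reasoning_text` deltas, then text, then a finish. This is an illustrative sketch only: `usage` fields are omitted, the text values are invented, and `"end_turn"` assumes that is how `mapOpenAIStopReasonToAnthropic` maps `"stop"`.

```ts
// Illustrative Anthropic SSE event order for: thinking -> text -> finish.
const expectedEvents = [
  { type: "message_start" }, // handleMessageStart, first chunk only
  // handleThinkingText: a reasoning_text delta opens a thinking block
  { type: "content_block_start", index: 0, content_block: { type: "thinking", thinking: "" } },
  { type: "content_block_delta", index: 0, delta: { type: "thinking_delta", thinking: "Let me think..." } },
  // closeThinkingBlockIfOpen: the first text delta seals the thinking block
  { type: "content_block_delta", index: 0, delta: { type: "signature_delta", signature: "" } },
  { type: "content_block_stop", index: 0 },
  // handleContent: text block
  { type: "content_block_start", index: 1, content_block: { type: "text", text: "" } },
  { type: "content_block_delta", index: 1, delta: { type: "text_delta", text: "2+2 equals 4." } },
  // handleFinish: close the open block, then finish the message
  { type: "content_block_stop", index: 1 },
  { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null } },
  { type: "message_stop" },
]
```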
diff --git a/src/routes/messages/stream-translation.ts b/src/routes/messages/stream-translation.ts
index 55094448..b492d10f 100644
--- a/src/routes/messages/stream-translation.ts
+++ b/src/routes/messages/stream-translation.ts
@@ -1,4 +1,8 @@
-import { type ChatCompletionChunk } from "~/services/copilot/create-chat-completions"
+import {
+  type ChatCompletionChunk,
+  type Choice,
+  type Delta,
+} from "~/services/copilot/create-chat-completions"
 
 import {
   type AnthropicStreamEventData,
@@ -16,7 +20,6 @@ function isToolBlockOpen(state: AnthropicStreamState): boolean {
   )
 }
 
-// eslint-disable-next-line max-lines-per-function, complexity
 export function translateChunkToAnthropicEvents(
   chunk: ChatCompletionChunk,
   state: AnthropicStreamState,
@@ -30,22 +33,54 @@ export function translateChunkToAnthropicEvents(
   const choice = chunk.choices[0]
   const { delta } = choice
 
-  if (!state.messageStartSent) {
-    events.push({
-      type: "message_start",
-      message: {
-        id: chunk.id,
-        type: "message",
-        role: "assistant",
-        content: [],
-        model: chunk.model,
-        stop_reason: null,
-        stop_sequence: null,
+  handleMessageStart(state, events, chunk)
+
+  handleThinkingText(delta, state, events)
+
+  handleContent(delta, state, events)
+
+  handleToolCalls(delta, state, events)
+
+  handleFinish(choice, state, { events, chunk })
+
+  return events
+}
+
+function handleFinish(
+  choice: Choice,
+  state: AnthropicStreamState,
+  context: {
+    events: Array<AnthropicStreamEventData>
+    chunk: ChatCompletionChunk
+  },
+) {
+  const { events, chunk } = context
+  if (choice.finish_reason && choice.finish_reason.length > 0) {
+    if (state.contentBlockOpen) {
+      const toolBlockOpen = isToolBlockOpen(state)
+      context.events.push({
+        type: "content_block_stop",
+        index: state.contentBlockIndex,
+      })
+      state.contentBlockOpen = false
+      state.contentBlockIndex++
+      if (!toolBlockOpen) {
+        handleReasoningOpaque(choice.delta, events, state)
+      }
+    }
+
+    events.push(
+      {
+        type: "message_delta",
+        delta: {
+          stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
+          stop_sequence: null,
+        },
         usage: {
           input_tokens:
             (chunk.usage?.prompt_tokens ?? 0)
             - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
-          output_tokens: 0, // Will be updated in message_delta when finished
+          output_tokens: chunk.usage?.completion_tokens ?? 0,
           ...(chunk.usage?.prompt_tokens_details?.cached_tokens
             !== undefined && {
             cache_read_input_tokens:
@@ -53,44 +88,23 @@
           }),
         },
       },
-    })
-    state.messageStartSent = true
+      {
+        type: "message_stop",
+      },
+    )
   }
+}
 
-  if (delta.content) {
-    if (isToolBlockOpen(state)) {
-      // A tool block was open, so close it before starting a text block.
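To make the new wire fields concrete: after translation, a replayed Claude assistant turn carries its thinking text and opaque signature in the new optional fields rather than inside `content`. A hypothetical example (all values invented):

```ts
import type { Message } from "~/services/copilot/create-chat-completions"

// Hypothetical assistant turn as re-sent to Copilot: the thinking block from
// Anthropic history travels as reasoning_text plus the opaque signature.
const assistantTurn: Message = {
  role: "assistant",
  content: "2+2 equals 4.",
  reasoning_text: "Let me think about this simple math problem...",
  reasoning_opaque: "abc123", // invented signature value
}
```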
-      events.push({
-        type: "content_block_stop",
-        index: state.contentBlockIndex,
-      })
-      state.contentBlockIndex++
-      state.contentBlockOpen = false
-    }
-
-    if (!state.contentBlockOpen) {
-      events.push({
-        type: "content_block_start",
-        index: state.contentBlockIndex,
-        content_block: {
-          type: "text",
-          text: "",
-        },
-      })
-      state.contentBlockOpen = true
-    }
+
+function handleToolCalls(
+  delta: Delta,
+  state: AnthropicStreamState,
+  events: Array<AnthropicStreamEventData>,
+) {
+  if (delta.tool_calls && delta.tool_calls.length > 0) {
+    closeThinkingBlockIfOpen(state, events)
 
-    events.push({
-      type: "content_block_delta",
-      index: state.contentBlockIndex,
-      delta: {
-        type: "text_delta",
-        text: delta.content,
-      },
-    })
-  }
+    handleReasoningOpaqueInToolCalls(state, events, delta)
 
-  if (delta.tool_calls) {
     for (const toolCall of delta.tool_calls) {
       if (toolCall.id && toolCall.function?.name) {
         // New tool call starting.
@@ -141,28 +155,110 @@
       }
     }
   }
+}
 
-  if (choice.finish_reason) {
-    if (state.contentBlockOpen) {
+function handleReasoningOpaqueInToolCalls(
+  state: AnthropicStreamState,
+  events: Array<AnthropicStreamEventData>,
+  delta: Delta,
+) {
+  if (state.contentBlockOpen && !isToolBlockOpen(state)) {
+    events.push({
+      type: "content_block_stop",
+      index: state.contentBlockIndex,
+    })
+    state.contentBlockIndex++
+    state.contentBlockOpen = false
+  }
+  handleReasoningOpaque(delta, events, state)
+}
+
+function handleContent(
+  delta: Delta,
+  state: AnthropicStreamState,
+  events: Array<AnthropicStreamEventData>,
+) {
+  if (delta.content && delta.content.length > 0) {
+    closeThinkingBlockIfOpen(state, events)
+
+    if (isToolBlockOpen(state)) {
+      // A tool block was open, so close it before starting a text block.
       events.push({
         type: "content_block_stop",
         index: state.contentBlockIndex,
       })
+      state.contentBlockIndex++
       state.contentBlockOpen = false
     }
 
+    if (!state.contentBlockOpen) {
+      events.push({
+        type: "content_block_start",
+        index: state.contentBlockIndex,
+        content_block: {
+          type: "text",
+          text: "",
+        },
+      })
+      state.contentBlockOpen = true
+    }
+
+    events.push({
+      type: "content_block_delta",
+      index: state.contentBlockIndex,
+      delta: {
+        type: "text_delta",
+        text: delta.content,
+      },
+    })
+  }
+
+  // handle for claude model
+  if (
+    delta.content === ""
+    && delta.reasoning_opaque
+    && delta.reasoning_opaque.length > 0
+  ) {
     events.push(
       {
-        type: "message_delta",
+        type: "content_block_delta",
+        index: state.contentBlockIndex,
         delta: {
-          stop_reason: mapOpenAIStopReasonToAnthropic(choice.finish_reason),
-          stop_sequence: null,
+          type: "signature_delta",
+          signature: delta.reasoning_opaque,
         },
+      },
+      {
+        type: "content_block_stop",
+        index: state.contentBlockIndex,
+      },
+    )
+    state.contentBlockIndex++
+    state.thinkingBlockOpen = false
+  }
+}
+
+function handleMessageStart(
+  state: AnthropicStreamState,
+  events: Array<AnthropicStreamEventData>,
+  chunk: ChatCompletionChunk,
+) {
+  if (!state.messageStartSent) {
+    events.push({
+      type: "message_start",
+      message: {
+        id: chunk.id,
+        type: "message",
+        role: "assistant",
+        content: [],
+        model: chunk.model,
+        stop_reason: null,
+        stop_sequence: null,
         usage: {
           input_tokens:
             (chunk.usage?.prompt_tokens ?? 0)
             - (chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0),
-          output_tokens: chunk.usage?.completion_tokens ?? 0,
+          output_tokens: 0, // Will be updated in message_delta when finished
           ...(chunk.usage?.prompt_tokens_details?.cached_tokens
             !== undefined && {
             cache_read_input_tokens:
@@ -170,13 +266,102 @@
           }),
         },
       },
+    })
+    state.messageStartSent = true
+  }
+}
+
+function handleReasoningOpaque(
+  delta: Delta,
+  events: Array<AnthropicStreamEventData>,
+  state: AnthropicStreamState,
+) {
+  if (delta.reasoning_opaque && delta.reasoning_opaque.length > 0) {
+    events.push(
       {
-        type: "message_stop",
+        type: "content_block_start",
+        index: state.contentBlockIndex,
+        content_block: {
+          type: "thinking",
+          thinking: "",
+        },
+      },
+      {
+        type: "content_block_delta",
+        index: state.contentBlockIndex,
+        delta: {
+          type: "thinking_delta",
+          thinking: "",
+        },
+      },
+      {
+        type: "content_block_delta",
+        index: state.contentBlockIndex,
+        delta: {
+          type: "signature_delta",
+          signature: delta.reasoning_opaque,
+        },
+      },
+      {
+        type: "content_block_stop",
+        index: state.contentBlockIndex,
       },
     )
+    state.contentBlockIndex++
   }
+}
 
-  return events
+function handleThinkingText(
+  delta: Delta,
+  state: AnthropicStreamState,
+  events: Array<AnthropicStreamEventData>,
+) {
+  if (delta.reasoning_text && delta.reasoning_text.length > 0) {
+    if (!state.thinkingBlockOpen) {
+      events.push({
+        type: "content_block_start",
+        index: state.contentBlockIndex,
+        content_block: {
+          type: "thinking",
+          thinking: "",
+        },
+      })
+      state.thinkingBlockOpen = true
+    }
+
+    events.push({
+      type: "content_block_delta",
+      index: state.contentBlockIndex,
+      delta: {
+        type: "thinking_delta",
+        thinking: delta.reasoning_text,
+      },
+    })
+  }
+}
+
+function closeThinkingBlockIfOpen(
+  state: AnthropicStreamState,
+  events: Array<AnthropicStreamEventData>,
+): void {
+  if (state.thinkingBlockOpen) {
+    events.push(
+      {
+        type: "content_block_delta",
+        index: state.contentBlockIndex,
+        delta: {
+          type: "signature_delta",
+          signature: "",
+        },
+      },
+      {
+        type: "content_block_stop",
+        index: state.contentBlockIndex,
+      },
+    )
+    state.contentBlockIndex++
+    state.thinkingBlockOpen = false
+  }
 }
 
 export function translateErrorToAnthropicErrorEvent(): AnthropicStreamEventData {
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 8534151d..2713a80f 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -69,7 +69,7 @@ export interface ChatCompletionChunk {
   }
 }
 
-interface Delta {
+export interface Delta {
   content?: string | null
   role?: "user" | "assistant" | "system" | "tool"
   tool_calls?: Array<{
@@ -81,9 +81,11 @@ interface Delta {
       arguments?: string
     }
   }>
+  reasoning_text?: string | null
+  reasoning_opaque?: string | null
 }
 
-interface Choice {
+export interface Choice {
   index: number
   delta: Delta
   finish_reason: "stop" | "length" | "tool_calls" | "content_filter" | null
@@ -112,6 +114,8 @@ export interface ChatCompletionResponse {
 interface ResponseMessage {
   role: "assistant"
   content: string | null
+  reasoning_text?: string | null
+  reasoning_opaque?: string | null
   tool_calls?: Array<ToolCall>
 }
@@ -148,6 +152,7 @@ export interface ChatCompletionsPayload {
     | { type: "function"; function: { name: string } }
     | null
   user?: string | null
+  thinking_budget?: number
 }
 
 export interface Tool {
@@ -166,6 +171,8 @@ export interface Message {
   name?: string
   tool_calls?: Array<ToolCall>
   tool_call_id?: string
+  reasoning_text?: string | null
+  reasoning_opaque?: string | null
 }
 
 export interface ToolCall {
diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts
index
3cfa30af..a7fffe93 100644
--- a/src/services/copilot/get-models.ts
+++ b/src/services/copilot/get-models.ts
@@ -25,6 +25,8 @@ interface ModelLimits {
 }
 
 interface ModelSupports {
+  max_thinking_budget?: number
+  min_thinking_budget?: number
   tool_calls?: boolean
   parallel_tool_calls?: boolean
   dimensions?: boolean
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 6078f09b..89c39dc0 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -1,4 +1,4 @@
-const FALLBACK = "1.104.3"
+const FALLBACK = "1.107.0"
 
 export async function getVSCodeVersion() {
   const controller = new AbortController()
diff --git a/src/start.ts b/src/start.ts
index 14abbbdf..171d4ac9 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -117,6 +117,9 @@ export async function runServer(options: RunServerOptions): Promise<void> {
   serve({
     fetch: server.fetch as ServerHandler,
     port: options.port,
+    bun: {
+      idleTimeout: 0,
+    },
   })
 }
diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts
index 06c66377..baed2f6d 100644
--- a/tests/anthropic-request.test.ts
+++ b/tests/anthropic-request.test.ts
@@ -136,6 +136,7 @@ describe("Anthropic to OpenAI translation logic", () => {
           {
             type: "thinking",
             thinking: "Let me think about this simple math problem...",
+            signature: "abc123",
           },
           { type: "text", text: "2+2 equals 4." },
         ],
@@ -150,7 +151,7 @@ describe("Anthropic to OpenAI translation logic", () => {
     const assistantMessage = openAIPayload.messages.find(
       (m) => m.role === "assistant",
     )
-    expect(assistantMessage?.content).toContain(
+    expect(assistantMessage?.reasoning_text).toContain(
      "Let me think about this simple math problem...",
    )
    expect(assistantMessage?.content).toContain("2+2 equals 4.")
@@ -168,6 +169,7 @@ describe("Anthropic to OpenAI translation logic", () => {
           {
             type: "thinking",
             thinking:
               "I need to call the weather API to get current weather information.",
+            signature: "def456",
           },
           { type: "text", text: "I'll check the weather for you." },
           {
@@ -188,7 +190,7 @@ describe("Anthropic to OpenAI translation logic", () => {
     const assistantMessage = openAIPayload.messages.find(
       (m) => m.role === "assistant",
     )
-    expect(assistantMessage?.content).toContain(
+    expect(assistantMessage?.reasoning_text).toContain(
       "I need to call the weather API",
     )
     expect(assistantMessage?.content).toContain(
diff --git a/tests/anthropic-response.test.ts b/tests/anthropic-response.test.ts
index ecd71aac..e849a02a 100644
--- a/tests/anthropic-response.test.ts
+++ b/tests/anthropic-response.test.ts
@@ -252,6 +252,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => {
       contentBlockIndex: 0,
       contentBlockOpen: false,
       toolCalls: {},
+      thinkingBlockOpen: false,
     }
     const translatedStream = openAIStream.flatMap((chunk) =>
       translateChunkToAnthropicEvents(chunk, streamState),
@@ -352,6 +353,7 @@ describe("OpenAI to Anthropic Streaming Response Translation", () => {
       contentBlockIndex: 0,
       contentBlockOpen: false,
       toolCalls: {},
+      thinkingBlockOpen: false,
     }
     const translatedStream = openAIStream.flatMap((chunk) =>
       translateChunkToAnthropicEvents(chunk, streamState),