Commit 72fbb9d: o3 pro max mode (#171)

1 parent: 40962b6

3 files changed: +49 lines, -7 lines

backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts (3 additions, 0 deletions)

```diff
@@ -42,6 +42,9 @@ const modelToAiSDKModel = (model: Model): LanguageModelV1 => {
   if (Object.values(geminiModels).includes(model as GeminiModel)) {
     return google.languageModel(model)
   }
+  if (model === openaiModels.o3pro || model === openaiModels.o3) {
+    return openai.responses(model)
+  }
   if (Object.values(openaiModels).includes(model as OpenAIModel)) {
     return openai.languageModel(model)
   }
```
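`openai.responses(...)` is the AI SDK's provider method for OpenAI's Responses API (o3-pro in particular is served through it), so the new branch catches these two models before the generic `openai.languageModel` path. A minimal sketch of how the branch is exercised; the prompt text is illustrative, not from this repo:

```ts
// Illustrative sketch: o3 and o3-pro now resolve to a Responses API model
// instead of the default chat-completions language model.
import { streamText } from 'ai'
import { openai } from '@ai-sdk/openai'

const result = streamText({
  model: openai.responses('o3'), // what modelToAiSDKModel returns for o3 / o3-pro
  prompt: 'Outline edge cases for a streaming text parser.',
})

for await (const chunk of result.textStream) {
  process.stdout.write(chunk)
}
```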

backend/src/thinking-stream.ts (44 additions, 7 deletions)

```diff
@@ -1,9 +1,9 @@
-import { CostMode } from 'common/constants'
+import { CostMode, models } from 'common/constants'
 
 import { CoreMessage } from 'ai'
 import { getAgentStream } from './prompt-agent-stream'
-import { TOOL_LIST } from './tools'
 import { logger } from './util/logger'
+import { TOOL_LIST } from './tools'
 
 export async function getThinkingStream(
   messages: CoreMessage[],
```
```diff
@@ -16,13 +16,14 @@ export async function getThinkingStream(
     userId: string | undefined
   }
 ) {
-  const { getStream } = getAgentStream({
+  const { getStream, model } = getAgentStream({
     costMode: options.costMode,
     selectedModel: 'gemini-2.5-pro',
     stopSequences: [
       '</think_deeply>',
       '<think_deeply>',
       '<read_files>',
+      '<write_file>',
       '<end_turn>',
     ],
     clientSessionId: options.clientSessionId,
```
```diff
@@ -31,7 +32,31 @@
     userId: options.userId,
   })
 
-  const thinkingPrompt = `You are an expert programmer. Think deeply about the user request in the message history and how to best approach it. Consider edge cases, potential issues, and alternative approaches. Only think - do not take any actions or make any changes.
+  const isO3 = model === models.o3pro || model === models.o3
+  const thinkingPrompt = isO3
+    ? `You are an expert programmer. Think deeply about the user request in the message history and how to best approach it. Consider edge cases, potential issues, and alternative approaches.
+
+When the next action is clear, you can stop your thinking immediately. For example:
+- If you realize you need to read files, say what files you should read next, and then end your thinking.
+- If you realize you completed the user request, say it is time to end your response and end your thinking.
+- If you already did thinking previously that outlines a plan you are continuing to implement, you can stop your thinking immediately and continue following the plan.
+
+Guidelines:
+- Respond with your analysis or plan inside a think_deeply tool call.
+- Explain clearly and concisely what would be helpful for a junior engineer to know to handle the user request.
+- Show key snippets of code to guide the implementation to be as clean as possible.
+- Figure out the solution to any errors or bugs and give instructions on how to fix them.
+- DO NOT use any tools! You are only thinking, not taking any actions. You should refer to tool calls without angle brackets when talking about them: "I should use the read_files tool" and NOT "I should use <read_files>"
+- Make sure to end your response with "</thought>\n</think_deeply>" and don't write anything after that.
+
+Example:
+<think_deeply>
+<thought>
+The next step is to read src/foo.ts and src/bar.ts
+</thought>
+</think_deeply>
+`.trim()
+    : `You are an expert programmer. Think deeply about the user request in the message history and how to best approach it. Consider edge cases, potential issues, and alternative approaches. Only think - do not take any actions or make any changes.
 
 The user cannot see anything you write, this is thinking that will be used to generate the response in the next step.
```
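The o3-family gate now appears both here and in ai-sdk.ts. A hypothetical helper, not part of this commit, would keep the two checks from drifting apart:

```ts
// Hypothetical helper (not in this commit): centralizes the o3-family check
// that this commit duplicates in ai-sdk.ts and thinking-stream.ts.
import { models } from 'common/constants'

export const isO3Family = (model: string): boolean =>
  model === models.o3pro || model === models.o3
```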
```diff
@@ -67,8 +92,18 @@ Important: Keep your thinking as short as possible! Just a few words suffices. E
   onChunk(thinkDeeplyPrefix)
 
   let wasTruncated = false
-  for await (const chunk of stream) {
+  let prefix = ''
+  for await (let chunk of stream) {
+    // Remove a prefix of the think deeply tool call if it exists.
+    prefix += chunk
     response += chunk
+    if (thinkDeeplyPrefix.startsWith(prefix)) {
+      continue
+    }
+    if (response.startsWith(thinkDeeplyPrefix)) {
+      response = response.slice(thinkDeeplyPrefix.length)
+      chunk = chunk.slice(chunk.length - response.length)
+    }
 
     // Check for any complete tool tag
     for (const tool of TOOL_LIST) {
```
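The new loop buffers chunks while they could still be part of the `<think_deeply>` prefix, then trims that prefix out of `response` and the current chunk once the stream moves past it. The same technique in isolation, as a self-contained sketch with illustrative names:

```ts
// Illustrative sketch of the prefix-stripping technique above: hold chunks
// back while they could still be part of a known prefix, then emit only
// what follows it once the stream diverges from or moves past the prefix.
async function* stripLeadingPrefix(
  stream: AsyncIterable<string>,
  prefixToStrip: string
): AsyncGenerator<string> {
  let seen = ''
  let done = false
  for await (const chunk of stream) {
    if (done) {
      yield chunk
      continue
    }
    seen += chunk
    if (prefixToStrip.startsWith(seen)) {
      continue // still inside the prefix; hold output back
    }
    done = true
    // Emit the remainder after the prefix, or everything if it never matched.
    yield seen.startsWith(prefixToStrip)
      ? seen.slice(prefixToStrip.length)
      : seen
  }
}
```

In the commit itself the trimming is done in place on `response` and `chunk` so the rest of the loop (tool-tag checks, truncation) keeps operating on the cleaned text.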
```diff
@@ -94,8 +129,10 @@ Important: Keep your thinking as short as possible! Just a few words suffices. E
     onChunk('</thought>\n')
     response += '</thought>\n'
   }
-  onChunk('</think_deeply>')
-  response += '</think_deeply>'
+  if (!response.includes('</think_deeply>')) {
+    onChunk('</think_deeply>')
+    response += '</think_deeply>'
+  }
 
   logger.debug({ response: response }, 'Thinking stream')
   return response
```

common/src/constants.ts (2 additions, 0 deletions)

```diff
@@ -225,6 +225,8 @@ export const models = {
 
 export const shortModelNames = {
   'gemini-2.5-pro': models.gemini2_5_pro_preview,
+  'flash-2.5': models.gemini2_5_flash,
+  'opus-4': models.opus4,
   'sonnet-4': models.sonnet,
   'sonnet-3.7': models.sonnet3_7,
   'sonnet-3.6': models.sonnet3_5,
```
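`shortModelNames` maps user-facing aliases to canonical model ids, so resolution is a plain lookup. A hypothetical helper, not part of this commit:

```ts
// Hypothetical helper (not in this commit): resolve an alias such as
// 'flash-2.5' or 'opus-4' to its canonical model id, passing through
// anything that is already canonical.
import { shortModelNames } from 'common/constants'

const resolveModel = (name: string): string =>
  (shortModelNames as Record<string, string>)[name] ?? name
```

With this, `resolveModel('opus-4')` would return `models.opus4`, while an unrecognized string comes back unchanged.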
