Commit 72fbb9d: o3 pro max mode (#171)

1 parent: 40962b6

3 files changed: +49 lines, -7 lines

backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts (3 additions, 0 deletions)

```diff
@@ -42,6 +42,9 @@ const modelToAiSDKModel = (model: Model): LanguageModelV1 => {
   if (Object.values(geminiModels).includes(model as GeminiModel)) {
     return google.languageModel(model)
   }
+  if (model === openaiModels.o3pro || model === openaiModels.o3) {
+    return openai.responses(model)
+  }
   if (Object.values(openaiModels).includes(model as OpenAIModel)) {
     return openai.languageModel(model)
   }
```
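`openai.responses(...)` is the AI SDK's provider method for OpenAI's Responses API (o3-pro in particular is served through it), so the new branch catches these two models before the generic `openai.languageModel` path. A minimal sketch of how the branch is exercised; the prompt text is illustrative, not from this repo:

```ts
// Illustrative sketch: o3 and o3-pro now resolve to a Responses API model
// instead of the default chat-completions language model.
import { streamText } from 'ai'
import { openai } from '@ai-sdk/openai'

const result = streamText({
  model: openai.responses('o3'), // what modelToAiSDKModel returns for o3 / o3-pro
  prompt: 'Outline edge cases for a streaming text parser.',
})

for await (const chunk of result.textStream) {
  process.stdout.write(chunk)
}
```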

backend/src/thinking-stream.ts (44 additions, 7 deletions)

```diff
@@ -1,9 +1,9 @@
-import { CostMode } from 'common/constants'
+import { CostMode, models } from 'common/constants'
 
 import { CoreMessage } from 'ai'
 import { getAgentStream } from './prompt-agent-stream'
-import { TOOL_LIST } from './tools'
 import { logger } from './util/logger'
+import { TOOL_LIST } from './tools'
 
 export async function getThinkingStream(
   messages: CoreMessage[],
```
```diff
@@ -16,13 +16,14 @@ export async function getThinkingStream(
     userId: string | undefined
   }
 ) {
-  const { getStream } = getAgentStream({
+  const { getStream, model } = getAgentStream({
     costMode: options.costMode,
     selectedModel: 'gemini-2.5-pro',
     stopSequences: [
       '</think_deeply>',
       '<think_deeply>',
       '<read_files>',
+      '<write_file>',
       '<end_turn>',
     ],
     clientSessionId: options.clientSessionId,
```
```diff
@@ -31,7 +32,31 @@
     userId: options.userId,
   })
 
-  const thinkingPrompt = `You are an expert programmer. Think deeply about the user request in the message history and how to best approach it. Consider edge cases, potential issues, and alternative approaches. Only think - do not take any actions or make any changes.
+  const isO3 = model === models.o3pro || model === models.o3
+  const thinkingPrompt = isO3
+    ? `You are an expert programmer. Think deeply about the user request in the message history and how to best approach it. Consider edge cases, potential issues, and alternative approaches.
+
+When the next action is clear, you can stop your thinking immediately. For example:
+- If you realize you need to read files, say what files you should read next, and then end your thinking.
+- If you realize you completed the user request, say it is time to end your response and end your thinking.
+- If you already did thinking previously that outlines a plan you are continuing to implement, you can stop your thinking immediately and continue following the plan.
+
+Guidelines:
+- Respond with your analysis or plan inside a think_deeply tool call.
+- Explain clearly and concisely what would be helpful for a junior engineer to know to handle the user request.
+- Show key snippets of code to guide the implementation to be as clean as possible.
+- Figure out the solution to any errors or bugs and give instructions on how to fix them.
+- DO NOT use any tools! You are only thinking, not taking any actions. You should refer to tool calls without angle brackets when talking about them: "I should use the read_files tool" and NOT "I should use <read_files>"
+- Make sure to end your response with "</thought>\n</think_deeply>" and don't write anything after that.
+
+Example:
+<think_deeply>
+<thought>
+The next step is to read src/foo.ts and src/bar.ts
+</thought>
+</think_deeply>
+`.trim()
+    : `You are an expert programmer. Think deeply about the user request in the message history and how to best approach it. Consider edge cases, potential issues, and alternative approaches. Only think - do not take any actions or make any changes.
 
 The user cannot see anything you write, this is thinking that will be used to generate the response in the next step.
```
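The o3-family gate now appears both here and in ai-sdk.ts. A hypothetical helper, not part of this commit, would keep the two checks from drifting apart:

```ts
// Hypothetical helper (not in this commit): centralizes the o3-family check
// that this commit duplicates in ai-sdk.ts and thinking-stream.ts.
import { models } from 'common/constants'

export const isO3Family = (model: string): boolean =>
  model === models.o3pro || model === models.o3
```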
```diff
@@ -67,8 +92,18 @@ Important: Keep your thinking as short as possible! Just a few words suffices. E
   onChunk(thinkDeeplyPrefix)
 
   let wasTruncated = false
-  for await (const chunk of stream) {
+  let prefix = ''
+  for await (let chunk of stream) {
+    // Remove a prefix of the think deeply tool call if it exists.
+    prefix += chunk
     response += chunk
+    if (thinkDeeplyPrefix.startsWith(prefix)) {
+      continue
+    }
+    if (response.startsWith(thinkDeeplyPrefix)) {
+      response = response.slice(thinkDeeplyPrefix.length)
+      chunk = chunk.slice(chunk.length - response.length)
+    }
 
     // Check for any complete tool tag
     for (const tool of TOOL_LIST) {
```
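The new loop buffers chunks while they could still be part of the `<think_deeply>` prefix, then trims that prefix out of `response` and the current chunk once the stream moves past it. The same technique in isolation, as a self-contained sketch with illustrative names:

```ts
// Illustrative sketch of the prefix-stripping technique above: hold chunks
// back while they could still be part of a known prefix, then emit only
// what follows it once the stream diverges from or moves past the prefix.
async function* stripLeadingPrefix(
  stream: AsyncIterable<string>,
  prefixToStrip: string
): AsyncGenerator<string> {
  let seen = ''
  let done = false
  for await (const chunk of stream) {
    if (done) {
      yield chunk
      continue
    }
    seen += chunk
    if (prefixToStrip.startsWith(seen)) {
      continue // still inside the prefix; hold output back
    }
    done = true
    // Emit the remainder after the prefix, or everything if it never matched.
    yield seen.startsWith(prefixToStrip)
      ? seen.slice(prefixToStrip.length)
      : seen
  }
}
```

In the commit itself the trimming is done in place on `response` and `chunk` so the rest of the loop (tool-tag checks, truncation) keeps operating on the cleaned text.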
```diff
@@ -94,8 +129,10 @@ Important: Keep your thinking as short as possible! Just a few words suffices. E
     onChunk('</thought>\n')
     response += '</thought>\n'
   }
-  onChunk('</think_deeply>')
-  response += '</think_deeply>'
+  if (!response.includes('</think_deeply>')) {
+    onChunk('</think_deeply>')
+    response += '</think_deeply>'
+  }
 
   logger.debug({ response: response }, 'Thinking stream')
   return response
```

common/src/constants.ts (2 additions, 0 deletions)

```diff
@@ -225,6 +225,8 @@ export const models = {
 
 export const shortModelNames = {
   'gemini-2.5-pro': models.gemini2_5_pro_preview,
+  'flash-2.5': models.gemini2_5_flash,
+  'opus-4': models.opus4,
   'sonnet-4': models.sonnet,
   'sonnet-3.7': models.sonnet3_7,
   'sonnet-3.6': models.sonnet3_5,
```
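`shortModelNames` maps user-facing aliases to canonical model ids, so resolution is a plain lookup. A hypothetical helper, not part of this commit:

```ts
// Hypothetical helper (not in this commit): resolve an alias such as
// 'flash-2.5' or 'opus-4' to its canonical model id, passing through
// anything that is already canonical.
import { shortModelNames } from 'common/constants'

const resolveModel = (name: string): string =>
  (shortModelNames as Record<string, string>)[name] ?? name
```

With this, `resolveModel('opus-4')` would return `models.opus4`, while an unrecognized string comes back unchanged.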
