From 888e1213e861cd7fefda1b3f5337b75610178bf3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 24 Jan 2026 23:50:04 -0800 Subject: [PATCH 1/9] Make Lite into Free mode! --- agents/base2/base2-free.ts | 8 ++++ agents/base2/base2-lite.ts | 8 ---- agents/base2/base2.ts | 32 ++++++------- .../__tests__/unit/agent-mode-toggle.test.ts | 2 +- cli/src/components/blocks/thinking-block.tsx | 8 ++-- cli/src/hooks/use-send-message.ts | 3 +- cli/src/index.tsx | 6 ++- cli/src/utils/constants.ts | 13 ++++- cli/src/utils/create-run-config.ts | 3 ++ cli/src/utils/message-updater.ts | 34 +++++++++++++- cli/src/utils/settings.ts | 2 + common/src/constants/free-agents.ts | 47 ++++++++++++++++++- common/src/constants/model-config.ts | 8 ++-- common/src/types/contracts/llm.ts | 4 ++ common/src/types/session-state.ts | 2 +- evals/buffbench/main-nightly.ts | 2 +- .../find-files/custom-file-picker-config.ts | 2 +- .../src/llm-api/gemini-with-fallbacks.ts | 4 +- packages/agent-runtime/src/main-prompt.ts | 3 +- .../agent-runtime/src/prompt-agent-stream.ts | 3 ++ packages/agent-runtime/src/run-agent-step.ts | 3 ++ .../tools/handlers/tool/spawn-agent-utils.ts | 2 +- sdk/src/impl/llm.ts | 3 ++ sdk/src/run.ts | 5 +- web/src/app/store/store-client.tsx | 2 +- web/src/content/advanced/how-does-it-work.mdx | 2 +- web/src/llm-api/helpers.ts | 17 +++++-- web/src/llm-api/openai.ts | 3 +- web/src/llm-api/openrouter.ts | 17 +++++-- 29 files changed, 188 insertions(+), 60 deletions(-) create mode 100644 agents/base2/base2-free.ts delete mode 100644 agents/base2/base2-lite.ts diff --git a/agents/base2/base2-free.ts b/agents/base2/base2-free.ts new file mode 100644 index 0000000000..464defff24 --- /dev/null +++ b/agents/base2/base2-free.ts @@ -0,0 +1,8 @@ +import { createBase2 } from './base2' + +const definition = { + ...createBase2('free'), + id: 'base2-free', + displayName: 'Buffy the Free Orchestrator', +} +export default definition diff --git a/agents/base2/base2-lite.ts b/agents/base2/base2-lite.ts deleted file mode 100644 index 166e7820c2..0000000000 --- a/agents/base2/base2-lite.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { createBase2 } from './base2' - -const definition = { - ...createBase2('lite'), - id: 'base2-lite', - displayName: 'Buffy the Lite Orchestrator', -} -export default definition diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index e63c0376e2..66584c215a 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -7,7 +7,7 @@ import { } from '../types/secret-agent-definition' export function createBase2( - mode: 'default' | 'lite' | 'max' | 'fast', + mode: 'default' | 'free' | 'max' | 'fast', options?: { hasNoValidation?: boolean planOnly?: boolean @@ -22,15 +22,15 @@ export function createBase2( const isDefault = mode === 'default' const isFast = mode === 'fast' const isMax = mode === 'max' - const isLite = mode === 'lite' + const isFree = mode === 'free' - const isOpus = !isLite + const isOpus = !isFree const isSonnet = false const isGemini = false return { publisher, - model: isLite ? 'x-ai/grok-4.1-fast' : 'anthropic/claude-opus-4.5', + model: isFree ? 'x-ai/grok-4.1-fast' : 'anthropic/claude-opus-4.5', displayName: 'Buffy the Orchestrator', spawnerPrompt: 'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks', @@ -55,7 +55,7 @@ export function createBase2( 'spawn_agents', 'read_files', 'read_subtree', - !isFast && !isLite && 'write_todos', + !isFast && !isFree && 'write_todos', !isFast && !noAskUser && 'suggest_followups', 'str_replace', 'write_file', @@ -72,11 +72,11 @@ export function createBase2( 'glob-matcher', 'researcher-web', 'researcher-docs', - isLite ? 'commander-lite' : 'commander', + isFree ? 'commander-lite' : 'commander', isDefault && 'thinker', (isDefault || isMax) && ['opus-agent', 'gpt-5-agent'], isMax && 'thinker-best-of-n-opus', - isLite && 'editor-glm', + isFree && 'editor-glm', isDefault && 'editor', isMax && 'editor-multi-prompt', isDefault && 'code-reviewer', @@ -133,7 +133,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u - **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. ${buildArray( '- Spawn context-gathering agents (file pickers, code-searcher, directory-lister, glob-matcher, and web/docs researchers) before making edits.', - isLite && + isFree && '- Spawn the editor-glm agent to implement the changes after you have gathered all the context you need.', isDefault && '- Spawn the editor agent to implement the changes after you have gathered all the context you need.', @@ -198,7 +198,7 @@ ${isDefault ? `[ You implement the changes using the editor agent ]` : isFast ? '[ You implement the changes using the str_replace or write_file tools ]' - : isLite + : isFree ? '[ You implement the changes using the editor-glm agent ]' : '[ You implement the changes using the editor-multi-prompt agent ]' } @@ -248,7 +248,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} isFast, isDefault, isMax, - isLite, + isFree, hasNoValidation, noAskUser, }), @@ -260,7 +260,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} isMax, hasNoValidation, isSonnet, - isLite, + isFree, noAskUser, }), @@ -292,7 +292,7 @@ function buildImplementationInstructionsPrompt({ isFast, isDefault, isMax, - isLite, + isFree, hasNoValidation, noAskUser, }: { @@ -300,7 +300,7 @@ function buildImplementationInstructionsPrompt({ isFast: boolean isDefault: boolean isMax: boolean - isLite: boolean + isFree: boolean hasNoValidation: boolean noAskUser: boolean }) { @@ -320,7 +320,7 @@ ${buildArray( `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, (isDefault || isMax) && `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`, - isLite && + isFree && '- IMPORTANT: You must spawn the editor-glm agent to implement the changes after you have gathered all the context you need. This agent will do the best job of implementing the changes so you must spawn it for all changes. Do not pass any prompt or params to the editor agent when spawning it. It will make its own best choices of what to do.', isDefault && '- IMPORTANT: You must spawn the editor agent to implement the changes after you have gathered all the context you need. This agent will do the best job of implementing the changes so you must spawn it for all non-trivial changes. Do not pass any prompt or params to the editor agent when spawning it. It will make its own best choices of what to do.', @@ -347,7 +347,7 @@ function buildImplementationStepPrompt({ isMax, hasNoValidation, isSonnet, - isLite, + isFree, noAskUser, }: { isDefault: boolean @@ -355,7 +355,7 @@ function buildImplementationStepPrompt({ isMax: boolean hasNoValidation: boolean isSonnet: boolean - isLite: boolean + isFree: boolean noAskUser: boolean }) { return buildArray( diff --git a/cli/src/__tests__/unit/agent-mode-toggle.test.ts b/cli/src/__tests__/unit/agent-mode-toggle.test.ts index c39d8e3889..92d448ae00 100644 --- a/cli/src/__tests__/unit/agent-mode-toggle.test.ts +++ b/cli/src/__tests__/unit/agent-mode-toggle.test.ts @@ -17,7 +17,7 @@ describe('AgentModeToggle - buildExpandedSegments', () => { for (const mode of modes) { test(`returns segments with active indicator for ${mode}`, () => { const segs = buildExpandedSegments(mode) - // 4 mode options (DEFAULT, LITE, MAX, PLAN) + 1 active indicator + // 4 mode options (DEFAULT, FREE, MAX, PLAN) + 1 active indicator expect(segs.length).toBe(5) // Current mode is disabled among the choices diff --git a/cli/src/components/blocks/thinking-block.tsx b/cli/src/components/blocks/thinking-block.tsx index 9ef354fe51..032a910fd0 100644 --- a/cli/src/components/blocks/thinking-block.tsx +++ b/cli/src/components/blocks/thinking-block.tsx @@ -42,12 +42,10 @@ export const ThinkingBlock = memo( } }, [onToggleCollapsed, thinkingId]) - // thinkingOpen === true means still streaming - // thinkingOpen === false means explicitly closed with tag - // thinkingOpen === undefined means native reasoning block - complete when message is complete + // thinkingOpen === false means explicitly closed (with tag or message completion) + // Otherwise (true or undefined), completion is determined by message completion const isThinkingComplete = - firstBlock?.thinkingOpen === false || - (firstBlock?.thinkingOpen === undefined && isMessageComplete) + firstBlock?.thinkingOpen === false || isMessageComplete // Hide if no content or no thinkingId (but NOT when thinking is complete) if (!combinedContent || !thinkingId) { diff --git a/cli/src/hooks/use-send-message.ts b/cli/src/hooks/use-send-message.ts index 2c60735dc3..bf6274f6e0 100644 --- a/cli/src/hooks/use-send-message.ts +++ b/cli/src/hooks/use-send-message.ts @@ -4,7 +4,7 @@ import { setCurrentChatId } from '../project-files' import { createStreamController } from './stream-state' import { useChatStore } from '../state/chat-store' import { getCodebuffClient } from '../utils/codebuff-client' -import { AGENT_MODE_TO_ID } from '../utils/constants' +import { AGENT_MODE_TO_ID, AGENT_MODE_TO_COST_MODE } from '../utils/constants' import { createEventHandlerState } from '../utils/create-event-handler-state' import { createRunConfig } from '../utils/create-run-config' import { loadAgentDefinitions } from '../utils/local-agent-registry' @@ -443,6 +443,7 @@ export const useSendMessage = ({ agentDefinitions, eventHandlerState, signal: abortController.signal, + costMode: AGENT_MODE_TO_COST_MODE[agentMode], }) logger.info({ runConfig }, '[send-message] Sending message with sdk run config') diff --git a/cli/src/index.tsx b/cli/src/index.tsx index 384b476d30..1614423aa9 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -114,7 +114,8 @@ function parseArgs(): ParsedArgs { '--cwd ', 'Set the working directory (default: current directory)', ) - .option('--lite', 'Start in LITE mode') + .option('--free', 'Start in FREE mode') + .option('--lite', 'Start in FREE mode (deprecated, use --free)') .option('--max', 'Start in MAX mode') .option('--plan', 'Start in PLAN mode') .helpOption('-h, --help', 'Show this help message') @@ -129,7 +130,7 @@ function parseArgs(): ParsedArgs { // Determine initial mode from flags (last flag wins if multiple specified) let initialMode: AgentMode | undefined - if (options.lite) initialMode = 'LITE' + if (options.free || options.lite) initialMode = 'FREE' if (options.max) initialMode = 'MAX' if (options.plan) initialMode = 'PLAN' @@ -148,6 +149,7 @@ function parseArgs(): ParsedArgs { } async function main(): Promise { + console.log() // Run OSC theme detection BEFORE anything else. // This MUST happen before OpenTUI starts because OSC responses come through stdin, // and OpenTUI also listens to stdin. Running detection here ensures stdin is clean. diff --git a/cli/src/utils/constants.ts b/cli/src/utils/constants.ts index cbfea66610..b34dc9d58d 100644 --- a/cli/src/utils/constants.ts +++ b/cli/src/utils/constants.ts @@ -107,10 +107,21 @@ export const MAIN_AGENT_ID = 'main-agent' */ export const AGENT_MODE_TO_ID = { DEFAULT: 'base2', - LITE: 'base2-lite', + FREE: 'base2-free', MAX: 'base2-max', PLAN: 'base2-plan', } as const export type AgentMode = keyof typeof AGENT_MODE_TO_ID export const AGENT_MODES = Object.keys(AGENT_MODE_TO_ID) as AgentMode[] + +/** + * Maps CLI agent mode to cost mode for billing. + * FREE mode maps to 'free' cost mode where all agents cost 0 credits. + */ +export const AGENT_MODE_TO_COST_MODE = { + DEFAULT: 'normal', + FREE: 'free', + MAX: 'max', + PLAN: 'normal', +} as const satisfies Record diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts index 5a734af35b..3055f4e2c2 100644 --- a/cli/src/utils/create-run-config.ts +++ b/cli/src/utils/create-run-config.ts @@ -23,6 +23,7 @@ export type CreateRunConfigParams = { agentDefinitions: AgentDefinition[] eventHandlerState: EventHandlerState signal: AbortSignal + costMode?: 'free' | 'normal' | 'max' | 'experimental' | 'ask' } const SENSITIVE_EXTENSIONS = new Set([ @@ -98,6 +99,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => { previousRunState, agentDefinitions, eventHandlerState, + costMode, } = params return { @@ -111,6 +113,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => { handleStreamChunk: createStreamChunkHandler(eventHandlerState), handleEvent: createEventHandler(eventHandlerState), signal: params.signal, + costMode, fileFilter: ((filePath: string) => { if (isSensitiveFile(filePath)) return { status: 'blocked' } if (isEnvTemplateFile(filePath)) return { status: 'allow-example' } diff --git a/cli/src/utils/message-updater.ts b/cli/src/utils/message-updater.ts index f9cfbe6300..2fba21cde3 100644 --- a/cli/src/utils/message-updater.ts +++ b/cli/src/utils/message-updater.ts @@ -1,4 +1,4 @@ -import type { ChatMessage, ContentBlock } from '../types/chat' +import type { ChatMessage, ContentBlock, TextContentBlock } from '../types/chat' // Small wrapper to avoid repeating the ai-message map/update pattern. export type SetMessagesFn = ( @@ -57,9 +57,25 @@ export const createMessageUpdater = ( const markComplete = (metadata?: Partial) => { updateAiMessage((msg) => { const { metadata: messageMetadata, ...rest } = metadata ?? {} + + // Mark native reasoning blocks as complete by setting thinkingOpen = false + // This ensures thinking blocks auto-collapse when the message finishes + // Check for thinkingOpen !== false to handle both true (native) and undefined (legacy) + const updatedBlocks = msg.blocks?.map((block) => { + if ( + block.type === 'text' && + (block as TextContentBlock).textType === 'reasoning' && + (block as TextContentBlock).thinkingOpen !== false + ) { + return { ...block, thinkingOpen: false } as ContentBlock + } + return block + }) + const nextMessage: ChatMessage = { ...msg, isComplete: true, + ...(updatedBlocks && { blocks: updatedBlocks }), ...rest, } @@ -184,9 +200,25 @@ export const createBatchedMessageUpdater = ( prev.map((msg) => { if (msg.id !== aiMessageId) return msg const { metadata: messageMetadata, ...rest } = metadata ?? {} + + // Mark native reasoning blocks as complete by setting thinkingOpen = false + // This ensures thinking blocks auto-collapse when the message finishes + // Check for thinkingOpen !== false to handle both true (native) and undefined (legacy) + const updatedBlocks = msg.blocks?.map((block) => { + if ( + block.type === 'text' && + (block as TextContentBlock).textType === 'reasoning' && + (block as TextContentBlock).thinkingOpen !== false + ) { + return { ...block, thinkingOpen: false } as ContentBlock + } + return block + }) + const nextMessage: ChatMessage = { ...msg, isComplete: true, + ...(updatedBlocks && { blocks: updatedBlocks }), ...rest, } if (messageMetadata) { diff --git a/cli/src/utils/settings.ts b/cli/src/utils/settings.ts index 903a955009..14a9f20fdd 100644 --- a/cli/src/utils/settings.ts +++ b/cli/src/utils/settings.ts @@ -12,6 +12,8 @@ const DEFAULT_SETTINGS: Settings = { adsEnabled: true, } +// Note: FREE mode is now a valid AgentMode (was previously LITE) + /** * Settings schema - add new settings here as the product evolves */ diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 514afc6056..b7b6cb84d5 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -1,10 +1,39 @@ import { parseAgentId } from '../util/agent-id-parsing' +import type { CostMode } from './model-config' + +/** + * The cost mode that indicates FREE mode - all agents in this mode cost 0 credits. + */ +export const FREE_COST_MODE = 'free' as const + +/** + * Models that are allowed in FREE mode. + * Only these cheap/fast models get 0 credits in FREE mode. + * This prevents abuse by users trying to use expensive models for free. + */ +export const FREE_MODE_ALLOWED_MODELS = new Set([ + // Grok models used by base2-free, commander-lite, file-lister, file-picker-max + 'x-ai/grok-4.1-fast', + 'x-ai/grok-4-fast', // researcher agents + + // Gemini flash models used by file-picker and other subagents + 'google/gemini-2.5-flash', + 'google/gemini-2.5-flash-lite', + 'google/gemini-2.5-flash-preview-09-2025', + 'google/gemini-2.5-flash-lite-preview-09-2025', + + // GPT models used by editor-gpt-5, thinker, context-pruner + 'openai/gpt-5.1', + 'openai/gpt-5.1-chat', + 'openai/gpt-5-mini', +]) + /** * Agents that don't charge credits. * * These are typically lightweight utility agents that: - * - Use cheap models (e.g., Gemini Flash Lite) + * - Use cheap models (e.g., Gemini Flash) * - Have limited, programmatic capabilities * - Are frequently spawned as subagents * @@ -19,6 +48,22 @@ export const FREE_TIER_AGENTS = new Set([ 'researcher-docs', ]) +/** + * Check if the current cost mode is FREE mode. + * In FREE mode, agents using allowed models cost 0 credits. + */ +export function isFreeMode(costMode: CostMode | string | undefined): boolean { + return costMode === FREE_COST_MODE +} + +/** + * Check if a model is allowed in FREE mode. + * Only whitelisted cheap/fast models can be used for free. + */ +export function isFreeModeAllowedModel(model: string): boolean { + return FREE_MODE_ALLOWED_MODELS.has(model) +} + /** * Check if an agent should be free (no credit charge). * Handles all agent ID formats: diff --git a/common/src/constants/model-config.ts b/common/src/constants/model-config.ts index 3c8e605db7..c75bda26e0 100644 --- a/common/src/constants/model-config.ts +++ b/common/src/constants/model-config.ts @@ -9,7 +9,7 @@ export const ALLOWED_MODEL_PREFIXES = [ ] as const export const costModes = [ - 'lite', + 'free', 'normal', 'max', 'experimental', @@ -194,7 +194,7 @@ export const getModelForMode = ( ) => { if (operation === 'agent') { return { - lite: models.openrouter_gemini2_5_flash, + free: models.openrouter_gemini2_5_flash, normal: models.openrouter_claude_sonnet_4, max: models.openrouter_claude_sonnet_4, experimental: models.openrouter_gemini2_5_pro_preview, @@ -203,7 +203,7 @@ export const getModelForMode = ( } if (operation === 'file-requests') { return { - lite: models.openrouter_claude_3_5_haiku, + free: models.openrouter_claude_3_5_haiku, normal: models.openrouter_claude_3_5_haiku, max: models.openrouter_claude_sonnet_4, experimental: models.openrouter_claude_sonnet_4, @@ -212,7 +212,7 @@ export const getModelForMode = ( } if (operation === 'check-new-files') { return { - lite: models.openrouter_claude_3_5_haiku, + free: models.openrouter_claude_3_5_haiku, normal: models.openrouter_claude_sonnet_4, max: models.openrouter_claude_sonnet_4, experimental: models.openrouter_claude_sonnet_4, diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts index 19b9e1abc2..6db226ce13 100644 --- a/common/src/types/contracts/llm.ts +++ b/common/src/types/contracts/llm.ts @@ -46,6 +46,8 @@ export type PromptAiSdkStreamFn = ( spawnableAgents?: string[] /** Map of locally available agent templates - used to transform agent tool calls */ localAgentTemplates?: Record + /** Cost mode - 'free' mode means 0 credits charged for all agents */ + costMode?: string sendAction: SendActionFn logger: Logger trackEvent: TrackEventFn @@ -69,6 +71,8 @@ export type PromptAiSdkFn = ( includeCacheControl?: boolean agentProviderOptions?: OpenRouterProviderRoutingOptions maxRetries?: number + /** Cost mode - 'free' mode means 0 credits charged for all agents */ + costMode?: string sendAction: SendActionFn logger: Logger trackEvent: TrackEventFn diff --git a/common/src/types/session-state.ts b/common/src/types/session-state.ts index 40e9707e4a..f4ac626747 100644 --- a/common/src/types/session-state.ts +++ b/common/src/types/session-state.ts @@ -75,7 +75,7 @@ export type AgentOutput = z.infer export const AgentTemplateTypeList = [ // Base agents 'base', - 'base_lite', + 'base_free', 'base_max', 'base_experimental', 'claude4_gemini_thinking', diff --git a/evals/buffbench/main-nightly.ts b/evals/buffbench/main-nightly.ts index 840365a0bd..351fee617e 100644 --- a/evals/buffbench/main-nightly.ts +++ b/evals/buffbench/main-nightly.ts @@ -13,7 +13,7 @@ async function main() { const results = await runBuffBench({ evalDataPaths: [ path.join(__dirname, 'eval-codebuff.json')], - agents: ['base2-lite'], + agents: ['base2-free'], taskConcurrency: 3, }) diff --git a/packages/agent-runtime/src/find-files/custom-file-picker-config.ts b/packages/agent-runtime/src/find-files/custom-file-picker-config.ts index dd54618d7b..afdd8c8d4f 100644 --- a/packages/agent-runtime/src/find-files/custom-file-picker-config.ts +++ b/packages/agent-runtime/src/find-files/custom-file-picker-config.ts @@ -5,7 +5,7 @@ import { } from '@codebuff/common/old-constants' import { z } from 'zod/v4' -// Create the customFileCounts shape using the centralized costModes +// Create the customFileCounts shape using the centralized costModes ('free', 'normal', 'max', etc.) const customFileCountsShape = costModes.reduce( (acc, mode) => { acc[mode] = z.number().int().positive().optional() diff --git a/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts b/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts index 4a2cb71350..fb93420cc4 100644 --- a/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts +++ b/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts @@ -29,7 +29,7 @@ import type { Message } from '@codebuff/common/types/messages/codebuff-message' * @param options.userId - The ID of the user making the request. * @param options.maxTokens - Optional maximum number of tokens for the response. * @param options.temperature - Optional temperature setting for generation (0-1). - * @param options.costMode - Optional cost mode ('lite', 'normal', 'max') influencing fallback model choice. + * @param options.costMode - Optional cost mode ('free', 'normal', 'max') influencing fallback model choice. * @param options.useGPT4oInsteadOfClaude - Optional flag to use GPT-4o instead of Claude as the final fallback. * @returns A promise that resolves to the complete response string from the successful API call. * @throws If all API calls (primary and fallbacks) fail. @@ -84,7 +84,7 @@ export async function promptFlashWithFallbacks( model: useGPT4oInsteadOfClaude ? openaiModels.gpt4o : { - lite: openrouterModels.openrouter_claude_3_5_haiku, + free: openrouterModels.openrouter_claude_3_5_haiku, normal: openrouterModels.openrouter_claude_3_5_haiku, max: openrouterModels.openrouter_claude_sonnet_4, experimental: openrouterModels.openrouter_claude_3_5_haiku, diff --git a/packages/agent-runtime/src/main-prompt.ts b/packages/agent-runtime/src/main-prompt.ts index d31d26a29f..ac1a016805 100644 --- a/packages/agent-runtime/src/main-prompt.ts +++ b/packages/agent-runtime/src/main-prompt.ts @@ -107,7 +107,7 @@ export async function mainPrompt( agentType = ( { ask: AgentTemplateTypes.ask, - lite: AgentTemplateTypes.base_lite, + free: AgentTemplateTypes.base_free, normal: AgentTemplateTypes.base, max: AgentTemplateTypes.base_max, experimental: 'base2', @@ -136,6 +136,7 @@ export async function mainPrompt( agentType, fingerprintId, fileContext, + costMode, }) logger.debug({ output }, 'Main prompt finished') diff --git a/packages/agent-runtime/src/prompt-agent-stream.ts b/packages/agent-runtime/src/prompt-agent-stream.ts index 353e4ef6b9..b1fbb89dc5 100644 --- a/packages/agent-runtime/src/prompt-agent-stream.ts +++ b/packages/agent-runtime/src/prompt-agent-stream.ts @@ -14,6 +14,7 @@ export const getAgentStreamFromTemplate = (params: { agentId?: string apiKey: string clientSessionId: string + costMode?: string fingerprintId: string includeCacheControl?: boolean localAgentTemplates: Record @@ -35,6 +36,7 @@ export const getAgentStreamFromTemplate = (params: { agentId, apiKey, clientSessionId, + costMode, fingerprintId, includeCacheControl, localAgentTemplates, @@ -62,6 +64,7 @@ export const getAgentStreamFromTemplate = (params: { agentId, apiKey, clientSessionId, + costMode, fingerprintId, includeCacheControl, logger, diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts index b82b26a40a..9135827984 100644 --- a/packages/agent-runtime/src/run-agent-step.ts +++ b/packages/agent-runtime/src/run-agent-step.ts @@ -89,6 +89,7 @@ export const runAgentStep = async ( userId: string | undefined userInputId: string clientSessionId: string + costMode?: string fingerprintId: string repoId: string | undefined onResponseChunk: (chunk: string | PrintModeEvent) => void @@ -320,6 +321,7 @@ export const runAgentStep = async ( const stream = getAgentStreamFromTemplate({ ...params, agentId: agentState.parentId ? agentState.agentId : undefined, + costMode: params.costMode, includeCacheControl: supportsCacheControl(agentTemplate.model), messages: [systemMessage(system), ...agentState.messageHistory], template: agentTemplate, @@ -441,6 +443,7 @@ export async function loopAgentSteps( clearUserPromptMessagesAfterResponse?: boolean clientSessionId: string content?: Array + costMode?: string fileContext: ProjectFileContext finishAgentRun: FinishAgentRunFn localAgentTemplates: Record diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts index c1d24ff674..c26aa9ae11 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts @@ -243,7 +243,7 @@ export async function validateAndGetAgentTemplate( if (!agentTemplate) { throw new Error(`Agent type ${agentTypeStr} not found.`) } - const BASE_AGENTS = ['base', 'base-lite', 'base-max', 'base-experimental'] + const BASE_AGENTS = ['base', 'base-free', 'base-max', 'base-experimental'] // Base agent can spawn any agent if (BASE_AGENTS.includes(parentAgentTemplate.id)) { return { agentTemplate, agentType: agentTypeStr } diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts index 77c6b50d5f..4b74c16138 100644 --- a/sdk/src/impl/llm.ts +++ b/sdk/src/impl/llm.ts @@ -61,6 +61,7 @@ function getProviderOptions(params: { providerOptions?: Record agentProviderOptions?: OpenRouterProviderRoutingOptions n?: number + costMode?: string }): { codebuff: JSONObject } { const { model, @@ -69,6 +70,7 @@ function getProviderOptions(params: { providerOptions, agentProviderOptions, n, + costMode, } = params let providerConfig: Record @@ -96,6 +98,7 @@ function getProviderOptions(params: { run_id: runId, client_id: clientSessionId, ...(n && { n }), + ...(costMode && { cost_mode: costMode }), }, provider: providerConfig, }, diff --git a/sdk/src/run.ts b/sdk/src/run.ts index bb26ccd72d..98f0fbc04a 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -139,6 +139,8 @@ export type RunOptions = { previousRun?: RunState extraToolResults?: ToolMessage[] signal?: AbortSignal + /** Cost mode - 'free' mode makes all agents cost 0 credits */ + costMode?: 'free' | 'normal' | 'max' | 'experimental' | 'ask' } const createAbortError = (signal?: AbortSignal) => { @@ -203,6 +205,7 @@ async function runOnce({ previousRun, extraToolResults, signal, + costMode, }: RunExecutionOptions): Promise { const fsSourceValue = typeof fsSource === 'function' ? fsSource() : fsSource const fs = await fsSourceValue @@ -493,7 +496,7 @@ async function runOnce({ promptParams: params, content: preparedContent, fingerprintId: fingerprintId, - costMode: 'normal', + costMode: costMode ?? 'normal', sessionState, toolResults: extraToolResults ?? [], agentId, diff --git a/web/src/app/store/store-client.tsx b/web/src/app/store/store-client.tsx index 455a675a02..3c35dfd779 100644 --- a/web/src/app/store/store-client.tsx +++ b/web/src/app/store/store-client.tsx @@ -116,7 +116,7 @@ interface AgentStoreClientProps { // Hard-coded list of editor's choice agents const EDITORS_CHOICE_AGENTS = [ 'base2', - 'base2-lite', + 'base2-free', 'base2-max', 'base2-plan', 'deep-code-reviewer', diff --git a/web/src/content/advanced/how-does-it-work.mdx b/web/src/content/advanced/how-does-it-work.mdx index b57b066d1f..262ffb0abb 100644 --- a/web/src/content/advanced/how-does-it-work.mdx +++ b/web/src/content/advanced/how-does-it-work.mdx @@ -14,7 +14,7 @@ Codebuff runs multiple agents, each tuned for a specific task. The main agent ("Buffy") runs on Claude Opus 4.5. It reads your prompt, gathers context, and spawns subagents. The orchestrator is available in several variants: - [`base2`](/publishers/codebuff/agents/base2) - Default mode orchestrator -- [`base2-lite`](/publishers/codebuff/agents/base2-lite) - Lite mode (faster, cheaper) +- [`base2-free`](/publishers/codebuff/agents/base2-free) - Free mode (faster, no credit cost) - [`base2-max`](/publishers/codebuff/agents/base2-max) - Max mode (best-of-N selection) - [`base2-plan`](/publishers/codebuff/agents/base2-plan) - Plan mode (no file writes) diff --git a/web/src/llm-api/helpers.ts b/web/src/llm-api/helpers.ts index b31b4e18c9..32f57a7fbf 100644 --- a/web/src/llm-api/helpers.ts +++ b/web/src/llm-api/helpers.ts @@ -1,6 +1,6 @@ import { setupBigQuery } from '@codebuff/bigquery' import { consumeCreditsAndAddAgentStep } from '@codebuff/billing' -import { isFreeAgent } from '@codebuff/common/constants/free-agents' +import { isFreeAgent, isFreeMode, isFreeModeAllowedModel } from '@codebuff/common/constants/free-agents' import { PROFIT_MARGIN } from '@codebuff/common/old-constants' import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' @@ -34,7 +34,9 @@ export function extractRequestMetadata(params: { } const n = (body as any)?.codebuff_metadata?.n - return { clientId, clientRequestId, ...(n && { n }) } + const rawCostMode = (body as any)?.codebuff_metadata?.cost_mode + const costMode = typeof rawCostMode === 'string' ? rawCostMode : undefined + return { clientId, clientRequestId, costMode, ...(n && { n }) } } export async function insertMessageToBigQuery(params: { @@ -102,6 +104,7 @@ export async function consumeCreditsForMessage(params: { usageData: UsageData byok: boolean logger: Logger + costMode?: string }): Promise { const { messageId, @@ -117,12 +120,16 @@ export async function consumeCreditsForMessage(params: { usageData, byok, logger, + costMode, } = params - // Free tier agents (like file-picker) don't charge credits to avoid confusion - // when users connect their Claude subscription but subagents use other models + // FREE mode: agents using allowed models cost 0 credits + // Only whitelisted cheap models (grok-4.1-fast, gemini flash, gpt-5.1, etc.) are free + // This prevents abuse by using expensive models in FREE mode + // Free tier agents (like file-picker) also don't charge credits const initialCredits = Math.round(usageData.cost * 100 * (1 + PROFIT_MARGIN)) - const credits = isFreeAgent(agentId) && initialCredits < 5 ? 0 : initialCredits + const isFreeModeAndAllowed = isFreeMode(costMode) && isFreeModeAllowedModel(model) + const credits = isFreeModeAndAllowed || (isFreeAgent(agentId) && initialCredits < 5) ? 0 : initialCredits await consumeCreditsAndAddAgentStep({ messageId, diff --git a/web/src/llm-api/openai.ts b/web/src/llm-api/openai.ts index 59c3986b69..3e70fa0ac9 100644 --- a/web/src/llm-api/openai.ts +++ b/web/src/llm-api/openai.ts @@ -80,7 +80,7 @@ export async function handleOpenAINonStream({ insertMessageBigquery: InsertMessageBigqueryFn }) { const startTime = new Date() - const { clientId, clientRequestId, n } = extractRequestMetadata({ + const { clientId, clientRequestId, costMode, n } = extractRequestMetadata({ body, logger, }) @@ -195,6 +195,7 @@ export async function handleOpenAINonStream({ usageData, byok: false, logger, + costMode, }) return { diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts index 536b9891b8..978d51c2e1 100644 --- a/web/src/llm-api/openrouter.ts +++ b/web/src/llm-api/openrouter.ts @@ -60,9 +60,9 @@ function extractRequestMetadataWithN(params: { logger: Logger }) { const { body, logger } = params - const { clientId, clientRequestId } = extractRequestMetadata({ body, logger }) + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) const n = (body as any)?.codebuff_metadata?.n - return { clientId, clientRequestId, ...(n && { n }) } + return { clientId, clientRequestId, costMode, ...(n && { n }) } } export async function handleOpenRouterNonStream({ @@ -91,7 +91,7 @@ export async function handleOpenRouterNonStream({ body.usage.include = true const startTime = new Date() - const { clientId, clientRequestId, n } = extractRequestMetadataWithN({ + const { clientId, clientRequestId, costMode, n } = extractRequestMetadataWithN({ body, logger, }) @@ -166,6 +166,7 @@ export async function handleOpenRouterNonStream({ usageData: aggregatedUsage, byok, logger, + costMode, }) // Return the first response with aggregated data @@ -236,6 +237,7 @@ export async function handleOpenRouterNonStream({ usageData, byok, logger, + costMode, }) // Overwrite cost so SDK calculates exact credits we charged @@ -273,7 +275,7 @@ export async function handleOpenRouterStream({ body.usage.include = true const startTime = new Date() - const { clientId, clientRequestId } = extractRequestMetadata({ body, logger }) + const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) const byok = openrouterApiKey !== null const response = await createOpenRouterRequest({ @@ -345,6 +347,7 @@ export async function handleOpenRouterStream({ agentId, clientId, clientRequestId, + costMode, byok, startTime, request: body, @@ -414,6 +417,7 @@ async function handleLine({ agentId, clientId, clientRequestId, + costMode, byok, startTime, request, @@ -427,6 +431,7 @@ async function handleLine({ agentId: string clientId: string | null clientRequestId: string | null + costMode: string | undefined byok: boolean startTime: Date request: unknown @@ -472,6 +477,7 @@ async function handleLine({ agentId, clientId, clientRequestId, + costMode, byok, startTime, request, @@ -488,6 +494,7 @@ async function handleResponse({ agentId, clientId, clientRequestId, + costMode, byok, startTime, request, @@ -501,6 +508,7 @@ async function handleResponse({ agentId: string clientId: string | null clientRequestId: string | null + costMode: string | undefined byok: boolean startTime: Date request: unknown @@ -556,6 +564,7 @@ async function handleResponse({ usageData, byok, logger, + costMode, }) return { state, billedCredits } From 8ae5b0944223c634b328ee94ed141e50a5989514 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 24 Jan 2026 23:54:46 -0800 Subject: [PATCH 2/9] Always enable ads in Free mode --- cli/src/commands/ads.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cli/src/commands/ads.ts b/cli/src/commands/ads.ts index 009a14c3cc..10efdc4207 100644 --- a/cli/src/commands/ads.ts +++ b/cli/src/commands/ads.ts @@ -1,5 +1,6 @@ import { saveSettings, loadSettings } from '../utils/settings' import { getSystemMessage } from '../utils/message-history' +import { useChatStore } from '../state/chat-store' import { logger } from '../utils/logger' import type { ChatMessage } from '../types/chat' @@ -8,7 +9,7 @@ export const handleAdsEnable = (): { postUserMessage: (messages: ChatMessage[]) => ChatMessage[] } => { logger.info('[gravity] Enabling ads') - + saveSettings({ adsEnabled: true }) return { @@ -34,6 +35,15 @@ export const handleAdsDisable = (): { } export const getAdsEnabled = (): boolean => { + // If no mode provided, get it from the store + const mode = useChatStore.getState().agentMode + + // In FREE mode, ads are always enabled regardless of saved setting + if (mode === 'FREE') { + return true + } + + // Otherwise, use the saved setting const settings = loadSettings() return settings.adsEnabled ?? false } From 664f22527e306fd81a7645d9d1fb606e15e57678 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 24 Jan 2026 23:57:15 -0800 Subject: [PATCH 3/9] agents: Prevent file-lister from using read subtree again --- agents/file-explorer/file-lister.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/file-explorer/file-lister.ts b/agents/file-explorer/file-lister.ts index d7fdccab4d..6a27d37d08 100644 --- a/agents/file-explorer/file-lister.ts +++ b/agents/file-explorer/file-lister.ts @@ -27,7 +27,7 @@ export const createFileLister = (): Omit => ({ }, outputMode: 'last_message', includeMessageHistory: false, - toolNames: ['read_subtree'], + toolNames: [], spawnableAgents: [], systemPrompt: `You are an expert at finding relevant files in a codebase and listing them out.`, From e358331a895a34bcf957f2f89778eab2f088234d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 25 Jan 2026 00:10:58 -0800 Subject: [PATCH 4/9] Don't grant ad credits in free mode --- cli/src/hooks/use-gravity-ad.ts | 5 ++++- web/src/app/api/v1/ads/impression/_post.ts | 12 ++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts index ccb45d2cc0..e2e52dbc9f 100644 --- a/cli/src/hooks/use-gravity-ad.ts +++ b/cli/src/hooks/use-gravity-ad.ts @@ -99,13 +99,16 @@ export const useGravityAd = (): GravityAdState => { return } + // Include mode in request - FREE mode should not grant credits + const agentMode = useChatStore.getState().agentMode + fetch(`${WEBSITE_URL}/api/v1/ads/impression`, { method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${authToken}`, }, - body: JSON.stringify({ impUrl }), + body: JSON.stringify({ impUrl, mode: agentMode }), }) .then((res) => res.json()) .then((data) => { diff --git a/web/src/app/api/v1/ads/impression/_post.ts b/web/src/app/api/v1/ads/impression/_post.ts index 7528b28902..0f0f704483 100644 --- a/web/src/app/api/v1/ads/impression/_post.ts +++ b/web/src/app/api/v1/ads/impression/_post.ts @@ -92,6 +92,8 @@ function generateImpressionOperationId(userId: string, impUrl: string): string { const bodySchema = z.object({ // Only impUrl needed - we look up the ad data from our database impUrl: z.url(), + // Mode to determine if credits should be granted (FREE mode gets no credits) + mode: z.string().optional(), }) export async function postAdImpression(params: { @@ -115,6 +117,7 @@ export async function postAdImpression(params: { // Parse and validate request body let impUrl: string + let mode: string | undefined try { const json = await req.json() const parsed = bodySchema.safeParse(json) @@ -125,6 +128,7 @@ export async function postAdImpression(params: { ) } impUrl = parsed.data.impUrl + mode = parsed.data.mode } catch { return NextResponse.json( { error: 'Invalid JSON in request body' }, @@ -230,9 +234,9 @@ export async function postAdImpression(params: { Math.floor(userShareDollars * 100), ) - // Grant credits if any let creditsGranted = 0 - if (creditsToGrant > 0) { + // FREE mode should not grant any credits + if (mode !== 'FREE' && creditsToGrant > 0) { try { await processAndGrantCredit({ userId, @@ -282,7 +286,7 @@ export async function postAdImpression(params: { } } - // Update the ad_impression record with impression details + // Update the ad_impression record with impression details (for ALL modes) try { await db .update(schema.adImpression) @@ -294,7 +298,7 @@ export async function postAdImpression(params: { .where(eq(schema.adImpression.id, adRecord.id)) logger.info( - { userId, impUrl, creditsGranted }, + { userId, impUrl, creditsGranted, creditsToGrant }, '[ads] Updated ad impression record', ) } catch (error) { From e95a0dda2e2714f97db04e642a009de0764f1e67 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 25 Jan 2026 11:30:08 -0800 Subject: [PATCH 5/9] Update checks for free mode cost --- common/src/constants/free-agents.ts | 137 +++++++++++++++++++++++----- web/src/llm-api/helpers.ts | 29 ++++-- 2 files changed, 138 insertions(+), 28 deletions(-) diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index b7b6cb84d5..b7cfd92372 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -7,30 +7,51 @@ import type { CostMode } from './model-config' */ export const FREE_COST_MODE = 'free' as const +/** + * Agents that are allowed to run in FREE mode. + * Only these specific agents (and their expected models) get 0 credits in FREE mode. + * This prevents abuse by users trying to use arbitrary agents for free. + * + * The mapping also specifies which models each agent is allowed to use in free mode. + * If an agent uses a different model, it will be charged full credits. + */ +export const FREE_MODE_AGENT_MODELS: Record> = { + // Root orchestrator + 'base2-free': new Set(['x-ai/grok-4.1-fast']), + + // File exploration agents + 'file-picker': new Set(['google/gemini-2.5-flash-lite']), + 'file-picker-max': new Set(['x-ai/grok-4.1-fast']), + 'file-lister': new Set(['x-ai/grok-4.1-fast']), + + // Research agents + 'researcher-web': new Set(['x-ai/grok-4.1-fast']), + 'researcher-docs': new Set(['x-ai/grok-4.1-fast']), + + // Command execution + 'commander-lite': new Set(['x-ai/grok-4.1-fast']), + + // Editor for free mode + 'editor-glm': new Set(['z-ai/glm-4.7', 'z-ai/glm-4.6']), +} + +/** + * Set of all agent IDs allowed in FREE mode. + * Derived from FREE_MODE_AGENT_MODELS for quick lookups. + */ +export const FREE_MODE_ALLOWED_AGENTS = new Set(Object.keys(FREE_MODE_AGENT_MODELS)) + /** * Models that are allowed in FREE mode. - * Only these cheap/fast models get 0 credits in FREE mode. + * Derived from FREE_MODE_AGENT_MODELS - this is the union of all allowed models. * This prevents abuse by users trying to use expensive models for free. */ -export const FREE_MODE_ALLOWED_MODELS = new Set([ - // Grok models used by base2-free, commander-lite, file-lister, file-picker-max - 'x-ai/grok-4.1-fast', - 'x-ai/grok-4-fast', // researcher agents - - // Gemini flash models used by file-picker and other subagents - 'google/gemini-2.5-flash', - 'google/gemini-2.5-flash-lite', - 'google/gemini-2.5-flash-preview-09-2025', - 'google/gemini-2.5-flash-lite-preview-09-2025', - - // GPT models used by editor-gpt-5, thinker, context-pruner - 'openai/gpt-5.1', - 'openai/gpt-5.1-chat', - 'openai/gpt-5-mini', -]) +export const FREE_MODE_ALLOWED_MODELS = new Set( + Object.values(FREE_MODE_AGENT_MODELS).flatMap((models) => Array.from(models)), +) /** - * Agents that don't charge credits. + * Agents that don't charge credits when credits would be very small (<5). * * These are typically lightweight utility agents that: * - Use cheap models (e.g., Gemini Flash) @@ -39,6 +60,10 @@ export const FREE_MODE_ALLOWED_MODELS = new Set([ * * Making them free avoids user confusion when they connect their own * Claude subscription (BYOK) but still see credit charges for non-Claude models. + * + * NOTE: This is separate from FREE_MODE_ALLOWED_AGENTS which is for the + * explicit "free" cost mode. These agents get free credits only when + * the cost would be trivial (<5 credits). */ export const FREE_TIER_AGENTS = new Set([ 'file-picker', @@ -65,13 +90,83 @@ export function isFreeModeAllowedModel(model: string): boolean { } /** - * Check if an agent should be free (no credit charge). + * Check if an agent is allowed to run in FREE mode. + * Validates both the agent ID and optionally the publisher. + * + * For security, we only allow: + * - Internal agents (no publisher, e.g., 'base2-free') + * - Codebuff-published agents (publisher === 'codebuff') + * + * This prevents attackers from creating agents with matching names + * under different publishers to abuse free mode. + */ +export function isFreeModeAllowedAgent(fullAgentId: string): boolean { + const { publisherId, agentId } = parseAgentId(fullAgentId) + + // Must have a valid agent ID + if (!agentId) return false + + // Must be in the allowed agents list + if (!FREE_MODE_ALLOWED_AGENTS.has(agentId)) return false + + // Must be either internal (no publisher) or from codebuff + if (publisherId && publisherId !== 'codebuff') return false + + return true +} + +/** + * Check if a specific agent is allowed to use a specific model in FREE mode. + * This is the strictest check - validates both the agent AND model combination. + * + * Returns true only if: + * 1. The agent is allowed in free mode (isFreeModeAllowedAgent) + * 2. The model is in that agent's allowed model set + */ +export function isFreeModeAllowedAgentModel( + fullAgentId: string, + model: string, +): boolean { + // First check if agent is allowed in free mode (includes publisher validation) + if (!isFreeModeAllowedAgent(fullAgentId)) return false + + // Parse to get the base agent ID for model lookup + const { agentId } = parseAgentId(fullAgentId) + if (!agentId) return false + + // Get the allowed models for this agent + const allowedModels = FREE_MODE_AGENT_MODELS[agentId] + if (!allowedModels) return false + + // Empty set means programmatic agent (no LLM calls expected) + // For these, any model check should fail (they shouldn't be making LLM calls) + if (allowedModels.size === 0) return false + + return allowedModels.has(model) +} + +/** + * Check if an agent should be free (no credit charge) for small requests. + * This is separate from FREE mode - these agents get free credits only + * when the cost would be trivial (<5 credits). + * * Handles all agent ID formats: * - 'file-picker' * - 'file-picker@1.0.0' * - 'codebuff/file-picker@0.0.2' */ export function isFreeAgent(fullAgentId: string): boolean { - const { agentId } = parseAgentId(fullAgentId) - return agentId ? FREE_TIER_AGENTS.has(agentId) : false + const { publisherId, agentId } = parseAgentId(fullAgentId) + + // Must have a valid agent ID + if (!agentId) return false + + // Must be in the free tier agents list + if (!FREE_TIER_AGENTS.has(agentId)) return false + + // Must be either internal (no publisher) or from codebuff + // This prevents publisher spoofing attacks + if (publisherId && publisherId !== 'codebuff') return false + + return true } diff --git a/web/src/llm-api/helpers.ts b/web/src/llm-api/helpers.ts index 32f57a7fbf..8a44f4a8bc 100644 --- a/web/src/llm-api/helpers.ts +++ b/web/src/llm-api/helpers.ts @@ -1,6 +1,10 @@ import { setupBigQuery } from '@codebuff/bigquery' import { consumeCreditsAndAddAgentStep } from '@codebuff/billing' -import { isFreeAgent, isFreeMode, isFreeModeAllowedModel } from '@codebuff/common/constants/free-agents' +import { + isFreeAgent, + isFreeMode, + isFreeModeAllowedAgentModel, +} from '@codebuff/common/constants/free-agents' import { PROFIT_MARGIN } from '@codebuff/common/old-constants' import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' @@ -123,13 +127,24 @@ export async function consumeCreditsForMessage(params: { costMode, } = params - // FREE mode: agents using allowed models cost 0 credits - // Only whitelisted cheap models (grok-4.1-fast, gemini flash, gpt-5.1, etc.) are free - // This prevents abuse by using expensive models in FREE mode - // Free tier agents (like file-picker) also don't charge credits + // Calculate initial credits based on cost const initialCredits = Math.round(usageData.cost * 100 * (1 + PROFIT_MARGIN)) - const isFreeModeAndAllowed = isFreeMode(costMode) && isFreeModeAllowedModel(model) - const credits = isFreeModeAndAllowed || (isFreeAgent(agentId) && initialCredits < 5) ? 0 : initialCredits + + // FREE mode: only specific agents using their expected models cost 0 credits + // This is the strictest check - validates: + // 1. The cost mode is 'free' + // 2. The agent is in the allowed free-mode agents list + // 3. The model matches what that specific agent is allowed to use + // 4. The agent is either internal or published by 'codebuff' (prevents publisher spoofing) + const isFreeModeAndAllowed = + isFreeMode(costMode) && isFreeModeAllowedAgentModel(agentId, model) + + // Free tier agents (like file-picker) also don't charge credits for small requests + // This is separate from FREE mode and helps with BYOK users + // Also validates publisher to prevent spoofing attacks + const isFreeAgentSmallRequest = isFreeAgent(agentId) && initialCredits < 5 + + const credits = isFreeModeAndAllowed || isFreeAgentSmallRequest ? 0 : initialCredits await consumeCreditsAndAddAgentStep({ messageId, From 7dd97eedc8f2f1918b481f5c1c6285bc3c52ba53 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 25 Jan 2026 13:17:58 -0800 Subject: [PATCH 6/9] cleanup --- cli/src/utils/constants.ts | 2 +- common/src/constants/free-agents.ts | 67 +++++------------------------ 2 files changed, 12 insertions(+), 57 deletions(-) diff --git a/cli/src/utils/constants.ts b/cli/src/utils/constants.ts index b34dc9d58d..6262b85712 100644 --- a/cli/src/utils/constants.ts +++ b/cli/src/utils/constants.ts @@ -117,7 +117,7 @@ export const AGENT_MODES = Object.keys(AGENT_MODE_TO_ID) as AgentMode[] /** * Maps CLI agent mode to cost mode for billing. - * FREE mode maps to 'free' cost mode where all agents cost 0 credits. + * FREE mode maps to 'free' cost mode where allowlisted agent+model combos cost 0 credits. */ export const AGENT_MODE_TO_COST_MODE = { DEFAULT: 'normal', diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index b7cfd92372..e00dad0922 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -3,7 +3,8 @@ import { parseAgentId } from '../util/agent-id-parsing' import type { CostMode } from './model-config' /** - * The cost mode that indicates FREE mode - all agents in this mode cost 0 credits. + * The cost mode that indicates FREE mode. + * Only allowlisted agent+model combinations cost 0 credits in this mode. */ export const FREE_COST_MODE = 'free' as const @@ -35,21 +36,6 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'editor-glm': new Set(['z-ai/glm-4.7', 'z-ai/glm-4.6']), } -/** - * Set of all agent IDs allowed in FREE mode. - * Derived from FREE_MODE_AGENT_MODELS for quick lookups. - */ -export const FREE_MODE_ALLOWED_AGENTS = new Set(Object.keys(FREE_MODE_AGENT_MODELS)) - -/** - * Models that are allowed in FREE mode. - * Derived from FREE_MODE_AGENT_MODELS - this is the union of all allowed models. - * This prevents abuse by users trying to use expensive models for free. - */ -export const FREE_MODE_ALLOWED_MODELS = new Set( - Object.values(FREE_MODE_AGENT_MODELS).flatMap((models) => Array.from(models)), -) - /** * Agents that don't charge credits when credits would be very small (<5). * @@ -81,59 +67,28 @@ export function isFreeMode(costMode: CostMode | string | undefined): boolean { return costMode === FREE_COST_MODE } -/** - * Check if a model is allowed in FREE mode. - * Only whitelisted cheap/fast models can be used for free. - */ -export function isFreeModeAllowedModel(model: string): boolean { - return FREE_MODE_ALLOWED_MODELS.has(model) -} - -/** - * Check if an agent is allowed to run in FREE mode. - * Validates both the agent ID and optionally the publisher. - * - * For security, we only allow: - * - Internal agents (no publisher, e.g., 'base2-free') - * - Codebuff-published agents (publisher === 'codebuff') - * - * This prevents attackers from creating agents with matching names - * under different publishers to abuse free mode. - */ -export function isFreeModeAllowedAgent(fullAgentId: string): boolean { - const { publisherId, agentId } = parseAgentId(fullAgentId) - - // Must have a valid agent ID - if (!agentId) return false - - // Must be in the allowed agents list - if (!FREE_MODE_ALLOWED_AGENTS.has(agentId)) return false - - // Must be either internal (no publisher) or from codebuff - if (publisherId && publisherId !== 'codebuff') return false - - return true -} - /** * Check if a specific agent is allowed to use a specific model in FREE mode. * This is the strictest check - validates both the agent AND model combination. * * Returns true only if: - * 1. The agent is allowed in free mode (isFreeModeAllowedAgent) - * 2. The model is in that agent's allowed model set + * 1. The agent has a valid agent ID + * 2. The agent is in the allowed free-mode agents list + * 3. The agent is either internal or published by 'codebuff' (prevents spoofing) + * 4. The model is in that agent's allowed model set */ export function isFreeModeAllowedAgentModel( fullAgentId: string, model: string, ): boolean { - // First check if agent is allowed in free mode (includes publisher validation) - if (!isFreeModeAllowedAgent(fullAgentId)) return false + const { publisherId, agentId } = parseAgentId(fullAgentId) - // Parse to get the base agent ID for model lookup - const { agentId } = parseAgentId(fullAgentId) + // Must have a valid agent ID if (!agentId) return false + // Must be either internal (no publisher) or from codebuff + if (publisherId && publisherId !== 'codebuff') return false + // Get the allowed models for this agent const allowedModels = FREE_MODE_AGENT_MODELS[agentId] if (!allowedModels) return false From d4dee36a5e51f509ed1190ec70c8643deb5b1df6 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 25 Jan 2026 13:34:52 -0800 Subject: [PATCH 7/9] cleanup --- cli/src/index.tsx | 1 - common/src/actions.ts | 5 +-- .../src/llm-api/gemini-with-fallbacks.ts | 10 +---- sdk/src/run.ts | 37 +++++++++---------- 4 files changed, 22 insertions(+), 31 deletions(-) diff --git a/cli/src/index.tsx b/cli/src/index.tsx index 1614423aa9..2bb75ca5a9 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -149,7 +149,6 @@ function parseArgs(): ParsedArgs { } async function main(): Promise { - console.log() // Run OSC theme detection BEFORE anything else. // This MUST happen before OpenTUI starts because OSC responses come through stdin, // and OpenTUI also listens to stdin. Running detection here ensures stdin is clean. diff --git a/common/src/actions.ts b/common/src/actions.ts index 7644b2020d..eb5304fba9 100644 --- a/common/src/actions.ts +++ b/common/src/actions.ts @@ -1,6 +1,5 @@ import { z } from 'zod/v4' -import type { CostMode } from './old-constants' import type { GrantType } from './types/grant' import type { MCPConfig } from './types/mcp' import type { ToolMessage } from './types/messages/codebuff-message' @@ -30,7 +29,7 @@ type ClientActionPrompt = { promptParams?: Record // Additional json params. fingerprintId: string authToken?: string - costMode?: CostMode + costMode?: string sessionState: SessionState toolResults: ToolMessage[] model?: string @@ -70,7 +69,7 @@ type ClientActionMcpToolData = { tools: { name: string description?: string - inputSchema: { type: 'object'; [k: string]: unknown } + inputSchema: { type: 'object';[k: string]: unknown } }[] } diff --git a/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts b/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts index fb93420cc4..d5da090775 100644 --- a/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts +++ b/packages/agent-runtime/src/llm-api/gemini-with-fallbacks.ts @@ -37,7 +37,7 @@ import type { Message } from '@codebuff/common/types/messages/codebuff-message' export async function promptFlashWithFallbacks( params: { messages: Message[] - costMode?: CostMode + costMode?: string useGPT4oInsteadOfClaude?: boolean thinkingBudget?: number useFinetunedModel?: FinetunedVertexModel | undefined @@ -83,13 +83,7 @@ export async function promptFlashWithFallbacks( messages, model: useGPT4oInsteadOfClaude ? openaiModels.gpt4o - : { - free: openrouterModels.openrouter_claude_3_5_haiku, - normal: openrouterModels.openrouter_claude_3_5_haiku, - max: openrouterModels.openrouter_claude_sonnet_4, - experimental: openrouterModels.openrouter_claude_3_5_haiku, - ask: openrouterModels.openrouter_claude_3_5_haiku, - }[costMode ?? 'normal'], + : openrouterModels.openrouter_claude_3_5_haiku, }) } } diff --git a/sdk/src/run.ts b/sdk/src/run.ts index 98f0fbc04a..76da594533 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -82,17 +82,17 @@ export type CodebuffClientOptions = { chunk: | string | { - type: 'subagent_chunk' - agentId: string - agentType: string - chunk: string - } + type: 'subagent_chunk' + agentId: string + agentType: string + chunk: string + } | { - type: 'reasoning_chunk' - agentId: string - ancestorRunIds: string[] - chunk: string - }, + type: 'reasoning_chunk' + agentId: string + ancestorRunIds: string[] + chunk: string + }, ) => void | Promise /** Optional filter to classify files before reading (runs before gitignore check) */ @@ -139,8 +139,7 @@ export type RunOptions = { previousRun?: RunState extraToolResults?: ToolMessage[] signal?: AbortSignal - /** Cost mode - 'free' mode makes all agents cost 0 credits */ - costMode?: 'free' | 'normal' | 'max' | 'experimental' | 'ask' + costMode?: string } const createAbortError = (signal?: AbortSignal) => { @@ -255,8 +254,8 @@ async function runOnce({ }) } - let resolve: (value: RunReturnType) => any = () => {} - let reject: (error: any) => any = () => {} + let resolve: (value: RunReturnType) => any = () => { } + let reject: (error: any) => any = () => { } const promise = new Promise((res, rej) => { resolve = res reject = rej @@ -369,8 +368,8 @@ async function runOnce({ overrides: overrideTools ?? {}, customToolDefinitions: customToolDefinitions ? Object.fromEntries( - customToolDefinitions.map((def) => [def.toolName, def]), - ) + customToolDefinitions.map((def) => [def.toolName, def]), + ) : {}, cwd, fs, @@ -674,9 +673,9 @@ async function handleToolCall({ value: { errorMessage: error && - typeof error === 'object' && - 'message' in error && - typeof error.message === 'string' + typeof error === 'object' && + 'message' in error && + typeof error.message === 'string' ? error.message : typeof error === 'string' ? error From eac6c3c0b74b416db10c49e5abfb471d23250896 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 25 Jan 2026 13:48:54 -0800 Subject: [PATCH 8/9] update openai-compatible to add a generated id if missing --- .../chat/openai-compatible-chat-language-model.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/packages/internal/src/openai-compatible/chat/openai-compatible-chat-language-model.ts b/packages/internal/src/openai-compatible/chat/openai-compatible-chat-language-model.ts index 4f8d1fa7f5..ad312f3c50 100644 --- a/packages/internal/src/openai-compatible/chat/openai-compatible-chat-language-model.ts +++ b/packages/internal/src/openai-compatible/chat/openai-compatible-chat-language-model.ts @@ -512,13 +512,6 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { const index = toolCallDelta.index; if (toolCalls[index] == null) { - if (toolCallDelta.id == null) { - throw new InvalidResponseDataError({ - data: toolCallDelta, - message: `Expected 'id' to be a string.`, - }); - } - if (toolCallDelta.function?.name == null) { throw new InvalidResponseDataError({ data: toolCallDelta, @@ -526,14 +519,17 @@ export class OpenAICompatibleChatLanguageModel implements LanguageModelV2 { }); } + // UPDATED (James): Generate an ID if the provider doesn't include one (e.g., GLM models) + const toolCallId = toolCallDelta.id ?? generateId(); + controller.enqueue({ type: 'tool-input-start', - id: toolCallDelta.id, + id: toolCallId, toolName: toolCallDelta.function.name, }); toolCalls[index] = { - id: toolCallDelta.id, + id: toolCallId, type: 'function', function: { name: toolCallDelta.function.name, From 391a8a30c1a327fb47a4cd3a78ed94e15ad945ae Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 25 Jan 2026 13:52:58 -0800 Subject: [PATCH 9/9] fix types --- packages/agent-runtime/src/main-prompt.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/agent-runtime/src/main-prompt.ts b/packages/agent-runtime/src/main-prompt.ts index ac1a016805..25521808d6 100644 --- a/packages/agent-runtime/src/main-prompt.ts +++ b/packages/agent-runtime/src/main-prompt.ts @@ -112,7 +112,7 @@ export async function mainPrompt( max: AgentTemplateTypes.base_max, experimental: 'base2', } satisfies Record - )[costMode ?? 'normal'] + )[costMode ?? 'normal'] ?? 'base2' } mainAgentState.agentType = agentType