
Commit 01d4adb

fix: update token counting logic for GPT, Claude, and Grok models, adjusting input token calculations and handling tool prompts
1 parent 6f47926 commit 01d4adb

File tree

2 files changed: +58 / -22 lines changed


src/lib/tokenizer.ts

Lines changed: 50 additions & 15 deletions
@@ -37,7 +37,9 @@ const calculateToolCallsTokens = (
   let tokens = 0
   for (const toolCall of toolCalls) {
     tokens += constants.funcInit
-    tokens += encoder.encode(JSON.stringify(toolCall)).length
+    tokens += encoder.encode(toolCall.id).length
+    tokens += encoder.encode(toolCall.function.name).length
+    tokens += encoder.encode(toolCall.function.arguments).length
   }
   tokens += constants.funcEnd
   return tokens
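
Counting the id, function name, and arguments individually avoids charging for the JSON syntax (keys, quotes, braces, escapes) that JSON.stringify wraps around the payload. A minimal sketch of the difference, not part of the diff; the character-count encoder below is a crude stand-in for the real tokenizer, not the module's API:

// Crude stand-in for the real encoder: one "token" per character, for illustration only.
const countTokens = (text: string): number => text.length

const toolCall = {
  id: "call_abc123",
  function: { name: "get_weather", arguments: '{"city":"Paris"}' },
}

// Old approach: the serialized form also counts JSON keys, quotes, and escapes.
const serialized = countTokens(JSON.stringify(toolCall))

// New approach: only the id, function name, and argument string are counted.
const perField =
  countTokens(toolCall.id)
  + countTokens(toolCall.function.name)
  + countTokens(toolCall.function.arguments)

console.log({ serialized, perField }) // serialized comes out strictly larger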
@@ -158,6 +160,7 @@ const getModelConstants = (model: Model) => {
         enumInit: -3,
         enumItem: 3,
         funcEnd: 12,
+        isGpt: true,
       }
     : {
         funcInit: 7,
@@ -166,6 +169,7 @@ const getModelConstants = (model: Model) => {
         enumInit: -3,
         enumItem: 3,
         funcEnd: 12,
+        isGpt: model.id.startsWith("gpt-"),
       }
 }

@@ -218,8 +222,12 @@ const calculateParameterTokens = (
   const line = `${paramName}:${paramType}:${paramDesc}`
   tokens += encoder.encode(line).length

+  if (param.type === "array" && param["items"]) {
+    tokens += calculateParametersTokens(param["items"], encoder, constants)
+  }
+
   // Handle additional properties (excluding standard ones)
-  const excludedKeys = new Set(["type", "description", "enum"])
+  const excludedKeys = new Set(["type", "description", "enum", "items"])
   for (const propertyName of Object.keys(param)) {
     if (!excludedKeys.has(propertyName)) {
       const propertyValue = param[propertyName]
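
The new branch matters for array-typed tool parameters whose items hold a nested schema, which previously fell through to the generic key loop. A hypothetical example of the shape it now recurses into (illustrative only, not from the diff):

// Hypothetical array-typed tool parameter with a nested item schema.
const tagsParam = {
  type: "array",
  description: "Tags to attach to the resource",
  items: {
    type: "object",
    properties: {
      name: { type: "string", description: "Tag name" },
      value: { type: "string", description: "Tag value" },
    },
    required: ["name"],
  },
}
// With this change, calculateParametersTokens(tagsParam.items, ...) walks the nested
// schema, and "items" is excluded from the generic loop so it is not counted twice.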
@@ -234,6 +242,27 @@ const calculateParameterTokens = (
   return tokens
 }

+/**
+ * Calculate tokens for properties object
+ */
+const calculatePropertiesTokens = (
+  properties: Record<string, unknown>,
+  encoder: Encoder,
+  constants: ReturnType<typeof getModelConstants>,
+): number => {
+  let tokens = 0
+  if (Object.keys(properties).length > 0) {
+    tokens += constants.propInit
+    for (const propKey of Object.keys(properties)) {
+      tokens += calculateParameterTokens(propKey, properties[propKey], {
+        encoder,
+        constants,
+      })
+    }
+  }
+  return tokens
+}
+
 /**
  * Calculate tokens for function parameters
  */
@@ -249,18 +278,17 @@ const calculateParametersTokens = (
   const params = parameters as Record<string, unknown>
   let tokens = 0

+  const excludedKeys = new Set(["$schema", "additionalProperties"])
   for (const [key, value] of Object.entries(params)) {
+    if (excludedKeys.has(key)) {
+      continue
+    }
     if (key === "properties") {
-      const properties = value as Record<string, unknown>
-      if (Object.keys(properties).length > 0) {
-        tokens += constants.propInit
-        for (const propKey of Object.keys(properties)) {
-          tokens += calculateParameterTokens(propKey, properties[propKey], {
-            encoder,
-            constants,
-          })
-        }
-      }
+      tokens += calculatePropertiesTokens(
+        value as Record<string, unknown>,
+        encoder,
+        constants,
+      )
     } else {
       const paramText =
         typeof value === "string" ? value : JSON.stringify(value)
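
For example, parameter schemas produced by JSON-schema generators often carry metadata keys; with the new exclusion set, those no longer inflate the estimate. A hypothetical payload (illustrative only):

// Hypothetical "parameters" object as emitted by a JSON-schema generator.
const parameters = {
  $schema: "http://json-schema.org/draft-07/schema#", // skipped via excludedKeys
  additionalProperties: false, // skipped via excludedKeys
  type: "object",
  properties: {
    city: { type: "string", description: "City to look up" },
  },
  required: ["city"],
}
// Only "type", "properties", and "required" now contribute to the token estimate.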
@@ -306,10 +334,16 @@ export const numTokensForTools = (
   constants: ReturnType<typeof getModelConstants>,
 ): number => {
   let funcTokenCount = 0
-  for (const tool of tools) {
-    funcTokenCount += calculateToolTokens(tool, encoder, constants)
+  if (constants.isGpt) {
+    for (const tool of tools) {
+      funcTokenCount += calculateToolTokens(tool, encoder, constants)
+    }
+    funcTokenCount += constants.funcEnd
+  } else {
+    for (const tool of tools) {
+      funcTokenCount += encoder.encode(JSON.stringify(tool)).length
+    }
   }
-  funcTokenCount += constants.funcEnd
   return funcTokenCount
 }

@@ -335,6 +369,7 @@ export const getTokenCount = async (
   )

   const constants = getModelConstants(model)
+  // gpt count token https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
   let inputTokens = calculateTokens(inputMessages, encoder, constants)
   if (payload.tools && payload.tools.length > 0) {
     inputTokens += numTokensForTools(payload.tools, encoder, constants)
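
Taken together, numTokensForTools now splits into two paths: GPT-style models keep the cookbook-style per-tool heuristic (calculateToolTokens) plus a single trailing funcEnd, while other models simply count the serialized tool JSON. A standalone sketch of that shape, using toy stand-ins for the encoder and constants (not the module's real ones):

// Toy stand-ins; the real module derives the encoder from the model and uses
// calculateToolTokens for the GPT path.
const toyEncode = (text: string): Array<number> =>
  Array.from(text, (char) => char.charCodeAt(0))

interface ToyTool {
  function: { name: string; description?: string }
}

const countToolTokens = (
  tools: Array<ToyTool>,
  constants: { funcInit: number; funcEnd: number; isGpt: boolean },
): number => {
  let total = 0
  if (constants.isGpt) {
    for (const tool of tools) {
      // Rough stand-in for calculateToolTokens: fixed overhead plus the name.
      total += constants.funcInit + toyEncode(tool.function.name).length
    }
    total += constants.funcEnd
  } else {
    for (const tool of tools) {
      total += toyEncode(JSON.stringify(tool)).length
    }
  }
  return total
}

console.log(
  countToolTokens([{ function: { name: "get_weather" } }], {
    funcInit: 7,
    funcEnd: 12,
    isGpt: true,
  }),
)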

src/routes/messages/count-tokens-handler.ts

Lines changed: 8 additions & 7 deletions
@@ -33,27 +33,28 @@ export async function handleCountTokens(c: Context) {
   const tokenCount = await getTokenCount(openAIPayload, selectedModel)

   if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
-    let mcpToolExist = false
+    let addToolSystemPromptCount = false
     if (anthropicBeta?.startsWith("claude-code")) {
-      mcpToolExist = anthropicPayload.tools.some((tool) =>
-        tool.name.startsWith("mcp__"),
+      const toolsLength = anthropicPayload.tools.length
+      addToolSystemPromptCount = !anthropicPayload.tools.some(
+        (tool) =>
+          tool.name.startsWith("mcp__")
+          || (tool.name === "Skill" && toolsLength === 1),
       )
     }
-    if (!mcpToolExist) {
+    if (addToolSystemPromptCount) {
       if (anthropicPayload.model.startsWith("claude")) {
         // https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview#pricing
         tokenCount.input = tokenCount.input + 346
       } else if (anthropicPayload.model.startsWith("grok")) {
-        tokenCount.input = tokenCount.input + 480
+        tokenCount.input = tokenCount.input + 120
       }
     }
   }

   let finalTokenCount = tokenCount.input + tokenCount.output
   if (anthropicPayload.model.startsWith("claude")) {
     finalTokenCount = Math.round(finalTokenCount * 1.15)
-  } else if (anthropicPayload.model.startsWith("grok")) {
-    finalTokenCount = Math.round(finalTokenCount * 1.03)
   }

   consola.info("Token count:", finalTokenCount)
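
A worked sketch of the adjusted post-processing, with illustrative numbers and a hypothetical model id (the 346 and 120 offsets and the 1.15 factor come from the diff above):

// Hypothetical base estimate returned by getTokenCount.
const tokenCount = { input: 1000, output: 1 }
const model = "claude-sonnet-4" // hypothetical model id
const addToolSystemPromptCount = true // claude-code beta, tools are not MCP-only or Skill-only

if (addToolSystemPromptCount) {
  if (model.startsWith("claude")) {
    tokenCount.input = tokenCount.input + 346 // Anthropic tool-use system prompt (see link above)
  } else if (model.startsWith("grok")) {
    tokenCount.input = tokenCount.input + 120 // lowered from 480 in this commit
  }
}

let finalTokenCount = tokenCount.input + tokenCount.output
if (model.startsWith("claude")) {
  // The 1.15 safety margin stays for Claude; the 1.03 Grok multiplier is gone.
  finalTokenCount = Math.round(finalTokenCount * 1.15)
}
console.log(finalTokenCount) // Math.round((1000 + 346 + 1) * 1.15) === 1549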
