Skip to content

Commit 0a43d8b

Browse files
committed
Include tools in the token-count API. Only Gemini/OpenAI models get the 30% token increase in the token counter.
1 parent: d2c5fab — commit 0a43d8b

File tree

3 files changed

+41
-11
lines changed

3 files changed

+41
-11
lines changed

packages/agent-runtime/src/llm-api/codebuff-web-api.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,14 @@ export async function callTokenCountAPI(params: {
230230
messages: unknown[]
231231
system?: string
232232
model?: string
233+
tools?: Array<{ name: string; description?: string; input_schema?: unknown }>
233234
fetch: typeof globalThis.fetch
234235
logger: Logger
235236
env: CodebuffWebApiEnv
236237
baseUrl?: string
237238
apiKey?: string
238239
}): Promise<{ inputTokens?: number; error?: string }> {
239-
const { messages, system, model, fetch, logger, env } = params
240+
const { messages, system, model, tools, fetch, logger, env } = params
240241
const baseUrl = params.baseUrl ?? env.clientEnv.NEXT_PUBLIC_CODEBUFF_APP_URL
241242
const apiKey = params.apiKey ?? env.ciEnv.CODEBUFF_API_KEY
242243

@@ -248,6 +249,7 @@ export async function callTokenCountAPI(params: {
248249
const payload: Record<string, unknown> = { messages }
249250
if (system) payload.system = system
250251
if (model) payload.model = model
252+
if (tools) payload.tools = tools
251253

252254
try {
253255
const res = await withTimeout(

packages/agent-runtime/src/run-agent-step.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,18 @@ export async function loopAgentSteps(
806806
systemPrompt: system,
807807
toolDefinitions,
808808
}
809+
810+
// Convert tool definitions to Anthropic format for accurate token counting
811+
// Tool definitions are stored as { [name]: { description, inputSchema } }
812+
// Anthropic count_tokens API expects [{ name, description, input_schema }]
813+
const toolsForTokenCount = Object.entries(toolDefinitions).map(
814+
([name, def]) => ({
815+
name,
816+
...(def.description && { description: def.description }),
817+
...(def.inputSchema && { input_schema: def.inputSchema }),
818+
}),
819+
)
820+
809821
let shouldEndTurn = false
810822
let hasRetriedOutputSchema = false
811823
let currentPrompt = prompt
@@ -845,6 +857,7 @@ export async function loopAgentSteps(
845857
messages: messagesWithStepPrompt,
846858
system,
847859
model: agentTemplate.model,
860+
tools: toolsForTokenCount,
848861
fetch,
849862
logger,
850863
env: { clientEnv, ciEnv },

web/src/app/api/v1/token-count/_post.ts

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
isClaudeModel,
44
toAnthropicModelId,
55
} from '@codebuff/common/constants/claude-oauth'
6+
import { isOpenAIProviderModel } from '@codebuff/common/constants/chatgpt-oauth'
67
import { getErrorObject } from '@codebuff/common/util/error'
78
import { env } from '@codebuff/internal/env'
89
import { NextResponse } from 'next/server'
@@ -22,6 +23,11 @@ const tokenCountRequestSchema = z.object({
2223
messages: z.array(z.any()),
2324
system: z.string().optional(),
2425
model: z.string().optional(),
26+
tools: z.array(z.object({
27+
name: z.string(),
28+
description: z.string().optional(),
29+
input_schema: z.any().optional(),
30+
})).optional(),
2531
})
2632

2733
type TokenCountRequest = z.infer<typeof tokenCountRequestSchema>
@@ -74,24 +80,27 @@ export async function postTokenCount(params: {
7480
return bodyResult.response
7581
}
7682

77-
const { messages, system, model } = bodyResult.data
83+
const { messages, system, model, tools } = bodyResult.data
7884

7985
try {
8086
const useOpenAI = model != null && false // isOpenAIProviderModel(model)
8187
const inputTokens = useOpenAI
8288
? await countTokensViaOpenAI({ messages, system, model, fetch, logger })
8389
: await countTokensViaAnthropic({
84-
messages,
85-
system,
86-
model,
87-
fetch,
88-
logger,
89-
})
90+
messages,
91+
system,
92+
model,
93+
tools,
94+
fetch,
95+
logger,
96+
})
9097

9198
logger.info({
9299
userId,
93100
messageCount: messages.length,
94101
hasSystem: !!system,
102+
hasTools: !!tools,
103+
toolCount: tools?.length,
95104
model: model ?? DEFAULT_ANTHROPIC_MODEL,
96105
tokenCount: inputTokens,
97106
provider: useOpenAI ? 'openai' : 'anthropic',
@@ -285,10 +294,11 @@ async function countTokensViaAnthropic(params: {
285294
messages: TokenCountRequest['messages']
286295
system: string | undefined
287296
model: string | undefined
297+
tools: TokenCountRequest['tools']
288298
fetch: typeof globalThis.fetch
289299
logger: Logger
290300
}): Promise<number> {
291-
const { messages, system, model, fetch, logger } = params
301+
const { messages, system, model, tools, fetch, logger } = params
292302

293303
// Convert messages to Anthropic format
294304
const anthropicMessages = convertToAnthropicMessages(messages)
@@ -315,6 +325,7 @@ async function countTokensViaAnthropic(params: {
315325
model: anthropicModelId,
316326
messages: anthropicMessages,
317327
...(system && { system }),
328+
...(tools && { tools }),
318329
}),
319330
},
320331
)
@@ -337,8 +348,12 @@ async function countTokensViaAnthropic(params: {
337348
const data = await response.json()
338349
const baseTokens = data.input_tokens
339350

340-
// Add 30% buffer for non-Anthropic models since tokenizers differ
341-
if (isNonAnthropicModel) {
351+
// Add 30% buffer for OpenAI and Gemini models since their tokenizers differ from Anthropic's
352+
// Other non-Anthropic models (x-ai, qwen, deepseek, etc.) are routed through providers that
353+
// use similar tokenization, so the buffer is not needed and was causing premature context pruning.
354+
const isOpenAIModel = model ? isOpenAIProviderModel(model) : false
355+
const isGeminiModel = model?.startsWith('google/') ?? false
356+
if (isOpenAIModel || isGeminiModel) {
342357
return Math.ceil(baseTokens * (1 + NON_ANTHROPIC_TOKEN_BUFFER))
343358
}
344359

0 commit comments

Comments
 (0)