33 isClaudeModel ,
44 toAnthropicModelId ,
55} from '@codebuff/common/constants/claude-oauth'
6+ import { isOpenAIProviderModel } from '@codebuff/common/constants/chatgpt-oauth'
67import { getErrorObject } from '@codebuff/common/util/error'
78import { env } from '@codebuff/internal/env'
89import { NextResponse } from 'next/server'
@@ -22,6 +23,11 @@ const tokenCountRequestSchema = z.object({
2223 messages : z . array ( z . any ( ) ) ,
2324 system : z . string ( ) . optional ( ) ,
2425 model : z . string ( ) . optional ( ) ,
26+ tools : z . array ( z . object ( {
27+ name : z . string ( ) ,
28+ description : z . string ( ) . optional ( ) ,
29+ input_schema : z . any ( ) . optional ( ) ,
30+ } ) ) . optional ( ) ,
2531} )
2632
2733type TokenCountRequest = z . infer < typeof tokenCountRequestSchema >
@@ -74,24 +80,27 @@ export async function postTokenCount(params: {
7480 return bodyResult . response
7581 }
7682
77- const { messages, system, model } = bodyResult . data
83+ const { messages, system, model, tools } = bodyResult . data
7884
7985 try {
8086 const useOpenAI = model != null && false // isOpenAIProviderModel(model)
8187 const inputTokens = useOpenAI
8288 ? await countTokensViaOpenAI ( { messages, system, model, fetch, logger } )
8389 : await countTokensViaAnthropic ( {
84- messages,
85- system,
86- model,
87- fetch,
88- logger,
89- } )
90+ messages,
91+ system,
92+ model,
93+ tools,
94+ fetch,
95+ logger,
96+ } )
9097
9198 logger . info ( {
9299 userId,
93100 messageCount : messages . length ,
94101 hasSystem : ! ! system ,
102+ hasTools : ! ! tools ,
103+ toolCount : tools ?. length ,
95104 model : model ?? DEFAULT_ANTHROPIC_MODEL ,
96105 tokenCount : inputTokens ,
97106 provider : useOpenAI ? 'openai' : 'anthropic' ,
@@ -285,10 +294,11 @@ async function countTokensViaAnthropic(params: {
285294 messages : TokenCountRequest [ 'messages' ]
286295 system : string | undefined
287296 model : string | undefined
297+ tools : TokenCountRequest [ 'tools' ]
288298 fetch : typeof globalThis . fetch
289299 logger : Logger
290300} ) : Promise < number > {
291- const { messages, system, model, fetch, logger } = params
301+ const { messages, system, model, tools , fetch, logger } = params
292302
293303 // Convert messages to Anthropic format
294304 const anthropicMessages = convertToAnthropicMessages ( messages )
@@ -315,6 +325,7 @@ async function countTokensViaAnthropic(params: {
315325 model : anthropicModelId ,
316326 messages : anthropicMessages ,
317327 ...( system && { system } ) ,
328+ ...( tools && { tools } ) ,
318329 } ) ,
319330 } ,
320331 )
@@ -337,8 +348,12 @@ async function countTokensViaAnthropic(params: {
337348 const data = await response . json ( )
338349 const baseTokens = data . input_tokens
339350
340- // Add 30% buffer for non-Anthropic models since tokenizers differ
341- if ( isNonAnthropicModel ) {
351+ // Add 30% buffer for OpenAI and Gemini models since their tokenizers differ from Anthropic's
352+ // Other non-Anthropic models (x-ai, qwen, deepseek, etc.) are routed through providers that
353+ // use similar tokenization, so the buffer is not needed and was causing premature context pruning.
354+ const isOpenAIModel = model ? isOpenAIProviderModel ( model ) : false
355+ const isGeminiModel = model ?. startsWith ( 'google/' ) ?? false
356+ if ( isOpenAIModel || isGeminiModel ) {
342357 return Math . ceil ( baseTokens * ( 1 + NON_ANTHROPIC_TOKEN_BUFFER ) )
343358 }
344359
0 commit comments