From 1fe86cf7fb59d3580ed46116bbc68bd14c2b91ff Mon Sep 17 00:00:00 2001 From: _Kerman Date: Mon, 1 Jun 2026 14:46:35 +0800 Subject: [PATCH 1/4] fix(agent-core): name truncated compaction errors --- .changeset/named-compaction-truncation.md | 6 ++++ .../agent-core/src/agent/compaction/full.ts | 18 +++++++--- .../test/agent/compaction/full.test.ts | 36 +++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 .changeset/named-compaction-truncation.md diff --git a/.changeset/named-compaction-truncation.md b/.changeset/named-compaction-truncation.md new file mode 100644 index 00000000..e3110e53 --- /dev/null +++ b/.changeset/named-compaction-truncation.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/agent-core": patch +"@moonshot-ai/kimi-code": patch +--- + +Report truncated compaction summaries with a specific error name. diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index ade727d4..2b3c9069 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -38,6 +38,13 @@ export interface CompactedHistory { export const MAX_COMPACTION_RETRY_ATTEMPTS = 5; +class CompactionTruncatedError extends Error { + constructor() { + super('Compaction response was truncated before producing a complete summary.'); + this.name = 'CompactionTruncatedError'; + } +} + export class FullCompaction { protected compactionCountInTurn = 0; protected compacting: { @@ -225,6 +232,10 @@ export class FullCompaction { await this.triggerPreCompactHook(data, tokensBefore, signal); const model = this.agent.config.model; + const provider = + this.agent.config.provider.withMaxCompletionTokens?.( + this.agent.config.modelCapabilities.max_context_tokens, + ) ?? this.agent.config.provider; const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS); let usage: TokenUsage | null; @@ -244,10 +255,9 @@ export class FullCompaction { toolCalls: [], } satisfies Message, ]; - class TruncatedError extends Error {} try { const response = await this.agent.generate( - this.agent.config.provider, + provider, this.agent.config.systemPrompt, [...this.agent.tools.loopTools], messages, @@ -255,13 +265,13 @@ export class FullCompaction { { signal }, ); if (response.finishReason === 'truncated') { - throw new TruncatedError(); + throw new CompactionTruncatedError(); } usage = response.usage; summary = extractCompactionSummary(response); break; } catch (error) { - if (error instanceof APIContextOverflowError || error instanceof TruncatedError) { + if (error instanceof APIContextOverflowError || error instanceof CompactionTruncatedError) { compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact); } else if (!isRetryableGenerateError(error)) { diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index f3148d0c..ae6f0e01 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -731,6 +731,42 @@ describe('FullCompaction', () => { await ctx.expectResumeMatches(); }); + it('names truncated compaction responses when retries are exhausted', async () => { + vi.useFakeTimers(); + let attempts = 0; + const generate: GenerateFn = async () => { + attempts += 1; + return { + ...textResult('Partial summary.'), + finishReason: 'truncated', + rawFinishReason: 'length', + }; + }; + const ctx = testAgent({ generate, compactionStrategy: alwaysCompactOnce }); + ctx.configure(); + + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Trigger truncated auto compaction' }] }); + await vi.advanceTimersByTimeAsync(60_000); + const events = await ctx.untilTurnEnd(); + + expect(attempts).toBe(5); + expect(events).toContainEqual( + expect.objectContaining({ + event: 'turn.ended', + args: { + turnId: 0, + reason: 'failed', + error: expect.objectContaining({ + code: 'compaction.failed', + message: + 'CompactionTruncatedError: Compaction response was truncated before producing a complete summary.', + }), + }, + }), + ); + await ctx.expectResumeMatches(); + }); + it('reports compaction retry_count when retryable generation failures are exhausted', async () => { vi.useFakeTimers(); const records: TelemetryRecord[] = []; From 6a15625d405d45aff145bdfbdca9a88aa74ea00f Mon Sep 17 00:00:00 2001 From: _Kerman Date: Mon, 1 Jun 2026 15:12:29 +0800 Subject: [PATCH 2/4] fix: handle compaction truncation and output budgets --- .changeset/named-compaction-truncation.md | 3 +- .../agent-core/src/agent/compaction/full.ts | 8 +++-- .../test/agent/compaction/full.test.ts | 13 +++++++- packages/kosong/src/providers/anthropic.ts | 6 ++++ packages/kosong/src/providers/google-genai.ts | 4 +++ .../kosong/src/providers/openai-legacy.ts | 4 +++ .../kosong/src/providers/openai-responses.ts | 4 +++ packages/kosong/test/anthropic.test.ts | 30 +++++++++++++++++++ packages/kosong/test/google-genai.test.ts | 13 ++++++++ packages/kosong/test/openai-legacy.test.ts | 12 ++++++++ packages/kosong/test/openai-responses.test.ts | 12 ++++++++ 11 files changed, 104 insertions(+), 5 deletions(-) diff --git a/.changeset/named-compaction-truncation.md b/.changeset/named-compaction-truncation.md index e3110e53..8e121027 100644 --- a/.changeset/named-compaction-truncation.md +++ b/.changeset/named-compaction-truncation.md @@ -1,6 +1,7 @@ --- "@moonshot-ai/agent-core": patch +"@moonshot-ai/kosong": patch "@moonshot-ai/kimi-code": patch --- -Report truncated compaction summaries with a specific error name. +Report truncated compaction summaries clearly and apply valid completion token budgets across supported providers. diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index 2b3c9069..4e1d75a6 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -232,10 +232,12 @@ export class FullCompaction { await this.triggerPreCompactHook(data, tokensBefore, signal); const model = this.agent.config.model; + const baseProvider = this.agent.config.provider; + const maxContextTokens = this.agent.config.modelCapabilities.max_context_tokens; const provider = - this.agent.config.provider.withMaxCompletionTokens?.( - this.agent.config.modelCapabilities.max_context_tokens, - ) ?? this.agent.config.provider; + maxContextTokens > 0 + ? baseProvider.withMaxCompletionTokens?.(maxContextTokens) ?? baseProvider + : baseProvider; const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS); let usage: TokenUsage | null; diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index ae6f0e01..2bc461a9 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -1418,12 +1418,14 @@ describe('FullCompaction', () => { it('compacts provider overflow when model context size is unknown', async () => { let callCount = 0; - const generate: GenerateFn = async (_provider, _system, _tools, _history, callbacks) => { + const compactionMaxCompletionTokens: unknown[] = []; + const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { throw new APIContextOverflowError(400, 'Context length exceeded', 'req-unknown-context'); } if (callCount === 2) { + compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider)); return textResult('Unknown window compacted summary.'); } if (callCount === 3) { @@ -1455,6 +1457,7 @@ describe('FullCompaction', () => { const events = await ctx.untilTurnEnd(); expect(callCount).toBe(3); + expect(compactionMaxCompletionTokens).toEqual([undefined]); expect(events).toContainEqual( expect.objectContaining({ event: 'compaction.started', @@ -1661,6 +1664,14 @@ function oauthTestAgentOptions( }; } +function providerMaxCompletionTokens(provider: Parameters[0]): unknown { + return ( + provider as { + readonly modelParameters?: Record; + } + ).modelParameters?.['max_completion_tokens']; +} + function textResult(text: string): Awaited> { return { id: 'mock-compaction-oauth-retry', diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts index 4028ce93..b0344d78 100644 --- a/packages/kosong/src/providers/anthropic.ts +++ b/packages/kosong/src/providers/anthropic.ts @@ -1082,6 +1082,12 @@ export class AnthropicChatProvider implements ChatProvider { return this._withGenerationKwargs(kwargs); } + withMaxCompletionTokens(maxCompletionTokens: number): AnthropicChatProvider { + return this._withGenerationKwargs({ + max_tokens: resolveDefaultMaxTokens(this._model, maxCompletionTokens), + }); + } + private _withGenerationKwargs(kwargs: Partial): AnthropicChatProvider { const clone = this._clone(); clone._generationKwargs = { ...clone._generationKwargs, ...kwargs }; diff --git a/packages/kosong/src/providers/google-genai.ts b/packages/kosong/src/providers/google-genai.ts index c1fef436..1feadb63 100644 --- a/packages/kosong/src/providers/google-genai.ts +++ b/packages/kosong/src/providers/google-genai.ts @@ -888,6 +888,10 @@ export class GoogleGenAIChatProvider implements ChatProvider { return clone; } + withMaxCompletionTokens(maxCompletionTokens: number): GoogleGenAIChatProvider { + return this.withGenerationKwargs({ max_output_tokens: maxCompletionTokens }); + } + private _clone(): GoogleGenAIChatProvider { const clone = Object.assign( Object.create(Object.getPrototypeOf(this) as object) as GoogleGenAIChatProvider, diff --git a/packages/kosong/src/providers/openai-legacy.ts b/packages/kosong/src/providers/openai-legacy.ts index 75c57e31..7254dae7 100644 --- a/packages/kosong/src/providers/openai-legacy.ts +++ b/packages/kosong/src/providers/openai-legacy.ts @@ -476,6 +476,10 @@ export class OpenAILegacyChatProvider implements ChatProvider { return clone; } + withMaxCompletionTokens(maxCompletionTokens: number): OpenAILegacyChatProvider { + return this.withGenerationKwargs({ max_tokens: maxCompletionTokens }); + } + private _clone(): OpenAILegacyChatProvider { const clone = Object.assign( Object.create(Object.getPrototypeOf(this) as object) as OpenAILegacyChatProvider, diff --git a/packages/kosong/src/providers/openai-responses.ts b/packages/kosong/src/providers/openai-responses.ts index 4c1677b8..336da282 100644 --- a/packages/kosong/src/providers/openai-responses.ts +++ b/packages/kosong/src/providers/openai-responses.ts @@ -975,6 +975,10 @@ export class OpenAIResponsesChatProvider implements ChatProvider { return clone; } + withMaxCompletionTokens(maxCompletionTokens: number): OpenAIResponsesChatProvider { + return this.withGenerationKwargs({ max_output_tokens: maxCompletionTokens }); + } + private _clone(): OpenAIResponsesChatProvider { const clone = Object.assign( Object.create(Object.getPrototypeOf(this) as object) as OpenAIResponsesChatProvider, diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts index b539ca1d..b65a607b 100644 --- a/packages/kosong/test/anthropic.test.ts +++ b/packages/kosong/test/anthropic.test.ts @@ -2138,4 +2138,34 @@ describe('AnthropicChatProvider constructor max_tokens', () => { it('clamps defaultMaxTokens above the documented ceiling for known models', async () => { expect(await maxTokensFor('claude-opus-4-7', { defaultMaxTokens: 999999 })).toBe(128000); }); + + it('withMaxCompletionTokens sets max_tokens on the cloned provider', async () => { + const original = new AnthropicChatProvider({ + model: 'claude-opus-4-7', + apiKey: 'test-key', + stream: false, + }); + const provider = original.withMaxCompletionTokens(2048); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + expect(provider).not.toBe(original); + expect(body['max_tokens']).toBe(2048); + }); + + it('withMaxCompletionTokens clamps above the documented ceiling for known models', async () => { + const provider = new AnthropicChatProvider({ + model: 'claude-opus-4-7', + apiKey: 'test-key', + stream: false, + }).withMaxCompletionTokens(999999); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + expect(body['max_tokens']).toBe(128000); + }); }); diff --git a/packages/kosong/test/google-genai.test.ts b/packages/kosong/test/google-genai.test.ts index 41a24a01..66ee1c4a 100644 --- a/packages/kosong/test/google-genai.test.ts +++ b/packages/kosong/test/google-genai.test.ts @@ -605,6 +605,19 @@ describe('GoogleGenAIChatProvider', () => { expect(config['temperature']).toBe(0.7); expect(config['max_output_tokens']).toBe(2048); }); + + it('withMaxCompletionTokens sets max_output_tokens on the cloned provider', async () => { + const original = createProvider(); + const provider = original.withMaxCompletionTokens(1024); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + const config = body['config'] as Record; + expect(provider).not.toBe(original); + expect(config['max_output_tokens']).toBe(1024); + }); }); describe('tool name inference from tool_call_id (orphan tool messages)', () => { diff --git a/packages/kosong/test/openai-legacy.test.ts b/packages/kosong/test/openai-legacy.test.ts index fc851fc6..f01c6aad 100644 --- a/packages/kosong/test/openai-legacy.test.ts +++ b/packages/kosong/test/openai-legacy.test.ts @@ -424,6 +424,18 @@ describe('OpenAILegacyChatProvider', () => { expect(body['temperature']).toBe(0.7); expect(body['max_tokens']).toBe(2048); }); + + it('withMaxCompletionTokens sets max_tokens on the cloned provider', async () => { + const original = createProvider(); + const provider = original.withMaxCompletionTokens(1024); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + expect(provider).not.toBe(original); + expect(body['max_tokens']).toBe(1024); + }); }); describe('maxTokens option', () => { diff --git a/packages/kosong/test/openai-responses.test.ts b/packages/kosong/test/openai-responses.test.ts index 93e27b7d..e3b69d07 100644 --- a/packages/kosong/test/openai-responses.test.ts +++ b/packages/kosong/test/openai-responses.test.ts @@ -742,6 +742,18 @@ describe('OpenAIResponsesChatProvider', () => { expect(body['temperature']).toBe(0.7); expect(body['max_output_tokens']).toBe(2048); }); + + it('withMaxCompletionTokens sets max_output_tokens on the cloned provider', async () => { + const original = createProvider(); + const provider = original.withMaxCompletionTokens(1024); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + expect(provider).not.toBe(original); + expect(body['max_output_tokens']).toBe(1024); + }); }); describe('reasoning configuration', () => { From 5c160ae55085519ccd6dd3afaa1a227d66c2fe8d Mon Sep 17 00:00:00 2001 From: _Kerman Date: Mon, 1 Jun 2026 16:18:06 +0800 Subject: [PATCH 3/4] fix: honor compaction output budgets --- .../agent-core/src/agent/compaction/full.ts | 23 ++++-- .../test/agent/compaction/full.test.ts | 70 ++++++++++++++++++- packages/kosong/src/providers/anthropic.ts | 4 +- packages/kosong/test/anthropic.test.ts | 36 +++++++++- 4 files changed, 122 insertions(+), 11 deletions(-) diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index 4e1d75a6..eac1b65b 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -25,10 +25,18 @@ import { estimateTokens, estimateTokensForMessages, } from '../../utils/tokens'; +import { + applyCompletionBudget, + resolveCompletionBudget, +} from '../../utils/completion-budget'; import compactionInstructionTemplate from './compaction-instruction.md'; import { renderMessagesToText } from './render-messages'; import type { CompactionBeginData, CompactionResult } from './types'; -import { DEFAULT_COMPACTION_CONFIG, DefaultCompactionStrategy, type CompactionStrategy } from './strategy'; +import { + DEFAULT_COMPACTION_CONFIG, + DefaultCompactionStrategy, + type CompactionStrategy, +} from './strategy'; type CompactionTelemetryTrigger = CompactionBeginData['source'] | 'manual-with-prompt' | 'unknown'; @@ -232,12 +240,13 @@ export class FullCompaction { await this.triggerPreCompactHook(data, tokensBefore, signal); const model = this.agent.config.model; - const baseProvider = this.agent.config.provider; - const maxContextTokens = this.agent.config.modelCapabilities.max_context_tokens; - const provider = - maxContextTokens > 0 - ? baseProvider.withMaxCompletionTokens?.(maxContextTokens) ?? baseProvider - : baseProvider; + const provider = applyCompletionBudget({ + provider: this.agent.config.provider, + budget: resolveCompletionBudget({ + reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize, + }), + capability: this.agent.config.modelCapabilities, + }); const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS); let usage: TokenUsage | null; diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 2bc461a9..703413cf 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -1457,7 +1457,7 @@ describe('FullCompaction', () => { const events = await ctx.untilTurnEnd(); expect(callCount).toBe(3); - expect(compactionMaxCompletionTokens).toEqual([undefined]); + expect(compactionMaxCompletionTokens).toEqual([32000]); expect(events).toContainEqual( expect.objectContaining({ event: 'compaction.started', @@ -1481,6 +1481,74 @@ describe('FullCompaction', () => { ); }); + it('honors completion budget env hard caps during compaction', async () => { + vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '8192'); + let callCount = 0; + const compactionMaxCompletionTokens: unknown[] = []; + const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { + callCount += 1; + if (callCount === 1) { + throw new APIContextOverflowError(400, 'Context length exceeded', 'req-hard-cap'); + } + if (callCount === 2) { + compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider)); + return textResult('Hard cap compacted summary.'); + } + await callbacks?.onMessagePart?.({ + type: 'text', + text: 'Recovered with hard cap.', + }); + return textResult('Recovered with hard cap.'); + }; + const ctx = testAgent({ generate }); + ctx.configure({ + provider: CATALOGUED_PROVIDER, + modelCapabilities: CATALOGUED_MODEL_CAPABILITIES, + }); + ctx.appendExchange(1, 'old user one', 'old assistant one', 20); + ctx.newEvents(); + + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry with hard cap' }] }); + await ctx.untilTurnEnd(); + + expect(callCount).toBe(3); + expect(compactionMaxCompletionTokens).toEqual([8192]); + }); + + it('honors completion budget env opt-out during compaction', async () => { + vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '0'); + let callCount = 0; + const compactionMaxCompletionTokens: unknown[] = []; + const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { + callCount += 1; + if (callCount === 1) { + throw new APIContextOverflowError(400, 'Context length exceeded', 'req-opt-out'); + } + if (callCount === 2) { + compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider)); + return textResult('Opt-out compacted summary.'); + } + await callbacks?.onMessagePart?.({ + type: 'text', + text: 'Recovered with opt-out.', + }); + return textResult('Recovered with opt-out.'); + }; + const ctx = testAgent({ generate }); + ctx.configure({ + provider: CATALOGUED_PROVIDER, + modelCapabilities: CATALOGUED_MODEL_CAPABILITIES, + }); + ctx.appendExchange(1, 'old user one', 'old assistant one', 20); + ctx.newEvents(); + + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry with opt-out' }] }); + await ctx.untilTurnEnd(); + + expect(callCount).toBe(3); + expect(compactionMaxCompletionTokens).toEqual([undefined]); + }); + it('ignores filtered assistant placeholders when checking the retained overflow suffix', async () => { let callCount = 0; const generate: GenerateFn = async (_provider, _system, _tools, _history, callbacks) => { diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts index b0344d78..63405280 100644 --- a/packages/kosong/src/providers/anthropic.ts +++ b/packages/kosong/src/providers/anthropic.ts @@ -1084,7 +1084,9 @@ export class AnthropicChatProvider implements ChatProvider { withMaxCompletionTokens(maxCompletionTokens: number): AnthropicChatProvider { return this._withGenerationKwargs({ - max_tokens: resolveDefaultMaxTokens(this._model, maxCompletionTokens), + max_tokens: + this._generationKwargs.max_tokens ?? + resolveDefaultMaxTokens(this._model, maxCompletionTokens), }); } diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts index b65a607b..62f7ebaa 100644 --- a/packages/kosong/test/anthropic.test.ts +++ b/packages/kosong/test/anthropic.test.ts @@ -2139,13 +2139,15 @@ describe('AnthropicChatProvider constructor max_tokens', () => { expect(await maxTokensFor('claude-opus-4-7', { defaultMaxTokens: 999999 })).toBe(128000); }); - it('withMaxCompletionTokens sets max_tokens on the cloned provider', async () => { + it('withMaxCompletionTokens sets max_tokens when no existing cap is present', async () => { const original = new AnthropicChatProvider({ model: 'claude-opus-4-7', apiKey: 'test-key', stream: false, }); - const provider = original.withMaxCompletionTokens(2048); + const provider = original + .withGenerationKwargs({ max_tokens: undefined }) + .withMaxCompletionTokens(2048); const history: Message[] = [ { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] }, ]; @@ -2155,6 +2157,36 @@ describe('AnthropicChatProvider constructor max_tokens', () => { expect(body['max_tokens']).toBe(2048); }); + it('withMaxCompletionTokens preserves an existing lower max_tokens cap', async () => { + const provider = new AnthropicChatProvider({ + model: 'claude-opus-4-7', + apiKey: 'test-key', + stream: false, + defaultMaxTokens: 1024, + }).withMaxCompletionTokens(128000); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + expect(body['max_tokens']).toBe(1024); + }); + + it('withMaxCompletionTokens preserves an existing higher max_tokens cap', async () => { + const provider = new AnthropicChatProvider({ + model: 'unknown-model', + apiKey: 'test-key', + stream: false, + defaultMaxTokens: 128000, + }).withMaxCompletionTokens(1024); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + expect(body['max_tokens']).toBe(128000); + }); + it('withMaxCompletionTokens clamps above the documented ceiling for known models', async () => { const provider = new AnthropicChatProvider({ model: 'claude-opus-4-7', From 534844a62ea6e91f467427397f4e78e2780d9a6d Mon Sep 17 00:00:00 2001 From: _Kerman Date: Mon, 1 Jun 2026 16:48:31 +0800 Subject: [PATCH 4/4] fix: honor anthropic completion hard caps --- packages/kosong/src/providers/anthropic.ts | 16 +++++++++++++--- packages/kosong/test/anthropic.test.ts | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts index 63405280..06e5b6c9 100644 --- a/packages/kosong/src/providers/anthropic.ts +++ b/packages/kosong/src/providers/anthropic.ts @@ -815,6 +815,7 @@ export class AnthropicChatProvider implements ChatProvider { private _defaultHeaders: Record | undefined; private _clientFactory: ((auth: ProviderRequestAuth) => Anthropic) | undefined; private _adaptiveThinking: boolean | undefined; + private _explicitMaxTokens: boolean; constructor(options: AnthropicOptions) { this._model = options.model; @@ -827,6 +828,7 @@ export class AnthropicChatProvider implements ChatProvider { this._defaultHeaders = options.defaultHeaders; this._clientFactory = options.clientFactory; this._client = this._apiKey === undefined ? undefined : this._buildClient(this._apiKey); + this._explicitMaxTokens = options.defaultMaxTokens !== undefined; this._generationKwargs = { max_tokens: resolveDefaultMaxTokens(options.model, options.defaultMaxTokens), betaFeatures: options.betaFeatures ?? [INTERLEAVED_THINKING_BETA], @@ -1083,16 +1085,24 @@ export class AnthropicChatProvider implements ChatProvider { } withMaxCompletionTokens(maxCompletionTokens: number): AnthropicChatProvider { - return this._withGenerationKwargs({ + const requestedCap = resolveDefaultMaxTokens(this._model, maxCompletionTokens); + const existingCap = this._generationKwargs.max_tokens; + const clone = this._withGenerationKwargs({ max_tokens: - this._generationKwargs.max_tokens ?? - resolveDefaultMaxTokens(this._model, maxCompletionTokens), + existingCap === undefined || this._explicitMaxTokens + ? existingCap ?? requestedCap + : Math.min(existingCap, requestedCap), }); + clone._explicitMaxTokens = this._explicitMaxTokens; + return clone; } private _withGenerationKwargs(kwargs: Partial): AnthropicChatProvider { const clone = this._clone(); clone._generationKwargs = { ...clone._generationKwargs, ...kwargs }; + if ('max_tokens' in kwargs) { + clone._explicitMaxTokens = kwargs.max_tokens !== undefined; + } return clone; } diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts index 62f7ebaa..5f40f9e6 100644 --- a/packages/kosong/test/anthropic.test.ts +++ b/packages/kosong/test/anthropic.test.ts @@ -2157,6 +2157,20 @@ describe('AnthropicChatProvider constructor max_tokens', () => { expect(body['max_tokens']).toBe(2048); }); + it('withMaxCompletionTokens lowers the inferred model default cap', async () => { + const provider = new AnthropicChatProvider({ + model: 'claude-opus-4-7', + apiKey: 'test-key', + stream: false, + }).withMaxCompletionTokens(8192); + const history: Message[] = [ + { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] }, + ]; + const body = await captureRequestBody(provider, '', [], history); + + expect(body['max_tokens']).toBe(8192); + }); + it('withMaxCompletionTokens preserves an existing lower max_tokens cap', async () => { const provider = new AnthropicChatProvider({ model: 'claude-opus-4-7',