From 1fe86cf7fb59d3580ed46116bbc68bd14c2b91ff Mon Sep 17 00:00:00 2001
From: _Kerman <kermanx@qq.com>
Date: Mon, 1 Jun 2026 14:46:35 +0800
Subject: [PATCH 1/4] fix(agent-core): name truncated compaction errors

---
 .changeset/named-compaction-truncation.md     |  6 ++++
 .../agent-core/src/agent/compaction/full.ts   | 18 +++++++---
 .../test/agent/compaction/full.test.ts        | 36 +++++++++++++++++++
 3 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 .changeset/named-compaction-truncation.md

diff --git a/.changeset/named-compaction-truncation.md b/.changeset/named-compaction-truncation.md
new file mode 100644
index 00000000..e3110e53
--- /dev/null
+++ b/.changeset/named-compaction-truncation.md
@@ -0,0 +1,6 @@
+---
+"@moonshot-ai/agent-core": patch
+"@moonshot-ai/kimi-code": patch
+---
+
+Report truncated compaction summaries with a specific error name.
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index ade727d4..2b3c9069 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -38,6 +38,13 @@ export interface CompactedHistory {
 
 export const MAX_COMPACTION_RETRY_ATTEMPTS = 5;
 
+class CompactionTruncatedError extends Error {
+  constructor() {
+    super('Compaction response was truncated before producing a complete summary.');
+    this.name = 'CompactionTruncatedError';
+  }
+}
+
 export class FullCompaction {
   protected compactionCountInTurn = 0;
   protected compacting: {
@@ -225,6 +232,10 @@ export class FullCompaction {
       await this.triggerPreCompactHook(data, tokensBefore, signal);
 
       const model = this.agent.config.model;
+      const provider =
+        this.agent.config.provider.withMaxCompletionTokens?.(
+          this.agent.config.modelCapabilities.max_context_tokens,
+        ) ?? this.agent.config.provider;
 
       const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS);
       let usage: TokenUsage | null;
@@ -244,10 +255,9 @@ export class FullCompaction {
             toolCalls: [],
           } satisfies Message,
         ];
-        class TruncatedError extends Error {}
         try {
           const response = await this.agent.generate(
-            this.agent.config.provider,
+            provider,
             this.agent.config.systemPrompt,
             [...this.agent.tools.loopTools],
             messages,
@@ -255,13 +265,13 @@ export class FullCompaction {
             { signal },
           );
           if (response.finishReason === 'truncated') {
-            throw new TruncatedError();
+            throw new CompactionTruncatedError();
           }
           usage = response.usage;
           summary = extractCompactionSummary(response);
           break;
         } catch (error) {
-          if (error instanceof APIContextOverflowError || error instanceof TruncatedError) {
+          if (error instanceof APIContextOverflowError || error instanceof CompactionTruncatedError) {
             compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact);
           }
           else if (!isRetryableGenerateError(error)) {
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index f3148d0c..ae6f0e01 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -731,6 +731,42 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('names truncated compaction responses when retries are exhausted', async () => {
+    vi.useFakeTimers();
+    let attempts = 0;
+    const generate: GenerateFn = async () => {
+      attempts += 1;
+      return {
+        ...textResult('Partial summary.'),
+        finishReason: 'truncated',
+        rawFinishReason: 'length',
+      };
+    };
+    const ctx = testAgent({ generate, compactionStrategy: alwaysCompactOnce });
+    ctx.configure();
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Trigger truncated auto compaction' }] });
+    await vi.advanceTimersByTimeAsync(60_000);
+    const events = await ctx.untilTurnEnd();
+
+    expect(attempts).toBe(5);
+    expect(events).toContainEqual(
+      expect.objectContaining({
+        event: 'turn.ended',
+        args: {
+          turnId: 0,
+          reason: 'failed',
+          error: expect.objectContaining({
+            code: 'compaction.failed',
+            message:
+              'CompactionTruncatedError: Compaction response was truncated before producing a complete summary.',
+          }),
+        },
+      }),
+    );
+    await ctx.expectResumeMatches();
+  });
+
   it('reports compaction retry_count when retryable generation failures are exhausted', async () => {
     vi.useFakeTimers();
     const records: TelemetryRecord[] = [];

From 6a15625d405d45aff145bdfbdca9a88aa74ea00f Mon Sep 17 00:00:00 2001
From: _Kerman <kermanx@qq.com>
Date: Mon, 1 Jun 2026 15:12:29 +0800
Subject: [PATCH 2/4] fix: handle compaction truncation and output budgets

---
 .changeset/named-compaction-truncation.md     |  3 +-
 .../agent-core/src/agent/compaction/full.ts   |  8 +++--
 .../test/agent/compaction/full.test.ts        | 13 +++++++-
 packages/kosong/src/providers/anthropic.ts    |  6 ++++
 packages/kosong/src/providers/google-genai.ts |  4 +++
 .../kosong/src/providers/openai-legacy.ts     |  4 +++
 .../kosong/src/providers/openai-responses.ts  |  4 +++
 packages/kosong/test/anthropic.test.ts        | 30 +++++++++++++++++++
 packages/kosong/test/google-genai.test.ts     | 13 ++++++++
 packages/kosong/test/openai-legacy.test.ts    | 12 ++++++++
 packages/kosong/test/openai-responses.test.ts | 12 ++++++++
 11 files changed, 104 insertions(+), 5 deletions(-)

diff --git a/.changeset/named-compaction-truncation.md b/.changeset/named-compaction-truncation.md
index e3110e53..8e121027 100644
--- a/.changeset/named-compaction-truncation.md
+++ b/.changeset/named-compaction-truncation.md
@@ -1,6 +1,7 @@
 ---
 "@moonshot-ai/agent-core": patch
+"@moonshot-ai/kosong": patch
 "@moonshot-ai/kimi-code": patch
 ---
 
-Report truncated compaction summaries with a specific error name.
+Report truncated compaction summaries with a specific error name (`CompactionTruncatedError`) and apply valid completion token budgets across supported providers.
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 2b3c9069..4e1d75a6 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -232,10 +232,12 @@ export class FullCompaction {
       await this.triggerPreCompactHook(data, tokensBefore, signal);
 
       const model = this.agent.config.model;
+      const baseProvider = this.agent.config.provider;
+      const maxContextTokens = this.agent.config.modelCapabilities.max_context_tokens;
       const provider =
-        this.agent.config.provider.withMaxCompletionTokens?.(
-          this.agent.config.modelCapabilities.max_context_tokens,
-        ) ?? this.agent.config.provider;
+        maxContextTokens > 0
+          ? baseProvider.withMaxCompletionTokens?.(maxContextTokens) ?? baseProvider
+          : baseProvider;
 
       const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS);
       let usage: TokenUsage | null;
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index ae6f0e01..2bc461a9 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -1418,12 +1418,14 @@ describe('FullCompaction', () => {
 
   it('compacts provider overflow when model context size is unknown', async () => {
     let callCount = 0;
-    const generate: GenerateFn = async (_provider, _system, _tools, _history, callbacks) => {
+    const compactionMaxCompletionTokens: unknown[] = [];
+    const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => {
       callCount += 1;
       if (callCount === 1) {
         throw new APIContextOverflowError(400, 'Context length exceeded', 'req-unknown-context');
       }
       if (callCount === 2) {
+        compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider));
         return textResult('Unknown window compacted summary.');
       }
       if (callCount === 3) {
@@ -1455,6 +1457,7 @@ describe('FullCompaction', () => {
     const events = await ctx.untilTurnEnd();
 
     expect(callCount).toBe(3);
+    expect(compactionMaxCompletionTokens).toEqual([undefined]);
     expect(events).toContainEqual(
       expect.objectContaining({
         event: 'compaction.started',
@@ -1661,6 +1664,14 @@ function oauthTestAgentOptions(
   };
 }
 
+function providerMaxCompletionTokens(provider: Parameters<GenerateFn>[0]): unknown {
+  return (
+    provider as {
+      readonly modelParameters?: Record<string, unknown>;
+    }
+  ).modelParameters?.['max_completion_tokens'];
+}
+
 function textResult(text: string): Awaited<ReturnType<GenerateFn>> {
   return {
     id: 'mock-compaction-oauth-retry',
diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
index 4028ce93..b0344d78 100644
--- a/packages/kosong/src/providers/anthropic.ts
+++ b/packages/kosong/src/providers/anthropic.ts
@@ -1082,6 +1082,12 @@ export class AnthropicChatProvider implements ChatProvider {
     return this._withGenerationKwargs(kwargs);
   }
 
+  withMaxCompletionTokens(maxCompletionTokens: number): AnthropicChatProvider {
+    return this._withGenerationKwargs({
+      max_tokens: resolveDefaultMaxTokens(this._model, maxCompletionTokens),
+    });
+  }
+
   private _withGenerationKwargs(kwargs: Partial<AnthropicGenerationKwargs>): AnthropicChatProvider {
     const clone = this._clone();
     clone._generationKwargs = { ...clone._generationKwargs, ...kwargs };
diff --git a/packages/kosong/src/providers/google-genai.ts b/packages/kosong/src/providers/google-genai.ts
index c1fef436..1feadb63 100644
--- a/packages/kosong/src/providers/google-genai.ts
+++ b/packages/kosong/src/providers/google-genai.ts
@@ -888,6 +888,10 @@ export class GoogleGenAIChatProvider implements ChatProvider {
     return clone;
   }
 
+  withMaxCompletionTokens(maxCompletionTokens: number): GoogleGenAIChatProvider {
+    return this.withGenerationKwargs({ max_output_tokens: maxCompletionTokens });
+  }
+
   private _clone(): GoogleGenAIChatProvider {
     const clone = Object.assign(
       Object.create(Object.getPrototypeOf(this) as object) as GoogleGenAIChatProvider,
diff --git a/packages/kosong/src/providers/openai-legacy.ts b/packages/kosong/src/providers/openai-legacy.ts
index 75c57e31..7254dae7 100644
--- a/packages/kosong/src/providers/openai-legacy.ts
+++ b/packages/kosong/src/providers/openai-legacy.ts
@@ -476,6 +476,10 @@ export class OpenAILegacyChatProvider implements ChatProvider {
     return clone;
   }
 
+  withMaxCompletionTokens(maxCompletionTokens: number): OpenAILegacyChatProvider {
+    return this.withGenerationKwargs({ max_tokens: maxCompletionTokens });
+  }
+
   private _clone(): OpenAILegacyChatProvider {
     const clone = Object.assign(
       Object.create(Object.getPrototypeOf(this) as object) as OpenAILegacyChatProvider,
diff --git a/packages/kosong/src/providers/openai-responses.ts b/packages/kosong/src/providers/openai-responses.ts
index 4c1677b8..336da282 100644
--- a/packages/kosong/src/providers/openai-responses.ts
+++ b/packages/kosong/src/providers/openai-responses.ts
@@ -975,6 +975,10 @@ export class OpenAIResponsesChatProvider implements ChatProvider {
     return clone;
   }
 
+  withMaxCompletionTokens(maxCompletionTokens: number): OpenAIResponsesChatProvider {
+    return this.withGenerationKwargs({ max_output_tokens: maxCompletionTokens });
+  }
+
   private _clone(): OpenAIResponsesChatProvider {
     const clone = Object.assign(
       Object.create(Object.getPrototypeOf(this) as object) as OpenAIResponsesChatProvider,
diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts
index b539ca1d..b65a607b 100644
--- a/packages/kosong/test/anthropic.test.ts
+++ b/packages/kosong/test/anthropic.test.ts
@@ -2138,4 +2138,34 @@ describe('AnthropicChatProvider constructor max_tokens', () => {
   it('clamps defaultMaxTokens above the documented ceiling for known models', async () => {
     expect(await maxTokensFor('claude-opus-4-7', { defaultMaxTokens: 999999 })).toBe(128000);
   });
+
+  it('withMaxCompletionTokens sets max_tokens on the cloned provider', async () => {
+    const original = new AnthropicChatProvider({
+      model: 'claude-opus-4-7',
+      apiKey: 'test-key',
+      stream: false,
+    });
+    const provider = original.withMaxCompletionTokens(2048);
+    const history: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] },
+    ];
+    const body = await captureRequestBody(provider, '', [], history);
+
+    expect(provider).not.toBe(original);
+    expect(body['max_tokens']).toBe(2048);
+  });
+
+  it('withMaxCompletionTokens clamps above the documented ceiling for known models', async () => {
+    const provider = new AnthropicChatProvider({
+      model: 'claude-opus-4-7',
+      apiKey: 'test-key',
+      stream: false,
+    }).withMaxCompletionTokens(999999);
+    const history: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] },
+    ];
+    const body = await captureRequestBody(provider, '', [], history);
+
+    expect(body['max_tokens']).toBe(128000);
+  });
 });
diff --git a/packages/kosong/test/google-genai.test.ts b/packages/kosong/test/google-genai.test.ts
index 41a24a01..66ee1c4a 100644
--- a/packages/kosong/test/google-genai.test.ts
+++ b/packages/kosong/test/google-genai.test.ts
@@ -605,6 +605,19 @@ describe('GoogleGenAIChatProvider', () => {
       expect(config['temperature']).toBe(0.7);
       expect(config['max_output_tokens']).toBe(2048);
     });
+
+    it('withMaxCompletionTokens sets max_output_tokens on the cloned provider', async () => {
+      const original = createProvider();
+      const provider = original.withMaxCompletionTokens(1024);
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] },
+      ];
+      const body = await captureRequestBody(provider, '', [], history);
+
+      const config = body['config'] as Record<string, unknown>;
+      expect(provider).not.toBe(original);
+      expect(config['max_output_tokens']).toBe(1024);
+    });
   });
 
   describe('tool name inference from tool_call_id (orphan tool messages)', () => {
diff --git a/packages/kosong/test/openai-legacy.test.ts b/packages/kosong/test/openai-legacy.test.ts
index fc851fc6..f01c6aad 100644
--- a/packages/kosong/test/openai-legacy.test.ts
+++ b/packages/kosong/test/openai-legacy.test.ts
@@ -424,6 +424,18 @@ describe('OpenAILegacyChatProvider', () => {
       expect(body['temperature']).toBe(0.7);
       expect(body['max_tokens']).toBe(2048);
     });
+
+    it('withMaxCompletionTokens sets max_tokens on the cloned provider', async () => {
+      const original = createProvider();
+      const provider = original.withMaxCompletionTokens(1024);
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] },
+      ];
+      const body = await captureRequestBody(provider, '', [], history);
+
+      expect(provider).not.toBe(original);
+      expect(body['max_tokens']).toBe(1024);
+    });
   });
 
   describe('maxTokens option', () => {
diff --git a/packages/kosong/test/openai-responses.test.ts b/packages/kosong/test/openai-responses.test.ts
index 93e27b7d..e3b69d07 100644
--- a/packages/kosong/test/openai-responses.test.ts
+++ b/packages/kosong/test/openai-responses.test.ts
@@ -742,6 +742,18 @@ describe('OpenAIResponsesChatProvider', () => {
       expect(body['temperature']).toBe(0.7);
       expect(body['max_output_tokens']).toBe(2048);
     });
+
+    it('withMaxCompletionTokens sets max_output_tokens on the cloned provider', async () => {
+      const original = createProvider();
+      const provider = original.withMaxCompletionTokens(1024);
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'Hi' }], toolCalls: [] },
+      ];
+      const body = await captureRequestBody(provider, '', [], history);
+
+      expect(provider).not.toBe(original);
+      expect(body['max_output_tokens']).toBe(1024);
+    });
   });
 
   describe('reasoning configuration', () => {

From 5c160ae55085519ccd6dd3afaa1a227d66c2fe8d Mon Sep 17 00:00:00 2001
From: _Kerman <kermanx@qq.com>
Date: Mon, 1 Jun 2026 16:18:06 +0800
Subject: [PATCH 3/4] fix: honor compaction output budgets

---
 .../agent-core/src/agent/compaction/full.ts   | 23 ++++--
 .../test/agent/compaction/full.test.ts        | 70 ++++++++++++++++++-
 packages/kosong/src/providers/anthropic.ts    |  4 +-
 packages/kosong/test/anthropic.test.ts        | 36 +++++++++-
 4 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 4e1d75a6..eac1b65b 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -25,10 +25,18 @@ import {
   estimateTokens,
   estimateTokensForMessages,
 } from '../../utils/tokens';
+import {
+  applyCompletionBudget,
+  resolveCompletionBudget,
+} from '../../utils/completion-budget';
 import compactionInstructionTemplate from './compaction-instruction.md';
 import { renderMessagesToText } from './render-messages';
 import type { CompactionBeginData, CompactionResult } from './types';
-import { DEFAULT_COMPACTION_CONFIG, DefaultCompactionStrategy, type CompactionStrategy } from './strategy';
+import {
+  DEFAULT_COMPACTION_CONFIG,
+  DefaultCompactionStrategy,
+  type CompactionStrategy,
+} from './strategy';
 
 type CompactionTelemetryTrigger = CompactionBeginData['source'] | 'manual-with-prompt' | 'unknown';
 
@@ -232,12 +240,13 @@ export class FullCompaction {
       await this.triggerPreCompactHook(data, tokensBefore, signal);
 
       const model = this.agent.config.model;
-      const baseProvider = this.agent.config.provider;
-      const maxContextTokens = this.agent.config.modelCapabilities.max_context_tokens;
-      const provider =
-        maxContextTokens > 0
-          ? baseProvider.withMaxCompletionTokens?.(maxContextTokens) ?? baseProvider
-          : baseProvider;
+      const provider = applyCompletionBudget({
+        provider: this.agent.config.provider,
+        budget: resolveCompletionBudget({
+          reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize,
+        }),
+        capability: this.agent.config.modelCapabilities,
+      });
 
       const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS);
       let usage: TokenUsage | null;
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 2bc461a9..703413cf 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -1457,7 +1457,7 @@ describe('FullCompaction', () => {
     const events = await ctx.untilTurnEnd();
 
     expect(callCount).toBe(3);
-    expect(compactionMaxCompletionTokens).toEqual([undefined]);
+    expect(compactionMaxCompletionTokens).toEqual([32000]);
     expect(events).toContainEqual(
       expect.objectContaining({
         event: 'compaction.started',
@@ -1481,6 +1481,74 @@ describe('FullCompaction', () => {
     );
   });
 
+  it('honors completion budget env hard caps during compaction', async () => {
+    vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '8192');
+    let callCount = 0;
+    const compactionMaxCompletionTokens: unknown[] = [];
+    const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => {
+      callCount += 1;
+      if (callCount === 1) {
+        throw new APIContextOverflowError(400, 'Context length exceeded', 'req-hard-cap');
+      }
+      if (callCount === 2) {
+        compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider));
+        return textResult('Hard cap compacted summary.');
+      }
+      await callbacks?.onMessagePart?.({
+        type: 'text',
+        text: 'Recovered with hard cap.',
+      });
+      return textResult('Recovered with hard cap.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+    ctx.newEvents();
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry with hard cap' }] });
+    await ctx.untilTurnEnd();
+
+    expect(callCount).toBe(3);
+    expect(compactionMaxCompletionTokens).toEqual([8192]);
+  });
+
+  it('honors completion budget env opt-out during compaction', async () => {
+    vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '0');
+    let callCount = 0;
+    const compactionMaxCompletionTokens: unknown[] = [];
+    const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => {
+      callCount += 1;
+      if (callCount === 1) {
+        throw new APIContextOverflowError(400, 'Context length exceeded', 'req-opt-out');
+      }
+      if (callCount === 2) {
+        compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider));
+        return textResult('Opt-out compacted summary.');
+      }
+      await callbacks?.onMessagePart?.({
+        type: 'text',
+        text: 'Recovered with opt-out.',
+      });
+      return textResult('Recovered with opt-out.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+    ctx.newEvents();
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry with opt-out' }] });
+    await ctx.untilTurnEnd();
+
+    expect(callCount).toBe(3);
+    expect(compactionMaxCompletionTokens).toEqual([undefined]);
+  });
+
   it('ignores filtered assistant placeholders when checking the retained overflow suffix', async () => {
     let callCount = 0;
     const generate: GenerateFn = async (_provider, _system, _tools, _history, callbacks) => {
diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
index b0344d78..63405280 100644
--- a/packages/kosong/src/providers/anthropic.ts
+++ b/packages/kosong/src/providers/anthropic.ts
@@ -1084,7 +1084,9 @@ export class AnthropicChatProvider implements ChatProvider {
 
   withMaxCompletionTokens(maxCompletionTokens: number): AnthropicChatProvider {
     return this._withGenerationKwargs({
-      max_tokens: resolveDefaultMaxTokens(this._model, maxCompletionTokens),
+      max_tokens:
+        this._generationKwargs.max_tokens ??
+        resolveDefaultMaxTokens(this._model, maxCompletionTokens),
     });
   }
 
diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts
index b65a607b..62f7ebaa 100644
--- a/packages/kosong/test/anthropic.test.ts
+++ b/packages/kosong/test/anthropic.test.ts
@@ -2139,13 +2139,15 @@ describe('AnthropicChatProvider constructor max_tokens', () => {
     expect(await maxTokensFor('claude-opus-4-7', { defaultMaxTokens: 999999 })).toBe(128000);
   });
 
-  it('withMaxCompletionTokens sets max_tokens on the cloned provider', async () => {
+  it('withMaxCompletionTokens sets max_tokens when no existing cap is present', async () => {
     const original = new AnthropicChatProvider({
       model: 'claude-opus-4-7',
       apiKey: 'test-key',
       stream: false,
     });
-    const provider = original.withMaxCompletionTokens(2048);
+    const provider = original
+      .withGenerationKwargs({ max_tokens: undefined })
+      .withMaxCompletionTokens(2048);
     const history: Message[] = [
       { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] },
     ];
@@ -2155,6 +2157,36 @@ describe('AnthropicChatProvider constructor max_tokens', () => {
     expect(body['max_tokens']).toBe(2048);
   });
 
+  it('withMaxCompletionTokens preserves an existing lower max_tokens cap', async () => {
+    const provider = new AnthropicChatProvider({
+      model: 'claude-opus-4-7',
+      apiKey: 'test-key',
+      stream: false,
+      defaultMaxTokens: 1024,
+    }).withMaxCompletionTokens(128000);
+    const history: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] },
+    ];
+    const body = await captureRequestBody(provider, '', [], history);
+
+    expect(body['max_tokens']).toBe(1024);
+  });
+
+  it('withMaxCompletionTokens preserves an existing higher max_tokens cap', async () => {
+    const provider = new AnthropicChatProvider({
+      model: 'unknown-model',
+      apiKey: 'test-key',
+      stream: false,
+      defaultMaxTokens: 128000,
+    }).withMaxCompletionTokens(1024);
+    const history: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] },
+    ];
+    const body = await captureRequestBody(provider, '', [], history);
+
+    expect(body['max_tokens']).toBe(128000);
+  });
+
   it('withMaxCompletionTokens clamps above the documented ceiling for known models', async () => {
     const provider = new AnthropicChatProvider({
       model: 'claude-opus-4-7',

From 534844a62ea6e91f467427397f4e78e2780d9a6d Mon Sep 17 00:00:00 2001
From: _Kerman <kermanx@qq.com>
Date: Mon, 1 Jun 2026 16:48:31 +0800
Subject: [PATCH 4/4] fix: honor anthropic completion hard caps

---
 packages/kosong/src/providers/anthropic.ts | 16 +++++++++++++---
 packages/kosong/test/anthropic.test.ts     | 14 ++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
index 63405280..06e5b6c9 100644
--- a/packages/kosong/src/providers/anthropic.ts
+++ b/packages/kosong/src/providers/anthropic.ts
@@ -815,6 +815,7 @@ export class AnthropicChatProvider implements ChatProvider {
   private _defaultHeaders: Record<string, string> | undefined;
   private _clientFactory: ((auth: ProviderRequestAuth) => Anthropic) | undefined;
   private _adaptiveThinking: boolean | undefined;
+  private _explicitMaxTokens: boolean;
 
   constructor(options: AnthropicOptions) {
     this._model = options.model;
@@ -827,6 +828,7 @@ export class AnthropicChatProvider implements ChatProvider {
     this._defaultHeaders = options.defaultHeaders;
     this._clientFactory = options.clientFactory;
     this._client = this._apiKey === undefined ? undefined : this._buildClient(this._apiKey);
+    this._explicitMaxTokens = options.defaultMaxTokens !== undefined;
     this._generationKwargs = {
       max_tokens: resolveDefaultMaxTokens(options.model, options.defaultMaxTokens),
       betaFeatures: options.betaFeatures ?? [INTERLEAVED_THINKING_BETA],
@@ -1083,16 +1085,24 @@ export class AnthropicChatProvider implements ChatProvider {
   }
 
   withMaxCompletionTokens(maxCompletionTokens: number): AnthropicChatProvider {
-    return this._withGenerationKwargs({
+    const requestedCap = resolveDefaultMaxTokens(this._model, maxCompletionTokens);
+    const existingCap = this._generationKwargs.max_tokens;
+    const clone = this._withGenerationKwargs({
       max_tokens:
-        this._generationKwargs.max_tokens ??
-        resolveDefaultMaxTokens(this._model, maxCompletionTokens),
+        existingCap === undefined || this._explicitMaxTokens
+          ? existingCap ?? requestedCap
+          : Math.min(existingCap, requestedCap),
     });
+    clone._explicitMaxTokens = this._explicitMaxTokens;
+    return clone;
   }
 
   private _withGenerationKwargs(kwargs: Partial<AnthropicGenerationKwargs>): AnthropicChatProvider {
     const clone = this._clone();
     clone._generationKwargs = { ...clone._generationKwargs, ...kwargs };
+    if ('max_tokens' in kwargs) {
+      clone._explicitMaxTokens = kwargs.max_tokens !== undefined;
+    }
     return clone;
   }
 
diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts
index 62f7ebaa..5f40f9e6 100644
--- a/packages/kosong/test/anthropic.test.ts
+++ b/packages/kosong/test/anthropic.test.ts
@@ -2157,6 +2157,20 @@ describe('AnthropicChatProvider constructor max_tokens', () => {
     expect(body['max_tokens']).toBe(2048);
   });
 
+  it('withMaxCompletionTokens lowers the inferred model default cap', async () => {
+    const provider = new AnthropicChatProvider({
+      model: 'claude-opus-4-7',
+      apiKey: 'test-key',
+      stream: false,
+    }).withMaxCompletionTokens(8192);
+    const history: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'hi' }], toolCalls: [] },
+    ];
+    const body = await captureRequestBody(provider, '', [], history);
+
+    expect(body['max_tokens']).toBe(8192);
+  });
+
   it('withMaxCompletionTokens preserves an existing lower max_tokens cap', async () => {
     const provider = new AnthropicChatProvider({
       model: 'claude-opus-4-7',