From 08f74b9bb5a7dac12b2624a9b1dba4abf7d5b3f2 Mon Sep 17 00:00:00 2001 From: Gladys Chen Cheng Date: Sun, 24 May 2026 18:38:34 -0400 Subject: [PATCH 1/3] fix(opencode): break auto-compact loop when compaction makes no progress Closes #28543 When a model's configured context window is smaller than what the provider actually serves (e.g. GitHub Copilot's claude-opus-4.7 mapped at 144K in models.dev when the real ceiling is higher), every successful turn keeps reporting "overflowing" token counts. Auto-compaction then fires before each new prompt AND inside the processor on each finish-step, and we never escape it. Add a stall detector that compares the reported token count between consecutive auto-compaction triggers in a single run. If a second auto-compaction would fire with the token count not having dropped by at least 5%, throw a typed ContextOverflowError instead of recreating the compaction task forever. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/opencode/src/session/overflow.ts | 22 +++++- packages/opencode/src/session/prompt.ts | 61 ++++++++++++-- .../opencode/test/session/compaction.test.ts | 56 +++++++++++++ packages/opencode/test/session/prompt.test.ts | 79 +++++++++++++++++++ 4 files changed, 208 insertions(+), 10 deletions(-) diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts index d01fe5c624dd..f1c857ed941a 100644 --- a/packages/opencode/src/session/overflow.ts +++ b/packages/opencode/src/session/overflow.ts @@ -26,7 +26,23 @@ export function isOverflow(input: { if (input.cfg.compaction?.auto === false) return false if (input.model.limit.context === 0) return false - const count = - input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write - return count >= usable(input) + return tokenCount(input.tokens) >= usable(input) +} + +export function tokenCount(tokens: MessageV2.Assistant["tokens"]) { + return tokens.total || tokens.input + 
tokens.output + tokens.cache.read + tokens.cache.write +} + +// Returns true if a previous auto-compaction triggered at `previousTokens` did +// not reduce reported token usage by at least (1 - threshold). Defaults to a +// 5% reduction; less than that signals compaction is not making progress — +// typically because the configured context window is smaller than what the +// provider actually serves, so auto-compaction would loop indefinitely. +export function autoCompactStalled(input: { + previousTokens: number | undefined + currentTokens: number + threshold?: number +}) { + if (input.previousTokens === undefined) return false + return input.currentTokens >= input.previousTokens * (input.threshold ?? 0.95) } diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index 2fc93c482521..36e346e00d0e 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -11,6 +11,7 @@ import { ModelID, ProviderID } from "../provider/schema" import { type Tool as AITool, tool, jsonSchema } from "ai" import type { JSONSchema7 } from "@ai-sdk/provider" import { SessionCompaction } from "./compaction" +import { autoCompactStalled, tokenCount } from "./overflow" import { Bus } from "../bus" import { SystemPrompt } from "./system" import { Instruction } from "./instruction" @@ -1242,6 +1243,12 @@ export const layer = Layer.effect( const slog = elog.with({ sessionID }) let structured: unknown let step = 0 + // Token count reported when the previous auto-compaction was triggered + // in this run. Used to detect a stalled compaction loop — e.g. when the + // model's configured context window is smaller than what the provider + // actually serves, so isOverflow stays true after every compaction and + // we'd otherwise spin forever. See autoCompactStalled in overflow.ts. 
+ let prevAutoCompactTokens: number | undefined const session = yield* sessions.get(sessionID).pipe(Effect.orDie) while (true) { @@ -1303,13 +1310,33 @@ export const layer = Layer.effect( continue } - if ( - lastFinished && - lastFinished.summary !== true && - (yield* compaction.isOverflow({ tokens: lastFinished.tokens, model })) - ) { - yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true }) - continue + if (lastFinished && lastFinished.summary !== true) { + const overflowing = yield* compaction.isOverflow({ tokens: lastFinished.tokens, model }) + if (overflowing) { + const current = tokenCount(lastFinished.tokens) + if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) { + const error = new MessageV2.ContextOverflowError({ + message: + `Auto-compaction made no meaningful progress ` + + `(${prevAutoCompactTokens} → ${current} tokens reported). ` + + `Aborting to prevent an infinite loop. The model's configured context window ` + + `may be smaller than what the provider actually serves — consider disabling ` + + `auto-compaction in your config, or update the model's context limit.`, + }) + yield* slog.warn("auto-compact loop guard tripped", { + previousTokens: prevAutoCompactTokens, + currentTokens: current, + modelID: model.id, + providerID: model.providerID, + }) + yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() }) + throw error + } + prevAutoCompactTokens = current + yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true }) + continue + } + prevAutoCompactTokens = undefined } const agent = yield* agents.get(lastUser.agent) @@ -1459,6 +1486,26 @@ export const layer = Layer.effect( if (result === "stop") return "break" as const if (result === "compact") { + const current = tokenCount(handle.message.tokens) + if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) { + const error = new 
MessageV2.ContextOverflowError({ + message: + `Auto-compaction made no meaningful progress ` + + `(${prevAutoCompactTokens} → ${current} tokens reported). ` + + `Aborting to prevent an infinite loop. The model's configured context window ` + + `may be smaller than what the provider actually serves — consider disabling ` + + `auto-compaction in your config, or update the model's context limit.`, + }) + yield* slog.warn("auto-compact loop guard tripped (post-call)", { + previousTokens: prevAutoCompactTokens, + currentTokens: current, + modelID: model.id, + providerID: model.providerID, + }) + yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() }) + throw error + } + prevAutoCompactTokens = current yield* compaction.create({ sessionID, agent: lastUser.agent, diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 55ddc621cac2..1e9e9abfc8ae 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -8,6 +8,7 @@ import { Image } from "@/image/image" import { Agent } from "../../src/agent/agent" import { LLM } from "../../src/session/llm" import { SessionCompaction } from "../../src/session/compaction" +import { autoCompactStalled, tokenCount } from "../../src/session/overflow" import { Token } from "@/util/token" import * as Log from "@opencode-ai/core/util/log" import { Permission } from "../../src/permission" @@ -558,6 +559,61 @@ describe("session.compaction.isOverflow", () => { ) }) +describe("session.overflow.tokenCount", () => { + test("prefers tokens.total when provided", () => { + const tokens = { total: 12_345, input: 1, output: 2, reasoning: 3, cache: { read: 4, write: 5 } } + expect(tokenCount(tokens)).toBe(12_345) + }) + + test("sums input + output + cache.read + cache.write when total is missing", () => { + // reasoning is intentionally not summed — preserves the existing isOverflow accounting. 
+ const tokens = { input: 100, output: 50, reasoning: 25, cache: { read: 30, write: 20 } } + expect(tokenCount(tokens)).toBe(200) + }) + + test("sums when total is 0 (falsy)", () => { + const tokens = { total: 0, input: 10, output: 20, reasoning: 5, cache: { read: 3, write: 2 } } + expect(tokenCount(tokens)).toBe(35) + }) +}) + +describe("session.overflow.autoCompactStalled", () => { + test("returns false on the first auto-compaction (no prior token count)", () => { + expect(autoCompactStalled({ previousTokens: undefined, currentTokens: 200_000 })).toBe(false) + }) + + test("returns true when token count did not drop (bug #28543 repro)", () => { + expect(autoCompactStalled({ previousTokens: 236_900, currentTokens: 236_900 })).toBe(true) + }) + + test("returns true when reduction is below the 5% default threshold", () => { + // previous=200K, after compaction current=195K, reduction=2.5% → still stalled + expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 195_000 })).toBe(true) + }) + + test("returns false when reduction is at or above the 5% default threshold", () => { + // previous=200K, after compaction current=180K, reduction=10% → healthy + expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 180_000 })).toBe(false) + }) + + test("returns false right at the 5% boundary", () => { + // previous=200K, current=190K = exactly 95% → stalled (>=) + expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })).toBe(true) + // current=189_999 → just below 95% → progress + expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 189_999 })).toBe(false) + }) + + test("honors a custom threshold override", () => { + // With threshold=0.5 we require a 50% reduction + expect(autoCompactStalled({ previousTokens: 100_000, currentTokens: 60_000, threshold: 0.5 })).toBe(true) + expect(autoCompactStalled({ previousTokens: 100_000, currentTokens: 40_000, threshold: 0.5 })).toBe(false) + }) + + test("returns true when 
current somehow exceeds previous", () => { + expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 240_000 })).toBe(true) + }) +}) + describe("session.compaction.create", () => { it.live( "creates a compaction user message and part", diff --git a/packages/opencode/test/session/prompt.test.ts b/packages/opencode/test/session/prompt.test.ts index ff9ded4d1927..ec26a838228d 100644 --- a/packages/opencode/test/session/prompt.test.ts +++ b/packages/opencode/test/session/prompt.test.ts @@ -2314,3 +2314,82 @@ noLLMServer.instance( }), 30_000, ) + +// Regression test for #28543: when the model's configured context window is +// smaller than what the provider actually serves, every successful model call +// keeps reporting "overflowing" token counts. Without a loop guard the runner +// keeps re-triggering auto-compaction even though it cannot reduce usage; with +// the guard it fails with a typed ContextOverflowError as soon as a second +// auto-compaction would fire without the reported token count having dropped. +it.instance( + "auto-compact loop guard breaks when compaction makes no progress", + () => + Effect.gen(function* () { + const { llm } = yield* useServerConfig(providerCfg) + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + const sessionSvc = yield* Session.Service + const session = yield* sessions.create({ + title: "Loop guard repro", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + + // Test model: context=100K, output=10K → usable = 90K. A reported + // count of 90_001 tokens is just over the overflow bar. + const overflow = { input: 80_000, output: 10_001 } + + // Seed a prior finished assistant whose reported tokens already + // overflow. This mimics the real-world pattern where the previous turn + // already consumed more "tokens" than the model's misconfigured limit. 
+ const seedUser = yield* user(session.id, "earlier prompt") + yield* sessionSvc.updateMessage({ + id: MessageID.ascending(), + role: "assistant", + parentID: seedUser.id, + sessionID: session.id, + mode: "build", + agent: "build", + cost: 0, + path: { cwd: "/tmp", root: "/tmp" }, + tokens: { + input: overflow.input, + output: overflow.output, + reasoning: 0, + cache: { read: 0, write: 0 }, + total: overflow.input + overflow.output, + }, + modelID: ref.modelID, + providerID: ref.providerID, + time: { created: Date.now() }, + finish: "stop", + }) + + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "next prompt" }], + }) + + // 1) compaction summary call — small response so the summary itself + // finishes cleanly. summary=true marks this message so it never + // triggers the pre-call overflow check. + yield* llm.text("compaction summary", { usage: { input: 1, output: 1 } }) + // 2) post-compact continue turn — STILL reports overflow → the + // processor sets needsCompaction=true → the post-call guard trips. 
+ yield* llm.text("second response (still overflowing)", { usage: overflow }) + + const exit = yield* prompt.loop({ sessionID: session.id }).pipe(Effect.exit) + + expect(Exit.isFailure(exit)).toBe(true) + if (Exit.isFailure(exit)) { + const err = Cause.squash(exit.cause) + expect(MessageV2.ContextOverflowError.isInstance(err)).toBe(true) + if (MessageV2.ContextOverflowError.isInstance(err)) { + expect(err.data.message).toContain("Auto-compaction made no meaningful progress") + } + } + }), + 30_000, +) + From 40506be937a50919a626cdfb5119cfc8a1497ce6 Mon Sep 17 00:00:00 2001 From: Gladys Chen Cheng Date: Sun, 24 May 2026 22:28:23 -0400 Subject: [PATCH 2/3] fix(opencode): treat exactly-5% reduction as progress, not stalled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review found the boundary check used >= instead of >, so a reduction that hit exactly the threshold (e.g. 200K → 190K) was flagged as stalled and threw ContextOverflowError, contradicting the PR description that requires "at least 5% reduction" to escape the stall classification. The boundary test name said "returns false right at the 5% boundary" but the assertion said true — which made the inconsistency obvious. Change the comparison to strict `>`. Now an exactly-(1-threshold) reduction counts as progress; only reductions strictly smaller than (1 - threshold) trip the guard. Update the boundary test name, comment, and assertions to match. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/opencode/src/session/overflow.ts | 11 +++++++---- packages/opencode/test/session/compaction.test.ts | 12 +++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts index f1c857ed941a..96b1283fa27f 100644 --- a/packages/opencode/src/session/overflow.ts +++ b/packages/opencode/src/session/overflow.ts @@ -35,14 +35,17 @@ export function tokenCount(tokens: MessageV2.Assistant["tokens"]) { // Returns true if a previous auto-compaction triggered at `previousTokens` did // not reduce reported token usage by at least (1 - threshold). Defaults to a -// 5% reduction; less than that signals compaction is not making progress — -// typically because the configured context window is smaller than what the -// provider actually serves, so auto-compaction would loop indefinitely. +// 5% reduction; anything less than that signals compaction is not making +// progress — typically because the configured context window is smaller than +// what the provider actually serves, so auto-compaction would loop forever. +// +// An exactly-(1-threshold) reduction (e.g. 200K → 190K at the default) counts +// as progress and does NOT trip the guard. export function autoCompactStalled(input: { previousTokens: number | undefined currentTokens: number threshold?: number }) { if (input.previousTokens === undefined) return false - return input.currentTokens >= input.previousTokens * (input.threshold ?? 0.95) + return input.currentTokens > input.previousTokens * (input.threshold ?? 
0.95) } diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 1e9e9abfc8ae..cbc559c8919a 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -596,11 +596,13 @@ describe("session.overflow.autoCompactStalled", () => { expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 180_000 })).toBe(false) }) - test("returns false right at the 5% boundary", () => { - // previous=200K, current=190K = exactly 95% → stalled (>=) - expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })).toBe(true) - // current=189_999 → just below 95% → progress - expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 189_999 })).toBe(false) + test("counts an exactly-threshold reduction as progress (not stalled)", () => { + // previous=200K, current=190K = exactly a 5% reduction. The PR semantics + // require "at least 5% reduction" to escape the stall classification, and + // 5% satisfies "at least 5%" — so the guard must NOT trip here. + expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })).toBe(false) + // current=190_001 — reduction is 4.9995% (strictly less than 5%) → stalled. + expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_001 })).toBe(true) }) test("honors a custom threshold override", () => { From 18e11209ec63e9187d3d7e6bb3f677f797c0ced4 Mon Sep 17 00:00:00 2001 From: Gladys Chen Cheng Date: Sun, 24 May 2026 22:45:55 -0400 Subject: [PATCH 3/3] fix(opencode): treat 0 -> 0 token compactions as stalled Switching the percentage check from >= to > closed the boundary false-positive (200K -> 190K now counts as progress) but opened a zero-token hole: when the provider directly throws ContextOverflowError, SessionProcessor.halt sets needsCompaction without running step-finish, so handle.message.tokens stays at the zero-initialized values. 
The percentage check then evaluates 0 > 0 * 0.95 = false on every fire and the loop keeps recreating compactions. Add a one-line guard so two consecutive zero-token compactions trip the stall detector. Unit test added alongside the boundary cases. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/opencode/src/session/overflow.ts | 4 ++++ packages/opencode/test/session/compaction.test.ts | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts index 96b1283fa27f..4c6cc212d45b 100644 --- a/packages/opencode/src/session/overflow.ts +++ b/packages/opencode/src/session/overflow.ts @@ -47,5 +47,9 @@ export function autoCompactStalled(input: { threshold?: number }) { if (input.previousTokens === undefined) return false + // Provider-error compactions don't populate tokens (no step-finish runs), + // so we see 0 → 0. With no progress signal, treat as stalled — otherwise + // the percentage check below would keep returning false forever. + if (input.previousTokens === 0 && input.currentTokens === 0) return true return input.currentTokens > input.previousTokens * (input.threshold ?? 0.95) } diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index cbc559c8919a..10f27a76347c 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -614,6 +614,15 @@ describe("session.overflow.autoCompactStalled", () => { test("returns true when current somehow exceeds previous", () => { expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 240_000 })).toBe(true) }) + + test("counts repeated zero-token compactions as stalled", () => { + // The provider-error compaction path (ContextOverflowError caught in + // SessionProcessor.halt) skips step-finish, so handle.message.tokens + // stays at the zero-initialized value. 
Two such triggers in a row give + // us no progress signal at all — without this guard the percentage + // check would keep returning false (0 > 0 * 0.95 = false) forever. + expect(autoCompactStalled({ previousTokens: 0, currentTokens: 0 })).toBe(true) + }) }) describe("session.compaction.create", () => {