From 08f74b9bb5a7dac12b2624a9b1dba4abf7d5b3f2 Mon Sep 17 00:00:00 2001
From: Gladys Chen Cheng <chen0552@algonquinlive.com>
Date: Sun, 24 May 2026 18:38:34 -0400
Subject: [PATCH 1/3] fix(opencode): break auto-compact loop when compaction
 makes no progress

Closes #28543

When a model's configured context window is smaller than what the
provider actually serves (e.g. GitHub Copilot's claude-opus-4.7 mapped
at 144K in models.dev when the real ceiling is higher), every successful
turn keeps reporting "overflowing" token counts. Auto-compaction then
fires before each new prompt AND inside the processor on each finish-step,
and we never escape it.

Add a stall detector that compares the reported token count between
consecutive auto-compaction triggers in a single run. If a second
auto-compaction would fire while the reported count has dropped by less
than 5% since the previous trigger, throw a typed ContextOverflowError
instead of recreating the compaction task forever.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/opencode/src/session/overflow.ts     | 22 +++++-
 packages/opencode/src/session/prompt.ts       | 61 ++++++++++++--
 .../opencode/test/session/compaction.test.ts  | 56 +++++++++++++
 packages/opencode/test/session/prompt.test.ts | 79 +++++++++++++++++++
 4 files changed, 208 insertions(+), 10 deletions(-)

diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts
index d01fe5c624dd..f1c857ed941a 100644
--- a/packages/opencode/src/session/overflow.ts
+++ b/packages/opencode/src/session/overflow.ts
@@ -26,7 +26,23 @@ export function isOverflow(input: {
   if (input.cfg.compaction?.auto === false) return false
   if (input.model.limit.context === 0) return false
 
-  const count =
-    input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
-  return count >= usable(input)
+  return tokenCount(input.tokens) >= usable(input)
+}
+
+export function tokenCount(tokens: MessageV2.Assistant["tokens"]) {
+  return tokens.total || tokens.input + tokens.output + tokens.cache.read + tokens.cache.write
+}
+
+// Returns true if a previous auto-compaction triggered at `previousTokens` did
+// not reduce reported token usage by at least (1 - threshold). Defaults to a
+// 5% reduction; less than that signals compaction is not making progress —
+// typically because the configured context window is smaller than what the
+// provider actually serves, so auto-compaction would loop indefinitely.
+export function autoCompactStalled(input: {
+  previousTokens: number | undefined
+  currentTokens: number
+  threshold?: number
+}) {
+  if (input.previousTokens === undefined) return false
+  return input.currentTokens >= input.previousTokens * (input.threshold ?? 0.95)
 }
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index 2fc93c482521..36e346e00d0e 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -11,6 +11,7 @@ import { ModelID, ProviderID } from "../provider/schema"
 import { type Tool as AITool, tool, jsonSchema } from "ai"
 import type { JSONSchema7 } from "@ai-sdk/provider"
 import { SessionCompaction } from "./compaction"
+import { autoCompactStalled, tokenCount } from "./overflow"
 import { Bus } from "../bus"
 import { SystemPrompt } from "./system"
 import { Instruction } from "./instruction"
@@ -1242,6 +1243,12 @@ export const layer = Layer.effect(
         const slog = elog.with({ sessionID })
         let structured: unknown
         let step = 0
+        // Token count reported when the previous auto-compaction was triggered
+        // in this run. Used to detect a stalled compaction loop — e.g. when the
+        // model's configured context window is smaller than what the provider
+        // actually serves, so isOverflow stays true after every compaction and
+        // we'd otherwise spin forever. See autoCompactStalled in overflow.ts.
+        let prevAutoCompactTokens: number | undefined
         const session = yield* sessions.get(sessionID).pipe(Effect.orDie)
 
         while (true) {
@@ -1303,13 +1310,33 @@ export const layer = Layer.effect(
             continue
           }
 
-          if (
-            lastFinished &&
-            lastFinished.summary !== true &&
-            (yield* compaction.isOverflow({ tokens: lastFinished.tokens, model }))
-          ) {
-            yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
-            continue
+          if (lastFinished && lastFinished.summary !== true) {
+            const overflowing = yield* compaction.isOverflow({ tokens: lastFinished.tokens, model })
+            if (overflowing) {
+              const current = tokenCount(lastFinished.tokens)
+              if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) {
+                const error = new MessageV2.ContextOverflowError({
+                  message:
+                    `Auto-compaction made no meaningful progress ` +
+                    `(${prevAutoCompactTokens} → ${current} tokens reported). ` +
+                    `Aborting to prevent an infinite loop. The model's configured context window ` +
+                    `may be smaller than what the provider actually serves — consider disabling ` +
+                    `auto-compaction in your config, or update the model's context limit.`,
+                })
+                yield* slog.warn("auto-compact loop guard tripped", {
+                  previousTokens: prevAutoCompactTokens,
+                  currentTokens: current,
+                  modelID: model.id,
+                  providerID: model.providerID,
+                })
+                yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() })
+                throw error
+              }
+              prevAutoCompactTokens = current
+              yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
+              continue
+            }
+            prevAutoCompactTokens = undefined
           }
 
           const agent = yield* agents.get(lastUser.agent)
@@ -1459,6 +1486,26 @@ export const layer = Layer.effect(
 
             if (result === "stop") return "break" as const
             if (result === "compact") {
+              const current = tokenCount(handle.message.tokens)
+              if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) {
+                const error = new MessageV2.ContextOverflowError({
+                  message:
+                    `Auto-compaction made no meaningful progress ` +
+                    `(${prevAutoCompactTokens} → ${current} tokens reported). ` +
+                    `Aborting to prevent an infinite loop. The model's configured context window ` +
+                    `may be smaller than what the provider actually serves — consider disabling ` +
+                    `auto-compaction in your config, or update the model's context limit.`,
+                })
+                yield* slog.warn("auto-compact loop guard tripped (post-call)", {
+                  previousTokens: prevAutoCompactTokens,
+                  currentTokens: current,
+                  modelID: model.id,
+                  providerID: model.providerID,
+                })
+                yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() })
+                throw error
+              }
+              prevAutoCompactTokens = current
               yield* compaction.create({
                 sessionID,
                 agent: lastUser.agent,
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 55ddc621cac2..1e9e9abfc8ae 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -8,6 +8,7 @@ import { Image } from "@/image/image"
 import { Agent } from "../../src/agent/agent"
 import { LLM } from "../../src/session/llm"
 import { SessionCompaction } from "../../src/session/compaction"
+import { autoCompactStalled, tokenCount } from "../../src/session/overflow"
 import { Token } from "@/util/token"
 import * as Log from "@opencode-ai/core/util/log"
 import { Permission } from "../../src/permission"
@@ -558,6 +559,61 @@ describe("session.compaction.isOverflow", () => {
   )
 })
 
+describe("session.overflow.tokenCount", () => {
+  test("prefers tokens.total when provided", () => {
+    const tokens = { total: 12_345, input: 1, output: 2, reasoning: 3, cache: { read: 4, write: 5 } }
+    expect(tokenCount(tokens)).toBe(12_345)
+  })
+
+  test("sums input + output + cache.read + cache.write when total is missing", () => {
+    // reasoning is intentionally not summed — preserves the existing isOverflow accounting.
+    const tokens = { input: 100, output: 50, reasoning: 25, cache: { read: 30, write: 20 } }
+    expect(tokenCount(tokens)).toBe(200)
+  })
+
+  test("sums when total is 0 (falsy)", () => {
+    const tokens = { total: 0, input: 10, output: 20, reasoning: 5, cache: { read: 3, write: 2 } }
+    expect(tokenCount(tokens)).toBe(35)
+  })
+})
+
+describe("session.overflow.autoCompactStalled", () => {
+  test("returns false on the first auto-compaction (no prior token count)", () => {
+    expect(autoCompactStalled({ previousTokens: undefined, currentTokens: 200_000 })).toBe(false)
+  })
+
+  test("returns true when token count did not drop (bug #28543 repro)", () => {
+    expect(autoCompactStalled({ previousTokens: 236_900, currentTokens: 236_900 })).toBe(true)
+  })
+
+  test("returns true when reduction is below the 5% default threshold", () => {
+    // previous=200K, after compaction current=195K, reduction=2.5% → still stalled
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 195_000 })).toBe(true)
+  })
+
+  test("returns false when reduction is at or above the 5% default threshold", () => {
+    // previous=200K, after compaction current=180K, reduction=10% → healthy
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 180_000 })).toBe(false)
+  })
+
+  test("returns false right at the 5% boundary", () => {
+    // previous=200K, current=190K = exactly 95% → stalled (>=)
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })).toBe(true)
+    // current=189_999 → just below 95% → progress
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 189_999 })).toBe(false)
+  })
+
+  test("honors a custom threshold override", () => {
+    // With threshold=0.5 we require a 50% reduction
+    expect(autoCompactStalled({ previousTokens: 100_000, currentTokens: 60_000, threshold: 0.5 })).toBe(true)
+    expect(autoCompactStalled({ previousTokens: 100_000, currentTokens: 40_000, threshold: 0.5 })).toBe(false)
+  })
+
+  test("returns true when current somehow exceeds previous", () => {
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 240_000 })).toBe(true)
+  })
+})
+
 describe("session.compaction.create", () => {
   it.live(
     "creates a compaction user message and part",
diff --git a/packages/opencode/test/session/prompt.test.ts b/packages/opencode/test/session/prompt.test.ts
index ff9ded4d1927..ec26a838228d 100644
--- a/packages/opencode/test/session/prompt.test.ts
+++ b/packages/opencode/test/session/prompt.test.ts
@@ -2314,3 +2314,82 @@ noLLMServer.instance(
     }),
   30_000,
 )
+
+// Regression test for #28543: when the model's configured context window is
+// smaller than what the provider actually serves, every successful model call
+// keeps reporting "overflowing" token counts. Without a loop guard the runner
+// keeps re-triggering auto-compaction even though it cannot reduce usage; with
+// the guard it bails with a typed ContextOverflowError as soon as a second
+// auto-compaction would fire without the reported token count dropping.
+it.instance(
+  "auto-compact loop guard breaks when compaction makes no progress",
+  () =>
+    Effect.gen(function* () {
+      const { llm } = yield* useServerConfig(providerCfg)
+      const prompt = yield* SessionPrompt.Service
+      const sessions = yield* Session.Service
+      const sessionSvc = yield* Session.Service
+      const session = yield* sessions.create({
+        title: "Loop guard repro",
+        permission: [{ permission: "*", pattern: "*", action: "allow" }],
+      })
+
+      // Test model: context=100K, output=10K → usable = 90K. A reported
+      // count of 90_001 tokens is just over the overflow bar.
+      const overflow = { input: 80_000, output: 10_001 }
+
+      // Seed a prior finished assistant whose reported tokens already
+      // overflow. This mimics the real-world pattern where the previous turn
+      // already consumed more "tokens" than the model's misconfigured limit.
+      const seedUser = yield* user(session.id, "earlier prompt")
+      yield* sessionSvc.updateMessage({
+        id: MessageID.ascending(),
+        role: "assistant",
+        parentID: seedUser.id,
+        sessionID: session.id,
+        mode: "build",
+        agent: "build",
+        cost: 0,
+        path: { cwd: "/tmp", root: "/tmp" },
+        tokens: {
+          input: overflow.input,
+          output: overflow.output,
+          reasoning: 0,
+          cache: { read: 0, write: 0 },
+          total: overflow.input + overflow.output,
+        },
+        modelID: ref.modelID,
+        providerID: ref.providerID,
+        time: { created: Date.now() },
+        finish: "stop",
+      })
+
+      yield* prompt.prompt({
+        sessionID: session.id,
+        agent: "build",
+        noReply: true,
+        parts: [{ type: "text", text: "next prompt" }],
+      })
+
+      // 1) compaction summary call — small response so the summary itself
+      //    finishes cleanly. summary=true marks this message so it never
+      //    triggers the pre-call overflow check.
+      yield* llm.text("compaction summary", { usage: { input: 1, output: 1 } })
+      // 2) post-compact continue turn — STILL reports overflow → the
+      //    processor sets needsCompaction=true → the post-call guard trips.
+      yield* llm.text("second response (still overflowing)", { usage: overflow })
+
+      const exit = yield* prompt.loop({ sessionID: session.id }).pipe(Effect.exit)
+
+      expect(Exit.isFailure(exit)).toBe(true)
+      if (Exit.isFailure(exit)) {
+        const err = Cause.squash(exit.cause)
+        expect(MessageV2.ContextOverflowError.isInstance(err)).toBe(true)
+        if (MessageV2.ContextOverflowError.isInstance(err)) {
+          expect(err.data.message).toContain("Auto-compaction made no meaningful progress")
+        }
+      }
+    }),
+  30_000,
+)
+

From 40506be937a50919a626cdfb5119cfc8a1497ce6 Mon Sep 17 00:00:00 2001
From: Gladys Chen Cheng <chen0552@algonquinlive.com>
Date: Sun, 24 May 2026 22:28:23 -0400
Subject: [PATCH 2/3] fix(opencode): treat exactly-5% reduction as progress,
 not stalled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Review found the boundary check used >= instead of >, so a reduction that
hit exactly the threshold (e.g. 200K → 190K) was flagged as stalled and
threw ContextOverflowError, contradicting the PR description that requires
"at least 5% reduction" to escape stall. The boundary test name said
"returns false right at the 5% boundary" but the assertion said true —
which made the inconsistency obvious.

Change the comparison to strict `>`. A reduction of exactly (1 - threshold)
(5% at the default threshold of 0.95) now counts as progress; only
reductions strictly smaller than that trip the guard. Update the boundary
test name, comment, and assertions to match.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/opencode/src/session/overflow.ts         | 11 +++++++----
 packages/opencode/test/session/compaction.test.ts | 12 +++++++-----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts
index f1c857ed941a..96b1283fa27f 100644
--- a/packages/opencode/src/session/overflow.ts
+++ b/packages/opencode/src/session/overflow.ts
@@ -35,14 +35,17 @@ export function tokenCount(tokens: MessageV2.Assistant["tokens"]) {
 
 // Returns true if a previous auto-compaction triggered at `previousTokens` did
 // not reduce reported token usage by at least (1 - threshold). Defaults to a
-// 5% reduction; less than that signals compaction is not making progress —
-// typically because the configured context window is smaller than what the
-// provider actually serves, so auto-compaction would loop indefinitely.
+// 5% reduction; anything less than that signals compaction is not making
+// progress — typically because the configured context window is smaller than
+// what the provider actually serves, so auto-compaction would loop forever.
+//
+// An exactly-(1-threshold) reduction (e.g. 200K → 190K at the default) counts
+// as progress and does NOT trip the guard.
 export function autoCompactStalled(input: {
   previousTokens: number | undefined
   currentTokens: number
   threshold?: number
 }) {
   if (input.previousTokens === undefined) return false
-  return input.currentTokens >= input.previousTokens * (input.threshold ?? 0.95)
+  return input.currentTokens > input.previousTokens * (input.threshold ?? 0.95)
 }
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 1e9e9abfc8ae..cbc559c8919a 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -596,11 +596,13 @@ describe("session.overflow.autoCompactStalled", () => {
     expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 180_000 })).toBe(false)
   })
 
-  test("returns false right at the 5% boundary", () => {
-    // previous=200K, current=190K = exactly 95% → stalled (>=)
-    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })).toBe(true)
-    // current=189_999 → just below 95% → progress
-    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 189_999 })).toBe(false)
+  test("counts an exactly-threshold reduction as progress (not stalled)", () => {
+    // previous=200K, current=190K = exactly a 5% reduction. The PR semantics
+    // require "at least 5% reduction" to escape the stall classification, and
+    // 5% satisfies "at least 5%" — so the guard must NOT trip here.
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })).toBe(false)
+    // current=190_001 — reduction is 4.9995% (strictly less than 5%) → stalled.
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_001 })).toBe(true)
   })
 
   test("honors a custom threshold override", () => {

From 18e11209ec63e9187d3d7e6bb3f677f797c0ced4 Mon Sep 17 00:00:00 2001
From: Gladys Chen Cheng <chen0552@algonquinlive.com>
Date: Sun, 24 May 2026 22:45:55 -0400
Subject: [PATCH 3/3] fix(opencode): treat 0 -> 0 token compactions as stalled

Switching the percentage check from >= to > closed the boundary
false-positive (200K -> 190K now counts as progress) but opened a
zero-token hole: when the provider directly throws ContextOverflowError,
SessionProcessor.halt sets needsCompaction without running step-finish,
so handle.message.tokens stays at the zero-initialized values. The
percentage check then evaluates 0 > 0 * 0.95 = false on every fire and
the loop keeps recreating compactions.

Add a one-line guard so two consecutive zero-token compactions trip the
stall detector. Unit test added alongside the boundary cases.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/opencode/src/session/overflow.ts         | 4 ++++
 packages/opencode/test/session/compaction.test.ts | 9 +++++++++
 2 files changed, 13 insertions(+)

diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts
index 96b1283fa27f..4c6cc212d45b 100644
--- a/packages/opencode/src/session/overflow.ts
+++ b/packages/opencode/src/session/overflow.ts
@@ -47,5 +47,9 @@ export function autoCompactStalled(input: {
   threshold?: number
 }) {
   if (input.previousTokens === undefined) return false
+  // Provider-error compactions don't populate tokens (no step-finish runs),
+  // so we see 0 → 0. With no progress signal, treat as stalled — otherwise
+  // the percentage check below would keep returning false forever.
+  if (input.previousTokens === 0 && input.currentTokens === 0) return true
   return input.currentTokens > input.previousTokens * (input.threshold ?? 0.95)
 }
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index cbc559c8919a..10f27a76347c 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -614,6 +614,15 @@ describe("session.overflow.autoCompactStalled", () => {
   test("returns true when current somehow exceeds previous", () => {
     expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 240_000 })).toBe(true)
   })
+
+  test("counts repeated zero-token compactions as stalled", () => {
+    // The provider-error compaction path (ContextOverflowError caught in
+    // SessionProcessor.halt) skips step-finish, so handle.message.tokens
+    // stays at the zero-initialized value. Two such triggers in a row give
+    // us no progress signal at all — without this guard the percentage
+    // check would keep returning false (0 > 0 * 0.95 = false) forever.
+    expect(autoCompactStalled({ previousTokens: 0, currentTokens: 0 })).toBe(true)
+  })
 })
 
 describe("session.compaction.create", () => {