aictrl-dev · byapparov · Jun 22, 2026 · Jun 22, 2026
diff --git a/EVENTS.md b/EVENTS.md
@@ -82,13 +82,36 @@ Emitted when an assistant message finishes (one per LLM turn).
   "providerID": "anthropic",
   "agent": "default",
   "cost": { "input": 0.003, "output": 0.012, "cache": { "read": 0, "write": 0 } },
-  "tokens": { "input": 1024, "output": 512, "cache": { "read": 0, "write": 0 } },
+  "tokens": {
+    "input": 1024,
+    "output": 512,
+    "reasoning": 0,
+    "cache": { "read": 8800, "write": 1024 }
+  },
+  "context": { "used": 9824, "limit": 200000, "ratio": 0.049 },
   "finish": "tool-calls"
 }
 ```
 
 `finish` values: `"tool-calls"` (model wants to call tools), `"end_turn"` (model is done), `"max_tokens"` (output truncated).
 
+**`tokens`** (5-way breakdown, mirrors upstream `LLM.Usage`):
+
+- `input` (number) — raw input tokens billed at the standard input rate.
+- `output` (number) — output (completion) tokens.
+- `reasoning` (number) — extended-thinking / reasoning tokens (0 when thinking is off).
+- `cache.read` (number) — tokens served from the prompt cache (billed at cache-read rate). Distinguishes cache hits from fresh input.
+- `cache.write` (number) — tokens written to the prompt cache (billed at cache-write rate).
+
+These fields are non-overlapping: a token is counted in exactly one bucket.
+
+**`context`** (context-window utilization):
+
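+- `used` (number) — tokens occupying the model's context window this turn, computed as `tokens.input + tokens.cache.read`. `tokens.cache.write`, `tokens.output` and `tokens.reasoning` are not included.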
+- `limit` (number) — the model's total context-window size in tokens, sourced from the models.dev registry (`model.limit.context`).
+- `ratio` (number) — `used / limit`. Normally between 0 and 1, but not clamped, so it exceeds 1 if `used` is larger than `limit`. A value approaching 1 signals context-exhaustion risk.
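+- `null` — not a field: the whole `context` value is emitted as `null` instead of an object when the model's context limit is not known, i.e. the model lookup failed or the registered limit is `0` or negative (e.g. unregistered custom endpoint).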
+
 ### `text`
 
 Emitted when a text block from the assistant is complete.

diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts
@@ -80,6 +80,30 @@ function fallback(part: ToolPart) {
   })
 }
 
+/**
+ * Build the context-window utilization object for `message_complete` events.
+ *
+ * Returns `null` (meaning "limit unknown") whenever `contextLimit` cannot be
+ * used as a divisor:
+ * - `null` / `undefined` — the caller could not resolve the model's limit
+ * - `0` or negative      — custom models without a registered limit report `0`;
+ *   dividing by it would yield `Infinity`/`NaN`
+ * - `NaN` / `±Infinity`  — not a real window size
+ *
+ * `JSON.stringify` writes non-finite numbers as `null`, so letting any of the
+ * above through would emit an object with `null` fields instead of the
+ * documented top-level `null` (EVENTS.md).
+ *
+ * @param contextLimit model context-window size in tokens, or `null` if unknown
+ * @param contextUsed tokens counted against the window (computed by the caller)
+ * @returns `{ used, limit, ratio }` with `ratio = used / limit`, or `null`
+ * @internal exported for unit-testing only
+ */
+export function buildContextWindow(
+  contextLimit: number | null,
+  contextUsed: number,
+): { used: number; limit: number; ratio: number } | null {
+  // `== null` also covers `undefined`; `isFinite` rejects NaN and ±Infinity,
+  // neither of which the `<= 0` comparison catches.
+  if (contextLimit == null || !Number.isFinite(contextLimit) || contextLimit <= 0) return null
+  return {
+    used: contextUsed,
+    limit: contextLimit,
+    ratio: contextUsed / contextLimit,
+  }
+}
+
 function glob(info: ToolProps<typeof GlobTool>) {
   const root = info.input.path ?? ""
   const title = `Glob "${info.input.pattern}"`
@@ -463,12 +487,39 @@ export const RunCommand = cmd({
             const info = event.properties.info
             if (args.format === "json") {
               if (info.finish) {
+                // Build 5-way token breakdown mirroring upstream LLM.Usage shape.
+                // info.tokens already carries the full breakdown from StepFinishPart
+                // accumulation — reasoning and cache split are not dropped upstream.
+                const tokens = {
+                  input: info.tokens.input,
+                  output: info.tokens.output,
+                  reasoning: info.tokens.reasoning,
+                  cache: {
+                    read: info.tokens.cache.read,
+                    write: info.tokens.cache.write,
+                  },
+                }
+
+                // Context-window utilization: used = input + cache.read (prompt tokens
+                // that actually hit the model's context window). limit comes from the
+                // model registry (models.dev). On lookup failure (or limit===0 for
+                // custom models) buildContextWindow returns null to signal "unknown".
+                const contextLimit = await Provider.getModel(info.providerID, info.modelID)
+                  .then((m) => m.limit.context)
+                  .catch(() => null)
+                const contextUsed = tokens.input + tokens.cache.read
+                const context = buildContextWindow(contextLimit, contextUsed)
+
                 emit("message_complete", {
                   modelID: info.modelID,
                   providerID: info.providerID,
                   agent: info.agent,
+                  // cost is sourced from info.cost which accumulates real per-step costs
+                  // from StepFinishPart. Do NOT use the new step.ended event cost field
+                  // which emits cost:0 and is reconciled later (the cost:0 trap).
                   cost: info.cost,
-                  tokens: info.tokens,
+                  tokens,
+                  context,
                   finish: info.finish,
                 })
               }

diff --git a/packages/cli/test/cli/usage-token-breakdown.test.ts b/packages/cli/test/cli/usage-token-breakdown.test.ts
@@ -0,0 +1,155 @@
+import path from "path"
+import { describe, expect, test } from "bun:test"
+import { buildContextWindow } from "../../src/cli/cmd/run"
+
+// Absolute path to EVENTS.md, four directory levels above this test file's directory.
+const EVENTS_MD = path.resolve(import.meta.dir, "../../../../EVENTS.md")
+
+/**
+ * Issue #86 — `message_complete` events gain a 5-way token breakdown and a
+ * context-window utilization object. This suite pins the pure helper that
+ * builds the utilization object.
+ */
+describe("buildContextWindow (#86)", () => {
+  const LIMIT = 200_000
+  const USED = 9824
+
+  /** Call the helper and fail the current test if it reports "unknown" (null). */
+  const known = (limit: number | null, used: number) => {
+    const win = buildContextWindow(limit, used)
+    expect(win).not.toBeNull()
+    return win as NonNullable<typeof win>
+  }
+
+  // Regression guard: a zero limit (the custom-model default) must collapse to
+  // null rather than an object whose ratio is Infinity.
+  test("returns null when contextLimit is 0 (custom model without registered limit)", () => {
+    expect(buildContextWindow(0, USED)).toBeNull()
+  })
+
+  test("returns null when contextLimit is null (Provider.getModel threw)", () => {
+    expect(buildContextWindow(null, USED)).toBeNull()
+  })
+
+  test("returns null when both limit and used are 0", () => {
+    expect(buildContextWindow(0, 0)).toBeNull()
+  })
+
+  test("computes used as the value passed in (caller sets input + cache.read)", () => {
+    const [input, cacheRead] = [8000, 1824]
+    expect(known(LIMIT, input + cacheRead).used).toBe(9824)
+  })
+
+  test("sets limit to the contextLimit value", () => {
+    expect(known(LIMIT, USED).limit).toBe(200_000)
+  })
+
+  test("ratio is used / limit", () => {
+    expect(known(LIMIT, USED).ratio).toBeCloseTo(9824 / 200_000, 10)
+  })
+
+  test("ratio is between 0 and 1 for realistic values", () => {
+    expect(known(128_000, 64_000).ratio).toBe(0.5)
+  })
+
+  test("ratio is exactly 1 when context is fully used", () => {
+    expect(known(100_000, 100_000).ratio).toBe(1)
+  })
+
+  test("ratio is 0 when no tokens used (empty prompt start)", () => {
+    expect(known(LIMIT, 0).ratio).toBe(0)
+  })
+
+  test("result is JSON-serialisable without Infinity or NaN", () => {
+    // Non-finite numbers would show up as the literal text "null" on the wire.
+    const wire = JSON.stringify(buildContextWindow(LIMIT, USED))
+    expect(wire).not.toContain("null")
+    expect(JSON.parse(wire).ratio).toBeCloseTo(9824 / 200_000, 10)
+  })
+
+  test("top-level null serialises cleanly (not as object with null ratio)", () => {
+    // Documented contract: unknown limit → the whole value is null, not {ratio:null}.
+    expect(JSON.stringify(buildContextWindow(0, USED))).toBe("null")
+  })
+})
+
+describe("message_complete emit block shape (source-verified, #86)", () => {
+  // Source-text assertions: they check how the emit call site is wired, which
+  // unit tests of buildContextWindow alone cannot observe.
+  const RUN_SRC = path.resolve(import.meta.dir, "../../src/cli/cmd/run.ts")
+  const EMIT_CALL = 'emit("message_complete"'
+
+  /**
+   * Read run.ts, locate the first `emit("message_complete"` call (failing the
+   * test if it is absent), and return the text from `before` characters ahead
+   * of the call to `after` characters past its start.
+   */
+  async function emitWindow(before: number, after: number): Promise<string> {
+    const text = await Bun.file(RUN_SRC).text()
+    const at = text.indexOf(EMIT_CALL)
+    expect(at).toBeGreaterThan(-1)
+    return text.slice(Math.max(0, at - before), at + after)
+  }
+
+  test("emit block passes tokens with reasoning and cache read/write fields", async () => {
+    const window = await emitWindow(1500, 200)
+    for (const needle of ["reasoning", "cache", "read", "write"]) {
+      expect(window).toContain(needle)
+    }
+  })
+
+  test("emit block calls buildContextWindow (not inline ternary)", async () => {
+    expect(await emitWindow(1500, 200)).toContain("buildContextWindow")
+  })
+
+  test("emit block includes context field", async () => {
+    // The emit object spans roughly 400 chars, so 500 reaches past its closing paren.
+    expect(await emitWindow(0, 500)).toContain("context")
+  })
+
+  test("cost field is still emitted (not regressed)", async () => {
+    expect(await emitWindow(0, 800)).toContain("cost:")
+  })
+})
+
+describe("EVENTS.md documents token breakdown and context (#86)", () => {
+  /**
+   * Read EVENTS.md and return `length` characters starting at the first
+   * occurrence of "message_complete"; fails the test if there is none.
+   */
+  async function docSection(length: number): Promise<string> {
+    const text = await Bun.file(EVENTS_MD).text()
+    const start = text.indexOf("message_complete")
+    expect(start).toBeGreaterThan(-1)
+    return text.slice(start, start + length)
+  }
+
+  test("EVENTS.md message_complete section includes reasoning token field", async () => {
+    expect(await docSection(1500)).toContain("reasoning")
+  })
+
+  test("EVENTS.md message_complete section documents context field", async () => {
+    const section = await docSection(1500)
+    for (const word of ["context", "used", "limit", "ratio"]) {
+      expect(section).toContain(word)
+    }
+  })
+
+  test("EVENTS.md documents null as the unknown-limit sentinel", async () => {
+    // Wider 2000-char window: the null-sentinel text was measured at roughly
+    // 1593 chars past the anchor, beyond the 1500 used by the other tests.
+    // Documented contract: null = context limit not known.
+    expect(await docSection(2000)).toContain("null")
+  })
+})