Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion EVENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,36 @@ Emitted when an assistant message finishes (one per LLM turn).
"providerID": "anthropic",
"agent": "default",
"cost": { "input": 0.003, "output": 0.012, "cache": { "read": 0, "write": 0 } },
"tokens": { "input": 1024, "output": 512, "cache": { "read": 0, "write": 0 } },
"tokens": {
"input": 1024,
"output": 512,
"reasoning": 0,
"cache": { "read": 8800, "write": 1024 }
},
"context": { "used": 9824, "limit": 200000, "ratio": 0.049 },
"finish": "tool-calls"
}
```

`finish` values: `"tool-calls"` (model wants to call tools), `"end_turn"` (model is done), `"max_tokens"` (output truncated).

**`tokens`** (5-way breakdown, mirrors upstream `LLM.Usage`):

- `input` (number) — raw input tokens billed at the standard input rate.
- `output` (number) — output (completion) tokens.
- `reasoning` (number) — extended-thinking / reasoning tokens (0 when thinking is off).
- `cache.read` (number) — tokens served from the prompt cache (billed at cache-read rate). Distinguishes cache hits from fresh input.
- `cache.write` (number) — tokens written to the prompt cache (billed at cache-write rate).

These fields are non-overlapping: a token is counted in exactly one bucket.

**`context`** (context-window utilization):

- `used` (number) — tokens occupying the model's context window this turn: `input + cache.read`.
- `limit` (number) — the model's total context-window size in tokens, sourced from the models.dev registry (`model.limit.context`).
- `ratio` (number) — `used / limit` (0–1). A value approaching 1 signals context-exhaustion risk.
- `null` — the entire `context` value is `null` (instead of an object) when the model's context limit is not known or is registered as `0` (e.g. unregistered custom endpoint).

### `text`

Emitted when a text block from the assistant is complete.
Expand Down
53 changes: 52 additions & 1 deletion packages/cli/src/cli/cmd/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,30 @@ function fallback(part: ToolPart) {
})
}

/**
 * Build the context-window utilization object for `message_complete` events.
 *
 * Returns `null` (meaning "unknown") whenever `contextLimit` cannot be used as a
 * meaningful, JSON-safe limit:
 * - `null` / `undefined` — Provider.getModel threw (unregistered model).
 * - `0` or negative — a custom model without a registered limit defaults to
 *   `context: 0` (provider.ts:929). A zero limit would yield `Infinity`/`NaN`
 *   for `ratio`.
 * - `NaN` / `±Infinity` — these slip past a plain `<= 0` comparison (`NaN <= 0`
 *   is false) and `JSON.stringify` would serialise `limit` and/or `ratio` as
 *   `null` inside the object.
 *
 * In every one of those cases a nested `null` would diverge from the documented
 * top-level `null` contract (EVENTS.md), so the whole value collapses to `null`.
 *
 * @param contextLimit - the model's context-window size in tokens, or `null`
 *   when the lookup failed.
 * @param contextUsed - tokens counted as occupying the context window; the
 *   caller decides how this is computed and it is passed through unchanged.
 * @returns `{ used, limit, ratio }` with `ratio = used / limit`, or `null` when
 *   the limit is unknown or unusable.
 *
 * @internal exported for unit-testing only
 */
export function buildContextWindow(
  contextLimit: number | null,
  contextUsed: number,
): { used: number; limit: number; ratio: number } | null {
  // `== null` deliberately matches both null and undefined.
  if (contextLimit == null) return null
  // Reject NaN/Infinity as well as non-positive limits so the division below
  // can never be driven by a limit that serialises as a nested `null`.
  if (!Number.isFinite(contextLimit) || contextLimit <= 0) return null
  return {
    used: contextUsed,
    limit: contextLimit,
    ratio: contextUsed / contextLimit,
  }
}

function glob(info: ToolProps<typeof GlobTool>) {
const root = info.input.path ?? ""
const title = `Glob "${info.input.pattern}"`
Expand Down Expand Up @@ -463,12 +487,39 @@ export const RunCommand = cmd({
const info = event.properties.info
if (args.format === "json") {
if (info.finish) {
// Build 5-way token breakdown mirroring upstream LLM.Usage shape.
// info.tokens already carries the full breakdown from StepFinishPart
// accumulation — reasoning and cache split are not dropped upstream.
const tokens = {
input: info.tokens.input,
output: info.tokens.output,
reasoning: info.tokens.reasoning,
cache: {
read: info.tokens.cache.read,
write: info.tokens.cache.write,
},
}

// Context-window utilization: used = input + cache.read (prompt tokens
// that actually hit the model's context window). limit comes from the
// model registry (models.dev). On lookup failure (or limit===0 for
// custom models) buildContextWindow returns null to signal "unknown".
const contextLimit = await Provider.getModel(info.providerID, info.modelID)
.then((m) => m.limit.context)
.catch(() => null)
const contextUsed = tokens.input + tokens.cache.read
const context = buildContextWindow(contextLimit, contextUsed)

emit("message_complete", {
modelID: info.modelID,
providerID: info.providerID,
agent: info.agent,
// cost is sourced from info.cost which accumulates real per-step costs
// from StepFinishPart. Do NOT use the new step.ended event cost field
// which emits cost:0 and is reconciled later (the cost:0 trap).
cost: info.cost,
tokens: info.tokens,
tokens,
context,
finish: info.finish,
})
}
Expand Down
155 changes: 155 additions & 0 deletions packages/cli/test/cli/usage-token-breakdown.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import path from "path"
import { describe, expect, test } from "bun:test"
import { buildContextWindow } from "../../src/cli/cmd/run"

// Absolute path to the EVENTS.md located four directories above this test file's directory.
const EVENTS_MD = path.resolve(import.meta.dir, "../../../../EVENTS.md")

/**
 * Issue #86 coverage: the context-window utilization helper that accompanies
 * the 5-way token breakdown on message_complete events.
 */
describe("buildContextWindow (#86)", () => {
  // 🟠 regression guard: a zero limit (custom model default) has to give null rather than {ratio:Infinity}
  test("returns null when contextLimit is 0 (custom model without registered limit)", () => {
    expect(buildContextWindow(0, 9824)).toBeNull()
  })

  test("returns null when contextLimit is null (Provider.getModel threw)", () => {
    expect(buildContextWindow(null, 9824)).toBeNull()
  })

  test("returns null when both limit and used are 0", () => {
    expect(buildContextWindow(0, 0)).toBeNull()
  })

  test("computes used as the value passed in (caller sets input + cache.read)", () => {
    const [freshInput, cachedInput] = [8000, 1824]
    const ctx = buildContextWindow(200_000, freshInput + cachedInput)
    expect(ctx).not.toBeNull()
    expect(ctx?.used).toBe(9824)
  })

  test("sets limit to the contextLimit value", () => {
    const ctx = buildContextWindow(200_000, 9824)
    expect(ctx).not.toBeNull()
    expect(ctx?.limit).toBe(200_000)
  })

  test("ratio is used / limit", () => {
    const ctx = buildContextWindow(200_000, 9824)
    expect(ctx).not.toBeNull()
    expect(ctx?.ratio).toBeCloseTo(9824 / 200_000, 10)
  })

  test("ratio is between 0 and 1 for realistic values", () => {
    const ctx = buildContextWindow(128_000, 64_000)
    expect(ctx).not.toBeNull()
    expect(ctx?.ratio).toBe(0.5)
  })

  test("ratio is exactly 1 when context is fully used", () => {
    const ctx = buildContextWindow(100_000, 100_000)
    expect(ctx).not.toBeNull()
    expect(ctx?.ratio).toBe(1)
  })

  test("ratio is 0 when no tokens used (empty prompt start)", () => {
    const ctx = buildContextWindow(200_000, 0)
    expect(ctx).not.toBeNull()
    expect(ctx?.ratio).toBe(0)
  })

  test("result is JSON-serialisable without Infinity or NaN", () => {
    // Infinity/NaN would surface as the literal text "null" in the JSON output.
    const json = JSON.stringify(buildContextWindow(200_000, 9824))
    expect(json).not.toContain("null")
    const roundTripped = JSON.parse(json)
    expect(roundTripped.ratio).toBeCloseTo(9824 / 200_000, 10)
  })

  test("top-level null serialises cleanly (not as object with null ratio)", () => {
    // Documented contract: an unknown limit is a top-level null, never {ratio:null}
    expect(JSON.stringify(buildContextWindow(0, 9824))).toBe("null")
  })
})

describe("message_complete emit block shape (source-verified, #86)", () => {
  // Source-text checks: they confirm the emit call site is wired up, which
  // unit tests of buildContextWindow alone cannot demonstrate.
  const RUN_SRC = path.resolve(import.meta.dir, "../../src/cli/cmd/run.ts")
  const EMIT_MARKER = 'emit("message_complete"'

  // Reads run.ts, asserts the emit call exists, and returns the text from
  // `before` chars ahead of the call (clamped to file start) to `after` chars past it.
  async function sliceAroundEmit(before: number, after: number): Promise<string> {
    const text = await Bun.file(RUN_SRC).text()
    const at = text.indexOf(EMIT_MARKER)
    expect(at).toBeGreaterThan(-1)
    return text.slice(Math.max(0, at - before), at + after)
  }

  test("emit block passes tokens with reasoning and cache read/write fields", async () => {
    const block = await sliceAroundEmit(1500, 200)
    for (const needle of ["reasoning", "cache", "read", "write"]) {
      expect(block).toContain(needle)
    }
  })

  test("emit block calls buildContextWindow (not inline ternary)", async () => {
    const block = await sliceAroundEmit(1500, 200)
    expect(block).toContain("buildContextWindow")
  })

  test("emit block includes context field", async () => {
    // the emit object spans roughly 400 chars; look as far as its closing paren
    const block = await sliceAroundEmit(0, 500)
    expect(block).toContain("context")
  })

  test("cost field is still emitted (not regressed)", async () => {
    const block = await sliceAroundEmit(0, 800)
    expect(block).toContain("cost:")
  })
})

describe("EVENTS.md documents token breakdown and context (#86)", () => {
  // Reads EVENTS.md, asserts "message_complete" occurs, and returns `length`
  // characters of the document starting at its first occurrence.
  async function messageCompleteSection(length: number): Promise<string> {
    const text = await Bun.file(EVENTS_MD).text()
    const start = text.indexOf("message_complete")
    expect(start).toBeGreaterThan(-1)
    return text.slice(start, start + length)
  }

  test("EVENTS.md message_complete section includes reasoning token field", async () => {
    const section = await messageCompleteSection(1500)
    expect(section).toContain("reasoning")
  })

  test("EVENTS.md message_complete section documents context field", async () => {
    const section = await messageCompleteSection(1500)
    for (const needle of ["context", "used", "limit", "ratio"]) {
      expect(section).toContain(needle)
    }
  })

  test("EVENTS.md documents null as the unknown-limit sentinel", async () => {
    // the null sentinel text sits ~1593 chars past the message_complete heading, hence the 2000-char window
    const section = await messageCompleteSection(2000)
    // Documented contract: null means the context limit is not known
    expect(section).toContain("null")
  })
})