Skip to content

Commit 21b5a26

Browse files
committed
Fix potential Anthropic double-charge bug (didn't reach prod)
1 parent 39d3588 commit 21b5a26

File tree

2 files changed

+189
-4
lines changed

2 files changed

+189
-4
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import { describe, expect, it } from 'bun:test'
2+
3+
import { extractUsageAndCost } from '../openrouter'
4+
5+
describe('extractUsageAndCost', () => {
6+
describe('OpenRouter response shapes', () => {
7+
it('Anthropic shape: both cost and upstream_inference_cost populated with the SAME value (NOT additive)', () => {
8+
// This is the shape that caused the 2x overcharge bug on every Anthropic call.
9+
// The two fields report the same dollars via different routes (OR-billed-us
10+
// and what-upstream-charged-us). Summing them doubles the bill.
11+
const usage = {
12+
prompt_tokens: 91437,
13+
completion_tokens: 1209,
14+
prompt_tokens_details: { cached_tokens: 87047 },
15+
completion_tokens_details: { reasoning_tokens: 0 },
16+
cost: 0.1171,
17+
cost_details: { upstream_inference_cost: 0.1171 },
18+
}
19+
const result = extractUsageAndCost(usage)
20+
expect(result.cost).toBeCloseTo(0.1171, 6)
21+
expect(result.cost).not.toBeCloseTo(0.2342, 6) // the old, buggy sum
22+
expect(result.inputTokens).toBe(91437)
23+
expect(result.outputTokens).toBe(1209)
24+
expect(result.cacheReadInputTokens).toBe(87047)
25+
})
26+
27+
it('Google shape: cost=0, upstream_inference_cost holds the real charge', () => {
28+
const usage = {
29+
prompt_tokens: 500,
30+
completion_tokens: 200,
31+
prompt_tokens_details: { cached_tokens: 0 },
32+
completion_tokens_details: { reasoning_tokens: 0 },
33+
cost: 0,
34+
cost_details: { upstream_inference_cost: 0.000547 },
35+
}
36+
const result = extractUsageAndCost(usage)
37+
expect(result.cost).toBeCloseTo(0.000547, 9)
38+
})
39+
40+
it('Legacy shape: cost populated, cost_details missing', () => {
41+
const usage = {
42+
prompt_tokens: 100,
43+
completion_tokens: 50,
44+
cost: 0.042,
45+
}
46+
const result = extractUsageAndCost(usage)
47+
expect(result.cost).toBeCloseTo(0.042, 6)
48+
})
49+
50+
it('Legacy shape: cost populated, cost_details present but upstream_inference_cost absent', () => {
51+
const usage = {
52+
prompt_tokens: 100,
53+
completion_tokens: 50,
54+
cost: 0.042,
55+
cost_details: {},
56+
}
57+
const result = extractUsageAndCost(usage)
58+
expect(result.cost).toBeCloseTo(0.042, 6)
59+
})
60+
61+
it('Legacy shape: cost populated, upstream_inference_cost null', () => {
62+
const usage = {
63+
prompt_tokens: 100,
64+
completion_tokens: 50,
65+
cost: 0.042,
66+
cost_details: { upstream_inference_cost: null },
67+
}
68+
const result = extractUsageAndCost(usage)
69+
expect(result.cost).toBeCloseTo(0.042, 6)
70+
})
71+
72+
it('Anthropic shape with slight rounding drift: picks the larger of the two', () => {
73+
// Defensive: if the two fields ever diverge due to OR-side rounding,
74+
// using max avoids under-reporting our spend.
75+
const usage = {
76+
prompt_tokens: 1000,
77+
completion_tokens: 100,
78+
cost: 0.005,
79+
cost_details: { upstream_inference_cost: 0.0051 },
80+
}
81+
const result = extractUsageAndCost(usage)
82+
expect(result.cost).toBeCloseTo(0.0051, 6)
83+
})
84+
85+
it('both cost and upstream missing: returns 0', () => {
86+
const usage = {
87+
prompt_tokens: 100,
88+
completion_tokens: 50,
89+
}
90+
const result = extractUsageAndCost(usage)
91+
expect(result.cost).toBe(0)
92+
})
93+
94+
it('entire usage object undefined: returns zeros', () => {
95+
const result = extractUsageAndCost(undefined)
96+
expect(result.cost).toBe(0)
97+
expect(result.inputTokens).toBe(0)
98+
expect(result.outputTokens).toBe(0)
99+
expect(result.cacheReadInputTokens).toBe(0)
100+
expect(result.reasoningTokens).toBe(0)
101+
})
102+
103+
it('entire usage object null: returns zeros', () => {
104+
const result = extractUsageAndCost(null)
105+
expect(result.cost).toBe(0)
106+
})
107+
108+
it('cost is non-number (string): treated as 0', () => {
109+
const usage = {
110+
cost: '0.042' as unknown as number,
111+
cost_details: { upstream_inference_cost: 0.01 },
112+
}
113+
const result = extractUsageAndCost(usage)
114+
expect(result.cost).toBeCloseTo(0.01, 6)
115+
})
116+
})
117+
118+
describe('token extraction', () => {
119+
it('extracts all token counts correctly', () => {
120+
const usage = {
121+
prompt_tokens: 1000,
122+
completion_tokens: 500,
123+
prompt_tokens_details: { cached_tokens: 900 },
124+
completion_tokens_details: { reasoning_tokens: 200 },
125+
cost: 0.01,
126+
}
127+
const result = extractUsageAndCost(usage)
128+
expect(result.inputTokens).toBe(1000)
129+
expect(result.outputTokens).toBe(500)
130+
expect(result.cacheReadInputTokens).toBe(900)
131+
expect(result.reasoningTokens).toBe(200)
132+
})
133+
134+
it('missing nested token detail objects default to 0', () => {
135+
const usage = {
136+
prompt_tokens: 100,
137+
completion_tokens: 50,
138+
cost: 0.001,
139+
}
140+
const result = extractUsageAndCost(usage)
141+
expect(result.cacheReadInputTokens).toBe(0)
142+
expect(result.reasoningTokens).toBe(0)
143+
})
144+
})
145+
146+
describe('regression: the exact bug from prod logs', () => {
147+
// Pulled from debug/web.jsonl `openrouter-cost-audit` entries.
148+
// Every one of these was billed at 2x the real price before the fix.
149+
it.each([
150+
{ cost: 0.1155, expected: 0.1155 },
151+
{ cost: 0.0534, expected: 0.0534 },
152+
{ cost: 0.0584, expected: 0.0584 },
153+
{ cost: 0.1171, expected: 0.1171 },
154+
])('bills $expected (not 2x) when cost === upstream === $cost', ({ cost, expected }) => {
155+
const usage = {
156+
prompt_tokens: 100000,
157+
completion_tokens: 500,
158+
prompt_tokens_details: { cached_tokens: 95000 },
159+
cost,
160+
cost_details: { upstream_inference_cost: cost },
161+
}
162+
const result = extractUsageAndCost(usage)
163+
expect(result.cost).toBeCloseTo(expected, 6)
164+
})
165+
})
166+
})

web/src/llm-api/openrouter.ts

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,34 @@ function createOpenRouterRequest(params: {
6161
})
6262
}
6363

64-
function extractUsageAndCost(usage: any): UsageData {
65-
const openRouterCost = usage?.cost ?? 0
66-
const upstreamCost = usage?.cost_details?.upstream_inference_cost ?? 0
64+
/**
65+
* Extract token counts and billed cost from an OpenRouter `usage` object.
66+
*
67+
* OpenRouter reports the billed charge in ONE of two fields — or in BOTH
68+
* with the SAME value (observed on Anthropic routes). They are NOT additive:
69+
*
70+
* Anthropic routes: { cost: X, cost_details: { upstream_inference_cost: X } }
71+
* Google routes: { cost: 0, cost_details: { upstream_inference_cost: X } }
72+
* Some routes: { cost: X, cost_details: null }
73+
*
74+
* We previously summed the two fields, which double-charged every Anthropic
75+
* call. Taking the max handles all three shapes safely.
76+
*
77+
* See: investigation notes + scripts/refund-openrouter-overcharge.ts
78+
*/
79+
export function extractUsageAndCost(usage: any): UsageData {
80+
const openRouterCost =
81+
typeof usage?.cost === 'number' ? usage.cost : 0
82+
const upstreamCost =
83+
typeof usage?.cost_details?.upstream_inference_cost === 'number'
84+
? usage.cost_details.upstream_inference_cost
85+
: 0
6786
return {
6887
inputTokens: usage?.prompt_tokens ?? 0,
6988
outputTokens: usage?.completion_tokens ?? 0,
7089
cacheReadInputTokens: usage?.prompt_tokens_details?.cached_tokens ?? 0,
7190
reasoningTokens: usage?.completion_tokens_details?.reasoning_tokens ?? 0,
72-
cost: openRouterCost + upstreamCost,
91+
cost: Math.max(openRouterCost, upstreamCost),
7392
}
7493
}
7594

0 commit comments

Comments (0)