Skip to content

Commit 984e868

Browse files
committed
query costs script using big query
1 parent 21b5a26 commit 984e868

File tree

1 file changed

+263
-0
lines changed

1 file changed

+263
-0
lines changed

scripts/query-message-costs.ts

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
/**
 * Queries the BigQuery `message` table for the most recent rows and prints
 * cost, upstream_inference_cost, token breakdown, and model.
 *
 * Used to investigate whether OpenRouter is populating BOTH `usage.cost` and
 * `usage.cost_details.upstream_inference_cost` for non-BYOK requests, which
 * would cause `web/src/llm-api/openrouter.ts#extractUsageAndCost` to double-
 * count (that function returns `openRouterCost + upstreamCost`).
 *
 * Usage:
 *   bun run scripts/query-message-costs.ts # dev dataset
 *   bun run scripts/query-message-costs.ts --prod # prod dataset
 *   bun run scripts/query-message-costs.ts --prod --limit 200
 *   bun run scripts/query-message-costs.ts --prod --model anthropic/claude-opus-4.7
 *
 * Note: `model` is NOT a top-level column in the BigQuery `message` schema;
 * it lives inside the `request` JSON blob, so we extract it with
 * JSON_EXTRACT_SCALAR.
 */
20+
21+
import { BigQuery } from '@google-cloud/bigquery'
22+
23+
/** Command-line options accepted by this script. */
type Args = {
  /** True when `--prod` is present on the command line. */
  isProd: boolean
  /** Maximum number of rows to fetch; always a positive integer. */
  limit: number
  /** Exact model id given via `--model`, or null when no filter was given. */
  modelFilter: string | null
}

/**
 * Parses `process.argv` into {@link Args}.
 *
 * Recognised flags: `--prod`, `--limit <n>`, `--model <id>`.
 * A value flag that is last on the command line, or is immediately followed
 * by another `--flag`, is treated as if it had not been given.
 *
 * @returns the parsed options; `limit` defaults to 100.
 * @throws Error when `--limit` has a value that is not a positive integer.
 */
function parseArgs(): Args {
  const defaultLimit = 100
  const argv = process.argv.slice(2)

  // Returns the token following `flag`, or null when the flag is absent or
  // has no usable value (end of argv, empty string, or another `--flag`).
  const valueAfter = (flag: string): string | null => {
    const idx = argv.indexOf(flag)
    if (idx < 0) return null
    const value = argv[idx + 1]
    if (value === undefined || value === '' || value.startsWith('--')) {
      return null
    }
    return value
  }

  const rawLimit = valueAfter('--limit')
  let limit = defaultLimit
  if (rawLimit !== null) {
    // Digits only: parseInt alone would accept "12abc" and turn "abc" into
    // NaN, which would then be sent to the query as the row limit.
    const parsed = /^\d+$/.test(rawLimit) ? Number.parseInt(rawLimit, 10) : NaN
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
      throw new Error(
        `Invalid --limit value "${rawLimit}": expected a positive integer`,
      )
    }
    limit = parsed
  }

  return {
    isProd: argv.includes('--prod'),
    limit,
    modelFilter: valueAfter('--model'),
  }
}
43+
44+
/**
 * Formats a number for table output using the runtime's default locale.
 *
 * @param n value to format; `null`, `undefined` and `NaN` render as `'-'`.
 * @param digits exact number of fraction digits to show (default 0).
 * @returns the locale-formatted number, or `'-'` when there is no value.
 */
function fmtNum(n: number | null | undefined, digits = 0): string {
  // `== null` intentionally matches both null and undefined.
  if (n == null || Number.isNaN(n)) return '-'
  return n.toLocaleString(undefined, {
    minimumFractionDigits: digits,
    maximumFractionDigits: digits,
  })
}
51+
52+
/**
 * Formats a dollar amount with six fraction digits, e.g. `$0.123457`.
 *
 * @param n amount in dollars; `null`, `undefined` and `NaN` render as `'-'`.
 * @returns the amount prefixed with `$`, or `'-'` when there is no value.
 */
function fmtCost(n: number | null | undefined): string {
  // `== null` intentionally matches both null and undefined.
  if (n == null || Number.isNaN(n)) return '-'
  return `$${n.toFixed(6)}`
}
56+
57+
// Anthropic Opus 4.6 / 4.7 per-1M-token pricing.
// Used for a quick "expected cost" sanity column on Opus rows only.
const OPUS_INPUT_PER_M = 5.0
const OPUS_CACHE_READ_PER_M = 0.5
const OPUS_OUTPUT_PER_M = 25.0

/**
 * Estimates what a request should have cost at the Opus rates above.
 *
 * `input_tokens` is treated as including the cache-read tokens: the cached
 * portion is billed at the cache-read rate and only the remainder (clamped at
 * zero) at the full input rate. Cache-write tokens are not part of the
 * estimate.
 *
 * @param row token counts for one request; a missing (null/undefined) count
 *   is treated as 0.
 * @returns the estimated cost in dollars.
 */
function expectedOpusCost(row: {
  input_tokens: number | null | undefined
  cache_read_input_tokens: number | null | undefined
  output_tokens: number | null | undefined
}): number {
  const tokensPerUnit = 1_000_000
  const inputTokens = row.input_tokens ?? 0
  const cacheReadTokens = row.cache_read_input_tokens ?? 0
  const outputTokens = row.output_tokens ?? 0

  // Clamp so a row reporting more cached than total input never produces a
  // negative uncached charge.
  const uncachedInput = Math.max(0, inputTokens - cacheReadTokens)

  return (
    (uncachedInput * OPUS_INPUT_PER_M) / tokensPerUnit +
    (cacheReadTokens * OPUS_CACHE_READ_PER_M) / tokensPerUnit +
    (outputTokens * OPUS_OUTPUT_PER_M) / tokensPerUnit
  )
}
78+
79+
/**
 * Runs the investigation: fetches recent rows from the BigQuery `message`
 * table, prints one tab-separated line per row, then prints aggregate totals
 * and an Opus-only actual-vs-expected comparison.
 *
 * Reads its options via `parseArgs()` and writes only to stdout.
 * Rejects with whatever error the BigQuery client raises.
 */
async function main(): Promise<void> {
  // Loose row shape: only the fields this function reads. Numeric columns are
  // `unknown` because every one of them is passed through `Number(...)`.
  // NOTE(review): `finished_at` is assumed to be a timestamp wrapper exposing
  // `.value` — confirm against the client version in use.
  type MessageCostRow = {
    finished_at?: { value?: string } | null
    model?: unknown
    input_tokens?: unknown
    cache_read_input_tokens?: unknown
    output_tokens?: unknown
    cost?: unknown
    upstream_inference_cost?: unknown
  }

  // Converts a result cell to a number while keeping SQL NULL as null.
  const toNullableNumber = (value: unknown): number | null =>
    value === null || value === undefined ? null : Number(value)

  // NOTE(review): these predicates are always applied, on top of `--model`.
  // They narrow the result to Opus rows in one cost band, so a `--model`
  // value that does not contain "opus" returns no rows. They are kept as-is
  // and surfaced in the log line below; remove them if the script is meant
  // to show all recent rows.
  const fixedModelPattern = '%opus%'
  const fixedCostMin = '0.10'
  const fixedCostMax = '0.25'

  const { isProd, limit, modelFilter } = parseArgs()
  const dataset = isProd ? 'codebuff_data' : 'codebuff_data_dev'
  const table = `${dataset}.message`

  console.log(
    `Querying last ${limit} rows from \`${table}\`${
      modelFilter ? ` (model = ${modelFilter})` : ''
    } [fixed filters: model LIKE '${fixedModelPattern}', cost BETWEEN ${fixedCostMin} AND ${fixedCostMax}]`,
  )
  console.log('')

  const client = new BigQuery()

  // Model isn't a column — pull from request JSON.
  // Only the columns that are actually printed are selected; user-supplied
  // values go through named query parameters, never string interpolation.
  const query = `
    SELECT
      id,
      finished_at,
      JSON_EXTRACT_SCALAR(request, '$.model') AS model,
      input_tokens,
      cache_read_input_tokens,
      output_tokens,
      cost,
      upstream_inference_cost
    FROM \`${table}\`
    WHERE TRUE
      ${
        modelFilter
          ? `AND JSON_EXTRACT_SCALAR(request, '$.model') = @modelFilter`
          : ''
      }
      AND JSON_EXTRACT_SCALAR(request, '$.model') LIKE '${fixedModelPattern}'
      AND cost BETWEEN ${fixedCostMin} AND ${fixedCostMax}
    ORDER BY finished_at DESC
    LIMIT @limit
  `

  const [queryRows] = await client.query({
    query,
    params: {
      limit,
      ...(modelFilter ? { modelFilter } : {}),
    },
  })
  const rows: MessageCostRow[] = queryRows

  if (rows.length === 0) {
    console.log('No rows found.')
    return
  }

  // Per-row table. `ups/cost` ≈ 1.0 on a row means upstream equals the billed
  // cost on that row — the classic signature of a double-count.
  const header = [
    'finished_at',
    'model',
    'input',
    'cache_read',
    'uncached_in',
    'output',
    'cost',
    'upstream',
    'cost+ups',
    'ups/cost',
    'expected_opus',
  ]
  console.log(header.join('\t'))

  let doubleCountHits = 0
  let upstreamPopulatedCount = 0
  let totalCost = 0
  let totalUpstream = 0
  let opusCostSum = 0
  let opusExpectedSum = 0

  for (const row of rows) {
    const input = Number(row.input_tokens ?? 0)
    const cacheRead = Number(row.cache_read_input_tokens ?? 0)
    const output = Number(row.output_tokens ?? 0)
    const uncachedIn = Math.max(0, input - cacheRead)
    const cost = toNullableNumber(row.cost)
    const upstream = toNullableNumber(row.upstream_inference_cost)
    const sum = (cost ?? 0) + (upstream ?? 0)
    // The ratio is only meaningful when there is a positive billed cost.
    const ratio =
      cost !== null && cost > 0 && upstream !== null ? upstream / cost : null

    const finished =
      row.finished_at?.value ??
      row.finished_at?.toString() ??
      String(row.finished_at)

    const model = row.model ?? '-'
    const isOpus = typeof model === 'string' && model.includes('opus')

    const expected = expectedOpusCost({
      input_tokens: input,
      cache_read_input_tokens: cacheRead,
      output_tokens: output,
    })

    console.log(
      [
        // Keep only the `YYYY-MM-DDTHH:MM:SS` prefix of the timestamp.
        String(finished).slice(0, 19),
        model,
        fmtNum(input),
        fmtNum(cacheRead),
        fmtNum(uncachedIn),
        fmtNum(output),
        fmtCost(cost),
        fmtCost(upstream),
        fmtCost(sum),
        ratio !== null ? ratio.toFixed(2) : '-',
        isOpus ? fmtCost(expected) : '-',
      ].join('\t'),
    )

    if (upstream !== null && upstream > 0) {
      upstreamPopulatedCount++
      totalUpstream += upstream
    }
    if (cost !== null) totalCost += cost

    if (isOpus) {
      if (cost !== null) opusCostSum += cost
      opusExpectedSum += expected
    }

    // Heuristic: flag rows where upstream+cost > 1.5x cost alone (likely double-count)
    if (cost !== null && upstream !== null && upstream > 0.5 * cost) {
      doubleCountHits++
    }
  }

  console.log('')
  console.log('─────────────── Summary ───────────────')
  console.log(`Total rows: ${rows.length}`)
  console.log(
    `Rows with non-zero upstream: ${upstreamPopulatedCount} / ${rows.length}`,
  )
  console.log(`Σ cost (billed): ${fmtCost(totalCost)}`)
  console.log(`Σ upstream_inference_cost: ${fmtCost(totalUpstream)}`)
  console.log(`Σ cost + upstream: ${fmtCost(totalCost + totalUpstream)}`)

  // Guard the division below: skip the comparison when nothing was expected.
  if (opusExpectedSum > 0) {
    console.log('')
    console.log('─── Opus-only comparison ───')
    console.log(`Σ actual cost (opus rows): ${fmtCost(opusCostSum)}`)
    console.log(`Σ expected (Opus 4.6/4.7 list): ${fmtCost(opusExpectedSum)}`)
    console.log(
      `Actual / expected ratio: ${(opusCostSum / opusExpectedSum).toFixed(
        2,
      )}x`,
    )
    console.log(
      ' (If ≈2.0x → double-count confirmed. If ≈1.0x → cost is accurate.)',
    )
  }

  console.log('')
  console.log(
    `Rows flagged as likely double-count (upstream > 0.5 × cost): ${doubleCountHits}`,
  )
  console.log('')
  console.log(
    'Hypothesis check: in web/src/llm-api/openrouter.ts#extractUsageAndCost,',
  )
  console.log(
    'we do `cost = openRouterCost + upstreamCost`. If upstream is routinely',
  )
  console.log(
    'populated (not 0/null) for non-BYOK rows, that addition double-counts.',
  )
}
257+
258+
// Script entry point: exit 0 on success; on any failure log the error and
// exit 1 so shell callers can detect it.
main()
  .then(() => process.exit(0))
  .catch((err: unknown) => {
    console.error('Error:', err)
    process.exit(1)
  })

0 commit comments

Comments
 (0)