|
| 1 | +/** |
| 2 | + * Queries the BigQuery `message` table for the most recent rows and prints |
| 3 | + * cost, upstream_inference_cost, token breakdown, and model. |
| 4 | + * |
| 5 | + * Used to investigate whether OpenRouter is populating BOTH `usage.cost` and |
| 6 | + * `usage.cost_details.upstream_inference_cost` for non-BYOK requests, which |
| 7 | + * would cause `web/src/llm-api/openrouter.ts#extractUsageAndCost` to double- |
| 8 | + * count (that function returns `openRouterCost + upstreamCost`). |
| 9 | + * |
| 10 | + * Usage: |
| 11 | + * bun run scripts/query-message-costs.ts # dev dataset |
| 12 | + * bun run scripts/query-message-costs.ts --prod # prod dataset |
| 13 | + * bun run scripts/query-message-costs.ts --prod --limit 200 |
| 14 | + * bun run scripts/query-message-costs.ts --prod --model anthropic/claude-opus-4.7 |
| 15 | + * |
| 16 | + * Note: `model` is NOT a top-level column in the BigQuery `message` schema; |
| 17 | + * it lives inside the `request` JSON blob, so we extract it with |
| 18 | + * JSON_EXTRACT_SCALAR. |
| 19 | + */ |
| 20 | + |
| 21 | +import { BigQuery } from '@google-cloud/bigquery' |
| 22 | + |
/** Command-line options understood by this script. */
type Args = {
  /** True when `--prod` is present; selects the prod dataset instead of dev. */
  isProd: boolean
  /** Maximum number of rows to fetch (`--limit N`, default 100). */
  limit: number
  /** Exact model id to filter on (`--model ID`), or null for no filter. */
  modelFilter: string | null
}

/**
 * Parses `process.argv` into {@link Args}.
 *
 * A flag that is present without a value (last argument, empty string, or
 * immediately followed by another `--flag`) falls back to its default.
 *
 * @returns The parsed options.
 * @throws Error if `--limit` is given a value that is not a non-negative
 *   integer written in plain decimal digits (e.g. `abc`, `10abc`, `-5`, `1.5`).
 */
function parseArgs(): Args {
  const argv = process.argv.slice(2)

  // Returns the token following `flag`, or undefined when the flag is absent
  // or has no usable value. Tokens starting with `--` are treated as the next
  // flag rather than as a value, so `--model --prod` does not filter on the
  // model "--prod".
  const valueOf = (flag: string): string | undefined => {
    const idx = argv.indexOf(flag)
    if (idx < 0) return undefined
    const candidate = argv[idx + 1]
    if (!candidate || candidate.startsWith('--')) return undefined
    return candidate
  }

  let limit = 100
  const rawLimit = valueOf('--limit')
  if (rawLimit !== undefined) {
    // parseInt alone would accept "10abc" as 10 and turn "abc" into NaN, which
    // only fails later as an opaque query error. Validate up front instead.
    const parsed = /^\d+$/.test(rawLimit) ? Number.parseInt(rawLimit, 10) : NaN
    if (!Number.isSafeInteger(parsed)) {
      throw new Error(
        `Invalid --limit value "${rawLimit}": expected a non-negative integer`,
      )
    }
    limit = parsed
  }

  return {
    isProd: argv.includes('--prod'),
    limit,
    modelFilter: valueOf('--model') ?? null,
  }
}
| 43 | + |
/**
 * Formats a number for display using the host's default locale.
 *
 * @param n Value to format; null, undefined and NaN are rendered as `-`.
 * @param digits Exact number of fraction digits to show (default 0).
 * @returns The localized number string, or `-` when there is no value.
 */
function fmtNum(n: number | null | undefined, digits = 0): string {
  if (n == null || Number.isNaN(n)) {
    return '-'
  }
  // Pinning min and max to the same value forces a fixed number of decimals.
  const options: Intl.NumberFormatOptions = {
    minimumFractionDigits: digits,
    maximumFractionDigits: digits,
  }
  return n.toLocaleString(undefined, options)
}
| 51 | + |
/**
 * Formats a dollar amount with six decimal places and a leading `$`.
 *
 * @param n Amount to format; null, undefined and NaN are rendered as `-`.
 * @returns A string such as `$0.123457`, or `-` when there is no value.
 */
function fmtCost(n: number | null | undefined): string {
  const hasValue = !(n == null || Number.isNaN(n))
  return hasValue ? '$' + (n as number).toFixed(6) : '-'
}
| 56 | + |
// Per-1M-token prices used as the Anthropic Opus 4.6 / 4.7 reference rates.
// They feed the "expected cost" sanity column, which is shown for Opus rows only.
const OPUS_INPUT_PER_M = 5.0
const OPUS_CACHE_READ_PER_M = 0.5
const OPUS_OUTPUT_PER_M = 25.0

/**
 * Estimates what a request should have cost at the Opus reference rates above.
 *
 * Cache-read tokens are subtracted from `input_tokens` (clamped at zero) and
 * priced at the cache-read rate; the remaining input tokens are priced at the
 * regular input rate. Missing token counts are treated as 0.
 *
 * @param row Token counts for a single message.
 * @returns The estimated cost in the same currency unit as the rate constants.
 */
function expectedOpusCost(row: {
  input_tokens: number
  cache_read_input_tokens: number
  output_tokens: number
}): number {
  const totalInput = row.input_tokens ?? 0
  const cachedInput = row.cache_read_input_tokens ?? 0
  const generated = row.output_tokens ?? 0

  // Never let the uncached portion go negative if cache reads exceed input.
  const uncachedInput = Math.max(0, totalInput - cachedInput)

  const inputCost = (uncachedInput * OPUS_INPUT_PER_M) / 1_000_000
  const cacheReadCost = (cachedInput * OPUS_CACHE_READ_PER_M) / 1_000_000
  const outputCost = (generated * OPUS_OUTPUT_PER_M) / 1_000_000

  return inputCost + cacheReadCost + outputCost
}
| 78 | + |
/**
 * Converts a query result cell to a number while preserving "no value".
 *
 * @param value Raw cell value from a result row.
 * @returns null for null/undefined, otherwise `Number(value)`.
 */
function toNullableNumber(value: unknown): number | null {
  return value === null || value === undefined ? null : Number(value)
}

/**
 * Script entry point.
 *
 * Reads the CLI options, queries the most recent rows of the `message` table
 * in the selected dataset (optionally restricted to one exact model id),
 * prints one tab-separated line per row, then prints a summary comparing
 * billed cost, upstream inference cost, and the expected Opus list-price cost.
 *
 * @returns A promise that resolves once all output has been written.
 * @throws Error if the limit is not a non-negative integer; errors from the
 *   BigQuery client propagate to the caller.
 */
async function main(): Promise<void> {
  const { isProd, limit, modelFilter } = parseArgs()

  // `limit` is bound as the LIMIT query parameter, so reject values that can
  // never be valid there instead of surfacing an opaque query error.
  if (!Number.isInteger(limit) || limit < 0) {
    throw new Error(`--limit must be a non-negative integer (got ${limit})`)
  }

  const dataset = isProd ? 'codebuff_data' : 'codebuff_data_dev'
  const table = `${dataset}.message`

  console.log(
    `Querying last ${limit} rows from \`${table}\`${
      modelFilter ? ` (model = ${modelFilter})` : ''
    }`,
  )
  console.log('')

  const client = new BigQuery()

  // Model isn't a column, so it is pulled from the request JSON.
  // The only row filter is the optional exact-match --model filter, so the
  // output really is "the last N rows" as announced above. Values are passed
  // as named query parameters rather than interpolated into the SQL.
  const modelClause = modelFilter
    ? `AND JSON_EXTRACT_SCALAR(request, '$.model') = @modelFilter`
    : ''
  const query = `
    SELECT
      id,
      finished_at,
      JSON_EXTRACT_SCALAR(request, '$.model') AS model,
      input_tokens,
      cache_read_input_tokens,
      output_tokens,
      cost,
      upstream_inference_cost
    FROM \`${table}\`
    WHERE TRUE
      ${modelClause}
    ORDER BY finished_at DESC
    LIMIT @limit
  `

  const [rows] = await client.query({
    query,
    params: {
      limit,
      ...(modelFilter ? { modelFilter } : {}),
    },
  })

  if (rows.length === 0) {
    console.log('No rows found.')
    return
  }

  // Per-row table. `ups/cost` ≈ 1.0 on a row means upstream equals the billed
  // cost on that row — the classic signature of a double-count.
  const header = [
    'finished_at',
    'model',
    'input',
    'cache_read',
    'uncached_in',
    'output',
    'cost',
    'upstream',
    'cost+ups',
    'ups/cost',
    'expected_opus',
  ]
  console.log(header.join('\t'))

  let doubleCountHits = 0
  let upstreamPopulatedCount = 0
  let totalCost = 0
  let totalUpstream = 0
  let opusCostSum = 0
  let opusExpectedSum = 0

  for (const row of rows) {
    // Token counts default to 0 when missing; costs keep null so that
    // "not recorded" stays distinguishable from "recorded as zero".
    const input = Number(row.input_tokens ?? 0)
    const cacheRead = Number(row.cache_read_input_tokens ?? 0)
    const output = Number(row.output_tokens ?? 0)
    const uncachedIn = Math.max(0, input - cacheRead)
    const cost = toNullableNumber(row.cost)
    const upstream = toNullableNumber(row.upstream_inference_cost)
    const sum = (cost ?? 0) + (upstream ?? 0)
    // The ratio is only meaningful when both values exist and cost is positive.
    const ratio =
      cost !== null && upstream !== null && cost > 0 ? upstream / cost : null

    // Prefer a `.value` property when the cell exposes one, otherwise fall
    // back to its string form; a missing timestamp is shown as '-'.
    const finishedAt = row.finished_at
    const finished =
      finishedAt === null || finishedAt === undefined
        ? '-'
        : (finishedAt.value ?? finishedAt.toString() ?? String(finishedAt))

    const model = row.model ?? '-'
    const isOpus = typeof model === 'string' && model.includes('opus')

    const expected = expectedOpusCost({
      input_tokens: input,
      cache_read_input_tokens: cacheRead,
      output_tokens: output,
    })

    console.log(
      [
        // First 19 characters, i.e. `YYYY-MM-DDTHH:MM:SS` for an ISO timestamp.
        String(finished).slice(0, 19),
        model,
        fmtNum(input),
        fmtNum(cacheRead),
        fmtNum(uncachedIn),
        fmtNum(output),
        fmtCost(cost),
        fmtCost(upstream),
        fmtCost(sum),
        ratio !== null ? ratio.toFixed(2) : '-',
        isOpus ? fmtCost(expected) : '-',
      ].join('\t'),
    )

    if (upstream !== null && upstream > 0) {
      upstreamPopulatedCount++
      totalUpstream += upstream
    }
    if (cost !== null) totalCost += cost

    if (isOpus) {
      if (cost !== null) opusCostSum += cost
      opusExpectedSum += expected
    }

    // Heuristic: flag rows where upstream+cost > 1.5x cost alone (likely double-count)
    if (cost !== null && upstream !== null && upstream > 0.5 * cost) {
      doubleCountHits++
    }
  }

  console.log('')
  console.log('─────────────── Summary ───────────────')
  console.log(`Total rows: ${rows.length}`)
  console.log(
    `Rows with non-zero upstream: ${upstreamPopulatedCount} / ${rows.length}`,
  )
  console.log(`Σ cost (billed): ${fmtCost(totalCost)}`)
  console.log(`Σ upstream_inference_cost: ${fmtCost(totalUpstream)}`)
  console.log(`Σ cost + upstream: ${fmtCost(totalCost + totalUpstream)}`)

  // Skipped when no Opus row contributed, which also avoids dividing by zero.
  if (opusExpectedSum > 0) {
    console.log('')
    console.log('─── Opus-only comparison ───')
    console.log(`Σ actual cost (opus rows): ${fmtCost(opusCostSum)}`)
    console.log(`Σ expected (Opus 4.6/4.7 list): ${fmtCost(opusExpectedSum)}`)
    console.log(
      `Actual / expected ratio: ${(opusCostSum / opusExpectedSum).toFixed(
        2,
      )}x`,
    )
    console.log(
      ' (If ≈2.0x → double-count confirmed. If ≈1.0x → cost is accurate.)',
    )
  }

  console.log('')
  console.log(
    `Rows flagged as likely double-count (upstream > 0.5 × cost): ${doubleCountHits}`,
  )
  console.log('')
  console.log(
    'Hypothesis check: in web/src/llm-api/openrouter.ts#extractUsageAndCost,',
  )
  console.log(
    'we do `cost = openRouterCost + upstreamCost`. If upstream is routinely',
  )
  console.log(
    'populated (not 0/null) for non-BYOK rows, that addition double-counts.',
  )
}
| 257 | + |
// Run the script: exit with status 0 on success; on any failure, log the
// error and exit with status 1.
void (async () => {
  try {
    await main()
    process.exit(0)
  } catch (err) {
    console.error('Error:', err)
    process.exit(1)
  }
})()
0 commit comments