|
| 1 | +#!/usr/bin/env bun |
| 2 | + |
| 3 | +/** |
| 4 | + * Fetch and render Fireworks deployment health + runtime stats. |
| 5 | + * |
| 6 | + * Data sources: |
| 7 | + * - GET /v1/accounts/{account}/deployments (list / per-deployment state) |
| 8 | + * - GET /v1/accounts/{account}/metrics (Prometheus text, all deployments) |
| 9 | + * |
| 10 | + * Usage: |
| 11 | + * bun scripts/fireworks-deployment-stats.ts # all deployments in the account |
| 12 | + * bun scripts/fireworks-deployment-stats.ts <deployment_id> # filter to one deployment |
| 13 | + * |
| 14 | + * Env: |
| 15 | + * FIREWORKS_API_KEY (required) — auto-loaded from .env.local via bun |
| 16 | + * FIREWORKS_ACCOUNT_ID (optional) — defaults to the account in fireworks-config.ts |
| 17 | + */ |
| 18 | + |
| 19 | +import { FIREWORKS_ACCOUNT_ID } from '../web/src/llm-api/fireworks-config' |
| 20 | + |
| 21 | +const API_BASE = 'https://api.fireworks.ai/v1' |
| 22 | + |
/**
 * Subset of the Fireworks deployment resource
 * (GET /v1/accounts/{account}/deployments) that this script reads.
 * Fields not rendered are omitted.
 */
type Deployment = {
  name: string // full resource name, e.g. accounts/{account}/deployments/{id}
  baseModel: string
  state: string // e.g. READY / UPDATING; other values are rendered with a warning icon
  status: { code: string; message: string }
  replicaCount: number
  desiredReplicaCount: number
  minReplicaCount: number
  maxReplicaCount: number
  // Replica breakdown by lifecycle phase.
  replicaStats: {
    readyReplicaCount: number
    initializingReplicaCount: number
    pendingSchedulingReplicaCount: number
    downloadingModelReplicaCount: number
  }
  createTime: string
  updateTime: string
  deploymentShape: string // resource path; shortened for display in renderDeployment
  autoscalingPolicy: {
    loadTargets: Record<string, number>
    // Windows are duration strings like "300s" — see parseDuration for formatting.
    scaleUpWindow: string
    scaleDownWindow: string
    scaleToZeroWindow: string
  }
}
| 48 | + |
| 49 | +type PromSample = { name: string; labels: Record<string, string>; value: number } |
| 50 | + |
// Histogram metrics summarized per deployment in the percentile table.
// `key` is the metric-name prefix (bucketPercentiles appends
// "_bucket:sum_by_deployment"); `label` is the row header shown to the user.
const HISTOGRAM_METRICS = [
  { key: 'latency_to_first_token_ms', label: 'TTFT (ms)' },
  { key: 'latency_prefill_ms', label: 'prefill (ms)' },
  { key: 'latency_prefill_queue_ms', label: 'prefill-queue (ms)' },
  { key: 'latency_generation_queue_ms', label: 'gen-queue (ms)' },
  { key: 'latency_generation_per_token_ms', label: 'inter-token (ms)' },
  { key: 'latency_overall_ms', label: 'overall (ms)' },
  { key: 'tokens_prompt_per_request', label: 'prompt toks/req' },
  { key: 'tokens_generated_per_request', label: 'gen toks/req' },
] as const
| 61 | + |
| 62 | +async function fetchDeployments(apiKey: string, accountId: string): Promise<Deployment[]> { |
| 63 | + const res = await fetch(`${API_BASE}/accounts/${accountId}/deployments`, { |
| 64 | + headers: { Authorization: `Bearer ${apiKey}` }, |
| 65 | + }) |
| 66 | + if (!res.ok) throw new Error(`Deployments list ${res.status}: ${await res.text()}`) |
| 67 | + const data = (await res.json()) as { deployments: Deployment[] } |
| 68 | + return data.deployments ?? [] |
| 69 | +} |
| 70 | + |
| 71 | +async function fetchPrometheusMetrics(apiKey: string, accountId: string): Promise<PromSample[]> { |
| 72 | + const res = await fetch(`${API_BASE}/accounts/${accountId}/metrics`, { |
| 73 | + headers: { Authorization: `Bearer ${apiKey}` }, |
| 74 | + }) |
| 75 | + if (!res.ok) throw new Error(`Metrics ${res.status}: ${await res.text()}`) |
| 76 | + const text = await res.text() |
| 77 | + return parsePrometheus(text) |
| 78 | +} |
| 79 | + |
| 80 | +function parsePrometheus(text: string): PromSample[] { |
| 81 | + const samples: PromSample[] = [] |
| 82 | + for (const line of text.split('\n')) { |
| 83 | + if (!line || line.startsWith('#')) continue |
| 84 | + const braceStart = line.indexOf('{') |
| 85 | + const braceEnd = line.indexOf('}') |
| 86 | + let name: string |
| 87 | + let labelStr = '' |
| 88 | + let rest: string |
| 89 | + if (braceStart === -1) { |
| 90 | + const parts = line.split(/\s+/) |
| 91 | + name = parts[0] |
| 92 | + rest = parts.slice(1).join(' ') |
| 93 | + } else { |
| 94 | + name = line.slice(0, braceStart) |
| 95 | + labelStr = line.slice(braceStart + 1, braceEnd) |
| 96 | + rest = line.slice(braceEnd + 1).trim() |
| 97 | + } |
| 98 | + const valueToken = rest.split(/\s+/)[0] |
| 99 | + const value = Number(valueToken) |
| 100 | + if (!Number.isFinite(value)) continue |
| 101 | + const labels: Record<string, string> = {} |
| 102 | + if (labelStr) { |
| 103 | + const re = /(\w+)="((?:[^"\\]|\\.)*)"/g |
| 104 | + let m: RegExpExecArray | null |
| 105 | + while ((m = re.exec(labelStr)) !== null) labels[m[1]] = m[2] |
| 106 | + } |
| 107 | + samples.push({ name, labels, value }) |
| 108 | + } |
| 109 | + return samples |
| 110 | +} |
| 111 | + |
| 112 | +function scalarFor(samples: PromSample[], name: string, deploymentId: string): number | undefined { |
| 113 | + return samples.find((s) => s.name === name && s.labels.deployment_id === deploymentId)?.value |
| 114 | +} |
| 115 | + |
| 116 | +function bucketPercentiles( |
| 117 | + samples: PromSample[], |
| 118 | + metricKey: string, |
| 119 | + deploymentId: string, |
| 120 | + percentiles: number[] = [50, 90, 95, 99], |
| 121 | +): { total: number; values: Record<number, number> } | null { |
| 122 | + const buckets = samples |
| 123 | + .filter( |
| 124 | + (s) => s.name === `${metricKey}_bucket:sum_by_deployment` && s.labels.deployment_id === deploymentId, |
| 125 | + ) |
| 126 | + .map((s) => ({ |
| 127 | + le: s.labels.le === '+Inf' ? Number.POSITIVE_INFINITY : Number(s.labels.le), |
| 128 | + cum: s.value, |
| 129 | + })) |
| 130 | + .sort((a, b) => a.le - b.le) |
| 131 | + |
| 132 | + if (buckets.length === 0) return null |
| 133 | + const total = buckets[buckets.length - 1].cum |
| 134 | + if (total === 0) return { total, values: Object.fromEntries(percentiles.map((p) => [p, 0])) } |
| 135 | + |
| 136 | + const values: Record<number, number> = {} |
| 137 | + for (const p of percentiles) { |
| 138 | + const target = total * (p / 100) |
| 139 | + let prevLe = 0 |
| 140 | + let prevCum = 0 |
| 141 | + let picked = Number.POSITIVE_INFINITY |
| 142 | + for (const { le, cum } of buckets) { |
| 143 | + if (cum >= target) { |
| 144 | + if (!Number.isFinite(le)) { |
| 145 | + picked = prevLe |
| 146 | + } else if (cum === prevCum) { |
| 147 | + picked = le |
| 148 | + } else { |
| 149 | + const frac = (target - prevCum) / (cum - prevCum) |
| 150 | + picked = prevLe + frac * (le - prevLe) |
| 151 | + } |
| 152 | + break |
| 153 | + } |
| 154 | + prevLe = le |
| 155 | + prevCum = cum |
| 156 | + } |
| 157 | + values[p] = picked |
| 158 | + } |
| 159 | + return { total, values } |
| 160 | +} |
| 161 | + |
| 162 | +function fmt(n: number | undefined, digits = 0): string { |
| 163 | + if (n === undefined || !Number.isFinite(n)) return '—' |
| 164 | + if (Math.abs(n) >= 1000) return n.toFixed(0) |
| 165 | + return n.toFixed(digits) |
| 166 | +} |
| 167 | + |
| 168 | +function fmtPct(n: number | undefined): string { |
| 169 | + return n === undefined ? '—' : `${(n * 100).toFixed(1)}%` |
| 170 | +} |
| 171 | + |
| 172 | +function parseDuration(d: string): string { |
| 173 | + const match = /^([\d.]+)s$/.exec(d) |
| 174 | + if (!match) return d |
| 175 | + const secs = Number(match[1]) |
| 176 | + if (secs >= 60) return `${(secs / 60).toFixed(0)}m` |
| 177 | + return `${secs}s` |
| 178 | +} |
| 179 | + |
/**
 * Pretty-print one deployment to stdout: config/state header, GPU-capacity
 * gauges, throughput rates, error rates, and a latency/size percentile table.
 * Works entirely from the pre-fetched deployment record and Prometheus
 * samples — no network calls here.
 */
function renderDeployment(d: Deployment, samples: PromSample[]): void {
  // Metrics label deployments by the trailing {id} segment of the resource name.
  const deploymentId = d.name.split('/').pop()!
  // Take the third-from-last path segment of the shape resource as a short
  // display name; fall back to the full path when the split yields nothing.
  const shape = d.deploymentShape.split('/').slice(-3, -2)[0] ?? d.deploymentShape

  const stateIcon = d.state === 'READY' ? '✅' : d.state === 'UPDATING' ? '🔄' : '⚠️'

  console.log('━'.repeat(80))
  console.log(`${stateIcon} ${d.name}`)
  console.log(` model=${d.baseModel} shape=${shape}`)
  console.log(
    ` state=${d.state} (${d.status.code}) replicas ready=${d.replicaStats.readyReplicaCount}/${d.replicaCount} ` +
      `min=${d.minReplicaCount} max=${d.maxReplicaCount}`,
  )
  const p = d.autoscalingPolicy
  console.log(
    ` autoscale target=${p.loadTargets.default} up=${parseDuration(p.scaleUpWindow)} ` +
      `down=${parseDuration(p.scaleDownWindow)} to-zero=${parseDuration(p.scaleToZeroWindow)}`,
  )
  console.log(` updated=${d.updateTime}`)

  // Per-deployment gauges from the recording rules (averaged across replicas).
  const kvBlocks = scalarFor(samples, 'generator_kv_blocks_fraction:avg_by_deployment', deploymentId)
  const kvSlots = scalarFor(samples, 'generator_kv_slots_fraction:avg_by_deployment', deploymentId)
  const active = scalarFor(samples, 'generator_num_active_fraction:avg_by_deployment', deploymentId)
  const fwdTime = scalarFor(samples, 'generator_model_forward_time:avg_by_deployment', deploymentId)

  // Throughput counters (per-second rates — see the note printed by main()).
  const reqRate = scalarFor(samples, 'request_counter_total:sum_by_deployment', deploymentId)
  const promptTokRate = scalarFor(samples, 'tokens_prompt_total:sum_by_deployment', deploymentId)
  const cachedPromptRate = scalarFor(samples, 'tokens_cached_prompt_total:sum_by_deployment', deploymentId)
  const genTokGauge = scalarFor(samples, 'tokens_generated_gauge:sum_by_deployment', deploymentId)
  // Error counters carry an HTTP status `code` label, so scalarFor (which only
  // matches on deployment_id) isn't enough — match the code label explicitly.
  const err400 = samples.find(
    (s) =>
      s.name === 'requests_error_total:sum_by_deployment' &&
      s.labels.deployment_id === deploymentId &&
      s.labels.code === '400',
  )?.value
  const err500 = samples.find(
    (s) =>
      s.name === 'requests_error_total:sum_by_deployment' &&
      s.labels.deployment_id === deploymentId &&
      s.labels.code === '500',
  )?.value

  // Derived ratios; guarded so "no traffic" renders as '—' rather than 0%.
  const cacheHitRate =
    promptTokRate && promptTokRate > 0 && cachedPromptRate !== undefined
      ? cachedPromptRate / promptTokRate
      : undefined
  const errRate400 =
    reqRate && reqRate > 0 && err400 !== undefined ? err400 / reqRate : undefined

  console.log('\n GPU / capacity')
  console.log(
    // fwdTime is scaled x1000 — presumably seconds → ms; TODO confirm metric unit.
    ` kv_blocks=${fmtPct(kvBlocks)} kv_slots=${fmtPct(kvSlots)} ` +
      `active_generators=${fmt(active, 2)} fwd_time=${fmt((fwdTime ?? 0) * 1000, 1)}ms`,
  )

  console.log('\n Throughput (per-sec rates)')
  console.log(
    ` requests=${fmt(reqRate, 2)}/s prompt_tokens=${fmt(promptTokRate)}/s ` +
      `cached_prompt=${fmt(cachedPromptRate)}/s cache_hit=${fmtPct(cacheHitRate)} ` +
      `generated_gauge=${fmt(genTokGauge, 1)}`,
  )

  console.log('\n Errors (per-sec)')
  console.log(
    ` 400=${fmt(err400 ?? 0, 3)}/s (${fmtPct(errRate400)}) 500=${fmt(err500 ?? 0, 3)}/s`,
  )

  // Percentile table: one row per configured histogram metric; rows with no
  // bucket data render as a dash.
  console.log('\n Latency & size percentiles')
  console.log(
    ` ${'metric'.padEnd(22)} ${'events'.padStart(9)} ${'p50'.padStart(9)} ${'p90'.padStart(9)} ${'p95'.padStart(9)} ${'p99'.padStart(9)}`,
  )
  for (const h of HISTOGRAM_METRICS) {
    const pct = bucketPercentiles(samples, h.key, deploymentId)
    if (!pct) {
      console.log(` ${h.label.padEnd(22)} ${'—'.padStart(9)}`)
      continue
    }
    console.log(
      ` ${h.label.padEnd(22)} ${fmt(pct.total, 2).padStart(9)} ` +
        `${fmt(pct.values[50]).padStart(9)} ${fmt(pct.values[90]).padStart(9)} ` +
        `${fmt(pct.values[95]).padStart(9)} ${fmt(pct.values[99]).padStart(9)}`,
    )
  }
  console.log()
}
| 265 | + |
| 266 | +async function main() { |
| 267 | + const apiKey = process.env.FIREWORKS_API_KEY |
| 268 | + if (!apiKey || apiKey === 'dummy_fireworks_key') { |
| 269 | + console.error('FIREWORKS_API_KEY not set (check .env.local)') |
| 270 | + process.exit(1) |
| 271 | + } |
| 272 | + const accountId = process.env.FIREWORKS_ACCOUNT_ID ?? FIREWORKS_ACCOUNT_ID |
| 273 | + const filter = process.argv[2] |
| 274 | + |
| 275 | + const [deployments, samples] = await Promise.all([ |
| 276 | + fetchDeployments(apiKey, accountId), |
| 277 | + fetchPrometheusMetrics(apiKey, accountId), |
| 278 | + ]) |
| 279 | + |
| 280 | + const filtered = filter |
| 281 | + ? deployments.filter((d) => d.name.endsWith(`/${filter}`) || d.name === filter) |
| 282 | + : deployments |
| 283 | + |
| 284 | + if (filtered.length === 0) { |
| 285 | + console.error(`No deployments matched${filter ? ` "${filter}"` : ''} in account ${accountId}`) |
| 286 | + process.exit(1) |
| 287 | + } |
| 288 | + |
| 289 | + console.log(`Fireworks account: ${accountId} • ${filtered.length} deployment(s)`) |
| 290 | + console.log(`Rates below are per-second (Prometheus recording rules; ~30s update cadence).`) |
| 291 | + console.log() |
| 292 | + |
| 293 | + for (const d of filtered) renderDeployment(d, samples) |
| 294 | +} |
| 295 | + |
| 296 | +main().catch((err) => { |
| 297 | + console.error(err) |
| 298 | + process.exit(1) |
| 299 | +}) |
0 commit comments