|
| 1 | +#!/usr/bin/env bun |
| 2 | + |
| 3 | +/** |
| 4 | + * Scrape Fireworks metrics once and print the health snapshot the |
| 5 | + * web server's monitor would produce. Useful for ad-hoc verification. |
| 6 | + * |
| 7 | + * Usage: |
| 8 | + * bun scripts/check-fireworks-health.ts |
| 9 | + * bun scripts/check-fireworks-health.ts --raw # also print raw metrics count |
| 10 | + * bun scripts/check-fireworks-health.ts --json # machine-readable output |
| 11 | + * |
| 12 | + * Reads FIREWORKS_API_KEY from env (.env.local is loaded automatically by bun). |
| 13 | + */ |
| 14 | + |
| 15 | +import { computeSnapshot, DEFAULT_HEALTH_THRESHOLDS } from '../web/src/server/fireworks-monitor/compute-health' |
| 16 | +import { parsePrometheusText } from '../web/src/server/fireworks-monitor/parse-prometheus' |
| 17 | +import { |
| 18 | + FIREWORKS_ACCOUNT_ID, |
| 19 | + FIREWORKS_DEPLOYMENT_MAP, |
| 20 | +} from '../web/src/llm-api/fireworks-config' |
| 21 | + |
| 22 | +import type { DeploymentHealthStatus } from '../web/src/server/fireworks-monitor/types' |
| 23 | + |
/**
 * Builds the Fireworks metrics endpoint URL for an account.
 *
 * The account id is percent-encoded so that an id containing reserved URL
 * characters (for example one supplied through an environment variable)
 * cannot alter the request path or append a query string. Ids made only of
 * unreserved characters produce the same URL as plain interpolation.
 *
 * @param accountId - Fireworks account identifier, inserted as one path segment.
 * @returns The absolute metrics URL for that account.
 */
const METRICS_URL = (accountId: string): string =>
  `https://api.fireworks.ai/v1/accounts/${encodeURIComponent(accountId)}/metrics`
| 26 | + |
/**
 * Fetches the metrics endpoint for one account and parses the response body.
 *
 * @param params.apiKey - Sent as a Bearer token in the Authorization header.
 * @param params.accountId - Account whose metrics URL is requested.
 * @returns The result of `parsePrometheusText` applied to the response text.
 * @throws Error when the response status is not OK; the message carries the
 *   status code, status text and at most the first 300 characters of the body.
 */
async function scrapeFireworksMetrics(params: { apiKey: string; accountId: string }) {
  const { apiKey, accountId } = params
  const res = await fetch(METRICS_URL(accountId), {
    headers: { Authorization: `Bearer ${apiKey}` },
  })

  if (res.ok) {
    return parsePrometheusText(await res.text())
  }

  // Reading the error body is best-effort: if it cannot be read, report the
  // status line alone rather than masking the HTTP failure.
  const errorBody = await res.text().catch(() => '')
  const detail = errorBody ? ` — ${errorBody.slice(0, 300)}` : ''
  throw new Error(`Fireworks metrics scrape failed: ${res.status} ${res.statusText}${detail}`)
}
| 40 | + |
/** ANSI SGR escape sequence that sets the terminal foreground color for each health status. */
const STATUS_COLORS: Record<DeploymentHealthStatus, string> = {
  healthy: '\u001b[32m', // green
  degraded: '\u001b[33m', // yellow
  unhealthy: '\u001b[31m', // red
  unknown: '\u001b[90m', // bright black (gray)
}

/** ANSI SGR escape sequence that resets all text attributes to the terminal default. */
const RESET = '\u001b[0m'
| 48 | + |
/**
 * Formats a duration given in milliseconds for display.
 *
 * @param value - Duration in milliseconds, or `null` when no value is available.
 * @returns `'n/a'` for `null` or non-finite input, whole milliseconds (`'250ms'`)
 *   below one second, and seconds with two decimals (`'1.50s'`) otherwise.
 */
function formatMs(value: number | null): string {
  // NaN / ±Infinity would otherwise render as "NaNms" or "Infinitys".
  if (value === null || !Number.isFinite(value)) return 'n/a'

  // Pick the unit from the rounded value so 999.5–999.99 prints as "1.00s"
  // instead of the out-of-range "1000ms".
  const wholeMs = Math.round(value)
  if (wholeMs >= 1000) return `${(value / 1000).toFixed(2)}s`
  return `${wholeMs}ms`
}
| 54 | + |
/**
 * Renders a fraction as a percentage string.
 *
 * @param value - Fraction to display, where 1 corresponds to 100%.
 * @param digits - Number of decimal places to keep (defaults to 1).
 * @returns The percentage followed by a `%` sign, e.g. `'12.5%'`.
 */
function formatPct(value: number, digits = 1): string {
  const percent = value * 100
  return percent.toFixed(digits) + '%'
}
| 58 | + |
/**
 * CLI entry point: scrapes Fireworks metrics once, computes the health
 * snapshot with the default thresholds, and prints it.
 *
 * Flags (read from `process.argv`):
 * - `--json`: print `{ scrapeElapsedMs, sampleCount, snapshot }` as JSON and
 *   nothing else.
 * - `--raw`: after the per-deployment report, also print the number of
 *   scraped samples per metric name (has no effect in `--json` mode).
 *
 * Exit codes:
 * - `1` when FIREWORKS_API_KEY is missing or the scrape fails.
 * - `2` when the overall status is `unhealthy`, in both output modes.
 * - `0` otherwise.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2)
  const jsonMode = args.includes('--json')
  const showRaw = args.includes('--raw')

  const apiKey = process.env.FIREWORKS_API_KEY
  if (!apiKey) {
    console.error('❌ FIREWORKS_API_KEY is not set. Add it to .env.local or export it.')
    process.exit(1)
  }

  // An environment override takes precedence over the account id from config.
  const accountId = process.env.FIREWORKS_ACCOUNT_ID ?? FIREWORKS_ACCOUNT_ID
  const deployments = Object.values(FIREWORKS_DEPLOYMENT_MAP)

  const scrapeStart = Date.now()
  let metrics: Awaited<ReturnType<typeof scrapeFireworksMetrics>>
  try {
    metrics = await scrapeFireworksMetrics({ apiKey, accountId })
  } catch (error) {
    console.error('❌ Scrape failed:', error instanceof Error ? error.message : error)
    process.exit(1)
  }
  const scrapeElapsedMs = Date.now() - scrapeStart

  const snapshot = computeSnapshot({
    metrics,
    deployments,
    thresholds: DEFAULT_HEALTH_THRESHOLDS,
  })

  // Record the exit status up front so it applies to BOTH output modes, and use
  // process.exitCode rather than process.exit() so pending stdout writes are
  // flushed before the process ends (matters when the output is piped).
  process.exitCode = snapshot.overall === 'unhealthy' ? 2 : 0

  if (jsonMode) {
    console.log(JSON.stringify({ scrapeElapsedMs, sampleCount: metrics.samples.length, snapshot }, null, 2))
    return
  }

  console.log('🔥 Fireworks Deployment Health')
  console.log('='.repeat(78))
  console.log(`Account: accounts/${accountId}`)
  console.log(`Scraped in: ${scrapeElapsedMs}ms`)
  console.log(`Samples: ${metrics.samples.length}`)
  console.log(`Overall: ${STATUS_COLORS[snapshot.overall]}${snapshot.overall.toUpperCase()}${RESET}`)
  if (snapshot.lastError) console.log(`Last error: ${snapshot.lastError}`)
  console.log()

  // Invert the model -> deployment map so each deployment can be labelled with
  // its model name in the report.
  const modelByDeployment = Object.fromEntries(
    Object.entries(FIREWORKS_DEPLOYMENT_MAP).map(([model, dep]) => [dep, model]),
  )

  for (const [deployment, health] of Object.entries(snapshot.deployments)) {
    const model = modelByDeployment[deployment] ?? '(unknown model)'
    const color = STATUS_COLORS[health.status]
    // padEnd(9) fits the longest status label, "UNHEALTHY".
    console.log(`── ${color}${health.status.toUpperCase().padEnd(9)}${RESET} ${model}`)
    console.log(` deployment: ${deployment}`)
    console.log(` base model: ${health.baseModel ?? 'n/a'}`)
    console.log(` request rate: ${health.metrics.requestRate.toFixed(3)} req/s`)
    console.log(` error rate: ${health.metrics.errorRate.toFixed(3)} err/s (${formatPct(health.metrics.errorFraction)})`)
    console.log(` concurrent requests: ${health.metrics.concurrentRequests.toFixed(2)}`)
    console.log(` KV blocks utilization: ${formatPct(health.metrics.kvBlocksFraction, 0)}`)
    console.log(` KV slots utilization: ${formatPct(health.metrics.kvSlotsFraction, 0)}`)
    console.log(` p50 queue wait: ${formatMs(health.metrics.p50GenerationQueueMs)}`)
    console.log(` p50 TTFT: ${formatMs(health.metrics.p50TimeToFirstTokenMs)}`)
    if (health.reasons.length > 0) {
      console.log(` reasons: ${health.reasons.join('; ')}`)
    }
    console.log()
  }

  if (showRaw) {
    console.log('── Metric name breakdown ─────────────────────────────')
    // Tally samples per metric name, then list the most frequent names first.
    const counts = new Map<string, number>()
    for (const s of metrics.samples) {
      counts.set(s.name, (counts.get(s.name) ?? 0) + 1)
    }
    const sorted = [...counts.entries()].sort((a, b) => b[1] - a[1])
    for (const [name, count] of sorted) {
      console.log(` ${String(count).padStart(4)} ${name}`)
    }
  }
}

// Surface unexpected failures (e.g. an exception while computing or printing
// the snapshot) as a readable message and a non-zero exit code instead of an
// unhandled promise rejection.
main().catch((error: unknown) => {
  console.error('❌ Unexpected error:', error instanceof Error ? error.message : error)
  process.exit(1)
})
0 commit comments