|
| 1 | +/** |
| 2 | + * Production health monitoring for Cloudflare Workers cron trigger. |
| 3 | + * |
| 4 | + * Checks three critical signals on every run: |
| 5 | + * 1. /api/packages returns a non-empty array |
| 6 | + * 2. /api/health returns status "healthy" |
| 7 | + * 3. DB is reachable (via health endpoint) |
| 8 | + * |
| 9 | + * If any check fails, posts a JSON alert to ALERT_WEBHOOK_URL (if set). |
| 10 | + * Works with Discord, Slack, PagerDuty, or any webhook receiver. |
| 11 | + */ |
| 12 | + |
/**
 * Outcome of a single health probe.
 *
 * Every probe in this file resolves to one of these instead of throwing, so
 * the results can be filtered, logged and forwarded to the alert webhook.
 */
interface CheckResult {
  /** Short identifier of the probe (this file uses 'packages' and 'health'). */
  name: string;
  /** `true` when the probe passed, `false` when it failed. */
  ok: boolean;
  /** Human-readable summary shown in the logs and in the alert message. */
  detail: string;
}
| 18 | + |
/**
 * Environment bindings read by the monitor.
 */
interface MonitorEnv {
  /**
   * Base URL of the application to probe.
   *
   * Optional: `runHealthChecks` substitutes a default when this is not set, so
   * the type must allow it to be absent.
   */
  APP_URL?: string;
  /** Webhook that receives the JSON alert; no alert is posted when unset. */
  ALERT_WEBHOOK_URL?: string;
}
| 23 | + |
/**
 * Probes `<appUrl>/api/packages` and reports whether it serves at least one
 * package.
 *
 * The payload may be either `{ "packages": [...] }` or a bare top-level array.
 * Any other shape is reported as a failure instead of being counted.
 *
 * @param appUrl - Base URL of the application; trailing slashes are tolerated.
 * @returns A `CheckResult` named `packages`. Never rejects: network errors,
 *   the 10 s timeout and JSON parse errors are folded into a failed result.
 */
async function checkPackages(appUrl: string): Promise<CheckResult> {
  const name = 'packages';
  try {
    // Strip trailing slashes so "https://host/" does not become "//api/packages".
    const base = appUrl.replace(/\/+$/, '');
    const res = await fetch(`${base}/api/packages`, { signal: AbortSignal.timeout(10_000) });
    if (!res.ok) {
      return { name, ok: false, detail: `HTTP ${res.status}` };
    }

    const body: unknown = await res.json();
    const packages: unknown = Array.isArray(body)
      ? body
      : (body as { packages?: unknown } | null)?.packages;

    // Only a real array counts; e.g. a string would otherwise pass via `.length`.
    if (!Array.isArray(packages)) {
      return { name, ok: false, detail: 'response has no packages array' };
    }
    if (packages.length === 0) {
      return { name, ok: false, detail: 'returned empty array' };
    }
    return { name, ok: true, detail: `${packages.length} packages` };
  } catch (err) {
    return { name, ok: false, detail: String(err) };
  }
}
| 40 | + |
/**
 * Probes `<appUrl>/api/health` and reports whether the app considers itself
 * healthy.
 *
 * The check passes only when the response is 2xx AND its JSON body has
 * `status: "healthy"`. On failure the detail lists the HTTP status (for
 * non-2xx responses) followed by every entry of `checks` whose value is not
 * `"ok"`, or `status=<value>` when the body carries no `checks` object.
 *
 * @param appUrl - Base URL of the application; trailing slashes are tolerated.
 * @returns A `CheckResult` named `health`. Never rejects: network errors, the
 *   10 s timeout and JSON parse errors are folded into a failed result.
 */
async function checkHealth(appUrl: string): Promise<CheckResult> {
  type HealthBody = { status?: string; checks?: Record<string, string> } | null;
  const name = 'health';
  try {
    // Strip trailing slashes so "https://host/" does not become "//api/health".
    const base = appUrl.replace(/\/+$/, '');
    const res = await fetch(`${base}/api/health`, { signal: AbortSignal.timeout(10_000) });

    let body: HealthBody;
    try {
      body = (await res.json()) as HealthBody;
    } catch (err) {
      // An error page (e.g. an HTML 502) is not JSON; the HTTP status is far
      // more useful in an alert than a JSON syntax error.
      if (!res.ok) {
        return { name, ok: false, detail: `HTTP ${res.status}` };
      }
      throw err;
    }

    const healthy = body?.status === 'healthy';
    if (res.ok && healthy) {
      return { name, ok: true, detail: 'healthy' };
    }

    const parts: string[] = [];
    if (!res.ok) {
      parts.push(`HTTP ${res.status}`);
    }
    const checks = body?.checks;
    if (checks && typeof checks === 'object') {
      for (const [key, value] of Object.entries(checks)) {
        if (value !== 'ok') {
          parts.push(`${key}=${value}`);
        }
      }
    } else if (!healthy) {
      parts.push(`status=${body?.status}`);
    }
    // Unhealthy status but every listed check is "ok": nothing specific to report.
    return { name, ok: false, detail: parts.join(', ') || 'degraded' };
  } catch (err) {
    return { name, ok: false, detail: String(err) };
  }
}
| 59 | + |
/**
 * Posts a JSON alert describing the failed checks to a webhook.
 *
 * The payload carries two renderings of the same message: `content`
 * (Markdown, the field Discord reads) and `text` (plain, the field Slack
 * reads). Each receiver is expected to ignore the field it does not know.
 *
 * @param webhookUrl - Destination webhook URL.
 * @param failures - Failed checks to report; nothing is sent when empty.
 * @returns Resolves once the POST completes. A non-2xx answer from the webhook
 *   is logged with `console.error` rather than thrown.
 * @throws Whatever `fetch` rejects with (network error, 10 s timeout).
 */
async function sendAlert(webhookUrl: string, failures: CheckResult[]): Promise<void> {
  // Never post a "0 check(s) failed" alert.
  if (failures.length === 0) {
    return;
  }

  // Discord rejects messages whose `content` exceeds 2000 characters.
  const discordContentLimit = 2000;

  const header = `${failures.length} check(s) failed:`;
  const markdownLines = failures.map((f) => `• **${f.name}**: ${f.detail}`).join('\n');
  const plainLines = failures.map((f) => `• ${f.name}: ${f.detail}`).join('\n');

  const content = `🚨 **openboot.dev health alert** — ${header}\n${markdownLines}`;
  const payload = {
    content:
      content.length > discordContentLimit
        ? `${content.slice(0, discordContentLimit - 1)}…`
        : content,
    text: `openboot.dev health alert — ${header}\n${plainLines}`,
  };

  const res = await fetch(webhookUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(10_000),
  });

  // A rejected alert (bad URL, rate limit, …) must not disappear silently.
  if (!res.ok) {
    console.error(`[monitor] alert webhook responded with HTTP ${res.status}`);
  }
}
| 75 | + |
/**
 * Runs every health probe once, logs the outcome and, when something failed
 * and a webhook is configured, posts an alert.
 *
 * This function is deliberately non-fatal: it resolves even when checks fail
 * or the alert cannot be delivered. The original rationale is that an
 * exception escaping a cron run may cause the run to be retried, which could
 * flood the alert channel.
 *
 * @param env - Monitor bindings. `APP_URL` falls back to
 *   `https://openboot.dev` when unset or blank; alerts are only sent when
 *   `ALERT_WEBHOOK_URL` is set.
 */
export async function runHealthChecks(env: MonitorEnv): Promise<void> {
  // `||` rather than `??`: an empty or blank APP_URL should also use the default.
  const appUrl = env.APP_URL?.trim() || 'https://openboot.dev';

  const results = await Promise.all([checkPackages(appUrl), checkHealth(appUrl)]);
  const failures = results.filter((r) => !r.ok);

  // Log first, so the outcome is recorded even if alert delivery goes wrong.
  for (const r of results) {
    const icon = r.ok ? '✓' : '✗';
    console.log(`[monitor] ${icon} ${r.name}: ${r.detail}`);
  }

  if (failures.length === 0) {
    return;
  }
  console.error(`[monitor] ${failures.length} check(s) failed`);

  if (env.ALERT_WEBHOOK_URL) {
    try {
      await sendAlert(env.ALERT_WEBHOOK_URL, failures);
    } catch (err) {
      // A broken webhook must not turn the whole run into an exception.
      console.error(`[monitor] failed to deliver alert: ${String(err)}`);
    }
  }
}
0 commit comments