ensure billed

jahooma · jahooma · commit fce31b8d61d7 · 2026-04-17T03:00:17.000-07:00
diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts
@@ -23,7 +23,22 @@ import type {
   OpenRouterErrorMetadata,
 } from './types'
 
-type StreamState = { responseText: string; reasoningText: string; ttftMs: number | null }
+type StreamState = {
+  responseText: string
+  reasoningText: string
+  ttftMs: number | null
+  // Captured from the first regular chunk we see. Needed to bill via the
+  // generation-lookup fallback when a stream ends without a usage-bearing chunk
+  // (e.g., upstream error chunk, truncated response, network drop).
+  generationId: string | null
+  model: string | null
+  billed: boolean
+}
+
+// How long to wait after stream close before querying OpenRouter's generation
+// endpoint. OR finalizes generation records asynchronously; 500ms is enough
+// in practice and keeps the delay off the client response path.
+const GENERATION_LOOKUP_DELAY_MS = 500
 
 // Extended timeout for deep-thinking models (e.g., gpt-5) that can take
 // a long time to start streaming.
@@ -334,9 +349,45 @@ export async function handleOpenRouterStream({
   }
 
   let heartbeatInterval: NodeJS.Timeout
-  let state: StreamState = { responseText: '', reasoningText: '', ttftMs: null }
+  let state: StreamState = {
+    responseText: '',
+    reasoningText: '',
+    ttftMs: null,
+    generationId: null,
+    model: null,
+    billed: false,
+  }
   let clientDisconnected = false
 
+  // Runs once on any stream-exit path. If we didn't bill through the normal
+  // path (stream ended without a usage chunk, got a provider error chunk,
+  // network drop), ask OpenRouter for the generation's final cost so we still
+  // capture what we were charged. Without this, a well-timed mid-stream failure
+  // lets the caller walk away with free completion tokens.
+  const ensureBilled = async () => {
+    if (state.billed || !state.generationId) return
+    await new Promise((resolve) =>
+      setTimeout(resolve, GENERATION_LOOKUP_DELAY_MS),
+    )
+    await fallbackBillFromGeneration({
+      generationId: state.generationId,
+      openrouterApiKey,
+      userId,
+      stripeCustomerId,
+      agentId,
+      clientId,
+      clientRequestId,
+      costMode,
+      byok,
+      startTime,
+      state,
+      request: body,
+      fetch,
+      logger,
+      insertMessage: insertMessageBigquery,
+    })
+  }
+
   // Create a ReadableStream that Next.js can handle
   const stream = new ReadableStream({
     async start(controller) {
@@ -420,6 +471,7 @@ export async function handleOpenRouterStream({
         if (!clientDisconnected) {
           controller.close()
         }
+        await ensureBilled()
       } catch (error) {
         if (!clientDisconnected) {
           controller.error(error)
@@ -429,6 +481,7 @@ export async function handleOpenRouterStream({
             'Error after client disconnect in OpenRouter stream',
           )
         }
+        await ensureBilled()
       } finally {
         clearInterval(heartbeatInterval)
       }
@@ -609,6 +662,7 @@ async function handleResponse({
     ttftMs: state.ttftMs,
   })
 
+  state.billed = true
   return { state, billedCredits }
 }
 
@@ -633,6 +687,17 @@ async function handleStreamChunk({
   // still storing enough data for logging and billing. 1MB is a generous limit.
   const MAX_BUFFER_SIZE = 1 * 1024 * 1024 // 1MB
 
+  // Capture generation id and model from any regular chunk so we can still
+  // bill via the generation-lookup fallback if the stream never emits usage.
+  if (!('error' in data)) {
+    if (data.id && !state.generationId) {
+      state.generationId = data.id
+    }
+    if (data.model && !state.model) {
+      state.model = data.model
+    }
+  }
+
   if ('error' in data) {
     // Log detailed error information for stream errors (e.g., Forbidden from Anthropic)
     const errorData = data.error as {
@@ -819,6 +884,160 @@ function creditsToFakeCost(credits: number): number {
   return credits / ((1 + PROFIT_MARGIN) * 100)
 }
 
+/**
+ * Bill a stream that exited before a usage-bearing chunk arrived by looking up
+ * the generation cost from OpenRouter's /generation endpoint. Mutates
+ * `state.billed` on success so callers can tell the gap was filled.
+ *
+ * Never throws — failures are logged and swallowed. The worst case is that we
+ * miss this one request, which is still strictly better than the old behavior.
+ */
+async function fallbackBillFromGeneration(params: {
+  generationId: string
+  openrouterApiKey: string | null
+  userId: string
+  stripeCustomerId?: string | null
+  agentId: string
+  clientId: string | null
+  clientRequestId: string | null
+  costMode: string | undefined
+  byok: boolean
+  startTime: Date
+  state: StreamState
+  request: unknown
+  fetch: typeof globalThis.fetch
+  logger: Logger
+  insertMessage: InsertMessageBigqueryFn
+}): Promise<void> {
+  const {
+    generationId,
+    openrouterApiKey,
+    userId,
+    stripeCustomerId,
+    agentId,
+    clientId,
+    clientRequestId,
+    costMode,
+    byok,
+    startTime,
+    state,
+    request,
+    fetch,
+    logger,
+    insertMessage,
+  } = params
+
+  try {
+    const response = await fetch(
+      `https://openrouter.ai/api/v1/generation?id=${encodeURIComponent(generationId)}`,
+      {
+        method: 'GET',
+        headers: {
+          Authorization: `Bearer ${openrouterApiKey ?? env.OPEN_ROUTER_API_KEY}`,
+        },
+      },
+    )
+
+    if (!response.ok) {
+      logger.error(
+        {
+          generationId,
+          status: response.status,
+          statusText: response.statusText,
+          userId,
+          agentId,
+          model: state.model,
+          responseTextLength: state.responseText.length,
+        },
+        'fallbackBillFromGeneration: generation lookup failed',
+      )
+      return
+    }
+
+    const body = (await response.json()) as { data?: Record<string, unknown> }
+    const data = body?.data
+    if (!data) {
+      logger.warn(
+        { generationId, userId, agentId },
+        'fallbackBillFromGeneration: generation lookup returned no data',
+      )
+      return
+    }
+
+    const num = (v: unknown) => (typeof v === 'number' ? v : 0)
+    const usageData: UsageData = {
+      inputTokens: num(data.tokens_prompt) || num(data.native_tokens_prompt),
+      outputTokens:
+        num(data.tokens_completion) || num(data.native_tokens_completion),
+      cacheReadInputTokens: num(data.native_tokens_cached),
+      reasoningTokens: num(data.native_tokens_reasoning),
+      cost: num(data.total_cost),
+    }
+    const resolvedModel =
+      state.model ?? (typeof data.model === 'string' ? data.model : '')
+
+    logger.warn(
+      {
+        generationId,
+        userId,
+        agentId,
+        model: resolvedModel,
+        cost: usageData.cost,
+        inputTokens: usageData.inputTokens,
+        outputTokens: usageData.outputTokens,
+        responseTextLength: state.responseText.length,
+      },
+      'fallbackBillFromGeneration: billing from generation lookup (stream exited without usage chunk)',
+    )
+
+    insertMessageToBigQuery({
+      messageId: generationId,
+      userId,
+      startTime,
+      request,
+      reasoningText: state.reasoningText,
+      responseText: state.responseText,
+      usageData,
+      logger,
+      insertMessageBigquery: insertMessage,
+    }).catch((error) => {
+      logger.error(
+        { error: getErrorObject(error), generationId },
+        'fallbackBillFromGeneration: BigQuery insert failed',
+      )
+    })
+
+    await consumeCreditsForMessage({
+      messageId: generationId,
+      userId,
+      stripeCustomerId,
+      agentId,
+      clientId,
+      clientRequestId,
+      startTime,
+      model: resolvedModel,
+      reasoningText: state.reasoningText,
+      responseText: state.responseText,
+      usageData,
+      byok,
+      logger,
+      costMode,
+      ttftMs: state.ttftMs,
+    })
+    state.billed = true
+  } catch (error) {
+    logger.error(
+      {
+        error: getErrorObject(error),
+        generationId,
+        userId,
+        agentId,
+      },
+      'fallbackBillFromGeneration threw',
+    )
+  }
+}
+
 /**
  * Overwrite the cost field in the final SSE chunk to reflect actual billed credits.
  * This ensures the SDK calculates the exact credits value we stored in the database,