From c0e356408ea9c0ddd0f62847977d860fb0a75d12 Mon Sep 17 00:00:00 2001 From: Ben Schiller Date: Mon, 16 Feb 2026 20:21:28 -0800 Subject: [PATCH 1/2] Issue #10567: Add token usage display modes and turn/session/history usage surfacing --- core/config/types.ts | 15 ++ core/index.d.ts | 9 ++ core/llm/index.ts | 11 +- core/llm/llms/Anthropic.ts | 97 ++++++++---- core/llm/llms/Bedrock.ts | 65 +++++++- core/llm/llms/Cohere.ts | 69 +++++++++ core/llm/llms/Gemini.ts | 52 +++++++ core/llm/llms/Ollama.ts | 67 +++++++- core/llm/openaiTypeConverters.test.ts | 59 ++++++- core/llm/openaiTypeConverters.ts | 145 +++++++++++++++++- core/protocol/passThrough.ts | 1 + core/protocol/webview.ts | 2 + core/util/filesystem.ts | 1 + core/util/history.ts | 62 ++++++++ docs/ide-extensions/chat/how-to-customize.mdx | 19 ++- extensions/vscode/package.json | 17 ++ extensions/vscode/src/VsCodeIde.ts | 4 + .../vscode/src/extension/VsCodeExtension.ts | 1 + .../components/History/HistoryTableRow.tsx | 26 +++- gui/src/components/History/index.tsx | 16 +- gui/src/components/History/util.ts | 63 +++++++- .../StepContainer/StepContainer.tsx | 9 ++ gui/src/context/MockIdeMessenger.ts | 8 + gui/src/hooks/useTokenUsageSetting.ts | 33 ++++ gui/src/pages/gui/Chat.tsx | 30 +++- gui/src/pages/stats.tsx | 47 ++++-- gui/src/redux/slices/sessionSlice.test.ts | 66 ++++++++ gui/src/redux/slices/sessionSlice.ts | 8 + gui/src/redux/thunks/session.ts | 13 ++ gui/src/redux/thunks/streamNormalInput.ts | 54 ++++++- gui/src/util/usage.ts | 59 +++++++ 31 files changed, 1061 insertions(+), 67 deletions(-) create mode 100644 gui/src/hooks/useTokenUsageSetting.ts create mode 100644 gui/src/util/usage.ts diff --git a/core/config/types.ts b/core/config/types.ts index d9f58aca6e2..7f1088e512a 100644 --- a/core/config/types.ts +++ b/core/config/types.ts @@ -248,6 +248,7 @@ declare global { title: string; dateCreated: string; workspaceDirectory: string; + usage?: Usage; } export interface RangeInFile { @@ -323,11 +324,23 @@ declare global { role: "user"; content: MessageContent; } + + export interface Usage { + completionTokens: number; + promptTokens: number; + totalTokens?: number; + provider?: string; + model?: string; + source?: "provider" | "estimated"; + ts?: string; + raw?: Record; + } export interface AssistantChatMessage { role: "assistant"; content: MessageContent; toolCalls?: ToolCallDelta[]; + usage?: Usage; } export interface SystemChatMessage { @@ -387,6 +400,7 @@ declare global { completionOptions: CompletionOptions; prompt: string; completion: string; + usage?: Usage; } type MessageModes = "chat" | "edit"; @@ -651,6 +665,7 @@ declare global { remoteConfigSyncPeriod: number; userToken: string; pauseCodebaseIndexOnStart: boolean; + showTokenUsage?: "never" | "history" | "session" | "turn"; } export interface IDE { diff --git a/core/index.d.ts b/core/index.d.ts index 1e7c6ac4baf..8239afd0169 100644 --- a/core/index.d.ts +++ b/core/index.d.ts @@ -294,6 +294,7 @@ export interface BaseSessionMetadata { dateCreated: string; workspaceDirectory: string; messageCount?: number; + usage?: Usage; } export interface RangeInFile { @@ -406,6 +407,9 @@ export interface ThinkingChatMessage { export interface Usage { completionTokens: number; promptTokens: number; + totalTokens?: number; + provider?: string; + model?: string; promptTokensDetails?: { cachedTokens?: number; /** This an Anthropic-specific property */ @@ -418,6 +422,9 @@ export interface Usage { rejectedPredictionTokens?: number; audioTokens?: number; }; + source?: "provider" | "estimated"; + 
ts?: string; + raw?: Record; } export interface AssistantChatMessage { @@ -489,6 +496,7 @@ export interface PromptLog { modelProvider: string; prompt: string; completion: string; + usage?: Usage; } export type MessageModes = "chat" | "agent" | "plan" | "background"; @@ -815,6 +823,7 @@ export interface IdeSettings { userToken: string; continueTestEnvironment: "none" | "production" | "staging" | "local"; pauseCodebaseIndexOnStart: boolean; + showTokenUsage?: "never" | "history" | "session" | "turn"; } export interface FileStats { diff --git a/core/llm/index.ts b/core/llm/index.ts index dd6dd9c00aa..536b8796ee6 100644 --- a/core/llm/index.ts +++ b/core/llm/index.ts @@ -356,8 +356,14 @@ export abstract class BaseLLM implements ILLM { usage: Usage | undefined, error?: any, ): InteractionStatus { - let promptTokens = this.countTokens(prompt); - let generatedTokens = this.countTokens(completion); + let promptTokens = + typeof usage?.promptTokens === "number" + ? usage.promptTokens + : this.countTokens(prompt); + let generatedTokens = + typeof usage?.completionTokens === "number" + ? usage.completionTokens + : this.countTokens(completion); let thinkingTokens = thinking ? this.countTokens(thinking) : 0; TokensBatchingService.getInstance().addTokens( @@ -1353,6 +1359,7 @@ export abstract class BaseLLM implements ILLM { modelProvider: this.underlyingProviderName, prompt, completion: completion.join(""), + usage, }; } diff --git a/core/llm/llms/Anthropic.ts b/core/llm/llms/Anthropic.ts index fb7d7f8c681..83d906671d3 100644 --- a/core/llm/llms/Anthropic.ts +++ b/core/llm/llms/Anthropic.ts @@ -85,6 +85,55 @@ class Anthropic extends BaseLLM { return finalOptions; } + private parseAnthropicUsage( + rawUsage: + | { + input_tokens?: number | null; + output_tokens?: number | null; + cache_read_input_tokens?: number | null; + cache_creation_input_tokens?: number | null; + } + | undefined, + ): Usage | undefined { + if (!rawUsage) { + return undefined; + } + + const promptTokens = rawUsage.input_tokens; + const completionTokens = rawUsage.output_tokens; + const cachedTokens = rawUsage.cache_read_input_tokens; + const cacheWriteTokens = rawUsage.cache_creation_input_tokens; + + if ( + typeof promptTokens !== "number" && + typeof completionTokens !== "number" && + typeof cachedTokens !== "number" && + typeof cacheWriteTokens !== "number" + ) { + return undefined; + } + + const resolvedPromptTokens = promptTokens ?? 0; + const resolvedCompletionTokens = completionTokens ?? 0; + + return { + promptTokens: resolvedPromptTokens, + completionTokens: resolvedCompletionTokens, + totalTokens: resolvedPromptTokens + resolvedCompletionTokens, + promptTokensDetails: + typeof cachedTokens === "number" || typeof cacheWriteTokens === "number" + ? { + cachedTokens: + typeof cachedTokens === "number" ? cachedTokens : undefined, + cacheWriteTokens: + typeof cacheWriteTokens === "number" + ? cacheWriteTokens + : undefined, + } + : undefined, + }; + } + private convertMessageContentToBlocks( content: MessageContent, ): ContentBlockParam[] { @@ -282,31 +331,18 @@ class Anthropic extends BaseLLM { if (stream === false) { const json = await response.json(); - const cost = json.usage - ? { - inputTokens: json.usage.input_tokens, - outputTokens: json.usage.output_tokens, - totalTokens: json.usage.input_tokens + json.usage.output_tokens, - } - : {}; + const usage = this.parseAnthropicUsage(json.usage); yield { role: "assistant", content: json.content[0].text, - ...(Object.keys(cost).length > 0 ? { cost } : {}), + ...(usage ? 
{ usage } : {}), }; return; } let lastToolUseId: string | undefined; let lastToolUseName: string | undefined; - let usage: Usage = { - promptTokens: 0, - completionTokens: 0, - promptTokensDetails: { - cachedTokens: 0, - cacheWriteTokens: 0, - }, - }; + let usage: Usage | undefined; for await (const event of streamSse(response)) { // https://docs.anthropic.com/en/api/messages-streaming#event-types @@ -315,17 +351,20 @@ class Anthropic extends BaseLLM { case "message_start": // Capture initial usage information const startEvent = rawEvent as RawMessageStartEvent; - usage.promptTokens = startEvent.message.usage.input_tokens; - usage.promptTokensDetails!.cachedTokens = - startEvent.message.usage.cache_read_input_tokens ?? undefined; - usage.promptTokensDetails!.cacheWriteTokens = - startEvent.message.usage.cache_creation_input_tokens ?? undefined; + usage = this.parseAnthropicUsage(startEvent.message.usage) ?? usage; break; case "message_delta": // Update usage information during streaming const deltaEvent = rawEvent as RawMessageDeltaEvent; - if (deltaEvent.usage) { - usage.completionTokens = deltaEvent.usage.output_tokens; + if (deltaEvent.usage?.output_tokens !== undefined) { + const promptTokens = usage?.promptTokens ?? 0; + const completionTokens = deltaEvent.usage.output_tokens; + usage = { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + promptTokensDetails: usage?.promptTokensDetails, + }; } break; case "content_block_start": @@ -393,11 +432,13 @@ class Anthropic extends BaseLLM { } } - yield { - role: "assistant", - content: "", - usage, - }; + if (usage) { + yield { + role: "assistant", + content: "", + usage, + }; + } } protected async *_streamChat( diff --git a/core/llm/llms/Bedrock.ts b/core/llm/llms/Bedrock.ts index 7b338e9df59..ba35e84e0ff 100644 --- a/core/llm/llms/Bedrock.ts +++ b/core/llm/llms/Bedrock.ts @@ -18,7 +18,13 @@ import { import { fromNodeProviderChain } from "@aws-sdk/credential-providers"; import type { CompletionOptions } from "../../index.js"; -import { ChatMessage, Chunk, LLMOptions, MessageContent } from "../../index.js"; +import { + ChatMessage, + Chunk, + LLMOptions, + MessageContent, + Usage, +} from "../../index.js"; import { safeParseToolCallArgs } from "../../tools/parseArgs.js"; import { renderChatMessage, stripImages } from "../../util/messageContent.js"; import { parseDataUrl } from "../../util/url.js"; @@ -71,6 +77,52 @@ class Bedrock extends BaseLLM { }; } + private parseBedrockUsage(rawUsage: any): Usage | undefined { + if (!rawUsage) { + return undefined; + } + + const promptTokens = rawUsage.inputTokens ?? rawUsage.promptTokens; + const completionTokens = rawUsage.outputTokens ?? rawUsage.completionTokens; + const totalTokens = rawUsage.totalTokens; + const cachedTokens = + rawUsage.cacheReadInputTokens ?? rawUsage.cache_read_input_tokens; + const cacheWriteTokens = + rawUsage.cacheWriteInputTokens ?? rawUsage.cache_write_input_tokens; + + if ( + typeof promptTokens !== "number" && + typeof completionTokens !== "number" && + typeof totalTokens !== "number" && + typeof cachedTokens !== "number" && + typeof cacheWriteTokens !== "number" + ) { + return undefined; + } + + const resolvedPromptTokens = promptTokens ?? 0; + const resolvedCompletionTokens = completionTokens ?? 0; + const resolvedTotalTokens = + totalTokens ?? 
resolvedPromptTokens + resolvedCompletionTokens; + + return { + promptTokens: resolvedPromptTokens, + completionTokens: resolvedCompletionTokens, + totalTokens: resolvedTotalTokens, + promptTokensDetails: + typeof cachedTokens === "number" || typeof cacheWriteTokens === "number" + ? { + cachedTokens: + typeof cachedTokens === "number" ? cachedTokens : undefined, + cacheWriteTokens: + typeof cacheWriteTokens === "number" + ? cacheWriteTokens + : undefined, + } + : undefined, + }; + } + protected async *_streamComplete( prompt: string, signal: AbortSignal, @@ -139,11 +191,12 @@ class Bedrock extends BaseLLM { cacheReadInputTokens: 0, cacheWriteInputTokens: 0, }; + let usage: Usage | undefined; try { for await (const chunk of response.stream) { if (chunk.metadata?.usage) { - console.log(`${JSON.stringify(chunk.metadata.usage)}`); + usage = this.parseBedrockUsage(chunk.metadata.usage) ?? usage; } const contentBlockDelta: ContentBlockDelta | undefined = @@ -241,6 +294,14 @@ class Bedrock extends BaseLLM { // Clean up state and let the original error bubble up to the retry decorator throw error; } + + if (usage) { + yield { + role: "assistant", + content: "", + usage, + }; + } } /** diff --git a/core/llm/llms/Cohere.ts b/core/llm/llms/Cohere.ts index b0a27b6cf8c..6759f9c80ec 100644 --- a/core/llm/llms/Cohere.ts +++ b/core/llm/llms/Cohere.ts @@ -4,6 +4,7 @@ import { Chunk, CompletionOptions, LLMOptions, + Usage, } from "../../index.js"; import { renderChatMessage, stripImages } from "../../util/messageContent.js"; import { BaseLLM } from "../index.js"; @@ -17,6 +18,47 @@ class Cohere extends BaseLLM { }; static maxStopSequences = 5; + private parseCohereUsage(rawUsage: any): Usage | undefined { + if (!rawUsage) { + return undefined; + } + + const usage = rawUsage.tokens ?? rawUsage; + const promptTokens = usage.input_tokens ?? usage.inputTokens; + const completionTokens = usage.output_tokens ?? usage.outputTokens; + const totalTokens = usage.total_tokens ?? usage.totalTokens; + const cachedTokens = + rawUsage.cached_tokens ?? + usage.cached_tokens ?? + rawUsage.cache_read_input_tokens ?? + usage.cache_read_input_tokens; + + if ( + typeof promptTokens !== "number" && + typeof completionTokens !== "number" && + typeof totalTokens !== "number" + ) { + return undefined; + } + + const resolvedPromptTokens = promptTokens ?? 0; + const resolvedCompletionTokens = completionTokens ?? 0; + const resolvedTotalTokens = + totalTokens ?? resolvedPromptTokens + resolvedCompletionTokens; + + return { + promptTokens: resolvedPromptTokens, + completionTokens: resolvedCompletionTokens, + totalTokens: resolvedTotalTokens, + promptTokensDetails: + typeof cachedTokens === "number" + ? 
{ + cachedTokens, + } + : undefined, + }; + } + private _convertMessages(msgs: ChatMessage[]): any[] { const messages = []; for (const m of msgs) { @@ -179,6 +221,7 @@ class Cohere extends BaseLLM { if (options.stream === false) { const data = await resp.json(); + const usage = this.parseCohereUsage(data?.usage); for (const content of data.message.content) { if (content.thinking) { yield { role: "thinking", content: content.thinking }; @@ -202,13 +245,28 @@ class Cohere extends BaseLLM { }, })), }; + if (usage) { + yield { + role: "assistant", + content: "", + usage, + }; + } return; } + if (usage) { + yield { + role: "assistant", + content: "", + usage, + }; + } return; } let lastToolUseId: string | undefined; let lastToolUseName: string | undefined; + let usage: Usage | undefined; for await (const value of streamSse(resp)) { // https://docs.cohere.com/v2/docs/streaming#stream-events switch (value.type) { @@ -274,10 +332,21 @@ class Cohere extends BaseLLM { lastToolUseId = undefined; lastToolUseName = undefined; break; + case "message-end": + usage = this.parseCohereUsage(value?.delta?.usage ?? value?.usage); + break; default: break; } } + + if (usage) { + yield { + role: "assistant", + content: "", + usage, + }; + } } protected async _embed(chunks: string[]): Promise { diff --git a/core/llm/llms/Gemini.ts b/core/llm/llms/Gemini.ts index 3e48a633196..55b20663f2e 100644 --- a/core/llm/llms/Gemini.ts +++ b/core/llm/llms/Gemini.ts @@ -8,6 +8,7 @@ import { MessagePart, TextMessagePart, ToolCallDelta, + Usage, } from "../../index.js"; import { safeParseToolCallArgs } from "../../tools/parseArgs.js"; import { renderChatMessage, stripImages } from "../../util/messageContent.js"; @@ -78,6 +79,43 @@ class Gemini extends BaseLLM { return finalOptions; } + private parseGeminiUsage(rawUsage: any): Usage | undefined { + if (!rawUsage) { + return undefined; + } + + const promptTokens = rawUsage.promptTokenCount; + const completionTokens = rawUsage.candidatesTokenCount; + const totalTokens = rawUsage.totalTokenCount; + const cachedTokens = rawUsage.cachedContentTokenCount; + + if ( + typeof promptTokens !== "number" && + typeof completionTokens !== "number" && + typeof totalTokens !== "number" && + typeof cachedTokens !== "number" + ) { + return undefined; + } + + const resolvedPromptTokens = promptTokens ?? 0; + const resolvedCompletionTokens = completionTokens ?? 0; + const resolvedTotalTokens = + totalTokens ?? resolvedPromptTokens + resolvedCompletionTokens; + + return { + promptTokens: resolvedPromptTokens, + completionTokens: resolvedCompletionTokens, + totalTokens: resolvedTotalTokens, + promptTokensDetails: + typeof cachedTokens === "number" + ? 
{ + cachedTokens, + } + : undefined, + }; + } + protected async *_streamComplete( prompt: string, signal: AbortSignal, @@ -368,6 +406,7 @@ class Gemini extends BaseLLM { stream: AsyncIterable, ): AsyncGenerator { let buffer = ""; + let usage: Usage | undefined; for await (const chunk of stream) { buffer += chunk; if (buffer.startsWith("[")) { @@ -397,6 +436,11 @@ class Gemini extends BaseLLM { throw new Error(data.error.message); } + const parsedUsage = this.parseGeminiUsage((data as any).usageMetadata); + if (parsedUsage) { + usage = parsedUsage; + } + // In case of max tokens reached, gemini will sometimes return content with no parts, even though that doesn't match the API spec const contentParts = data?.candidates?.[0]?.content?.parts; if (contentParts) { @@ -453,6 +497,14 @@ class Gemini extends BaseLLM { buffer = ""; } } + + if (usage) { + yield { + role: "assistant", + content: "", + usage, + }; + } } private async *streamChatGemini( diff --git a/core/llm/llms/Ollama.ts b/core/llm/llms/Ollama.ts index d2d8660b644..08386f77f6d 100644 --- a/core/llm/llms/Ollama.ts +++ b/core/llm/llms/Ollama.ts @@ -10,6 +10,7 @@ import { LLMOptions, ModelInstaller, ThinkingChatMessage, + Usage, } from "../../index.js"; import { renderChatMessage } from "../../util/messageContent.js"; import { getRemoteModelInfo } from "../../util/ollamaHelper.js"; @@ -450,6 +451,61 @@ class Ollama extends BaseLLM implements ModelInstaller { }); let isThinking: boolean = false; + function extractUsage(res: OllamaChatResponse): Usage | undefined { + if ("error" in res || "type" in res) { + return undefined; + } + + if (!res.done) { + return undefined; + } + + if ( + typeof res.prompt_eval_count !== "number" && + typeof res.eval_count !== "number" + ) { + return undefined; + } + + const promptTokens = res.prompt_eval_count ?? 0; + const completionTokens = res.eval_count ?? 0; + return { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + source: "provider", + ts: new Date().toISOString(), + raw: { + ollama_prompt_eval_count: res.prompt_eval_count, + ollama_eval_count: res.eval_count, + ollama_done_reason: res.done_reason, + }, + }; + } + + function attachUsage( + messages: ChatMessage[], + usage: Usage | undefined, + ): ChatMessage[] { + if (!usage) { + return messages; + } + + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i].role === "assistant") { + (messages[i] as ChatMessage & { usage?: Usage }).usage = usage; + return messages; + } + } + + messages.push({ + role: "assistant", + content: "", + usage, + }); + return messages; + } + function convertChatMessage(res: OllamaChatResponse): ChatMessage[] { if ("error" in res) { throw new Error(res.error); @@ -484,7 +540,7 @@ class Ollama extends BaseLLM implements ModelInstaller { role: "assistant", content: content, }; - return [chatMessage]; + return attachUsage([chatMessage], extractUsage(res)); } return []; } @@ -504,7 +560,7 @@ class Ollama extends BaseLLM implements ModelInstaller { if (thinkingMessage && !content) { // When Streaming you can't have both thinking and content - return [thinkingMessage]; + return attachUsage([thinkingMessage], extractUsage(res)); } // Either not thinking, or not streaming const chatMessage: ChatMessage = { role: "assistant", content }; @@ -523,11 +579,14 @@ class Ollama extends BaseLLM implements ModelInstaller { } // Return both thinking and chat messages if applicable - return thinkingMessage ? [thinkingMessage, chatMessage] : [chatMessage]; + return attachUsage( + thinkingMessage ? 
[thinkingMessage, chatMessage] : [chatMessage], + extractUsage(res), + ); } // Fallback for all other roles - return [{ role, content }]; + return attachUsage([{ role, content }], extractUsage(res)); } if (chatOptions.stream === false) { diff --git a/core/llm/openaiTypeConverters.test.ts b/core/llm/openaiTypeConverters.test.ts index b0c63862f4d..72dd83d277c 100644 --- a/core/llm/openaiTypeConverters.test.ts +++ b/core/llm/openaiTypeConverters.test.ts @@ -1,4 +1,8 @@ -import { toResponsesInput } from "./openaiTypeConverters"; +import { + fromChatCompletionChunk, + fromChatResponse, + toResponsesInput, +} from "./openaiTypeConverters"; import { ChatMessage } from ".."; import type { ResponseInputItem } from "openai/resources/responses/responses.mjs"; @@ -48,6 +52,59 @@ function getMessagesByRole( } describe("openaiTypeConverters", () => { + describe("usage extraction", () => { + it("extracts usage from non-stream responses", () => { + const messages = fromChatResponse({ + choices: [ + { + message: { + role: "assistant", + content: "hello", + }, + }, + ], + usage: { + prompt_tokens: 123, + completion_tokens: 45, + total_tokens: 168, + }, + } as any); + + expect(messages).toHaveLength(1); + expect(messages[0].role).toBe("assistant"); + if (messages[0].role === "assistant") { + expect(messages[0].usage).toMatchObject({ + promptTokens: 123, + completionTokens: 45, + totalTokens: 168, + source: "provider", + }); + } + }); + + it("extracts usage-only stream chunks", () => { + const message = fromChatCompletionChunk({ + choices: [], + usage: { + prompt_tokens: 30, + completion_tokens: 10, + total_tokens: 40, + }, + } as any); + + expect(message).toBeDefined(); + expect(message?.role).toBe("assistant"); + if (message?.role === "assistant") { + expect(message.content).toBe(""); + expect(message.usage).toMatchObject({ + promptTokens: 30, + completionTokens: 10, + totalTokens: 40, + }); + } + }); + }); + describe("toResponsesInput", () => { describe("tool calls handling - OpenAI Responses API", () => { it("should emit function_call items when fc_ ID is in metadata", () => { diff --git a/core/llm/openaiTypeConverters.ts b/core/llm/openaiTypeConverters.ts index 9280a51c706..66f65d870db 100644 --- a/core/llm/openaiTypeConverters.ts +++ b/core/llm/openaiTypeConverters.ts @@ -35,6 +35,7 @@ import { TextMessagePart, ThinkingChatMessage, ToolCallDelta, + Usage, } from ".."; function appendReasoningFieldsIfSupported( @@ -274,8 +275,94 @@ export function toFimBody( } as any; } +function firstNumber(...values: unknown[]): number | undefined { + for (const value of values) { + if (typeof value === "number" && Number.isFinite(value)) { + return value; + } + } + + return undefined; +} + +function parseProviderUsage(rawUsage: unknown): Usage | undefined { + if (!rawUsage || typeof rawUsage !== "object") { + return undefined; + } + + const usage = rawUsage as Record; + const promptTokens = firstNumber(usage.prompt_tokens, usage.input_tokens); + const completionTokens = firstNumber( + usage.completion_tokens, + usage.output_tokens, + ); + const totalTokens = firstNumber(usage.total_tokens); + + if ( + promptTokens === undefined && + completionTokens === undefined && + totalTokens === undefined + ) { + return undefined; + } + + const promptTokensDetailsRaw = + (usage.prompt_tokens_details as Record | undefined) ?? + (usage.input_tokens_details as Record | undefined); + const completionTokensDetailsRaw = + (usage.completion_tokens_details as Record | undefined) ?? 
+ (usage.output_tokens_details as Record | undefined); + + return { + promptTokens: promptTokens ?? 0, + completionTokens: completionTokens ?? 0, + totalTokens, + promptTokensDetails: promptTokensDetailsRaw + ? { + cachedTokens: firstNumber( + promptTokensDetailsRaw.cached_tokens, + promptTokensDetailsRaw.cache_read_tokens, + promptTokensDetailsRaw.cachedTokens, + ), + cacheWriteTokens: firstNumber( + promptTokensDetailsRaw.cache_write_tokens, + promptTokensDetailsRaw.cacheWriteTokens, + ), + audioTokens: firstNumber( + promptTokensDetailsRaw.audio_tokens, + promptTokensDetailsRaw.audioTokens, + ), + } + : undefined, + completionTokensDetails: completionTokensDetailsRaw + ? { + acceptedPredictionTokens: firstNumber( + completionTokensDetailsRaw.accepted_prediction_tokens, + completionTokensDetailsRaw.acceptedPredictionTokens, + ), + reasoningTokens: firstNumber( + completionTokensDetailsRaw.reasoning_tokens, + completionTokensDetailsRaw.reasoningTokens, + ), + rejectedPredictionTokens: firstNumber( + completionTokensDetailsRaw.rejected_prediction_tokens, + completionTokensDetailsRaw.rejectedPredictionTokens, + ), + audioTokens: firstNumber( + completionTokensDetailsRaw.audio_tokens, + completionTokensDetailsRaw.audioTokens, + ), + } + : undefined, + source: "provider", + ts: new Date().toISOString(), + raw: usage, + }; +} + export function fromChatResponse(response: ChatCompletion): ChatMessage[] { const messages: ChatMessage[] = []; + const usage = parseProviderUsage((response as any).usage); const message = response.choices[0].message as ChatCompletionMessage & { reasoning?: string; reasoning_content?: string; @@ -320,11 +407,13 @@ export function fromChatResponse(response: ChatCompletion): ChatMessage[] { arguments: (tc as any).function?.arguments, }, })), + usage, }); } else { messages.push({ role: "assistant", content: message.content ?? 
"", + usage, }); } @@ -334,6 +423,7 @@ export function fromChatResponse(response: ChatCompletion): ChatMessage[] { export function fromChatCompletionChunk( chunk: ChatCompletionChunk, ): ChatMessage | undefined { + const usage = parseProviderUsage((chunk as any).usage); const delta = chunk.choices?.[0]?.delta as | (ChatCompletionChunk.Choice.Delta & { reasoning?: string; @@ -348,6 +438,7 @@ export function fromChatCompletionChunk( return { role: "assistant", content: delta.content, + usage, }; } else if (delta?.tool_calls) { const toolCalls = delta?.tool_calls @@ -366,6 +457,7 @@ export function fromChatCompletionChunk( role: "assistant", content: "", toolCalls, + usage, }; } } else if ( @@ -380,6 +472,12 @@ export function fromChatCompletionChunk( reasoning_details: delta?.reasoning_details as any[], }; return message; + } else if (usage) { + return { + role: "assistant", + content: "", + usage, + }; } return undefined; @@ -595,6 +693,47 @@ function handleResponsesStreamEvent( function handleResponsesFinal( resp: OpenAIResponse, ): ChatMessage | ChatMessage[] | undefined { + const usage = parseProviderUsage((resp as any).usage); + const attachUsage = ( + result: ChatMessage | ChatMessage[] | undefined, + ): ChatMessage | ChatMessage[] | undefined => { + if (!usage || !result) { + return result; + } + + if (Array.isArray(result)) { + for (let i = result.length - 1; i >= 0; i--) { + const message = result[i]; + if (message.role === "assistant") { + (message as AssistantChatMessage).usage = usage; + return result; + } + } + result.push({ + role: "assistant", + content: "", + usage, + }); + return result; + } + + if (result.role === "assistant") { + return { + ...result, + usage, + }; + } + + return [ + result, + { + role: "assistant", + content: "", + usage, + }, + ]; + }; + // Prefer structured output items when present if (Array.isArray(resp.output) && resp.output.length > 0) { const result: ChatMessage[] = []; @@ -693,15 +832,15 @@ function handleResponsesFinal( continue; } } - if (result.length > 0) return result; + if (result.length > 0) return attachUsage(result); } // Fallback to output_text when no structured output is present if (typeof resp.output_text === "string" && resp.output_text.length > 0) { - return { role: "assistant", content: resp.output_text }; + return attachUsage({ role: "assistant", content: resp.output_text }); } - return undefined; + return attachUsage(undefined); } export function fromResponsesChunk( diff --git a/core/protocol/passThrough.ts b/core/protocol/passThrough.ts index 9d752b31178..8e95c0d2827 100644 --- a/core/protocol/passThrough.ts +++ b/core/protocol/passThrough.ts @@ -111,4 +111,5 @@ export const CORE_TO_WEBVIEW_PASS_THROUGH: (keyof ToWebviewFromCoreProtocol)[] = "didCloseFiles", "toolCallPartialOutput", "freeTrialExceeded", + "ideSettingsUpdate", ]; diff --git a/core/protocol/webview.ts b/core/protocol/webview.ts index 1a8d40313fa..8c1aae0427d 100644 --- a/core/protocol/webview.ts +++ b/core/protocol/webview.ts @@ -5,6 +5,7 @@ import type { BrowserSerializedContinueConfig, ContextItemWithId, ContextProviderName, + IdeSettings, IndexingProgressUpdate, IndexingStatus, } from "../index.js"; @@ -44,4 +45,5 @@ export type ToWebviewFromIdeOrCoreProtocol = { sessionUpdate: [{ sessionInfo: ControlPlaneSessionInfo | undefined }, void]; toolCallPartialOutput: [{ toolCallId: string; contextItems: any[] }, void]; freeTrialExceeded: [undefined, void]; + ideSettingsUpdate: [IdeSettings, void]; }; diff --git a/core/util/filesystem.ts b/core/util/filesystem.ts 
index 6a813931250..74391d67a01 100644 --- a/core/util/filesystem.ts +++ b/core/util/filesystem.ts @@ -76,6 +76,7 @@ class FileSystemIde implements IDE { userToken: "", continueTestEnvironment: "none", pauseCodebaseIndexOnStart: false, + showTokenUsage: "never", }; } diff --git a/core/util/history.ts b/core/util/history.ts index 44241c4201e..551bee2a9ce 100644 --- a/core/util/history.ts +++ b/core/util/history.ts @@ -38,6 +38,10 @@ export class HistoryManager { }) .reverse(); + // Hydrate token usage for old sessions that were saved before session metadata + // started carrying usage in sessions.json. + sessions = sessions.map((session) => this.hydrateUsageFromSession(session)); + // Apply limit and offset if (options.limit) { const offset = options.offset || 0; @@ -149,6 +153,7 @@ export class HistoryManager { sessionMetadata.title = session.title; sessionMetadata.workspaceDirectory = session.workspaceDirectory; sessionMetadata.messageCount = messageCount; + sessionMetadata.usage = session.usage; found = true; break; } @@ -161,6 +166,7 @@ export class HistoryManager { dateCreated: String(Date.now()), workspaceDirectory: session.workspaceDirectory, messageCount, + usage: session.usage, }; sessionsList.push(sessionMetadata); } @@ -180,6 +186,62 @@ export class HistoryManager { ); } } + + private hydrateUsageFromSession( + metadata: BaseSessionMetadata, + ): BaseSessionMetadata { + if (metadata.usage) { + return metadata; + } + + try { + const sessionFile = getSessionFilePath(metadata.sessionId); + if (!fs.existsSync(sessionFile)) { + return metadata; + } + + const session = JSON.parse( + fs.readFileSync(sessionFile, "utf8"), + ) as Session; + if (session.usage) { + return { + ...metadata, + usage: session.usage, + }; + } + + const totals = session.history.reduce( + (acc, item) => { + if (item.message.role !== "assistant" || !item.message.usage) { + return acc; + } + const promptTokens = item.message.usage.promptTokens ?? 0; + const completionTokens = item.message.usage.completionTokens ?? 0; + acc.promptTokens += promptTokens; + acc.completionTokens += completionTokens; + acc.totalTokens += + item.message.usage.totalTokens ?? promptTokens + completionTokens; + return acc; + }, + { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + }, + ); + + if (totals.totalTokens === 0) { + return metadata; + } + + return { + ...metadata, + usage: totals, + }; + } catch { + return metadata; + } + } } const historyManager = new HistoryManager(); diff --git a/docs/ide-extensions/chat/how-to-customize.mdx b/docs/ide-extensions/chat/how-to-customize.mdx index 75eb99508de..6cb2a96f61d 100644 --- a/docs/ide-extensions/chat/how-to-customize.mdx +++ b/docs/ide-extensions/chat/how-to-customize.mdx @@ -8,4 +8,21 @@ description: "Learn how to customize the Chat feature in Continue to better suit There are a number of different ways to customize Chat: - Add [rules](/customize/rules) to give the model persistent instructions through the system prompt -- Create [prompts](/customize/prompts) to kickoff workflows with instructions you repeat often \ No newline at end of file +- Create [prompts](/customize/prompts) to kickoff workflows with instructions you repeat often + +## Token Usage Display + +You can control where token usage is shown in the UI with the VS Code setting: + +```json +"continue.showTokenUsage": "never" | "history" | "session" | "turn" +``` + +- `never`: Hide token usage in Chat and History. +- `history`: Show token usage in History only. 
+- `session`: Show token usage in History and in the current session summary. +- `turn`: Show token usage in History, session summary, and each assistant turn. + +Usage is always tracked and persisted in local session files. + +The **Usage** page in Settings -> Help shows local analytics totals from the local usage database, so totals there can differ from visible chat history totals. diff --git a/extensions/vscode/package.json b/extensions/vscode/package.json index 9fbb02323a0..48633f2f19a 100644 --- a/extensions/vscode/package.json +++ b/extensions/vscode/package.json @@ -133,6 +133,23 @@ "default": false, "markdownDescription": "Enable a console to log and explore model inputs and outputs. It can be found in the bottom panel." }, + "continue.showTokenUsage": { + "type": "string", + "default": "never", + "enum": [ + "never", + "history", + "session", + "turn" + ], + "enumDescriptions": [ + "Never show token usage.", + "Show token usage in History only.", + "Show token usage in History and Session summaries.", + "Show token usage in History, Session summaries, and each assistant turn." + ], + "description": "Where to display token usage. Tracking is always persisted in session files." + }, "continue.remoteConfigServerUrl": { "type": "string", "default": null, diff --git a/extensions/vscode/src/VsCodeIde.ts b/extensions/vscode/src/VsCodeIde.ts index 9e5853afa3b..1e5fa73b283 100644 --- a/extensions/vscode/src/VsCodeIde.ts +++ b/extensions/vscode/src/VsCodeIde.ts @@ -694,6 +694,10 @@ class VsCodeIde implements IDE { "pauseCodebaseIndexOnStart", false, ), + showTokenUsage: settings.get<"never" | "history" | "session" | "turn">( + "showTokenUsage", + "never", + ), }; return ideSettings; } diff --git a/extensions/vscode/src/extension/VsCodeExtension.ts b/extensions/vscode/src/extension/VsCodeExtension.ts index 962c4ec01b5..20d52960795 100644 --- a/extensions/vscode/src/extension/VsCodeExtension.ts +++ b/extensions/vscode/src/extension/VsCodeExtension.ts @@ -663,6 +663,7 @@ export class VsCodeExtension { if (event.affectsConfiguration(EXTENSION_NAME)) { const settings = await this.ide.getIdeSettings(); void this.core.invoke("config/ideSettingsUpdate", settings); + this.sidebar.webviewProtocol.send("ideSettingsUpdate", settings); if (event.affectsConfiguration(`${EXTENSION_NAME}.enableNextEdit`)) { await this.updateNextEditState(context); diff --git a/gui/src/components/History/HistoryTableRow.tsx b/gui/src/components/History/HistoryTableRow.tsx index 45c7f130976..ab3c35a3164 100644 --- a/gui/src/components/History/HistoryTableRow.tsx +++ b/gui/src/components/History/HistoryTableRow.tsx @@ -21,6 +21,7 @@ import { updateSession, } from "../../redux/thunks/session"; import { isShareSessionSupported } from "../../util"; +import { formatTokenBreakdown } from "../../util/usage"; import HeaderButtonWithToolTip from "../gui/HeaderButtonWithToolTip"; import { ToolTip } from "../gui/Tooltip"; @@ -29,9 +30,11 @@ const shareSessionSupported = isShareSessionSupported(); export function HistoryTableRow({ sessionMetadata, index, + showTokenUsage = false, }: { sessionMetadata: BaseSessionMetadata | RemoteSessionMetadata; index: number; + showTokenUsage?: boolean; }) { const dispatch = useAppDispatch(); const navigate = useNavigate(); @@ -57,6 +60,16 @@ export function HistoryTableRow({ } }; const isRemote = "isRemote" in sessionMetadata && sessionMetadata.isRemote; + const sessionUsage = + "usage" in sessionMetadata && sessionMetadata.usage + ? 
sessionMetadata.usage + : undefined; + const sessionPromptTokens = sessionUsage?.promptTokens ?? 0; + const sessionCompletionTokens = sessionUsage?.completionTokens ?? 0; + const sessionTotalTokens = sessionUsage + ? (sessionUsage.totalTokens ?? + sessionPromptTokens + sessionCompletionTokens) + : 0; const handleKeyUp = async (e: React.KeyboardEvent) => { if (e.key === "Enter") { @@ -122,7 +135,7 @@ export function HistoryTableRow({ navigate("/"); }} > - + {editing ? (
)}
-
+
{getUriPathBasename(sessionMetadata.workspaceDirectory || "")} @@ -176,6 +189,15 @@ export function HistoryTableRow({ })} */}
+ {showTokenUsage && sessionTotalTokens > 0 && ( +
+          {formatTokenBreakdown({
+            promptTokens: sessionPromptTokens,
+            completionTokens: sessionCompletionTokens,
+            totalTokens: sessionTotalTokens,
+          })}
+ )} {hovered && !editing && ( diff --git a/gui/src/components/History/index.tsx b/gui/src/components/History/index.tsx index db83f85af12..75c686249c9 100644 --- a/gui/src/components/History/index.tsx +++ b/gui/src/components/History/index.tsx @@ -14,6 +14,7 @@ import Shortcut from "../gui/Shortcut"; import { XMarkIcon } from "@heroicons/react/24/solid"; import { useNavigate } from "react-router-dom"; import { IdeMessengerContext } from "../../context/IdeMessenger"; +import { useTokenUsageSetting } from "../../hooks/useTokenUsageSetting"; import { useAppDispatch, useAppSelector } from "../../redux/hooks"; import { newSession, @@ -25,6 +26,7 @@ import { getFontSize, getPlatform } from "../../util"; import { ROUTES } from "../../util/navigation"; import ConfirmationDialog from "../dialogs/ConfirmationDialog"; import { Button } from "../ui"; +import { formatTokenBreakdown } from "../../util/usage"; import { HistoryTableRow } from "./HistoryTableRow"; import { groupSessionsByDate, parseDate } from "./util"; @@ -33,6 +35,7 @@ export function History() { const navigate = useNavigate(); const searchInputRef = React.useRef(null); const ideMessenger = useContext(IdeMessengerContext); + const tokenUsageDisplayMode = useTokenUsageSetting(); const [searchTerm, setSearchTerm] = useState(""); @@ -194,17 +197,26 @@ export function History() { {sessionGroups.map((group, groupIndex) => ( - {group.label} + + {group.label} + {tokenUsageDisplayMode !== "never" && + group.usageTotals.totalTokens > 0 && ( + + {formatTokenBreakdown(group.usageTotals)} + + )} + {group.sessions.map((session, sessionIndex) => ( ))} diff --git a/gui/src/components/History/util.ts b/gui/src/components/History/util.ts index 18c6f7d4213..795c85d5297 100644 --- a/gui/src/components/History/util.ts +++ b/gui/src/components/History/util.ts @@ -12,6 +12,45 @@ export const parseDate = (date: string): Date => { export interface SessionGroup { label: string; sessions: (BaseSessionMetadata | RemoteSessionMetadata)[]; + usageTotals: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + }; +} + +function getSessionUsage(session: BaseSessionMetadata | RemoteSessionMetadata) { + if (!("usage" in session) || !session.usage) { + return { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + }; + } + const promptTokens = session.usage.promptTokens ?? 0; + const completionTokens = session.usage.completionTokens ?? 0; + return { + promptTokens, + completionTokens, + totalTokens: session.usage.totalTokens ?? 
promptTokens + completionTokens, + }; +} + +function sumUsage(sessions: (BaseSessionMetadata | RemoteSessionMetadata)[]) { + return sessions.reduce( + (acc, session) => { + const usage = getSessionUsage(session); + acc.promptTokens += usage.promptTokens; + acc.completionTokens += usage.completionTokens; + acc.totalTokens += usage.totalTokens; + return acc; + }, + { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + }, + ); } export const groupSessionsByDate = ( @@ -39,13 +78,29 @@ export const groupSessionsByDate = ( ); if (todaySessions.length > 0) - groups.push({ label: "Today", sessions: todaySessions }); + groups.push({ + label: "Today", + sessions: todaySessions, + usageTotals: sumUsage(todaySessions), + }); if (weekSessions.length > 0) - groups.push({ label: "This Week", sessions: weekSessions }); + groups.push({ + label: "This Week", + sessions: weekSessions, + usageTotals: sumUsage(weekSessions), + }); if (monthSessions.length > 0) - groups.push({ label: "This Month", sessions: monthSessions }); + groups.push({ + label: "This Month", + sessions: monthSessions, + usageTotals: sumUsage(monthSessions), + }); if (olderSessions.length > 0) - groups.push({ label: "Older", sessions: olderSessions }); + groups.push({ + label: "Older", + sessions: olderSessions, + usageTotals: sumUsage(olderSessions), + }); return groups; }; diff --git a/gui/src/components/StepContainer/StepContainer.tsx b/gui/src/components/StepContainer/StepContainer.tsx index 540f7cda35f..67645a1b45c 100644 --- a/gui/src/components/StepContainer/StepContainer.tsx +++ b/gui/src/components/StepContainer/StepContainer.tsx @@ -7,6 +7,7 @@ import { selectUIConfig } from "../../redux/slices/configSlice"; import { deleteMessage } from "../../redux/slices/sessionSlice"; import ThinkingBlockPeek from "../mainInput/belowMainInput/ThinkingBlockPeek"; import StyledMarkdownPreview from "../StyledMarkdownPreview"; +import { formatUsageFooter } from "../../util/usage"; import ConversationSummary from "./ConversationSummary"; import ResponseActions from "./ResponseActions"; import ThinkingIndicator from "./ThinkingIndicator"; @@ -16,6 +17,7 @@ interface StepContainerProps { index: number; isLast: boolean; latestSummaryIndex?: number; + showTurnTokenUsage?: boolean; } export default function StepContainer(props: StepContainerProps) { @@ -102,6 +104,13 @@ export default function StepContainer(props: StepContainerProps) { )} {props.isLast && } + {props.showTurnTokenUsage && + props.item.message.role === "assistant" && + props.item.message.usage && ( +
+ {formatUsageFooter(props.item.message.usage)} +
+ )}
{showResponseActions && ( diff --git a/gui/src/context/MockIdeMessenger.ts b/gui/src/context/MockIdeMessenger.ts index e7f78252dc5..d4fb75c31d8 100644 --- a/gui/src/context/MockIdeMessenger.ts +++ b/gui/src/context/MockIdeMessenger.ts @@ -46,6 +46,14 @@ const DEFAULT_MOCK_CORE_RESPONSES: MockResponses = { "history/list": [], "docs/getIndexedPages": [], "history/save": undefined, + getIdeSettings: { + remoteConfigServerUrl: undefined, + remoteConfigSyncPeriod: 60, + userToken: "", + continueTestEnvironment: "none", + pauseCodebaseIndexOnStart: false, + showTokenUsage: "never", + }, getControlPlaneSessionInfo: { AUTH_TYPE: AuthType.WorkOsStaging, accessToken: "", diff --git a/gui/src/hooks/useTokenUsageSetting.ts b/gui/src/hooks/useTokenUsageSetting.ts new file mode 100644 index 00000000000..2d2f9801187 --- /dev/null +++ b/gui/src/hooks/useTokenUsageSetting.ts @@ -0,0 +1,33 @@ +import { useContext, useEffect, useState } from "react"; +import { IdeMessengerContext } from "../context/IdeMessenger"; +import { useWebviewListener } from "./useWebviewListener"; + +export type TokenUsageDisplayMode = "never" | "history" | "session" | "turn"; + +export function useTokenUsageSetting(): TokenUsageDisplayMode { + const ideMessenger = useContext(IdeMessengerContext); + const [mode, setMode] = useState("never"); + + useEffect(() => { + let mounted = true; + void ideMessenger.ide.getIdeSettings().then((settings) => { + if (!mounted) { + return; + } + setMode((settings.showTokenUsage ?? "never") as TokenUsageDisplayMode); + }); + return () => { + mounted = false; + }; + }, [ideMessenger]); + + useWebviewListener( + "ideSettingsUpdate", + async (settings) => { + setMode((settings.showTokenUsage ?? "never") as TokenUsageDisplayMode); + }, + [], + ); + + return mode; +} diff --git a/gui/src/pages/gui/Chat.tsx b/gui/src/pages/gui/Chat.tsx index 45929ec12e2..835b57c12ba 100644 --- a/gui/src/pages/gui/Chat.tsx +++ b/gui/src/pages/gui/Chat.tsx @@ -26,6 +26,7 @@ import StepContainer from "../../components/StepContainer"; import { TabBar } from "../../components/TabBar/TabBar"; import { IdeMessengerContext } from "../../context/IdeMessenger"; import { useWebviewListener } from "../../hooks/useWebviewListener"; +import { useTokenUsageSetting } from "../../hooks/useTokenUsageSetting"; import { useAppDispatch, useAppSelector } from "../../redux/hooks"; import { selectDoneApplyStates, @@ -56,6 +57,7 @@ import { setDialogMessage, setShowDialog } from "../../redux/slices/uiSlice"; import { RootState } from "../../redux/store"; import { cancelStream } from "../../redux/thunks/cancelStream"; import { getLocalStorage, setLocalStorage } from "../../util/localStorage"; +import { formatTokenBreakdown, summarizeSessionUsage } from "../../util/usage"; import { EmptyChatBody } from "./EmptyChatBody"; import { ExploreDialogWatcher } from "./ExploreDialogWatcher"; import { useAutoScroll } from "./useAutoScroll"; @@ -116,6 +118,7 @@ export function Chat() { const isStreaming = useAppSelector((state) => state.session.isStreaming); const [stepsOpen] = useState<(boolean | undefined)[]>([]); const [isCreatingAgent, setIsCreatingAgent] = useState(false); + const tokenUsageDisplayMode = useTokenUsageSetting(); const mainTextInputRef = useRef(null); const stepsDivRef = useRef(null); const tabsRef = useRef(null); @@ -141,6 +144,11 @@ export function Chat() { useAutoScroll(stepsDivRef, history); + const sessionUsageTotals = useMemo( + () => summarizeSessionUsage(history), + [history], + ); + useEffect(() => { // Cmd + Backspace to delete 
current step const listener = (e: KeyboardEvent) => { @@ -378,6 +386,7 @@ export function Chat() { isLast={index === history.length - 1} item={item} latestSummaryIndex={latestSummaryIndex} + showTurnTokenUsage={tokenUsageDisplayMode === "turn"} />
@@ -423,12 +432,20 @@ export function Chat() { isLast={index === history.length - 1} item={item} latestSummaryIndex={latestSummaryIndex} + showTurnTokenUsage={tokenUsageDisplayMode === "turn"} /> ); }, - [sendInput, isLastUserInput, history, stepsOpen, isStreaming], + [ + sendInput, + isLastUserInput, + history, + stepsOpen, + isStreaming, + tokenUsageDisplayMode, + ], ); const showScrollbar = showChatScrollbar ?? window.innerHeight > 5000; @@ -499,6 +516,17 @@ export function Chat() { )} + {(tokenUsageDisplayMode === "session" || + tokenUsageDisplayMode === "turn") && + sessionUsageTotals.turnsWithUsage > 0 && ( +
+            {formatTokenBreakdown({
+              promptTokens: sessionUsageTotals.promptTokens,
+              completionTokens: sessionUsageTotals.completionTokens,
+              totalTokens: sessionUsageTotals.totalTokens,
+            })}
+ )} {!hasDismissedExploreDialog && } diff --git a/gui/src/pages/stats.tsx b/gui/src/pages/stats.tsx index fa1718cd7b7..f5bf288be10 100644 --- a/gui/src/pages/stats.tsx +++ b/gui/src/pages/stats.tsx @@ -20,7 +20,6 @@ const Tr = styled.tr` } overflow-wrap: anywhere; - border: 1px solid ${lightGray}; `; @@ -29,6 +28,13 @@ const Td = styled.td` border: 1px solid ${lightGray}; `; +const Note = styled.div` + margin: 0.5rem 0.5rem 0.75rem; + color: ${lightGray}; + font-size: 12px; + line-height: 1.4; +`; + function generateTable(data: unknown[][]) { return table(data); } @@ -47,17 +53,21 @@ function Stats() { useEffect(() => { ideMessenger.request("stats/getTokensPerDay", undefined).then((result) => { - result.status === "success" && setDays(result.content); + if (result.status === "success") { + setDays(result.content); + } }); - }, []); + }, [ideMessenger]); useEffect(() => { ideMessenger .request("stats/getTokensPerModel", undefined) .then((result) => { - result.status === "success" && setModels(result.content); + if (result.status === "success") { + setModels(result.content); + } }); - }, []); + }, [ideMessenger]); return (
- navigate(-1)} showBorder />
+ navigate(-1)} showBorder />
+      <Note>
+        Local analytics totals are stored separately from chat history and can
+        differ from visible session totals.
+      </Note>
-          Tokens per Day
+          Tokens Per Day
[ day.day, day.generatedTokens, @@ -86,13 +101,13 @@ function Stats() { Day - Generated Tokens - Prompt Tokens + Output Tokens + Input Tokens {days.map((day) => ( - + {day.day} {day.generatedTokens.toLocaleString()} {day.promptTokens.toLocaleString()} @@ -102,10 +117,10 @@ function Stats() {
-          Tokens per Model
+          Tokens Per Model
[ model.model, model.generatedTokens.toLocaleString(), @@ -119,13 +134,13 @@ function Stats() { Model - Generated Tokens - Prompt Tokens + Output Tokens + Input Tokens {models.map((model) => ( - + {model.model} {model.generatedTokens.toLocaleString()} {model.promptTokens.toLocaleString()} diff --git a/gui/src/redux/slices/sessionSlice.test.ts b/gui/src/redux/slices/sessionSlice.test.ts index e933d1dd879..ec0ac29d078 100644 --- a/gui/src/redux/slices/sessionSlice.test.ts +++ b/gui/src/redux/slices/sessionSlice.test.ts @@ -450,5 +450,71 @@ describe("sessionSlice streamUpdate", () => { expect(newState.history[1].message.role).toBe("assistant"); expect(newState.history[1].toolCallStates).toHaveLength(1); }); + + it("should persist usage metadata on assistant messages with content", () => { + const initialState = createInitialState(); + const action = { + type: "session/streamUpdate", + payload: [ + { + role: "assistant" as const, + content: "Done.", + usage: { + promptTokens: 120, + completionTokens: 30, + totalTokens: 150, + }, + }, + ], + }; + + const newState = sessionSlice.reducer(initialState, action); + expect(newState.history[1].message.role).toBe("assistant"); + if (newState.history[1].message.role === "assistant") { + expect(newState.history[1].message.usage).toEqual({ + promptTokens: 120, + completionTokens: 30, + totalTokens: 150, + }); + } + }); + + it("should persist usage metadata on usage-only assistant chunks", () => { + const initialState = createInitialState(); + initialState.history.push({ + message: { + role: "assistant", + content: "Partial response", + id: "assistant-message", + }, + contextItems: [], + }); + + const action = { + type: "session/streamUpdate", + payload: [ + { + role: "assistant" as const, + content: "", + usage: { + promptTokens: 200, + completionTokens: 80, + totalTokens: 280, + }, + }, + ], + }; + + const newState = sessionSlice.reducer(initialState, action); + expect(newState.history).toHaveLength(2); + if (newState.history[1].message.role === "assistant") { + expect(newState.history[1].message.content).toBe("Partial response"); + expect(newState.history[1].message.usage).toEqual({ + promptTokens: 200, + completionTokens: 80, + totalTokens: 280, + }); + } + }); }); }); diff --git a/gui/src/redux/slices/sessionSlice.ts b/gui/src/redux/slices/sessionSlice.ts index 1a0db2cc441..c2d900f2331 100644 --- a/gui/src/redux/slices/sessionSlice.ts +++ b/gui/src/redux/slices/sessionSlice.ts @@ -651,6 +651,14 @@ export const sessionSlice = createSlice({ handleStreamingToolCallUpdates(message, lastItem); } + if ( + message.role === "assistant" && + lastMessage.role === "assistant" && + message.usage + ) { + lastMessage.usage = message.usage; + } + // Attach Responses API output item id to the current assistant message if present // fromResponsesChunk sets message.metadata.responsesOutputItemId when it sees output_item.added for messages if ( diff --git a/gui/src/redux/thunks/session.ts b/gui/src/redux/thunks/session.ts index ea2cf86cfbc..0c6ae4ff38e 100644 --- a/gui/src/redux/thunks/session.ts +++ b/gui/src/redux/thunks/session.ts @@ -6,6 +6,7 @@ import { renderChatMessage } from "core/util/messageContent"; import { IIdeMessenger } from "../../context/IdeMessenger"; import { selectSelectedChatModel } from "../slices/configSlice"; import { selectSelectedProfile } from "../slices/profilesSlice"; +import { summarizeSessionUsage } from "../../util/usage"; import { deleteSessionMetadata, newSession, @@ -291,6 +292,17 @@ export const saveCurrentSession = createAsyncThunk< 
title = NEW_SESSION_TITLE; } + const sessionUsageTotals = summarizeSessionUsage(session.history); + const usage = + sessionUsageTotals.turnsWithUsage > 0 + ? { + promptTokens: sessionUsageTotals.promptTokens, + completionTokens: sessionUsageTotals.completionTokens, + totalTokens: sessionUsageTotals.totalTokens, + totalCost: 0, + } + : undefined; + const updatedSession: Session = { sessionId: session.id, title, @@ -298,6 +310,7 @@ export const saveCurrentSession = createAsyncThunk< history: session.history, mode: session.mode, chatModelTitle: selectedChatModel?.title ?? null, + ...(usage ? { usage } : {}), }; const result = await dispatch(updateSession(updatedSession)); diff --git a/gui/src/redux/thunks/streamNormalInput.ts b/gui/src/redux/thunks/streamNormalInput.ts index 13cc5ebbf60..f7a368d6cfa 100644 --- a/gui/src/redux/thunks/streamNormalInput.ts +++ b/gui/src/redux/thunks/streamNormalInput.ts @@ -1,5 +1,5 @@ import { createAsyncThunk, unwrapResult } from "@reduxjs/toolkit"; -import { LLMFullCompletionOptions, ModelDescription } from "core"; +import { LLMFullCompletionOptions, ModelDescription, Usage } from "core"; import { getRuleId } from "core/llm/rules/getSystemMessageWithRules"; import { ToCoreProtocol } from "core/protocol"; import { BUILT_IN_GROUP_NAME } from "core/tools/builtIn"; @@ -16,6 +16,7 @@ import { setInlineErrorMessage, setIsPruned, setToolGenerated, + updateHistoryItemAtIndex, streamUpdate, } from "../slices/sessionSlice"; import { ThunkApiType } from "../store"; @@ -68,6 +69,28 @@ function buildReasoningCompletionOptions( return reasoningOptions; } +function findLastAssistantMessageIndex(history: { message: any }[]): number { + for (let i = history.length - 1; i >= 0; i--) { + if (history[i]?.message?.role === "assistant") { + return i; + } + } + return -1; +} + +function enrichUsage(usage: Usage, provider: string, model: string): Usage { + return { + ...usage, + provider, + model, + source: usage.source ?? "provider", + ts: usage.ts ?? new Date().toISOString(), + totalTokens: + usage.totalTokens ?? + (usage.promptTokens ?? 0) + (usage.completionTokens ?? 
0), + }; +} + export const streamNormalInput = createAsyncThunk< void, { @@ -216,6 +239,35 @@ export const streamNormalInput = createAsyncThunk< if (next.done && next.value) { dispatch(addPromptCompletionPair([next.value])); + if (next.value.usage) { + const usage = enrichUsage( + next.value.usage, + selectedChatModel.underlyingProviderName, + selectedChatModel.model, + ); + const latestState = getState(); + const assistantIndex = findLastAssistantMessageIndex( + latestState.session.history, + ); + + if (assistantIndex >= 0) { + const assistantItem = latestState.session.history[assistantIndex]; + if (assistantItem.message.role === "assistant") { + dispatch( + updateHistoryItemAtIndex({ + index: assistantIndex, + updates: { + message: { + ...assistantItem.message, + usage, + }, + }, + }), + ); + } + } + } + try { extra.ideMessenger.post("devdata/log", { name: "chatInteraction", diff --git a/gui/src/util/usage.ts b/gui/src/util/usage.ts new file mode 100644 index 00000000000..7e5e1f99266 --- /dev/null +++ b/gui/src/util/usage.ts @@ -0,0 +1,59 @@ +import { ChatHistoryItem, Usage } from "core"; + +export interface SessionUsageTotals { + promptTokens: number; + completionTokens: number; + totalTokens: number; + turnsWithUsage: number; +} + +export function summarizeSessionUsage( + history: ChatHistoryItem[], +): SessionUsageTotals { + return history.reduce( + (acc, item) => { + if (item.message.role !== "assistant" || !item.message.usage) { + return acc; + } + + const usage = item.message.usage; + const promptTokens = usage.promptTokens ?? 0; + const completionTokens = usage.completionTokens ?? 0; + const totalTokens = usage.totalTokens ?? promptTokens + completionTokens; + + acc.promptTokens += promptTokens; + acc.completionTokens += completionTokens; + acc.totalTokens += totalTokens; + acc.turnsWithUsage += 1; + return acc; + }, + { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + turnsWithUsage: 0, + }, + ); +} + +export function formatUsageFooter(usage: Usage): string { + const promptTokens = usage.promptTokens ?? 0; + const completionTokens = usage.completionTokens ?? 0; + const totalTokens = usage.totalTokens ?? promptTokens + completionTokens; + return formatTokenBreakdown({ + promptTokens, + completionTokens, + totalTokens, + }); +} + +export function formatTokenBreakdown(params: { + promptTokens: number; + completionTokens: number; + totalTokens?: number; +}): string { + const promptTokens = params.promptTokens ?? 0; + const completionTokens = params.completionTokens ?? 0; + const totalTokens = params.totalTokens ?? promptTokens + completionTokens; + return `Tokens used: ${promptTokens.toLocaleString()} in / ${completionTokens.toLocaleString()} out (${totalTokens.toLocaleString()} total)`; +} From a4aaf9d07e124b477c9195eb6655c91c1c039d51 Mon Sep 17 00:00:00 2001 From: Ben Schiller Date: Mon, 16 Feb 2026 22:04:46 -0800 Subject: [PATCH 2/2] Address cubic feedback on passthrough, history hydration, and token fallback --- core/llm/index.ts | 31 ++++++++++++++----- core/llm/openaiTypeConverters.ts | 10 ++---- core/util/history.ts | 8 ++--- .../constants/MessageTypes.kt | 1 + 4 files changed, 31 insertions(+), 19 deletions(-) diff --git a/core/llm/index.ts b/core/llm/index.ts index 536b8796ee6..eaf1d689c40 100644 --- a/core/llm/index.ts +++ b/core/llm/index.ts @@ -356,14 +356,29 @@ export abstract class BaseLLM implements ILLM { usage: Usage | undefined, error?: any, ): InteractionStatus { - let promptTokens = - typeof usage?.promptTokens === "number" - ? 
usage.promptTokens - : this.countTokens(prompt); - let generatedTokens = - typeof usage?.completionTokens === "number" - ? usage.completionTokens - : this.countTokens(completion); + const promptHasContent = prompt.trim().length > 0; + const completionHasContent = completion.trim().length > 0; + const providerPromptTokens = usage?.promptTokens; + const providerCompletionTokens = usage?.completionTokens; + const hasProviderPromptTokens = typeof providerPromptTokens === "number"; + const hasProviderCompletionTokens = + typeof providerCompletionTokens === "number"; + + // Treat zero usage for non-empty text as missing provider usage and fall back + // to local token counting to avoid undercounting when providers omit fields. + const shouldUseProviderPromptTokens = + hasProviderPromptTokens && + (providerPromptTokens > 0 || !promptHasContent); + const shouldUseProviderCompletionTokens = + hasProviderCompletionTokens && + (providerCompletionTokens > 0 || !completionHasContent); + + let promptTokens = shouldUseProviderPromptTokens + ? providerPromptTokens + : this.countTokens(prompt); + let generatedTokens = shouldUseProviderCompletionTokens + ? providerCompletionTokens + : this.countTokens(completion); let thinkingTokens = thinking ? this.countTokens(thinking) : 0; TokensBatchingService.getInstance().addTokens( diff --git a/core/llm/openaiTypeConverters.ts b/core/llm/openaiTypeConverters.ts index 66f65d870db..ae1431de3c3 100644 --- a/core/llm/openaiTypeConverters.ts +++ b/core/llm/openaiTypeConverters.ts @@ -298,11 +298,7 @@ function parseProviderUsage(rawUsage: unknown): Usage | undefined { ); const totalTokens = firstNumber(usage.total_tokens); - if ( - promptTokens === undefined && - completionTokens === undefined && - totalTokens === undefined - ) { + if (promptTokens === undefined || completionTokens === undefined) { return undefined; } @@ -314,8 +310,8 @@ function parseProviderUsage(rawUsage: unknown): Usage | undefined { (usage.output_tokens_details as Record | undefined); return { - promptTokens: promptTokens ?? 0, - completionTokens: completionTokens ?? 0, + promptTokens, + completionTokens, totalTokens, promptTokensDetails: promptTokensDetailsRaw ? { diff --git a/core/util/history.ts b/core/util/history.ts index 551bee2a9ce..89f8b505f3b 100644 --- a/core/util/history.ts +++ b/core/util/history.ts @@ -38,16 +38,16 @@ export class HistoryManager { }) .reverse(); - // Hydrate token usage for old sessions that were saved before session metadata - // started carrying usage in sessions.json. - sessions = sessions.map((session) => this.hydrateUsageFromSession(session)); - // Apply limit and offset if (options.limit) { const offset = options.offset || 0; sessions = sessions.slice(offset, offset + options.limit); } + // Hydrate token usage for old sessions that were saved before session metadata + // started carrying usage in sessions.json. 
+ sessions = sessions.map((session) => this.hydrateUsageFromSession(session)); + return sessions; } diff --git a/extensions/intellij/src/main/kotlin/com/github/continuedev/continueintellijextension/constants/MessageTypes.kt b/extensions/intellij/src/main/kotlin/com/github/continuedev/continueintellijextension/constants/MessageTypes.kt index b7224d8d5de..63a97905989 100644 --- a/extensions/intellij/src/main/kotlin/com/github/continuedev/continueintellijextension/constants/MessageTypes.kt +++ b/extensions/intellij/src/main/kotlin/com/github/continuedev/continueintellijextension/constants/MessageTypes.kt @@ -67,6 +67,7 @@ class MessageTypes { "sessionUpdate", "didCloseFiles", "toolCallPartialOutput", + "ideSettingsUpdate", ) // Note: If updating these values, make a corresponding update in