15 changes: 15 additions & 0 deletions core/config/types.ts
@@ -248,6 +248,7 @@ declare global {
title: string;
dateCreated: string;
workspaceDirectory: string;
usage?: Usage;
}

export interface RangeInFile {
@@ -323,11 +324,23 @@ declare global {
role: "user";
content: MessageContent;
}

export interface Usage {
completionTokens: number;
promptTokens: number;
totalTokens?: number;
provider?: string;
model?: string;
source?: "provider" | "estimated";
ts?: string;
raw?: Record<string, unknown>;
}

export interface AssistantChatMessage {
role: "assistant";
content: MessageContent;
toolCalls?: ToolCallDelta[];
usage?: Usage;
}

export interface SystemChatMessage {
@@ -387,6 +400,7 @@ declare global {
completionOptions: CompletionOptions;
prompt: string;
completion: string;
usage?: Usage;
}

type MessageModes = "chat" | "edit";
@@ -651,6 +665,7 @@ declare global {
remoteConfigSyncPeriod: number;
userToken: string;
pauseCodebaseIndexOnStart: boolean;
showTokenUsage?: "never" | "history" | "session" | "turn";
}

export interface IDE {
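Taken together, the types.ts changes thread an optional usage field through session metadata, assistant messages, and prompt logs, and add a showTokenUsage setting controlling where totals are displayed. A minimal sketch of a populated message, with invented numbers and model name rather than anything this PR produces:

// Hypothetical example only; all field values are illustrative.
const reply: AssistantChatMessage = {
  role: "assistant",
  content: "Done.",
  usage: {
    promptTokens: 1200,
    completionTokens: 85,
    totalTokens: 1285,
    provider: "anthropic",
    model: "claude-sonnet-4",
    source: "provider", // reported by the API, not locally estimated
    ts: new Date().toISOString(),
  },
};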
9 changes: 9 additions & 0 deletions core/index.d.ts
@@ -294,6 +294,7 @@ export interface BaseSessionMetadata {
dateCreated: string;
workspaceDirectory: string;
messageCount?: number;
usage?: Usage;
}

export interface RangeInFile {
@@ -406,6 +407,9 @@ export interface ThinkingChatMessage {
export interface Usage {
completionTokens: number;
promptTokens: number;
totalTokens?: number;
provider?: string;
model?: string;
promptTokensDetails?: {
cachedTokens?: number;
/** This is an Anthropic-specific property */
@@ -418,6 +422,9 @@ export interface Usage {
rejectedPredictionTokens?: number;
audioTokens?: number;
};
source?: "provider" | "estimated";
ts?: string;
raw?: Record<string, unknown>;
}

export interface AssistantChatMessage {
@@ -489,6 +496,7 @@ export interface PromptLog {
modelProvider: string;
prompt: string;
completion: string;
usage?: Usage;
}

export type MessageModes = "chat" | "agent" | "plan" | "background";
@@ -815,6 +823,7 @@ export interface IdeSettings {
userToken: string;
continueTestEnvironment: "none" | "production" | "staging" | "local";
pauseCodebaseIndexOnStart: boolean;
showTokenUsage?: "never" | "history" | "session" | "turn";
}

export interface FileStats {
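The richer Usage shape in index.d.ts also carries prompt-cache detail. A sketch of what a cache-assisted call might report, with invented numbers, and assuming the elided detail fields include the cacheWriteTokens property that Anthropic.ts populates below:

// Hypothetical values; raw preserves the provider's unparsed payload.
const usage: Usage = {
  promptTokens: 4000,
  completionTokens: 150,
  totalTokens: 4150,
  promptTokensDetails: {
    cachedTokens: 3500, // served from the prompt cache
    cacheWriteTokens: 500, // newly written to the cache
  },
  source: "provider",
  raw: { input_tokens: 4000, output_tokens: 150 },
};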
26 changes: 24 additions & 2 deletions core/llm/index.ts
@@ -356,8 +356,29 @@ export abstract class BaseLLM implements ILLM {
usage: Usage | undefined,
error?: any,
): InteractionStatus {
- let promptTokens = this.countTokens(prompt);
- let generatedTokens = this.countTokens(completion);
const promptHasContent = prompt.trim().length > 0;
const completionHasContent = completion.trim().length > 0;
const providerPromptTokens = usage?.promptTokens;
const providerCompletionTokens = usage?.completionTokens;
const hasProviderPromptTokens = typeof providerPromptTokens === "number";
const hasProviderCompletionTokens =
typeof providerCompletionTokens === "number";

// Treat zero usage for non-empty text as missing provider usage and fall back
// to local token counting to avoid undercounting when providers omit fields.
const shouldUseProviderPromptTokens =
hasProviderPromptTokens &&
(providerPromptTokens > 0 || !promptHasContent);
const shouldUseProviderCompletionTokens =
hasProviderCompletionTokens &&
(providerCompletionTokens > 0 || !completionHasContent);

let promptTokens = shouldUseProviderPromptTokens
? providerPromptTokens
: this.countTokens(prompt);
let generatedTokens = shouldUseProviderCompletionTokens
? providerCompletionTokens
: this.countTokens(completion);
let thinkingTokens = thinking ? this.countTokens(thinking) : 0;

TokensBatchingService.getInstance().addTokens(
@@ -1353,6 +1374,7 @@ export abstract class BaseLLM implements ILLM {
modelProvider: this.underlyingProviderName,
prompt,
completion: completion.join(""),
usage,
};
}

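The provider-versus-estimated decision above reduces to one rule per field: trust a provider-reported count unless it is zero for non-empty text, in which case re-count locally. A standalone sketch of that rule, not the PR's exact code:

// Sketch only. countLocally stands in for BaseLLM.countTokens.
function resolveTokenCount(
  text: string,
  providerCount: number | undefined,
  countLocally: (s: string) => number,
): number {
  const hasContent = text.trim().length > 0;
  // A zero count for non-empty text is treated as an omitted field.
  if (typeof providerCount === "number" && (providerCount > 0 || !hasContent)) {
    return providerCount;
  }
  return countLocally(text);
}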
97 changes: 69 additions & 28 deletions core/llm/llms/Anthropic.ts
@@ -85,6 +85,55 @@ class Anthropic extends BaseLLM {
return finalOptions;
}

private parseAnthropicUsage(
rawUsage:
| {
input_tokens?: number | null;
output_tokens?: number | null;
cache_read_input_tokens?: number | null;
cache_creation_input_tokens?: number | null;
}
| undefined,
): Usage | undefined {
if (!rawUsage) {
return undefined;
}

const promptTokens = rawUsage.input_tokens;
const completionTokens = rawUsage.output_tokens;
const cachedTokens = rawUsage.cache_read_input_tokens;
const cacheWriteTokens = rawUsage.cache_creation_input_tokens;

if (
typeof promptTokens !== "number" &&
typeof completionTokens !== "number" &&
typeof cachedTokens !== "number" &&
typeof cacheWriteTokens !== "number"
) {
return undefined;
}

const resolvedPromptTokens = promptTokens ?? 0;
const resolvedCompletionTokens = completionTokens ?? 0;

return {
promptTokens: resolvedPromptTokens,
completionTokens: resolvedCompletionTokens,
totalTokens: resolvedPromptTokens + resolvedCompletionTokens,
promptTokensDetails:
typeof cachedTokens === "number" || typeof cacheWriteTokens === "number"
? {
cachedTokens:
typeof cachedTokens === "number" ? cachedTokens : undefined,
cacheWriteTokens:
typeof cacheWriteTokens === "number"
? cacheWriteTokens
: undefined,
}
: undefined,
};
}

private convertMessageContentToBlocks(
content: MessageContent,
): ContentBlockParam[] {
@@ -282,31 +331,18 @@

if (stream === false) {
const json = await response.json();
- const cost = json.usage
- ? {
- inputTokens: json.usage.input_tokens,
- outputTokens: json.usage.output_tokens,
- totalTokens: json.usage.input_tokens + json.usage.output_tokens,
- }
- : {};
const usage = this.parseAnthropicUsage(json.usage);
yield {
role: "assistant",
content: json.content[0].text,
- ...(Object.keys(cost).length > 0 ? { cost } : {}),
...(usage ? { usage } : {}),
};
return;
}

let lastToolUseId: string | undefined;
let lastToolUseName: string | undefined;
- let usage: Usage = {
- promptTokens: 0,
- completionTokens: 0,
- promptTokensDetails: {
- cachedTokens: 0,
- cacheWriteTokens: 0,
- },
- };
let usage: Usage | undefined;

for await (const event of streamSse(response)) {
// https://docs.anthropic.com/en/api/messages-streaming#event-types
@@ -315,17 +351,20 @@
case "message_start":
// Capture initial usage information
const startEvent = rawEvent as RawMessageStartEvent;
- usage.promptTokens = startEvent.message.usage.input_tokens;
- usage.promptTokensDetails!.cachedTokens =
- startEvent.message.usage.cache_read_input_tokens ?? undefined;
- usage.promptTokensDetails!.cacheWriteTokens =
- startEvent.message.usage.cache_creation_input_tokens ?? undefined;
usage = this.parseAnthropicUsage(startEvent.message.usage) ?? usage;
break;
case "message_delta":
// Update usage information during streaming
const deltaEvent = rawEvent as RawMessageDeltaEvent;
- if (deltaEvent.usage) {
- usage.completionTokens = deltaEvent.usage.output_tokens;
if (deltaEvent.usage?.output_tokens !== undefined) {
const promptTokens = usage?.promptTokens ?? 0;
const completionTokens = deltaEvent.usage.output_tokens;
usage = {
promptTokens,
completionTokens,
totalTokens: promptTokens + completionTokens,
promptTokensDetails: usage?.promptTokensDetails,
};
}
break;
case "content_block_start":
@@ -393,11 +432,13 @@
}
}

- yield {
- role: "assistant",
- content: "",
- usage,
- };
if (usage) {
yield {
role: "assistant",
content: "",
usage,
};
}
}

protected async *_streamChat(
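For reference, the shapes parseAnthropicUsage translates between, with invented numbers. Anthropic reports snake_case counts on message_start and message_delta events:

// Illustrative input only.
const rawUsage = {
  input_tokens: 812,
  output_tokens: 64,
  cache_read_input_tokens: 700,
  cache_creation_input_tokens: 0,
};
// parseAnthropicUsage(rawUsage) would return:
// {
//   promptTokens: 812,
//   completionTokens: 64,
//   totalTokens: 876,
//   promptTokensDetails: { cachedTokens: 700, cacheWriteTokens: 0 },
// }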
65 changes: 63 additions & 2 deletions core/llm/llms/Bedrock.ts
@@ -18,7 +18,13 @@ import {
import { fromNodeProviderChain } from "@aws-sdk/credential-providers";

import type { CompletionOptions } from "../../index.js";
- import { ChatMessage, Chunk, LLMOptions, MessageContent } from "../../index.js";
import {
ChatMessage,
Chunk,
LLMOptions,
MessageContent,
Usage,
} from "../../index.js";
import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
import { renderChatMessage, stripImages } from "../../util/messageContent.js";
import { parseDataUrl } from "../../util/url.js";
@@ -71,6 +77,52 @@ class Bedrock extends BaseLLM {
};
}

private parseBedrockUsage(rawUsage: any): Usage | undefined {
if (!rawUsage) {
return undefined;
}

const promptTokens = rawUsage.inputTokens ?? rawUsage.promptTokens;
const completionTokens = rawUsage.outputTokens ?? rawUsage.completionTokens;
const totalTokens = rawUsage.totalTokens;
const cachedTokens =
rawUsage.cacheReadInputTokens ?? rawUsage.cache_read_input_tokens;
const cacheWriteTokens =
rawUsage.cacheWriteInputTokens ?? rawUsage.cache_write_input_tokens;

if (
typeof promptTokens !== "number" &&
typeof completionTokens !== "number" &&
typeof totalTokens !== "number" &&
typeof cachedTokens !== "number" &&
typeof cacheWriteTokens !== "number"
) {
return undefined;
}

const resolvedPromptTokens = promptTokens ?? 0;
const resolvedCompletionTokens = completionTokens ?? 0;
const resolvedTotalTokens =
totalTokens ?? resolvedPromptTokens + resolvedCompletionTokens;

return {
promptTokens: resolvedPromptTokens,
completionTokens: resolvedCompletionTokens,
totalTokens: resolvedTotalTokens,
promptTokensDetails:
typeof cachedTokens === "number" || typeof cacheWriteTokens === "number"
? {
cachedTokens:
typeof cachedTokens === "number" ? cachedTokens : undefined,
cacheWriteTokens:
typeof cacheWriteTokens === "number"
? cacheWriteTokens
: undefined,
}
: undefined,
};
}

protected async *_streamComplete(
prompt: string,
signal: AbortSignal,
@@ -139,11 +191,12 @@
cacheReadInputTokens: 0,
cacheWriteInputTokens: 0,
};
let usage: Usage | undefined;

try {
for await (const chunk of response.stream) {
if (chunk.metadata?.usage) {
- console.log(`${JSON.stringify(chunk.metadata.usage)}`);
usage = this.parseBedrockUsage(chunk.metadata.usage) ?? usage;
}

const contentBlockDelta: ContentBlockDelta | undefined =
@@ -241,6 +294,14 @@
// Clean up state and let the original error bubble up to the retry decorator
throw error;
}

if (usage) {
yield {
role: "assistant",
content: "",
usage,
};
}
}

/**
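parseBedrockUsage accepts both key spellings: the Converse stream's metadata reports camelCase fields, while the snake_case fallbacks cover payloads that use Anthropic-style names. An invented example:

// Illustrative only: typical Converse stream metadata.
const metadataUsage = {
  inputTokens: 300,
  outputTokens: 42,
  cacheReadInputTokens: 256,
};
// parseBedrockUsage(metadataUsage) would return:
// {
//   promptTokens: 300,
//   completionTokens: 42,
//   totalTokens: 342, // summed because totalTokens was absent
//   promptTokensDetails: { cachedTokens: 256, cacheWriteTokens: undefined },
// }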