From ec9ceca4f168f6884a6a1785bd238789c2b284fa Mon Sep 17 00:00:00 2001 From: Aryan Date: Tue, 26 May 2026 18:44:34 -0700 Subject: [PATCH] fix(inference): rename J/total-token title to avoid input-metric heuristic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chart auto-detects "input metrics" by checking if the Y-axis title contains the word "input" — and if so, swaps the X-axis from interactivity to P99 TTFT (correct behavior for input-throughput / input-cost metrics). My J/total-token title was "Measured Joules per Token (input + output)" which tripped that heuristic incorrectly: J/total-token covers ALL tokens, so the X-axis should stay on interactivity, not switch to P99 TTFT. Rename to "Measured Joules per Token (incl. prompt)" — same meaning, no "input" trigger word. --- .../app/src/components/inference/inference-chart-config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/app/src/components/inference/inference-chart-config.json b/packages/app/src/components/inference/inference-chart-config.json index 9581fce2..ad5e7726 100644 --- a/packages/app/src/components/inference/inference-chart-config.json +++ b/packages/app/src/components/inference/inference-chart-config.json @@ -97,7 +97,7 @@ "y_measuredJPerOutputToken_roofline": "lower_right", "y_measuredJPerTotalToken": "measuredJPerTotalToken.y", "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)", - "y_measuredJPerTotalToken_title": "Measured Joules per Token (input + output)", + "y_measuredJPerTotalToken_title": "Measured Joules per Token (incl. prompt)", "y_measuredJPerTotalToken_roofline": "lower_right", "y_cost_limit": 5, "y_latency_limit": 60 @@ -199,7 +199,7 @@ "y_measuredJPerOutputToken_roofline": "lower_left", "y_measuredJPerTotalToken": "measuredJPerTotalToken.y", "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)", - "y_measuredJPerTotalToken_title": "Measured Joules per Token (input + output)", + "y_measuredJPerTotalToken_title": "Measured Joules per Token (incl. prompt)", "y_measuredJPerTotalToken_roofline": "lower_left", "y_cost_limit": 5, "y_latency_limit": 60