From 6b4db525bd9def3dd62885937470bc53fbae337d Mon Sep 17 00:00:00 2001
From: "posthog[bot]" <206114724+posthog[bot]@users.noreply.github.com>
Date: Mon, 15 Jun 2026 11:05:59 +0000
Subject: [PATCH] fix(agent): surface a fallback message for empty turns + wire TASK_RUN_COMPLETED
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Users reported empty agent responses — the model "thinks" then the turn
finishes with nothing shown, clustering in plan mode. The root cause is a
turn that completes (`end_turn`) without delivering any user-visible prose:
most often a plan-mode turn that only emits encrypted/`redacted_thinking`
(which renders nothing) and then calls `ExitPlanMode` with no text block.

- Track whether any `agent_message_chunk` reached the client during a turn
  (via a thin client wrapper) and, at both `end_turn` completion points (the
  `result` message and the `session_state_changed: idle` path), surface a
  fallback message when none did. Skipped for
  `max_tokens`/`max_turn_requests`, refusals, cancellations, and SDK
  structured-output turns.
- Wire up the dormant `TASK_RUN_COMPLETED` analytics event with `stop_reason`
  and an `empty_output` flag so the failure mode is observable for triage.

Generated-By: PostHog Code Task-Id: 6388fb34-ec04-4d1d-8031-9c260ede0d0a --- .../claude/claude-agent.streamed-text.test.ts | 85 +++++++++++++ .../agent/src/adapters/claude/claude-agent.ts | 112 ++++++++++++++++-- packages/core/src/sessions/sessionService.ts | 67 ++++++++--- packages/shared/src/analytics-events.ts | 18 ++- 4 files changed, 256 insertions(+), 26 deletions(-) diff --git a/packages/agent/src/adapters/claude/claude-agent.streamed-text.test.ts b/packages/agent/src/adapters/claude/claude-agent.streamed-text.test.ts index da450eaca..8efee5ed4 100644 --- a/packages/agent/src/adapters/claude/claude-agent.streamed-text.test.ts +++ b/packages/agent/src/adapters/claude/claude-agent.streamed-text.test.ts @@ -150,6 +150,37 @@ function resultSuccess(sessionId: string) { }; } +function thinkingDelta(sessionId: string, thinking: string) { + return { + type: "stream_event", + parent_tool_use_id: null, + session_id: sessionId, + uuid: `think-${thinking}`, + event: { + type: "content_block_delta", + index: 0, + delta: { type: "thinking_delta", thinking }, + }, + }; +} + +function exitPlanModeAssistant(sessionId: string, apiId: string) { + return { + type: "assistant", + parent_tool_use_id: null, + session_id: sessionId, + uuid: `assistant-${apiId}`, + message: { + id: apiId, + role: "assistant", + content: [ + { type: "redacted_thinking", data: "encrypted" }, + { type: "tool_use", id: "tool-1", name: "ExitPlanMode", input: {} }, + ], + }, + }; +} + function messageChunkTexts( calls: ClientMocks["sessionUpdate"]["mock"]["calls"], ): string[] { @@ -216,4 +247,58 @@ describe("ClaudeAcpAgent.prompt — streamed assistant text wiring", () => { "gateway answer", ]); }); + + it("surfaces a fallback message when a turn ends with no prose", async () => { + const { agent, client } = makeAgent(); + const sessionId = "s-empty"; + const { query, input } = installFakeSession(agent, sessionId); + + const promptPromise = agent.prompt({ + sessionId, + prompt: [{ type: "text", text: "make 
a plan" }], + }); + await tick(); + + // Plan-mode turn: encrypted thinking + ExitPlanMode, no text block. + await echoUserMessage(query, input); + await send(query, messageStart(sessionId, "msg_plan")); + await send(query, exitPlanModeAssistant(sessionId, "msg_plan")); + await send(query, resultSuccess(sessionId)); + + const result = await promptPromise; + expect(result.stopReason).toBe("end_turn"); + expect((result._meta as { emptyOutput?: boolean }).emptyOutput).toBe(true); + const chunks = messageChunkTexts(client.sessionUpdate.mock.calls); + expect(chunks).toHaveLength(1); + expect(chunks[0]).toContain("without a written response"); + }); + + it("does not surface a fallback when thinking-only turn also emits text", async () => { + const { agent, client } = makeAgent(); + const sessionId = "s-thinking-text"; + const { query, input } = installFakeSession(agent, sessionId); + + const promptPromise = agent.prompt({ + sessionId, + prompt: [{ type: "text", text: "hi" }], + }); + await tick(); + + await echoUserMessage(query, input); + await send(query, messageStart(sessionId, "msg_t")); + await send(query, thinkingDelta(sessionId, "pondering")); + await send(query, textDelta(sessionId, "here is my answer")); + await send( + query, + assistantMessage(sessionId, "msg_t", "here is my answer"), + ); + await send(query, resultSuccess(sessionId)); + + const result = await promptPromise; + expect(result.stopReason).toBe("end_turn"); + expect((result._meta as { emptyOutput?: boolean }).emptyOutput).toBe(false); + expect(messageChunkTexts(client.sessionUpdate.mock.calls)).toEqual([ + "here is my answer", + ]); + }); }); diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 8df465ae2..9b2120ef3 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -143,6 +143,13 @@ const DEFAULT_FORCE_CANCEL_GRACE_MS = 30_000; const MAX_TITLE_LENGTH = 256; 
const LOCAL_ONLY_COMMANDS = new Set(["/context", "/heapdump", "/extra-usage"]); +// Shown when a turn completes (`end_turn`) without delivering any user-visible +// prose. Without it the UI renders a silent blank turn — most commonly a +// plan-mode turn where the model only produced (encrypted) thinking before +// calling `ExitPlanMode`, which carries no `agent_message_chunk` of its own. +const EMPTY_TURN_FALLBACK_TEXT = + "_The agent finished this turn without a written response. This can happen in plan mode when it reasons through a plan but doesn't post a message. You can ask it to continue or send your request again._"; + // Best-effort: silent on ENOENT, logs other errors so permission failures // aren't masked. function readClaudeMdQuietly(cwd: string, logger: Logger): string | undefined { @@ -501,10 +508,37 @@ export class ClaudeAcpAgent extends BaseAcpAgent { | undefined )?.terminal_output === true; + // Track whether any user-visible prose (`agent_message_chunk`) reached the + // client during this turn. A turn that ends in `end_turn` having emitted + // none is a silent blank turn (e.g. plan-mode thinking + `ExitPlanMode`); + // the `end_turn` completion points below surface a fallback message in that + // case. Wrapping the client keeps the signal accurate across every emit + // path — streamed text, consolidated messages, and the direct emits below. + let emittedAgentMessageChunk = false; + const trackingClient = new Proxy(this.client, { + get: (target, prop, receiver) => { + if (prop === "sessionUpdate") { + return ( + notification: Parameters[0], + ) => { + if ( + (notification as { update?: { sessionUpdate?: string } }).update + ?.sessionUpdate === "agent_message_chunk" + ) { + emittedAgentMessageChunk = true; + } + return target.sessionUpdate(notification); + }; + } + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? 
value.bind(target) : value; + }, + }); + const context = { session: this.session, sessionId: params.sessionId, - client: this.client, + client: trackingClient, toolUseCache: this.toolUseCache, toolUseStreamCache: this.toolUseStreamCache, fileContentCache: this.fileContentCache, @@ -615,7 +649,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { compactionInProgress ) { compactionInProgress = false; - await this.client.sessionUpdate({ + await trackingClient.sessionUpdate({ sessionId: params.sessionId, update: { sessionUpdate: "agent_message_chunk", @@ -634,7 +668,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { const reason = message.compact_error ? `: ${message.compact_error}` : "."; - await this.client.sessionUpdate({ + await trackingClient.sessionUpdate({ sessionId: params.sessionId, update: { sessionUpdate: "agent_message_chunk", @@ -672,7 +706,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { "Slash command produced no output; treating as unsupported", { sessionId: params.sessionId, command: cmd }, ); - await this.client.sessionUpdate({ + await trackingClient.sessionUpdate({ sessionId: params.sessionId, update: { sessionUpdate: "agent_message_chunk", @@ -708,8 +742,20 @@ export class ClaudeAcpAgent extends BaseAcpAgent { }, }); + const idleStopReason = this.session.cancelled + ? "cancelled" + : "end_turn"; + let idleEmptyOutput = false; + if (idleStopReason === "end_turn") { + idleEmptyOutput = await this.surfaceEmptyTurnFallback( + params.sessionId, + emittedAgentMessageChunk, + ); + } + return { - stopReason: this.session.cancelled ? 
"cancelled" : "end_turn", + stopReason: idleStopReason, + _meta: { emptyOutput: idleEmptyOutput }, }; } await handleSystemMessage(message, context); @@ -814,7 +860,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { (message as { stop_reason?: string }).stop_reason === "refusal" ) { if (lastRefusalExplanation) { - await this.client.sessionUpdate({ + await trackingClient.sessionUpdate({ sessionId: params.sessionId, update: { sessionUpdate: "agent_message_chunk", @@ -845,7 +891,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { message.subtype === "success" && message.result ) { - await this.client.sessionUpdate({ + await trackingClient.sessionUpdate({ sessionId: params.sessionId, update: { sessionUpdate: "agent_message_chunk", @@ -854,7 +900,28 @@ export class ClaudeAcpAgent extends BaseAcpAgent { }); } - return { stopReason: result.stopReason ?? "end_turn", usage }; + const effectiveStopReason = result.stopReason ?? "end_turn"; + // Guarantee the UI never renders a silent blank turn. A successful + // `end_turn` that delivered no prose (e.g. plan-mode thinking + + // `ExitPlanMode`) gets a fallback message. Skipped when the SDK's + // native structured output carried the response instead of a chunk. + let emptyOutput = false; + if ( + effectiveStopReason === "end_turn" && + (message as { structured_output?: unknown }).structured_output == + null + ) { + emptyOutput = await this.surfaceEmptyTurnFallback( + params.sessionId, + emittedAgentMessageChunk, + ); + } + + return { + stopReason: effectiveStopReason, + usage, + _meta: { emptyOutput }, + }; } case "stream_event": { @@ -1133,6 +1200,35 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } } + /** + * Surfaces a fallback message when a turn finished without delivering any + * user-visible prose. Returns `true` when the fallback was shown (the turn + * produced empty output), `false` when real content already reached the + * client. Guarantees the UI never renders a silent blank turn. 
+ */ + private async surfaceEmptyTurnFallback( + sessionId: string, + emittedAgentMessageChunk: boolean, + ): Promise { + if (emittedAgentMessageChunk) { + return false; + } + this.logger.warn( + "Turn ended with no agent_message_chunk; surfacing fallback", + { + sessionId, + }, + ); + await this.client.sessionUpdate({ + sessionId, + update: { + sessionUpdate: "agent_message_chunk", + content: { type: "text", text: EMPTY_TURN_FALLBACK_TEXT }, + }, + }); + return true; + } + // Called by BaseAcpAgent#cancel() to interrupt the session protected async interrupt(): Promise { this.session.cancelled = true; diff --git a/packages/core/src/sessions/sessionService.ts b/packages/core/src/sessions/sessionService.ts index 0eb24e96d..e031e1e40 100644 --- a/packages/core/src/sessions/sessionService.ts +++ b/packages/core/src/sessions/sessionService.ts @@ -29,7 +29,10 @@ import { type StoredLogEntry, type TaskRunStatus, } from "@posthog/shared"; -import { ANALYTICS_EVENTS } from "@posthog/shared/analytics-events"; +import { + ANALYTICS_EVENTS, + type StopReason, +} from "@posthog/shared/analytics-events"; import { type CloudTaskPermissionRequestUpdate, type CloudTaskUpdatePayload, @@ -93,6 +96,33 @@ const GITHUB_AUTHORIZATION_REQUIRED_CODE = "github_authorization_required"; const AUTO_RETRY_MAX_ATTEMPTS = 2; const AUTO_RETRY_DELAY_MS = 10_000; +const KNOWN_STOP_REASONS = new Set([ + "end_turn", + "max_tokens", + "max_turn_requests", + "refusal", + "cancelled", + "error", + "timeout", +]); + +// Maps the agent's raw turn stop reason onto the analytics `StopReason` union, +// falling back to "other" so a new/unknown reason is still counted. +function normalizeStopReason(raw: string | undefined): StopReason { + return raw && KNOWN_STOP_REASONS.has(raw as StopReason) + ? (raw as StopReason) + : "other"; +} + +// Counts the prompts a session has sent — used as `prompts_sent` on run-level +// analytics. 
A `session/prompt` request is the canonical "a turn started" +// marker in the event log. +function countPromptsSent(session: AgentSession): number { + return session.events.filter( + (e) => "method" in e.message && e.message.method === "session/prompt", + ).length; +} + class GitHubAuthorizationRequiredForCloudHandoffError extends Error { constructor( message = "Connect GitHub before continuing this task in cloud.", @@ -1431,6 +1461,21 @@ export class SessionService { const stopReason = (msg.result as { stopReason?: string }).stopReason; const hasQueuedMessages = this.drainQueuedMessages(taskRunId, session); + // Record the completed turn so empty/odd outcomes are observable. The + // `_meta.emptyOutput` flag is set by the agent when a turn finished + // without delivering any user-visible prose (see `surfaceEmptyTurnFallback`). + const emptyOutput = + (msg.result as { _meta?: { emptyOutput?: unknown } })._meta + ?.emptyOutput === true; + this.d.track(ANALYTICS_EVENTS.TASK_RUN_COMPLETED, { + task_id: session.taskId, + execution_type: "local", + duration_seconds: Math.round((Date.now() - session.startedAt) / 1000), + prompts_sent: countPromptsSent(session), + stop_reason: normalizeStopReason(stopReason), + empty_output: emptyOutput, + }); + // Only notify when queue is empty - queued messages will start a new turn if (stopReason && !hasQueuedMessages) { this.d.notifyPromptComplete( @@ -1920,17 +1965,11 @@ export class SessionService { sessionId: session.taskRunId, }); - const durationSeconds = Math.round( - (Date.now() - session.startedAt) / 1000, - ); - const promptCount = session.events.filter( - (e) => "method" in e.message && e.message.method === "session/prompt", - ).length; this.d.track(ANALYTICS_EVENTS.TASK_RUN_CANCELLED, { task_id: taskId, execution_type: "local", - duration_seconds: durationSeconds, - prompts_sent: promptCount, + duration_seconds: Math.round((Date.now() - session.startedAt) / 1000), + prompts_sent: countPromptsSent(session), }); return 
result; @@ -2360,17 +2399,11 @@ export class SessionService { method: "cancel", }); - const durationSeconds = Math.round( - (Date.now() - session.startedAt) / 1000, - ); - const promptCount = session.events.filter( - (e) => "method" in e.message && e.message.method === "session/prompt", - ).length; this.d.track(ANALYTICS_EVENTS.TASK_RUN_CANCELLED, { task_id: session.taskId, execution_type: "cloud", - duration_seconds: durationSeconds, - prompts_sent: promptCount, + duration_seconds: Math.round((Date.now() - session.startedAt) / 1000), + prompts_sent: countPromptsSent(session), }); if (!result.success) { diff --git a/packages/shared/src/analytics-events.ts b/packages/shared/src/analytics-events.ts index b3ff31e1f..e8cc411b5 100644 --- a/packages/shared/src/analytics-events.ts +++ b/packages/shared/src/analytics-events.ts @@ -30,7 +30,18 @@ type GitActionType = export type FeedbackType = "good" | "bad" | "general"; type FileOpenSource = "sidebar" | "agent-suggestion" | "search" | "diff"; export type FileChangeType = "added" | "modified" | "deleted"; -type StopReason = "user_cancelled" | "completed" | "error" | "timeout"; +// Mirrors the agent/ACP turn stop reasons so `TASK_RUN_COMPLETED` carries the +// raw reason a turn ended. `other` is the catch-all for any future/unmapped +// value so dashboards never silently drop a completion. +export type StopReason = + | "end_turn" + | "max_tokens" + | "max_turn_requests" + | "refusal" + | "cancelled" + | "error" + | "timeout" + | "other"; export type SkillButtonId = | "add-analytics" | "create-feature-flags" @@ -110,6 +121,11 @@ export interface TaskRunCompletedProperties { duration_seconds: number; prompts_sent: number; stop_reason: StopReason; + // True when the turn finished without delivering any user-visible prose + // (`agent_message_chunk`). Surfaces the "empty agent response" failure mode + // — most often a plan-mode turn that only produced (encrypted) thinking + // before calling `ExitPlanMode` — for triage. 
+ empty_output: boolean; } export interface TaskRunCancelledProperties {