diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index a04e928fa..ea29c2ed7 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -100,7 +100,12 @@ async function dispatchKnownCommand( return await handleOpenCommand(device, interactor, positionals, context); case 'close': { const app = positionals[0]; - if (!app) return { closed: 'session', ...successText('Closed session') }; + if (!app) { + if (device.platform === 'web') { + await interactor.close(''); + } + return { closed: 'session', ...successText('Closed session') }; + } await interactor.close(app); return { app, ...successText(`Closed: ${app}`) }; } diff --git a/src/daemon/handlers/__tests__/session-close-shutdown.test.ts b/src/daemon/handlers/__tests__/session-close-shutdown.test.ts index dd259bdf3..5611ab571 100644 --- a/src/daemon/handlers/__tests__/session-close-shutdown.test.ts +++ b/src/daemon/handlers/__tests__/session-close-shutdown.test.ts @@ -47,11 +47,14 @@ import { handleSessionCommands } from '../session.ts'; import { teardownSessionResources } from '../session-close.ts'; import { shutdownSimulator } from '../../../platforms/ios/simulator.ts'; import { runCmd } from '../../../utils/exec.ts'; +import { dispatchCommand } from '../../../core/dispatch.ts'; import { cleanupAppleXctracePerfCapture } from '../../../platforms/ios/perf-xctrace.ts'; import { cleanupAndroidNativePerfSession } from '../../../platforms/android/perf.ts'; +import { WEB_DESKTOP_DEVICE } from '../../../__tests__/test-utils/index.ts'; const mockShutdownSimulator = vi.mocked(shutdownSimulator); const mockRunCmd = vi.mocked(runCmd); +const mockDispatchCommand = vi.mocked(dispatchCommand); const mockCleanupAppleXctracePerfCapture = vi.mocked(cleanupAppleXctracePerfCapture); const mockCleanupAndroidNativePerfSession = vi.mocked(cleanupAndroidNativePerfSession); @@ -345,6 +348,36 @@ test('close stops active Android native perf capture before deleting session', a expect(sessionStore.get(sessionName)).toBeUndefined(); }); +test('close dispatches web session cleanup without a positional target', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'web-close-session'; + sessionStore.set(sessionName, makeSession(sessionName, WEB_DESKTOP_DEVICE)); + + const response = await handleSessionCommands({ + req: { + token: 't', + session: sessionName, + command: 'close', + positionals: [], + flags: {}, + }, + sessionName, + logPath: path.join(os.tmpdir(), 'daemon.log'), + sessionStore, + invoke: noopInvoke, + }); + + expect(response?.ok).toBe(true); + expect(mockDispatchCommand).toHaveBeenCalledWith( + WEB_DESKTOP_DEVICE, + 'close', + [], + undefined, + expect.objectContaining({ logPath: expect.stringContaining('daemon.log') }), + ); + expect(sessionStore.get(sessionName)).toBeUndefined(); +}); + test('daemon session teardown stops active Android native perf capture', async () => { const sessionName = 'android-active-native-perf-teardown-session'; const activeCapture = { diff --git a/src/daemon/handlers/session-close.ts b/src/daemon/handlers/session-close.ts index 871f26854..175bb855d 100644 --- a/src/daemon/handlers/session-close.ts +++ b/src/daemon/handlers/session-close.ts @@ -111,11 +111,11 @@ export async function handleCloseCommand(params: { } await stopSessionApplePerfCapture(session); await stopSessionAndroidNativePerfCapture(session); - if (req.positionals && req.positionals.length > 0) { + if (shouldDispatchPlatformClose(req, session)) { if (shouldStopAppleRunnerBeforeTargetedClose(session)) { await stopAppleRunnerForClose(session); } - await dispatchCommand(session.device, 'close', req.positionals, req.flags?.out, { + await dispatchCommand(session.device, 'close', req.positionals ?? [], req.flags?.out, { ...contextFromFlags(logPath, req.flags, session.appBundleId, session.trace?.outPath), }); await settleIosSimulator(session.device, IOS_SIMULATOR_POST_CLOSE_SETTLE_MS); @@ -172,6 +172,14 @@ export async function handleCloseCommand(params: { return { ok: true, data: { session: session.name, ...successText(`Closed: ${session.name}`) } }; } +function shouldDispatchPlatformClose(req: DaemonRequest, session: SessionState): boolean { + return hasCloseTarget(req) || session.device.platform === 'web'; +} + +function hasCloseTarget(req: DaemonRequest): boolean { + return (req.positionals?.length ?? 0) > 0; +} + async function closeWithoutSession(req: DaemonRequest, logPath: string): Promise { if (!req.positionals || req.positionals.length === 0) { return errorResponse('SESSION_NOT_FOUND', 'No active session'); diff --git a/src/daemon/request-router.ts b/src/daemon/request-router.ts index 08231ab42..b7374e7de 100644 --- a/src/daemon/request-router.ts +++ b/src/daemon/request-router.ts @@ -33,6 +33,7 @@ import { type RequestExecutionScope, } from './request-execution-scope.ts'; import { canRunReplayScopedAction } from './daemon-command-registry.ts'; +import { createAgentBrowserWebProvider } from '../platforms/web/agent-browser-provider.ts'; // --------------------------------------------------------------------------- // Request handler API @@ -135,7 +136,9 @@ export function createRequestHandler(deps: RequestRouterDeps): DaemonInvokeFn { appleRunnerProvider, appleToolProvider, linuxToolProvider, - webProvider, + webProvider: + webProvider ?? + (shouldUseDefaultWebProvider(lockedScope) ? createDefaultWebProvider : undefined), appLogProvider, recordingProvider, }, @@ -199,6 +202,13 @@ export function createRequestHandler(deps: RequestRouterDeps): DaemonInvokeFn { return handleRequest; } +const createDefaultWebProvider: WebProviderResolver = ({ req, session }) => + createAgentBrowserWebProvider({ session: session?.name ?? req.session }); + +function shouldUseDefaultWebProvider(scope: LockedRequestScope): boolean { + return scope.existingSession?.device.platform === 'web' || scope.req.flags?.platform === 'web'; +} + function unauthorizedResponse(): DaemonResponse { return { ok: false, diff --git a/src/platforms/web/agent-browser-provider.test.ts b/src/platforms/web/agent-browser-provider.test.ts new file mode 100644 index 000000000..57d292449 --- /dev/null +++ b/src/platforms/web/agent-browser-provider.test.ts @@ -0,0 +1,234 @@ +import assert from 'node:assert/strict'; +import { test } from 'vitest'; +import { createAgentBrowserWebProvider } from './agent-browser-provider.ts'; +import type { WebSnapshotResult } from './provider.ts'; +import { withCommandExecutorOverride, type ExecResult } from '../../utils/exec.ts'; +import { AppError } from '../../utils/errors.ts'; +import { + buildSelectorChainForNode, + parseSelectorChain, + resolveSelectorChain, +} from '../../daemon/selectors.ts'; +import { attachRefs } from '../../utils/snapshot.ts'; + +type AgentBrowserCall = { + cmd: string; + args: string[]; +}; + +test('agent-browser provider maps supported operations to session-scoped JSON commands', async () => { + const calls: AgentBrowserCall[] = []; + const provider = createAgentBrowserWebProvider({ session: 'web-session' }); + + await withCommandExecutorOverride(recordingExecutor(calls), async () => { + await provider.open('https://example.test'); + await provider.screenshot('/tmp/page.png', { fullscreen: true }); + await provider.click(10.4, 20.6); + await provider.fill(11, 22, 'Ada'); + await provider.typeText('hello'); + await provider.scroll('down', { pixels: 400 }); + await provider.close(); + }); + + assert.deepEqual( + calls.map((call) => call.args), + [ + ['open', 'https://example.test', '--json', '--session', 'web-session'], + ['screenshot', '--full', '/tmp/page.png', '--json', '--session', 'web-session'], + ['mouse', 'move', '10', '21', '--json', '--session', 'web-session'], + ['mouse', 'down', '--json', '--session', 'web-session'], + ['mouse', 'up', '--json', '--session', 'web-session'], + ['mouse', 'move', '11', '22', '--json', '--session', 'web-session'], + ['mouse', 'down', '--json', '--session', 'web-session'], + ['mouse', 'up', '--json', '--session', 'web-session'], + ['press', expectedSelectAllShortcut(), '--json', '--session', 'web-session'], + ['keyboard', 'type', 'Ada', '--json', '--session', 'web-session'], + ['keyboard', 'type', 'hello', '--json', '--session', 'web-session'], + ['scroll', 'down', '400', '--json', '--session', 'web-session'], + ['close', '--json', '--session', 'web-session'], + ], + ); +}); + +test('agent-browser provider normalizes snapshot refs, labels, values, parents, and rects', async () => { + const calls: AgentBrowserCall[] = []; + const provider = createAgentBrowserWebProvider({ session: 'web-session' }); + + const snapshot = await withCommandExecutorOverride( + snapshotExecutor(calls), + async () => await provider.snapshot({ interactiveOnly: true, depth: 4, scope: '#main' }), + ); + + assert.deepEqual(calls[0]?.args, [ + 'snapshot', + '--interactive', + '--compact', + '--depth', + '4', + '--selector', + '#main', + '--json', + '--session', + 'web-session', + ]); + assertNormalizedSnapshot(snapshot); + assertRoleSelectorResolves(snapshot); +}); + +test('agent-browser provider surfaces stale ref failures during snapshot geometry lookup', async () => { + const provider = createAgentBrowserWebProvider({ session: 'web-session' }); + + await assert.rejects( + () => + withCommandExecutorOverride( + async (_cmd, args) => { + if (args[0] === 'snapshot') { + return jsonResult({ + success: true, + data: { + refs: { e1: { role: 'button', name: 'Save' } }, + snapshot: 'button "Save" [ref=e1]', + }, + }); + } + return jsonResult({ success: false, error: 'Stale ref @e1' }); + }, + async () => await provider.snapshot(), + ), + (error: unknown) => + error instanceof AppError && + error.code === 'COMMAND_FAILED' && + error.message === 'Stale ref @e1', + ); +}); + +test('agent-browser provider adds doctor guidance for missing binary and invalid JSON', async () => { + const provider = createAgentBrowserWebProvider(); + + await assert.rejects( + () => + withCommandExecutorOverride( + async () => { + throw new AppError('TOOL_MISSING', 'agent-browser not found'); + }, + async () => await provider.open('https://example.test'), + ), + (error: unknown) => + error instanceof AppError && + error.code === 'TOOL_MISSING' && + error.details?.hint === + 'Install agent-browser and run `agent-browser doctor --offline --quick` to verify the local browser setup.', + ); + + await assert.rejects( + () => + withCommandExecutorOverride( + async () => ({ stdout: 'not-json', stderr: '', exitCode: 0 }), + async () => await provider.open('https://example.test'), + ), + (error: unknown) => + error instanceof AppError && + error.code === 'COMMAND_FAILED' && + error.message === 'agent-browser returned invalid JSON' && + typeof error.details?.hint === 'string', + ); +}); + +function recordingExecutor(calls: AgentBrowserCall[]) { + return async (cmd: string, args: string[]): Promise => { + calls.push({ cmd, args }); + return jsonResult({ success: true, data: {} }); + }; +} + +function snapshotExecutor(calls: AgentBrowserCall[]) { + return async (cmd: string, args: string[], options: { allowFailure?: boolean }) => { + calls.push({ cmd, args }); + if (args[0] === 'snapshot') return snapshotPayload(); + if (args.slice(0, 3).join(' ') === 'get box @e3') { + assert.equal(options.allowFailure, true); + return jsonResult({ success: false, error: 'No box for element' }, 1); + } + if (args[0] === 'get' && args[1] === 'box') return boxPayload(args[2]); + return jsonResult({ success: true, data: {} }); + }; +} + +function assertNormalizedSnapshot(snapshot: WebSnapshotResult): void { + assert.deepEqual(snapshot.nodes, [ + expectedNode(0, 'heading', 'Welcome', undefined, 0, { x: 1, y: 2, width: 100, height: 20 }), + expectedNode(1, 'textbox', 'Name', 'Ada', 1, { x: 11, y: 12, width: 100, height: 20 }, 0), + expectedNode(2, 'button', 'Save', undefined, 1, undefined, 0), + expectedNode(3, 'link', 'Docs', undefined, 1, { x: 31, y: 32, width: 100, height: 20 }, 0), + ]); +} + +function assertRoleSelectorResolves(snapshot: WebSnapshotResult): void { + const nodesWithRefs = attachRefs(snapshot.nodes); + const selectorChain = buildSelectorChainForNode(nodesWithRefs[2]!, 'web'); + assert.deepEqual(selectorChain, ['role="button" label="Save"', 'label="Save"']); + const resolved = resolveSelectorChain(nodesWithRefs, parseSelectorChain(selectorChain[0]!), { + platform: 'web', + }); + assert.equal(resolved?.node.label, 'Save'); +} + +function expectedNode( + index: number, + type: string, + label: string, + value: string | undefined, + depth: number, + rect: { x: number; y: number; width: number; height: number } | undefined, + parentIndex?: number, +) { + return { + index, + type, + role: type, + label, + value, + depth, + enabled: undefined, + focused: undefined, + ...(parentIndex === undefined ? {} : { parentIndex }), + ...(rect ? { rect } : {}), + }; +} + +function snapshotPayload(): ExecResult { + return jsonResult({ + success: true, + data: { + refs: { + e1: { role: 'heading', name: 'Welcome' }, + e2: { role: 'textbox', name: 'Name' }, + e3: { role: 'button', name: 'Save' }, + e4: { role: 'link', name: 'Docs' }, + }, + snapshot: [ + '- heading "Welcome" [ref=e1]', + ' - textbox "Name" [ref=e2]: Ada', + ' - button "Save" [ref=e3]', + ' - link "Docs" [ref=e4]', + ].join('\n'), + truncated: false, + }, + }); +} + +function boxPayload(ref: string | undefined): ExecResult { + const offset = ref === '@e1' ? 0 : ref === '@e2' ? 10 : 30; + return jsonResult({ + success: true, + data: { x: offset + 1, y: offset + 2, width: 100, height: 20 }, + }); +} + +function jsonResult(value: unknown, exitCode = 0): ExecResult { + return { stdout: JSON.stringify(value), stderr: '', exitCode }; +} + +function expectedSelectAllShortcut(): string { + return process.platform === 'darwin' ? 'Meta+a' : 'Control+a'; +} diff --git a/src/platforms/web/agent-browser-provider.ts b/src/platforms/web/agent-browser-provider.ts new file mode 100644 index 000000000..6b9afdf8c --- /dev/null +++ b/src/platforms/web/agent-browser-provider.ts @@ -0,0 +1,280 @@ +import { runCmd } from '../../utils/exec.ts'; +import { AppError, asAppError } from '../../utils/errors.ts'; +import type { Rect } from '../../utils/snapshot.ts'; +import { normalizeAgentBrowserSnapshot } from './agent-browser-snapshot.ts'; +import { + isJsonObject, + readNumberProperty, + readStringProperty, + type JsonObject, +} from './json-utils.ts'; +import type { WebProvider, WebSnapshotOptions, WebSnapshotResult } from './provider.ts'; + +const AGENT_BROWSER = 'agent-browser'; +const AGENT_BROWSER_TIMEOUT_MS = 30_000; +const AGENT_BROWSER_DOCTOR_HINT = + 'Install agent-browser and run `agent-browser doctor --offline --quick` to verify the local browser setup.'; + +type AgentBrowserProviderOptions = { + session?: string; +}; + +export function createAgentBrowserWebProvider( + options: AgentBrowserProviderOptions = {}, +): WebProvider { + const session = options.session?.trim(); + const runJson = async (args: string[]): Promise => + await runAgentBrowserJson(args, session); + + return { + async open(target) { + await runJson(['open', target]); + }, + async close() { + await runJson(['close']); + }, + async snapshot(snapshotOptions) { + return await captureAgentBrowserSnapshot(runJson, snapshotOptions); + }, + async screenshot(outPath, screenshotOptions) { + await runJson(['screenshot', ...(screenshotOptions?.fullscreen ? ['--full'] : []), outPath]); + }, + async click(x, y) { + await clickCoordinates(runJson, x, y); + }, + async fill(x, y, text) { + // The shared web interactor is coordinate-first; bridge that to low-level + // browser input until a future ref-targeted web path can call native fill. + await clickCoordinates(runJson, x, y); + await runJson(['press', selectAllShortcut()]); + await runJson(['keyboard', 'type', text]); + }, + async typeText(text) { + await runJson(['keyboard', 'type', text]); + }, + async scroll(direction, scrollOptions) { + const distance = scrollOptions?.pixels ?? scrollOptions?.amount; + await runJson(['scroll', direction, ...(distance === undefined ? [] : [String(distance)])]); + }, + }; +} + +async function clickCoordinates( + runJson: (args: string[]) => Promise, + x: number, + y: number, +): Promise { + await runJson(['mouse', 'move', String(Math.round(x)), String(Math.round(y))]); + await runJson(['mouse', 'down']); + await runJson(['mouse', 'up']); +} + +async function captureAgentBrowserSnapshot( + runJson: (args: string[]) => Promise, + options: WebSnapshotOptions | undefined, +): Promise { + const data = await runJson(buildSnapshotArgs(options)); + return await normalizeAgentBrowserSnapshot(data, async (ref) => await fetchRefRect(runJson, ref)); +} + +function buildSnapshotArgs(options: WebSnapshotOptions | undefined): string[] { + return [ + 'snapshot', + ...(options?.interactiveOnly ? ['--interactive'] : []), + ...(options?.raw ? [] : ['--compact']), + ...(typeof options?.depth === 'number' ? ['--depth', String(options.depth)] : []), + ...(options?.scope ? ['--selector', options.scope] : []), + ]; +} + +async function fetchRefRect( + runJson: (args: string[]) => Promise, + ref: string, +): Promise { + try { + return parseRect(await runJson(['get', 'box', browserRefSelector(ref)])); + } catch (error) { + if (isIgnorableBoxError(error)) return undefined; + throw error; + } +} + +function isIgnorableBoxError(error: unknown): boolean { + const message = error instanceof Error ? error.message : String(error); + return !/\bstale\b/i.test(message) && /\bbox\b|not visible|not found|no element/i.test(message); +} + +async function runAgentBrowserJson(args: string[], session: string | undefined): Promise { + const cliArgs = [...args, '--json', ...(session ? ['--session', session] : [])]; + const result = await runAgentBrowserCommand(cliArgs); + const parsed = parseAgentBrowserJson(result.stdout, result.stderr, cliArgs, result.exitCode); + return unwrapAgentBrowserJson(parsed, result, cliArgs); +} + +async function runAgentBrowserCommand(cliArgs: string[]): Promise<{ + stdout: string; + stderr: string; + exitCode: number; +}> { + let stdout = ''; + let stderr = ''; + let exitCode = 0; + try { + const result = await runCmd(AGENT_BROWSER, cliArgs, { + allowFailure: true, + timeoutMs: AGENT_BROWSER_TIMEOUT_MS, + }); + stdout = result.stdout; + stderr = result.stderr; + exitCode = result.exitCode; + } catch (error) { + throw mapAgentBrowserRunError(error, cliArgs); + } + + return { stdout, stderr, exitCode }; +} + +function unwrapAgentBrowserJson( + parsed: unknown, + result: { stdout: string; stderr: string; exitCode: number }, + cliArgs: string[], +): unknown { + if (!isJsonObject(parsed)) return parsed; + + const success = parsed.success ?? parsed.ok; + if (success === false) { + throw new AppError(toErrorCode(parsed.code), readEnvelopeErrorMessage(parsed), { + cmd: AGENT_BROWSER, + args: cliArgs, + hint: readStringProperty(parsed, 'hint') ?? AGENT_BROWSER_DOCTOR_HINT, + agentBrowserError: parsed.error, + }); + } + if (result.exitCode !== 0) { + throw new AppError('COMMAND_FAILED', 'agent-browser command failed', { + cmd: AGENT_BROWSER, + args: cliArgs, + exitCode: result.exitCode, + stdout: result.stdout.slice(0, 500), + stderr: result.stderr.slice(0, 500), + hint: readStringProperty(parsed, 'hint') ?? AGENT_BROWSER_DOCTOR_HINT, + }); + } + + return Object.hasOwn(parsed, 'data') ? parsed.data : parsed; +} + +function parseAgentBrowserJson( + stdout: string, + stderr: string, + args: string[], + exitCode: number, +): unknown { + try { + return JSON.parse(stdout); + } catch (error) { + const commandFailed = exitCode !== 0; + throw new AppError( + 'COMMAND_FAILED', + commandFailed ? 'agent-browser command failed' : 'agent-browser returned invalid JSON', + { + cmd: AGENT_BROWSER, + args, + ...(commandFailed ? { exitCode } : {}), + stdout: stdout.slice(0, 500), + stderr: stderr.slice(0, 500), + hint: AGENT_BROWSER_DOCTOR_HINT, + }, + error instanceof Error ? error : undefined, + ); + } +} + +function mapAgentBrowserRunError(error: unknown, args: string[]): AppError { + const appError = asAppError(error); + if (appError.code === 'TOOL_MISSING') { + return new AppError( + 'TOOL_MISSING', + 'agent-browser not found in PATH', + { cmd: AGENT_BROWSER, args, hint: AGENT_BROWSER_DOCTOR_HINT }, + appError, + ); + } + if (appError.code === 'COMMAND_FAILED') { + return new AppError( + 'COMMAND_FAILED', + appError.message, + { + ...(appError.details ?? {}), + cmd: AGENT_BROWSER, + args, + hint: + typeof appError.details?.hint === 'string' + ? appError.details.hint + : AGENT_BROWSER_DOCTOR_HINT, + }, + appError, + ); + } + return appError; +} + +function readEnvelopeErrorMessage(envelope: JsonObject): string { + const error = envelope.error; + if (typeof error === 'string' && error.trim()) return error.trim(); + if (isJsonObject(error)) { + const message = readStringProperty(error, 'message') ?? readStringProperty(error, 'error'); + if (message) return message; + } + return readStringProperty(envelope, 'message') ?? 'agent-browser command failed'; +} + +function toErrorCode(value: unknown): 'COMMAND_FAILED' | (string & {}) { + return typeof value === 'string' && value.length > 0 ? value : 'COMMAND_FAILED'; +} + +function browserRefSelector(ref: string): string { + return `@${ref}`; +} + +function selectAllShortcut(): string { + return process.platform === 'darwin' ? 'Meta+a' : 'Control+a'; +} + +function parseRect(data: unknown): Rect | undefined { + const candidate = isJsonObject(data) && isJsonObject(data.box) ? data.box : data; + if (!isJsonObject(candidate)) return undefined; + return rectFromPointSize(candidate) ?? rectFromEdges(candidate); +} + +function rectFromPointSize(candidate: JsonObject): Rect | undefined { + const x = readNumberProperty(candidate, 'x'); + const y = readNumberProperty(candidate, 'y'); + const width = readNumberProperty(candidate, 'width'); + const height = readNumberProperty(candidate, 'height'); + return buildRect(x, y, width, height); +} + +function rectFromEdges(candidate: JsonObject): Rect | undefined { + const left = readNumberProperty(candidate, 'left'); + const top = readNumberProperty(candidate, 'top'); + const right = readNumberProperty(candidate, 'right'); + const bottom = readNumberProperty(candidate, 'bottom'); + return buildRect(left, top, diffNumbers(right, left), diffNumbers(bottom, top)); +} + +function buildRect( + x: number | undefined, + y: number | undefined, + width: number | undefined, + height: number | undefined, +): Rect | undefined { + if (x === undefined || y === undefined || width === undefined || height === undefined) { + return undefined; + } + return { x, y, width, height }; +} + +function diffNumbers(a: number | undefined, b: number | undefined): number | undefined { + return a === undefined || b === undefined ? undefined : a - b; +} diff --git a/src/platforms/web/agent-browser-snapshot.ts b/src/platforms/web/agent-browser-snapshot.ts new file mode 100644 index 000000000..d8bb15ce3 --- /dev/null +++ b/src/platforms/web/agent-browser-snapshot.ts @@ -0,0 +1,211 @@ +import type { RawSnapshotNode, Rect } from '../../utils/snapshot.ts'; +import { + isJsonObject, + readBooleanProperty, + readProperty, + readStringProperty, + type JsonObject, +} from './json-utils.ts'; +import type { WebSnapshotResult } from './provider.ts'; + +type SnapshotRefRecord = { + ref: string; + data?: JsonObject; +}; + +type SnapshotDraftNode = { + ref: string; + node: RawSnapshotNode; +}; + +const MAX_CONCURRENT_BOX_FETCHES = 8; + +export async function normalizeAgentBrowserSnapshot( + data: unknown, + fetchBox: (ref: string) => Promise, +): Promise { + const snapshotText = readStringProperty(data, 'snapshot') ?? ''; + const refs = collectSnapshotRefs(readProperty(data, 'refs')); + const drafts = parseSnapshotDraftNodes(snapshotText, refs); + + await attachDraftRects(drafts, fetchBox); + + return { + nodes: drafts.map((draft, index) => ({ ...draft.node, index })), + truncated: readBooleanProperty(data, 'truncated'), + }; +} + +async function attachDraftRects( + drafts: SnapshotDraftNode[], + fetchBox: (ref: string) => Promise, +): Promise { + for (let index = 0; index < drafts.length; index += MAX_CONCURRENT_BOX_FETCHES) { + const chunk = drafts.slice(index, index + MAX_CONCURRENT_BOX_FETCHES); + const rects = await Promise.all(chunk.map((draft) => fetchBox(draft.ref))); + for (const [chunkIndex, rect] of rects.entries()) { + if (rect) chunk[chunkIndex]!.node.rect = rect; + } + } +} + +function parseSnapshotDraftNodes( + snapshotText: string, + refs: SnapshotRefRecord[], +): SnapshotDraftNode[] { + const byRef = new Map(refs.map((ref) => [ref.ref, ref])); + const drafts: SnapshotDraftNode[] = []; + const seenRefs = new Set(); + const lastIndexByDepth = new Map(); + + for (const line of snapshotText.split(/\r?\n/)) { + const ref = extractBrowserRef(line); + if (!ref) continue; + seenRefs.add(ref); + const metadata = byRef.get(ref)?.data; + const depth = inferSnapshotDepth(line); + const node = snapshotNodeFromLine(line, metadata, depth); + const parentIndex = findParentIndex(lastIndexByDepth, depth); + if (parentIndex !== undefined) node.parentIndex = parentIndex; + drafts.push({ ref, node: { ...node, index: drafts.length } }); + lastIndexByDepth.set(depth, drafts.length - 1); + } + + for (const ref of refs) { + if (seenRefs.has(ref.ref)) continue; + drafts.push({ + ref: ref.ref, + node: { ...snapshotNodeFromMetadata(ref.data), index: drafts.length }, + }); + } + + return drafts; +} + +function snapshotNodeFromLine( + line: string, + metadata: JsonObject | undefined, + depth: number, +): RawSnapshotNode { + const type = extractRole(line) ?? readMetadataString(metadata, ['role', 'type']); + return { + index: 0, + type, + role: type, + label: extractQuotedText(line) ?? readMetadataString(metadata, ['label', 'name', 'text']), + value: extractValue(line) ?? readMetadataString(metadata, ['value']), + depth, + enabled: readMetadataBoolean(metadata, ['enabled']), + focused: readMetadataBoolean(metadata, ['focused']), + }; +} + +function snapshotNodeFromMetadata(metadata: JsonObject | undefined): RawSnapshotNode { + const type = readMetadataString(metadata, ['role', 'type']); + return { + index: 0, + type, + role: type, + label: readMetadataString(metadata, ['label', 'name', 'text']), + value: readMetadataString(metadata, ['value']), + enabled: readMetadataBoolean(metadata, ['enabled']), + focused: readMetadataBoolean(metadata, ['focused']), + }; +} + +function collectSnapshotRefs(value: unknown): SnapshotRefRecord[] { + if (Array.isArray(value)) { + return value.flatMap((entry) => { + const record = isJsonObject(entry) ? entry : undefined; + const ref = normalizeBrowserRef(readMetadataString(record, ['ref', 'id']) ?? String(entry)); + return ref ? [{ ref, data: record }] : []; + }); + } + if (!isJsonObject(value)) return []; + return Object.entries(value).flatMap(([key, entry]) => { + const ref = normalizeBrowserRef(key); + if (!ref) return []; + return [{ ref, data: isJsonObject(entry) ? entry : undefined }]; + }); +} + +function inferSnapshotDepth(line: string): number { + const leadingWhitespace = line.match(/^\s*/)?.[0].length ?? 0; + return Math.floor(leadingWhitespace / 2); +} + +function findParentIndex(lastIndexByDepth: Map, depth: number): number | undefined { + for (let candidateDepth = depth - 1; candidateDepth >= 0; candidateDepth -= 1) { + const parent = lastIndexByDepth.get(candidateDepth); + if (parent !== undefined) return parent; + } + return undefined; +} + +function extractBrowserRef(line: string): string | null { + return normalizeBrowserRef( + line.match(/\bref=['"]?(@?e\d+)['"]?/i)?.[1] ?? line.match(/@?(e\d+)\b/i)?.[1], + ); +} + +function normalizeBrowserRef(value: string | undefined): string | null { + const ref = value?.trim().replace(/^@/, ''); + return ref && /^e\d+$/i.test(ref) ? ref.toLowerCase() : null; +} + +function extractRole(line: string): string | undefined { + const cleaned = line + .replace(/\[[^\]]*ref[^\]]*\]/gi, '') + .replace(/@?e\d+\b/gi, '') + .replace(/^[\s|├└─>*-]+/g, '') + .trim(); + return cleaned.match(/^([A-Za-z][\w-]*)\b/)?.[1]; +} + +function extractQuotedText(line: string): string | undefined { + return line.match(/"([^"]+)"/)?.[1] ?? line.match(/'([^']+)'/)?.[1]; +} + +function extractValue(line: string): string | undefined { + return ( + extractTrailingAriaValue(line) ?? + line + .match(/\bvalue=(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/i) + ?.slice(1) + .find(isString) ?? + line + .match(/\bvalue:\s*(?:"([^"]*)"|'([^']*)'|([^\s\]]+))/i) + ?.slice(1) + .find(isString) + ); +} + +function extractTrailingAriaValue(line: string): string | undefined { + const value = line.match(/\]\s*:\s*(.+)$/)?.[1]?.trim(); + return value || undefined; +} + +function readMetadataString(metadata: JsonObject | undefined, keys: string[]): string | undefined { + if (!metadata) return undefined; + for (const key of keys) { + const value = metadata[key]; + if (typeof value === 'string' && value.length > 0) return value; + } + return undefined; +} + +function readMetadataBoolean( + metadata: JsonObject | undefined, + keys: string[], +): boolean | undefined { + if (!metadata) return undefined; + for (const key of keys) { + const value = metadata[key]; + if (typeof value === 'boolean') return value; + } + return undefined; +} + +function isString(value: unknown): value is string { + return typeof value === 'string'; +} diff --git a/src/platforms/web/json-utils.ts b/src/platforms/web/json-utils.ts new file mode 100644 index 000000000..191daf3cb --- /dev/null +++ b/src/platforms/web/json-utils.ts @@ -0,0 +1,24 @@ +export type JsonObject = Record; + +export function isJsonObject(value: unknown): value is JsonObject { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +export function readProperty(value: unknown, key: string): unknown { + return isJsonObject(value) ? value[key] : undefined; +} + +export function readStringProperty(value: unknown, key: string): string | undefined { + const property = readProperty(value, key); + return typeof property === 'string' ? property : undefined; +} + +export function readBooleanProperty(value: unknown, key: string): boolean | undefined { + const property = readProperty(value, key); + return typeof property === 'boolean' ? property : undefined; +} + +export function readNumberProperty(value: JsonObject, key: string): number | undefined { + const property = value[key]; + return typeof property === 'number' && Number.isFinite(property) ? property : undefined; +} diff --git a/src/platforms/web/provider.ts b/src/platforms/web/provider.ts index b62ff9d38..6801a0e98 100644 --- a/src/platforms/web/provider.ts +++ b/src/platforms/web/provider.ts @@ -1,8 +1,8 @@ import type { ScrollDirection } from '../../core/scroll-gesture.ts'; import type { SessionSurface } from '../../core/session-surface.ts'; -import { AppError } from '../../utils/errors.ts'; import { createScopedProvider } from '../../utils/scoped-provider.ts'; import type { RawSnapshotNode } from '../../utils/snapshot.ts'; +import { createAgentBrowserWebProvider } from './agent-browser-provider.ts'; export type WebOpenOptions = { url?: string; @@ -39,16 +39,7 @@ export type WebProvider = { readText?(x: number, y: number): Promise; }; -const localWebProvider: WebProvider = { - open: () => unsupportedLocalWebProvider(), - close: () => unsupportedLocalWebProvider(), - snapshot: () => unsupportedLocalWebProvider(), - screenshot: () => unsupportedLocalWebProvider(), - click: () => unsupportedLocalWebProvider(), - fill: () => unsupportedLocalWebProvider(), - typeText: () => unsupportedLocalWebProvider(), - scroll: () => unsupportedLocalWebProvider(), -}; +const localWebProvider: WebProvider = createAgentBrowserWebProvider(); const webProviderScope = createScopedProvider(localWebProvider); @@ -62,10 +53,3 @@ export async function withWebProvider( ): Promise { return await webProviderScope.run(provider, fn); } - -async function unsupportedLocalWebProvider(): Promise { - throw new AppError( - 'UNSUPPORTED_OPERATION', - 'Web automation requires a request-scoped web provider.', - ); -} diff --git a/src/utils/__tests__/output.test.ts b/src/utils/__tests__/output.test.ts index 02209795a..e9b09e9c8 100644 --- a/src/utils/__tests__/output.test.ts +++ b/src/utils/__tests__/output.test.ts @@ -1365,6 +1365,29 @@ test('formatSnapshotText suppresses sparse snapshot hint for depth-limited reads assert.doesNotMatch(text, /sparse accessibility snapshot/); }); +test('formatSnapshotText renders web textboxes as text fields and suppresses native sparse hint', () => { + const text = withNoColor(() => + formatSnapshotText({ + nodes: [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'textbox', + role: 'textbox', + label: 'Email ', + value: 'ada@example.com', + }, + ], + truncated: false, + snapshotDiagnostics: { stats: { platform: 'web' } }, + }), + ); + + assert.match(text, /@e1 \[text-field\] "ada@example\.com"/); + assert.doesNotMatch(text, /sparse accessibility snapshot/); +}); + test('formatSnapshotText keeps flattened output and adds duplicate nav warning', () => { const nodes = Array.from({ length: 24 }, (_, index) => ({ ref: `e${index + 1}`, diff --git a/src/utils/output.ts b/src/utils/output.ts index 0f9636b84..e56616fbd 100644 --- a/src/utils/output.ts +++ b/src/utils/output.ts @@ -616,7 +616,7 @@ function buildSnapshotNotices( ): string[] { const notices = readSnapshotWarnings(data); // The structured snapshot quality verdict already carries a sharper version of this hint. - if (!data.snapshotQuality) { + if (shouldRenderLegacySparseSnapshotHint(data)) { const sparseSnapshotHint = formatSparseSnapshotHint(nodes, options); if (sparseSnapshotHint) notices.push(sparseSnapshotHint); } @@ -632,6 +632,19 @@ function buildSnapshotNotices( return notices; } +function shouldRenderLegacySparseSnapshotHint(data: Record): boolean { + return !data.snapshotQuality && !isWebSnapshotData(data); +} + +function isWebSnapshotData(data: Record): boolean { + const diagnostics = data.snapshotDiagnostics; + if (!diagnostics || typeof diagnostics !== 'object') return false; + const stats = (diagnostics as { stats?: unknown }).stats; + return Boolean( + stats && typeof stats === 'object' && (stats as { platform?: unknown }).platform === 'web', + ); +} + function formatSparseSnapshotHint( nodes: SnapshotNode[], options: Pick, diff --git a/src/utils/snapshot-lines.ts b/src/utils/snapshot-lines.ts index e49d2442e..3f81d9ce1 100644 --- a/src/utils/snapshot-lines.ts +++ b/src/utils/snapshot-lines.ts @@ -23,6 +23,7 @@ const ROLE_LABELS: Record = { cell: 'cell', statictext: 'text', checkedtextview: 'text', + textbox: 'text-field', textfield: 'text-field', edittext: 'text-field', textarea: 'text-view',