diff --git a/scripts/integration-progress-model.ts b/scripts/integration-progress-model.ts
index acceb86dc..1d29b5372 100644
--- a/scripts/integration-progress-model.ts
+++ b/scripts/integration-progress-model.ts
@@ -371,6 +371,11 @@ function summarizeProviderPressure(files) {
       pattern:
         /\bLinuxToolProvider\b|\blinuxToolProvider\b|\brunCommand\b|\bwhichCommand\b|\bxdotool\b|\bydotool\b|\bxclip\b|\bscrot\b|\bgrim\b|\bwmctrl\b|\bpkill\b/g,
     },
+    {
+      name: 'Web semantic provider',
+      pattern:
+        /\bWebProvider\b|\bwebProvider\b|\bwithWebProvider\b|\bresolveWebProvider\b|\['web'/g,
+    },
     {
       name: 'Recording provider',
       pattern: /\bRecordingProvider\b|\brecordingProvider\b|\bstartRecording\b/g,
diff --git a/test/integration/provider-scenarios/fixtures.ts b/test/integration/provider-scenarios/fixtures.ts
index fa00cb69d..5f69a6cbb 100644
--- a/test/integration/provider-scenarios/fixtures.ts
+++ b/test/integration/provider-scenarios/fixtures.ts
@@ -66,6 +66,16 @@ export const PROVIDER_SCENARIO_LINUX: DeviceInfo = {
   booted: true,
 };
 
+/** Web-platform desktop device entry used by the web provider scenario. */
+export const PROVIDER_SCENARIO_WEB: DeviceInfo = {
+  platform: 'web',
+  id: 'agent-browser-chrome',
+  name: 'Agent Browser Chrome',
+  kind: 'device',
+  target: 'desktop',
+  booted: true,
+};
+
 export function createDemoIosApp(prefix: string): { tempRoot: string; appPath: string } {
   const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
   const appPath = path.join(tempRoot, 'Demo.app');
diff --git a/test/integration/provider-scenarios/web-desktop.test.ts b/test/integration/provider-scenarios/web-desktop.test.ts
new file mode 100644
index 000000000..47e5472c2
--- /dev/null
+++ b/test/integration/provider-scenarios/web-desktop.test.ts
@@ -0,0 +1,163 @@
+import assert from 'node:assert/strict';
+import fs from 'node:fs';
+import { test } from 'vitest';
+import { assertFlatToolCall, assertPngFile } from './assertions.ts';
+import { PROVIDER_SCENARIO_WEB } from './fixtures.ts';
+import { createProviderScenarioTempPath, withProviderScenarioResource } from './harness.ts';
+import { runProviderScenario } from './scenario.ts';
+import { createWebDesktopWorld } from './web-world.ts';
+
+const WEB_URL = 'https://example.test/dashboard';
+
+// Runs the full web flow through the daemon, then verifies both the actions recorded on the
+// session and the flat calls captured by the in-memory web provider.
+test('Provider-backed integration web desktop flow uses semantic web provider calls', async () => {
+  await withProviderScenarioResource(createWebDesktopWorld, async ({ daemon, semanticCalls }) => {
+    const screenshotPath = createProviderScenarioTempPath(
+      'agent-device-provider-scenario-web',
+      'png',
+    );
+
+    try {
+      const devices = await daemon.client().devices.list({ platform: 'web' });
+      assert.equal(devices.length, 1);
+      assert.equal(devices[0]?.platform, 'web');
+      assert.equal(devices[0]?.id, PROVIDER_SCENARIO_WEB.id);
+      assert.equal(devices[0]?.target, 'desktop');
+
+      await runProviderScenario(daemon, [
+        {
+          name: 'open web URL',
+          command: 'open',
+          positionals: [WEB_URL],
+          flags: { platform: 'web' },
+        },
+        {
+          name: 'capture interactive web snapshot',
+          command: 'snapshot',
+          flags: { snapshotInteractiveOnly: true },
+          assert: (snapshot) => {
+            const labels = snapshot.json?.result?.data?.nodes?.map(
+              (node: { label?: string }) => node.label,
+            );
+            assert.deepEqual(labels, [
+              WEB_URL,
+              'Ready',
+              'Email',
+              'Submit order',
+              'Ready',
+              'Below the fold',
+            ]);
+          },
+        },
+        {
+          name: 'read snapshot ref text',
+          command: 'get',
+          positionals: ['text', '@e2'],
+          expectData: { text: 'Ready' },
+        },
+        {
+          name: 'find visible text',
+          command: 'find',
+          positionals: ['text', 'Submit order', 'exists'],
+          expectData: { found: true },
+        },
+        {
+          name: 'assert visible text',
+          command: 'is',
+          positionals: ['visible', 'label="Submit order"'],
+          expectData: { pass: true },
+        },
+        {
+          name: 'wait for text',
+          command: 'wait',
+          positionals: ['text', 'Ready', '100'],
+          expectData: { text: 'Ready' },
+        },
+        {
+          name: 'click submit ref',
+          command: 'click',
+          positionals: ['@e4'],
+          expectData: { x: 84, y: 166 },
+        },
+        {
+          name: 'fill email ref',
+          command: 'fill',
+          positionals: ['@e3', 'qa@example.test'],
+          flags: { delayMs: 1 },
+          expectData: { text: 'qa@example.test' },
+        },
+        {
+          name: 'type suffix',
+          command: 'type',
+          positionals: [' ok'],
+          expectData: { text: ' ok' },
+        },
+        {
+          name: 'scroll by pixels',
+          command: 'scroll',
+          positionals: ['down'],
+          flags: { pixels: 240 },
+          expectData: { pixels: 240 },
+        },
+        {
+          name: 'capture web screenshot artifact',
+          command: 'screenshot',
+          positionals: [screenshotPath],
+          flags: {
+            screenshotFullscreen: true,
+            screenshotNoStabilize: true,
+          },
+          expectData: { path: screenshotPath },
+          assert: () => {
+            assertPngFile(screenshotPath);
+          },
+        },
+      ]);
+
+      const actions = daemon.session()?.actions ?? [];
+      assert.ok(
+        actions.some(
+          (action) => action.command === 'click' && action.positionals.join(' ') === '@e4',
+        ),
+        'Expected ref click action to be recorded on the session',
+      );
+      assert.ok(
+        actions.some(
+          (action) =>
+            action.command === 'fill' &&
+            action.positionals.join(' ') === '@e3 qa@example.test' &&
+            action.flags.delayMs === 1,
+        ),
+        'Expected ref fill action to be recorded on the session',
+      );
+      assert.ok(
+        actions.some(
+          (action) => action.command === 'type' && action.positionals.join(' ') === ' ok',
+        ),
+        'Expected type action to be recorded on the session',
+      );
+
+      const close = await daemon.callCommand('close', [WEB_URL]);
+      assert.equal(close.statusCode, 200, JSON.stringify(close.json));
+
+      assertFlatToolCall(semanticCalls, ['web', 'open', WEB_URL, '']);
+      assertFlatToolCall(semanticCalls, ['web', 'snapshot', 'true', '']);
+      assertFlatToolCall(semanticCalls, ['web', 'click', '84', '166']);
+      assertFlatToolCall(semanticCalls, ['web', 'fill', '144', '114', 'qa@example.test', '1']);
+      assertFlatToolCall(semanticCalls, ['web', 'type', ' ok', '0']);
+      assertFlatToolCall(semanticCalls, ['web', 'scroll', 'down', '', '240']);
+      assertFlatToolCall(semanticCalls, [
+        'web',
+        'screenshot',
+        screenshotPath,
+        'true',
+        'false',
+        'app',
+      ]);
+      assertFlatToolCall(semanticCalls, ['web', 'close', WEB_URL]);
+    } finally {
+      fs.rmSync(screenshotPath, { force: true });
+    }
+  });
+}, 10_000);
diff --git a/test/integration/provider-scenarios/web-world.ts b/test/integration/provider-scenarios/web-world.ts
new file mode 100644
index 000000000..82c9b500c
--- /dev/null
+++ b/test/integration/provider-scenarios/web-world.ts
@@ -0,0 +1,205 @@
+import fs from 'node:fs';
+import type { WebProvider } from '../../../src/platforms/web/provider.ts';
+import type { RawSnapshotNode } from '../../../src/utils/snapshot.ts';
+import { validPng } from './assertions.ts';
+import { PROVIDER_SCENARIO_WEB } from './fixtures.ts';
+import { createProviderScenarioHarness, type ProviderScenarioHarness } from './harness.ts';
+import type { FlatToolCall } from './providers.ts';
+
+// Email field and submit button rects, shared by snapshot nodes and click/fill hit tests.
+const INPUT_RECT = { x: 24, y: 96, width: 240, height: 36 };
+const BUTTON_RECT = { x: 24, y: 148, width: 120, height: 36 };
+
+/** Mutable page state that the in-memory provider updates and the snapshot reflects. */
+type WebPageState = {
+  openedTarget: string;
+  inputValue: string;
+  statusText: string;
+  scrolled: boolean;
+};
+
+/** Scenario resource: the daemon harness, the recorded provider calls, and a close hook. */
+export type WebDesktopWorld = {
+  daemon: ProviderScenarioHarness;
+  semanticCalls: FlatToolCall[];
+  close: () => Promise<void>;
+};
+
+/**
+ * Builds a provider scenario harness backed by an in-memory WebProvider.
+ *
+ * Every provider method first appends a flattened record of the arguments it received to
+ * `semanticCalls`, then applies its effect (if any) to the page state.
+ *
+ * @returns The harness, the recorded calls, and a `close` that shuts the harness down only once.
+ */
+export async function createWebDesktopWorld(): Promise<WebDesktopWorld> {
+  const semanticCalls: FlatToolCall[] = [];
+  const state: WebPageState = {
+    openedTarget: 'about:blank',
+    inputValue: '',
+    statusText: 'Ready',
+    scrolled: false,
+  };
+
+  const provider: WebProvider = {
+    open: async (target, options) => {
+      semanticCalls.push(['web', 'open', target, options?.url ?? '']);
+      state.openedTarget = target;
+      state.statusText = 'Ready';
+    },
+    close: async (target) => {
+      semanticCalls.push(['web', 'close', target ?? '']);
+    },
+    snapshot: async (options) => {
+      semanticCalls.push([
+        'web',
+        'snapshot',
+        String(options?.interactiveOnly ?? ''),
+        String(options?.surface ?? ''),
+      ]);
+      return { nodes: webSnapshotNodes(state), truncated: false };
+    },
+    screenshot: async (outPath, options) => {
+      semanticCalls.push([
+        'web',
+        'screenshot',
+        outPath,
+        String(options?.fullscreen ?? ''),
+        String(options?.stabilize ?? ''),
+        String(options?.surface ?? ''),
+      ]);
+      fs.writeFileSync(outPath, validPng());
+    },
+    click: async (x, y) => {
+      semanticCalls.push(['web', 'click', String(x), String(y)]);
+      if (pointInRect(x, y, BUTTON_RECT)) {
+        state.statusText = 'Submitted';
+      }
+    },
+    fill: async (x, y, text, options) => {
+      semanticCalls.push([
+        'web',
+        'fill',
+        String(x),
+        String(y),
+        text,
+        String(options?.delayMs ?? 0),
+      ]);
+      if (pointInRect(x, y, INPUT_RECT)) {
+        state.inputValue = text;
+      }
+    },
+    typeText: async (text, options) => {
+      semanticCalls.push(['web', 'type', text, String(options?.delayMs ?? 0)]);
+      state.inputValue += text;
+    },
+    scroll: async (direction, options) => {
+      semanticCalls.push([
+        'web',
+        'scroll',
+        direction,
+        String(options?.amount ?? ''),
+        String(options?.pixels ?? ''),
+      ]);
+      state.scrolled = true;
+    },
+  };
+
+  const daemon = await createProviderScenarioHarness({
+    webProvider: () => provider,
+    deviceInventoryProvider: async () => [PROVIDER_SCENARIO_WEB],
+  });
+
+  let closed = false;
+  return {
+    daemon,
+    semanticCalls,
+    close: async () => {
+      if (closed) return;
+      closed = true;
+      await daemon.close();
+    },
+  };
+}
+
+/** Returns the six-node snapshot of the in-memory page, reflecting the current state. */
+function webSnapshotNodes(state: WebPageState): RawSnapshotNode[] {
+  return [
+    {
+      index: 0,
+      role: 'document',
+      label: state.openedTarget,
+      rect: { x: 0, y: 0, width: 390, height: 720 },
+      enabled: true,
+      hittable: true,
+      visibleToUser: true,
+      depth: 0,
+    },
+    {
+      index: 1,
+      role: 'static text',
+      label: 'Ready',
+      rect: { x: 24, y: 32, width: 160, height: 28 },
+      enabled: true,
+      hittable: true,
+      visibleToUser: true,
+      depth: 1,
+      parentIndex: 0,
+    },
+    {
+      index: 2,
+      role: 'text field',
+      label: 'Email',
+      value: state.inputValue,
+      rect: INPUT_RECT,
+      enabled: true,
+      hittable: true,
+      visibleToUser: true,
+      depth: 1,
+      parentIndex: 0,
+    },
+    {
+      index: 3,
+      role: 'button',
+      label: 'Submit order',
+      rect: BUTTON_RECT,
+      enabled: true,
+      hittable: true,
+      visibleToUser: true,
+      depth: 1,
+      parentIndex: 0,
+    },
+    {
+      index: 4,
+      role: 'static text',
+      label: state.statusText,
+      rect: { x: 24, y: 204, width: 180, height: 28 },
+      enabled: true,
+      hittable: true,
+      visibleToUser: true,
+      depth: 1,
+      parentIndex: 0,
+    },
+    {
+      index: 5,
+      role: 'static text',
+      label: state.scrolled ? 'Scrolled section' : 'Below the fold',
+      rect: { x: 24, y: 620, width: 180, height: 28 },
+      enabled: true,
+      hittable: true,
+      visibleToUser: true,
+      depth: 1,
+      parentIndex: 0,
+    },
+  ];
+}
+
+/** Returns true when the point (x, y) lies inside `rect`, edges included. */
+function pointInRect(
+  x: number,
+  y: number,
+  rect: { x: number; y: number; width: number; height: number },
+): boolean {
+  return x >= rect.x && x <= rect.x + rect.width && y >= rect.y && y <= rect.y + rect.height;
+}